author    aguzovsk <none@none>  2006-07-21 21:13:27 -0700
committer aguzovsk <none@none>  2006-07-21 21:13:27 -0700
commit    07b65a646252c0f0ec200acf82c256c5bf6883b1 (patch)
tree      376eab52fa782b1e9b5fc1ef1ce725a0a7e10fe2
parent    33bc63788dbca06354c73b744c41d4781504e89b (diff)
4614772 MPSS to be extended to shared memory
6413095 madvise() freeing up whole pages even when passed sizes smaller than a page
6416714 anon_map_getpages panic during stress test
6428347 multiple pagelocks/softlocks on the same large page should decrement availrmem just once
-rw-r--r--  usr/src/uts/common/fs/swapfs/swap_vnops.c    21
-rw-r--r--  usr/src/uts/common/os/grow.c                  9
-rw-r--r--  usr/src/uts/common/os/shm.c                  35
-rw-r--r--  usr/src/uts/common/sys/vmsystm.h              8
-rw-r--r--  usr/src/uts/common/syscall/memcntl.c         10
-rw-r--r--  usr/src/uts/common/vm/anon.h                 12
-rw-r--r--  usr/src/uts/common/vm/page.h                  6
-rw-r--r--  usr/src/uts/common/vm/seg_spt.c              93
-rw-r--r--  usr/src/uts/common/vm/seg_vn.c              671
-rw-r--r--  usr/src/uts/common/vm/seg_vn.h               11
-rw-r--r--  usr/src/uts/common/vm/vm_anon.c             311
-rw-r--r--  usr/src/uts/common/vm/vm_as.c               140
-rw-r--r--  usr/src/uts/common/vm/vm_page.c              22
-rw-r--r--  usr/src/uts/i86pc/vm/vm_machdep.c            24
-rw-r--r--  usr/src/uts/sfmmu/vm/hat_sfmmu.c              2
-rw-r--r--  usr/src/uts/sun4/vm/vm_dep.c                 46
-rw-r--r--  usr/src/uts/sun4u/cpu/us3_cheetah.c           9
-rw-r--r--  usr/src/uts/sun4u/vm/mach_vm_dep.c            9
-rw-r--r--  usr/src/uts/sun4v/vm/mach_vm_dep.c            2
19 files changed, 1111 insertions, 330 deletions
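
Before the per-file diffs, a note on the third bug above: 6428347 is fixed by charging availrmem for a large page exactly once, keyed to the page's root page_t, no matter how many pagelocks or softlocks land on its constituent small pages. The following is a minimal, compilable user-space model of that accounting idea only; struct lpage, slckcnt, and PAGES_PER_LPAGE are hypothetical stand-ins, not the kernel's names (the kernel tracks this in the new p_slckcnt field added in the page.h hunk below).

#include <stdio.h>

#define	PAGES_PER_LPAGE	8	/* constituent small pages per large page */

struct lpage {
	unsigned slckcnt;	/* stand-in for the new p_slckcnt counter */
};

static long availrmem = 100;	/* stand-in for the global free-page count */

static void
lock_constituent(struct lpage *root)
{
	if (root->slckcnt++ == 0)	/* first lock charges the whole page */
		availrmem -= PAGES_PER_LPAGE;
}

static void
unlock_constituent(struct lpage *root)
{
	if (--root->slckcnt == 0)	/* last unlock returns the charge */
		availrmem += PAGES_PER_LPAGE;
}

int
main(void)
{
	struct lpage lp = { 0 };

	lock_constituent(&lp);	/* charges 8 pages */
	lock_constituent(&lp);	/* same large page: no further charge */
	printf("availrmem = %ld\n", availrmem);	/* 92, not 84 */
	unlock_constituent(&lp);
	unlock_constituent(&lp);
	printf("availrmem = %ld\n", availrmem);	/* back to 100 */
	return (0);
}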
diff --git a/usr/src/uts/common/fs/swapfs/swap_vnops.c b/usr/src/uts/common/fs/swapfs/swap_vnops.c
index efc547cee7..348392da2c 100644
--- a/usr/src/uts/common/fs/swapfs/swap_vnops.c
+++ b/usr/src/uts/common/fs/swapfs/swap_vnops.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -68,8 +67,9 @@ static int swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);
int swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
- uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp, spgcnt_t *nreloc,
- struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);
+ uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
+ uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
+ enum seg_rw rw, struct cred *cr);
static int swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
size_t *lenp, int flags, struct cred *cr);
@@ -316,6 +316,7 @@ swap_getconpage(
page_t *pl[],
size_t plsz,
page_t *conpp,
+ uint_t *pszc,
spgcnt_t *nreloc,
struct seg *seg,
caddr_t addr,
@@ -362,18 +363,22 @@ swap_getconpage(
*/
if (pp != conpp) {
ASSERT(rw != S_CREATE);
+ ASSERT(pszc != NULL);
ASSERT(PAGE_SHARED(pp));
if (pp->p_szc < conpp->p_szc) {
+ *pszc = pp->p_szc;
page_unlock(pp);
err = -1;
- } else if (pp->p_szc > conpp->p_szc) {
+ } else if (pp->p_szc > conpp->p_szc &&
+ seg->s_szc > conpp->p_szc) {
+ *pszc = MIN(pp->p_szc, seg->s_szc);
page_unlock(pp);
err = -2;
} else {
pl[0] = pp;
pl[1] = NULL;
if (page_pptonum(pp) &
- (page_get_pagecnt(pp->p_szc) - 1))
+ (page_get_pagecnt(conpp->p_szc) - 1))
cmn_err(CE_PANIC, "swap_getconpage: no root");
}
return (err);
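
The new pszc out-parameter above gives swap_getconpage() a way to tell its caller which size code to retry with when the existing page disagrees with the constituent page: -1 means the existing page is smaller than requested, -2 means a larger page can be used within the segment's size code. A toy, compilable model of that return contract follows; the names are hypothetical and only the -1/-2/pszc shape is taken from the hunk above.

#include <stdio.h>

static int
getconpage_model(unsigned pp_szc, unsigned conpp_szc, unsigned seg_szc,
    unsigned *pszc)
{
	if (pp_szc < conpp_szc) {
		*pszc = pp_szc;		/* retry with the smaller size */
		return (-1);
	}
	if (pp_szc > conpp_szc && seg_szc > conpp_szc) {
		/* MIN(pp_szc, seg_szc): upgrade, but not past the segment */
		*pszc = pp_szc < seg_szc ? pp_szc : seg_szc;
		return (-2);
	}
	return (0);			/* sizes agree: use the page */
}

int
main(void)
{
	unsigned pszc = 0;
	int err;

	err = getconpage_model(3, 1, 2, &pszc);
	printf("%d szc=%u\n", err, pszc);	/* -2 szc=2 */
	err = getconpage_model(1, 2, 3, &pszc);
	printf("%d szc=%u\n", err, pszc);	/* -1 szc=1 */
	return (0);
}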
diff --git a/usr/src/uts/common/os/grow.c b/usr/src/uts/common/os/grow.c
index e14ed3f43e..c1e3326c67 100644
--- a/usr/src/uts/common/os/grow.c
+++ b/usr/src/uts/common/os/grow.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -587,7 +586,7 @@ zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
* If the requested address or length are aligned to the selected large
* page size, l1 or l3 may also be 0.
*/
- if (use_zmap_lpg) {
+ if (use_zmap_lpg && a.type == MAP_PRIVATE) {
pgsz = map_pgsz(MAPPGSZ_VA, p, *addrp, len, NULL);
if (pgsz <= PAGESIZE || len < pgsz) {
diff --git a/usr/src/uts/common/os/shm.c b/usr/src/uts/common/os/shm.c
index b39b801a08..a9ea0b59fc 100644
--- a/usr/src/uts/common/os/shm.c
+++ b/usr/src/uts/common/os/shm.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -761,6 +760,10 @@ shmdt(caddr_t addr)
mutex_exit(&pp->p_lock);
return (EINVAL);
}
+ if (sap->sa_addr != addr) {
+ mutex_exit(&pp->p_lock);
+ return (EINVAL);
+ }
avl_remove(pp->p_segacct, sap);
mutex_exit(&pp->p_lock);
@@ -942,13 +945,21 @@ shm_sacompar(const void *x, const void *y)
segacct_t *sa1 = (segacct_t *)x;
segacct_t *sa2 = (segacct_t *)y;
- if (sa1->sa_addr < sa2->sa_addr)
+ if (sa1->sa_addr < sa2->sa_addr) {
return (-1);
- if (sa1->sa_addr > sa2->sa_addr)
+ } else if (sa2->sa_len != 0) {
+ if (sa1->sa_addr >= sa2->sa_addr + sa2->sa_len) {
+ return (1);
+ } else if (sa1->sa_len != 0) {
+ return (1);
+ } else {
+ return (0);
+ }
+ } else if (sa1->sa_addr > sa2->sa_addr) {
return (1);
- if ((sa1->sa_len == 0) || (sa2->sa_len == 0))
+ } else {
return (0);
- return (1);
+ }
}
/*
@@ -1240,7 +1251,13 @@ shm_rm_amp(struct anon_map *amp, uint_t lckflag)
* Free up the anon_map.
*/
lgrp_shm_policy_fini(amp, NULL);
- anon_free(amp->ahp, 0, amp->size);
+ if (amp->a_szc != 0) {
+ ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
+ anon_shmap_free_pages(amp, 0, amp->size);
+ ANON_LOCK_EXIT(&amp->a_rwlock);
+ } else {
+ anon_free(amp->ahp, 0, amp->size);
+ }
anon_unresv(amp->swresv);
anonmap_free(amp);
}
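
The rewritten shm_sacompar() turns the per-process segacct AVL tree into a range tree: a zero-length key acts as a point lookup that compares equal to any stored [addr, addr + len) range containing it, while real (non-zero-length) entries that overlap always sort after, so no two stored ranges ever compare equal. Together with the new sa_addr != addr check in shmdt() above, a lookup by an interior address finds the attached segment but detaching at a non-base address now fails with EINVAL. A small compilable model of the comparator (struct and names are stand-ins):

#include <stdio.h>
#include <stdint.h>

struct sa {
	uintptr_t addr;
	size_t len;	/* 0 means "point lookup key" */
};

static int
sacompar(const struct sa *k, const struct sa *n)
{
	if (k->addr < n->addr)
		return (-1);
	if (n->len != 0) {
		if (k->addr >= n->addr + n->len)
			return (1);
		/* k starts inside n: equal only for a point key */
		return (k->len != 0 ? 1 : 0);
	}
	return (k->addr > n->addr ? 1 : 0);
}

int
main(void)
{
	struct sa node = { 0x10000, 0x4000 };	/* one attached segment */
	struct sa point = { 0x12000, 0 };	/* interior address lookup */
	struct sa range = { 0x12000, 0x1000 };	/* overlapping insertion */

	printf("%d\n", sacompar(&point, &node));	/* 0: found */
	printf("%d\n", sacompar(&range, &node));	/* 1: sorts after */
	return (0);
}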
diff --git a/usr/src/uts/common/sys/vmsystm.h b/usr/src/uts/common/sys/vmsystm.h
index 199753d799..1f0aea0235 100644
--- a/usr/src/uts/common/sys/vmsystm.h
+++ b/usr/src/uts/common/sys/vmsystm.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -122,6 +121,7 @@ extern int useracc(void *, size_t, int);
extern size_t map_pgsz(int maptype, struct proc *p, caddr_t addr,
size_t len, int *remap);
extern uint_t map_execseg_pgszcvec(int, caddr_t, size_t);
+extern uint_t map_shm_pgszcvec(caddr_t, size_t, uintptr_t);
extern void map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign,
uint_t flags);
extern int map_addr_vacalign_check(caddr_t, u_offset_t);
diff --git a/usr/src/uts/common/syscall/memcntl.c b/usr/src/uts/common/syscall/memcntl.c
index 85f54e9944..6bdf5a1cc2 100644
--- a/usr/src/uts/common/syscall/memcntl.c
+++ b/usr/src/uts/common/syscall/memcntl.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -340,6 +339,9 @@ memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)
}
return (error);
case MC_ADVISE:
+ if ((uintptr_t)arg == MADV_FREE) {
+ len &= PAGEMASK;
+ }
switch ((uintptr_t)arg) {
case MADV_WILLNEED:
fc = as_faulta(as, addr, len);
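
The MC_ADVISE change is the fix for 6413095: len is truncated down to a whole number of pages before the MADV_FREE dispatch, so a sub-page length can no longer free the entire page that contains it. A worked example of the truncation, assuming the conventional PAGESIZE/PAGEMASK definitions (4K pages here; the kernel's values are platform-dependent):

#include <stdio.h>

#define	PAGESIZE	4096UL
#define	PAGEMASK	(~(PAGESIZE - 1))	/* assumed conventional def */

int
main(void)
{
	size_t len = 100;	/* caller asks to free 100 bytes */

	printf("%zu\n", len & PAGEMASK);	/* 0: nothing is freed */
	len = PAGESIZE + 100;
	printf("%zu\n", len & PAGEMASK);	/* 4096: one whole page */
	return (0);
}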
diff --git a/usr/src/uts/common/vm/anon.h b/usr/src/uts/common/vm/anon.h
index 97a8b31ca4..294867ca01 100644
--- a/usr/src/uts/common/vm/anon.h
+++ b/usr/src/uts/common/vm/anon.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -282,7 +281,7 @@ struct anon_map {
struct anon_hdr *ahp; /* anon array header pointer, containing */
/* anon pointer array(s) */
size_t swresv; /* swap space reserved for this anon_map */
- uint_t refcnt; /* reference count on this structure */
+ ulong_t refcnt; /* reference count on this structure */
ushort_t a_szc; /* max szc among shared processes */
void *locality; /* lgroup locality info */
};
@@ -362,7 +361,7 @@ extern void anon_disclaim(struct anon_map *, ulong_t, size_t, int);
extern int anon_getpage(struct anon **, uint_t *, struct page **,
size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
extern int swap_getconpage(struct vnode *, u_offset_t, size_t,
- uint_t *, page_t *[], size_t, page_t *,
+ uint_t *, page_t *[], size_t, page_t *, uint_t *,
spgcnt_t *, struct seg *, caddr_t,
enum seg_rw, struct cred *);
extern int anon_map_getpages(struct anon_map *, ulong_t,
@@ -384,6 +383,7 @@ extern int anon_map_createpages(struct anon_map *, ulong_t,
extern int anon_map_demotepages(struct anon_map *, ulong_t,
struct seg *, caddr_t, uint_t,
struct vpage [], struct cred *);
+extern void anon_shmap_free_pages(struct anon_map *, ulong_t, size_t);
extern int anon_resvmem(size_t, uint_t);
extern void anon_unresv(size_t);
extern struct anon_map *anonmap_alloc(size_t, size_t);
diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h
index 38faf2afef..590675b8a9 100644
--- a/usr/src/uts/common/vm/page.h
+++ b/usr/src/uts/common/vm/page.h
@@ -497,7 +497,7 @@ typedef struct page {
#if defined(_LP64)
uint_t p_sharepad; /* pad for growing p_share */
#endif
- uint_t p_msresv_1; /* reserved for future use */
+ uint_t p_slckcnt; /* number of softlocks */
#if defined(__sparc)
uint_t p_kpmref; /* number of kpm mapping sharers */
struct kpme *p_kpmelist; /* kpm specific mapping info */
@@ -516,6 +516,10 @@ typedef struct page {
typedef page_t devpage_t;
#define devpage page
+#define PAGE_LOCK_MAXIMUM \
+ ((1 << (sizeof (((page_t *)0)->p_lckcnt) * NBBY)) - 1)
+
+#define PAGE_SLOCK_MAXIMUM UINT_MAX
/*
* Page hash table is a power-of-two in size, externally chained
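
PAGE_LOCK_MAXIMUM derives the p_lckcnt ceiling from the field's declared width instead of hard-coding a constant. The arithmetic is shown below with a hypothetical one-byte field; the kernel's p_lckcnt may be wider, but the sizeof-based expression adapts automatically:

#include <stdio.h>

#define	NBBY	8	/* bits per byte, as in sys/param.h */

struct page_model {
	unsigned char lckcnt;	/* hypothetical 1-byte lock counter */
};

/* same shape as PAGE_LOCK_MAXIMUM: all-ones value of the field's width */
#define	LOCK_MAXIMUM \
	((1 << (sizeof (((struct page_model *)0)->lckcnt) * NBBY)) - 1)

int
main(void)
{
	printf("%d\n", (int)LOCK_MAXIMUM);	/* 255 for an 8-bit field */
	return (0);
}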
diff --git a/usr/src/uts/common/vm/seg_spt.c b/usr/src/uts/common/vm/seg_spt.c
index 444ac3c153..774a9c3b9f 100644
--- a/usr/src/uts/common/vm/seg_spt.c
+++ b/usr/src/uts/common/vm/seg_spt.c
@@ -376,6 +376,11 @@ segspt_create(struct seg *seg, caddr_t argsp)
struct vnode *vp;
page_t **ppa;
uint_t hat_flags;
+ size_t pgsz;
+ pgcnt_t pgcnt;
+ caddr_t a;
+ pgcnt_t pidx;
+ size_t sz;
/*
* We are holding the a_lock on the underlying dummy as,
@@ -419,7 +424,9 @@ segspt_create(struct seg *seg, caddr_t argsp)
seg->s_szc = sptcargs->szc;
ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
- amp->a_szc = seg->s_szc;
+ if (seg->s_szc > amp->a_szc) {
+ amp->a_szc = seg->s_szc;
+ }
ANON_LOCK_EXIT(&amp->a_rwlock);
/*
@@ -506,8 +513,20 @@ segspt_create(struct seg *seg, caddr_t argsp)
if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, NULL))
hat_flags |= HAT_LOAD_LOCK;
- hat_memload_array(seg->s_as->a_hat, addr, ptob(npages),
- ppa, sptd->spt_prot, hat_flags);
+ /*
+ * Load translations one large page at a time to make sure we
+ * don't create mappings bigger than the segment's size code, in
+ * case the underlying pages are shared with a segvn segment that
+ * uses a bigger size code than we do.
+ */
+ pgsz = page_get_pagesize(seg->s_szc);
+ pgcnt = page_get_pagecnt(seg->s_szc);
+ for (a = addr, pidx = 0; pidx < npages; a += pgsz, pidx += pgcnt) {
+ sz = MIN(pgsz, ptob(npages - pidx));
+ hat_memload_array(seg->s_as->a_hat, a, sz,
+ &ppa[pidx], sptd->spt_prot, hat_flags);
+ }
/*
* On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
@@ -1701,13 +1720,17 @@ segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr,
struct as *curspt = shmd->shm_sptas;
struct spt_data *sptd = sptseg->s_data;
pgcnt_t npages;
- size_t share_sz, size;
+ size_t size;
caddr_t segspt_addr, shm_addr;
page_t **ppa;
int i;
ulong_t an_idx = 0;
int err = 0;
int dyn_ism_unmap = hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0);
+ size_t pgsz;
+ pgcnt_t pgcnt;
+ caddr_t a;
+ pgcnt_t pidx;
#ifdef lint
hat = hat;
@@ -1740,9 +1763,10 @@ segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr,
* layer by calling hat_memload_array() with differing page sizes
* over a given virtual range.
*/
- share_sz = page_get_pagesize(sptseg->s_szc);
- shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_sz);
- size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), share_sz);
+ pgsz = page_get_pagesize(sptseg->s_szc);
+ pgcnt = page_get_pagecnt(sptseg->s_szc);
+ shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz);
+ size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz);
npages = btopr(size);
/*
@@ -1792,15 +1816,19 @@ segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr,
goto dism_err;
}
AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER);
+ a = segspt_addr;
+ pidx = 0;
if (type == F_SOFTLOCK) {
/*
* Load up the translation keeping it
* locked and don't unlock the page.
*/
- hat_memload_array(sptseg->s_as->a_hat, segspt_addr,
- size, ppa, sptd->spt_prot,
- HAT_LOAD_LOCK | HAT_LOAD_SHARE);
+ for (; pidx < npages; a += pgsz, pidx += pgcnt) {
+ hat_memload_array(sptseg->s_as->a_hat,
+ a, pgsz, &ppa[pidx], sptd->spt_prot,
+ HAT_LOAD_LOCK | HAT_LOAD_SHARE);
+ }
} else {
if (hat == seg->s_as->a_hat) {
@@ -1812,9 +1840,13 @@ segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr,
npages);
/* CPU HAT */
- hat_memload_array(sptseg->s_as->a_hat,
- segspt_addr, size, ppa, sptd->spt_prot,
- HAT_LOAD_SHARE);
+ for (; pidx < npages;
+ a += pgsz, pidx += pgcnt) {
+ hat_memload_array(sptseg->s_as->a_hat,
+ a, pgsz, &ppa[pidx],
+ sptd->spt_prot,
+ HAT_LOAD_SHARE);
+ }
} else {
/* XHAT. Pass real address */
hat_memload_array(hat, shm_addr,
@@ -1896,7 +1928,7 @@ segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr,
struct as *curspt = shmd->shm_sptas;
struct spt_data *sptd = sptseg->s_data;
pgcnt_t npages;
- size_t share_size, size;
+ size_t size;
caddr_t sptseg_addr, shm_addr;
page_t *pp, **ppa;
int i;
@@ -1906,6 +1938,11 @@ segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr,
struct anon_map *amp; /* XXX - for locknest */
struct anon *ap = NULL;
anon_sync_obj_t cookie;
+ size_t pgsz;
+ pgcnt_t pgcnt;
+ caddr_t a;
+ pgcnt_t pidx;
+ size_t sz;
#ifdef lint
hat = hat;
@@ -1943,9 +1980,10 @@ segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr,
* layer by calling hat_memload_array() with differing page sizes
* over a given virtual range.
*/
- share_size = page_get_pagesize(sptseg->s_szc);
- shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_size);
- size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), share_size);
+ pgsz = page_get_pagesize(sptseg->s_szc);
+ pgcnt = page_get_pagecnt(sptseg->s_szc);
+ shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz);
+ size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz);
npages = btopr(size);
/*
@@ -2045,14 +2083,19 @@ segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr,
* underlying HAT layer.
*/
AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER);
+ a = sptseg_addr;
+ pidx = 0;
if (type == F_SOFTLOCK) {
/*
* Load up the translation keeping it
* locked and don't unlock the page.
*/
- hat_memload_array(sptseg->s_as->a_hat, sptseg_addr,
- ptob(npages), ppa, sptd->spt_prot,
- HAT_LOAD_LOCK | HAT_LOAD_SHARE);
+ for (; pidx < npages; a += pgsz, pidx += pgcnt) {
+ sz = MIN(pgsz, ptob(npages - pidx));
+ hat_memload_array(sptseg->s_as->a_hat, a,
+ sz, &ppa[pidx], sptd->spt_prot,
+ HAT_LOAD_LOCK | HAT_LOAD_SHARE);
+ }
} else {
if (hat == seg->s_as->a_hat) {
@@ -2064,9 +2107,13 @@ segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr,
npages);
/* CPU HAT */
- hat_memload_array(sptseg->s_as->a_hat,
- sptseg_addr, ptob(npages), ppa,
- sptd->spt_prot, HAT_LOAD_SHARE);
+ for (; pidx < npages;
+ a += pgsz, pidx += pgcnt) {
+ sz = MIN(pgsz, ptob(npages - pidx));
+ hat_memload_array(sptseg->s_as->a_hat,
+ a, sz, &ppa[pidx],
+ sptd->spt_prot, HAT_LOAD_SHARE);
+ }
} else {
/* XHAT. Pass real address */
hat_memload_array(hat, shm_addr,
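
All three seg_spt.c hunks replace a single hat_memload_array() call over the whole range with a loop that loads at most one large page per iteration, so the HAT never builds a mapping bigger than this segment's size code even when the underlying pages are shared with a segvn segment using a bigger one. A compilable sketch of just the chunking arithmetic; the size-code-to-bytes mapping below is a toy stand-in for page_get_pagesize()/page_get_pagecnt(), not any platform's real table:

#include <stdio.h>

#define	PAGESIZE	4096UL
#define	PAGESHIFT	12

int
main(void)
{
	unsigned szc = 2;			/* hypothetical size code */
	size_t pgsz = PAGESIZE << (2 * szc);	/* toy page_get_pagesize() */
	size_t pgcnt = pgsz / PAGESIZE;		/* toy page_get_pagecnt() */
	size_t npages = 70;			/* not a multiple of pgcnt */
	size_t pidx;

	for (pidx = 0; pidx < npages; pidx += pgcnt) {
		size_t sz = pgsz;		/* sz = MIN(pgsz, remaining) */

		if (((npages - pidx) << PAGESHIFT) < sz)
			sz = (npages - pidx) << PAGESHIFT;
		printf("load %zu bytes starting at page index %zu\n",
		    sz, pidx);	/* four 64K chunks, then a trimmed 24K tail */
	}
	return (0);
}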
diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c
index 2f8b6fbcd1..6c26b2d49c 100644
--- a/usr/src/uts/common/vm/seg_vn.c
+++ b/usr/src/uts/common/vm/seg_vn.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -176,7 +175,7 @@ static faultcode_t segvn_fault_anonpages(struct hat *, struct seg *, caddr_t,
caddr_t, enum fault_type, enum seg_rw, caddr_t, caddr_t, int);
static faultcode_t segvn_faultpage(struct hat *, struct seg *, caddr_t,
u_offset_t, struct vpage *, page_t **, uint_t,
- enum fault_type, enum seg_rw, int);
+ enum fault_type, enum seg_rw, int, int);
static void segvn_vpage(struct seg *);
static void segvn_purge(struct seg *seg);
@@ -185,12 +184,15 @@ static int segvn_reclaim(struct seg *, caddr_t, size_t, struct page **,
static int sameprot(struct seg *, caddr_t, size_t);
-static int segvn_demote_range(struct seg *, caddr_t, size_t, int);
+static int segvn_demote_range(struct seg *, caddr_t, size_t, int, uint_t);
static int segvn_clrszc(struct seg *);
static struct seg *segvn_split_seg(struct seg *, caddr_t);
static int segvn_claim_pages(struct seg *, struct vpage *, u_offset_t,
ulong_t, uint_t);
+static int segvn_pp_lock_anonpages(page_t *, int);
+static void segvn_pp_unlock_anonpages(page_t *, int);
+
static struct kmem_cache *segvn_cache;
#ifdef VM_STATS
@@ -272,6 +274,7 @@ ulong_t segvn_vmpss_clrszc_err;
ulong_t segvn_fltvnpages_clrszc_cnt;
ulong_t segvn_fltvnpages_clrszc_err;
ulong_t segvn_setpgsz_align_err;
+ulong_t segvn_setpgsz_anon_align_err;
ulong_t segvn_setpgsz_getattr_err;
ulong_t segvn_setpgsz_eof_err;
ulong_t segvn_faultvnmpss_align_err1;
@@ -388,8 +391,8 @@ segvn_create(struct seg *seg, void *argsp)
a->flags &= ~MAP_NORESERVE;
if (a->szc != 0) {
- if (segvn_lpg_disable != 0 || a->amp != NULL ||
- (a->type == MAP_SHARED && a->vp == NULL) ||
+ if (segvn_lpg_disable != 0 ||
+ (a->amp != NULL && a->type == MAP_PRIVATE) ||
(a->flags & MAP_NORESERVE) || seg->s_as == &kas) {
a->szc = 0;
} else {
@@ -412,6 +415,12 @@ segvn_create(struct seg *seg, void *argsp)
a->offset & PAGEMASK)) {
a->szc = 0;
}
+ } else if (a->amp != NULL) {
+ pgcnt_t anum = btopr(a->offset);
+ pgcnt_t pgcnt = page_get_pagecnt(a->szc);
+ if (!IS_P2ALIGNED(anum, pgcnt)) {
+ a->szc = 0;
+ }
}
}
}
@@ -657,6 +666,9 @@ segvn_create(struct seg *seg, void *argsp)
*/
ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
amp->refcnt++;
+ if (a->szc > amp->a_szc) {
+ amp->a_szc = a->szc;
+ }
ANON_LOCK_EXIT(&amp->a_rwlock);
svd->anon_index = anon_num;
svd->swresv = 0;
@@ -754,10 +766,10 @@ segvn_create(struct seg *seg, void *argsp)
* Concatenate two existing segments, if possible.
* Return 0 on success, -1 if two segments are not compatible
* or -2 on memory allocation failure.
- * If private == 1 then try and concat segments with private pages.
+ * If amp_cat == 1 then try to concat segments with anon maps.
*/
static int
-segvn_concat(struct seg *seg1, struct seg *seg2, int private)
+segvn_concat(struct seg *seg1, struct seg *seg2, int amp_cat)
{
struct segvn_data *svd1 = seg1->s_data;
struct segvn_data *svd2 = seg2->s_data;
@@ -793,13 +805,23 @@ segvn_concat(struct seg *seg1, struct seg *seg2, int private)
/*
* Fail early if we're not supposed to concatenate
- * private pages.
+ * segments with a non-NULL amp.
*/
- if ((private == 0 || svd1->type != MAP_PRIVATE) &&
- (amp1 != NULL || amp2 != NULL)) {
+ if (amp_cat == 0 && (amp1 != NULL || amp2 != NULL)) {
return (-1);
}
+ if (svd1->vp == NULL && svd1->type == MAP_SHARED) {
+ if (amp1 != amp2) {
+ return (-1);
+ }
+ if (amp1 != NULL && svd1->anon_index + btop(seg1->s_size) !=
+ svd2->anon_index) {
+ return (-1);
+ }
+ ASSERT(amp1 == NULL || amp1->refcnt >= 2);
+ }
+
/*
* If either seg has vpages, create a new merged vpage array.
*/
@@ -840,13 +862,24 @@ segvn_concat(struct seg *seg1, struct seg *seg2, int private)
/*
* If either segment has private pages, create a new merged anon
- * array.
+ * array. If merging shared anon segments just decrement the anon map's
+ * refcnt.
*/
- if (amp1 != NULL || amp2 != NULL) {
+ if (amp1 != NULL && svd1->type == MAP_SHARED) {
+ ASSERT(amp1 == amp2 && svd1->vp == NULL);
+ ANON_LOCK_ENTER(&amp1->a_rwlock, RW_WRITER);
+ ASSERT(amp1->refcnt >= 2);
+ amp1->refcnt--;
+ ANON_LOCK_EXIT(&amp1->a_rwlock);
+ svd2->amp = NULL;
+ } else if (amp1 != NULL || amp2 != NULL) {
struct anon_hdr *nahp;
struct anon_map *namp = NULL;
- size_t asize = seg1->s_size + seg2->s_size;
+ size_t asize;
+
+ ASSERT(svd1->type == MAP_PRIVATE);
+ asize = seg1->s_size + seg2->s_size;
if ((nahp = anon_create(btop(asize), ANON_NOSLEEP)) == NULL) {
if (nvpage != NULL) {
kmem_free(nvpage, nvpsize);
@@ -1442,7 +1475,7 @@ retry:
if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(len, pgsz)) {
ASSERT(seg->s_base != addr || seg->s_size != len);
VM_STAT_ADD(segvnvmstats.demoterange[0]);
- err = segvn_demote_range(seg, addr, len, SDR_END);
+ err = segvn_demote_range(seg, addr, len, SDR_END, 0);
if (err == 0) {
return (IE_RETRY);
}
@@ -1490,6 +1523,7 @@ retry:
dpages = btop(len);
npages = opages - dpages;
amp = svd->amp;
+ ASSERT(amp == NULL || amp->a_szc >= seg->s_szc);
/*
* Check for beginning of segment
@@ -1514,17 +1548,27 @@ retry:
/*
* Free up now unused parts of anon_map array.
*/
- if (seg->s_szc != 0) {
- anon_free_pages(amp->ahp,
- svd->anon_index, len, seg->s_szc);
+ if (amp->a_szc == seg->s_szc) {
+ if (seg->s_szc != 0) {
+ anon_free_pages(amp->ahp,
+ svd->anon_index, len,
+ seg->s_szc);
+ } else {
+ anon_free(amp->ahp,
+ svd->anon_index,
+ len);
+ }
} else {
- anon_free(amp->ahp, svd->anon_index,
- len);
+ ASSERT(svd->type == MAP_SHARED);
+ ASSERT(amp->a_szc > seg->s_szc);
+ anon_shmap_free_pages(amp,
+ svd->anon_index, len);
}
/*
- * Unreserve swap space for the unmapped chunk
- * of this segment in case it's MAP_SHARED
+ * Unreserve swap space for the
+ * unmapped chunk of this segment in
+ * case it's MAP_SHARED
*/
if (svd->type == MAP_SHARED) {
anon_unresv(len);
@@ -1580,20 +1624,29 @@ retry:
ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
if (amp->refcnt == 1 || svd->type == MAP_PRIVATE) {
/*
- * Free up now unused parts of anon_map array
+ * Free up now unused parts of anon_map array.
*/
- if (seg->s_szc != 0) {
- ulong_t an_idx = svd->anon_index +
- npages;
- anon_free_pages(amp->ahp, an_idx,
- len, seg->s_szc);
+ ulong_t an_idx = svd->anon_index + npages;
+ if (amp->a_szc == seg->s_szc) {
+ if (seg->s_szc != 0) {
+ anon_free_pages(amp->ahp,
+ an_idx, len,
+ seg->s_szc);
+ } else {
+ anon_free(amp->ahp, an_idx,
+ len);
+ }
} else {
- anon_free(amp->ahp,
- svd->anon_index + npages, len);
+ ASSERT(svd->type == MAP_SHARED);
+ ASSERT(amp->a_szc > seg->s_szc);
+ anon_shmap_free_pages(amp,
+ an_idx, len);
}
+
/*
- * Unreserve swap space for the unmapped chunk
- * of this segment in case it's MAP_SHARED
+ * Unreserve swap space for the
+ * unmapped chunk of this segment in
+ * case it's MAP_SHARED
*/
if (svd->type == MAP_SHARED) {
anon_unresv(len);
@@ -1689,31 +1742,36 @@ retry:
ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
if (amp->refcnt == 1 || svd->type == MAP_PRIVATE) {
/*
- * Free up now unused parts of anon_map array
+ * Free up now unused parts of anon_map array.
*/
- if (seg->s_szc != 0) {
- ulong_t an_idx = svd->anon_index + opages;
- anon_free_pages(amp->ahp, an_idx, len,
- seg->s_szc);
+ ulong_t an_idx = svd->anon_index + opages;
+ if (amp->a_szc == seg->s_szc) {
+ if (seg->s_szc != 0) {
+ anon_free_pages(amp->ahp, an_idx, len,
+ seg->s_szc);
+ } else {
+ anon_free(amp->ahp, an_idx,
+ len);
+ }
} else {
- anon_free(amp->ahp, svd->anon_index + opages,
- len);
+ ASSERT(svd->type == MAP_SHARED);
+ ASSERT(amp->a_szc > seg->s_szc);
+ anon_shmap_free_pages(amp, an_idx, len);
}
/*
- * Unreserve swap space for the unmapped chunk
- * of this segment in case it's MAP_SHARED
+ * Unreserve swap space for the
+ * unmapped chunk of this segment in
+ * case it's MAP_SHARED
*/
if (svd->type == MAP_SHARED) {
anon_unresv(len);
amp->swresv -= len;
}
}
-
nsvd->anon_index = svd->anon_index +
btop((uintptr_t)(nseg->s_base - seg->s_base));
if (svd->type == MAP_SHARED) {
- ASSERT(seg->s_szc == 0);
amp->refcnt++;
nsvd->amp = amp;
} else {
@@ -1799,6 +1857,7 @@ segvn_free(struct seg *seg)
* up all the anon slot pointers that we can.
*/
ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
+ ASSERT(amp->a_szc >= seg->s_szc);
if (--amp->refcnt == 0) {
if (svd->type == MAP_PRIVATE) {
/*
@@ -1819,8 +1878,12 @@ segvn_free(struct seg *seg)
* anon_map's worth of stuff and
* release any swap reservation.
*/
- ASSERT(seg->s_szc == 0);
- anon_free(amp->ahp, 0, amp->size);
+ if (amp->a_szc != 0) {
+ anon_shmap_free_pages(amp, 0,
+ amp->size);
+ } else {
+ anon_free(amp->ahp, 0, amp->size);
+ }
if ((len = amp->swresv) != 0) {
anon_unresv(len);
TRACE_3(TR_FAC_VM, TR_ANON_PROC,
@@ -1876,6 +1939,140 @@ segvn_free(struct seg *seg)
kmem_cache_free(segvn_cache, svd);
}
+ulong_t segvn_lpglck_limit = 0;
+/*
+ * Support routines used by segvn_pagelock() and softlock faults for anonymous
+ * pages to implement availrmem accounting in a way that makes sure the
+ * same memory is accounted just once for all softlock/pagelock purposes.
+ * This prevents a bug where availrmem is quickly and incorrectly exhausted
+ * by several pagelocks to different parts of the same large page, since
+ * each pagelock has to decrement availrmem by the size of the entire large
+ * page. Note those pages are not COW shared until softunlock/pageunlock,
+ * so we don't need to use COW-style accounting here. We also need to make
+ * sure the entire large page is accounted even if the softlock range is
+ * less than the entire large page, because large anon pages can't be
+ * demoted while any constituent page is locked. The caller calls this
+ * routine for every page_t it locks. The very first page in the range may
+ * not be the root page of a large page. For all other pages it's
+ * guaranteed we are going to visit the root of a particular large page
+ * before any other constituent page, as we are locking sequential pages
+ * belonging to the same anon map. So we do all the locking when the root
+ * is encountered, except for the very first page. Since softlocking is not
+ * supported (except for the S_READ_NOCOW special case) for vmpss segments,
+ * and since vnode pages can be demoted without locking all constituent
+ * pages, vnode pages don't come here. Unlocking relies on the fact that
+ * the page size can't change while any constituent page is locked at least
+ * SE_SHARED. This allows the unlocking code to find the right root and
+ * decrement availrmem by the same amount it was incremented when the page
+ * was locked.
+ */
+static int
+segvn_pp_lock_anonpages(page_t *pp, int first)
+{
+ pgcnt_t pages;
+ pfn_t pfn;
+ uchar_t szc = pp->p_szc;
+
+ ASSERT(PAGE_LOCKED(pp));
+ ASSERT(pp->p_vnode != NULL);
+ ASSERT(IS_SWAPFSVP(pp->p_vnode));
+
+ /*
+ * pagesize won't change as long as any constituent page is locked.
+ */
+ pages = page_get_pagecnt(pp->p_szc);
+ pfn = page_pptonum(pp);
+
+ if (!first) {
+ if (!IS_P2ALIGNED(pfn, pages)) {
+#ifdef DEBUG
+ pp = &pp[-(spgcnt_t)(pfn & (pages - 1))];
+ pfn = page_pptonum(pp);
+ ASSERT(IS_P2ALIGNED(pfn, pages));
+ ASSERT(pp->p_szc == szc);
+ ASSERT(pp->p_vnode != NULL);
+ ASSERT(IS_SWAPFSVP(pp->p_vnode));
+ ASSERT(pp->p_slckcnt != 0);
+#endif /* DEBUG */
+ return (1);
+ }
+ } else if (!IS_P2ALIGNED(pfn, pages)) {
+ pp = &pp[-(spgcnt_t)(pfn & (pages - 1))];
+#ifdef DEBUG
+ pfn = page_pptonum(pp);
+ ASSERT(IS_P2ALIGNED(pfn, pages));
+ ASSERT(pp->p_szc == szc);
+ ASSERT(pp->p_vnode != NULL);
+ ASSERT(IS_SWAPFSVP(pp->p_vnode));
+#endif /* DEBUG */
+ }
+
+ /*
+ * pp is a root page.
+ * We haven't locked this large page yet.
+ */
+ page_struct_lock(pp);
+ if (pp->p_slckcnt != 0) {
+ if (pp->p_slckcnt < PAGE_SLOCK_MAXIMUM) {
+ pp->p_slckcnt++;
+ page_struct_unlock(pp);
+ return (1);
+ }
+ page_struct_unlock(pp);
+ segvn_lpglck_limit++;
+ return (0);
+ }
+ mutex_enter(&freemem_lock);
+ if (availrmem < tune.t_minarmem + pages) {
+ mutex_exit(&freemem_lock);
+ page_struct_unlock(pp);
+ return (0);
+ }
+ pp->p_slckcnt++;
+ availrmem -= pages;
+ mutex_exit(&freemem_lock);
+ page_struct_unlock(pp);
+ return (1);
+}
+
+static void
+segvn_pp_unlock_anonpages(page_t *pp, int first)
+{
+ pgcnt_t pages;
+ pfn_t pfn;
+
+ ASSERT(PAGE_LOCKED(pp));
+ ASSERT(pp->p_vnode != NULL);
+ ASSERT(IS_SWAPFSVP(pp->p_vnode));
+
+ /*
+ * pagesize won't change as long as any constituent page is locked.
+ */
+ pages = page_get_pagecnt(pp->p_szc);
+ pfn = page_pptonum(pp);
+
+ if (!first) {
+ if (!IS_P2ALIGNED(pfn, pages)) {
+ return;
+ }
+ } else if (!IS_P2ALIGNED(pfn, pages)) {
+ pp = &pp[-(spgcnt_t)(pfn & (pages - 1))];
+#ifdef DEBUG
+ pfn = page_pptonum(pp);
+ ASSERT(IS_P2ALIGNED(pfn, pages));
+#endif /* DEBUG */
+ }
+ ASSERT(pp->p_vnode != NULL);
+ ASSERT(IS_SWAPFSVP(pp->p_vnode));
+ ASSERT(pp->p_slckcnt != 0);
+ page_struct_lock(pp);
+ if (--pp->p_slckcnt == 0) {
+ mutex_enter(&freemem_lock);
+ availrmem += pages;
+ mutex_exit(&freemem_lock);
+ }
+ page_struct_unlock(pp);
+}
+
/*
* Do a F_SOFTUNLOCK call over the range requested. The range must have
* already been F_SOFTLOCK'ed.
@@ -1943,10 +2140,15 @@ segvn_softunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
}
TRACE_3(TR_FAC_VM, TR_SEGVN_FAULT,
"segvn_fault:pp %p vp %p offset %llx", pp, vp, offset);
+ if (svd->vp == NULL) {
+ segvn_pp_unlock_anonpages(pp, adr == addr);
+ }
page_unlock(pp);
}
mutex_enter(&freemem_lock); /* for availrmem */
- availrmem += btop(len);
+ if (svd->vp != NULL) {
+ availrmem += btop(len);
+ }
segvn_pages_locked -= btop(len);
svd->softlockcnt -= btop(len);
mutex_exit(&freemem_lock);
@@ -2028,7 +2230,8 @@ segvn_faultpage(
uint_t vpprot, /* access allowed to object pages */
enum fault_type type, /* type of fault */
enum seg_rw rw, /* type of access at fault */
- int brkcow) /* we may need to break cow */
+ int brkcow, /* we may need to break cow */
+ int first) /* first page for this fault if 1 */
{
struct segvn_data *svd = (struct segvn_data *)seg->s_data;
page_t *pp, **ppp;
@@ -2084,14 +2287,14 @@ segvn_faultpage(
prot = svd->prot;
}
- if (type == F_SOFTLOCK) {
+ if (type == F_SOFTLOCK && svd->vp != NULL) {
mutex_enter(&freemem_lock);
if (availrmem <= tune.t_minarmem) {
mutex_exit(&freemem_lock);
return (FC_MAKE_ERR(ENOMEM)); /* out of real memory */
} else {
- svd->softlockcnt++;
availrmem--;
+ svd->softlockcnt++;
segvn_pages_locked++;
}
mutex_exit(&freemem_lock);
@@ -2134,6 +2337,21 @@ segvn_faultpage(
*/
(void) anon_set_ptr(amp->ahp, anon_index, ap,
ANON_SLEEP);
+
+ ASSERT(pp->p_szc == 0);
+ if (type == F_SOFTLOCK) {
+ if (!segvn_pp_lock_anonpages(pp, first)) {
+ page_unlock(pp);
+ err = ENOMEM;
+ goto out;
+ } else {
+ mutex_enter(&freemem_lock);
+ svd->softlockcnt++;
+ segvn_pages_locked++;
+ mutex_exit(&freemem_lock);
+ }
+ }
+
if (enable_mbit_wa) {
if (rw == S_WRITE)
hat_setmod(pp);
@@ -2263,6 +2481,23 @@ segvn_faultpage(
* and return.
*/
if (cow == 0) {
+ if (type == F_SOFTLOCK && svd->vp == NULL) {
+
+ ASSERT(opp->p_szc == 0 ||
+ (svd->type == MAP_SHARED &&
+ amp != NULL && amp->a_szc != 0));
+
+ if (!segvn_pp_lock_anonpages(opp, first)) {
+ page_unlock(opp);
+ err = ENOMEM;
+ goto out;
+ } else {
+ mutex_enter(&freemem_lock);
+ svd->softlockcnt++;
+ segvn_pages_locked++;
+ mutex_exit(&freemem_lock);
+ }
+ }
if (IS_VMODSORT(opp->p_vnode) || enable_mbit_wa) {
if (rw == S_WRITE)
hat_setmod(opp);
@@ -2380,6 +2615,20 @@ segvn_faultpage(
(void) anon_set_ptr(amp->ahp, anon_index, ap, ANON_SLEEP);
+ ASSERT(pp->p_szc == 0);
+ if (type == F_SOFTLOCK && svd->vp == NULL) {
+ if (!segvn_pp_lock_anonpages(pp, first)) {
+ page_unlock(pp);
+ err = ENOMEM;
+ goto out;
+ } else {
+ mutex_enter(&freemem_lock);
+ svd->softlockcnt++;
+ segvn_pages_locked++;
+ mutex_exit(&freemem_lock);
+ }
+ }
+
ASSERT(!IS_VMODSORT(pp->p_vnode));
if (enable_mbit_wa) {
if (rw == S_WRITE)
@@ -2406,7 +2655,7 @@ out:
if (anon_lock)
anon_array_exit(&cookie);
- if (type == F_SOFTLOCK) {
+ if (type == F_SOFTLOCK && svd->vp != NULL) {
mutex_enter(&freemem_lock);
availrmem++;
segvn_pages_locked--;
@@ -3660,9 +3909,17 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
}
SEGVN_UPDATE_MODBITS(ppa, pages, rw,
prot, vpprot);
- for (i = 0; i < pages; i++) {
- hat_memload(hat, a + (i << PAGESHIFT),
- ppa[i], prot & vpprot, hat_flag);
+ if (upgrdfail && segvn_anypgsz_vnode) {
+ /* SOFTLOCK case */
+ hat_memload_array(hat, a, pgsz,
+ ppa, prot & vpprot, hat_flag);
+ } else {
+ for (i = 0; i < pages; i++) {
+ hat_memload(hat,
+ a + (i << PAGESHIFT),
+ ppa[i], prot & vpprot,
+ hat_flag);
+ }
}
if (!(hat_flag & HAT_LOAD_LOCK)) {
for (i = 0; i < pages; i++) {
@@ -3942,16 +4199,18 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
faultcode_t err;
int ierr;
uint_t protchk, prot, vpprot;
- int i;
+ ulong_t i;
int hat_flag = (type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD;
anon_sync_obj_t cookie;
+ int first = 1;
+ int adjszc_chk;
+ int purged = 0;
ASSERT(szc != 0);
ASSERT(amp != NULL);
ASSERT(enable_mbit_wa == 0); /* no mbit simulations with large pages */
ASSERT(!(svd->flags & MAP_NORESERVE));
ASSERT(type != F_SOFTUNLOCK);
- ASSERT(segtype == MAP_PRIVATE);
ASSERT(IS_P2ALIGNED(a, maxpgsz));
ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock));
@@ -3988,6 +4247,7 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
ppa = kmem_alloc(ppasize, KM_SLEEP);
ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
for (;;) {
+ adjszc_chk = 0;
for (; a < lpgeaddr; a += pgsz, aindx += pages) {
if (svd->pageprot != 0 && IS_P2ALIGNED(a, maxpgsz)) {
VM_STAT_ADD(segvnvmstats.fltanpages[3]);
@@ -3999,7 +4259,17 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
goto error;
}
}
- if (type == F_SOFTLOCK) {
+ if (adjszc_chk && IS_P2ALIGNED(a, maxpgsz) &&
+ pgsz < maxpgsz) {
+ ASSERT(a > lpgaddr);
+ szc = seg->s_szc;
+ pgsz = maxpgsz;
+ pages = btop(pgsz);
+ ASSERT(IS_P2ALIGNED(aindx, pages));
+ lpgeaddr = (caddr_t)P2ROUNDUP((uintptr_t)eaddr,
+ pgsz);
+ }
+ if (type == F_SOFTLOCK && svd->vp != NULL) {
mutex_enter(&freemem_lock);
if (availrmem < tune.t_minarmem + pages) {
mutex_exit(&freemem_lock);
@@ -4020,7 +4290,7 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
if (ierr != 0) {
anon_array_exit(&cookie);
VM_STAT_ADD(segvnvmstats.fltanpages[4]);
- if (type == F_SOFTLOCK) {
+ if (type == F_SOFTLOCK && svd->vp != NULL) {
VM_STAT_ADD(segvnvmstats.fltanpages[5]);
mutex_enter(&freemem_lock);
availrmem += pages;
@@ -4038,12 +4308,41 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
ASSERT(!IS_VMODSORT(ppa[0]->p_vnode));
+ ASSERT(segtype == MAP_SHARED ||
+ ppa[0]->p_szc <= szc);
+ ASSERT(segtype == MAP_PRIVATE ||
+ ppa[0]->p_szc >= szc);
+
+ if (type == F_SOFTLOCK && svd->vp == NULL) {
+ /*
+ * All pages in ppa array belong to the same
+ * large page. This means it's ok to call
+ * segvn_pp_lock_anonpages just for ppa[0].
+ */
+ if (!segvn_pp_lock_anonpages(ppa[0], first)) {
+ for (i = 0; i < pages; i++) {
+ page_unlock(ppa[i]);
+ }
+ err = FC_MAKE_ERR(ENOMEM);
+ goto error;
+ }
+ first = 0;
+ mutex_enter(&freemem_lock);
+ svd->softlockcnt += pages;
+ segvn_pages_locked += pages;
+ mutex_exit(&freemem_lock);
+ }
+
/*
* Handle pages that have been marked for migration
*/
if (lgrp_optimizations())
page_migrate(seg, a, ppa, pages);
+ if (segtype == MAP_SHARED) {
+ vpprot |= PROT_WRITE;
+ }
+
hat_memload_array(hat, a, pgsz, ppa,
prot & vpprot, hat_flag);
@@ -4058,6 +4357,7 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
vpage += pages;
anon_array_exit(&cookie);
+ adjszc_chk = 1;
}
if (a == lpgeaddr)
break;
@@ -4078,6 +4378,18 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
* have relocated locked pages.
*/
ASSERT(ierr == -1 || ierr == -2);
+ /*
+ * For the very first relocation failure try to purge this
+ * segment's cache so that the relocator can obtain an
+ * exclusive lock on pages we want to relocate.
+ */
+ if (!purged && ierr == -1 && ppa_szc != (uint_t)-1 &&
+ svd->softlockcnt != 0) {
+ purged = 1;
+ segvn_purge(seg);
+ continue;
+ }
+
if (segvn_anypgsz) {
ASSERT(ierr == -2 || szc != 0);
ASSERT(ierr == -1 || szc < seg->s_szc);
@@ -4377,15 +4689,8 @@ top:
if (seg->s_szc != 0) {
pgsz = page_get_pagesize(seg->s_szc);
ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock));
- /*
- * We may need to do relocations so purge seg_pcache to allow
- * pages to be locked exclusively.
- */
- if (svd->softlockcnt != 0)
- segvn_purge(seg);
CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr);
if (svd->vp == NULL) {
- ASSERT(svd->type == MAP_PRIVATE);
err = segvn_fault_anonpages(hat, seg, lpgaddr,
lpgeaddr, type, rw, addr, addr + len, brkcow);
} else {
@@ -4704,13 +5009,14 @@ slow:
*/
for (a = addr; a < addr + len; a += PAGESIZE, off += PAGESIZE) {
err = segvn_faultpage(hat, seg, a, off, vpage, plp, vpprot,
- type, rw, brkcow);
+ type, rw, brkcow, a == addr);
if (err) {
if (amp != NULL)
ANON_LOCK_EXIT(&amp->a_rwlock);
- if (type == F_SOFTLOCK && a > addr)
+ if (type == F_SOFTLOCK && a > addr) {
segvn_softunlock(seg, addr, (a - addr),
S_OTHER);
+ }
SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
segvn_pagelist_rele(plp);
if (pl_alloc_sz)
@@ -4938,7 +5244,15 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
if (AS_READ_HELD(seg->s_as, &seg->s_as->a_lock))
return (IE_RETRY);
VM_STAT_ADD(segvnvmstats.demoterange[1]);
- err = segvn_demote_range(seg, addr, len, SDR_END);
+ if (svd->type == MAP_PRIVATE || svd->vp != NULL) {
+ err = segvn_demote_range(seg, addr, len,
+ SDR_END, 0);
+ } else {
+ uint_t szcvec = map_shm_pgszcvec(seg->s_base,
+ pgsz, (uintptr_t)seg->s_base);
+ err = segvn_demote_range(seg, addr, len,
+ SDR_END, szcvec);
+ }
if (err == 0)
return (IE_RETRY);
if (err == ENOMEM)
@@ -4993,7 +5307,7 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
return (0); /* all done */
}
svd->prot = (uchar_t)prot;
- } else {
+ } else if (svd->type == MAP_PRIVATE) {
struct anon *ap = NULL;
page_t *pp;
u_offset_t offset, off;
@@ -5026,10 +5340,7 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
*/
for (svp = &svd->vpage[seg_page(seg, addr)]; svp < evp; svp++) {
- ASSERT(seg->s_szc == 0 ||
- (svd->vp != NULL || svd->type == MAP_PRIVATE));
-
- if (seg->s_szc != 0 && svd->type == MAP_PRIVATE) {
+ if (seg->s_szc != 0) {
if (amp != NULL) {
anon_array_enter(amp, anon_idx,
&cookie);
@@ -5054,8 +5365,7 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
}
if (VPP_ISPPLOCK(svp) &&
- (VPP_PROT(svp) != prot) &&
- (svd->type == MAP_PRIVATE)) {
+ VPP_PROT(svp) != prot) {
if (amp == NULL || ap == NULL) {
vp = svd->vp;
@@ -5109,9 +5419,17 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
return (IE_NOMEM);
}
+ } else {
+ segvn_vpage(seg);
+ evp = &svd->vpage[seg_page(seg, addr + len)];
+ for (svp = &svd->vpage[seg_page(seg, addr)]; svp < evp; svp++) {
+ VPP_SETPROT(svp, prot);
+ }
}
- if ((prot & PROT_WRITE) != 0 || (prot & ~PROT_USER) == PROT_NONE) {
+ if (((prot & PROT_WRITE) != 0 &&
+ (svd->vp != NULL || svd->type == MAP_PRIVATE)) ||
+ (prot & ~PROT_USER) == PROT_NONE) {
/*
* Either private or shared data with write access (in
* which case we need to throw out all former translations
@@ -5152,6 +5470,7 @@ segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
struct seg *nseg;
caddr_t eaddr = addr + len, a;
size_t pgsz = page_get_pagesize(szc);
+ pgcnt_t pgcnt = page_get_pagecnt(szc);
int err;
u_offset_t off = svd->offset + (uintptr_t)(addr - seg->s_base);
extern struct vnode kvp;
@@ -5178,8 +5497,16 @@ segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
return (EINVAL);
}
- if ((svd->vp == NULL && svd->type == MAP_SHARED) ||
- (svd->flags & MAP_NORESERVE) || seg->s_as == &kas ||
+ if (amp != NULL && svd->type == MAP_SHARED) {
+ ulong_t an_idx = svd->anon_index + seg_page(seg, addr);
+ if (!IS_P2ALIGNED(an_idx, pgcnt)) {
+
+ segvn_setpgsz_anon_align_err++;
+ return (EINVAL);
+ }
+ }
+
+ if ((svd->flags & MAP_NORESERVE) || seg->s_as == &kas ||
szc > segvn_maxpgszc) {
return (EINVAL);
}
@@ -5237,7 +5564,7 @@ segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
if (addr != seg->s_base || eaddr != (seg->s_base + seg->s_size)) {
if (szc < seg->s_szc) {
VM_STAT_ADD(segvnvmstats.demoterange[2]);
- err = segvn_demote_range(seg, addr, len, SDR_RANGE);
+ err = segvn_demote_range(seg, addr, len, SDR_RANGE, 0);
if (err == 0) {
return (IE_RETRY);
}
@@ -5313,10 +5640,11 @@ segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
* new szc.
*/
if (amp != NULL) {
- pgcnt_t pgcnt = pgsz >> PAGESHIFT;
if (!IS_P2ALIGNED(svd->anon_index, pgcnt)) {
struct anon_hdr *nahp;
+ ASSERT(svd->type == MAP_PRIVATE);
+
ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
ASSERT(amp->refcnt == 1);
nahp = anon_create(btop(amp->size), ANON_NOSLEEP);
@@ -5371,7 +5699,11 @@ segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
if (amp != NULL) {
ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
- amp->a_szc = szc;
+ if (svd->type == MAP_PRIVATE) {
+ amp->a_szc = szc;
+ } else if (szc > amp->a_szc) {
+ amp->a_szc = szc;
+ }
ANON_LOCK_EXIT(&amp->a_rwlock);
}
@@ -5399,8 +5731,6 @@ segvn_clrszc(struct seg *seg)
ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock) ||
SEGVN_WRITE_HELD(seg->s_as, &svd->lock));
- ASSERT(svd->type == MAP_PRIVATE ||
- (vp != NULL && svd->amp == NULL));
if (vp == NULL && amp == NULL) {
seg->s_szc = 0;
@@ -5415,7 +5745,7 @@ segvn_clrszc(struct seg *seg)
hat_unload(seg->s_as->a_hat, seg->s_base, seg->s_size,
HAT_UNLOAD_UNMAP);
- if (amp == NULL) {
+ if (amp == NULL || svd->type == MAP_SHARED) {
seg->s_szc = 0;
return (0);
}
@@ -5575,7 +5905,6 @@ segvn_split_seg(struct seg *seg, caddr_t addr)
struct segvn_data *nsvd;
ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
- ASSERT(svd->type == MAP_PRIVATE || svd->amp == NULL);
ASSERT(addr >= seg->s_base);
ASSERT(addr <= seg->s_base + seg->s_size);
@@ -5628,7 +5957,7 @@ segvn_split_seg(struct seg *seg, caddr_t addr)
bcopy(ovpage + seg_pages(seg), nsvd->vpage, nbytes);
kmem_free(ovpage, bytes + nbytes);
}
- if (svd->amp != NULL) {
+ if (svd->amp != NULL && svd->type == MAP_PRIVATE) {
struct anon_map *oamp = svd->amp, *namp;
struct anon_hdr *nahp;
@@ -5650,6 +5979,15 @@ segvn_split_seg(struct seg *seg, caddr_t addr)
nsvd->amp = namp;
nsvd->anon_index = 0;
ANON_LOCK_EXIT(&oamp->a_rwlock);
+ } else if (svd->amp != NULL) {
+ pgcnt_t pgcnt = page_get_pagecnt(seg->s_szc);
+ ASSERT(svd->amp == nsvd->amp);
+ ASSERT(seg->s_szc <= svd->amp->a_szc);
+ nsvd->anon_index = svd->anon_index + seg_pages(seg);
+ ASSERT(IS_P2ALIGNED(nsvd->anon_index, pgcnt));
+ ANON_LOCK_ENTER(&svd->amp->a_rwlock, RW_WRITER);
+ svd->amp->refcnt++;
+ ANON_LOCK_EXIT(&svd->amp->a_rwlock);
}
/*
@@ -5681,7 +6019,6 @@ segvn_split_seg(struct seg *seg, caddr_t addr)
return (nseg);
}
-
/*
* called on memory operations (unmap, setprot, setpagesize) for a subset
* of a large page segment to either demote the memory range (SDR_RANGE)
@@ -5690,7 +6027,12 @@ segvn_split_seg(struct seg *seg, caddr_t addr)
* returns 0 on success. returns errno, including ENOMEM, on failure.
*/
static int
-segvn_demote_range(struct seg *seg, caddr_t addr, size_t len, int flag)
+segvn_demote_range(
+ struct seg *seg,
+ caddr_t addr,
+ size_t len,
+ int flag,
+ uint_t szcvec)
{
caddr_t eaddr = addr + len;
caddr_t lpgaddr, lpgeaddr;
@@ -5700,15 +6042,16 @@ segvn_demote_range(struct seg *seg, caddr_t addr, size_t len, int flag)
size_t pgsz;
struct segvn_data *svd = (struct segvn_data *)seg->s_data;
int err;
+ uint_t szc = seg->s_szc;
+ uint_t tszcvec;
ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
- ASSERT(seg->s_szc != 0);
- pgsz = page_get_pagesize(seg->s_szc);
+ ASSERT(szc != 0);
+ pgsz = page_get_pagesize(szc);
ASSERT(seg->s_base != addr || seg->s_size != len);
ASSERT(addr >= seg->s_base && eaddr <= seg->s_base + seg->s_size);
ASSERT(svd->softlockcnt == 0);
- ASSERT(svd->type == MAP_PRIVATE ||
- (svd->vp != NULL && svd->amp == NULL));
+ ASSERT(szcvec == 0 || (flag == SDR_END && svd->type == MAP_SHARED));
CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr);
ASSERT(flag == SDR_RANGE || eaddr < lpgeaddr || addr > lpgaddr);
@@ -5749,25 +6092,77 @@ segvn_demote_range(struct seg *seg, caddr_t addr, size_t len, int flag)
}
ASSERT(badseg1 != NULL);
- ASSERT(badseg1->s_szc != 0);
- ASSERT(page_get_pagesize(badseg1->s_szc) == pgsz);
+ ASSERT(badseg1->s_szc == szc);
ASSERT(flag == SDR_RANGE || badseg1->s_size == pgsz ||
badseg1->s_size == 2 * pgsz);
+ ASSERT(sameprot(badseg1, badseg1->s_base, pgsz));
+ ASSERT(badseg1->s_size == pgsz ||
+ sameprot(badseg1, badseg1->s_base + pgsz, pgsz));
if (err = segvn_clrszc(badseg1)) {
return (err);
}
ASSERT(badseg1->s_szc == 0);
+ if (szc > 1 && (tszcvec = P2PHASE(szcvec, 1 << szc)) > 1) {
+ uint_t tszc = highbit(tszcvec) - 1;
+ caddr_t ta = MAX(addr, badseg1->s_base);
+ caddr_t te;
+ size_t tpgsz = page_get_pagesize(tszc);
+
+ ASSERT(svd->type == MAP_SHARED);
+ ASSERT(flag == SDR_END);
+ ASSERT(tszc < szc && tszc > 0);
+
+ if (eaddr > badseg1->s_base + badseg1->s_size) {
+ te = badseg1->s_base + badseg1->s_size;
+ } else {
+ te = eaddr;
+ }
+
+ ASSERT(ta <= te);
+ badseg1->s_szc = tszc;
+ if (!IS_P2ALIGNED(ta, tpgsz) || !IS_P2ALIGNED(te, tpgsz)) {
+ if (badseg2 != NULL) {
+ err = segvn_demote_range(badseg1, ta, te - ta,
+ SDR_END, tszcvec);
+ if (err != 0) {
+ return (err);
+ }
+ } else {
+ return (segvn_demote_range(badseg1, ta,
+ te - ta, SDR_END, tszcvec));
+ }
+ }
+ }
+
if (badseg2 == NULL)
return (0);
- ASSERT(badseg2->s_szc != 0);
- ASSERT(page_get_pagesize(badseg2->s_szc) == pgsz);
+ ASSERT(badseg2->s_szc == szc);
ASSERT(badseg2->s_size == pgsz);
ASSERT(sameprot(badseg2, badseg2->s_base, badseg2->s_size));
if (err = segvn_clrszc(badseg2)) {
return (err);
}
ASSERT(badseg2->s_szc == 0);
+
+ if (szc > 1 && (tszcvec = P2PHASE(szcvec, 1 << szc)) > 1) {
+ uint_t tszc = highbit(tszcvec) - 1;
+ size_t tpgsz = page_get_pagesize(tszc);
+
+ ASSERT(svd->type == MAP_SHARED);
+ ASSERT(flag == SDR_END);
+ ASSERT(tszc < szc && tszc > 0);
+ ASSERT(badseg2->s_base > addr);
+ ASSERT(eaddr > badseg2->s_base);
+ ASSERT(eaddr < badseg2->s_base + badseg2->s_size);
+
+ badseg2->s_szc = tszc;
+ if (!IS_P2ALIGNED(eaddr, tpgsz)) {
+ return (segvn_demote_range(badseg2, badseg2->s_base,
+ eaddr - badseg2->s_base, SDR_END, tszcvec));
+ }
+ }
+
return (0);
}
@@ -7344,6 +7739,7 @@ segvn_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
caddr_t a;
size_t page;
caddr_t lpgaddr, lpgeaddr;
+ pgcnt_t szc0_npages = 0;
TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEGVN_START,
"segvn_pagelock: start seg %p addr %p", seg, addr);
@@ -7520,18 +7916,24 @@ segvn_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
}
}
- mutex_enter(&freemem_lock);
- if (availrmem < tune.t_minarmem + npages) {
- mutex_exit(&freemem_lock);
- mutex_exit(&svd->segp_slock);
- error = ENOMEM;
- goto out;
- } else {
- svd->softlockcnt += npages;
+ /*
+ * Avoid the per-page overhead of segvn_pp_lock_anonpages() for small
+ * pages. For large pages segvn_pp_lock_anonpages() only does real
+ * work once per large page. The tradeoff is that we may decrement
+ * availrmem more than once for the same page, but this is ok
+ * for small pages.
+ */
+ if (seg->s_szc == 0) {
+ mutex_enter(&freemem_lock);
+ if (availrmem < tune.t_minarmem + npages) {
+ mutex_exit(&freemem_lock);
+ mutex_exit(&svd->segp_slock);
+ error = ENOMEM;
+ goto out;
+ }
availrmem -= npages;
- segvn_pages_locked += npages;
+ mutex_exit(&freemem_lock);
}
- mutex_exit(&freemem_lock);
pplist = kmem_alloc(sizeof (page_t *) * npages, KM_SLEEP);
pl = pplist;
@@ -7574,11 +7976,29 @@ segvn_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
if (pp == NULL) {
break;
}
+ if (seg->s_szc != 0 || pp->p_szc != 0) {
+ if (!segvn_pp_lock_anonpages(pp, a == addr)) {
+ page_unlock(pp);
+ break;
+ }
+ } else {
+ szc0_npages++;
+ }
*pplist++ = pp;
}
ANON_LOCK_EXIT(&amp->a_rwlock);
+ ASSERT(npages >= szc0_npages);
+
if (a >= addr + len) {
+ mutex_enter(&freemem_lock);
+ if (seg->s_szc == 0 && npages != szc0_npages) {
+ ASSERT(svd->type == MAP_SHARED && amp->a_szc > 0);
+ availrmem += (npages - szc0_npages);
+ }
+ svd->softlockcnt += npages;
+ segvn_pages_locked += npages;
+ mutex_exit(&freemem_lock);
(void) seg_pinsert(seg, addr, len, pl, rw, SEGP_ASYNC_FLUSH,
segvn_reclaim);
mutex_exit(&svd->segp_slock);
@@ -7589,31 +8009,24 @@ segvn_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
}
mutex_exit(&svd->segp_slock);
+ if (seg->s_szc == 0) {
+ mutex_enter(&freemem_lock);
+ availrmem += npages;
+ mutex_exit(&freemem_lock);
+ }
error = EFAULT;
pplist = pl;
np = ((uintptr_t)(a - addr)) >> PAGESHIFT;
while (np > (uint_t)0) {
+ ASSERT(PAGE_LOCKED(*pplist));
+ if (seg->s_szc != 0 || (*pplist)->p_szc != 0) {
+ segvn_pp_unlock_anonpages(*pplist, pplist == pl);
+ }
page_unlock(*pplist);
np--;
pplist++;
}
kmem_free(pl, sizeof (page_t *) * npages);
- mutex_enter(&freemem_lock);
- svd->softlockcnt -= npages;
- availrmem += npages;
- segvn_pages_locked -= npages;
- mutex_exit(&freemem_lock);
- if (svd->softlockcnt <= 0) {
- if (AS_ISUNMAPWAIT(seg->s_as)) {
- mutex_enter(&seg->s_as->a_contents);
- if (AS_ISUNMAPWAIT(seg->s_as)) {
- AS_CLRUNMAPWAIT(seg->s_as);
- cv_broadcast(&seg->s_as->a_cv);
- }
- mutex_exit(&seg->s_as->a_contents);
- }
- }
-
out:
SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
*ppp = NULL;
@@ -7638,6 +8051,7 @@ segvn_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist,
struct segvn_data *svd = (struct segvn_data *)seg->s_data;
pgcnt_t np, npages;
struct page **pl;
+ pgcnt_t szc0_npages = 0;
#ifdef lint
addr = addr;
@@ -7654,12 +8068,19 @@ segvn_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist,
}
}
+ ASSERT(svd->vp == NULL && svd->amp != NULL);
+
while (np > (uint_t)0) {
if (rw == S_WRITE) {
hat_setrefmod(*pplist);
} else {
hat_setref(*pplist);
}
+ if (seg->s_szc != 0 || (*pplist)->p_szc != 0) {
+ segvn_pp_unlock_anonpages(*pplist, pplist == pl);
+ } else {
+ szc0_npages++;
+ }
page_unlock(*pplist);
np--;
pplist++;
@@ -7667,9 +8088,11 @@ segvn_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist,
kmem_free(pl, sizeof (page_t *) * npages);
mutex_enter(&freemem_lock);
- availrmem += npages;
segvn_pages_locked -= npages;
svd->softlockcnt -= npages;
+ if (szc0_npages != 0) {
+ availrmem += szc0_npages;
+ }
mutex_exit(&freemem_lock);
if (svd->softlockcnt <= 0) {
if (AS_ISUNMAPWAIT(seg->s_as)) {
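
The net effect of the segvn_pagelock()/segvn_reclaim() changes above: a request is still pre-charged against availrmem up front, but only for szc == 0 segments, and any pages that turn out to be constituents of large pages are charged once per large page through segvn_pp_lock_anonpages() instead, with the pre-charge for them refunded via the szc0_npages count. A toy model of that rebalancing with made-up numbers (all names here are illustrative):

#include <stdio.h>

int
main(void)
{
	long availrmem = 1000;
	long npages = 16;	/* pages in the pagelock request */
	long szc0_npages = 10;	/* of those, pages with p_szc == 0 */

	availrmem -= npages;	/* up-front charge for the whole request */
	/* large-page constituents get charged via their root pages... */
	availrmem += npages - szc0_npages;	/* ...so refund the overlap */
	printf("availrmem = %ld\n", availrmem);	/* 990: only small pages */
	return (0);
}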
diff --git a/usr/src/uts/common/vm/seg_vn.h b/usr/src/uts/common/vm/seg_vn.h
index 4f66d495dd..10cd0f1835 100644
--- a/usr/src/uts/common/vm/seg_vn.h
+++ b/usr/src/uts/common/vm/seg_vn.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -145,6 +144,10 @@ typedef struct segvn_data {
((struct segvn_crargs *)(argsp))->vp != NULL && \
((struct segvn_crargs *)(argsp))->amp == NULL)
+#define AS_MAP_SHAMP(crfp, argsp) \
+ ((crfp) == (int (*)())segvn_create && \
+ ((struct segvn_crargs *)(argsp))->type == MAP_SHARED && \
+ ((struct segvn_crargs *)(argsp))->vp == NULL)
extern void segvn_init(void);
extern int segvn_create(struct seg *, void *);
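
AS_MAP_SHAMP identifies the mapping class this whole commit extends MPSS to: segvn-created, MAP_SHARED, and anon-backed (no vnode). A trivial compilable model of the predicate; the struct and constants below only loosely mirror the macro:

#include <stdio.h>
#include <stddef.h>

#define	MAP_SHARED	1

struct crargs {
	int type;	/* MAP_SHARED or MAP_PRIVATE */
	void *vp;	/* backing vnode, NULL for anonymous memory */
};

static int
is_shamp(const struct crargs *a)
{
	return (a->type == MAP_SHARED && a->vp == NULL);
}

int
main(void)
{
	struct crargs a = { MAP_SHARED, NULL };

	printf("%d\n", is_shamp(&a));	/* 1: shared anonymous mapping */
	return (0);
}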
diff --git a/usr/src/uts/common/vm/vm_anon.c b/usr/src/uts/common/vm/vm_anon.c
index 415f96a0d1..e503432b02 100644
--- a/usr/src/uts/common/vm/vm_anon.c
+++ b/usr/src/uts/common/vm/vm_anon.c
@@ -1024,6 +1024,15 @@ anon_decref_pages(
ASSERT(szc != 0);
ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
ASSERT(IS_P2ALIGNED(an_idx, pgcnt));
+ ASSERT(an_idx < ahp->size);
+
+ if (ahp->size - an_idx < pgcnt) {
+ /*
+ * In the case of shared mappings, the total anon map size may
+ * not be aligned to the largest page size.
+ */
+ pgcnt = ahp->size - an_idx;
+ }
VM_STAT_ADD(anonvmstats.decrefpages[0]);
@@ -1474,6 +1483,7 @@ anon_free_pages(
npages = btopr(size);
ASSERT(IS_P2ALIGNED(npages, pgcnt));
ASSERT(IS_P2ALIGNED(an_idx, pgcnt));
+ ASSERT(an_idx < ahp->size);
VM_STAT_ADD(anonvmstats.freepages[0]);
@@ -1621,7 +1631,7 @@ anon_disclaim(struct anon_map *amp, ulong_t index, size_t size, int flags)
}
pgcnt = page_get_pagecnt(pp->p_szc);
- if (!IS_P2ALIGNED(index, pgcnt)) {
+ if (!IS_P2ALIGNED(index, pgcnt) || npages < pgcnt) {
if (!page_try_demote_pages(pp)) {
mutex_exit(ahm);
page_unlock(pp);
@@ -1802,6 +1812,7 @@ anon_map_getpages(
int prealloc = 1;
int err, slotcreate;
uint_t vpprot;
+ int upsize = (szc < seg->s_szc);
#if !defined(__i386) && !defined(__amd64)
ASSERT(seg->s_szc != 0);
@@ -1824,9 +1835,10 @@ anon_map_getpages(
ppa[0] = pl[0];
if (brkcow == 0 || (*protp & PROT_WRITE)) {
VM_STAT_ADD(anonvmstats.getpages[2]);
- if (ppa[0]->p_szc != 0) {
+ if (ppa[0]->p_szc != 0 && upsize) {
VM_STAT_ADD(anonvmstats.getpages[3]);
- *ppa_szc = ppa[0]->p_szc;
+ *ppa_szc = MIN(ppa[0]->p_szc,
+ seg->s_szc);
page_unlock(ppa[0]);
return (-2);
}
@@ -1859,11 +1871,11 @@ anon_map_getpages(
uint_t pszc;
swap_xlate(ap, &vp, &off);
if (page_exists_forreal(vp, (u_offset_t)off, &pszc)) {
- if (pszc > szc) {
- *ppa_szc = pszc;
+ if (pszc > szc && upsize) {
+ *ppa_szc = MIN(pszc, seg->s_szc);
return (-2);
}
- if (pszc == szc) {
+ if (pszc >= szc) {
prealloc = 0;
}
}
@@ -1980,10 +1992,11 @@ top:
* Similar to the anon_zero case.
*/
err = swap_getconpage(vp, (u_offset_t)off, PAGESIZE,
- NULL, pl, PAGESIZE, conpp, &nreloc, seg, vaddr,
+ NULL, pl, PAGESIZE, conpp, ppa_szc, &nreloc, seg, vaddr,
slotcreate == 1 ? S_CREATE : rw, cred);
if (err) {
+ ASSERT(err != -2 || upsize);
VM_STAT_ADD(anonvmstats.getpages[12]);
ASSERT(slotcreate == 0);
goto io_err;
@@ -1991,12 +2004,14 @@ top:
pp = pl[0];
- if (pp->p_szc != szc) {
+ if (pp->p_szc < szc || (pp->p_szc > szc && upsize)) {
VM_STAT_ADD(anonvmstats.getpages[13]);
ASSERT(slotcreate == 0);
ASSERT(prealloc == 0);
ASSERT(pg_idx == 0);
if (pp->p_szc > szc) {
+ ASSERT(upsize);
+ *ppa_szc = MIN(pp->p_szc, seg->s_szc);
page_unlock(pp);
VM_STAT_ADD(anonvmstats.getpages[14]);
return (-2);
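
The upsize gate threaded through this function means a larger page found for
a shared amp only forces the -2 retry when the segment itself can map a
bigger size, and the size code reported back through ppa_szc is capped at
seg->s_szc. A runnable sketch of the capping with made-up size codes:

/*
 * Illustrative only: invented size codes showing when -2 is returned
 * and what size the caller is told to retry at.
 */
#include <stdio.h>

#define	MIN(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	unsigned int szc = 1;		/* size code faulted at */
	unsigned int seg_szc = 2;	/* segment's maximum size code */
	unsigned int pszc = 3;		/* size the page actually has */
	int upsize = (szc < seg_szc);

	if (pszc > szc && upsize)	/* retry, but never above seg_szc */
		printf("retry at szc %u\n", MIN(pszc, seg_szc)); /* 2 */
	return (0);
}
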
@@ -2063,8 +2078,11 @@ top:
if (pg_idx > 0 &&
((page_pptonum(pp) != page_pptonum(ppa[pg_idx - 1]) + 1) ||
- (pp->p_szc != ppa[pg_idx - 1]->p_szc)))
+ (pp->p_szc != ppa[pg_idx - 1]->p_szc))) {
panic("anon_map_getpages: unexpected page");
+ } else if (pg_idx == 0 && (page_pptonum(pp) & (pgcnt - 1))) {
+ panic("anon_map_getpages: unaligned page");
+ }
if (prealloc == 0) {
ppa[pg_idx] = pp;
@@ -2122,7 +2140,7 @@ io_err:
* unlocked.
*/
- ASSERT(err != -2 || pg_idx == 0);
+ ASSERT(err != -2 || ((pg_idx == 0) && upsize));
VM_STAT_COND_ADD(err > 0, anonvmstats.getpages[22]);
VM_STAT_COND_ADD(err == -1, anonvmstats.getpages[23]);
@@ -2490,7 +2508,8 @@ anon_map_privatepages(
}
err = swap_getconpage(vp, (u_offset_t)off, PAGESIZE, NULL, pl,
- PAGESIZE, conpp, &nreloc, seg, vaddr, S_CREATE, cred);
+ PAGESIZE, conpp, NULL, &nreloc, seg, vaddr,
+ S_CREATE, cred);
/*
* Impossible to fail since this is S_CREATE.
@@ -2788,8 +2807,8 @@ anon_map_createpages(
conpp = pp;
err = swap_getconpage(ap_vp, ap_off, PAGESIZE,
- (uint_t *)NULL, anon_pl, PAGESIZE, conpp, &nreloc,
- seg, addr, S_CREATE, cred);
+ (uint_t *)NULL, anon_pl, PAGESIZE, conpp, NULL,
+ &nreloc, seg, addr, S_CREATE, cred);
if (err) {
ANON_LOCK_EXIT(&amp->a_rwlock);
@@ -2822,6 +2841,124 @@ anon_map_createpages(
return (0);
}
+static int
+anon_try_demote_pages(
+ struct anon_hdr *ahp,
+ ulong_t sidx,
+ uint_t szc,
+ page_t **ppa,
+ int private)
+{
+ struct anon *ap;
+ pgcnt_t pgcnt = page_get_pagecnt(szc);
+ page_t *pp;
+ pgcnt_t i;
+ kmutex_t *ahmpages = NULL;
+ int root = 0;
+ pgcnt_t npgs;
+ pgcnt_t curnpgs = 0;
+ size_t ppasize = 0;
+
+ ASSERT(szc != 0);
+ ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
+ ASSERT(IS_P2ALIGNED(sidx, pgcnt));
+ ASSERT(sidx < ahp->size);
+
+ if (ppa == NULL) {
+ ppasize = pgcnt * sizeof (page_t *);
+ ppa = kmem_alloc(ppasize, KM_SLEEP);
+ }
+
+ ap = anon_get_ptr(ahp, sidx);
+ if (ap != NULL && private) {
+ VM_STAT_ADD(anonvmstats.demotepages[1]);
+ ahmpages = &anonpages_hash_lock[AH_LOCK(ap->an_vp, ap->an_off)];
+ mutex_enter(ahmpages);
+ }
+
+ if (ap != NULL && ap->an_refcnt > 1) {
+ if (ahmpages != NULL) {
+ VM_STAT_ADD(anonvmstats.demotepages[2]);
+ mutex_exit(ahmpages);
+ }
+ if (ppasize != 0) {
+ kmem_free(ppa, ppasize);
+ }
+ return (0);
+ }
+ if (ahmpages != NULL) {
+ mutex_exit(ahmpages);
+ }
+ if (ahp->size - sidx < pgcnt) {
+ ASSERT(private == 0);
+ pgcnt = ahp->size - sidx;
+ }
+ for (i = 0; i < pgcnt; i++, sidx++) {
+ ap = anon_get_ptr(ahp, sidx);
+ if (ap != NULL) {
+ if (ap->an_refcnt != 1) {
+ panic("anon_try_demote_pages: an_refcnt != 1");
+ }
+ pp = ppa[i] = page_lookup(ap->an_vp, ap->an_off,
+ SE_EXCL);
+ if (pp != NULL) {
+ (void) hat_pageunload(pp,
+ HAT_FORCE_PGUNLOAD);
+ }
+ } else {
+ ppa[i] = NULL;
+ }
+ }
+ for (i = 0; i < pgcnt; i++) {
+ if ((pp = ppa[i]) != NULL && pp->p_szc != 0) {
+ ASSERT(pp->p_szc <= szc);
+ if (!root) {
+ VM_STAT_ADD(anonvmstats.demotepages[3]);
+ if (curnpgs != 0)
+ panic("anon_try_demote_pages: "
+ "bad large page");
+
+ root = 1;
+ curnpgs = npgs =
+ page_get_pagecnt(pp->p_szc);
+
+ ASSERT(npgs <= pgcnt);
+ ASSERT(IS_P2ALIGNED(npgs, npgs));
+ ASSERT(!(page_pptonum(pp) &
+ (npgs - 1)));
+ } else {
+ ASSERT(i > 0);
+ ASSERT(page_pptonum(pp) - 1 ==
+ page_pptonum(ppa[i - 1]));
+ if ((page_pptonum(pp) & (npgs - 1)) ==
+ npgs - 1)
+ root = 0;
+ }
+ ASSERT(PAGE_EXCL(pp));
+ pp->p_szc = 0;
+ ASSERT(curnpgs > 0);
+ curnpgs--;
+ }
+ }
+ if (root != 0 || curnpgs != 0)
+ panic("anon_try_demote_pages: bad large page");
+
+ for (i = 0; i < pgcnt; i++) {
+ if ((pp = ppa[i]) != NULL) {
+ ASSERT(!hat_page_is_mapped(pp));
+ ASSERT(pp->p_szc == 0);
+ page_unlock(pp);
+ }
+ }
+ if (ppasize != 0) {
+ kmem_free(ppa, ppasize);
+ }
+ return (1);
+}
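
The second loop in anon_try_demote_pages enforces the large-page shape before
clearing size codes: each group must start at a naturally aligned root and
its constituents must be pfn-contiguous. A user-space analogue of that walk
(pfn values invented):

/*
 * Illustrative only: verify that the constituents of each large page
 * are pfn-contiguous and root-aligned; the kernel loop also clears
 * pp->p_szc as it goes.
 */
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	unsigned long pfn[8] = { 8, 9, 10, 11, 16, 17, 18, 19 };
	unsigned long npgs = 4;		/* constituent pages per large page */
	unsigned long curnpgs = 0;
	int root = 0, i;

	for (i = 0; i < 8; i++) {
		if (!root) {
			/* expect a naturally aligned large-page root */
			if (curnpgs != 0 || (pfn[i] & (npgs - 1)))
				abort();
			root = 1;
			curnpgs = npgs;
		} else if (pfn[i] != pfn[i - 1] + 1) {
			abort();	/* constituents must be contiguous */
		}
		if ((pfn[i] & (npgs - 1)) == npgs - 1)
			root = 0;	/* last constituent of this group */
		curnpgs--;
	}
	if (root != 0 || curnpgs != 0)
		abort();
	printf("both %lu-page groups are well formed\n", npgs);
	return (0);
}
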
+
+/*
+ * anon_map_demotepages() can only be called by MAP_PRIVATE segments.
+ */
int
anon_map_demotepages(
struct anon_map *amp,
@@ -2842,7 +2979,6 @@ anon_map_demotepages(
pgcnt_t i, pg_idx;
ulong_t an_idx;
caddr_t vaddr;
- kmutex_t *ahmpages = NULL;
int err;
int retry = 0;
uint_t vpprot;
@@ -2851,87 +2987,15 @@ anon_map_demotepages(
ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
ASSERT(IS_P2ALIGNED(start_idx, pgcnt));
ASSERT(ppa != NULL);
+ ASSERT(szc != 0);
+ ASSERT(szc == amp->a_szc);
VM_STAT_ADD(anonvmstats.demotepages[0]);
- ap = anon_get_ptr(amp->ahp, start_idx);
- if (ap != NULL) {
- VM_STAT_ADD(anonvmstats.demotepages[1]);
- ahmpages = &anonpages_hash_lock[AH_LOCK(ap->an_vp, ap->an_off)];
- mutex_enter(ahmpages);
- }
top:
- if (ap == NULL || ap->an_refcnt <= 1) {
- int root = 0;
- pgcnt_t npgs, curnpgs = 0;
-
- VM_STAT_ADD(anonvmstats.demotepages[2]);
-
- ASSERT(retry == 0 || ap != NULL);
-
- if (ahmpages != NULL)
- mutex_exit(ahmpages);
- an_idx = start_idx;
- for (i = 0; i < pgcnt; i++, an_idx++) {
- ap = anon_get_ptr(amp->ahp, an_idx);
- if (ap != NULL) {
- ASSERT(ap->an_refcnt == 1);
- pp = ppa[i] = page_lookup(ap->an_vp, ap->an_off,
- SE_EXCL);
- if (pp != NULL) {
- (void) hat_pageunload(pp,
- HAT_FORCE_PGUNLOAD);
- }
- } else {
- ppa[i] = NULL;
- }
- }
- for (i = 0; i < pgcnt; i++) {
- if ((pp = ppa[i]) != NULL && pp->p_szc != 0) {
- ASSERT(pp->p_szc <= szc);
- if (!root) {
- VM_STAT_ADD(anonvmstats.demotepages[3]);
- if (curnpgs != 0)
- panic("anon_map_demotepages: "
- "bad large page");
-
- root = 1;
- curnpgs = npgs =
- page_get_pagecnt(pp->p_szc);
-
- ASSERT(npgs <= pgcnt);
- ASSERT(IS_P2ALIGNED(npgs, npgs));
- ASSERT(!(page_pptonum(pp) &
- (npgs - 1)));
- } else {
- ASSERT(i > 0);
- ASSERT(page_pptonum(pp) - 1 ==
- page_pptonum(ppa[i - 1]));
- if ((page_pptonum(pp) & (npgs - 1)) ==
- npgs - 1)
- root = 0;
- }
- ASSERT(PAGE_EXCL(pp));
- pp->p_szc = 0;
- curnpgs--;
- }
- }
- if (root != 0 || curnpgs != 0)
- panic("anon_map_demotepages: bad large page");
-
- for (i = 0; i < pgcnt; i++) {
- if ((pp = ppa[i]) != NULL) {
- ASSERT(!hat_page_is_mapped(pp));
- ASSERT(pp->p_szc == 0);
- page_unlock(pp);
- }
- }
- kmem_free(ppa, ppasize);
+ if (anon_try_demote_pages(amp->ahp, start_idx, szc, ppa, 1)) {
return (0);
}
- ASSERT(ahmpages != NULL);
- mutex_exit(ahmpages);
- ahmpages = NULL;
VM_STAT_ADD(anonvmstats.demotepages[4]);
@@ -2988,6 +3052,75 @@ top:
}
/*
+ * Free pages of a shared anon map. It's assumed that anon maps don't share
+ * anon structures with private anon maps, so every anon structure here has
+ * at most one reference. This means the underlying pages can be exclusively
+ * locked and then demoted or freed. If we are not freeing entire large
+ * pages, demote the ends of the region being freed so that their subpages
+ * can be freed. Page roots correspond to aligned index positions in the
+ * anon map.
+ */
+void
+anon_shmap_free_pages(struct anon_map *amp, ulong_t sidx, size_t len)
+{
+ ulong_t eidx = sidx + btopr(len);
+ pgcnt_t pages = page_get_pagecnt(amp->a_szc);
+ struct anon_hdr *ahp = amp->ahp;
+ ulong_t tidx;
+ size_t size;
+ ulong_t sidx_aligned;
+ ulong_t eidx_aligned;
+
+ ASSERT(RW_WRITE_HELD(&amp->a_rwlock));
+ ASSERT(amp->refcnt <= 1);
+ ASSERT(amp->a_szc > 0);
+ ASSERT(eidx <= ahp->size);
+ ASSERT(!anon_share(ahp, sidx, btopr(len)));
+
+ if (len == 0) { /* XXX */
+ return;
+ }
+
+ sidx_aligned = P2ALIGN(sidx, pages);
+ if (sidx_aligned != sidx ||
+ (eidx < sidx_aligned + pages && eidx < ahp->size)) {
+ if (!anon_try_demote_pages(ahp, sidx_aligned,
+ amp->a_szc, NULL, 0)) {
+ panic("anon_shmap_free_pages: demote failed");
+ }
+ size = (eidx <= sidx_aligned + pages) ? (eidx - sidx) :
+ P2NPHASE(sidx, pages);
+ size <<= PAGESHIFT;
+ anon_free(ahp, sidx, size);
+ sidx = sidx_aligned + pages;
+ if (eidx <= sidx) {
+ return;
+ }
+ }
+ eidx_aligned = P2ALIGN(eidx, pages);
+ if (sidx < eidx_aligned) {
+ anon_free_pages(ahp, sidx,
+ (eidx_aligned - sidx) << PAGESHIFT,
+ amp->a_szc);
+ sidx = eidx_aligned;
+ }
+ ASSERT(sidx == eidx_aligned);
+ if (eidx == eidx_aligned) {
+ return;
+ }
+ tidx = eidx;
+ if (eidx != ahp->size && anon_get_next_ptr(ahp, &tidx) != NULL &&
+ tidx - sidx < pages) {
+ if (!anon_try_demote_pages(ahp, sidx, amp->a_szc, NULL, 0)) {
+ panic("anon_shmap_free_pages: demote failed");
+ }
+ size = (eidx - sidx) << PAGESHIFT;
+ anon_free(ahp, sidx, size);
+ } else {
+ anon_free_pages(ahp, sidx, pages << PAGESHIFT, amp->a_szc);
+ }
+}
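
The head/middle/tail split above is easiest to see with concrete numbers. A
user-space sketch using the same P2ALIGN/P2NPHASE arithmetic (indices
invented, in PAGESIZE units):

/*
 * Illustrative only: free slots [3, 21) with 8 pages per large page.
 * The unaligned head and tail fall inside large pages that must be
 * demoted first; the aligned middle is freed as whole large pages.
 */
#include <stdio.h>

#define	P2ALIGN(x, a)	((x) & -(a))
#define	P2NPHASE(x, a)	((-(x)) & ((a) - 1))

int
main(void)
{
	unsigned long sidx = 3, eidx = 21, pages = 8;
	unsigned long sa = P2ALIGN(sidx, pages);	/* 0  */
	unsigned long ea = P2ALIGN(eidx, pages);	/* 16 */

	if (sa != sidx) {
		/* demote large page [0, 8), then free subpages [3, 8) */
		printf("head: free %lu pages at %lu\n",
		    P2NPHASE(sidx, pages), sidx);
		sidx = sa + pages;			/* 8 */
	}
	if (sidx < ea)		/* whole large pages: [8, 16) */
		printf("middle: free %lu pages at %lu\n", ea - sidx, sidx);
	if (eidx != ea)
		/* demote large page [16, 24), then free [16, 21) */
		printf("tail: free %lu pages at %lu\n", eidx - ea, ea);
	return (0);
}
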
+
+/*
* Allocate and initialize an anon_map structure for seg
* associating the given swap reservation with the new anon_map.
*/
diff --git a/usr/src/uts/common/vm/vm_as.c b/usr/src/uts/common/vm/vm_as.c
index f7533f56a6..4e807fd670 100644
--- a/usr/src/uts/common/vm/vm_as.c
+++ b/usr/src/uts/common/vm/vm_as.c
@@ -1463,11 +1463,9 @@ top:
}
static int
-as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
+as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec,
int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
- int text = vn_a->flags & MAP_TEXT;
- uint_t szcvec = map_execseg_pgszcvec(text, addr, size);
uint_t szc;
uint_t nszc;
int error;
@@ -1475,19 +1473,18 @@ as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
caddr_t eaddr;
size_t segsize;
struct seg *seg;
- uint_t save_szcvec;
size_t pgsz;
- struct vattr va;
- u_offset_t eoff;
- size_t save_size = 0;
+ int do_off = (vn_a->vp != NULL || vn_a->amp != NULL);
+ uint_t save_szcvec;
ASSERT(AS_WRITE_HELD(as, &as->a_lock));
ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
ASSERT(IS_P2ALIGNED(size, PAGESIZE));
- ASSERT(vn_a->vp != NULL);
- ASSERT(vn_a->amp == NULL);
+ ASSERT(vn_a->vp == NULL || vn_a->amp == NULL);
+ if (!do_off) {
+ vn_a->offset = 0;
+ }
-again:
if (szcvec <= 1) {
seg = seg_alloc(as, addr, size);
if (seg == NULL) {
@@ -1501,28 +1498,6 @@ again:
return (error);
}
- va.va_mask = AT_SIZE;
- if (VOP_GETATTR(vn_a->vp, &va, ATTR_HINT, vn_a->cred) != 0) {
- szcvec = 0;
- goto again;
- }
- eoff = vn_a->offset & PAGEMASK;
- if (eoff >= va.va_size) {
- szcvec = 0;
- goto again;
- }
- eoff += size;
- if (btopr(va.va_size) < btopr(eoff)) {
- save_size = size;
- size = va.va_size - (vn_a->offset & PAGEMASK);
- size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t);
- szcvec = map_execseg_pgszcvec(text, addr, size);
- if (szcvec <= 1) {
- size = save_size;
- goto again;
- }
- }
-
eaddr = addr + size;
save_szcvec = szcvec;
szcvec >>= 1;
@@ -1551,7 +1526,9 @@ again:
return (error);
}
*segcreated = 1;
- vn_a->offset += segsize;
+ if (do_off) {
+ vn_a->offset += segsize;
+ }
addr = a;
}
szc = nszc;
@@ -1576,7 +1553,9 @@ again:
return (error);
}
*segcreated = 1;
- vn_a->offset += segsize;
+ if (do_off) {
+ vn_a->offset += segsize;
+ }
addr = a;
}
szcvec &= ~(1 << szc);
@@ -1587,14 +1566,94 @@ again:
}
ASSERT(addr == eaddr);
+ return (0);
+}
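
Most of the szcvec-driven loop body in as_map_segvn_segs is elided from this
hunk, but the general carving pattern it implements — small segments until
the address is aligned for a larger size, the largest sizes in the middle,
smaller ones again at the tail — can be sketched loosely in user space (a
simplification under those assumptions, not the actual kernel loop):

/*
 * Illustrative only: carve [addr, eaddr) into naturally aligned
 * chunks, preferring the largest available page size in the middle.
 */
#include <stdio.h>

#define	P2ROUNDUP(x, a)	(-(-(x) & -(a)))

int
main(void)
{
	unsigned long addr = 0x7000, eaddr = 0x2c000;
	unsigned long pgsz[] = { 0x1000, 0x8000 };	/* 4K, 32K */
	int nszc = 2, szc;

	/* walk up: small pages until aligned for the next size */
	for (szc = 0; szc < nszc - 1; szc++) {
		unsigned long a = P2ROUNDUP(addr, pgsz[szc + 1]);
		if (a > addr && a <= eaddr) {
			printf("szc %d segment: [%#lx, %#lx)\n",
			    szc, addr, a);
			addr = a;
		}
	}
	/* walk down: largest size in the middle, smaller at the tail */
	for (szc = nszc - 1; szc >= 0; szc--) {
		unsigned long a = eaddr & -pgsz[szc];
		if (a > addr) {
			printf("szc %d segment: [%#lx, %#lx)\n",
			    szc, addr, a);
			addr = a;
		}
	}
	return (0);
}
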
+
+static int
+as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
+ int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
+{
+ int text = vn_a->flags & MAP_TEXT;
+ uint_t szcvec = map_execseg_pgszcvec(text, addr, size);
+ int error;
+ struct seg *seg;
+ struct vattr va;
+ u_offset_t eoff;
+ size_t save_size = 0;
+
+ ASSERT(AS_WRITE_HELD(as, &as->a_lock));
+ ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
+ ASSERT(IS_P2ALIGNED(size, PAGESIZE));
+ ASSERT(vn_a->vp != NULL);
+ ASSERT(vn_a->amp == NULL);
+
+again:
+ if (szcvec <= 1) {
+ seg = seg_alloc(as, addr, size);
+ if (seg == NULL) {
+ return (ENOMEM);
+ }
+ vn_a->szc = 0;
+ error = (*crfp)(seg, vn_a);
+ if (error != 0) {
+ seg_free(seg);
+ }
+ return (error);
+ }
+
+ va.va_mask = AT_SIZE;
+ if (VOP_GETATTR(vn_a->vp, &va, ATTR_HINT, vn_a->cred) != 0) {
+ szcvec = 0;
+ goto again;
+ }
+ eoff = vn_a->offset & PAGEMASK;
+ if (eoff >= va.va_size) {
+ szcvec = 0;
+ goto again;
+ }
+ eoff += size;
+ if (btopr(va.va_size) < btopr(eoff)) {
+ save_size = size;
+ size = va.va_size - (vn_a->offset & PAGEMASK);
+ size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t);
+ szcvec = map_execseg_pgszcvec(text, addr, size);
+ if (szcvec <= 1) {
+ size = save_size;
+ goto again;
+ }
+ }
+
+ error = as_map_segvn_segs(as, addr, size, szcvec, crfp, vn_a,
+ segcreated);
+ if (error != 0) {
+ return (error);
+ }
if (save_size) {
+ addr += size;
size = save_size - size;
+ szcvec = 0;
goto again;
}
-
return (0);
}
+static int
+as_map_sham(struct as *as, caddr_t addr, size_t size,
+ int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
+{
+ uint_t szcvec = map_shm_pgszcvec(addr, size,
+ vn_a->amp == NULL ? (uintptr_t)addr :
+ (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE));
+
+ ASSERT(AS_WRITE_HELD(as, &as->a_lock));
+ ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
+ ASSERT(IS_P2ALIGNED(size, PAGESIZE));
+ ASSERT(vn_a->vp == NULL);
+
+ return (as_map_segvn_segs(as, addr, size, szcvec,
+ crfp, vn_a, segcreated));
+}
+
int
as_map(struct as *as, caddr_t addr, size_t size, int (*crfp)(), void *argsp)
{
@@ -1636,10 +1695,15 @@ as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(),
return (ENOMEM);
}
- if (AS_MAP_VNSEGS_USELPGS(crfp, argsp)) {
+ if (AS_MAP_VNSEGS_USELPGS(crfp, argsp) || AS_MAP_SHAMP(crfp, argsp)) {
int unmap = 0;
- error = as_map_vnsegs(as, raddr, rsize, crfp,
- (struct segvn_crargs *)argsp, &unmap);
+ if (AS_MAP_SHAMP(crfp, argsp)) {
+ error = as_map_sham(as, raddr, rsize, crfp,
+ (struct segvn_crargs *)argsp, &unmap);
+ } else {
+ error = as_map_vnsegs(as, raddr, rsize, crfp,
+ (struct segvn_crargs *)argsp, &unmap);
+ }
if (error != 0) {
AS_LOCK_EXIT(as, &as->a_lock);
if (unmap) {
diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c
index 9fa821131b..05bfe662be 100644
--- a/usr/src/uts/common/vm/vm_page.c
+++ b/usr/src/uts/common/vm/vm_page.c
@@ -200,9 +200,6 @@ kmutex_t pcgs_cagelock; /* serializes NOSLEEP cage allocs */
kmutex_t pcgs_wait_lock; /* used for delay in pcgs */
static kcondvar_t pcgs_cv; /* cv for delay in pcgs */
-#define PAGE_LOCK_MAXIMUM \
- ((1 << (sizeof (((page_t *)0)->p_lckcnt) * NBBY)) - 1)
-
#ifdef VM_STATS
/*
@@ -552,6 +549,10 @@ add_physmem(
*/
add_physmem_cb(pp, pnum);
+ pp->p_lckcnt = 0;
+ pp->p_cowcnt = 0;
+ pp->p_slckcnt = 0;
+
/*
* Initialize the page lock as unlocked, since nobody
* can see or access this page yet.
@@ -2711,9 +2712,11 @@ page_free(page_t *pp, int dontneed)
* The page_struct_lock need not be acquired to examine these
* fields since the page has an "exclusive" lock.
*/
- if (hat_page_is_mapped(pp) || pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
- panic("page_free pp=%p, pfn=%lx, lckcnt=%d, cowcnt=%d",
- pp, page_pptonum(pp), pp->p_lckcnt, pp->p_cowcnt);
+ if (hat_page_is_mapped(pp) || pp->p_lckcnt != 0 || pp->p_cowcnt != 0 ||
+ pp->p_slckcnt != 0) {
+ panic("page_free pp=%p, pfn=%lx, lckcnt=%d, cowcnt=%d "
+ "slckcnt = %d", pp, page_pptonum(pp), pp->p_lckcnt,
+ pp->p_cowcnt, pp->p_slckcnt);
/*NOTREACHED*/
}
@@ -2853,7 +2856,7 @@ page_free_pages(page_t *pp)
/*NOTREACHED*/
}
if (hat_page_is_mapped(tpp) || tpp->p_lckcnt != 0 ||
- tpp->p_cowcnt != 0) {
+ tpp->p_cowcnt != 0 || tpp->p_slckcnt != 0) {
panic("page_free_pages %p", (void *)tpp);
/*NOTREACHED*/
}
@@ -3142,6 +3145,7 @@ page_destroy(page_t *pp, int dontfree)
{
ASSERT((PAGE_EXCL(pp) &&
!page_iolock_assert(pp)) || panicstr);
+ ASSERT(pp->p_slckcnt == 0 || panicstr);
if (pp->p_szc != 0) {
if (pp->p_vnode == NULL || IS_SWAPFSVP(pp->p_vnode) ||
@@ -3210,6 +3214,7 @@ page_destroy_pages(page_t *pp)
for (i = 0, tpp = pp; i < pgcnt; i++, tpp++) {
ASSERT((PAGE_EXCL(tpp) &&
!page_iolock_assert(tpp)) || panicstr);
+ ASSERT(tpp->p_slckcnt == 0 || panicstr);
(void) hat_pageunload(tpp, HAT_FORCE_PGUNLOAD);
page_hashout(tpp, NULL);
ASSERT(tpp->p_offset == (u_offset_t)-1);
@@ -4921,6 +4926,8 @@ do_page_relocate(
for (i = 0; i < npgs; i++) {
ASSERT(PAGE_EXCL(targ));
+ ASSERT(targ->p_slckcnt == 0);
+ ASSERT(repl->p_slckcnt == 0);
(void) hat_pageunload(targ, HAT_FORCE_PGUNLOAD);
@@ -5269,6 +5276,7 @@ page_try_demote_pages(page_t *pp)
for (tpp = rootpp, i = 0; i < npgs; i++, tpp++) {
ASSERT(PAGE_EXCL(tpp));
+ ASSERT(tpp->p_slckcnt == 0);
(void) hat_pageunload(tpp, HAT_FORCE_PGUNLOAD);
tpp->p_szc = 0;
}
diff --git a/usr/src/uts/i86pc/vm/vm_machdep.c b/usr/src/uts/i86pc/vm/vm_machdep.c
index ab0ab9181f..8512bdc99f 100644
--- a/usr/src/uts/i86pc/vm/vm_machdep.c
+++ b/usr/src/uts/i86pc/vm/vm_machdep.c
@@ -172,6 +172,7 @@ map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int *remap)
* to be used for mapping application and libraries text segments.
*/
int use_text_largepages = 0;
+int use_shm_largepages = 0;
/*
* Return a bit vector of large page size codes that
@@ -201,6 +202,29 @@ map_execseg_pgszcvec(int text, caddr_t addr, size_t len)
return (1 << 1);
}
+uint_t
+map_shm_pgszcvec(caddr_t addr, size_t len, uintptr_t off)
+{
+ size_t pgsz;
+ caddr_t a;
+
+ if (!use_shm_largepages || mmu.max_page_level == 0) {
+ return (0);
+ }
+
+ pgsz = LEVEL_SIZE(1);
+ a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
+ if (a < addr || a >= addr + len ||
+ P2PHASE((uintptr_t)addr ^ off, pgsz)) {
+ return (0);
+ }
+ len -= (a - addr);
+ if (len < pgsz) {
+ return (0);
+ }
+ return (1 << 1);
+}
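
The XOR test above is the alignment compatibility check: P2PHASE(addr ^ off,
pgsz) is zero exactly when addr and off have the same phase modulo pgsz, so
a large page can map both without skewing the backing object. A runnable
illustration (addresses invented; LEVEL_SIZE(1) is typically 2M on amd64):

/*
 * Illustrative only: addr and off share a 2M phase, so a 2M page
 * could cover them; perturb either and the check rejects it.
 */
#include <stdio.h>

#define	P2PHASE(x, a)	((x) & ((a) - 1))

int
main(void)
{
	unsigned long pgsz = 0x200000;		/* 2M */
	unsigned long addr = 0x7f0000200000;	/* 2M phase: 0 */
	unsigned long off  = 0x400000;		/* 2M phase: 0 */

	printf("large pages usable: %s\n",
	    P2PHASE(addr ^ off, pgsz) ? "no" : "yes");	/* yes */
	return (0);
}
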
+
/*
* Handle a pagefault.
*/
diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.c b/usr/src/uts/sfmmu/vm/hat_sfmmu.c
index c8cbc1183f..c9474ee8fa 100644
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.c
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.c
@@ -146,6 +146,7 @@ int sfmmu_allow_nc_trans = 0;
int disable_ism_large_pages = (1 << TTE512K);
int disable_large_pages = 0;
int disable_auto_large_pages = 0;
+int disable_shm_large_pages = 0;
/*
* Private sfmmu data structures for hat management
@@ -918,6 +919,7 @@ hat_init_pagesizes()
mmu_large_pages_disabled(HAT_LOAD_AUTOLPG);
}
+ disable_shm_large_pages = disable_auto_large_pages;
}
/*
diff --git a/usr/src/uts/sun4/vm/vm_dep.c b/usr/src/uts/sun4/vm/vm_dep.c
index 6109ed7054..9f2eebc551 100644
--- a/usr/src/uts/sun4/vm/vm_dep.c
+++ b/usr/src/uts/sun4/vm/vm_dep.c
@@ -657,6 +657,10 @@ extern size_t initdata_pgsz64k_minsize;
*/
pgcnt_t execseg_lpg_min_physmem = 131072; /* 1GB */
+extern int disable_shm_large_pages;
+pgcnt_t shm_lpg_min_physmem = 131072; /* 1GB */
+extern size_t max_shm_lpsize;
+
/* assumes TTE8K...TTE4M == szc */
@@ -767,6 +771,48 @@ map_execseg_pgszcvec(int text, caddr_t addr, size_t len)
return (ret);
}
+uint_t
+map_shm_pgszcvec(caddr_t addr, size_t size, uintptr_t off)
+{
+ caddr_t eaddr = addr + size;
+ uint_t szcvec = 0;
+ int i;
+ caddr_t raddr;
+ caddr_t readdr;
+ size_t pgsz;
+
+ if (physmem < shm_lpg_min_physmem || mmu_page_sizes <= 1 ||
+ max_shm_lpsize <= MMU_PAGESIZE) {
+ return (0);
+ }
+
+ for (i = mmu_page_sizes - 1; i > 0; i--) {
+ if (disable_shm_large_pages & (1 << i)) {
+ continue;
+ }
+ pgsz = page_get_pagesize(i);
+ if (pgsz > max_shm_lpsize) {
+ continue;
+ }
+ raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
+ readdr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
+ if (raddr < addr || raddr >= readdr) {
+ continue;
+ }
+ if (P2PHASE((uintptr_t)addr ^ off, pgsz)) {
+ continue;
+ }
+ szcvec |= (1 << i);
+ /*
+ * OR in the remaining enabled smaller page sizes as well.
+ */
+ szcvec |= P2PHASE(~disable_shm_large_pages, (1 << i));
+ szcvec &= ~1; /* no need to return 8K pagesize */
+ break;
+ }
+ return (szcvec);
+}
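
The bit-vector composition at the end is worth a worked example: once the
largest usable size code i is found, every enabled size below it is OR-ed in
and the 8K bit is stripped. A runnable sketch with an invented disable mask
(szc 0/1/2/3 = 8K/64K/512K/4M on sun4u):

/*
 * Illustrative only: with 512K disabled and 4M the largest usable
 * size, the result keeps the 4M and 64K bits.
 */
#include <stdio.h>

#define	P2PHASE(x, a)	((x) & ((a) - 1))

int
main(void)
{
	unsigned int i = 3;			/* largest usable szc: 4M */
	unsigned int disable = 1 << 2;		/* 512K disabled */
	unsigned int szcvec = 1 << i;

	szcvec |= P2PHASE(~disable, 1 << i);	/* OR in enabled smaller sizes */
	szcvec &= ~1;				/* drop the 8K bit */
	printf("szcvec = %#x\n", szcvec);	/* 0xa: 4M | 64K */
	return (0);
}
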
+
#define PNUM_SIZE(size_code) \
(hw_page_array[size_code].hp_size >> hw_page_array[0].hp_shift)
diff --git a/usr/src/uts/sun4u/cpu/us3_cheetah.c b/usr/src/uts/sun4u/cpu/us3_cheetah.c
index b530b6754d..9a74d72be1 100644
--- a/usr/src/uts/sun4u/cpu/us3_cheetah.c
+++ b/usr/src/uts/sun4u/cpu/us3_cheetah.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -124,6 +123,7 @@ cpu_fiximp(pnode_t dnode)
};
extern int exec_lpg_disable, use_brk_lpg, use_stk_lpg, use_zmap_lpg;
+ extern size_t max_shm_lpsize;
for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++)
@@ -148,6 +148,7 @@ cpu_fiximp(pnode_t dnode)
use_brk_lpg = 0;
use_stk_lpg = 0;
use_zmap_lpg = 0;
+ max_shm_lpsize = MMU_PAGESIZE;
}
void
diff --git a/usr/src/uts/sun4u/vm/mach_vm_dep.c b/usr/src/uts/sun4u/vm/mach_vm_dep.c
index d31d8321a9..d9907b3616 100644
--- a/usr/src/uts/sun4u/vm/mach_vm_dep.c
+++ b/usr/src/uts/sun4u/vm/mach_vm_dep.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -123,6 +122,8 @@ size_t text_pgsz64k_minsize = MMU_PAGESIZE64K;
size_t text_pgsz4m_minsize = MMU_PAGESIZE4M;
size_t initdata_pgsz64k_minsize = MMU_PAGESIZE64K;
+size_t max_shm_lpsize = ULONG_MAX;
+
/*
* map_addr_proc() is the routine called when the system is to
* choose an address for the user. We will pick an address
diff --git a/usr/src/uts/sun4v/vm/mach_vm_dep.c b/usr/src/uts/sun4v/vm/mach_vm_dep.c
index d214849fac..5b9e380a30 100644
--- a/usr/src/uts/sun4v/vm/mach_vm_dep.c
+++ b/usr/src/uts/sun4v/vm/mach_vm_dep.c
@@ -130,6 +130,8 @@ size_t text_pgsz64k_minsize = MMU_PAGESIZE64K;
size_t text_pgsz4m_minsize = MMU_PAGESIZE4M;
size_t initdata_pgsz64k_minsize = MMU_PAGESIZE64K;
+size_t max_shm_lpsize = MMU_PAGESIZE4M;
+
/*
* map_addr_proc() is the routine called when the system is to
* choose an address for the user. We will pick an address