diff options
author | sl108498 <none@none> | 2006-09-19 15:49:28 -0700 |
---|---|---|
committer | sl108498 <none@none> | 2006-09-19 15:49:28 -0700 |
commit | c6939658adb0a356a77bc28f7df252ceb4a8f6cc (patch) | |
tree | 2e24cb01bd59e15cda6ad68fa5d778b4cf571fa7 /usr/src/uts/common/vm | |
parent | 69889278ff50c08a6682a39ce6b5d97c5f0c2387 (diff) | |
download | illumos-gate-c6939658adb0a356a77bc28f7df252ceb4a8f6cc.tar.gz |
PSARC/2004/580 zone/project.max-locked-memory Resource Controls
PSARC/2006/463 Amendment_to_zone_project.max-locked-memory_Resource_Controls
5053609 RFE: need zone.max-locked-memory rctl
4691104 Need mlock capability without requiring superuser privileges
Diffstat (limited to 'usr/src/uts/common/vm')
-rw-r--r-- | usr/src/uts/common/vm/anon.h | 3 | ||||
-rw-r--r-- | usr/src/uts/common/vm/as.h | 11 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg_dev.h | 13 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg_spt.c | 166 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg_vn.c | 215 | ||||
-rw-r--r-- | usr/src/uts/common/vm/vm_anon.c | 1 |
6 files changed, 323 insertions, 86 deletions
diff --git a/usr/src/uts/common/vm/anon.h b/usr/src/uts/common/vm/anon.h index 294867ca01..90f6e1e661 100644 --- a/usr/src/uts/common/vm/anon.h +++ b/usr/src/uts/common/vm/anon.h @@ -241,6 +241,8 @@ struct anon_hdr { #define ANON_ALLOC_FORCE 0x2 /* force single level anon array */ #define ANON_GROWDOWN 0x4 /* anon array should grow downward */ +struct kshmid; + /* * The anon_map structure is used by various clients of the anon layer to * manage anonymous memory. When anonymous memory is shared, @@ -284,6 +286,7 @@ struct anon_map { ulong_t refcnt; /* reference count on this structure */ ushort_t a_szc; /* max szc among shared processes */ void *locality; /* lgroup locality info */ + struct kshmid *a_sp; /* kshmid if amp backs sysV, or NULL */ }; #ifdef _KERNEL diff --git a/usr/src/uts/common/vm/as.h b/usr/src/uts/common/vm/as.h index c7afefc23c..f1c7ea3cfa 100644 --- a/usr/src/uts/common/vm/as.h +++ b/usr/src/uts/common/vm/as.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -47,6 +46,7 @@ #include <vm/faultcode.h> #include <vm/hat.h> #include <sys/avl.h> +#include <sys/proc.h> #ifdef __cplusplus extern "C" { @@ -124,6 +124,7 @@ struct as { size_t a_sizedir; /* size of object directory */ struct as_callback *a_callbacks; /* callback list */ void *a_xhat; /* list of xhat providers */ + proc_t *a_proc; /* back pointer to proc */ }; #define AS_PAGLCK 0x80 @@ -240,7 +241,7 @@ void as_avlinit(struct as *); struct seg *as_segat(struct as *as, caddr_t addr); void as_rangelock(struct as *as); void as_rangeunlock(struct as *as); -struct as *as_alloc(void); +struct as *as_alloc(); void as_free(struct as *as); int as_dup(struct as *as, struct as **outas); struct seg *as_findseg(struct as *as, caddr_t addr, int tail); diff --git a/usr/src/uts/common/vm/seg_dev.h b/usr/src/uts/common/vm/seg_dev.h index c498c06ecf..451d61963d 100644 --- a/usr/src/uts/common/vm/seg_dev.h +++ b/usr/src/uts/common/vm/seg_dev.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -42,12 +41,12 @@ #pragma ident "%Z%%M% %I% %E% SMI" -#include <sys/project.h> - #ifdef __cplusplus extern "C" { #endif +struct proc; + /* * Structure whose pointer is passed to the segdev_create routine */ @@ -110,7 +109,7 @@ struct devmap_pmem_cookie { pgcnt_t dp_npages; /* number of allocated mem pages */ page_t **dp_pparray; /* pages allocated for this cookie */ vnode_t *dp_vnp; /* vnode associated with this cookie */ - kproject_t *dp_projp; /* project ptr for resource ctl */ + proc_t *dp_proc; /* proc ptr for resource control */ }; #ifdef _KERNEL diff --git a/usr/src/uts/common/vm/seg_spt.c b/usr/src/uts/common/vm/seg_spt.c index 774a9c3b9f..b614344bd0 100644 --- a/usr/src/uts/common/vm/seg_spt.c +++ b/usr/src/uts/common/vm/seg_spt.c @@ -45,10 +45,13 @@ #include <sys/debug.h> #include <sys/vtrace.h> #include <sys/shm.h> +#include <sys/shm_impl.h> #include <sys/lgrp.h> #include <sys/vmsystm.h> - +#include <sys/policy.h> +#include <sys/project.h> #include <sys/tnf_probe.h> +#include <sys/zone.h> #define SEGSPTADDR (caddr_t)0x0 @@ -181,7 +184,7 @@ static int spt_anon_getpages(struct seg *seg, caddr_t addr, size_t len, /*ARGSUSED*/ int sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp, - uint_t prot, uint_t flags, uint_t share_szc) + uint_t prot, uint_t flags, uint_t share_szc) { int err; struct as *newas; @@ -189,7 +192,7 @@ sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp, #ifdef DEBUG TNF_PROBE_1(sptcreate, "spt", /* CSTYLED */, - tnf_ulong, size, size ); + tnf_ulong, size, size ); #endif if (segspt_minfree == 0) /* leave min 5% of availrmem for */ segspt_minfree = availrmem/20; /* for the system */ @@ -201,11 +204,11 @@ sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp, * get a new as for this shared memory segment */ newas = as_alloc(); + newas->a_proc = NULL; sptcargs.amp = amp; sptcargs.prot = prot; sptcargs.flags = flags; sptcargs.szc = share_szc; - /* * create a shared page table (spt) segment */ @@ 
-245,10 +248,10 @@ segspt_free(struct seg *seg) if (sptd->spt_realsize) segspt_free_pages(seg, seg->s_base, sptd->spt_realsize); - if (sptd->spt_ppa_lckcnt) - kmem_free(sptd->spt_ppa_lckcnt, - sizeof (*sptd->spt_ppa_lckcnt) - * btopr(sptd->spt_amp->size)); + if (sptd->spt_ppa_lckcnt) + kmem_free(sptd->spt_ppa_lckcnt, + sizeof (*sptd->spt_ppa_lckcnt) + * btopr(sptd->spt_amp->size)); kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp)); mutex_destroy(&sptd->spt_lock); kmem_free(sptd, sizeof (*sptd)); @@ -370,6 +373,7 @@ segspt_create(struct seg *seg, caddr_t argsp) struct spt_data *sptd; struct segspt_crargs *sptcargs = (struct segspt_crargs *)argsp; struct anon_map *amp = sptcargs->amp; + struct kshmid *sp = amp->a_sp; struct cred *cred = CRED(); ulong_t i, j, anon_index = 0; pgcnt_t npages = btopr(amp->size); @@ -381,16 +385,20 @@ segspt_create(struct seg *seg, caddr_t argsp) caddr_t a; pgcnt_t pidx; size_t sz; + proc_t *procp = curproc; + rctl_qty_t lockedbytes = 0; + kproject_t *proj; /* * We are holding the a_lock on the underlying dummy as, * so we can make calls to the HAT layer. 
*/ ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); + ASSERT(sp != NULL); #ifdef DEBUG TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */, - tnf_opaque, addr, addr, + tnf_opaque, addr, addr, tnf_ulong, len, seg->s_size); #endif if ((sptcargs->flags & SHM_PAGEABLE) == 0) { @@ -484,25 +492,49 @@ segspt_create(struct seg *seg, caddr_t argsp) seg, addr, S_CREATE, cred)) != 0) goto out4; + mutex_enter(&sp->shm_mlock); + + /* May be partially locked, so, count bytes to charge for locking */ + for (i = 0; i < npages; i++) + if (ppa[i]->p_lckcnt == 0) + lockedbytes += PAGESIZE; + + proj = sp->shm_perm.ipc_proj; + + if (lockedbytes > 0) { + mutex_enter(&procp->p_lock); + if (rctl_incr_locked_mem(procp, proj, lockedbytes, 0)) { + mutex_exit(&procp->p_lock); + mutex_exit(&sp->shm_mlock); + for (i = 0; i < npages; i++) + page_unlock(ppa[i]); + err = ENOMEM; + goto out4; + } + mutex_exit(&procp->p_lock); + } + /* * addr is initial address corresponding to the first page on ppa list */ for (i = 0; i < npages; i++) { /* attempt to lock all pages */ - if (!page_pp_lock(ppa[i], 0, 1)) { + if (page_pp_lock(ppa[i], 0, 1) == 0) { /* * if unable to lock any page, unlock all * of them and return error */ for (j = 0; j < i; j++) page_pp_unlock(ppa[j], 0, 1); - for (i = 0; i < npages; i++) { + for (i = 0; i < npages; i++) page_unlock(ppa[i]); - } + rctl_decr_locked_mem(NULL, proj, lockedbytes, 0); + mutex_exit(&sp->shm_mlock); err = ENOMEM; goto out4; } } + mutex_exit(&sp->shm_mlock); /* * Some platforms assume that ISM mappings are HAT_LOAD_LOCK @@ -582,6 +614,9 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len) int root = 0; pgcnt_t pgs, curnpgs = 0; page_t *rootpp; + rctl_qty_t unlocked_bytes = 0; + kproject_t *proj; + kshmid_t *sp; ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); @@ -601,7 +636,13 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len) if (sptd->spt_flags & SHM_PAGEABLE) npages = btop(amp->size); - ASSERT(amp); + 
ASSERT(amp != NULL); + + if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { + sp = amp->a_sp; + proj = sp->shm_perm.ipc_proj; + mutex_enter(&sp->shm_mlock); + } for (anon_idx = 0; anon_idx < npages; anon_idx++) { if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) { @@ -647,11 +688,13 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len) "page not in the system"); /*NOTREACHED*/ } + ASSERT(pp->p_lckcnt > 0); page_pp_unlock(pp, 0, 1); + if (pp->p_lckcnt == 0) + unlocked_bytes += PAGESIZE; } else { if ((pp = page_lookup(vp, off, SE_EXCL)) == NULL) continue; - page_pp_unlock(pp, 0, 0); } /* * It's logical to invalidate the pages here as in most cases @@ -697,7 +740,11 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len) VN_DISPOSE(pp, B_INVAL, 0, kcred); } } - + if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { + if (unlocked_bytes > 0) + rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0); + mutex_exit(&sp->shm_mlock); + } if (root != 0 || curnpgs != 0) { panic("segspt_free_pages: bad large page"); /*NOTREACHED*/ @@ -1392,7 +1439,6 @@ segspt_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist, ASSERT(sptd->spt_pcachecnt != 0); ASSERT(sptd->spt_ppa == pplist); ASSERT(npages == btopr(sptd->spt_amp->size)); - /* * Acquire the lock on the dummy seg and destroy the * ppa array IF this is the last pcachecnt. 
@@ -1409,7 +1455,7 @@ segspt_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist, hat_setref(pplist[i]); } if ((sptd->spt_flags & SHM_PAGEABLE) && - (sptd->spt_ppa_lckcnt[i] == 0)) + (sptd->spt_ppa_lckcnt[i] == 0)) free_availrmem++; page_unlock(pplist[i]); } @@ -2363,15 +2409,35 @@ lpgs_err: return (err); } +/* + * count the number of bytes in a set of spt pages that are currently not + * locked + */ +static rctl_qty_t +spt_unlockedbytes(pgcnt_t npages, page_t **ppa) +{ + ulong_t i; + rctl_qty_t unlocked = 0; + + for (i = 0; i < npages; i++) { + if (ppa[i]->p_lckcnt == 0) + unlocked += PAGESIZE; + } + return (unlocked); +} + int spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages, - page_t **ppa, ulong_t *lockmap, size_t pos) + page_t **ppa, ulong_t *lockmap, size_t pos, + rctl_qty_t *locked) { struct shm_data *shmd = seg->s_data; struct spt_data *sptd = shmd->shm_sptseg->s_data; ulong_t i; int kernel; + /* return the number of bytes actually locked */ + *locked = 0; for (i = 0; i < npages; anon_index++, pos++, i++) { if (!(shmd->shm_vpage[anon_index] & DISM_PG_LOCKED)) { if (sptd->spt_ppa_lckcnt[anon_index] < @@ -2386,19 +2452,19 @@ spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages, kernel = (sptd->spt_ppa && sptd->spt_ppa[anon_index]) ? 
1 : 0; if (!page_pp_lock(ppa[i], 0, kernel)) { - /* unlock rest of the pages */ - for (; i < npages; i++) - page_unlock(ppa[i]); sptd->spt_ppa_lckcnt[anon_index]--; return (EAGAIN); } + /* if this is a newly locked page, count it */ + if (ppa[i]->p_lckcnt == 1) { + *locked += PAGESIZE; + } shmd->shm_lckpgs++; shmd->shm_vpage[anon_index] |= DISM_PG_LOCKED; if (lockmap != NULL) BT_SET(lockmap, pos); } } - page_unlock(ppa[i]); } return (0); } @@ -2411,6 +2477,7 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, struct shm_data *shmd = seg->s_data; struct seg *sptseg = shmd->shm_sptseg; struct spt_data *sptd = sptseg->s_data; + struct kshmid *sp = sptd->spt_amp->a_sp; pgcnt_t npages, a_npages; page_t **ppa; pgcnt_t an_idx, a_an_idx, ppa_idx; @@ -2419,8 +2486,13 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, size_t share_sz; ulong_t i; int sts = 0; + rctl_qty_t unlocked = 0; + rctl_qty_t locked = 0; + struct proc *p = curproc; + kproject_t *proj; ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); + ASSERT(sp != NULL); if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { return (0); @@ -2434,7 +2506,16 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, return (ENOMEM); } + /* + * A shm's project never changes, so no lock needed. + * The shm has a hold on the project, so it will not go away. + * Since we have a mapping to shm within this zone, we know + * that the zone will not go away. 
+ */ + proj = sp->shm_perm.ipc_proj; + if (op == MC_LOCK) { + /* * Need to align addr and size request if they are not * aligned so we can always allocate large page(s) however @@ -2469,18 +2550,36 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, return (sts); } - sts = spt_lockpages(seg, an_idx, npages, - &ppa[ppa_idx], lockmap, pos); + mutex_enter(&sp->shm_mlock); + /* enforce locked memory rctl */ + unlocked = spt_unlockedbytes(npages, &ppa[ppa_idx]); + + mutex_enter(&p->p_lock); + if (rctl_incr_locked_mem(p, proj, unlocked, 0)) { + mutex_exit(&p->p_lock); + sts = EAGAIN; + } else { + mutex_exit(&p->p_lock); + sts = spt_lockpages(seg, an_idx, npages, + &ppa[ppa_idx], lockmap, pos, &locked); + + /* + * correct locked count if not all pages could be + * locked + */ + if ((unlocked - locked) > 0) { + rctl_decr_locked_mem(NULL, proj, + (unlocked - locked), 0); + } + } /* - * unlock remaining pages for requests which are not - * aligned or not in 4 M chunks + * unlock pages */ - for (i = 0; i < ppa_idx; i++) - page_unlock(ppa[i]); - for (i = ppa_idx + npages; i < a_npages; i++) + for (i = 0; i < a_npages; i++) page_unlock(ppa[i]); if (sptd->spt_ppa != NULL) sptd->spt_flags |= DISM_PPA_CHANGED; + mutex_exit(&sp->shm_mlock); mutex_exit(&sptd->spt_lock); kmem_free(ppa, ((sizeof (page_t *)) * a_npages)); @@ -2493,6 +2592,7 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, struct page *pp; int kernel; anon_sync_obj_t cookie; + rctl_qty_t unlocked = 0; amp = sptd->spt_amp; mutex_enter(&sptd->spt_lock); @@ -2506,13 +2606,13 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, if (sptd->spt_ppa != NULL) sptd->spt_flags |= DISM_PPA_CHANGED; + mutex_enter(&sp->shm_mlock); ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER); for (i = 0; i < npages; i++, an_idx++) { if (shmd->shm_vpage[an_idx] & DISM_PG_LOCKED) { anon_array_enter(amp, an_idx, &cookie); ap = anon_get_ptr(amp->ahp, an_idx); ASSERT(ap); - ASSERT(sptd->spt_ppa_lckcnt[an_idx] > 0);
swap_xlate(ap, &vp, &off); anon_array_exit(&cookie); @@ -2527,7 +2627,10 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, */ kernel = (sptd->spt_ppa && sptd->spt_ppa[an_idx]) ? 1 : 0; + ASSERT(pp->p_lckcnt > 0); page_pp_unlock(pp, 0, kernel); + if (pp->p_lckcnt == 0) + unlocked += PAGESIZE; page_unlock(pp); shmd->shm_vpage[an_idx] &= ~DISM_PG_LOCKED; sptd->spt_ppa_lckcnt[an_idx]--; @@ -2538,6 +2641,9 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, if (sptd->spt_ppa != NULL) sptd->spt_flags |= DISM_PPA_CHANGED; mutex_exit(&sptd->spt_lock); + + rctl_decr_locked_mem(NULL, proj, unlocked, 0); + mutex_exit(&sp->shm_mlock); } return (sts); } diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c index 96fb02827f..83ef08e9f5 100644 --- a/usr/src/uts/common/vm/seg_vn.c +++ b/usr/src/uts/common/vm/seg_vn.c @@ -70,7 +70,11 @@ #include <vm/anon.h> #include <vm/page.h> #include <vm/vpage.h> - +#include <sys/proc.h> +#include <sys/task.h> +#include <sys/project.h> +#include <sys/zone.h> +#include <sys/shm_impl.h> /* * Private seg op routines. */ @@ -210,7 +214,7 @@ static struct segvnvmstats_str { #define SDR_RANGE 1 /* demote entire range */ #define SDR_END 2 /* demote non aligned ends only */ -#define CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr) { \ +#define CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr) { \ if ((len) != 0) { \ lpgaddr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz); \ ASSERT(lpgaddr >= (seg)->s_base); \ @@ -2393,13 +2397,29 @@ segvn_faultpage( * allocating vpage here if it's absent requires * upgrading the segvn reader lock, the cost of * which does not seem worthwhile. + * + * Usually testing and setting VPP_ISPPLOCK and + * VPP_SETPPLOCK requires holding the segvn lock as + * writer, but in this case all readers are + * serializing on the anon array lock. 
*/ if (AS_ISPGLCK(seg->s_as) && vpage != NULL && - (svd->flags & MAP_NORESERVE)) { - claim = VPP_PROT(vpage) & PROT_WRITE; + (svd->flags & MAP_NORESERVE) && + !VPP_ISPPLOCK(vpage)) { + proc_t *p = seg->s_as->a_proc; ASSERT(svd->type == MAP_PRIVATE); - if (page_pp_lock(pp, claim, 0)) - VPP_SETPPLOCK(vpage); + mutex_enter(&p->p_lock); + if (rctl_incr_locked_mem(p, NULL, PAGESIZE, + 1) == 0) { + claim = VPP_PROT(vpage) & PROT_WRITE; + if (page_pp_lock(pp, claim, 0)) { + VPP_SETPPLOCK(vpage); + } else { + rctl_decr_locked_mem(p, NULL, + PAGESIZE, 1); + } + } + mutex_exit(&p->p_lock); } hat_memload(hat, addr, pp, prot, hat_flag); @@ -5826,7 +5846,7 @@ segvn_claim_pages( page_t *pp; pgcnt_t pg_idx, i; int err = 0; - anoff_t aoff; + anoff_t aoff; int anon = (amp != NULL) ? 1 : 0; ASSERT(svd->type == MAP_PRIVATE); @@ -6931,6 +6951,13 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len, struct anon *ap; struct vattr va; anon_sync_obj_t cookie; + struct kshmid *sp = NULL; + struct proc *p = curproc; + kproject_t *proj = NULL; + int chargeproc = 1; + size_t locked_bytes = 0; + size_t unlocked_bytes = 0; + int err = 0; /* * Hold write lock on address space because may split or concatenate @@ -6938,6 +6965,18 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len, */ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); + /* + * If this is a shm, use shm's project and zone, else use + * project and zone of calling process + */ + + /* Determine if this segment backs a sysV shm */ + if (svd->amp != NULL && svd->amp->a_sp != NULL) { + sp = svd->amp->a_sp; + proj = sp->shm_perm.ipc_proj; + chargeproc = 0; + } + SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER); if (attr) { pageprot = attr & ~(SHARED|PRIVATE); @@ -6990,6 +7029,61 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len, offset = svd->offset + (uintptr_t)(addr - seg->s_base); evp = &svd->vpage[seg_page(seg, addr + len)]; + if (sp != NULL) + mutex_enter(&sp->shm_mlock); + + /* determine number 
of unlocked bytes in range for lock operation */ + if (op == MC_LOCK) { + + if (sp == NULL) { + for (vpp = &svd->vpage[seg_page(seg, addr)]; vpp < evp; + vpp++) { + if (!VPP_ISPPLOCK(vpp)) + unlocked_bytes += PAGESIZE; + } + } else { + ulong_t i_idx, i_edx; + anon_sync_obj_t i_cookie; + struct anon *i_ap; + struct vnode *i_vp; + u_offset_t i_off; + + /* Only count sysV pages once for locked memory */ + i_edx = svd->anon_index + seg_page(seg, addr + len); + ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER); + for (i_idx = anon_index; i_idx < i_edx; i_idx++) { + anon_array_enter(amp, i_idx, &i_cookie); + i_ap = anon_get_ptr(amp->ahp, i_idx); + if (i_ap == NULL) { + unlocked_bytes += PAGESIZE; + anon_array_exit(&i_cookie); + continue; + } + swap_xlate(i_ap, &i_vp, &i_off); + anon_array_exit(&i_cookie); + pp = page_lookup(i_vp, i_off, SE_SHARED); + if (pp == NULL) { + unlocked_bytes += PAGESIZE; + continue; + } else if (pp->p_lckcnt == 0) + unlocked_bytes += PAGESIZE; + page_unlock(pp); + } + ANON_LOCK_EXIT(&amp->a_rwlock); + } + + mutex_enter(&p->p_lock); + err = rctl_incr_locked_mem(p, proj, unlocked_bytes, + chargeproc); + mutex_exit(&p->p_lock); + + if (err) { + if (sp != NULL) + mutex_exit(&sp->shm_mlock); + SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); + return (err); + } + } /* * Loop over all pages in the range. Process if we're locking and * page has not already been locked in this mapping; or if we're @@ -7022,9 +7116,8 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len, if (pp == NULL) { anon_array_exit(&cookie); ANON_LOCK_EXIT(&amp->a_rwlock); - SEGVN_LOCK_EXIT(seg->s_as, - &svd->lock); - return (ENOMEM); + err = ENOMEM; + goto out; } ASSERT(anon_get_ptr(amp->ahp, anon_index) == NULL); @@ -7096,8 +7189,8 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len, * 4125102 for details of the problem. */ if (error == EDEADLK) { - SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); - return (error); + err = error; + goto out; } /* * Quit if we fail to fault in the page.
Treat @@ -7108,21 +7201,19 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len, va.va_mask = AT_SIZE; if (VOP_GETATTR(svd->vp, &va, 0, svd->cred) != 0) { - SEGVN_LOCK_EXIT(seg->s_as, - &svd->lock); - return (EIO); + err = EIO; + goto out; } if (btopr(va.va_size) >= btopr(off + 1)) { - SEGVN_LOCK_EXIT(seg->s_as, - &svd->lock); - return (EIO); + err = EIO; + goto out; } - SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); - return (0); + goto out; + } else if (error) { - SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); - return (EIO); + err = EIO; + goto out; } pp = pl[0]; ASSERT(pp != NULL); @@ -7154,39 +7245,75 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len, if (op == MC_LOCK) { int ret = 1; /* Assume success */ - /* - * Make sure another thread didn't lock - * the page after we released the segment - * lock. - */ - if ((attr == 0 || VPP_PROT(vpp) == pageprot) && - !VPP_ISPPLOCK(vpp)) { - ret = page_pp_lock(pp, claim, 0); - if (ret != 0) { - VPP_SETPPLOCK(vpp); - if (lockmap != (ulong_t *)NULL) - BT_SET(lockmap, pos); - } - } - page_unlock(pp); + ASSERT(!VPP_ISPPLOCK(vpp)); + + ret = page_pp_lock(pp, claim, 0); if (ret == 0) { - SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); - return (EAGAIN); + /* locking page failed */ + page_unlock(pp); + err = EAGAIN; + goto out; } + VPP_SETPPLOCK(vpp); + if (sp != NULL) { + if (pp->p_lckcnt == 1) + locked_bytes += PAGESIZE; + } else + locked_bytes += PAGESIZE; + + if (lockmap != (ulong_t *)NULL) + BT_SET(lockmap, pos); + + page_unlock(pp); } else { + ASSERT(VPP_ISPPLOCK(vpp)); if (pp != NULL) { - if ((attr == 0 || - VPP_PROT(vpp) == pageprot) && - VPP_ISPPLOCK(vpp)) - page_pp_unlock(pp, claim, 0); + /* sysV pages should be locked */ + ASSERT(sp == NULL || pp->p_lckcnt > 0); + page_pp_unlock(pp, claim, 0); + if (sp != NULL) { + if (pp->p_lckcnt == 0) + unlocked_bytes + += PAGESIZE; + } else + unlocked_bytes += PAGESIZE; page_unlock(pp); + } else { + ASSERT(sp != NULL); + unlocked_bytes += PAGESIZE; } VPP_CLRPPLOCK(vpp); } } } +out: + if 
(op == MC_LOCK) { + /* Credit back bytes that did not get locked */ + if ((unlocked_bytes - locked_bytes) > 0) { + if (proj == NULL) + mutex_enter(&p->p_lock); + rctl_decr_locked_mem(p, proj, + (unlocked_bytes - locked_bytes), chargeproc); + if (proj == NULL) + mutex_exit(&p->p_lock); + } + + } else { + /* Account bytes that were unlocked */ + if (unlocked_bytes > 0) { + if (proj == NULL) + mutex_enter(&p->p_lock); + rctl_decr_locked_mem(p, proj, unlocked_bytes, + chargeproc); + if (proj == NULL) + mutex_exit(&p->p_lock); + } + } + if (sp != NULL) + mutex_exit(&sp->shm_mlock); SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); - return (0); + + return (err); } /* diff --git a/usr/src/uts/common/vm/vm_anon.c b/usr/src/uts/common/vm/vm_anon.c index c05f20478e..0cad34257c 100644 --- a/usr/src/uts/common/vm/vm_anon.c +++ b/usr/src/uts/common/vm/vm_anon.c @@ -3139,6 +3139,7 @@ anonmap_alloc(size_t size, size_t swresv) amp->swresv = swresv; amp->locality = 0; amp->a_szc = 0; + amp->a_sp = NULL; return (amp); } |