Diffstat (limited to 'usr/src/uts/common/vm/seg_spt.c')
-rw-r--r-- | usr/src/uts/common/vm/seg_spt.c | 166
1 files changed, 136 insertions, 30 deletions
diff --git a/usr/src/uts/common/vm/seg_spt.c b/usr/src/uts/common/vm/seg_spt.c
index 774a9c3b9f..b614344bd0 100644
--- a/usr/src/uts/common/vm/seg_spt.c
+++ b/usr/src/uts/common/vm/seg_spt.c
@@ -45,10 +45,13 @@
 #include <sys/debug.h>
 #include <sys/vtrace.h>
 #include <sys/shm.h>
+#include <sys/shm_impl.h>
 #include <sys/lgrp.h>
 #include <sys/vmsystm.h>
-
+#include <sys/policy.h>
+#include <sys/project.h>
 #include <sys/tnf_probe.h>
+#include <sys/zone.h>
 
 #define	SEGSPTADDR	(caddr_t)0x0
 
@@ -181,7 +184,7 @@ static int spt_anon_getpages(struct seg *seg, caddr_t addr, size_t len,
 /*ARGSUSED*/
 int
 sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp,
-	uint_t prot, uint_t flags, uint_t share_szc)
+    uint_t prot, uint_t flags, uint_t share_szc)
 {
 	int	err;
 	struct	as	*newas;
@@ -189,7 +192,7 @@ sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp,
 
 #ifdef DEBUG
 	TNF_PROBE_1(sptcreate, "spt", /* CSTYLED */,
-		tnf_ulong, size, size );
+	    tnf_ulong, size, size );
 #endif
 	if (segspt_minfree == 0)	/* leave min 5% of availrmem for */
 		segspt_minfree = availrmem/20;	/* for the system */
@@ -201,11 +204,11 @@ sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp,
 	 * get a new as for this shared memory segment
 	 */
 	newas = as_alloc();
+	newas->a_proc = NULL;
 	sptcargs.amp = amp;
 	sptcargs.prot = prot;
 	sptcargs.flags = flags;
 	sptcargs.szc = share_szc;
-
 	/*
 	 * create a shared page table (spt) segment
 	 */
@@ -245,10 +248,10 @@
 	if (sptd->spt_realsize)
 		segspt_free_pages(seg, seg->s_base, sptd->spt_realsize);
 
-	if (sptd->spt_ppa_lckcnt)
-		kmem_free(sptd->spt_ppa_lckcnt,
-			sizeof (*sptd->spt_ppa_lckcnt)
-			* btopr(sptd->spt_amp->size));
+	if (sptd->spt_ppa_lckcnt)
+		kmem_free(sptd->spt_ppa_lckcnt,
+		    sizeof (*sptd->spt_ppa_lckcnt)
+		    * btopr(sptd->spt_amp->size));
 	kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp));
 	mutex_destroy(&sptd->spt_lock);
 	kmem_free(sptd, sizeof (*sptd));
@@ -370,6 +373,7 @@ segspt_create(struct seg *seg, caddr_t argsp)
 	struct spt_data *sptd;
 	struct segspt_crargs *sptcargs = (struct segspt_crargs *)argsp;
 	struct anon_map *amp = sptcargs->amp;
+	struct kshmid	*sp = amp->a_sp;
 	struct	cred	*cred = CRED();
 	ulong_t		i, j, anon_index = 0;
 	pgcnt_t		npages = btopr(amp->size);
@@ -381,16 +385,20 @@ segspt_create(struct seg *seg, caddr_t argsp)
 	caddr_t		a;
 	pgcnt_t		pidx;
 	size_t		sz;
+	proc_t		*procp = curproc;
+	rctl_qty_t	lockedbytes = 0;
+	kproject_t	*proj;
 
 	/*
 	 * We are holding the a_lock on the underlying dummy as,
 	 * so we can make calls to the HAT layer.
 	 */
 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
+	ASSERT(sp != NULL);
 
 #ifdef DEBUG
 	TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */,
-		tnf_opaque, addr, addr,
+	    tnf_opaque, addr, addr,
 	    tnf_ulong, len, seg->s_size);
 #endif
 	if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
@@ -484,25 +492,49 @@ segspt_create(struct seg *seg, caddr_t argsp)
 	    seg, addr, S_CREATE, cred)) != 0)
 		goto out4;
 
+	mutex_enter(&sp->shm_mlock);
+
+	/* May be partially locked, so, count bytes to charge for locking */
+	for (i = 0; i < npages; i++)
+		if (ppa[i]->p_lckcnt == 0)
+			lockedbytes += PAGESIZE;
+
+	proj = sp->shm_perm.ipc_proj;
+
+	if (lockedbytes > 0) {
+		mutex_enter(&procp->p_lock);
+		if (rctl_incr_locked_mem(procp, proj, lockedbytes, 0)) {
+			mutex_exit(&procp->p_lock);
+			mutex_exit(&sp->shm_mlock);
+			for (i = 0; i < npages; i++)
+				page_unlock(ppa[i]);
+			err = ENOMEM;
+			goto out4;
+		}
+		mutex_exit(&procp->p_lock);
+	}
+
 	/*
 	 * addr is initial address corresponding to the first page on ppa list
 	 */
 	for (i = 0; i < npages; i++) {
 		/* attempt to lock all pages */
-		if (!page_pp_lock(ppa[i], 0, 1)) {
+		if (page_pp_lock(ppa[i], 0, 1) == 0) {
			/*
			 * if unable to lock any page, unlock all
			 * of them and return error
			 */
 			for (j = 0; j < i; j++)
 				page_pp_unlock(ppa[j], 0, 1);
-			for (i = 0; i < npages; i++) {
+			for (i = 0; i < npages; i++)
 				page_unlock(ppa[i]);
-			}
+			rctl_decr_locked_mem(NULL, proj, lockedbytes, 0);
+			mutex_exit(&sp->shm_mlock);
 			err = ENOMEM;
 			goto out4;
 		}
 	}
+	mutex_exit(&sp->shm_mlock);
 
 	/*
 	 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
@@ -582,6 +614,9 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
 	int	root = 0;
 	pgcnt_t	pgs, curnpgs = 0;
 	page_t	*rootpp;
+	rctl_qty_t	unlocked_bytes = 0;
+	kproject_t	*proj;
+	kshmid_t	*sp;
 
 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 
@@ -601,7 +636,13 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
 	if (sptd->spt_flags & SHM_PAGEABLE)
 		npages = btop(amp->size);
 
-	ASSERT(amp);
+	ASSERT(amp != NULL);
+
+	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
+		sp = amp->a_sp;
+		proj = sp->shm_perm.ipc_proj;
+		mutex_enter(&sp->shm_mlock);
+	}
 	for (anon_idx = 0; anon_idx < npages; anon_idx++) {
 		if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
 			if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
@@ -647,11 +688,13 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
 				    "page not in the system");
 				/*NOTREACHED*/
 			}
+			ASSERT(pp->p_lckcnt > 0);
 			page_pp_unlock(pp, 0, 1);
+			if (pp->p_lckcnt == 0)
+				unlocked_bytes += PAGESIZE;
 		} else {
 			if ((pp = page_lookup(vp, off, SE_EXCL)) == NULL)
 				continue;
-			page_pp_unlock(pp, 0, 0);
 		}
 		/*
 		 * It's logical to invalidate the pages here as in most cases
@@ -697,7 +740,11 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
 			VN_DISPOSE(pp, B_INVAL, 0, kcred);
 		}
 	}
-
+	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
+		if (unlocked_bytes > 0)
+			rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0);
+		mutex_exit(&sp->shm_mlock);
+	}
 	if (root != 0 || curnpgs != 0) {
 		panic("segspt_free_pages: bad large page");
 		/*NOTREACHED*/
@@ -1392,7 +1439,6 @@ segspt_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist,
 	ASSERT(sptd->spt_pcachecnt != 0);
 	ASSERT(sptd->spt_ppa == pplist);
 	ASSERT(npages == btopr(sptd->spt_amp->size));
-
 	/*
 	 * Acquire the lock on the dummy seg and destroy the
 	 * ppa array IF this is the last pcachecnt.
@@ -1409,7 +1455,7 @@ segspt_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist,
 				hat_setref(pplist[i]);
 			}
 			if ((sptd->spt_flags & SHM_PAGEABLE) &&
-				(sptd->spt_ppa_lckcnt[i] == 0))
+			    (sptd->spt_ppa_lckcnt[i] == 0))
 				free_availrmem++;
 			page_unlock(pplist[i]);
 		}
@@ -2363,15 +2409,35 @@ lpgs_err:
 	return (err);
 }
 
+/*
+ * count the number of bytes in a set of spt pages that are currently not
+ * locked
+ */
+static rctl_qty_t
+spt_unlockedbytes(pgcnt_t npages, page_t **ppa)
+{
+	ulong_t	i;
+	rctl_qty_t unlocked = 0;
+
+	for (i = 0; i < npages; i++) {
+		if (ppa[i]->p_lckcnt == 0)
+			unlocked += PAGESIZE;
+	}
+	return (unlocked);
+}
+
 int
 spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages,
-    page_t **ppa, ulong_t *lockmap, size_t pos)
+    page_t **ppa, ulong_t *lockmap, size_t pos,
+    rctl_qty_t *locked)
 {
 	struct shm_data *shmd = seg->s_data;
 	struct spt_data *sptd = shmd->shm_sptseg->s_data;
 	ulong_t	i;
 	int	kernel;
 
+	/* return the number of bytes actually locked */
+	*locked = 0;
+
 	for (i = 0; i < npages; anon_index++, pos++, i++) {
 		if (!(shmd->shm_vpage[anon_index] & DISM_PG_LOCKED)) {
 			if (sptd->spt_ppa_lckcnt[anon_index] <
@@ -2386,19 +2452,19 @@ spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages,
 				kernel = (sptd->spt_ppa &&
 				    sptd->spt_ppa[anon_index]) ? 1 : 0;
 				if (!page_pp_lock(ppa[i], 0, kernel)) {
-					/* unlock rest of the pages */
-					for (; i < npages; i++)
-						page_unlock(ppa[i]);
 					sptd->spt_ppa_lckcnt[anon_index]--;
 					return (EAGAIN);
 				}
+				/* if this is a newly locked page, count it */
+				if (ppa[i]->p_lckcnt == 1) {
+					*locked += PAGESIZE;
+				}
 				shmd->shm_lckpgs++;
 				shmd->shm_vpage[anon_index] |= DISM_PG_LOCKED;
 				if (lockmap != NULL)
 					BT_SET(lockmap, pos);
 			}
 		}
-		page_unlock(ppa[i]);
 	}
 	return (0);
 }
@@ -2411,6 +2477,7 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
 	struct shm_data *shmd = seg->s_data;
 	struct seg	*sptseg = shmd->shm_sptseg;
 	struct spt_data *sptd = sptseg->s_data;
+	struct kshmid	*sp = sptd->spt_amp->a_sp;
 	pgcnt_t		npages, a_npages;
 	page_t		**ppa;
 	pgcnt_t		an_idx, a_an_idx, ppa_idx;
@@ -2419,8 +2486,13 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
 	size_t		share_sz;
 	ulong_t		i;
 	int		sts = 0;
+	rctl_qty_t	unlocked = 0;
+	rctl_qty_t	locked = 0;
+	struct proc	*p = curproc;
+	kproject_t	*proj;
 
 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
+	ASSERT(sp != NULL);
 	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
 		return (0);
 	}
@@ -2434,7 +2506,16 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
 			return (ENOMEM);
 	}
 
+	/*
+	 * A shm's project never changes, so no lock needed.
+	 * The shm has a hold on the project, so it will not go away.
+	 * Since we have a mapping to shm within this zone, we know
+	 * that the zone will not go away.
+	 */
+	proj = sp->shm_perm.ipc_proj;
+
 	if (op == MC_LOCK) {
+
 		/*
 		 * Need to align addr and size request if they are not
 		 * aligned so we can always allocate large page(s) however
@@ -2469,18 +2550,36 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
 			return (sts);
 		}
 
-		sts = spt_lockpages(seg, an_idx, npages,
-		    &ppa[ppa_idx], lockmap, pos);
+		mutex_enter(&sp->shm_mlock);
+		/* enforce locked memory rctl */
+		unlocked = spt_unlockedbytes(npages, &ppa[ppa_idx]);
+
+		mutex_enter(&p->p_lock);
+		if (rctl_incr_locked_mem(p, proj, unlocked, 0)) {
+			mutex_exit(&p->p_lock);
+			sts = EAGAIN;
+		} else {
+			mutex_exit(&p->p_lock);
+			sts = spt_lockpages(seg, an_idx, npages,
+			    &ppa[ppa_idx], lockmap, pos, &locked);
+
+			/*
+			 * correct locked count if not all pages could be
+			 * locked
+			 */
+			if ((unlocked - locked) > 0) {
+				rctl_decr_locked_mem(NULL, proj,
+				    (unlocked - locked), 0);
+			}
+		}
 		/*
-		 * unlock remaining pages for requests which are not
-		 * aligned or not in 4 M chunks
+		 * unlock pages
 		 */
-		for (i = 0; i < ppa_idx; i++)
-			page_unlock(ppa[i]);
-		for (i = ppa_idx + npages; i < a_npages; i++)
+		for (i = 0; i < a_npages; i++)
 			page_unlock(ppa[i]);
 		if (sptd->spt_ppa != NULL)
 			sptd->spt_flags |= DISM_PPA_CHANGED;
+		mutex_exit(&sp->shm_mlock);
 		mutex_exit(&sptd->spt_lock);
 
 		kmem_free(ppa, ((sizeof (page_t *)) * a_npages));
@@ -2493,6 +2592,7 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
 		struct page *pp;
 		int kernel;
 		anon_sync_obj_t cookie;
+		rctl_qty_t unlocked = 0;
 
 		amp = sptd->spt_amp;
 		mutex_enter(&sptd->spt_lock);
@@ -2506,13 +2606,13 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
 		if (sptd->spt_ppa != NULL)
 			sptd->spt_flags |= DISM_PPA_CHANGED;
 
+		mutex_enter(&sp->shm_mlock);
 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
 		for (i = 0; i < npages; i++, an_idx++) {
 			if (shmd->shm_vpage[an_idx] & DISM_PG_LOCKED) {
 				anon_array_enter(amp, an_idx, &cookie);
 				ap = anon_get_ptr(amp->ahp, an_idx);
 				ASSERT(ap);
-				ASSERT(sptd->spt_ppa_lckcnt[an_idx] > 0);
 
 				swap_xlate(ap, &vp, &off);
 				anon_array_exit(&cookie);
@@ -2527,7 +2627,10 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
 				 */
 				kernel = (sptd->spt_ppa &&
 				    sptd->spt_ppa[an_idx]) ? 1 : 0;
+				ASSERT(pp->p_lckcnt > 0);
 				page_pp_unlock(pp, 0, kernel);
+				if (pp->p_lckcnt == 0)
+					unlocked += PAGESIZE;
 				page_unlock(pp);
 				shmd->shm_vpage[an_idx] &= ~DISM_PG_LOCKED;
 				sptd->spt_ppa_lckcnt[an_idx]--;
@@ -2538,6 +2641,9 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
 		if (sptd->spt_ppa != NULL)
 			sptd->spt_flags |= DISM_PPA_CHANGED;
 		mutex_exit(&sptd->spt_lock);
+
+		rctl_decr_locked_mem(NULL, proj, unlocked, 0);
+		mutex_exit(&sp->shm_mlock);
 	}
 	return (sts);
 }
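For reference, the MC_LOCK path above follows a charge-then-lock-then-correct pattern: count the bytes that are not yet locked, charge them against the project's locked-memory resource control, attempt the page locks, then hand back any portion that could not be locked. The sketch below is a standalone userland model of that arithmetic only, not kernel code and not part of this change; model_incr_locked_mem(), model_decr_locked_mem(), proj_limit, and struct model_page are hypothetical stand-ins for rctl_incr_locked_mem(), rctl_decr_locked_mem(), the project's rctl limit, and page_t.

/*
 * Standalone model (not kernel code) of the locked-memory accounting used
 * by the MC_LOCK path: charge the bytes that are not yet locked, try to
 * lock the pages, then give back the charge for pages that stayed unlocked.
 */
#include <stdio.h>
#include <stddef.h>

#define	MODEL_PAGESIZE	4096

struct model_page {
	unsigned int p_lckcnt;		/* mimics page_t's p_lckcnt */
};

static size_t proj_locked;		/* bytes currently charged to the project */
static const size_t proj_limit = 8 * MODEL_PAGESIZE;	/* stand-in rctl limit */

/* models rctl_incr_locked_mem(): nonzero means the charge would exceed the limit */
static int
model_incr_locked_mem(size_t bytes)
{
	if (proj_locked + bytes > proj_limit)
		return (1);
	proj_locked += bytes;
	return (0);
}

/* models rctl_decr_locked_mem() */
static void
model_decr_locked_mem(size_t bytes)
{
	proj_locked -= bytes;
}

int
main(void)
{
	struct model_page ppa[4] = { {0}, {1}, {0}, {0} };
	size_t npages = sizeof (ppa) / sizeof (ppa[0]);
	size_t unlocked = 0, locked = 0;
	size_t i;

	/* like spt_unlockedbytes(): only pages not already locked are charged */
	for (i = 0; i < npages; i++)
		if (ppa[i].p_lckcnt == 0)
			unlocked += MODEL_PAGESIZE;

	if (model_incr_locked_mem(unlocked) != 0) {
		printf("EAGAIN: charging %zu bytes would exceed the limit\n",
		    unlocked);
		return (1);
	}

	/* like spt_lockpages(): pretend the last page cannot be locked */
	for (i = 0; i < npages - 1; i++) {
		if (++ppa[i].p_lckcnt == 1)	/* count only newly locked pages */
			locked += MODEL_PAGESIZE;
	}

	/* give back the part of the charge that was never locked */
	if (unlocked > locked)
		model_decr_locked_mem(unlocked - locked);

	printf("charged %zu, locked %zu, project now holds %zu bytes\n",
	    unlocked, locked, proj_locked);
	return (0);
}

Built with any C99 compiler, the model prints the charged, locked, and remaining project byte counts for a four-page example in which one page fails to lock, mirroring how the diff reconciles the rctl charge when spt_lockpages() locks fewer bytes than were charged.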