diff options
Diffstat (limited to 'usr/src')
27 files changed, 731 insertions, 354 deletions
diff --git a/usr/src/lib/brand/lx/zone/config.xml b/usr/src/lib/brand/lx/zone/config.xml index 12deb33022..7eba9f6f18 100644 --- a/usr/src/lib/brand/lx/zone/config.xml +++ b/usr/src/lib/brand/lx/zone/config.xml @@ -65,6 +65,7 @@ <privilege set="default" name="proc_chroot" /> <privilege set="default" name="sys_audit" /> <privilege set="default" name="proc_audit" /> + <privilege set="default" name="proc_lock_memory" /> <privilege set="default" name="proc_owner" /> <privilege set="default" name="proc_setid" /> <privilege set="default" name="proc_taskid" /> diff --git a/usr/src/lib/brand/native/zone/config.xml b/usr/src/lib/brand/native/zone/config.xml index d91bebf46e..45ce096c13 100644 --- a/usr/src/lib/brand/native/zone/config.xml +++ b/usr/src/lib/brand/native/zone/config.xml @@ -65,6 +65,7 @@ <privilege set="default" name="proc_chroot" /> <privilege set="default" name="sys_audit" /> <privilege set="default" name="proc_audit" /> + <privilege set="default" name="proc_lock_memory" /> <privilege set="default" name="proc_owner" /> <privilege set="default" name="proc_setid" /> <privilege set="default" name="proc_taskid" /> diff --git a/usr/src/lib/brand/sn1/zone/config.xml b/usr/src/lib/brand/sn1/zone/config.xml index 5873587953..15d39b9c35 100644 --- a/usr/src/lib/brand/sn1/zone/config.xml +++ b/usr/src/lib/brand/sn1/zone/config.xml @@ -65,6 +65,7 @@ <privilege set="default" name="proc_chroot" /> <privilege set="default" name="sys_audit" /> <privilege set="default" name="proc_audit" /> + <privilege set="default" name="proc_lock_memory" /> <privilege set="default" name="proc_owner" /> <privilege set="default" name="proc_setid" /> <privilege set="default" name="proc_taskid" /> diff --git a/usr/src/uts/common/fs/swapfs/swap_vnops.c b/usr/src/uts/common/fs/swapfs/swap_vnops.c index 348392da2c..02ae7f3212 100644 --- a/usr/src/uts/common/fs/swapfs/swap_vnops.c +++ b/usr/src/uts/common/fs/swapfs/swap_vnops.c @@ -593,6 +593,11 @@ swap_putapage( size_t swap_klustsize; /* + * 
Clear force flag so that p_lckcnt pages are not invalidated. + */ + flags &= ~B_FORCE; + + /* * This check is added for callers who access swap_putpage with len = 0. * swap_putpage calls swap_putapage page-by-page via pvn_vplist_dirty. * And it's necessary to do the same queuing if users have the same diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c index 3b01993465..657d87300f 100644 --- a/usr/src/uts/common/os/exec.c +++ b/usr/src/uts/common/os/exec.c @@ -1816,6 +1816,7 @@ exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) as = as_alloc(); p->p_as = as; + as->a_proc = p; if (p->p_model == DATAMODEL_ILP32) as->a_userlimit = (caddr_t)USERLIMIT32; (void) hat_setup(as->a_hat, HAT_ALLOC); diff --git a/usr/src/uts/common/os/fork.c b/usr/src/uts/common/os/fork.c index fbda5b8c4a..7ae565274d 100644 --- a/usr/src/uts/common/os/fork.c +++ b/usr/src/uts/common/os/fork.c @@ -271,6 +271,8 @@ cfork(int isvfork, int isfork1) error = (error == ENOMEM) ? ENOMEM : EAGAIN; goto forkerr; } + cp->p_as->a_proc = cp; + /* Duplicate parent's shared memory */ if (p->p_segacct) shmfork(p, cp); diff --git a/usr/src/uts/common/os/main.c b/usr/src/uts/common/os/main.c index ec9fc6c3e3..1f4fbbf877 100644 --- a/usr/src/uts/common/os/main.c +++ b/usr/src/uts/common/os/main.c @@ -318,6 +318,7 @@ start_init_common() p->p_stk_ctl = INT32_MAX; p->p_as = as_alloc(); + p->p_as->a_proc = p; p->p_as->a_userlimit = (caddr_t)USERLIMIT32; (void) hat_setup(p->p_as->a_hat, HAT_INIT); diff --git a/usr/src/uts/common/os/project.c b/usr/src/uts/common/os/project.c index 6eb65a8048..6c266c0ca3 100644 --- a/usr/src/uts/common/os/project.c +++ b/usr/src/uts/common/os/project.c @@ -55,7 +55,7 @@ rctl_hndl_t rc_project_semmni; rctl_hndl_t rc_project_shmmax; rctl_hndl_t rc_project_shmmni; rctl_hndl_t rc_project_portids; -rctl_hndl_t rc_project_devlockmem; +rctl_hndl_t rc_project_locked_mem; rctl_hndl_t rc_project_contract; rctl_hndl_t rc_project_crypto_mem; @@ -114,7 
+114,8 @@ project_data_init(kproject_data_t *data) data->kpd_ipc.ipcq_shmmni = 0; data->kpd_ipc.ipcq_semmni = 0; data->kpd_ipc.ipcq_msgmni = 0; - data->kpd_devlockmem = 0; + data->kpd_locked_mem = 0; + data->kpd_locked_mem_ctl = UINT64_MAX; data->kpd_contract = 0; data->kpd_crypto_mem = 0; } @@ -442,6 +443,7 @@ project_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl, rctl_qty_t nlwps; ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(MUTEX_HELD(&p->p_zone->zone_nlwps_lock)); ASSERT(e->rcep_t == RCENTITY_PROJECT); if (e->rcep_p.proj == NULL) return (0); @@ -628,29 +630,51 @@ static rctl_ops_t project_msgmni_ops = { project_msgmni_test }; -/* - * project.max-device-locked-memory resource control support. - */ +/*ARGSUSED*/ +static rctl_qty_t +project_locked_mem_usage(rctl_t *rctl, struct proc *p) +{ + rctl_qty_t q; + ASSERT(MUTEX_HELD(&p->p_lock)); + mutex_enter(&p->p_zone->zone_rctl_lock); + q = p->p_task->tk_proj->kpj_data.kpd_locked_mem; + mutex_exit(&p->p_zone->zone_rctl_lock); + return (q); +} /*ARGSUSED*/ static int -project_devlockmem_test(struct rctl *rctl, struct proc *p, rctl_entity_p_t *e, +project_locked_mem_test(struct rctl *rctl, struct proc *p, rctl_entity_p_t *e, rctl_val_t *rval, rctl_qty_t inc, uint_t flags) { - rctl_qty_t v; + rctl_qty_t q; ASSERT(MUTEX_HELD(&p->p_lock)); - ASSERT(e->rcep_t == RCENTITY_PROJECT); - v = e->rcep_p.proj->kpj_data.kpd_devlockmem + inc; - if (v > rval->rcv_value) + ASSERT(MUTEX_HELD(&p->p_zone->zone_rctl_lock)); + q = p->p_task->tk_proj->kpj_data.kpd_locked_mem; + if (q + inc > rval->rcv_value) return (1); return (0); } -static rctl_ops_t project_devlockmem_ops = { +/*ARGSUSED*/ +static int +project_locked_mem_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, + rctl_qty_t nv) { + + ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(e->rcep_t == RCENTITY_PROJECT); + if (e->rcep_p.proj == NULL) + return (0); + + e->rcep_p.proj->kpj_data.kpd_locked_mem_ctl = nv; + return (0); +} + +static rctl_ops_t 
project_locked_mem_ops = { rcop_no_action, - rcop_no_usage, - rcop_no_set, - project_devlockmem_test + project_locked_mem_usage, + project_locked_mem_set, + project_locked_mem_test }; /* @@ -826,17 +850,13 @@ project_init(void) /* * Resource control for locked memory */ - rc_project_devlockmem = rctl_register( - "project.max-device-locked-memory", RCENTITY_PROJECT, + rc_project_locked_mem = rctl_register( + "project.max-locked-memory", RCENTITY_PROJECT, RCTL_GLOBAL_DENY_ALWAYS | RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES, - UINT64_MAX, UINT64_MAX, &project_devlockmem_ops); - - /* - * Defaults to 1/16th of the machine's memory - */ - qty = availrmem_initial << (PAGESHIFT - 4); + UINT64_MAX, UINT64_MAX, &project_locked_mem_ops); - rctl_add_default_limit("project.max-device-locked-memory", qty, + /* Default value equals that of max-shm-memory. */ + rctl_add_default_limit("project.max-locked-memory", qty, RCPRIV_PRIVILEGED, RCTL_LOCAL_DENY); /* diff --git a/usr/src/uts/common/os/rctl.c b/usr/src/uts/common/os/rctl.c index dd6230ad7b..4de4c74fe8 100644 --- a/usr/src/uts/common/os/rctl.c +++ b/usr/src/uts/common/os/rctl.c @@ -2566,3 +2566,110 @@ rctl_init(void) rctlproc_init(); } + +/* + * rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc) + * + * Increments the amount of locked memory on a project, and + * zone. If proj is NULL, the proj and zone of proc_t p is used. If + * chargeproc is non-zero, then the charged amount is cached on p->p_locked_mem + * so that the charge can be migrated when a process changes projects. + * + * Return values + * 0 - success + * EAGAIN - attempting to increment locked memory is denied by one + * or more resource entities. 
+ */ +int +rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, + int chargeproc) +{ + kproject_t *projp; + zone_t *zonep; + rctl_entity_p_t e; + int ret = 0; + + ASSERT(p != NULL); + ASSERT(MUTEX_HELD(&p->p_lock)); + if (proj != NULL) { + projp = proj; + zonep = zone_find_by_id(projp->kpj_zoneid); + } else { + projp = p->p_task->tk_proj; + zonep = p->p_zone; + } + + mutex_enter(&zonep->zone_rctl_lock); + + e.rcep_p.proj = projp; + e.rcep_t = RCENTITY_PROJECT; + if (projp->kpj_data.kpd_locked_mem + inc > + projp->kpj_data.kpd_locked_mem_ctl) { + if (rctl_test_entity(rc_project_locked_mem, projp->kpj_rctls, + p, &e, inc, 0) & RCT_DENY) { + ret = EAGAIN; + goto out; + } + } + e.rcep_p.zone = zonep; + e.rcep_t = RCENTITY_ZONE; + if (zonep->zone_locked_mem + inc > zonep->zone_locked_mem_ctl) { + if (rctl_test_entity(rc_zone_locked_mem, zonep->zone_rctls, + p, &e, inc, 0) & RCT_DENY) { + ret = EAGAIN; + goto out; + } + } + + zonep->zone_locked_mem += inc; + projp->kpj_data.kpd_locked_mem += inc; + if (chargeproc != 0) { + p->p_locked_mem += inc; + } +out: + mutex_exit(&zonep->zone_rctl_lock); + if (proj != NULL) + zone_rele(zonep); + return (ret); +} + +/* + * rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc) + * + * Decrements the amount of locked memory on a project and + * zone. If proj is NULL, the proj and zone of proc_t p is used. If + * creditproc is non-zero, then the quantity of locked memory is subtracted + * from p->p_locked_mem. 
+ * + * Return values + * none + */ +void +rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, + int creditproc) +{ + kproject_t *projp; + zone_t *zonep; + + if (proj != NULL) { + projp = proj; + zonep = zone_find_by_id(projp->kpj_zoneid); + } else { + ASSERT(p != NULL); + ASSERT(MUTEX_HELD(&p->p_lock)); + projp = p->p_task->tk_proj; + zonep = p->p_zone; + } + + mutex_enter(&zonep->zone_rctl_lock); + zonep->zone_locked_mem -= inc; + projp->kpj_data.kpd_locked_mem -= inc; + if (creditproc != 0) { + ASSERT(p != NULL); + ASSERT(MUTEX_HELD(&p->p_lock)); + p->p_locked_mem -= inc; + } + mutex_exit(&zonep->zone_rctl_lock); + if (proj != NULL) + zone_rele(zonep); +} diff --git a/usr/src/uts/common/os/shm.c b/usr/src/uts/common/os/shm.c index b8038fd0ae..5c03ab7803 100644 --- a/usr/src/uts/common/os/shm.c +++ b/usr/src/uts/common/os/shm.c @@ -108,6 +108,7 @@ #include <sys/project.h> #include <sys/policy.h> #include <sys/zone.h> +#include <sys/rctl.h> #include <sys/ipc.h> #include <sys/ipc_impl.h> @@ -125,11 +126,11 @@ #include <c2/audit.h> -static int shmem_lock(struct anon_map *amp); -static void shmem_unlock(struct anon_map *amp, uint_t lck); +static int shmem_lock(kshmid_t *sp, struct anon_map *amp); +static void shmem_unlock(kshmid_t *sp, struct anon_map *amp); static void sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags, kshmid_t *id); -static void shm_rm_amp(struct anon_map *amp, uint_t lckflag); +static void shm_rm_amp(struct anon_map *amp); static void shm_dtor(kipc_perm_t *); static void shm_rmid(kipc_perm_t *); static void shm_remove_zone(zoneid_t, void *); @@ -464,7 +465,6 @@ shmat(int shmid, caddr_t uaddr, int uflags, uintptr_t *rvp) sp->shm_sptinfo->sptas = segspt->s_as; sp->shm_sptseg = segspt; sp->shm_sptprot = prot; - sp->shm_lkcnt = 0; } else if ((prot & sp->shm_sptprot) != sp->shm_sptprot) { /* * Ensure we're attaching to an ISM segment with @@ -573,6 +573,11 @@ shm_dtor(kipc_perm_t *perm) uint_t cnt; size_t rsize; + if 
(sp->shm_lkcnt > 0) { + shmem_unlock(sp, sp->shm_amp); + sp->shm_lkcnt = 0; + } + if (sp->shm_sptinfo) { if (isspt(sp)) sptdestroy(sp->shm_sptinfo->sptas, sp->shm_amp); @@ -583,7 +588,7 @@ shm_dtor(kipc_perm_t *perm) cnt = --sp->shm_amp->refcnt; ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock); ASSERT(cnt == 0); - shm_rm_amp(sp->shm_amp, sp->shm_lkcnt); + shm_rm_amp(sp->shm_amp); if (sp->shm_perm.ipc_id != IPC_ID_INVAL) { rsize = ptob(btopr(sp->shm_segsz)); @@ -705,8 +710,13 @@ shmctl(int shmid, int cmd, void *arg) if ((error = secpolicy_lock_memory(cr)) != 0) break; + /* protect against overflow */ + if (sp->shm_lkcnt >= USHRT_MAX) { + error = ENOMEM; + break; + } if (!isspt(sp) && (sp->shm_lkcnt++ == 0)) { - if (error = shmem_lock(sp->shm_amp)) { + if (error = shmem_lock(sp, sp->shm_amp)) { ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER); cmn_err(CE_NOTE, "shmctl - couldn't lock %ld pages into memory", @@ -714,7 +724,6 @@ shmctl(int shmid, int cmd, void *arg) ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock); error = ENOMEM; sp->shm_lkcnt--; - shmem_unlock(sp->shm_amp, 0); } } break; @@ -724,10 +733,8 @@ shmctl(int shmid, int cmd, void *arg) if ((error = secpolicy_lock_memory(cr)) != 0) break; - if (!isspt(sp)) { - if (sp->shm_lkcnt && (--sp->shm_lkcnt == 0)) { - shmem_unlock(sp->shm_amp, 1); - } + if (sp->shm_lkcnt && (--sp->shm_lkcnt == 0)) { + shmem_unlock(sp, sp->shm_amp); } break; @@ -863,7 +870,7 @@ top: } sp->shm_amp = anonmap_alloc(rsize, rsize); - + sp->shm_amp->a_sp = sp; /* * Store the original user's requested size, in bytes, * rather than the page-aligned size. The former is @@ -878,7 +885,6 @@ top: sp->shm_cpid = curproc->p_pid; sp->shm_ismattch = 0; sp->shm_sptinfo = NULL; - /* * Check limits one last time, push id into global * visibility, and update resource usage counts. @@ -1094,115 +1100,58 @@ shmexit(struct proc *pp) * At this time pages should be in memory, so just lock them. 
*/ static void -lock_again(size_t npages, struct anon_map *amp) +lock_again(size_t npages, kshmid_t *sp, struct anon_map *amp) { struct anon *ap; struct page *pp; struct vnode *vp; - anoff_t off; + u_offset_t off; ulong_t anon_idx; anon_sync_obj_t cookie; + mutex_enter(&sp->shm_mlock); ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER); - for (anon_idx = 0; npages != 0; anon_idx++, npages--) { anon_array_enter(amp, anon_idx, &cookie); ap = anon_get_ptr(amp->ahp, anon_idx); + ASSERT(ap != NULL); swap_xlate(ap, &vp, &off); anon_array_exit(&cookie); - pp = page_lookup(vp, (u_offset_t)off, SE_SHARED); + pp = page_lookup(vp, off, SE_SHARED); if (pp == NULL) { panic("lock_again: page not in the system"); /*NOTREACHED*/ } + /* page should already be locked by caller */ + ASSERT(pp->p_lckcnt > 0); (void) page_pp_lock(pp, 0, 0); page_unlock(pp); } ANON_LOCK_EXIT(&amp->a_rwlock); + mutex_exit(&sp->shm_mlock); } -/* check if this segment is already locked. */ -/*ARGSUSED*/ -static int -check_locked(struct as *as, struct segvn_data *svd, size_t npages) -{ - struct vpage *vpp = svd->vpage; - size_t i; - if (svd->vpage == NULL) - return (0); /* unlocked */ - - SEGVN_LOCK_ENTER(as, &svd->lock, RW_READER); - for (i = 0; i < npages; i++, vpp++) { - if (VPP_ISPPLOCK(vpp) == 0) { - SEGVN_LOCK_EXIT(as, &svd->lock); - return (1); /* partially locked */ - } - } - SEGVN_LOCK_EXIT(as, &svd->lock); - return (2); /* locked */ -} - - /* * Attach the shared memory segment to the process * address space and lock the pages. 
*/ static int -shmem_lock(struct anon_map *amp) +shmem_lock(kshmid_t *sp, struct anon_map *amp) { size_t npages = btopr(amp->size); - struct seg *seg; struct as *as; struct segvn_crargs crargs; - struct segvn_data *svd; - proc_t *p = curproc; - caddr_t addr; - uint_t error, ret; - caddr_t seg_base; - size_t seg_sz; - - as = p->p_as; - AS_LOCK_ENTER(as, &as->a_lock, RW_READER); - /* check if shared memory is already attached */ - for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { - svd = (struct segvn_data *)seg->s_data; - if ((seg->s_ops == &segvn_ops) && (svd->amp == amp) && - (amp->size == seg->s_size)) { - switch (ret = check_locked(as, svd, npages)) { - case 0: /* unlocked */ - case 1: /* partially locked */ - seg_base = seg->s_base; - seg_sz = seg->s_size; - - AS_LOCK_EXIT(as, &as->a_lock); - if ((error = as_ctl(as, seg_base, seg_sz, - MC_LOCK, 0, 0, NULL, 0)) == 0) - lock_again(npages, amp); - (void) as_ctl(as, seg_base, seg_sz, MC_UNLOCK, - 0, 0, NULL, NULL); - return (error); - case 2: /* locked */ - AS_LOCK_EXIT(as, &as->a_lock); - lock_again(npages, amp); - return (0); - default: - cmn_err(CE_WARN, "shmem_lock: deflt %d", ret); - break; - } - } - } - AS_LOCK_EXIT(as, &as->a_lock); + uint_t error; - /* attach shm segment to our address space */ - as_rangelock(as); - map_addr(&addr, amp->size, 0ll, 1, 0); - if (addr == NULL) { - as_rangeunlock(as); - return (ENOMEM); - } + /* + * A later ISM/DISM attach may increase the size of the amp, so + * cache the number of pages locked for the future shmem_unlock() + */ + sp->shm_lkpages = npages; + as = as_alloc(); /* Initialize the create arguments and map the segment */ crargs = *(struct segvn_crargs *)zfod_argsp; /* structure copy */ crargs.offset = (u_offset_t)0; @@ -1211,16 +1160,15 @@ shmem_lock(struct anon_map *amp) crargs.prot = PROT_ALL; crargs.maxprot = crargs.prot; crargs.flags = 0; - - error = as_map(as, addr, amp->size, segvn_create, &crargs); - as_rangeunlock(as); + error = 
as_map(as, 0x0, amp->size, segvn_create, &crargs); if (!error) { - if ((error = as_ctl(as, addr, amp->size, MC_LOCK, 0, 0, + if ((error = as_ctl(as, 0x0, amp->size, MC_LOCK, 0, 0, NULL, 0)) == 0) { - lock_again(npages, amp); + lock_again(npages, sp, amp); } - (void) as_unmap(as, addr, amp->size); + (void) as_unmap(as, 0x0, amp->size); } + as_free(as); return (error); } @@ -1229,38 +1177,53 @@ shmem_lock(struct anon_map *amp) * Unlock shared memory */ static void -shmem_unlock(struct anon_map *amp, uint_t lck) +shmem_unlock(kshmid_t *sp, struct anon_map *amp) { struct anon *ap; - pgcnt_t npages = btopr(amp->size); + pgcnt_t npages = sp->shm_lkpages; struct vnode *vp; struct page *pp; - anoff_t off; + u_offset_t off; ulong_t anon_idx; + size_t unlocked_bytes = 0; + kproject_t *proj; + anon_sync_obj_t cookie; + proj = sp->shm_perm.ipc_proj; + mutex_enter(&sp->shm_mlock); + ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER); for (anon_idx = 0; anon_idx < npages; anon_idx++) { + anon_array_enter(amp, anon_idx, &cookie); if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) { - if (lck) { - panic("shmem_unlock: null app"); - /*NOTREACHED*/ - } - continue; + panic("shmem_unlock: null app"); + /*NOTREACHED*/ } swap_xlate(ap, &vp, &off); + anon_array_exit(&cookie); pp = page_lookup(vp, off, SE_SHARED); if (pp == NULL) { - if (lck) { - panic("shmem_unlock: page not in the system"); - /*NOTREACHED*/ - } - continue; - } - if (pp->p_lckcnt) { - page_pp_unlock(pp, 0, 0); + panic("shmem_unlock: page not in the system"); + /*NOTREACHED*/ } + /* + * Page should at least have once lock from previous + * shmem_lock + */ + ASSERT(pp->p_lckcnt > 0); + page_pp_unlock(pp, 0, 0); + if (pp->p_lckcnt == 0) + unlocked_bytes += PAGESIZE; + page_unlock(pp); } + + if (unlocked_bytes > 0) { + rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0); + } + + ANON_LOCK_EXIT(&amp->a_rwlock); + mutex_exit(&sp->shm_mlock); } /* @@ -1268,16 +1231,9 @@ shmem_unlock(struct anon_map *amp, uint_t lck) * amp. 
This means all shmdt()s and the IPC_RMID have been done. */ static void -shm_rm_amp(struct anon_map *amp, uint_t lckflag) +shm_rm_amp(struct anon_map *amp) { /* - * If we are finally deleting the - * shared memory, and if no one did - * the SHM_UNLOCK, we must do it now. - */ - shmem_unlock(amp, lckflag); - - /* * Free up the anon_map. */ lgrp_shm_policy_fini(amp, NULL); diff --git a/usr/src/uts/common/os/sunddi.c b/usr/src/uts/common/os/sunddi.c index f16ae44426..8c6bcefe06 100644 --- a/usr/src/uts/common/os/sunddi.c +++ b/usr/src/uts/common/os/sunddi.c @@ -82,10 +82,12 @@ #include <sys/devpolicy.h> #include <sys/ctype.h> #include <net/if.h> +#include <sys/rctl.h> extern pri_t minclsyspri; -extern rctl_hndl_t rc_project_devlockmem; +extern rctl_hndl_t rc_project_locked_mem; +extern rctl_hndl_t rc_zone_locked_mem; #ifdef DEBUG static int sunddi_debug = 0; @@ -104,13 +106,6 @@ static kthread_t *ddi_umem_unlock_thread; static struct ddi_umem_cookie *ddi_umem_unlock_head = NULL; static struct ddi_umem_cookie *ddi_umem_unlock_tail = NULL; -/* - * This lock protects the project.max-device-locked-memory counter. - * When both p_lock (proc_t) and this lock need to acquired, p_lock - * should be acquired first. - */ -static kmutex_t umem_devlockmem_rctl_lock; - /* * DDI(Sun) Function and flag definitions: @@ -7819,32 +7814,15 @@ umem_lock_undo(struct as *as, void *arg, uint_t event) */ int /* ARGSUSED */ -i_ddi_incr_locked_memory(proc_t *procp, task_t *taskp, - kproject_t *projectp, zone_t *zonep, rctl_qty_t inc) +i_ddi_incr_locked_memory(proc_t *procp, rctl_qty_t inc) { - kproject_t *projp; - - ASSERT(procp); - ASSERT(mutex_owned(&procp->p_lock)); - - projp = procp->p_task->tk_proj; - mutex_enter(&umem_devlockmem_rctl_lock); - /* - * Test if the requested memory can be locked without exceeding the - * limits. 
- */ - if (rctl_test(rc_project_devlockmem, projp->kpj_rctls, - procp, inc, RCA_SAFE) & RCT_DENY) { - mutex_exit(&umem_devlockmem_rctl_lock); + ASSERT(procp != NULL); + mutex_enter(&procp->p_lock); + if (rctl_incr_locked_mem(procp, NULL, inc, 1)) { + mutex_exit(&procp->p_lock); return (ENOMEM); } - projp->kpj_data.kpd_devlockmem += inc; - mutex_exit(&umem_devlockmem_rctl_lock); - /* - * Grab a hold on the project. - */ - (void) project_hold(projp); - + mutex_exit(&procp->p_lock); return (0); } @@ -7854,24 +7832,16 @@ i_ddi_incr_locked_memory(proc_t *procp, task_t *taskp, */ /* ARGSUSED */ void -i_ddi_decr_locked_memory(proc_t *procp, task_t *taskp, - kproject_t *projectp, zone_t *zonep, rctl_qty_t dec) +i_ddi_decr_locked_memory(proc_t *procp, rctl_qty_t dec) { - ASSERT(projectp); - - mutex_enter(&umem_devlockmem_rctl_lock); - projectp->kpj_data.kpd_devlockmem -= dec; - mutex_exit(&umem_devlockmem_rctl_lock); - - /* - * Release the project pointer reference accquired in - * i_ddi_incr_locked_memory(). - */ - (void) project_rele(projectp); + ASSERT(procp != NULL); + mutex_enter(&procp->p_lock); + rctl_decr_locked_mem(procp, NULL, dec, 1); + mutex_exit(&procp->p_lock); } /* - * This routine checks if the max-device-locked-memory resource ctl is + * This routine checks if the max-locked-memory resource ctl is * exceeded, if not increments it, grabs a hold on the project. 
* Returns 0 if successful otherwise returns error code */ @@ -7885,41 +7855,27 @@ umem_incr_devlockmem(struct ddi_umem_cookie *cookie) procp = cookie->procp; ASSERT(procp); - mutex_enter(&procp->p_lock); - - if ((ret = i_ddi_incr_locked_memory(procp, NULL, - NULL, NULL, cookie->size)) != 0) { - mutex_exit(&procp->p_lock); + if ((ret = i_ddi_incr_locked_memory(procp, + cookie->size)) != 0) { return (ret); } - - /* - * save the project pointer in the - * umem cookie, project pointer already - * hold in i_ddi_incr_locked_memory - */ - cookie->lockmem_proj = (void *)procp->p_task->tk_proj; - mutex_exit(&procp->p_lock); - return (0); } /* - * Decrements the max-device-locked-memory resource ctl and releases + * Decrements the max-locked-memory resource ctl and releases * the hold on the project that was acquired during umem_incr_devlockmem */ static void umem_decr_devlockmem(struct ddi_umem_cookie *cookie) { - kproject_t *projp; + proc_t *proc; - if (!cookie->lockmem_proj) + proc = (proc_t *)cookie->procp; + if (!proc) return; - projp = (kproject_t *)cookie->lockmem_proj; - i_ddi_decr_locked_memory(NULL, NULL, projp, NULL, cookie->size); - - cookie->lockmem_proj = NULL; + i_ddi_decr_locked_memory(proc, cookie->size); } /* @@ -7954,7 +7910,7 @@ umem_decr_devlockmem(struct ddi_umem_cookie *cookie) * EINVAL - for invalid parameters * EPERM, ENOMEM and other error codes returned by as_pagelock * ENOMEM - is returned if the current request to lock memory exceeds - * project.max-device-locked-memory resource control value. + * *.max-locked-memory resource control value. * EFAULT - memory pertains to a regular file mapped shared and * and DDI_UMEMLOCK_LONGTERM flag is set * EAGAIN - could not start the ddi_umem_unlock list processing thread @@ -8043,12 +7999,6 @@ umem_lockmemory(caddr_t addr, size_t len, int flags, ddi_umem_cookie_t *cookie, *cookie = (ddi_umem_cookie_t)NULL; return (ENOMEM); } - /* - * umem_incr_devlockmem stashes the project ptr into the - * cookie. 
This is needed during unlock since that can - * happen in a non-USER context - */ - ASSERT(p->lockmem_proj); /* Lock the pages corresponding to addr, len in memory */ error = as_pagelock(as, &(p->pparray), addr, len, p->s_flags); @@ -8169,7 +8119,7 @@ i_ddi_umem_unlock(struct ddi_umem_cookie *p) /* * Now that we have unlocked the memory decrement the - * max-device-locked-memory rctl + * *.max-locked-memory rctl */ umem_decr_devlockmem(p); @@ -8269,7 +8219,7 @@ i_ddi_umem_unlock_thread_start(void) * EINVAL - for invalid parameters * EPERM, ENOMEM and other error codes returned by as_pagelock * ENOMEM - is returned if the current request to lock memory exceeds - * project.max-device-locked-memory resource control value. + * *.max-locked-memory resource control value. * EAGAIN - could not start the ddi_umem_unlock list processing thread */ int @@ -8338,12 +8288,6 @@ ddi_umem_lock(caddr_t addr, size_t len, int flags, ddi_umem_cookie_t *cookie) *cookie = (ddi_umem_cookie_t)NULL; return (ENOMEM); } - /* - * umem_incr_devlockmem stashes the project ptr into the - * cookie. This is needed during unlock since that can - * happen in a non-USER context - */ - ASSERT(p->lockmem_proj); /* Lock the pages corresponding to addr, len in memory */ error = as_pagelock(((proc_t *)p->procp)->p_as, &(p->pparray), diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index 9fd6b423bd..0fb2c2be55 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -316,6 +316,7 @@ const char *zone_status_table[] = { * This isn't static so lint doesn't complain. 
*/ rctl_hndl_t rc_zone_cpu_shares; +rctl_hndl_t rc_zone_locked_mem; rctl_hndl_t rc_zone_nlwps; rctl_hndl_t rc_zone_shmmax; rctl_hndl_t rc_zone_shmmni; @@ -903,8 +904,8 @@ zone_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl, /*ARGSUSED*/ static int -zone_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv) { - +zone_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv) +{ ASSERT(MUTEX_HELD(&p->p_lock)); ASSERT(e->rcep_t == RCENTITY_ZONE); if (e->rcep_p.zone == NULL) @@ -1004,6 +1005,51 @@ static rctl_ops_t zone_msgmni_ops = { zone_msgmni_test }; +/*ARGSUSED*/ +static rctl_qty_t +zone_locked_mem_usage(rctl_t *rctl, struct proc *p) +{ + rctl_qty_t q; + ASSERT(MUTEX_HELD(&p->p_lock)); + mutex_enter(&p->p_zone->zone_rctl_lock); + q = p->p_zone->zone_locked_mem; + mutex_exit(&p->p_zone->zone_rctl_lock); + return (q); +} + +/*ARGSUSED*/ +static int +zone_locked_mem_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, + rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags) +{ + rctl_qty_t q; + ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(MUTEX_HELD(&p->p_zone->zone_rctl_lock)); + q = p->p_zone->zone_locked_mem; + if (q + incr > rcntl->rcv_value) + return (1); + return (0); +} + +/*ARGSUSED*/ +static int +zone_locked_mem_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, + rctl_qty_t nv) +{ + ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(e->rcep_t == RCENTITY_ZONE); + if (e->rcep_p.zone == NULL) + return (0); + e->rcep_p.zone->zone_locked_mem_ctl = nv; + return (0); +} + +static rctl_ops_t zone_locked_mem_ops = { + rcop_no_action, + zone_locked_mem_usage, + zone_locked_mem_set, + zone_locked_mem_test +}; /* * Helper function to brand the zone with a unique ID. 
@@ -1209,6 +1255,10 @@ zone_init(void) rde = rctl_dict_lookup("zone.cpu-shares"); (void) rctl_val_list_insert(&rde->rcd_default_value, dval); + rc_zone_locked_mem = rctl_register("zone.max-locked-memory", + RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES | + RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX, + &zone_locked_mem_ops); /* * Initialize the ``global zone''. */ @@ -2458,6 +2508,14 @@ zsched(void *arg) mutex_exit(&global_zone->zone_nlwps_lock); /* + * Decrement locked memory counts on old zone and project. + */ + mutex_enter(&global_zone->zone_rctl_lock); + global_zone->zone_locked_mem -= pp->p_locked_mem; + pj->kpj_data.kpd_locked_mem -= pp->p_locked_mem; + mutex_exit(&global_zone->zone_rctl_lock); + + /* * Create and join a new task in project '0' of this zone. * * We don't need to call holdlwps() since we know we're the only lwp in @@ -2468,21 +2526,29 @@ zsched(void *arg) tk = task_create(0, zone); mutex_enter(&cpu_lock); oldtk = task_join(tk, 0); - mutex_exit(&curproc->p_lock); - mutex_exit(&cpu_lock); - task_rele(oldtk); + + pj = pp->p_task->tk_proj; + + mutex_enter(&zone->zone_rctl_lock); + zone->zone_locked_mem += pp->p_locked_mem; + pj->kpj_data.kpd_locked_mem += pp->p_locked_mem; + mutex_exit(&zone->zone_rctl_lock); /* * add lwp counts to zsched's zone, and increment project's task count * due to the task created in the above tasksys_settaskid */ - pj = pp->p_task->tk_proj; + mutex_enter(&zone->zone_nlwps_lock); pj->kpj_nlwps += pp->p_lwpcnt; pj->kpj_ntasks += 1; zone->zone_nlwps += pp->p_lwpcnt; mutex_exit(&zone->zone_nlwps_lock); + mutex_exit(&curproc->p_lock); + mutex_exit(&cpu_lock); + task_rele(oldtk); + /* * The process was created by a process in the global zone, hence the * credentials are wrong. We might as well have kcred-ish credentials. 
@@ -2953,6 +3019,7 @@ zone_create(const char *zone_name, const char *zone_root, zone->zone_initname = NULL; mutex_init(&zone->zone_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zone->zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&zone->zone_rctl_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&zone->zone_cv, NULL, CV_DEFAULT, NULL); list_create(&zone->zone_zsd, sizeof (struct zsd_entry), offsetof(struct zsd_entry, zsd_linkage)); @@ -2990,6 +3057,8 @@ zone_create(const char *zone_name, const char *zone_root, zone->zone_initname = kmem_alloc(strlen(zone_default_initname) + 1, KM_SLEEP); (void) strcpy(zone->zone_initname, zone_default_initname); + zone->zone_locked_mem = 0; + zone->zone_locked_mem_ctl = UINT64_MAX; /* * Zsched initializes the rctls. @@ -4145,15 +4214,26 @@ zone_enter(zoneid_t zoneid) zone->zone_nlwps += pp->p_lwpcnt; /* add 1 task to zone's proj0 */ zone_proj0->kpj_ntasks += 1; - mutex_exit(&pp->p_lock); mutex_exit(&zone->zone_nlwps_lock); + mutex_enter(&zone->zone_rctl_lock); + zone->zone_locked_mem += pp->p_locked_mem; + zone_proj0->kpj_data.kpd_locked_mem += pp->p_locked_mem; + mutex_exit(&zone->zone_rctl_lock); + /* remove lwps from proc's old zone and old project */ mutex_enter(&pp->p_zone->zone_nlwps_lock); pp->p_zone->zone_nlwps -= pp->p_lwpcnt; pp->p_task->tk_proj->kpj_nlwps -= pp->p_lwpcnt; mutex_exit(&pp->p_zone->zone_nlwps_lock); + mutex_enter(&pp->p_zone->zone_rctl_lock); + pp->p_zone->zone_locked_mem -= pp->p_locked_mem; + pp->p_task->tk_proj->kpj_data.kpd_locked_mem -= pp->p_locked_mem; + mutex_exit(&pp->p_zone->zone_rctl_lock); + + mutex_exit(&pp->p_lock); + /* * Joining the zone cannot fail from now on. 
* diff --git a/usr/src/uts/common/sys/ddi_implfuncs.h b/usr/src/uts/common/sys/ddi_implfuncs.h index e4ef2f0e3e..ad70123d76 100644 --- a/usr/src/uts/common/sys/ddi_implfuncs.h +++ b/usr/src/uts/common/sys/ddi_implfuncs.h @@ -273,10 +273,8 @@ void e_devid_cache_free_devt_list(int, dev_t *); /* * Resource control functions to lock down device memory. */ -extern int i_ddi_incr_locked_memory(proc_t *, task_t *, kproject_t *, - zone_t *, rctl_qty_t); -extern void i_ddi_decr_locked_memory(proc_t *, task_t *, kproject_t *, - zone_t *, rctl_qty_t); +extern int i_ddi_incr_locked_memory(proc_t *, rctl_qty_t); +extern void i_ddi_decr_locked_memory(proc_t *, rctl_qty_t); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/ddidevmap.h b/usr/src/uts/common/sys/ddidevmap.h index 7798f1d7cd..734c04417d 100644 --- a/usr/src/uts/common/sys/ddidevmap.h +++ b/usr/src/uts/common/sys/ddidevmap.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -93,7 +92,7 @@ struct ddi_umem_cookie { */ ulong_t cook_refcnt; /* cookie reference count */ struct ddi_umem_cookie *unl_forw; /* list ptr for unlock cookies */ - void *lockmem_proj; /* project ptr for resource mgmt */ + void *reserved; /* unused */ }; typedef struct as *ddi_as_handle_t; diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h index 13a3605e66..4c4025e649 100644 --- a/usr/src/uts/common/sys/proc.h +++ b/usr/src/uts/common/sys/proc.h @@ -342,6 +342,8 @@ typedef struct proc { /* additional lock to protect p_sessp (but not its contents) */ kmutex_t p_splock; + rctl_qty_t p_locked_mem; /* locked memory charged to proc */ + /* protected by p_lock */ } proc_t; #define PROC_T /* headers relying on proc_t are OK */ @@ -486,8 +488,8 @@ extern struct pid pid0; /* p0's pid */ * These flags are used to synchronize with the pool subsystem to allow * re-binding of processes to new pools. */ -#define PBWAIT 0x0001 /* process should wait outside fork/exec/exit */ -#define PEXITED 0x0002 /* process exited and about to become zombie */ +#define PBWAIT 0x0001 /* process should wait outside fork/exec/exit */ +#define PEXITED 0x0002 /* process exited and about to become zombie */ /* Macro to convert proc pointer to a user block pointer */ #define PTOU(p) (&(p)->p_user) diff --git a/usr/src/uts/common/sys/project.h b/usr/src/uts/common/sys/project.h index 181c18b651..679c1eddc2 100644 --- a/usr/src/uts/common/sys/project.h +++ b/usr/src/uts/common/sys/project.h @@ -40,9 +40,11 @@ extern "C" { typedef struct kproject_data { /* Datum protected by: */ rctl_qty_t kpd_shmmax; /* shm's ipcs_lock */ ipc_rqty_t kpd_ipc; /* shm|sem|msg's ipcs lock */ - rctl_qty_t kpd_devlockmem; /* umem_devlockmem_rctl_lock */ + rctl_qty_t kpd_locked_mem; /* zone_rctl_lock */ + rctl_qty_t kpd_locked_mem_ctl; /* kpj_rctls->rcs_lock */ rctl_qty_t kpd_contract; /* contract_lock */ rctl_qty_t kpd_crypto_mem; /* crypto_rctl_lock */ + } kproject_data_t; /* @@ -84,7 +86,7 @@ 
projid_t curprojid(void); extern kproject_t *proj0p; extern rctl_hndl_t rc_project_nlwps; extern rctl_hndl_t rc_project_ntasks; - +extern rctl_hndl_t rc_project_locked_mem; #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/rctl.h b/usr/src/uts/common/sys/rctl.h index 02bcef6f36..eb56fff9e5 100644 --- a/usr/src/uts/common/sys/rctl.h +++ b/usr/src/uts/common/sys/rctl.h @@ -319,6 +319,12 @@ int rctl_rlimit_set(rctl_hndl_t, struct proc *, struct rlimit64 *, rctl_alloc_gp_t *, int, int, const struct cred *); int rctl_rlimit_get(rctl_hndl_t, struct proc *, struct rlimit64 *); +/* specific rctl utility functions */ +int rctl_incr_locked_mem(struct proc *, struct kproject *, rctl_qty_t, + int); +void rctl_decr_locked_mem(struct proc *, struct kproject *, rctl_qty_t, + int); + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/shm_impl.h b/usr/src/uts/common/sys/shm_impl.h index f43fad4c87..4d8cdcede5 100644 --- a/usr/src/uts/common/sys/shm_impl.h +++ b/usr/src/uts/common/sys/shm_impl.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -59,6 +58,10 @@ typedef struct kshmid { size_t shm_segsz; /* size of segment in bytes */ struct anon_map *shm_amp; /* segment anon_map pointer */ ushort_t shm_lkcnt; /* number of times it is being locked */ + pgcnt_t shm_lkpages; /* number of pages locked by shmctl */ + kmutex_t shm_mlock; /* held when locking physical pages */ + /* Therefore, protects p_lckcnt for */ + /* pages that back shm */ pid_t shm_lpid; /* pid of last shmop */ pid_t shm_cpid; /* pid of creator */ ulong_t shm_ismattch; /* number of ISM attaches */ diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 636b8acc0f..daccd16bdf 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -326,6 +326,14 @@ typedef struct zone { uint_t zone_rootpathlen; /* strlen(zone_rootpath) + 1 */ uint32_t zone_shares; /* FSS shares allocated to zone */ rctl_set_t *zone_rctls; /* zone-wide (zone.*) rctls */ + kmutex_t zone_rctl_lock; /* protects zone_locked_mem and */ + /* kpd_locked_mem for all */ + /* projects in zone */ + /* grab after p_lock, before rcs_lock */ + rctl_qty_t zone_locked_mem; /* bytes of locked memory in zone */ + rctl_qty_t zone_locked_mem_ctl; /* current locked memory */ + /* limit. 
Protected by */ + /* zone_rctls->rcs_lock */ list_t zone_zsd; /* list of Zone-Specific Data values */ kcondvar_t zone_cv; /* used to signal state changes */ struct proc *zone_zsched; /* Dummy kernel "zsched" process */ @@ -544,6 +552,8 @@ extern void mount_completed(void); extern int zone_walk(int (*)(zone_t *, void *), void *); +extern rctl_hndl_t rc_zone_locked_mem; + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/syscall/tasksys.c b/usr/src/uts/common/syscall/tasksys.c index 10b7e95c76..705b543a37 100644 --- a/usr/src/uts/common/syscall/tasksys.c +++ b/usr/src/uts/common/syscall/tasksys.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -41,6 +40,7 @@ #include <sys/cpuvar.h> #include <sys/policy.h> #include <sys/zone.h> +#include <sys/rctl.h> /* * Limit projlist to 256k projects. 
@@ -52,7 +52,6 @@ typedef struct projlist_walk { size_t pw_bufsz; } projlist_walk_t; - /* * taskid_t tasksys_settaskid(projid_t projid, uint_t flags); * @@ -112,6 +111,7 @@ tasksys_settaskid(projid_t projid, uint_t flags) zone = p->p_zone; mutex_enter(&zone->zone_nlwps_lock); + mutex_enter(&zone->zone_rctl_lock); if (kpj->kpj_nlwps + p->p_lwpcnt > kpj->kpj_nlwps_ctl) if (rctl_test_entity(rc_project_nlwps, kpj->kpj_rctls, p, &e, @@ -123,18 +123,28 @@ tasksys_settaskid(projid_t projid, uint_t flags) 1, 0) & RCT_DENY) rctlfail = 1; + if (kpj->kpj_data.kpd_locked_mem + p->p_locked_mem + > kpj->kpj_data.kpd_locked_mem_ctl) + if (rctl_test_entity(rc_project_locked_mem, kpj->kpj_rctls, p, + &e, p->p_locked_mem, 0) &RCT_DENY) + rctlfail = 1; + if (rctlfail) { + mutex_exit(&zone->zone_rctl_lock); mutex_exit(&zone->zone_nlwps_lock); if (curthread != p->p_agenttp) continuelwps(p); mutex_exit(&p->p_lock); return (set_errno(EAGAIN)); } + kpj->kpj_data.kpd_locked_mem += p->p_locked_mem; kpj->kpj_nlwps += p->p_lwpcnt; kpj->kpj_ntasks++; + oldpj->kpj_data.kpd_locked_mem -= p->p_locked_mem; oldpj->kpj_nlwps -= p->p_lwpcnt; + mutex_exit(&zone->zone_rctl_lock); mutex_exit(&zone->zone_nlwps_lock); mutex_exit(&p->p_lock); diff --git a/usr/src/uts/common/vm/anon.h b/usr/src/uts/common/vm/anon.h index 294867ca01..90f6e1e661 100644 --- a/usr/src/uts/common/vm/anon.h +++ b/usr/src/uts/common/vm/anon.h @@ -241,6 +241,8 @@ struct anon_hdr { #define ANON_ALLOC_FORCE 0x2 /* force single level anon array */ #define ANON_GROWDOWN 0x4 /* anon array should grow downward */ +struct kshmid; + /* * The anon_map structure is used by various clients of the anon layer to * manage anonymous memory. 
When anonymous memory is shared, @@ -284,6 +286,7 @@ struct anon_map { ulong_t refcnt; /* reference count on this structure */ ushort_t a_szc; /* max szc among shared processes */ void *locality; /* lgroup locality info */ + struct kshmid *a_sp; /* kshmid if amp backs sysV, or NULL */ }; #ifdef _KERNEL diff --git a/usr/src/uts/common/vm/as.h b/usr/src/uts/common/vm/as.h index c7afefc23c..f1c7ea3cfa 100644 --- a/usr/src/uts/common/vm/as.h +++ b/usr/src/uts/common/vm/as.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -47,6 +46,7 @@ #include <vm/faultcode.h> #include <vm/hat.h> #include <sys/avl.h> +#include <sys/proc.h> #ifdef __cplusplus extern "C" { @@ -124,6 +124,7 @@ struct as { size_t a_sizedir; /* size of object directory */ struct as_callback *a_callbacks; /* callback list */ void *a_xhat; /* list of xhat providers */ + proc_t *a_proc; /* back pointer to proc */ }; #define AS_PAGLCK 0x80 @@ -240,7 +241,7 @@ void as_avlinit(struct as *); struct seg *as_segat(struct as *as, caddr_t addr); void as_rangelock(struct as *as); void as_rangeunlock(struct as *as); -struct as *as_alloc(void); +struct as *as_alloc(); void as_free(struct as *as); int as_dup(struct as *as, struct as **outas); struct seg *as_findseg(struct as *as, caddr_t addr, int tail); diff --git a/usr/src/uts/common/vm/seg_dev.h b/usr/src/uts/common/vm/seg_dev.h index c498c06ecf..451d61963d 100644 --- a/usr/src/uts/common/vm/seg_dev.h +++ b/usr/src/uts/common/vm/seg_dev.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -42,12 +41,12 @@ #pragma ident "%Z%%M% %I% %E% SMI" -#include <sys/project.h> - #ifdef __cplusplus extern "C" { #endif +struct proc; + /* * Structure whose pointer is passed to the segdev_create routine */ @@ -110,7 +109,7 @@ struct devmap_pmem_cookie { pgcnt_t dp_npages; /* number of allocated mem pages */ page_t **dp_pparray; /* pages allocated for this cookie */ vnode_t *dp_vnp; /* vnode associated with this cookie */ - kproject_t *dp_projp; /* project ptr for resource ctl */ + proc_t *dp_proc; /* proc ptr for resource control */ }; #ifdef _KERNEL diff --git a/usr/src/uts/common/vm/seg_spt.c b/usr/src/uts/common/vm/seg_spt.c index 774a9c3b9f..b614344bd0 100644 --- a/usr/src/uts/common/vm/seg_spt.c +++ b/usr/src/uts/common/vm/seg_spt.c @@ -45,10 +45,13 @@ #include <sys/debug.h> #include <sys/vtrace.h> #include <sys/shm.h> +#include <sys/shm_impl.h> #include <sys/lgrp.h> #include <sys/vmsystm.h> - +#include <sys/policy.h> +#include <sys/project.h> #include <sys/tnf_probe.h> +#include <sys/zone.h> #define SEGSPTADDR (caddr_t)0x0 @@ -181,7 +184,7 @@ static int spt_anon_getpages(struct seg *seg, caddr_t addr, size_t len, /*ARGSUSED*/ int sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp, - uint_t prot, uint_t flags, uint_t share_szc) + uint_t prot, uint_t flags, uint_t share_szc) { int err; struct as *newas; @@ -189,7 +192,7 @@ sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp, #ifdef DEBUG TNF_PROBE_1(sptcreate, "spt", /* CSTYLED */, - tnf_ulong, size, size ); + tnf_ulong, size, size ); #endif if (segspt_minfree == 0) /* leave min 5% of availrmem for */ segspt_minfree = availrmem/20; /* for the system */ @@ -201,11 +204,11 @@ sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp, * get a new as for this shared memory segment */ newas = as_alloc(); + newas->a_proc = NULL; sptcargs.amp = amp; sptcargs.prot = prot; sptcargs.flags = flags; sptcargs.szc = share_szc; - /* * create a shared page table (spt) segment */ @@ 
-245,10 +248,10 @@ segspt_free(struct seg *seg) if (sptd->spt_realsize) segspt_free_pages(seg, seg->s_base, sptd->spt_realsize); - if (sptd->spt_ppa_lckcnt) - kmem_free(sptd->spt_ppa_lckcnt, - sizeof (*sptd->spt_ppa_lckcnt) - * btopr(sptd->spt_amp->size)); + if (sptd->spt_ppa_lckcnt) + kmem_free(sptd->spt_ppa_lckcnt, + sizeof (*sptd->spt_ppa_lckcnt) + * btopr(sptd->spt_amp->size)); kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp)); mutex_destroy(&sptd->spt_lock); kmem_free(sptd, sizeof (*sptd)); @@ -370,6 +373,7 @@ segspt_create(struct seg *seg, caddr_t argsp) struct spt_data *sptd; struct segspt_crargs *sptcargs = (struct segspt_crargs *)argsp; struct anon_map *amp = sptcargs->amp; + struct kshmid *sp = amp->a_sp; struct cred *cred = CRED(); ulong_t i, j, anon_index = 0; pgcnt_t npages = btopr(amp->size); @@ -381,16 +385,20 @@ segspt_create(struct seg *seg, caddr_t argsp) caddr_t a; pgcnt_t pidx; size_t sz; + proc_t *procp = curproc; + rctl_qty_t lockedbytes = 0; + kproject_t *proj; /* * We are holding the a_lock on the underlying dummy as, * so we can make calls to the HAT layer. 
*/ ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); + ASSERT(sp != NULL); #ifdef DEBUG TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */, - tnf_opaque, addr, addr, + tnf_opaque, addr, addr, tnf_ulong, len, seg->s_size); #endif if ((sptcargs->flags & SHM_PAGEABLE) == 0) { @@ -484,25 +492,49 @@ segspt_create(struct seg *seg, caddr_t argsp) seg, addr, S_CREATE, cred)) != 0) goto out4; + mutex_enter(&sp->shm_mlock); + + /* May be partially locked, so, count bytes to charge for locking */ + for (i = 0; i < npages; i++) + if (ppa[i]->p_lckcnt == 0) + lockedbytes += PAGESIZE; + + proj = sp->shm_perm.ipc_proj; + + if (lockedbytes > 0) { + mutex_enter(&procp->p_lock); + if (rctl_incr_locked_mem(procp, proj, lockedbytes, 0)) { + mutex_exit(&procp->p_lock); + mutex_exit(&sp->shm_mlock); + for (i = 0; i < npages; i++) + page_unlock(ppa[i]); + err = ENOMEM; + goto out4; + } + mutex_exit(&procp->p_lock); + } + /* * addr is initial address corresponding to the first page on ppa list */ for (i = 0; i < npages; i++) { /* attempt to lock all pages */ - if (!page_pp_lock(ppa[i], 0, 1)) { + if (page_pp_lock(ppa[i], 0, 1) == 0) { /* * if unable to lock any page, unlock all * of them and return error */ for (j = 0; j < i; j++) page_pp_unlock(ppa[j], 0, 1); - for (i = 0; i < npages; i++) { + for (i = 0; i < npages; i++) page_unlock(ppa[i]); - } + rctl_decr_locked_mem(NULL, proj, lockedbytes, 0); + mutex_exit(&sp->shm_mlock); err = ENOMEM; goto out4; } } + mutex_exit(&sp->shm_mlock); /* * Some platforms assume that ISM mappings are HAT_LOAD_LOCK @@ -582,6 +614,9 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len) int root = 0; pgcnt_t pgs, curnpgs = 0; page_t *rootpp; + rctl_qty_t unlocked_bytes = 0; + kproject_t *proj; + kshmid_t *sp; ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); @@ -601,7 +636,13 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len) if (sptd->spt_flags & SHM_PAGEABLE) npages = btop(amp->size); - ASSERT(amp); + 
ASSERT(amp != NULL); + + if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { + sp = amp->a_sp; + proj = sp->shm_perm.ipc_proj; + mutex_enter(&sp->shm_mlock); + } for (anon_idx = 0; anon_idx < npages; anon_idx++) { if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) { @@ -647,11 +688,13 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len) "page not in the system"); /*NOTREACHED*/ } + ASSERT(pp->p_lckcnt > 0); page_pp_unlock(pp, 0, 1); + if (pp->p_lckcnt == 0) + unlocked_bytes += PAGESIZE; } else { if ((pp = page_lookup(vp, off, SE_EXCL)) == NULL) continue; - page_pp_unlock(pp, 0, 0); } /* * It's logical to invalidate the pages here as in most cases @@ -697,7 +740,11 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len) VN_DISPOSE(pp, B_INVAL, 0, kcred); } } - + if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { + if (unlocked_bytes > 0) + rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0); + mutex_exit(&sp->shm_mlock); + } if (root != 0 || curnpgs != 0) { panic("segspt_free_pages: bad large page"); /*NOTREACHED*/ @@ -1392,7 +1439,6 @@ segspt_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist, ASSERT(sptd->spt_pcachecnt != 0); ASSERT(sptd->spt_ppa == pplist); ASSERT(npages == btopr(sptd->spt_amp->size)); - /* * Acquire the lock on the dummy seg and destroy the * ppa array IF this is the last pcachecnt. 
@@ -1409,7 +1455,7 @@ segspt_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist, hat_setref(pplist[i]); } if ((sptd->spt_flags & SHM_PAGEABLE) && - (sptd->spt_ppa_lckcnt[i] == 0)) + (sptd->spt_ppa_lckcnt[i] == 0)) free_availrmem++; page_unlock(pplist[i]); } @@ -2363,15 +2409,35 @@ lpgs_err: return (err); } +/* + * count the number of bytes in a set of spt pages that are currently not + * locked + */ +static rctl_qty_t +spt_unlockedbytes(pgcnt_t npages, page_t **ppa) +{ + ulong_t i; + rctl_qty_t unlocked = 0; + + for (i = 0; i < npages; i++) { + if (ppa[i]->p_lckcnt == 0) + unlocked += PAGESIZE; + } + return (unlocked); +} + int spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages, - page_t **ppa, ulong_t *lockmap, size_t pos) + page_t **ppa, ulong_t *lockmap, size_t pos, + rctl_qty_t *locked) { struct shm_data *shmd = seg->s_data; struct spt_data *sptd = shmd->shm_sptseg->s_data; ulong_t i; int kernel; + /* return the number of bytes actually locked */ + *locked = 0; for (i = 0; i < npages; anon_index++, pos++, i++) { if (!(shmd->shm_vpage[anon_index] & DISM_PG_LOCKED)) { if (sptd->spt_ppa_lckcnt[anon_index] < @@ -2386,19 +2452,19 @@ spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages, kernel = (sptd->spt_ppa && sptd->spt_ppa[anon_index]) ? 
1 : 0; if (!page_pp_lock(ppa[i], 0, kernel)) { - /* unlock rest of the pages */ - for (; i < npages; i++) - page_unlock(ppa[i]); sptd->spt_ppa_lckcnt[anon_index]--; return (EAGAIN); } + /* if this is a newly locked page, count it */ + if (ppa[i]->p_lckcnt == 1) { + *locked += PAGESIZE; + } shmd->shm_lckpgs++; shmd->shm_vpage[anon_index] |= DISM_PG_LOCKED; if (lockmap != NULL) BT_SET(lockmap, pos); } } - page_unlock(ppa[i]); } return (0); } @@ -2411,6 +2477,7 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, struct shm_data *shmd = seg->s_data; struct seg *sptseg = shmd->shm_sptseg; struct spt_data *sptd = sptseg->s_data; + struct kshmid *sp = sptd->spt_amp->a_sp; pgcnt_t npages, a_npages; page_t **ppa; pgcnt_t an_idx, a_an_idx, ppa_idx; @@ -2419,8 +2486,13 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, size_t share_sz; ulong_t i; int sts = 0; + rctl_qty_t unlocked = 0; + rctl_qty_t locked = 0; + struct proc *p = curproc; + kproject_t *proj; ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); + ASSERT(sp != NULL); if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { return (0); @@ -2434,7 +2506,16 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, return (ENOMEM); } + /* + * A shm's project never changes, so no lock needed. + * The shm has a hold on the project, so it will not go away. + * Since we have a mapping to shm within this zone, we know + * that the zone will not go away. 
+ */ + proj = sp->shm_perm.ipc_proj; + if (op == MC_LOCK) { + /* * Need to align addr and size request if they are not * aligned so we can always allocate large page(s) however @@ -2469,18 +2550,36 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, return (sts); } - sts = spt_lockpages(seg, an_idx, npages, - &ppa[ppa_idx], lockmap, pos); + mutex_enter(&sp->shm_mlock); + /* enforce locked memory rctl */ + unlocked = spt_unlockedbytes(npages, &ppa[ppa_idx]); + + mutex_enter(&p->p_lock); + if (rctl_incr_locked_mem(p, proj, unlocked, 0)) { + mutex_exit(&p->p_lock); + sts = EAGAIN; + } else { + mutex_exit(&p->p_lock); + sts = spt_lockpages(seg, an_idx, npages, + &ppa[ppa_idx], lockmap, pos, &locked); + + /* + * correct locked count if not all pages could be + * locked + */ + if ((unlocked - locked) > 0) { + rctl_decr_locked_mem(NULL, proj, + (unlocked - locked), 0); + } + } /* - * unlock remaining pages for requests which are not - * aligned or not in 4 M chunks + * unlock pages */ - for (i = 0; i < ppa_idx; i++) - page_unlock(ppa[i]); - for (i = ppa_idx + npages; i < a_npages; i++) + for (i = 0; i < a_npages; i++) page_unlock(ppa[i]); if (sptd->spt_ppa != NULL) sptd->spt_flags |= DISM_PPA_CHANGED; + mutex_exit(&sp->shm_mlock); mutex_exit(&sptd->spt_lock); kmem_free(ppa, ((sizeof (page_t *)) * a_npages)); @@ -2493,6 +2592,7 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, struct page *pp; int kernel; anon_sync_obj_t cookie; + rctl_qty_t unlocked = 0; amp = sptd->spt_amp; mutex_enter(&sptd->spt_lock); @@ -2506,13 +2606,13 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, if (sptd->spt_ppa != NULL) sptd->spt_flags |= DISM_PPA_CHANGED; + mutex_enter(&sp->shm_mlock); ANON_LOCK_ENTER(&->a_rwlock, RW_READER); for (i = 0; i < npages; i++, an_idx++) { if (shmd->shm_vpage[an_idx] & DISM_PG_LOCKED) { anon_array_enter(amp, an_idx, &cookie); ap = anon_get_ptr(amp->ahp, an_idx); ASSERT(ap); - ASSERT(sptd->spt_ppa_lckcnt[an_idx] > 0); 
swap_xlate(ap, &vp, &off); anon_array_exit(&cookie); @@ -2527,7 +2627,10 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, */ kernel = (sptd->spt_ppa && sptd->spt_ppa[an_idx]) ? 1 : 0; + ASSERT(pp->p_lckcnt > 0); page_pp_unlock(pp, 0, kernel); + if (pp->p_lckcnt == 0) + unlocked += PAGESIZE; page_unlock(pp); shmd->shm_vpage[an_idx] &= ~DISM_PG_LOCKED; sptd->spt_ppa_lckcnt[an_idx]--; @@ -2538,6 +2641,9 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, if (sptd->spt_ppa != NULL) sptd->spt_flags |= DISM_PPA_CHANGED; mutex_exit(&sptd->spt_lock); + + rctl_decr_locked_mem(NULL, proj, unlocked, 0); + mutex_exit(&sp->shm_mlock); } return (sts); } diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c index 96fb02827f..83ef08e9f5 100644 --- a/usr/src/uts/common/vm/seg_vn.c +++ b/usr/src/uts/common/vm/seg_vn.c @@ -70,7 +70,11 @@ #include <vm/anon.h> #include <vm/page.h> #include <vm/vpage.h> - +#include <sys/proc.h> +#include <sys/task.h> +#include <sys/project.h> +#include <sys/zone.h> +#include <sys/shm_impl.h> /* * Private seg op routines. */ @@ -210,7 +214,7 @@ static struct segvnvmstats_str { #define SDR_RANGE 1 /* demote entire range */ #define SDR_END 2 /* demote non aligned ends only */ -#define CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr) { \ +#define CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr) { \ if ((len) != 0) { \ lpgaddr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz); \ ASSERT(lpgaddr >= (seg)->s_base); \ @@ -2393,13 +2397,29 @@ segvn_faultpage( * allocating vpage here if it's absent requires * upgrading the segvn reader lock, the cost of * which does not seem worthwhile. + * + * Usually testing and setting VPP_ISPPLOCK and + * VPP_SETPPLOCK requires holding the segvn lock as + * writer, but in this case all readers are + * serializing on the anon array lock. 
*/ if (AS_ISPGLCK(seg->s_as) && vpage != NULL && - (svd->flags & MAP_NORESERVE)) { - claim = VPP_PROT(vpage) & PROT_WRITE; + (svd->flags & MAP_NORESERVE) && + !VPP_ISPPLOCK(vpage)) { + proc_t *p = seg->s_as->a_proc; ASSERT(svd->type == MAP_PRIVATE); - if (page_pp_lock(pp, claim, 0)) - VPP_SETPPLOCK(vpage); + mutex_enter(&p->p_lock); + if (rctl_incr_locked_mem(p, NULL, PAGESIZE, + 1) == 0) { + claim = VPP_PROT(vpage) & PROT_WRITE; + if (page_pp_lock(pp, claim, 0)) { + VPP_SETPPLOCK(vpage); + } else { + rctl_decr_locked_mem(p, NULL, + PAGESIZE, 1); + } + } + mutex_exit(&p->p_lock); } hat_memload(hat, addr, pp, prot, hat_flag); @@ -5826,7 +5846,7 @@ segvn_claim_pages( page_t *pp; pgcnt_t pg_idx, i; int err = 0; - anoff_t aoff; + anoff_t aoff; int anon = (amp != NULL) ? 1 : 0; ASSERT(svd->type == MAP_PRIVATE); @@ -6931,6 +6951,13 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len, struct anon *ap; struct vattr va; anon_sync_obj_t cookie; + struct kshmid *sp = NULL; + struct proc *p = curproc; + kproject_t *proj = NULL; + int chargeproc = 1; + size_t locked_bytes = 0; + size_t unlocked_bytes = 0; + int err = 0; /* * Hold write lock on address space because may split or concatenate @@ -6938,6 +6965,18 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len, */ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); + /* + * If this is a shm, use shm's project and zone, else use + * project and zone of calling process + */ + + /* Determine if this segment backs a sysV shm */ + if (svd->amp != NULL && svd->amp->a_sp != NULL) { + sp = svd->amp->a_sp; + proj = sp->shm_perm.ipc_proj; + chargeproc = 0; + } + SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER); if (attr) { pageprot = attr & ~(SHARED|PRIVATE); @@ -6990,6 +7029,61 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len, offset = svd->offset + (uintptr_t)(addr - seg->s_base); evp = &svd->vpage[seg_page(seg, addr + len)]; + if (sp != NULL) + mutex_enter(&sp->shm_mlock); + + /* determine number 
of unlocked bytes in range for lock operation */ + if (op == MC_LOCK) { + + if (sp == NULL) { + for (vpp = &svd->vpage[seg_page(seg, addr)]; vpp < evp; + vpp++) { + if (!VPP_ISPPLOCK(vpp)) + unlocked_bytes += PAGESIZE; + } + } else { + ulong_t i_idx, i_edx; + anon_sync_obj_t i_cookie; + struct anon *i_ap; + struct vnode *i_vp; + u_offset_t i_off; + + /* Only count sysV pages once for locked memory */ + i_edx = svd->anon_index + seg_page(seg, addr + len); + ANON_LOCK_ENTER(&->a_rwlock, RW_READER); + for (i_idx = anon_index; i_idx < i_edx; i_idx++) { + anon_array_enter(amp, i_idx, &i_cookie); + i_ap = anon_get_ptr(amp->ahp, i_idx); + if (i_ap == NULL) { + unlocked_bytes += PAGESIZE; + anon_array_exit(&i_cookie); + continue; + } + swap_xlate(i_ap, &i_vp, &i_off); + anon_array_exit(&i_cookie); + pp = page_lookup(i_vp, i_off, SE_SHARED); + if (pp == NULL) { + unlocked_bytes += PAGESIZE; + continue; + } else if (pp->p_lckcnt == 0) + unlocked_bytes += PAGESIZE; + page_unlock(pp); + } + ANON_LOCK_EXIT(&->a_rwlock); + } + + mutex_enter(&p->p_lock); + err = rctl_incr_locked_mem(p, proj, unlocked_bytes, + chargeproc); + mutex_exit(&p->p_lock); + + if (err) { + if (sp != NULL) + mutex_exit(&sp->shm_mlock); + SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); + return (err); + } + } /* * Loop over all pages in the range. Process if we're locking and * page has not already been locked in this mapping; or if we're @@ -7022,9 +7116,8 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len, if (pp == NULL) { anon_array_exit(&cookie); ANON_LOCK_EXIT(&->a_rwlock); - SEGVN_LOCK_EXIT(seg->s_as, - &svd->lock); - return (ENOMEM); + err = ENOMEM; + goto out; } ASSERT(anon_get_ptr(amp->ahp, anon_index) == NULL); @@ -7096,8 +7189,8 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len, * 4125102 for details of the problem. */ if (error == EDEADLK) { - SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); - return (error); + err = error; + goto out; } /* * Quit if we fail to fault in the page. 
Treat @@ -7108,21 +7201,19 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len, va.va_mask = AT_SIZE; if (VOP_GETATTR(svd->vp, &va, 0, svd->cred) != 0) { - SEGVN_LOCK_EXIT(seg->s_as, - &svd->lock); - return (EIO); + err = EIO; + goto out; } if (btopr(va.va_size) >= btopr(off + 1)) { - SEGVN_LOCK_EXIT(seg->s_as, - &svd->lock); - return (EIO); + err = EIO; + goto out; } - SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); - return (0); + goto out; + } else if (error) { - SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); - return (EIO); + err = EIO; + goto out; } pp = pl[0]; ASSERT(pp != NULL); @@ -7154,39 +7245,75 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len, if (op == MC_LOCK) { int ret = 1; /* Assume success */ - /* - * Make sure another thread didn't lock - * the page after we released the segment - * lock. - */ - if ((attr == 0 || VPP_PROT(vpp) == pageprot) && - !VPP_ISPPLOCK(vpp)) { - ret = page_pp_lock(pp, claim, 0); - if (ret != 0) { - VPP_SETPPLOCK(vpp); - if (lockmap != (ulong_t *)NULL) - BT_SET(lockmap, pos); - } - } - page_unlock(pp); + ASSERT(!VPP_ISPPLOCK(vpp)); + + ret = page_pp_lock(pp, claim, 0); if (ret == 0) { - SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); - return (EAGAIN); + /* locking page failed */ + page_unlock(pp); + err = EAGAIN; + goto out; } + VPP_SETPPLOCK(vpp); + if (sp != NULL) { + if (pp->p_lckcnt == 1) + locked_bytes += PAGESIZE; + } else + locked_bytes += PAGESIZE; + + if (lockmap != (ulong_t *)NULL) + BT_SET(lockmap, pos); + + page_unlock(pp); } else { + ASSERT(VPP_ISPPLOCK(vpp)); if (pp != NULL) { - if ((attr == 0 || - VPP_PROT(vpp) == pageprot) && - VPP_ISPPLOCK(vpp)) - page_pp_unlock(pp, claim, 0); + /* sysV pages should be locked */ + ASSERT(sp == NULL || pp->p_lckcnt > 0); + page_pp_unlock(pp, claim, 0); + if (sp != NULL) { + if (pp->p_lckcnt == 0) + unlocked_bytes + += PAGESIZE; + } else + unlocked_bytes += PAGESIZE; page_unlock(pp); + } else { + ASSERT(sp != NULL); + unlocked_bytes += PAGESIZE; } VPP_CLRPPLOCK(vpp); } } } +out: + if 
(op == MC_LOCK) { + /* Credit back bytes that did not get locked */ + if ((unlocked_bytes - locked_bytes) > 0) { + if (proj == NULL) + mutex_enter(&p->p_lock); + rctl_decr_locked_mem(p, proj, + (unlocked_bytes - locked_bytes), chargeproc); + if (proj == NULL) + mutex_exit(&p->p_lock); + } + + } else { + /* Account bytes that were unlocked */ + if (unlocked_bytes > 0) { + if (proj == NULL) + mutex_enter(&p->p_lock); + rctl_decr_locked_mem(p, proj, unlocked_bytes, + chargeproc); + if (proj == NULL) + mutex_exit(&p->p_lock); + } + } + if (sp != NULL) + mutex_exit(&sp->shm_mlock); SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); - return (0); + + return (err); } /* diff --git a/usr/src/uts/common/vm/vm_anon.c b/usr/src/uts/common/vm/vm_anon.c index c05f20478e..0cad34257c 100644 --- a/usr/src/uts/common/vm/vm_anon.c +++ b/usr/src/uts/common/vm/vm_anon.c @@ -3139,6 +3139,7 @@ anonmap_alloc(size_t size, size_t swresv) amp->swresv = swresv; amp->locality = 0; amp->a_szc = 0; + amp->a_sp = NULL; return (amp); } diff --git a/usr/src/uts/i86pc/os/pmem.c b/usr/src/uts/i86pc/os/pmem.c index 56986e0153..f7269bfb82 100644 --- a/usr/src/uts/i86pc/os/pmem.c +++ b/usr/src/uts/i86pc/os/pmem.c @@ -38,7 +38,6 @@ #include <sys/ddidevmap.h> #include <sys/vnode.h> #include <sys/sysmacros.h> -#include <sys/project.h> #include <vm/seg_dev.h> #include <sys/pmem.h> #include <vm/hat_i86.h> @@ -126,7 +125,7 @@ static int lpp_create(page_t **, pgcnt_t, pgcnt_t *, pmem_lpg_t **, static void tlist_in(page_t *, pgcnt_t, vnode_t *, u_offset_t *); static void tlist_out(page_t *, pgcnt_t); static int pmem_cookie_alloc(struct devmap_pmem_cookie **, pgcnt_t, uint_t); -static int pmem_lock(pgcnt_t, kproject_t **); +static int pmem_lock(pgcnt_t, proc_t *p); /* * Called by driver devmap routine to pass physical memory mapping info to @@ -314,13 +313,12 @@ devmap_pmem_alloc(size_t size, uint_t flags, devmap_pmem_cookie_t *cookiep) pcp->dp_npages = npages; /* - * See if the requested memory can be locked. 
Currently we do resource - * controls on the project levlel only. + * See if the requested memory can be locked. */ - if (pmem_lock(npages, &(pcp->dp_projp)) == DDI_FAILURE) + pcp->dp_proc = curproc; + if (pmem_lock(npages, curproc) == DDI_FAILURE) goto alloc_fail; locked = 1; - /* * First, grab as many as possible from pmem_mpool. If pages in * pmem_mpool are enough for this request, we are done. @@ -402,8 +400,7 @@ alloc_fail: mutex_exit(&pmem_mutex); } if (locked == 1) - i_ddi_decr_locked_memory(NULL, NULL, pcp->dp_projp, NULL, - ptob(pcp->dp_npages)); + i_ddi_decr_locked_memory(pcp->dp_proc, ptob(pcp->dp_npages)); /* Freeing pmem_cookie. */ kmem_free(pcp->dp_vnp, sizeof (vnode_t)); kmem_free(pcp->dp_pparray, npages * sizeof (page_t *)); @@ -492,8 +489,8 @@ devmap_pmem_free(devmap_pmem_cookie_t cookie) pmem_lpg_concat(&pmem_occ_lpgs, &pf_lpgs); mutex_exit(&pmem_mutex); - i_ddi_decr_locked_memory(NULL, NULL, (kproject_t *)pcp->dp_projp, NULL, - ptob(pcp->dp_npages)); + if (curproc == pcp->dp_proc) + i_ddi_decr_locked_memory(curproc, ptob(pcp->dp_npages)); kmem_free(pcp->dp_vnp, sizeof (vnode_t)); kmem_free(pcp->dp_pparray, pcp->dp_npages * sizeof (page_t *)); kmem_free(pcp, sizeof (struct devmap_pmem_cookie)); @@ -552,19 +549,13 @@ pmem_cookie_alloc(struct devmap_pmem_cookie **pcpp, pgcnt_t n, uint_t kflags) return (DDI_SUCCESS); } -/* Try to lock down n pages resource for current project. */ +/* Try to lock down n pages resource */ static int -pmem_lock(pgcnt_t n, kproject_t **prjpp) +pmem_lock(pgcnt_t n, proc_t *p) { - mutex_enter(&curproc->p_lock); - if (i_ddi_incr_locked_memory(curproc, NULL, NULL, NULL, - ptob(n)) != 0) { - mutex_exit(&curproc->p_lock); + if (i_ddi_incr_locked_memory(p, ptob(n)) != 0) { return (DDI_FAILURE); } - /* Store this project in cookie for later lock/unlock. */ - *prjpp = curproc->p_task->tk_proj; - mutex_exit(&curproc->p_lock); return (DDI_SUCCESS); } |