summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/lib/brand/lx/zone/config.xml1
-rw-r--r--usr/src/lib/brand/native/zone/config.xml1
-rw-r--r--usr/src/lib/brand/sn1/zone/config.xml1
-rw-r--r--usr/src/uts/common/fs/swapfs/swap_vnops.c5
-rw-r--r--usr/src/uts/common/os/exec.c1
-rw-r--r--usr/src/uts/common/os/fork.c2
-rw-r--r--usr/src/uts/common/os/main.c1
-rw-r--r--usr/src/uts/common/os/project.c66
-rw-r--r--usr/src/uts/common/os/rctl.c107
-rw-r--r--usr/src/uts/common/os/shm.c188
-rw-r--r--usr/src/uts/common/os/sunddi.c106
-rw-r--r--usr/src/uts/common/os/zone.c94
-rw-r--r--usr/src/uts/common/sys/ddi_implfuncs.h6
-rw-r--r--usr/src/uts/common/sys/ddidevmap.h9
-rw-r--r--usr/src/uts/common/sys/proc.h6
-rw-r--r--usr/src/uts/common/sys/project.h6
-rw-r--r--usr/src/uts/common/sys/rctl.h6
-rw-r--r--usr/src/uts/common/sys/shm_impl.h11
-rw-r--r--usr/src/uts/common/sys/zone.h10
-rw-r--r--usr/src/uts/common/syscall/tasksys.c20
-rw-r--r--usr/src/uts/common/vm/anon.h3
-rw-r--r--usr/src/uts/common/vm/as.h11
-rw-r--r--usr/src/uts/common/vm/seg_dev.h13
-rw-r--r--usr/src/uts/common/vm/seg_spt.c166
-rw-r--r--usr/src/uts/common/vm/seg_vn.c215
-rw-r--r--usr/src/uts/common/vm/vm_anon.c1
-rw-r--r--usr/src/uts/i86pc/os/pmem.c29
27 files changed, 731 insertions, 354 deletions
diff --git a/usr/src/lib/brand/lx/zone/config.xml b/usr/src/lib/brand/lx/zone/config.xml
index 12deb33022..7eba9f6f18 100644
--- a/usr/src/lib/brand/lx/zone/config.xml
+++ b/usr/src/lib/brand/lx/zone/config.xml
@@ -65,6 +65,7 @@
<privilege set="default" name="proc_chroot" />
<privilege set="default" name="sys_audit" />
<privilege set="default" name="proc_audit" />
+ <privilege set="default" name="proc_lock_memory" />
<privilege set="default" name="proc_owner" />
<privilege set="default" name="proc_setid" />
<privilege set="default" name="proc_taskid" />
diff --git a/usr/src/lib/brand/native/zone/config.xml b/usr/src/lib/brand/native/zone/config.xml
index d91bebf46e..45ce096c13 100644
--- a/usr/src/lib/brand/native/zone/config.xml
+++ b/usr/src/lib/brand/native/zone/config.xml
@@ -65,6 +65,7 @@
<privilege set="default" name="proc_chroot" />
<privilege set="default" name="sys_audit" />
<privilege set="default" name="proc_audit" />
+ <privilege set="default" name="proc_lock_memory" />
<privilege set="default" name="proc_owner" />
<privilege set="default" name="proc_setid" />
<privilege set="default" name="proc_taskid" />
diff --git a/usr/src/lib/brand/sn1/zone/config.xml b/usr/src/lib/brand/sn1/zone/config.xml
index 5873587953..15d39b9c35 100644
--- a/usr/src/lib/brand/sn1/zone/config.xml
+++ b/usr/src/lib/brand/sn1/zone/config.xml
@@ -65,6 +65,7 @@
<privilege set="default" name="proc_chroot" />
<privilege set="default" name="sys_audit" />
<privilege set="default" name="proc_audit" />
+ <privilege set="default" name="proc_lock_memory" />
<privilege set="default" name="proc_owner" />
<privilege set="default" name="proc_setid" />
<privilege set="default" name="proc_taskid" />
diff --git a/usr/src/uts/common/fs/swapfs/swap_vnops.c b/usr/src/uts/common/fs/swapfs/swap_vnops.c
index 348392da2c..02ae7f3212 100644
--- a/usr/src/uts/common/fs/swapfs/swap_vnops.c
+++ b/usr/src/uts/common/fs/swapfs/swap_vnops.c
@@ -593,6 +593,11 @@ swap_putapage(
size_t swap_klustsize;
/*
+ * Clear force flag so that p_lckcnt pages are not invalidated.
+ */
+ flags &= ~B_FORCE;
+
+ /*
* This check is added for callers who access swap_putpage with len = 0.
* swap_putpage calls swap_putapage page-by-page via pvn_vplist_dirty.
* And it's necessary to do the same queuing if users have the same
diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c
index 3b01993465..657d87300f 100644
--- a/usr/src/uts/common/os/exec.c
+++ b/usr/src/uts/common/os/exec.c
@@ -1816,6 +1816,7 @@ exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
as = as_alloc();
p->p_as = as;
+ as->a_proc = p;
if (p->p_model == DATAMODEL_ILP32)
as->a_userlimit = (caddr_t)USERLIMIT32;
(void) hat_setup(as->a_hat, HAT_ALLOC);
diff --git a/usr/src/uts/common/os/fork.c b/usr/src/uts/common/os/fork.c
index fbda5b8c4a..7ae565274d 100644
--- a/usr/src/uts/common/os/fork.c
+++ b/usr/src/uts/common/os/fork.c
@@ -271,6 +271,8 @@ cfork(int isvfork, int isfork1)
error = (error == ENOMEM) ? ENOMEM : EAGAIN;
goto forkerr;
}
+ cp->p_as->a_proc = cp;
+
/* Duplicate parent's shared memory */
if (p->p_segacct)
shmfork(p, cp);
diff --git a/usr/src/uts/common/os/main.c b/usr/src/uts/common/os/main.c
index ec9fc6c3e3..1f4fbbf877 100644
--- a/usr/src/uts/common/os/main.c
+++ b/usr/src/uts/common/os/main.c
@@ -318,6 +318,7 @@ start_init_common()
p->p_stk_ctl = INT32_MAX;
p->p_as = as_alloc();
+ p->p_as->a_proc = p;
p->p_as->a_userlimit = (caddr_t)USERLIMIT32;
(void) hat_setup(p->p_as->a_hat, HAT_INIT);
diff --git a/usr/src/uts/common/os/project.c b/usr/src/uts/common/os/project.c
index 6eb65a8048..6c266c0ca3 100644
--- a/usr/src/uts/common/os/project.c
+++ b/usr/src/uts/common/os/project.c
@@ -55,7 +55,7 @@ rctl_hndl_t rc_project_semmni;
rctl_hndl_t rc_project_shmmax;
rctl_hndl_t rc_project_shmmni;
rctl_hndl_t rc_project_portids;
-rctl_hndl_t rc_project_devlockmem;
+rctl_hndl_t rc_project_locked_mem;
rctl_hndl_t rc_project_contract;
rctl_hndl_t rc_project_crypto_mem;
@@ -114,7 +114,8 @@ project_data_init(kproject_data_t *data)
data->kpd_ipc.ipcq_shmmni = 0;
data->kpd_ipc.ipcq_semmni = 0;
data->kpd_ipc.ipcq_msgmni = 0;
- data->kpd_devlockmem = 0;
+ data->kpd_locked_mem = 0;
+ data->kpd_locked_mem_ctl = UINT64_MAX;
data->kpd_contract = 0;
data->kpd_crypto_mem = 0;
}
@@ -442,6 +443,7 @@ project_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl,
rctl_qty_t nlwps;
ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(MUTEX_HELD(&p->p_zone->zone_nlwps_lock));
ASSERT(e->rcep_t == RCENTITY_PROJECT);
if (e->rcep_p.proj == NULL)
return (0);
@@ -628,29 +630,51 @@ static rctl_ops_t project_msgmni_ops = {
project_msgmni_test
};
-/*
- * project.max-device-locked-memory resource control support.
- */
+/*ARGSUSED*/
+static rctl_qty_t
+project_locked_mem_usage(rctl_t *rctl, struct proc *p)
+{
+ rctl_qty_t q;
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ mutex_enter(&p->p_zone->zone_rctl_lock);
+ q = p->p_task->tk_proj->kpj_data.kpd_locked_mem;
+ mutex_exit(&p->p_zone->zone_rctl_lock);
+ return (q);
+}
/*ARGSUSED*/
static int
-project_devlockmem_test(struct rctl *rctl, struct proc *p, rctl_entity_p_t *e,
+project_locked_mem_test(struct rctl *rctl, struct proc *p, rctl_entity_p_t *e,
rctl_val_t *rval, rctl_qty_t inc, uint_t flags)
{
- rctl_qty_t v;
+ rctl_qty_t q;
ASSERT(MUTEX_HELD(&p->p_lock));
- ASSERT(e->rcep_t == RCENTITY_PROJECT);
- v = e->rcep_p.proj->kpj_data.kpd_devlockmem + inc;
- if (v > rval->rcv_value)
+ ASSERT(MUTEX_HELD(&p->p_zone->zone_rctl_lock));
+ q = p->p_task->tk_proj->kpj_data.kpd_locked_mem;
+ if (q + inc > rval->rcv_value)
return (1);
return (0);
}
-static rctl_ops_t project_devlockmem_ops = {
+/*ARGSUSED*/
+static int
+project_locked_mem_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
+ rctl_qty_t nv) {
+
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(e->rcep_t == RCENTITY_PROJECT);
+ if (e->rcep_p.proj == NULL)
+ return (0);
+
+ e->rcep_p.proj->kpj_data.kpd_locked_mem_ctl = nv;
+ return (0);
+}
+
+static rctl_ops_t project_locked_mem_ops = {
rcop_no_action,
- rcop_no_usage,
- rcop_no_set,
- project_devlockmem_test
+ project_locked_mem_usage,
+ project_locked_mem_set,
+ project_locked_mem_test
};
/*
@@ -826,17 +850,13 @@ project_init(void)
/*
* Resource control for locked memory
*/
- rc_project_devlockmem = rctl_register(
- "project.max-device-locked-memory", RCENTITY_PROJECT,
+ rc_project_locked_mem = rctl_register(
+ "project.max-locked-memory", RCENTITY_PROJECT,
RCTL_GLOBAL_DENY_ALWAYS | RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES,
- UINT64_MAX, UINT64_MAX, &project_devlockmem_ops);
-
- /*
- * Defaults to 1/16th of the machine's memory
- */
- qty = availrmem_initial << (PAGESHIFT - 4);
+ UINT64_MAX, UINT64_MAX, &project_locked_mem_ops);
- rctl_add_default_limit("project.max-device-locked-memory", qty,
+ /* Default value equals that of max-shm-memory. */
+ rctl_add_default_limit("project.max-locked-memory", qty,
RCPRIV_PRIVILEGED, RCTL_LOCAL_DENY);
/*
diff --git a/usr/src/uts/common/os/rctl.c b/usr/src/uts/common/os/rctl.c
index dd6230ad7b..4de4c74fe8 100644
--- a/usr/src/uts/common/os/rctl.c
+++ b/usr/src/uts/common/os/rctl.c
@@ -2566,3 +2566,110 @@ rctl_init(void)
rctlproc_init();
}
+
+/*
+ * rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc)
+ *
+ * Increments the amount of locked memory on a project, and
+ * zone. If proj is NULL, the proj and zone of proc_t p is used. If
+ * chargeproc is non-zero, then the charged amount is cached on p->p_locked_mem
+ * so that the charge can be migrated when a process changes projects.
+ *
+ * Return values
+ * 0 - success
+ * EAGAIN - attempting to increment locked memory is denied by one
+ * or more resource entities.
+ */
+int
+rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc,
+ int chargeproc)
+{
+ kproject_t *projp;
+ zone_t *zonep;
+ rctl_entity_p_t e;
+ int ret = 0;
+
+ ASSERT(p != NULL);
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ if (proj != NULL) {
+ projp = proj;
+ zonep = zone_find_by_id(projp->kpj_zoneid);
+ } else {
+ projp = p->p_task->tk_proj;
+ zonep = p->p_zone;
+ }
+
+ mutex_enter(&zonep->zone_rctl_lock);
+
+ e.rcep_p.proj = projp;
+ e.rcep_t = RCENTITY_PROJECT;
+ if (projp->kpj_data.kpd_locked_mem + inc >
+ projp->kpj_data.kpd_locked_mem_ctl) {
+ if (rctl_test_entity(rc_project_locked_mem, projp->kpj_rctls,
+ p, &e, inc, 0) & RCT_DENY) {
+ ret = EAGAIN;
+ goto out;
+ }
+ }
+ e.rcep_p.zone = zonep;
+ e.rcep_t = RCENTITY_ZONE;
+ if (zonep->zone_locked_mem + inc > zonep->zone_locked_mem_ctl) {
+ if (rctl_test_entity(rc_zone_locked_mem, zonep->zone_rctls,
+ p, &e, inc, 0) & RCT_DENY) {
+ ret = EAGAIN;
+ goto out;
+ }
+ }
+
+ zonep->zone_locked_mem += inc;
+ projp->kpj_data.kpd_locked_mem += inc;
+ if (chargeproc != 0) {
+ p->p_locked_mem += inc;
+ }
+out:
+ mutex_exit(&zonep->zone_rctl_lock);
+ if (proj != NULL)
+ zone_rele(zonep);
+ return (ret);
+}
+
+/*
+ * rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc)
+ *
+ * Decrements the amount of locked memory on a project and
+ * zone. If proj is NULL, the proj and zone of proc_t p is used. If
+ * creditproc is non-zero, then the quantity of locked memory is subtracted
+ * from p->p_locked_mem.
+ *
+ * Return values
+ * none
+ */
+void
+rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc,
+ int creditproc)
+{
+ kproject_t *projp;
+ zone_t *zonep;
+
+ if (proj != NULL) {
+ projp = proj;
+ zonep = zone_find_by_id(projp->kpj_zoneid);
+ } else {
+ ASSERT(p != NULL);
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ projp = p->p_task->tk_proj;
+ zonep = p->p_zone;
+ }
+
+ mutex_enter(&zonep->zone_rctl_lock);
+ zonep->zone_locked_mem -= inc;
+ projp->kpj_data.kpd_locked_mem -= inc;
+ if (creditproc != 0) {
+ ASSERT(p != NULL);
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ p->p_locked_mem -= inc;
+ }
+ mutex_exit(&zonep->zone_rctl_lock);
+ if (proj != NULL)
+ zone_rele(zonep);
+}
diff --git a/usr/src/uts/common/os/shm.c b/usr/src/uts/common/os/shm.c
index b8038fd0ae..5c03ab7803 100644
--- a/usr/src/uts/common/os/shm.c
+++ b/usr/src/uts/common/os/shm.c
@@ -108,6 +108,7 @@
#include <sys/project.h>
#include <sys/policy.h>
#include <sys/zone.h>
+#include <sys/rctl.h>
#include <sys/ipc.h>
#include <sys/ipc_impl.h>
@@ -125,11 +126,11 @@
#include <c2/audit.h>
-static int shmem_lock(struct anon_map *amp);
-static void shmem_unlock(struct anon_map *amp, uint_t lck);
+static int shmem_lock(kshmid_t *sp, struct anon_map *amp);
+static void shmem_unlock(kshmid_t *sp, struct anon_map *amp);
static void sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags,
kshmid_t *id);
-static void shm_rm_amp(struct anon_map *amp, uint_t lckflag);
+static void shm_rm_amp(struct anon_map *amp);
static void shm_dtor(kipc_perm_t *);
static void shm_rmid(kipc_perm_t *);
static void shm_remove_zone(zoneid_t, void *);
@@ -464,7 +465,6 @@ shmat(int shmid, caddr_t uaddr, int uflags, uintptr_t *rvp)
sp->shm_sptinfo->sptas = segspt->s_as;
sp->shm_sptseg = segspt;
sp->shm_sptprot = prot;
- sp->shm_lkcnt = 0;
} else if ((prot & sp->shm_sptprot) != sp->shm_sptprot) {
/*
* Ensure we're attaching to an ISM segment with
@@ -573,6 +573,11 @@ shm_dtor(kipc_perm_t *perm)
uint_t cnt;
size_t rsize;
+ if (sp->shm_lkcnt > 0) {
+ shmem_unlock(sp, sp->shm_amp);
+ sp->shm_lkcnt = 0;
+ }
+
if (sp->shm_sptinfo) {
if (isspt(sp))
sptdestroy(sp->shm_sptinfo->sptas, sp->shm_amp);
@@ -583,7 +588,7 @@ shm_dtor(kipc_perm_t *perm)
cnt = --sp->shm_amp->refcnt;
ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
ASSERT(cnt == 0);
- shm_rm_amp(sp->shm_amp, sp->shm_lkcnt);
+ shm_rm_amp(sp->shm_amp);
if (sp->shm_perm.ipc_id != IPC_ID_INVAL) {
rsize = ptob(btopr(sp->shm_segsz));
@@ -705,8 +710,13 @@ shmctl(int shmid, int cmd, void *arg)
if ((error = secpolicy_lock_memory(cr)) != 0)
break;
+ /* protect against overflow */
+ if (sp->shm_lkcnt >= USHRT_MAX) {
+ error = ENOMEM;
+ break;
+ }
if (!isspt(sp) && (sp->shm_lkcnt++ == 0)) {
- if (error = shmem_lock(sp->shm_amp)) {
+ if (error = shmem_lock(sp, sp->shm_amp)) {
ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
cmn_err(CE_NOTE,
"shmctl - couldn't lock %ld pages into memory",
@@ -714,7 +724,6 @@ shmctl(int shmid, int cmd, void *arg)
ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
error = ENOMEM;
sp->shm_lkcnt--;
- shmem_unlock(sp->shm_amp, 0);
}
}
break;
@@ -724,10 +733,8 @@ shmctl(int shmid, int cmd, void *arg)
if ((error = secpolicy_lock_memory(cr)) != 0)
break;
- if (!isspt(sp)) {
- if (sp->shm_lkcnt && (--sp->shm_lkcnt == 0)) {
- shmem_unlock(sp->shm_amp, 1);
- }
+ if (sp->shm_lkcnt && (--sp->shm_lkcnt == 0)) {
+ shmem_unlock(sp, sp->shm_amp);
}
break;
@@ -863,7 +870,7 @@ top:
}
sp->shm_amp = anonmap_alloc(rsize, rsize);
-
+ sp->shm_amp->a_sp = sp;
/*
* Store the original user's requested size, in bytes,
* rather than the page-aligned size. The former is
@@ -878,7 +885,6 @@ top:
sp->shm_cpid = curproc->p_pid;
sp->shm_ismattch = 0;
sp->shm_sptinfo = NULL;
-
/*
* Check limits one last time, push id into global
* visibility, and update resource usage counts.
@@ -1094,115 +1100,58 @@ shmexit(struct proc *pp)
* At this time pages should be in memory, so just lock them.
*/
static void
-lock_again(size_t npages, struct anon_map *amp)
+lock_again(size_t npages, kshmid_t *sp, struct anon_map *amp)
{
struct anon *ap;
struct page *pp;
struct vnode *vp;
- anoff_t off;
+ u_offset_t off;
ulong_t anon_idx;
anon_sync_obj_t cookie;
+ mutex_enter(&sp->shm_mlock);
ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
-
for (anon_idx = 0; npages != 0; anon_idx++, npages--) {
anon_array_enter(amp, anon_idx, &cookie);
ap = anon_get_ptr(amp->ahp, anon_idx);
+ ASSERT(ap != NULL);
swap_xlate(ap, &vp, &off);
anon_array_exit(&cookie);
- pp = page_lookup(vp, (u_offset_t)off, SE_SHARED);
+ pp = page_lookup(vp, off, SE_SHARED);
if (pp == NULL) {
panic("lock_again: page not in the system");
/*NOTREACHED*/
}
+ /* page should already be locked by caller */
+ ASSERT(pp->p_lckcnt > 0);
(void) page_pp_lock(pp, 0, 0);
page_unlock(pp);
}
ANON_LOCK_EXIT(&amp->a_rwlock);
+ mutex_exit(&sp->shm_mlock);
}
-/* check if this segment is already locked. */
-/*ARGSUSED*/
-static int
-check_locked(struct as *as, struct segvn_data *svd, size_t npages)
-{
- struct vpage *vpp = svd->vpage;
- size_t i;
- if (svd->vpage == NULL)
- return (0); /* unlocked */
-
- SEGVN_LOCK_ENTER(as, &svd->lock, RW_READER);
- for (i = 0; i < npages; i++, vpp++) {
- if (VPP_ISPPLOCK(vpp) == 0) {
- SEGVN_LOCK_EXIT(as, &svd->lock);
- return (1); /* partially locked */
- }
- }
- SEGVN_LOCK_EXIT(as, &svd->lock);
- return (2); /* locked */
-}
-
-
/*
* Attach the shared memory segment to the process
* address space and lock the pages.
*/
static int
-shmem_lock(struct anon_map *amp)
+shmem_lock(kshmid_t *sp, struct anon_map *amp)
{
size_t npages = btopr(amp->size);
- struct seg *seg;
struct as *as;
struct segvn_crargs crargs;
- struct segvn_data *svd;
- proc_t *p = curproc;
- caddr_t addr;
- uint_t error, ret;
- caddr_t seg_base;
- size_t seg_sz;
-
- as = p->p_as;
- AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
- /* check if shared memory is already attached */
- for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
- svd = (struct segvn_data *)seg->s_data;
- if ((seg->s_ops == &segvn_ops) && (svd->amp == amp) &&
- (amp->size == seg->s_size)) {
- switch (ret = check_locked(as, svd, npages)) {
- case 0: /* unlocked */
- case 1: /* partially locked */
- seg_base = seg->s_base;
- seg_sz = seg->s_size;
-
- AS_LOCK_EXIT(as, &as->a_lock);
- if ((error = as_ctl(as, seg_base, seg_sz,
- MC_LOCK, 0, 0, NULL, 0)) == 0)
- lock_again(npages, amp);
- (void) as_ctl(as, seg_base, seg_sz, MC_UNLOCK,
- 0, 0, NULL, NULL);
- return (error);
- case 2: /* locked */
- AS_LOCK_EXIT(as, &as->a_lock);
- lock_again(npages, amp);
- return (0);
- default:
- cmn_err(CE_WARN, "shmem_lock: deflt %d", ret);
- break;
- }
- }
- }
- AS_LOCK_EXIT(as, &as->a_lock);
+ uint_t error;
- /* attach shm segment to our address space */
- as_rangelock(as);
- map_addr(&addr, amp->size, 0ll, 1, 0);
- if (addr == NULL) {
- as_rangeunlock(as);
- return (ENOMEM);
- }
+ /*
+ * A later ISM/DISM attach may increase the size of the amp, so
+ * cache the number of pages locked for the future shmem_unlock()
+ */
+ sp->shm_lkpages = npages;
+ as = as_alloc();
/* Initialize the create arguments and map the segment */
crargs = *(struct segvn_crargs *)zfod_argsp; /* structure copy */
crargs.offset = (u_offset_t)0;
@@ -1211,16 +1160,15 @@ shmem_lock(struct anon_map *amp)
crargs.prot = PROT_ALL;
crargs.maxprot = crargs.prot;
crargs.flags = 0;
-
- error = as_map(as, addr, amp->size, segvn_create, &crargs);
- as_rangeunlock(as);
+ error = as_map(as, 0x0, amp->size, segvn_create, &crargs);
if (!error) {
- if ((error = as_ctl(as, addr, amp->size, MC_LOCK, 0, 0,
+ if ((error = as_ctl(as, 0x0, amp->size, MC_LOCK, 0, 0,
NULL, 0)) == 0) {
- lock_again(npages, amp);
+ lock_again(npages, sp, amp);
}
- (void) as_unmap(as, addr, amp->size);
+ (void) as_unmap(as, 0x0, amp->size);
}
+ as_free(as);
return (error);
}
@@ -1229,38 +1177,53 @@ shmem_lock(struct anon_map *amp)
* Unlock shared memory
*/
static void
-shmem_unlock(struct anon_map *amp, uint_t lck)
+shmem_unlock(kshmid_t *sp, struct anon_map *amp)
{
struct anon *ap;
- pgcnt_t npages = btopr(amp->size);
+ pgcnt_t npages = sp->shm_lkpages;
struct vnode *vp;
struct page *pp;
- anoff_t off;
+ u_offset_t off;
ulong_t anon_idx;
+ size_t unlocked_bytes = 0;
+ kproject_t *proj;
+ anon_sync_obj_t cookie;
+ proj = sp->shm_perm.ipc_proj;
+ mutex_enter(&sp->shm_mlock);
+ ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
for (anon_idx = 0; anon_idx < npages; anon_idx++) {
+ anon_array_enter(amp, anon_idx, &cookie);
if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
- if (lck) {
- panic("shmem_unlock: null app");
- /*NOTREACHED*/
- }
- continue;
+ panic("shmem_unlock: null app");
+ /*NOTREACHED*/
}
swap_xlate(ap, &vp, &off);
+ anon_array_exit(&cookie);
pp = page_lookup(vp, off, SE_SHARED);
if (pp == NULL) {
- if (lck) {
- panic("shmem_unlock: page not in the system");
- /*NOTREACHED*/
- }
- continue;
- }
- if (pp->p_lckcnt) {
- page_pp_unlock(pp, 0, 0);
+ panic("shmem_unlock: page not in the system");
+ /*NOTREACHED*/
}
+ /*
+ * Page should at least have once lock from previous
+ * shmem_lock
+ */
+ ASSERT(pp->p_lckcnt > 0);
+ page_pp_unlock(pp, 0, 0);
+ if (pp->p_lckcnt == 0)
+ unlocked_bytes += PAGESIZE;
+
page_unlock(pp);
}
+
+ if (unlocked_bytes > 0) {
+ rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0);
+ }
+
+ ANON_LOCK_EXIT(&amp->a_rwlock);
+ mutex_exit(&sp->shm_mlock);
}
/*
@@ -1268,16 +1231,9 @@ shmem_unlock(struct anon_map *amp, uint_t lck)
* amp. This means all shmdt()s and the IPC_RMID have been done.
*/
static void
-shm_rm_amp(struct anon_map *amp, uint_t lckflag)
+shm_rm_amp(struct anon_map *amp)
{
/*
- * If we are finally deleting the
- * shared memory, and if no one did
- * the SHM_UNLOCK, we must do it now.
- */
- shmem_unlock(amp, lckflag);
-
- /*
* Free up the anon_map.
*/
lgrp_shm_policy_fini(amp, NULL);
diff --git a/usr/src/uts/common/os/sunddi.c b/usr/src/uts/common/os/sunddi.c
index f16ae44426..8c6bcefe06 100644
--- a/usr/src/uts/common/os/sunddi.c
+++ b/usr/src/uts/common/os/sunddi.c
@@ -82,10 +82,12 @@
#include <sys/devpolicy.h>
#include <sys/ctype.h>
#include <net/if.h>
+#include <sys/rctl.h>
extern pri_t minclsyspri;
-extern rctl_hndl_t rc_project_devlockmem;
+extern rctl_hndl_t rc_project_locked_mem;
+extern rctl_hndl_t rc_zone_locked_mem;
#ifdef DEBUG
static int sunddi_debug = 0;
@@ -104,13 +106,6 @@ static kthread_t *ddi_umem_unlock_thread;
static struct ddi_umem_cookie *ddi_umem_unlock_head = NULL;
static struct ddi_umem_cookie *ddi_umem_unlock_tail = NULL;
-/*
- * This lock protects the project.max-device-locked-memory counter.
- * When both p_lock (proc_t) and this lock need to acquired, p_lock
- * should be acquired first.
- */
-static kmutex_t umem_devlockmem_rctl_lock;
-
/*
* DDI(Sun) Function and flag definitions:
@@ -7819,32 +7814,15 @@ umem_lock_undo(struct as *as, void *arg, uint_t event)
*/
int
/* ARGSUSED */
-i_ddi_incr_locked_memory(proc_t *procp, task_t *taskp,
- kproject_t *projectp, zone_t *zonep, rctl_qty_t inc)
+i_ddi_incr_locked_memory(proc_t *procp, rctl_qty_t inc)
{
- kproject_t *projp;
-
- ASSERT(procp);
- ASSERT(mutex_owned(&procp->p_lock));
-
- projp = procp->p_task->tk_proj;
- mutex_enter(&umem_devlockmem_rctl_lock);
- /*
- * Test if the requested memory can be locked without exceeding the
- * limits.
- */
- if (rctl_test(rc_project_devlockmem, projp->kpj_rctls,
- procp, inc, RCA_SAFE) & RCT_DENY) {
- mutex_exit(&umem_devlockmem_rctl_lock);
+ ASSERT(procp != NULL);
+ mutex_enter(&procp->p_lock);
+ if (rctl_incr_locked_mem(procp, NULL, inc, 1)) {
+ mutex_exit(&procp->p_lock);
return (ENOMEM);
}
- projp->kpj_data.kpd_devlockmem += inc;
- mutex_exit(&umem_devlockmem_rctl_lock);
- /*
- * Grab a hold on the project.
- */
- (void) project_hold(projp);
-
+ mutex_exit(&procp->p_lock);
return (0);
}
@@ -7854,24 +7832,16 @@ i_ddi_incr_locked_memory(proc_t *procp, task_t *taskp,
*/
/* ARGSUSED */
void
-i_ddi_decr_locked_memory(proc_t *procp, task_t *taskp,
- kproject_t *projectp, zone_t *zonep, rctl_qty_t dec)
+i_ddi_decr_locked_memory(proc_t *procp, rctl_qty_t dec)
{
- ASSERT(projectp);
-
- mutex_enter(&umem_devlockmem_rctl_lock);
- projectp->kpj_data.kpd_devlockmem -= dec;
- mutex_exit(&umem_devlockmem_rctl_lock);
-
- /*
- * Release the project pointer reference accquired in
- * i_ddi_incr_locked_memory().
- */
- (void) project_rele(projectp);
+ ASSERT(procp != NULL);
+ mutex_enter(&procp->p_lock);
+ rctl_decr_locked_mem(procp, NULL, dec, 1);
+ mutex_exit(&procp->p_lock);
}
/*
- * This routine checks if the max-device-locked-memory resource ctl is
+ * This routine checks if the max-locked-memory resource ctl is
* exceeded, if not increments it, grabs a hold on the project.
* Returns 0 if successful otherwise returns error code
*/
@@ -7885,41 +7855,27 @@ umem_incr_devlockmem(struct ddi_umem_cookie *cookie)
procp = cookie->procp;
ASSERT(procp);
- mutex_enter(&procp->p_lock);
-
- if ((ret = i_ddi_incr_locked_memory(procp, NULL,
- NULL, NULL, cookie->size)) != 0) {
- mutex_exit(&procp->p_lock);
+ if ((ret = i_ddi_incr_locked_memory(procp,
+ cookie->size)) != 0) {
return (ret);
}
-
- /*
- * save the project pointer in the
- * umem cookie, project pointer already
- * hold in i_ddi_incr_locked_memory
- */
- cookie->lockmem_proj = (void *)procp->p_task->tk_proj;
- mutex_exit(&procp->p_lock);
-
return (0);
}
/*
- * Decrements the max-device-locked-memory resource ctl and releases
+ * Decrements the max-locked-memory resource ctl and releases
* the hold on the project that was acquired during umem_incr_devlockmem
*/
static void
umem_decr_devlockmem(struct ddi_umem_cookie *cookie)
{
- kproject_t *projp;
+ proc_t *proc;
- if (!cookie->lockmem_proj)
+ proc = (proc_t *)cookie->procp;
+ if (!proc)
return;
- projp = (kproject_t *)cookie->lockmem_proj;
- i_ddi_decr_locked_memory(NULL, NULL, projp, NULL, cookie->size);
-
- cookie->lockmem_proj = NULL;
+ i_ddi_decr_locked_memory(proc, cookie->size);
}
/*
@@ -7954,7 +7910,7 @@ umem_decr_devlockmem(struct ddi_umem_cookie *cookie)
* EINVAL - for invalid parameters
* EPERM, ENOMEM and other error codes returned by as_pagelock
* ENOMEM - is returned if the current request to lock memory exceeds
- * project.max-device-locked-memory resource control value.
+ * *.max-locked-memory resource control value.
* EFAULT - memory pertains to a regular file mapped shared and
* and DDI_UMEMLOCK_LONGTERM flag is set
* EAGAIN - could not start the ddi_umem_unlock list processing thread
@@ -8043,12 +7999,6 @@ umem_lockmemory(caddr_t addr, size_t len, int flags, ddi_umem_cookie_t *cookie,
*cookie = (ddi_umem_cookie_t)NULL;
return (ENOMEM);
}
- /*
- * umem_incr_devlockmem stashes the project ptr into the
- * cookie. This is needed during unlock since that can
- * happen in a non-USER context
- */
- ASSERT(p->lockmem_proj);
/* Lock the pages corresponding to addr, len in memory */
error = as_pagelock(as, &(p->pparray), addr, len, p->s_flags);
@@ -8169,7 +8119,7 @@ i_ddi_umem_unlock(struct ddi_umem_cookie *p)
/*
* Now that we have unlocked the memory decrement the
- * max-device-locked-memory rctl
+ * *.max-locked-memory rctl
*/
umem_decr_devlockmem(p);
@@ -8269,7 +8219,7 @@ i_ddi_umem_unlock_thread_start(void)
* EINVAL - for invalid parameters
* EPERM, ENOMEM and other error codes returned by as_pagelock
* ENOMEM - is returned if the current request to lock memory exceeds
- * project.max-device-locked-memory resource control value.
+ * *.max-locked-memory resource control value.
* EAGAIN - could not start the ddi_umem_unlock list processing thread
*/
int
@@ -8338,12 +8288,6 @@ ddi_umem_lock(caddr_t addr, size_t len, int flags, ddi_umem_cookie_t *cookie)
*cookie = (ddi_umem_cookie_t)NULL;
return (ENOMEM);
}
- /*
- * umem_incr_devlockmem stashes the project ptr into the
- * cookie. This is needed during unlock since that can
- * happen in a non-USER context
- */
- ASSERT(p->lockmem_proj);
/* Lock the pages corresponding to addr, len in memory */
error = as_pagelock(((proc_t *)p->procp)->p_as, &(p->pparray),
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index 9fd6b423bd..0fb2c2be55 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -316,6 +316,7 @@ const char *zone_status_table[] = {
* This isn't static so lint doesn't complain.
*/
rctl_hndl_t rc_zone_cpu_shares;
+rctl_hndl_t rc_zone_locked_mem;
rctl_hndl_t rc_zone_nlwps;
rctl_hndl_t rc_zone_shmmax;
rctl_hndl_t rc_zone_shmmni;
@@ -903,8 +904,8 @@ zone_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl,
/*ARGSUSED*/
static int
-zone_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv) {
-
+zone_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv)
+{
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(e->rcep_t == RCENTITY_ZONE);
if (e->rcep_p.zone == NULL)
@@ -1004,6 +1005,51 @@ static rctl_ops_t zone_msgmni_ops = {
zone_msgmni_test
};
+/*ARGSUSED*/
+static rctl_qty_t
+zone_locked_mem_usage(rctl_t *rctl, struct proc *p)
+{
+ rctl_qty_t q;
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ mutex_enter(&p->p_zone->zone_rctl_lock);
+ q = p->p_zone->zone_locked_mem;
+ mutex_exit(&p->p_zone->zone_rctl_lock);
+ return (q);
+}
+
+/*ARGSUSED*/
+static int
+zone_locked_mem_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
+ rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags)
+{
+ rctl_qty_t q;
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(MUTEX_HELD(&p->p_zone->zone_rctl_lock));
+ q = p->p_zone->zone_locked_mem;
+ if (q + incr > rcntl->rcv_value)
+ return (1);
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+zone_locked_mem_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
+ rctl_qty_t nv)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(e->rcep_t == RCENTITY_ZONE);
+ if (e->rcep_p.zone == NULL)
+ return (0);
+ e->rcep_p.zone->zone_locked_mem_ctl = nv;
+ return (0);
+}
+
+static rctl_ops_t zone_locked_mem_ops = {
+ rcop_no_action,
+ zone_locked_mem_usage,
+ zone_locked_mem_set,
+ zone_locked_mem_test
+};
/*
* Helper function to brand the zone with a unique ID.
@@ -1209,6 +1255,10 @@ zone_init(void)
rde = rctl_dict_lookup("zone.cpu-shares");
(void) rctl_val_list_insert(&rde->rcd_default_value, dval);
+ rc_zone_locked_mem = rctl_register("zone.max-locked-memory",
+ RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
+ RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
+ &zone_locked_mem_ops);
/*
* Initialize the ``global zone''.
*/
@@ -2458,6 +2508,14 @@ zsched(void *arg)
mutex_exit(&global_zone->zone_nlwps_lock);
/*
+ * Decrement locked memory counts on old zone and project.
+ */
+ mutex_enter(&global_zone->zone_rctl_lock);
+ global_zone->zone_locked_mem -= pp->p_locked_mem;
+ pj->kpj_data.kpd_locked_mem -= pp->p_locked_mem;
+ mutex_exit(&global_zone->zone_rctl_lock);
+
+ /*
* Create and join a new task in project '0' of this zone.
*
* We don't need to call holdlwps() since we know we're the only lwp in
@@ -2468,21 +2526,29 @@ zsched(void *arg)
tk = task_create(0, zone);
mutex_enter(&cpu_lock);
oldtk = task_join(tk, 0);
- mutex_exit(&curproc->p_lock);
- mutex_exit(&cpu_lock);
- task_rele(oldtk);
+
+ pj = pp->p_task->tk_proj;
+
+ mutex_enter(&zone->zone_rctl_lock);
+ zone->zone_locked_mem += pp->p_locked_mem;
+ pj->kpj_data.kpd_locked_mem += pp->p_locked_mem;
+ mutex_exit(&zone->zone_rctl_lock);
/*
* add lwp counts to zsched's zone, and increment project's task count
* due to the task created in the above tasksys_settaskid
*/
- pj = pp->p_task->tk_proj;
+
mutex_enter(&zone->zone_nlwps_lock);
pj->kpj_nlwps += pp->p_lwpcnt;
pj->kpj_ntasks += 1;
zone->zone_nlwps += pp->p_lwpcnt;
mutex_exit(&zone->zone_nlwps_lock);
+ mutex_exit(&curproc->p_lock);
+ mutex_exit(&cpu_lock);
+ task_rele(oldtk);
+
/*
* The process was created by a process in the global zone, hence the
* credentials are wrong. We might as well have kcred-ish credentials.
@@ -2953,6 +3019,7 @@ zone_create(const char *zone_name, const char *zone_root,
zone->zone_initname = NULL;
mutex_init(&zone->zone_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&zone->zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&zone->zone_rctl_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&zone->zone_cv, NULL, CV_DEFAULT, NULL);
list_create(&zone->zone_zsd, sizeof (struct zsd_entry),
offsetof(struct zsd_entry, zsd_linkage));
@@ -2990,6 +3057,8 @@ zone_create(const char *zone_name, const char *zone_root,
zone->zone_initname =
kmem_alloc(strlen(zone_default_initname) + 1, KM_SLEEP);
(void) strcpy(zone->zone_initname, zone_default_initname);
+ zone->zone_locked_mem = 0;
+ zone->zone_locked_mem_ctl = UINT64_MAX;
/*
* Zsched initializes the rctls.
@@ -4145,15 +4214,26 @@ zone_enter(zoneid_t zoneid)
zone->zone_nlwps += pp->p_lwpcnt;
/* add 1 task to zone's proj0 */
zone_proj0->kpj_ntasks += 1;
- mutex_exit(&pp->p_lock);
mutex_exit(&zone->zone_nlwps_lock);
+ mutex_enter(&zone->zone_rctl_lock);
+ zone->zone_locked_mem += pp->p_locked_mem;
+ zone_proj0->kpj_data.kpd_locked_mem += pp->p_locked_mem;
+ mutex_exit(&zone->zone_rctl_lock);
+
/* remove lwps from proc's old zone and old project */
mutex_enter(&pp->p_zone->zone_nlwps_lock);
pp->p_zone->zone_nlwps -= pp->p_lwpcnt;
pp->p_task->tk_proj->kpj_nlwps -= pp->p_lwpcnt;
mutex_exit(&pp->p_zone->zone_nlwps_lock);
+ mutex_enter(&pp->p_zone->zone_rctl_lock);
+ pp->p_zone->zone_locked_mem -= pp->p_locked_mem;
+ pp->p_task->tk_proj->kpj_data.kpd_locked_mem -= pp->p_locked_mem;
+ mutex_exit(&pp->p_zone->zone_rctl_lock);
+
+ mutex_exit(&pp->p_lock);
+
/*
* Joining the zone cannot fail from now on.
*
diff --git a/usr/src/uts/common/sys/ddi_implfuncs.h b/usr/src/uts/common/sys/ddi_implfuncs.h
index e4ef2f0e3e..ad70123d76 100644
--- a/usr/src/uts/common/sys/ddi_implfuncs.h
+++ b/usr/src/uts/common/sys/ddi_implfuncs.h
@@ -273,10 +273,8 @@ void e_devid_cache_free_devt_list(int, dev_t *);
/*
* Resource control functions to lock down device memory.
*/
-extern int i_ddi_incr_locked_memory(proc_t *, task_t *, kproject_t *,
- zone_t *, rctl_qty_t);
-extern void i_ddi_decr_locked_memory(proc_t *, task_t *, kproject_t *,
- zone_t *, rctl_qty_t);
+extern int i_ddi_incr_locked_memory(proc_t *, rctl_qty_t);
+extern void i_ddi_decr_locked_memory(proc_t *, rctl_qty_t);
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/sys/ddidevmap.h b/usr/src/uts/common/sys/ddidevmap.h
index 7798f1d7cd..734c04417d 100644
--- a/usr/src/uts/common/sys/ddidevmap.h
+++ b/usr/src/uts/common/sys/ddidevmap.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -93,7 +92,7 @@ struct ddi_umem_cookie {
*/
ulong_t cook_refcnt; /* cookie reference count */
struct ddi_umem_cookie *unl_forw; /* list ptr for unlock cookies */
- void *lockmem_proj; /* project ptr for resource mgmt */
+ void *reserved; /* unused */
};
typedef struct as *ddi_as_handle_t;
diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h
index 13a3605e66..4c4025e649 100644
--- a/usr/src/uts/common/sys/proc.h
+++ b/usr/src/uts/common/sys/proc.h
@@ -342,6 +342,8 @@ typedef struct proc {
/* additional lock to protect p_sessp (but not its contents) */
kmutex_t p_splock;
+ rctl_qty_t p_locked_mem; /* locked memory charged to proc */
+ /* protected by p_lock */
} proc_t;
#define PROC_T /* headers relying on proc_t are OK */
@@ -486,8 +488,8 @@ extern struct pid pid0; /* p0's pid */
* These flags are used to synchronize with the pool subsystem to allow
* re-binding of processes to new pools.
*/
-#define PBWAIT 0x0001 /* process should wait outside fork/exec/exit */
-#define PEXITED 0x0002 /* process exited and about to become zombie */
+#define PBWAIT 0x0001 /* process should wait outside fork/exec/exit */
+#define PEXITED 0x0002 /* process exited and about to become zombie */
/* Macro to convert proc pointer to a user block pointer */
#define PTOU(p) (&(p)->p_user)
diff --git a/usr/src/uts/common/sys/project.h b/usr/src/uts/common/sys/project.h
index 181c18b651..679c1eddc2 100644
--- a/usr/src/uts/common/sys/project.h
+++ b/usr/src/uts/common/sys/project.h
@@ -40,9 +40,11 @@ extern "C" {
typedef struct kproject_data { /* Datum protected by: */
rctl_qty_t kpd_shmmax; /* shm's ipcs_lock */
ipc_rqty_t kpd_ipc; /* shm|sem|msg's ipcs lock */
- rctl_qty_t kpd_devlockmem; /* umem_devlockmem_rctl_lock */
+ rctl_qty_t kpd_locked_mem; /* zone_rctl_lock */
+ rctl_qty_t kpd_locked_mem_ctl; /* kpj_rctls->rcs_lock */
rctl_qty_t kpd_contract; /* contract_lock */
rctl_qty_t kpd_crypto_mem; /* crypto_rctl_lock */
+
} kproject_data_t;
/*
@@ -84,7 +86,7 @@ projid_t curprojid(void);
extern kproject_t *proj0p;
extern rctl_hndl_t rc_project_nlwps;
extern rctl_hndl_t rc_project_ntasks;
-
+extern rctl_hndl_t rc_project_locked_mem;
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/sys/rctl.h b/usr/src/uts/common/sys/rctl.h
index 02bcef6f36..eb56fff9e5 100644
--- a/usr/src/uts/common/sys/rctl.h
+++ b/usr/src/uts/common/sys/rctl.h
@@ -319,6 +319,12 @@ int rctl_rlimit_set(rctl_hndl_t, struct proc *, struct rlimit64 *,
rctl_alloc_gp_t *, int, int, const struct cred *);
int rctl_rlimit_get(rctl_hndl_t, struct proc *, struct rlimit64 *);
+/* specific rctl utility functions */
+int rctl_incr_locked_mem(struct proc *, struct kproject *, rctl_qty_t,
+ int);
+void rctl_decr_locked_mem(struct proc *, struct kproject *, rctl_qty_t,
+ int);
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/sys/shm_impl.h b/usr/src/uts/common/sys/shm_impl.h
index f43fad4c87..4d8cdcede5 100644
--- a/usr/src/uts/common/sys/shm_impl.h
+++ b/usr/src/uts/common/sys/shm_impl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -59,6 +58,10 @@ typedef struct kshmid {
size_t shm_segsz; /* size of segment in bytes */
struct anon_map *shm_amp; /* segment anon_map pointer */
ushort_t shm_lkcnt; /* number of times it is being locked */
+ pgcnt_t shm_lkpages; /* number of pages locked by shmctl */
+ kmutex_t shm_mlock; /* held when locking physical pages */
+ /* Therefore, protects p_lckcnt for */
+ /* pages that back shm */
pid_t shm_lpid; /* pid of last shmop */
pid_t shm_cpid; /* pid of creator */
ulong_t shm_ismattch; /* number of ISM attaches */
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 636b8acc0f..daccd16bdf 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -326,6 +326,14 @@ typedef struct zone {
uint_t zone_rootpathlen; /* strlen(zone_rootpath) + 1 */
uint32_t zone_shares; /* FSS shares allocated to zone */
rctl_set_t *zone_rctls; /* zone-wide (zone.*) rctls */
+ kmutex_t zone_rctl_lock; /* protects zone_locked_mem and */
+ /* kpd_locked_mem for all */
+ /* projects in zone */
+ /* grab after p_lock, before rcs_lock */
+ rctl_qty_t zone_locked_mem; /* bytes of locked memory in zone */
+ rctl_qty_t zone_locked_mem_ctl; /* current locked memory */
+ /* limit. Protected by */
+ /* zone_rctls->rcs_lock */
list_t zone_zsd; /* list of Zone-Specific Data values */
kcondvar_t zone_cv; /* used to signal state changes */
struct proc *zone_zsched; /* Dummy kernel "zsched" process */
@@ -544,6 +552,8 @@ extern void mount_completed(void);
extern int zone_walk(int (*)(zone_t *, void *), void *);
+extern rctl_hndl_t rc_zone_locked_mem;
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/syscall/tasksys.c b/usr/src/uts/common/syscall/tasksys.c
index 10b7e95c76..705b543a37 100644
--- a/usr/src/uts/common/syscall/tasksys.c
+++ b/usr/src/uts/common/syscall/tasksys.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -41,6 +40,7 @@
#include <sys/cpuvar.h>
#include <sys/policy.h>
#include <sys/zone.h>
+#include <sys/rctl.h>
/*
* Limit projlist to 256k projects.
@@ -52,7 +52,6 @@ typedef struct projlist_walk {
size_t pw_bufsz;
} projlist_walk_t;
-
/*
* taskid_t tasksys_settaskid(projid_t projid, uint_t flags);
*
@@ -112,6 +111,7 @@ tasksys_settaskid(projid_t projid, uint_t flags)
zone = p->p_zone;
mutex_enter(&zone->zone_nlwps_lock);
+ mutex_enter(&zone->zone_rctl_lock);
if (kpj->kpj_nlwps + p->p_lwpcnt > kpj->kpj_nlwps_ctl)
if (rctl_test_entity(rc_project_nlwps, kpj->kpj_rctls, p, &e,
@@ -123,18 +123,28 @@ tasksys_settaskid(projid_t projid, uint_t flags)
1, 0) & RCT_DENY)
rctlfail = 1;
+ if (kpj->kpj_data.kpd_locked_mem + p->p_locked_mem
+ > kpj->kpj_data.kpd_locked_mem_ctl)
+ if (rctl_test_entity(rc_project_locked_mem, kpj->kpj_rctls, p,
+ &e, p->p_locked_mem, 0) &RCT_DENY)
+ rctlfail = 1;
+
if (rctlfail) {
+ mutex_exit(&zone->zone_rctl_lock);
mutex_exit(&zone->zone_nlwps_lock);
if (curthread != p->p_agenttp)
continuelwps(p);
mutex_exit(&p->p_lock);
return (set_errno(EAGAIN));
}
+ kpj->kpj_data.kpd_locked_mem += p->p_locked_mem;
kpj->kpj_nlwps += p->p_lwpcnt;
kpj->kpj_ntasks++;
+ oldpj->kpj_data.kpd_locked_mem -= p->p_locked_mem;
oldpj->kpj_nlwps -= p->p_lwpcnt;
+ mutex_exit(&zone->zone_rctl_lock);
mutex_exit(&zone->zone_nlwps_lock);
mutex_exit(&p->p_lock);
diff --git a/usr/src/uts/common/vm/anon.h b/usr/src/uts/common/vm/anon.h
index 294867ca01..90f6e1e661 100644
--- a/usr/src/uts/common/vm/anon.h
+++ b/usr/src/uts/common/vm/anon.h
@@ -241,6 +241,8 @@ struct anon_hdr {
#define ANON_ALLOC_FORCE 0x2 /* force single level anon array */
#define ANON_GROWDOWN 0x4 /* anon array should grow downward */
+struct kshmid;
+
/*
* The anon_map structure is used by various clients of the anon layer to
* manage anonymous memory. When anonymous memory is shared,
@@ -284,6 +286,7 @@ struct anon_map {
ulong_t refcnt; /* reference count on this structure */
ushort_t a_szc; /* max szc among shared processes */
void *locality; /* lgroup locality info */
+ struct kshmid *a_sp; /* kshmid if amp backs sysV, or NULL */
};
#ifdef _KERNEL
diff --git a/usr/src/uts/common/vm/as.h b/usr/src/uts/common/vm/as.h
index c7afefc23c..f1c7ea3cfa 100644
--- a/usr/src/uts/common/vm/as.h
+++ b/usr/src/uts/common/vm/as.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -47,6 +46,7 @@
#include <vm/faultcode.h>
#include <vm/hat.h>
#include <sys/avl.h>
+#include <sys/proc.h>
#ifdef __cplusplus
extern "C" {
@@ -124,6 +124,7 @@ struct as {
size_t a_sizedir; /* size of object directory */
struct as_callback *a_callbacks; /* callback list */
void *a_xhat; /* list of xhat providers */
+ proc_t *a_proc; /* back pointer to proc */
};
#define AS_PAGLCK 0x80
@@ -240,7 +241,7 @@ void as_avlinit(struct as *);
struct seg *as_segat(struct as *as, caddr_t addr);
void as_rangelock(struct as *as);
void as_rangeunlock(struct as *as);
-struct as *as_alloc(void);
+struct as *as_alloc();
void as_free(struct as *as);
int as_dup(struct as *as, struct as **outas);
struct seg *as_findseg(struct as *as, caddr_t addr, int tail);
diff --git a/usr/src/uts/common/vm/seg_dev.h b/usr/src/uts/common/vm/seg_dev.h
index c498c06ecf..451d61963d 100644
--- a/usr/src/uts/common/vm/seg_dev.h
+++ b/usr/src/uts/common/vm/seg_dev.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -42,12 +41,12 @@
#pragma ident "%Z%%M% %I% %E% SMI"
-#include <sys/project.h>
-
#ifdef __cplusplus
extern "C" {
#endif
+struct proc;
+
/*
* Structure whose pointer is passed to the segdev_create routine
*/
@@ -110,7 +109,7 @@ struct devmap_pmem_cookie {
pgcnt_t dp_npages; /* number of allocated mem pages */
page_t **dp_pparray; /* pages allocated for this cookie */
vnode_t *dp_vnp; /* vnode associated with this cookie */
- kproject_t *dp_projp; /* project ptr for resource ctl */
+ proc_t *dp_proc; /* proc ptr for resource control */
};
#ifdef _KERNEL
diff --git a/usr/src/uts/common/vm/seg_spt.c b/usr/src/uts/common/vm/seg_spt.c
index 774a9c3b9f..b614344bd0 100644
--- a/usr/src/uts/common/vm/seg_spt.c
+++ b/usr/src/uts/common/vm/seg_spt.c
@@ -45,10 +45,13 @@
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/shm.h>
+#include <sys/shm_impl.h>
#include <sys/lgrp.h>
#include <sys/vmsystm.h>
-
+#include <sys/policy.h>
+#include <sys/project.h>
#include <sys/tnf_probe.h>
+#include <sys/zone.h>
#define SEGSPTADDR (caddr_t)0x0
@@ -181,7 +184,7 @@ static int spt_anon_getpages(struct seg *seg, caddr_t addr, size_t len,
/*ARGSUSED*/
int
sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp,
- uint_t prot, uint_t flags, uint_t share_szc)
+ uint_t prot, uint_t flags, uint_t share_szc)
{
int err;
struct as *newas;
@@ -189,7 +192,7 @@ sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp,
#ifdef DEBUG
TNF_PROBE_1(sptcreate, "spt", /* CSTYLED */,
- tnf_ulong, size, size );
+ tnf_ulong, size, size );
#endif
if (segspt_minfree == 0) /* leave min 5% of availrmem for */
segspt_minfree = availrmem/20; /* for the system */
@@ -201,11 +204,11 @@ sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp,
* get a new as for this shared memory segment
*/
newas = as_alloc();
+ newas->a_proc = NULL;
sptcargs.amp = amp;
sptcargs.prot = prot;
sptcargs.flags = flags;
sptcargs.szc = share_szc;
-
/*
* create a shared page table (spt) segment
*/
@@ -245,10 +248,10 @@ segspt_free(struct seg *seg)
if (sptd->spt_realsize)
segspt_free_pages(seg, seg->s_base, sptd->spt_realsize);
- if (sptd->spt_ppa_lckcnt)
- kmem_free(sptd->spt_ppa_lckcnt,
- sizeof (*sptd->spt_ppa_lckcnt)
- * btopr(sptd->spt_amp->size));
+ if (sptd->spt_ppa_lckcnt)
+ kmem_free(sptd->spt_ppa_lckcnt,
+ sizeof (*sptd->spt_ppa_lckcnt)
+ * btopr(sptd->spt_amp->size));
kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp));
mutex_destroy(&sptd->spt_lock);
kmem_free(sptd, sizeof (*sptd));
@@ -370,6 +373,7 @@ segspt_create(struct seg *seg, caddr_t argsp)
struct spt_data *sptd;
struct segspt_crargs *sptcargs = (struct segspt_crargs *)argsp;
struct anon_map *amp = sptcargs->amp;
+ struct kshmid *sp = amp->a_sp;
struct cred *cred = CRED();
ulong_t i, j, anon_index = 0;
pgcnt_t npages = btopr(amp->size);
@@ -381,16 +385,20 @@ segspt_create(struct seg *seg, caddr_t argsp)
caddr_t a;
pgcnt_t pidx;
size_t sz;
+ proc_t *procp = curproc;
+ rctl_qty_t lockedbytes = 0;
+ kproject_t *proj;
/*
* We are holding the a_lock on the underlying dummy as,
* so we can make calls to the HAT layer.
*/
ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
+ ASSERT(sp != NULL);
#ifdef DEBUG
TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */,
- tnf_opaque, addr, addr,
+ tnf_opaque, addr, addr,
tnf_ulong, len, seg->s_size);
#endif
if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
@@ -484,25 +492,49 @@ segspt_create(struct seg *seg, caddr_t argsp)
seg, addr, S_CREATE, cred)) != 0)
goto out4;
+ mutex_enter(&sp->shm_mlock);
+
+ /* May be partially locked, so, count bytes to charge for locking */
+ for (i = 0; i < npages; i++)
+ if (ppa[i]->p_lckcnt == 0)
+ lockedbytes += PAGESIZE;
+
+ proj = sp->shm_perm.ipc_proj;
+
+ if (lockedbytes > 0) {
+ mutex_enter(&procp->p_lock);
+ if (rctl_incr_locked_mem(procp, proj, lockedbytes, 0)) {
+ mutex_exit(&procp->p_lock);
+ mutex_exit(&sp->shm_mlock);
+ for (i = 0; i < npages; i++)
+ page_unlock(ppa[i]);
+ err = ENOMEM;
+ goto out4;
+ }
+ mutex_exit(&procp->p_lock);
+ }
+
/*
* addr is initial address corresponding to the first page on ppa list
*/
for (i = 0; i < npages; i++) {
/* attempt to lock all pages */
- if (!page_pp_lock(ppa[i], 0, 1)) {
+ if (page_pp_lock(ppa[i], 0, 1) == 0) {
/*
* if unable to lock any page, unlock all
* of them and return error
*/
for (j = 0; j < i; j++)
page_pp_unlock(ppa[j], 0, 1);
- for (i = 0; i < npages; i++) {
+ for (i = 0; i < npages; i++)
page_unlock(ppa[i]);
- }
+ rctl_decr_locked_mem(NULL, proj, lockedbytes, 0);
+ mutex_exit(&sp->shm_mlock);
err = ENOMEM;
goto out4;
}
}
+ mutex_exit(&sp->shm_mlock);
/*
* Some platforms assume that ISM mappings are HAT_LOAD_LOCK
@@ -582,6 +614,9 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
int root = 0;
pgcnt_t pgs, curnpgs = 0;
page_t *rootpp;
+ rctl_qty_t unlocked_bytes = 0;
+ kproject_t *proj;
+ kshmid_t *sp;
ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
@@ -601,7 +636,13 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
if (sptd->spt_flags & SHM_PAGEABLE)
npages = btop(amp->size);
- ASSERT(amp);
+ ASSERT(amp != NULL);
+
+ if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
+ sp = amp->a_sp;
+ proj = sp->shm_perm.ipc_proj;
+ mutex_enter(&sp->shm_mlock);
+ }
for (anon_idx = 0; anon_idx < npages; anon_idx++) {
if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
@@ -647,11 +688,13 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
"page not in the system");
/*NOTREACHED*/
}
+ ASSERT(pp->p_lckcnt > 0);
page_pp_unlock(pp, 0, 1);
+ if (pp->p_lckcnt == 0)
+ unlocked_bytes += PAGESIZE;
} else {
if ((pp = page_lookup(vp, off, SE_EXCL)) == NULL)
continue;
- page_pp_unlock(pp, 0, 0);
}
/*
* It's logical to invalidate the pages here as in most cases
@@ -697,7 +740,11 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
VN_DISPOSE(pp, B_INVAL, 0, kcred);
}
}
-
+ if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
+ if (unlocked_bytes > 0)
+ rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0);
+ mutex_exit(&sp->shm_mlock);
+ }
if (root != 0 || curnpgs != 0) {
panic("segspt_free_pages: bad large page");
/*NOTREACHED*/
@@ -1392,7 +1439,6 @@ segspt_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist,
ASSERT(sptd->spt_pcachecnt != 0);
ASSERT(sptd->spt_ppa == pplist);
ASSERT(npages == btopr(sptd->spt_amp->size));
-
/*
* Acquire the lock on the dummy seg and destroy the
* ppa array IF this is the last pcachecnt.
@@ -1409,7 +1455,7 @@ segspt_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist,
hat_setref(pplist[i]);
}
if ((sptd->spt_flags & SHM_PAGEABLE) &&
- (sptd->spt_ppa_lckcnt[i] == 0))
+ (sptd->spt_ppa_lckcnt[i] == 0))
free_availrmem++;
page_unlock(pplist[i]);
}
@@ -2363,15 +2409,35 @@ lpgs_err:
return (err);
}
+/*
+ * count the number of bytes in a set of spt pages that are currently not
+ * locked
+ */
+static rctl_qty_t
+spt_unlockedbytes(pgcnt_t npages, page_t **ppa)
+{
+ ulong_t i;
+ rctl_qty_t unlocked = 0;
+
+ for (i = 0; i < npages; i++) {
+ if (ppa[i]->p_lckcnt == 0)
+ unlocked += PAGESIZE;
+ }
+ return (unlocked);
+}
+
int
spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages,
- page_t **ppa, ulong_t *lockmap, size_t pos)
+ page_t **ppa, ulong_t *lockmap, size_t pos,
+ rctl_qty_t *locked)
{
struct shm_data *shmd = seg->s_data;
struct spt_data *sptd = shmd->shm_sptseg->s_data;
ulong_t i;
int kernel;
+ /* return the number of bytes actually locked */
+ *locked = 0;
for (i = 0; i < npages; anon_index++, pos++, i++) {
if (!(shmd->shm_vpage[anon_index] & DISM_PG_LOCKED)) {
if (sptd->spt_ppa_lckcnt[anon_index] <
@@ -2386,19 +2452,19 @@ spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages,
kernel = (sptd->spt_ppa &&
sptd->spt_ppa[anon_index]) ? 1 : 0;
if (!page_pp_lock(ppa[i], 0, kernel)) {
- /* unlock rest of the pages */
- for (; i < npages; i++)
- page_unlock(ppa[i]);
sptd->spt_ppa_lckcnt[anon_index]--;
return (EAGAIN);
}
+ /* if this is a newly locked page, count it */
+ if (ppa[i]->p_lckcnt == 1) {
+ *locked += PAGESIZE;
+ }
shmd->shm_lckpgs++;
shmd->shm_vpage[anon_index] |= DISM_PG_LOCKED;
if (lockmap != NULL)
BT_SET(lockmap, pos);
}
}
- page_unlock(ppa[i]);
}
return (0);
}
@@ -2411,6 +2477,7 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
struct shm_data *shmd = seg->s_data;
struct seg *sptseg = shmd->shm_sptseg;
struct spt_data *sptd = sptseg->s_data;
+ struct kshmid *sp = sptd->spt_amp->a_sp;
pgcnt_t npages, a_npages;
page_t **ppa;
pgcnt_t an_idx, a_an_idx, ppa_idx;
@@ -2419,8 +2486,13 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
size_t share_sz;
ulong_t i;
int sts = 0;
+ rctl_qty_t unlocked = 0;
+ rctl_qty_t locked = 0;
+ struct proc *p = curproc;
+ kproject_t *proj;
ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
+ ASSERT(sp != NULL);
if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
return (0);
@@ -2434,7 +2506,16 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
return (ENOMEM);
}
+ /*
+ * A shm's project never changes, so no lock needed.
+ * The shm has a hold on the project, so it will not go away.
+ * Since we have a mapping to shm within this zone, we know
+ * that the zone will not go away.
+ */
+ proj = sp->shm_perm.ipc_proj;
+
if (op == MC_LOCK) {
+
/*
* Need to align addr and size request if they are not
* aligned so we can always allocate large page(s) however
@@ -2469,18 +2550,36 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
return (sts);
}
- sts = spt_lockpages(seg, an_idx, npages,
- &ppa[ppa_idx], lockmap, pos);
+ mutex_enter(&sp->shm_mlock);
+ /* enforce locked memory rctl */
+ unlocked = spt_unlockedbytes(npages, &ppa[ppa_idx]);
+
+ mutex_enter(&p->p_lock);
+ if (rctl_incr_locked_mem(p, proj, unlocked, 0)) {
+ mutex_exit(&p->p_lock);
+ sts = EAGAIN;
+ } else {
+ mutex_exit(&p->p_lock);
+ sts = spt_lockpages(seg, an_idx, npages,
+ &ppa[ppa_idx], lockmap, pos, &locked);
+
+ /*
+ * correct locked count if not all pages could be
+ * locked
+ */
+ if ((unlocked - locked) > 0) {
+ rctl_decr_locked_mem(NULL, proj,
+ (unlocked - locked), 0);
+ }
+ }
/*
- * unlock remaining pages for requests which are not
- * aligned or not in 4 M chunks
+ * unlock pages
*/
- for (i = 0; i < ppa_idx; i++)
- page_unlock(ppa[i]);
- for (i = ppa_idx + npages; i < a_npages; i++)
+ for (i = 0; i < a_npages; i++)
page_unlock(ppa[i]);
if (sptd->spt_ppa != NULL)
sptd->spt_flags |= DISM_PPA_CHANGED;
+ mutex_exit(&sp->shm_mlock);
mutex_exit(&sptd->spt_lock);
kmem_free(ppa, ((sizeof (page_t *)) * a_npages));
@@ -2493,6 +2592,7 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
struct page *pp;
int kernel;
anon_sync_obj_t cookie;
+ rctl_qty_t unlocked = 0;
amp = sptd->spt_amp;
mutex_enter(&sptd->spt_lock);
@@ -2506,13 +2606,13 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
if (sptd->spt_ppa != NULL)
sptd->spt_flags |= DISM_PPA_CHANGED;
+ mutex_enter(&sp->shm_mlock);
ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
for (i = 0; i < npages; i++, an_idx++) {
if (shmd->shm_vpage[an_idx] & DISM_PG_LOCKED) {
anon_array_enter(amp, an_idx, &cookie);
ap = anon_get_ptr(amp->ahp, an_idx);
ASSERT(ap);
- ASSERT(sptd->spt_ppa_lckcnt[an_idx] > 0);
swap_xlate(ap, &vp, &off);
anon_array_exit(&cookie);
@@ -2527,7 +2627,10 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
*/
kernel = (sptd->spt_ppa &&
sptd->spt_ppa[an_idx]) ? 1 : 0;
+ ASSERT(pp->p_lckcnt > 0);
page_pp_unlock(pp, 0, kernel);
+ if (pp->p_lckcnt == 0)
+ unlocked += PAGESIZE;
page_unlock(pp);
shmd->shm_vpage[an_idx] &= ~DISM_PG_LOCKED;
sptd->spt_ppa_lckcnt[an_idx]--;
@@ -2538,6 +2641,9 @@ segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
if (sptd->spt_ppa != NULL)
sptd->spt_flags |= DISM_PPA_CHANGED;
mutex_exit(&sptd->spt_lock);
+
+ rctl_decr_locked_mem(NULL, proj, unlocked, 0);
+ mutex_exit(&sp->shm_mlock);
}
return (sts);
}
diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c
index 96fb02827f..83ef08e9f5 100644
--- a/usr/src/uts/common/vm/seg_vn.c
+++ b/usr/src/uts/common/vm/seg_vn.c
@@ -70,7 +70,11 @@
#include <vm/anon.h>
#include <vm/page.h>
#include <vm/vpage.h>
-
+#include <sys/proc.h>
+#include <sys/task.h>
+#include <sys/project.h>
+#include <sys/zone.h>
+#include <sys/shm_impl.h>
/*
* Private seg op routines.
*/
@@ -210,7 +214,7 @@ static struct segvnvmstats_str {
#define SDR_RANGE 1 /* demote entire range */
#define SDR_END 2 /* demote non aligned ends only */
-#define CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr) { \
+#define CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr) { \
if ((len) != 0) { \
lpgaddr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz); \
ASSERT(lpgaddr >= (seg)->s_base); \
@@ -2393,13 +2397,29 @@ segvn_faultpage(
* allocating vpage here if it's absent requires
* upgrading the segvn reader lock, the cost of
* which does not seem worthwhile.
+ *
+ * Usually testing and setting VPP_ISPPLOCK and
+ * VPP_SETPPLOCK requires holding the segvn lock as
+ * writer, but in this case all readers are
+ * serializing on the anon array lock.
*/
if (AS_ISPGLCK(seg->s_as) && vpage != NULL &&
- (svd->flags & MAP_NORESERVE)) {
- claim = VPP_PROT(vpage) & PROT_WRITE;
+ (svd->flags & MAP_NORESERVE) &&
+ !VPP_ISPPLOCK(vpage)) {
+ proc_t *p = seg->s_as->a_proc;
ASSERT(svd->type == MAP_PRIVATE);
- if (page_pp_lock(pp, claim, 0))
- VPP_SETPPLOCK(vpage);
+ mutex_enter(&p->p_lock);
+ if (rctl_incr_locked_mem(p, NULL, PAGESIZE,
+ 1) == 0) {
+ claim = VPP_PROT(vpage) & PROT_WRITE;
+ if (page_pp_lock(pp, claim, 0)) {
+ VPP_SETPPLOCK(vpage);
+ } else {
+ rctl_decr_locked_mem(p, NULL,
+ PAGESIZE, 1);
+ }
+ }
+ mutex_exit(&p->p_lock);
}
hat_memload(hat, addr, pp, prot, hat_flag);
@@ -5826,7 +5846,7 @@ segvn_claim_pages(
page_t *pp;
pgcnt_t pg_idx, i;
int err = 0;
- anoff_t aoff;
+ anoff_t aoff;
int anon = (amp != NULL) ? 1 : 0;
ASSERT(svd->type == MAP_PRIVATE);
@@ -6931,6 +6951,13 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
struct anon *ap;
struct vattr va;
anon_sync_obj_t cookie;
+ struct kshmid *sp = NULL;
+ struct proc *p = curproc;
+ kproject_t *proj = NULL;
+ int chargeproc = 1;
+ size_t locked_bytes = 0;
+ size_t unlocked_bytes = 0;
+ int err = 0;
/*
* Hold write lock on address space because may split or concatenate
@@ -6938,6 +6965,18 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
*/
ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
+ /*
+ * If this is a shm, use shm's project and zone, else use
+ * project and zone of calling process
+ */
+
+ /* Determine if this segment backs a sysV shm */
+ if (svd->amp != NULL && svd->amp->a_sp != NULL) {
+ sp = svd->amp->a_sp;
+ proj = sp->shm_perm.ipc_proj;
+ chargeproc = 0;
+ }
+
SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
if (attr) {
pageprot = attr & ~(SHARED|PRIVATE);
@@ -6990,6 +7029,61 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
offset = svd->offset + (uintptr_t)(addr - seg->s_base);
evp = &svd->vpage[seg_page(seg, addr + len)];
+ if (sp != NULL)
+ mutex_enter(&sp->shm_mlock);
+
+ /* determine number of unlocked bytes in range for lock operation */
+ if (op == MC_LOCK) {
+
+ if (sp == NULL) {
+ for (vpp = &svd->vpage[seg_page(seg, addr)]; vpp < evp;
+ vpp++) {
+ if (!VPP_ISPPLOCK(vpp))
+ unlocked_bytes += PAGESIZE;
+ }
+ } else {
+ ulong_t i_idx, i_edx;
+ anon_sync_obj_t i_cookie;
+ struct anon *i_ap;
+ struct vnode *i_vp;
+ u_offset_t i_off;
+
+ /* Only count sysV pages once for locked memory */
+ i_edx = svd->anon_index + seg_page(seg, addr + len);
+ ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
+ for (i_idx = anon_index; i_idx < i_edx; i_idx++) {
+ anon_array_enter(amp, i_idx, &i_cookie);
+ i_ap = anon_get_ptr(amp->ahp, i_idx);
+ if (i_ap == NULL) {
+ unlocked_bytes += PAGESIZE;
+ anon_array_exit(&i_cookie);
+ continue;
+ }
+ swap_xlate(i_ap, &i_vp, &i_off);
+ anon_array_exit(&i_cookie);
+ pp = page_lookup(i_vp, i_off, SE_SHARED);
+ if (pp == NULL) {
+ unlocked_bytes += PAGESIZE;
+ continue;
+ } else if (pp->p_lckcnt == 0)
+ unlocked_bytes += PAGESIZE;
+ page_unlock(pp);
+ }
+ ANON_LOCK_EXIT(&amp->a_rwlock);
+ }
+
+ mutex_enter(&p->p_lock);
+ err = rctl_incr_locked_mem(p, proj, unlocked_bytes,
+ chargeproc);
+ mutex_exit(&p->p_lock);
+
+ if (err) {
+ if (sp != NULL)
+ mutex_exit(&sp->shm_mlock);
+ SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
+ return (err);
+ }
+ }
/*
* Loop over all pages in the range. Process if we're locking and
* page has not already been locked in this mapping; or if we're
@@ -7022,9 +7116,8 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
if (pp == NULL) {
anon_array_exit(&cookie);
ANON_LOCK_EXIT(&amp->a_rwlock);
- SEGVN_LOCK_EXIT(seg->s_as,
- &svd->lock);
- return (ENOMEM);
+ err = ENOMEM;
+ goto out;
}
ASSERT(anon_get_ptr(amp->ahp,
anon_index) == NULL);
@@ -7096,8 +7189,8 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
* 4125102 for details of the problem.
*/
if (error == EDEADLK) {
- SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
- return (error);
+ err = error;
+ goto out;
}
/*
* Quit if we fail to fault in the page. Treat
@@ -7108,21 +7201,19 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
va.va_mask = AT_SIZE;
if (VOP_GETATTR(svd->vp, &va, 0,
svd->cred) != 0) {
- SEGVN_LOCK_EXIT(seg->s_as,
- &svd->lock);
- return (EIO);
+ err = EIO;
+ goto out;
}
if (btopr(va.va_size) >=
btopr(off + 1)) {
- SEGVN_LOCK_EXIT(seg->s_as,
- &svd->lock);
- return (EIO);
+ err = EIO;
+ goto out;
}
- SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
- return (0);
+ goto out;
+
} else if (error) {
- SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
- return (EIO);
+ err = EIO;
+ goto out;
}
pp = pl[0];
ASSERT(pp != NULL);
@@ -7154,39 +7245,75 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
if (op == MC_LOCK) {
int ret = 1; /* Assume success */
- /*
- * Make sure another thread didn't lock
- * the page after we released the segment
- * lock.
- */
- if ((attr == 0 || VPP_PROT(vpp) == pageprot) &&
- !VPP_ISPPLOCK(vpp)) {
- ret = page_pp_lock(pp, claim, 0);
- if (ret != 0) {
- VPP_SETPPLOCK(vpp);
- if (lockmap != (ulong_t *)NULL)
- BT_SET(lockmap, pos);
- }
- }
- page_unlock(pp);
+ ASSERT(!VPP_ISPPLOCK(vpp));
+
+ ret = page_pp_lock(pp, claim, 0);
if (ret == 0) {
- SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
- return (EAGAIN);
+ /* locking page failed */
+ page_unlock(pp);
+ err = EAGAIN;
+ goto out;
}
+ VPP_SETPPLOCK(vpp);
+ if (sp != NULL) {
+ if (pp->p_lckcnt == 1)
+ locked_bytes += PAGESIZE;
+ } else
+ locked_bytes += PAGESIZE;
+
+ if (lockmap != (ulong_t *)NULL)
+ BT_SET(lockmap, pos);
+
+ page_unlock(pp);
} else {
+ ASSERT(VPP_ISPPLOCK(vpp));
if (pp != NULL) {
- if ((attr == 0 ||
- VPP_PROT(vpp) == pageprot) &&
- VPP_ISPPLOCK(vpp))
- page_pp_unlock(pp, claim, 0);
+ /* sysV pages should be locked */
+ ASSERT(sp == NULL || pp->p_lckcnt > 0);
+ page_pp_unlock(pp, claim, 0);
+ if (sp != NULL) {
+ if (pp->p_lckcnt == 0)
+ unlocked_bytes
+ += PAGESIZE;
+ } else
+ unlocked_bytes += PAGESIZE;
page_unlock(pp);
+ } else {
+ ASSERT(sp != NULL);
+ unlocked_bytes += PAGESIZE;
}
VPP_CLRPPLOCK(vpp);
}
}
}
+out:
+ if (op == MC_LOCK) {
+ /* Credit back bytes that did not get locked */
+ if ((unlocked_bytes - locked_bytes) > 0) {
+ if (proj == NULL)
+ mutex_enter(&p->p_lock);
+ rctl_decr_locked_mem(p, proj,
+ (unlocked_bytes - locked_bytes), chargeproc);
+ if (proj == NULL)
+ mutex_exit(&p->p_lock);
+ }
+
+ } else {
+ /* Account bytes that were unlocked */
+ if (unlocked_bytes > 0) {
+ if (proj == NULL)
+ mutex_enter(&p->p_lock);
+ rctl_decr_locked_mem(p, proj, unlocked_bytes,
+ chargeproc);
+ if (proj == NULL)
+ mutex_exit(&p->p_lock);
+ }
+ }
+ if (sp != NULL)
+ mutex_exit(&sp->shm_mlock);
SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
- return (0);
+
+ return (err);
}
/*
diff --git a/usr/src/uts/common/vm/vm_anon.c b/usr/src/uts/common/vm/vm_anon.c
index c05f20478e..0cad34257c 100644
--- a/usr/src/uts/common/vm/vm_anon.c
+++ b/usr/src/uts/common/vm/vm_anon.c
@@ -3139,6 +3139,7 @@ anonmap_alloc(size_t size, size_t swresv)
amp->swresv = swresv;
amp->locality = 0;
amp->a_szc = 0;
+ amp->a_sp = NULL;
return (amp);
}
diff --git a/usr/src/uts/i86pc/os/pmem.c b/usr/src/uts/i86pc/os/pmem.c
index 56986e0153..f7269bfb82 100644
--- a/usr/src/uts/i86pc/os/pmem.c
+++ b/usr/src/uts/i86pc/os/pmem.c
@@ -38,7 +38,6 @@
#include <sys/ddidevmap.h>
#include <sys/vnode.h>
#include <sys/sysmacros.h>
-#include <sys/project.h>
#include <vm/seg_dev.h>
#include <sys/pmem.h>
#include <vm/hat_i86.h>
@@ -126,7 +125,7 @@ static int lpp_create(page_t **, pgcnt_t, pgcnt_t *, pmem_lpg_t **,
static void tlist_in(page_t *, pgcnt_t, vnode_t *, u_offset_t *);
static void tlist_out(page_t *, pgcnt_t);
static int pmem_cookie_alloc(struct devmap_pmem_cookie **, pgcnt_t, uint_t);
-static int pmem_lock(pgcnt_t, kproject_t **);
+static int pmem_lock(pgcnt_t, proc_t *p);
/*
* Called by driver devmap routine to pass physical memory mapping info to
@@ -314,13 +313,12 @@ devmap_pmem_alloc(size_t size, uint_t flags, devmap_pmem_cookie_t *cookiep)
pcp->dp_npages = npages;
/*
- * See if the requested memory can be locked. Currently we do resource
- * controls on the project levlel only.
+ * See if the requested memory can be locked.
*/
- if (pmem_lock(npages, &(pcp->dp_projp)) == DDI_FAILURE)
+ pcp->dp_proc = curproc;
+ if (pmem_lock(npages, curproc) == DDI_FAILURE)
goto alloc_fail;
locked = 1;
-
/*
* First, grab as many as possible from pmem_mpool. If pages in
* pmem_mpool are enough for this request, we are done.
@@ -402,8 +400,7 @@ alloc_fail:
mutex_exit(&pmem_mutex);
}
if (locked == 1)
- i_ddi_decr_locked_memory(NULL, NULL, pcp->dp_projp, NULL,
- ptob(pcp->dp_npages));
+ i_ddi_decr_locked_memory(pcp->dp_proc, ptob(pcp->dp_npages));
/* Freeing pmem_cookie. */
kmem_free(pcp->dp_vnp, sizeof (vnode_t));
kmem_free(pcp->dp_pparray, npages * sizeof (page_t *));
@@ -492,8 +489,8 @@ devmap_pmem_free(devmap_pmem_cookie_t cookie)
pmem_lpg_concat(&pmem_occ_lpgs, &pf_lpgs);
mutex_exit(&pmem_mutex);
- i_ddi_decr_locked_memory(NULL, NULL, (kproject_t *)pcp->dp_projp, NULL,
- ptob(pcp->dp_npages));
+ if (curproc == pcp->dp_proc)
+ i_ddi_decr_locked_memory(curproc, ptob(pcp->dp_npages));
kmem_free(pcp->dp_vnp, sizeof (vnode_t));
kmem_free(pcp->dp_pparray, pcp->dp_npages * sizeof (page_t *));
kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
@@ -552,19 +549,13 @@ pmem_cookie_alloc(struct devmap_pmem_cookie **pcpp, pgcnt_t n, uint_t kflags)
return (DDI_SUCCESS);
}
-/* Try to lock down n pages resource for current project. */
+/* Try to lock down the locked-memory resource for n pages on behalf of process p. */
static int
-pmem_lock(pgcnt_t n, kproject_t **prjpp)
+pmem_lock(pgcnt_t n, proc_t *p)
{
- mutex_enter(&curproc->p_lock);
- if (i_ddi_incr_locked_memory(curproc, NULL, NULL, NULL,
- ptob(n)) != 0) {
- mutex_exit(&curproc->p_lock);
+ if (i_ddi_incr_locked_memory(p, ptob(n)) != 0) {
return (DDI_FAILURE);
}
- /* Store this project in cookie for later lock/unlock. */
- *prjpp = curproc->p_task->tk_proj;
- mutex_exit(&curproc->p_lock);
return (DDI_SUCCESS);
}