diff options
Diffstat (limited to 'usr/src/uts/common/os/pid.c')
-rw-r--r-- | usr/src/uts/common/os/pid.c | 725 |
1 files changed, 725 insertions, 0 deletions
diff --git a/usr/src/uts/common/os/pid.c b/usr/src/uts/common/os/pid.c new file mode 100644 index 0000000000..00ad0a49e7 --- /dev/null +++ b/usr/src/uts/common/os/pid.c @@ -0,0 +1,725 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/proc.h> +#include <sys/kmem.h> +#include <sys/tuneable.h> +#include <sys/var.h> +#include <sys/cred.h> +#include <sys/systm.h> +#include <sys/prsystm.h> +#include <sys/vnode.h> +#include <sys/session.h> +#include <sys/cpuvar.h> +#include <sys/cmn_err.h> +#include <sys/bitmap.h> +#include <sys/debug.h> +#include <c2/audit.h> +#include <sys/zone.h> + +/* directory entries for /proc */ +union procent { + proc_t *pe_proc; + union procent *pe_next; +}; + +struct pid pid0 = { + 0, /* pid_prinactive */ + 1, /* pid_pgorphaned */ + 0, /* pid_padding */ + 0, /* pid_prslot */ + 0, /* pid_id */ + NULL, /* pid_pglink */ + NULL, /* pid_link */ + 3 /* pid_ref */ +}; + +static int pid_hashlen = 4; /* desired average hash chain length */ +static int pid_hashsz; /* number of buckets in the hash table */ + +#define HASHPID(pid) (pidhash[((pid)&(pid_hashsz-1))]) + +extern uint_t nproc; +extern struct kmem_cache *process_cache; +static void upcount_init(void); + +kmutex_t pidlock; /* global process lock */ +kmutex_t pr_pidlock; /* /proc global process lock */ +kcondvar_t *pr_pid_cv; /* for /proc, one per process slot */ +struct plock *proc_lock; /* persistent array of p_lock's */ + +/* + * See the comment above pid_getlockslot() for a detailed explanation of this + * constant. Note that a PLOCK_SHIFT of 3 implies 64-byte coherence + * granularity; if the coherence granularity is ever changed, this constant + * should be modified to reflect the change to minimize proc_lock false + * sharing (correctness, however, is guaranteed regardless of the coherence + * granularity). + */ +#define PLOCK_SHIFT 3 + +static kmutex_t pidlinklock; +static struct pid **pidhash; +static pid_t minpid; +static pid_t mpid; +static union procent *procdir; +static union procent *procentfree; + +static struct pid * +pid_lookup(pid_t pid) +{ + struct pid *pidp; + + ASSERT(MUTEX_HELD(&pidlinklock)); + + for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) { + if (pidp->pid_id == pid) { + ASSERT(pidp->pid_ref > 0); + break; + } + } + return (pidp); +} + +void +pid_setmin(void) +{ + if (jump_pid && jump_pid > mpid) + minpid = mpid = jump_pid; + else + minpid = mpid + 1; +} + +/* + * When prslots are simply used as an index to determine a process' p_lock, + * adjacent prslots share adjacent p_locks. On machines where the size + * of a mutex is smaller than that of a cache line (which, as of this writing, + * is true for all machines on which Solaris runs), this can potentially + * induce false sharing. The standard solution for false sharing is to pad + * out one's data structures (in this case, struct plock). However, + * given the size and (generally) sparse use of the proc_lock array, this + * is suboptimal. We therefore stride through the proc_lock array with + * a stride of PLOCK_SHIFT. PLOCK_SHIFT should be defined as: + * + * log_2 (coherence_granularity / sizeof (kmutex_t)) + * + * Under this scheme, false sharing is still possible -- but only when + * the number of active processes is very large. Note that the one-to-one + * mapping between prslots and lockslots is maintained. + */ +static int +pid_getlockslot(int prslot) +{ + int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT; + int perlap = even >> PLOCK_SHIFT; + + if (prslot >= even) + return (prslot); + + return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap)); +} + +/* + * This function assigns a pid for use in a fork request. It allocates + * a pid structure, tries to find an empty slot in the proc table, + * and selects the process id. + * + * pid_assign() returns the new pid on success, -1 on failure. + */ +pid_t +pid_assign(proc_t *prp) +{ + struct pid *pidp; + union procent *pep; + pid_t newpid, startpid; + + pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP); + + mutex_enter(&pidlinklock); + if ((pep = procentfree) == NULL) { + /* + * ran out of /proc directory entries + */ + goto failed; + } + + /* + * Allocate a pid + */ + startpid = mpid; + do { + newpid = (++mpid == maxpid ? mpid = minpid : mpid); + } while (pid_lookup(newpid) && newpid != startpid); + + if (newpid == startpid && pid_lookup(newpid)) { + /* couldn't find a free pid */ + goto failed; + } + + procentfree = pep->pe_next; + pep->pe_proc = prp; + prp->p_pidp = pidp; + + /* + * Put pid into the pid hash table. + */ + pidp->pid_link = HASHPID(newpid); + HASHPID(newpid) = pidp; + pidp->pid_ref = 1; + pidp->pid_id = newpid; + pidp->pid_prslot = pep - procdir; + prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)]; + mutex_exit(&pidlinklock); + + return (newpid); + +failed: + mutex_exit(&pidlinklock); + kmem_free(pidp, sizeof (struct pid)); + return (-1); +} + +/* + * decrement the reference count for pid + */ +int +pid_rele(struct pid *pidp) +{ + struct pid **pidpp; + + mutex_enter(&pidlinklock); + ASSERT(pidp != &pid0); + + pidpp = &HASHPID(pidp->pid_id); + for (;;) { + ASSERT(*pidpp != NULL); + if (*pidpp == pidp) + break; + pidpp = &(*pidpp)->pid_link; + } + + *pidpp = pidp->pid_link; + mutex_exit(&pidlinklock); + + kmem_free(pidp, sizeof (*pidp)); + return (0); +} + +void +proc_entry_free(struct pid *pidp) +{ + mutex_enter(&pidlinklock); + pidp->pid_prinactive = 1; + procdir[pidp->pid_prslot].pe_next = procentfree; + procentfree = &procdir[pidp->pid_prslot]; + mutex_exit(&pidlinklock); +} + +void +pid_exit(proc_t *prp) +{ + struct pid *pidp; + + ASSERT(MUTEX_HELD(&pidlock)); + + /* + * Exit process group. If it is NULL, it's because fork failed + * before calling pgjoin(). + */ + ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL); + if (prp->p_pgidp != NULL) + pgexit(prp); + + SESS_RELE(prp->p_sessp); + + pidp = prp->p_pidp; + + proc_entry_free(pidp); + +#ifdef C2_AUDIT + if (audit_active) + audit_pfree(prp); +#endif + + if (practive == prp) { + practive = prp->p_next; + } + + if (prp->p_next) { + prp->p_next->p_prev = prp->p_prev; + } + if (prp->p_prev) { + prp->p_prev->p_next = prp->p_next; + } + + PID_RELE(pidp); + + mutex_destroy(&prp->p_crlock); + kmem_cache_free(process_cache, prp); + nproc--; +} + +/* + * Find a process visible from the specified zone given its process ID. + */ +proc_t * +prfind_zone(pid_t pid, zoneid_t zoneid) +{ + struct pid *pidp; + proc_t *p; + + ASSERT(MUTEX_HELD(&pidlock)); + + mutex_enter(&pidlinklock); + pidp = pid_lookup(pid); + mutex_exit(&pidlinklock); + if (pidp != NULL && pidp->pid_prinactive == 0) { + p = procdir[pidp->pid_prslot].pe_proc; + if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) + return (p); + } + return (NULL); +} + +/* + * Find a process given its process ID. This obeys zone restrictions, + * so if the caller is in a non-global zone it won't find processes + * associated with other zones. Use prfind_zone(pid, ALL_ZONES) to + * bypass this restriction. + */ +proc_t * +prfind(pid_t pid) +{ + zoneid_t zoneid; + + if (INGLOBALZONE(curproc)) + zoneid = ALL_ZONES; + else + zoneid = getzoneid(); + return (prfind_zone(pid, zoneid)); +} + +proc_t * +pgfind_zone(pid_t pgid, zoneid_t zoneid) +{ + struct pid *pidp; + + ASSERT(MUTEX_HELD(&pidlock)); + + mutex_enter(&pidlinklock); + pidp = pid_lookup(pgid); + mutex_exit(&pidlinklock); + if (pidp != NULL) { + proc_t *p = pidp->pid_pglink; + + if (zoneid == ALL_ZONES || pgid == 0 || p == NULL || + p->p_zone->zone_id == zoneid) + return (p); + } + return (NULL); +} + +/* + * return the head of the list of processes whose process group ID is 'pgid', + * or NULL, if no such process group + */ +proc_t * +pgfind(pid_t pgid) +{ + zoneid_t zoneid; + + if (INGLOBALZONE(curproc)) + zoneid = ALL_ZONES; + else + zoneid = getzoneid(); + return (pgfind_zone(pgid, zoneid)); +} + +/* + * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK. + * Returns the proc pointer on success, NULL on failure. sprlock() is + * really just a stripped-down version of pr_p_lock() to allow practive + * walkers like dofusers() and dumpsys() to synchronize with /proc. + */ +proc_t * +sprlock_zone(pid_t pid, zoneid_t zoneid) +{ + proc_t *p; + kmutex_t *mp; + + for (;;) { + mutex_enter(&pidlock); + if ((p = prfind_zone(pid, zoneid)) == NULL) { + mutex_exit(&pidlock); + return (NULL); + } + /* + * p_lock is persistent, but p itself is not -- it could + * vanish during cv_wait(). Load p->p_lock now so we can + * drop it after cv_wait() without referencing p. + */ + mp = &p->p_lock; + mutex_enter(mp); + mutex_exit(&pidlock); + /* + * If the process is in some half-baked state, fail. + */ + if (p->p_stat == SZOMB || p->p_stat == SIDL || + p->p_tlist == NULL || (p->p_flag & SEXITLWPS)) { + mutex_exit(mp); + return (NULL); + } + if (panicstr) + return (p); + if (!(p->p_proc_flag & P_PR_LOCK)) + break; + cv_wait(&pr_pid_cv[p->p_slot], mp); + mutex_exit(mp); + } + p->p_proc_flag |= P_PR_LOCK; + THREAD_KPRI_REQUEST(); + return (p); +} + +proc_t * +sprlock(pid_t pid) +{ + zoneid_t zoneid; + + if (INGLOBALZONE(curproc)) + zoneid = ALL_ZONES; + else + zoneid = getzoneid(); + return (sprlock_zone(pid, zoneid)); +} + +void +sprlock_proc(proc_t *p) +{ + ASSERT(MUTEX_HELD(&p->p_lock)); + + while (p->p_proc_flag & P_PR_LOCK) { + cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock); + } + + p->p_proc_flag |= P_PR_LOCK; + THREAD_KPRI_REQUEST(); +} + +void +sprunlock(proc_t *p) +{ + if (panicstr) { + mutex_exit(&p->p_lock); + return; + } + + ASSERT(p->p_proc_flag & P_PR_LOCK); + ASSERT(MUTEX_HELD(&p->p_lock)); + + cv_signal(&pr_pid_cv[p->p_slot]); + p->p_proc_flag &= ~P_PR_LOCK; + mutex_exit(&p->p_lock); + THREAD_KPRI_RELEASE(); +} + +void +pid_init(void) +{ + int i; + + pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen); + + pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP); + procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP); + pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP); + proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP); + + nproc = 1; + practive = proc_sched; + proc_sched->p_next = NULL; + procdir[0].pe_proc = proc_sched; + + procentfree = &procdir[1]; + for (i = 1; i < v.v_proc - 1; i++) + procdir[i].pe_next = &procdir[i+1]; + procdir[i].pe_next = NULL; + + HASHPID(0) = &pid0; + + upcount_init(); +} + +proc_t * +pid_entry(int slot) +{ + union procent *pep; + proc_t *prp; + + ASSERT(MUTEX_HELD(&pidlock)); + ASSERT(slot >= 0 && slot < v.v_proc); + + pep = procdir[slot].pe_next; + if (pep >= procdir && pep < &procdir[v.v_proc]) + return (NULL); + prp = procdir[slot].pe_proc; + if (prp != 0 && prp->p_stat == SIDL) + return (NULL); + return (prp); +} + +/* + * Send the specified signal to all processes whose process group ID is + * equal to 'pgid' + */ + +void +signal(pid_t pgid, int sig) +{ + struct pid *pidp; + proc_t *prp; + + mutex_enter(&pidlock); + mutex_enter(&pidlinklock); + if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) { + mutex_exit(&pidlinklock); + mutex_exit(&pidlock); + return; + } + mutex_exit(&pidlinklock); + for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) { + mutex_enter(&prp->p_lock); + sigtoproc(prp, NULL, sig); + mutex_exit(&prp->p_lock); + } + mutex_exit(&pidlock); +} + +/* + * Send the specified signal to the specified process + */ + +void +prsignal(struct pid *pidp, int sig) +{ + if (!(pidp->pid_prinactive)) + psignal(procdir[pidp->pid_prslot].pe_proc, sig); +} + +#include <sys/sunddi.h> + +/* + * DDI/DKI interfaces for drivers to send signals to processes + */ + +/* + * obtain an opaque reference to a process for signaling + */ +void * +proc_ref(void) +{ + struct pid *pidp; + + mutex_enter(&pidlock); + pidp = curproc->p_pidp; + PID_HOLD(pidp); + mutex_exit(&pidlock); + + return (pidp); +} + +/* + * release a reference to a process + * - a process can exit even if a driver has a reference to it + * - one proc_unref for every proc_ref + */ +void +proc_unref(void *pref) +{ + mutex_enter(&pidlock); + PID_RELE((struct pid *)pref); + mutex_exit(&pidlock); +} + +/* + * send a signal to a process + * + * - send the process the signal + * - if the process went away, return a -1 + * - if the process is still there return 0 + */ +int +proc_signal(void *pref, int sig) +{ + struct pid *pidp = pref; + + prsignal(pidp, sig); + return (pidp->pid_prinactive ? -1 : 0); +} + + +static struct upcount **upc_hash; /* a boot time allocated array */ +static ulong_t upc_hashmask; +#define UPC_HASH(x, y) ((ulong_t)(x ^ y) & upc_hashmask) + +/* + * Get us off the ground. Called once at boot. + */ +void +upcount_init(void) +{ + ulong_t upc_hashsize; + + /* + * An entry per MB of memory is our current guess + */ + /* + * 2^20 is a meg, so shifting right by 20 - PAGESHIFT + * converts pages to megs (without overflowing a u_int + * if you have more than 4G of memory, like ptob(physmem)/1M + * would). + */ + upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT))); + upc_hashmask = upc_hashsize - 1; + upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *), + KM_SLEEP); +} + +/* + * Increment the number of processes associated with a given uid and zoneid. + */ +void +upcount_inc(uid_t uid, zoneid_t zoneid) +{ + struct upcount **upc, **hupc; + struct upcount *new; + + ASSERT(MUTEX_HELD(&pidlock)); + new = NULL; + hupc = &upc_hash[UPC_HASH(uid, zoneid)]; +top: + upc = hupc; + while ((*upc) != NULL) { + if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) { + (*upc)->up_count++; + if (new) { + /* + * did not need `new' afterall. + */ + kmem_free(new, sizeof (*new)); + } + return; + } + upc = &(*upc)->up_next; + } + + /* + * There is no entry for this <uid,zoneid> pair. + * Allocate one. If we have to drop pidlock, check + * again. + */ + if (new == NULL) { + new = (struct upcount *)kmem_alloc(sizeof (*new), KM_NOSLEEP); + if (new == NULL) { + mutex_exit(&pidlock); + new = (struct upcount *)kmem_alloc(sizeof (*new), + KM_SLEEP); + mutex_enter(&pidlock); + goto top; + } + } + + + /* + * On the assumption that a new user is going to do some + * more forks, put the new upcount structure on the front. + */ + upc = hupc; + + new->up_uid = uid; + new->up_zoneid = zoneid; + new->up_count = 1; + new->up_next = *upc; + + *upc = new; +} + +/* + * Decrement the number of processes a given uid and zoneid has. + */ +void +upcount_dec(uid_t uid, zoneid_t zoneid) +{ + struct upcount **upc; + struct upcount *done; + + ASSERT(MUTEX_HELD(&pidlock)); + + upc = &upc_hash[UPC_HASH(uid, zoneid)]; + while ((*upc) != NULL) { + if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) { + (*upc)->up_count--; + if ((*upc)->up_count == 0) { + done = *upc; + *upc = (*upc)->up_next; + kmem_free(done, sizeof (*done)); + } + return; + } + upc = &(*upc)->up_next; + } + cmn_err(CE_PANIC, "decr_upcount-off the end"); +} + +/* + * Returns the number of processes a uid has. + * Non-existent uid's are assumed to have no processes. + */ +int +upcount_get(uid_t uid, zoneid_t zoneid) +{ + struct upcount *upc; + + ASSERT(MUTEX_HELD(&pidlock)); + + upc = upc_hash[UPC_HASH(uid, zoneid)]; + while (upc != NULL) { + if (upc->up_uid == uid && upc->up_zoneid == zoneid) { + return (upc->up_count); + } + upc = upc->up_next; + } + return (0); +} |