summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/os/exit.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/os/exit.c')
-rw-r--r--usr/src/uts/common/os/exit.c210
1 files changed, 174 insertions, 36 deletions
diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c
index 5a9355ae9f..7ccf9b3221 100644
--- a/usr/src/uts/common/os/exit.c
+++ b/usr/src/uts/common/os/exit.c
@@ -141,11 +141,32 @@ rexit(int rval)
}
/*
+ * Bump the init_restarts kstat and let interested parties know about the
+ * restart.
+ */
+static void
+restart_init_notify(zone_t *zone)
+{
+ nvlist_t *nvl = NULL;
+
+ zone->zone_proc_init_restarts++;
+
+ if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0 &&
+ nvlist_add_uint32(nvl, ZONE_CB_RESTARTS,
+ zone->zone_proc_init_restarts) == 0) {
+ zone_sysevent_publish(zone, ZONE_EVENT_INIT_CLASS,
+ ZONE_EVENT_INIT_RESTART_SC, nvl);
+ }
+
+ nvlist_free(nvl);
+}
+
+/*
* Called by proc_exit() when a zone's init exits, presumably because
* it failed. As long as the given zone is still in the "running"
* state, we will re-exec() init, but first we need to reset things
* which are usually inherited across exec() but will break init's
- * assumption that it is being exec()'d from a virgin process. Most
+ * assumption that it is being exec()'d from a virgin process. Most
* importantly this includes closing all file descriptors (exec only
* closes those marked close-on-exec) and resetting signals (exec only
* resets handled signals, and we need to clear any signals which
@@ -234,7 +255,7 @@ restart_init(int what, int why)
siginfofree(lwp->lwp_curinfo);
lwp->lwp_curinfo = NULL;
}
- lwp_ctmpl_clear(lwp);
+ lwp_ctmpl_clear(lwp, B_FALSE);
/*
* Reset both the process root directory and the current working
@@ -286,6 +307,8 @@ restart_init(int what, int why)
ASSERT(p == curproc);
(void) freectty(B_TRUE);
+ restart_init_notify(p->p_zone);
+
/*
* Now exec() the new init(8) on top of the current process. If we
* succeed, the caller will treat this like a successful system call.
@@ -320,7 +343,7 @@ exit(int why, int what)
/*
* If proc_exit() fails, then some other lwp in the process
* got there first. We just have to call lwp_exit() to allow
- * the other lwp to finish exiting the process. Otherwise we're
+ * the other lwp to finish exiting the process. Otherwise we're
* restarting init, and should return.
*/
if (proc_exit(why, what) != 0) {
@@ -333,7 +356,7 @@ exit(int why, int what)
/*
* Set the SEXITING flag on the process, after making sure /proc does
- * not have it locked. This is done in more places than proc_exit(),
+ * not have it locked. This is done in more places than proc_exit(),
* so it is a separate function.
*/
void
@@ -380,8 +403,9 @@ zone_init_exit(zone_t *z, int why, int what)
*/
if (!z->zone_restart_init) {
/*
- * The zone has been set up to halt when init exits.
+ * The zone has been setup to halt when init exits.
*/
+ z->zone_init_status = wstat(why, what);
(void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, zone_kcred());
z->zone_proc_initpid = -1;
return (B_FALSE);
@@ -421,6 +445,7 @@ zone_init_exit(zone_t *z, int why, int what)
(void) zone_kadmin(A_REBOOT, 0, NULL, zone_kcred());
}
+ z->zone_init_status = wstat(why, what);
z->zone_proc_initpid = -1;
return (B_FALSE);
}
@@ -441,14 +466,16 @@ zone_init_exit(zone_t *z, int why, int what)
/*
* No restart modifiers on the zone, attempt to restart init.
*/
- if (restart_init(what, why) == 0)
+ if (restart_init(what, why) == 0) {
return (B_TRUE);
+ }
}
/*
* The restart failed, or the criteria for a restart are not met;
* the zone will shut down.
*/
+ z->zone_init_status = wstat(why, what);
(void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, zone_kcred());
z->zone_proc_initpid = -1;
return (B_FALSE);
@@ -483,7 +510,7 @@ proc_exit(int why, int what)
/*
* Stop and discard the process's lwps except for the current one,
- * unless some other lwp beat us to it. If exitlwps() fails then
+ * unless some other lwp beat us to it. If exitlwps() fails then
* return and the calling lwp will call (or continue in) lwp_exit().
*/
proc_is_exiting(p);
@@ -501,19 +528,6 @@ proc_exit(int why, int what)
}
mutex_exit(&p->p_lock);
- DTRACE_PROC(lwp__exit);
- DTRACE_PROC1(exit, int, why);
-
- /*
- * Will perform any brand specific proc exit processing, since this
- * is always the last lwp, will also perform lwp_exit and free brand
- * data
- */
- if (PROC_IS_BRANDED(p)) {
- lwp_detach_brand_hdlrs(lwp);
- brand_clearbrand(p, B_FALSE);
- }
-
/*
* Don't let init exit unless zone_start_init() failed its exec, or
* we are shutting down the zone or the machine.
@@ -527,6 +541,32 @@ proc_exit(int why, int what)
return (0);
}
+ /*
+ * Delay firing probes (and performing brand cleanup) until after the
+ * zone_proc_initpid check. Cases which result in zone shutdown or
+ * restart via zone_kadmin eventually result in a call back to
+ * proc_exit.
+ */
+ DTRACE_PROC(lwp__exit);
+ DTRACE_PROC1(exit, int, why);
+
+ /*
+ * Will perform any brand specific proc exit processing. Since this
+ * is always the last lwp, will also perform lwp exit/free and proc
+ * exit. Brand data will be freed when the process is reaped.
+ */
+ if (PROC_IS_BRANDED(p)) {
+ BROP(p)->b_lwpexit(lwp);
+ BROP(p)->b_proc_exit(p);
+ /*
+ * To ensure that b_proc_exit has access to brand-specific data
+ * contained by the one remaining lwp, call the freelwp hook as
+ * the last part of this clean-up process.
+ */
+ BROP(p)->b_freelwp(lwp);
+ lwp_detach_brand_hdlrs(lwp);
+ }
+
lwp_pcb_exit();
/*
@@ -693,7 +733,7 @@ proc_exit(int why, int what)
semexit(p);
rv = wstat(why, what);
- acct(rv & 0xff);
+ acct(rv);
exacct_commit_proc(p, rv);
/*
@@ -786,10 +826,22 @@ proc_exit(int why, int what)
if ((q = p->p_child) != NULL && p != proc_init) {
struct proc *np;
struct proc *initp = proc_init;
+ pid_t zone_initpid = 1;
+ struct proc *zoneinitp = NULL;
boolean_t setzonetop = B_FALSE;
- if (!INGLOBALZONE(curproc))
- setzonetop = B_TRUE;
+ if (!INGLOBALZONE(curproc)) {
+ zone_initpid = curproc->p_zone->zone_proc_initpid;
+
+ ASSERT(MUTEX_HELD(&pidlock));
+ zoneinitp = prfind(zone_initpid);
+ if (zoneinitp != NULL) {
+ initp = zoneinitp;
+ } else {
+ zone_initpid = 1;
+ setzonetop = B_TRUE;
+ }
+ }
pgdetach(p);
@@ -801,7 +853,8 @@ proc_exit(int why, int what)
*/
delete_ns(q->p_parent, q);
- q->p_ppid = 1;
+ q->p_ppid = zone_initpid;
+
q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
if (setzonetop) {
mutex_enter(&q->p_lock);
@@ -959,7 +1012,7 @@ proc_exit(int why, int what)
* curthread's proc pointer is changed to point to the 'sched'
* process for the corresponding zone, except in the case when
* the exiting process is in fact a zsched instance, in which
- * case the proc pointer is set to p0. We do so, so that the
+ * case the proc pointer is set to p0. We do so, so that the
* process still points at the right zone when we call the VN_RELE()
* below.
*
@@ -975,8 +1028,50 @@ proc_exit(int why, int what)
mutex_exit(&p->p_lock);
if (!evaporate) {
- p->p_pidflag &= ~CLDPEND;
- sigcld(p, sqp);
+ /*
+ * The brand specific code only happens when the brand has a
+ * function to call in place of sigcld and the parent of the
+ * exiting process is not the global zone init. If the parent
+ * is the global zone init, then the process was reparented,
+ * and we don't want brand code delivering possibly strange
+ * signals to init. Also, init is not branded, so any brand
+ * specific exit data will not be picked up by init anyway.
+ */
+ if (PROC_IS_BRANDED(p) &&
+ BROP(p)->b_exit_with_sig != NULL &&
+ p->p_ppid != 1) {
+ /*
+ * The code for _fini that could unload the brand_t
+ * blocks until the count of zones using the module
+ * reaches zero. Zones decrement the refcount on their
+ * brands only after all user tasks in that zone have
+ * exited and been waited on. The decrement on the
+ * brand's refcount happen in zone_destroy(). That
+ * depends on zone_shutdown() having been completed.
+ * zone_shutdown() includes a call to zone_empty(),
+ * where the zone waits for itself to reach the state
+ * ZONE_IS_EMPTY. This state is only set in either
+ * zone_shutdown(), when there are no user processes as
+ * the zone enters this function, or in
+ * zone_task_rele(). zone_task_rele() is called from
+ * code triggered by waiting on processes, not by the
+ * processes exiting through proc_exit(). This means
+ * all the branded processes that could exist for a
+ * specific brand_t must exit and get reaped before the
+ * refcount on the brand_t can reach 0. _fini will
+ * never unload the corresponding brand module before
+ * proc_exit finishes execution for all processes
+ * branded with a particular brand_t, which makes the
+ * operation below safe to do. Brands that wish to use
+ * this mechanism must wait in _fini as described
+ * above.
+ */
+ BROP(p)->b_exit_with_sig(p, sqp);
+ } else {
+ p->p_pidflag &= ~CLDPEND;
+ sigcld(p, sqp);
+ }
+
} else {
/*
* Do what sigcld() would do if the disposition
@@ -1001,7 +1096,7 @@ proc_exit(int why, int what)
/*
* task_rele() may ultimately cause the zone to go away (or
* may cause the last user process in a zone to go away, which
- * signals zsched to go away). So prior to this call, we must
+ * signals zsched to go away). So prior to this call, we must
* no longer point at zsched.
*/
t->t_procp = &p0;
@@ -1055,10 +1150,9 @@ winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
int
waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
{
- int found;
proc_t *cp, *pp;
- int proc_gone;
int waitflag = !(options & WNOWAIT);
+ boolean_t have_brand_helper = B_FALSE;
/*
* Obsolete flag, defined here only for binary compatibility
@@ -1086,7 +1180,8 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
pp = ttoproc(curthread);
/*
- * lock parent mutex so that sibling chain can be searched.
+ * Anytime you are looking for a process, you take pidlock to prevent
+ * things from changing as you look.
*/
mutex_enter(&pidlock);
@@ -1106,10 +1201,37 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
return (ECHILD);
}
- while (pp->p_child != NULL) {
+ if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
+ have_brand_helper = B_TRUE;
+ }
+
+ while (pp->p_child != NULL || have_brand_helper) {
+ boolean_t brand_wants_wait = B_FALSE;
+ int proc_gone = 0;
+ int found = 0;
- proc_gone = 0;
+ /*
+ * Give the brand a chance to return synthetic results from
+ * this waitid() call before we do the real thing.
+ */
+ if (have_brand_helper) {
+ int ret;
+ if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
+ &brand_wants_wait, &ret) == 0) {
+ mutex_exit(&pidlock);
+ return (ret);
+ }
+
+ if (pp->p_child == NULL) {
+ goto no_real_children;
+ }
+ }
+
+ /*
+ * Look for interesting children in the newstate list.
+ */
+ VERIFY(pp->p_child != NULL);
for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
continue;
@@ -1117,6 +1239,11 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
continue;
if (idtype == P_PGID && id != cp->p_pgrp)
continue;
+ if (PROC_IS_BRANDED(pp)) {
+ if (BROP(pp)->b_wait_filter != NULL &&
+ BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
+ continue;
+ }
switch (cp->p_wcode) {
@@ -1161,12 +1288,16 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
* Wow! None of the threads on the p_sibling_ns list were
* interesting threads. Check all the kids!
*/
- found = 0;
for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
if (idtype == P_PID && id != cp->p_pid)
continue;
if (idtype == P_PGID && id != cp->p_pgrp)
continue;
+ if (PROC_IS_BRANDED(pp)) {
+ if (BROP(pp)->b_wait_filter != NULL &&
+ BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
+ continue;
+ }
switch (cp->p_wcode) {
case CLD_TRAPPED:
@@ -1235,11 +1366,12 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
break;
}
+no_real_children:
/*
* If we found no interesting processes at all,
* break out and return ECHILD.
*/
- if (found + proc_gone == 0)
+ if (!brand_wants_wait && (found + proc_gone == 0))
break;
if (options & WNOHANG) {
@@ -1258,7 +1390,7 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
* change state while we wait, we don't wait at all.
* Get out with ECHILD according to SVID.
*/
- if (found == proc_gone)
+ if (!brand_wants_wait && (found == proc_gone))
break;
if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
@@ -1354,6 +1486,12 @@ freeproc(proc_t *p)
p->p_killsqp = NULL;
}
+ /* Clear any remaining brand data */
+ if (PROC_IS_BRANDED(p)) {
+ brand_clearbrand(p, B_FALSE);
+ }
+
+
prfree(p); /* inform /proc */
/*