diff options
Diffstat (limited to 'usr/src/uts/common/os/exit.c')
-rw-r--r-- | usr/src/uts/common/os/exit.c | 210 |
1 files changed, 174 insertions, 36 deletions
diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c index 5a9355ae9f..7ccf9b3221 100644 --- a/usr/src/uts/common/os/exit.c +++ b/usr/src/uts/common/os/exit.c @@ -141,11 +141,32 @@ rexit(int rval) } /* + * Bump the init_restarts kstat and let interested parties know about the + * restart. + */ +static void +restart_init_notify(zone_t *zone) +{ + nvlist_t *nvl = NULL; + + zone->zone_proc_init_restarts++; + + if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0 && + nvlist_add_uint32(nvl, ZONE_CB_RESTARTS, + zone->zone_proc_init_restarts) == 0) { + zone_sysevent_publish(zone, ZONE_EVENT_INIT_CLASS, + ZONE_EVENT_INIT_RESTART_SC, nvl); + } + + nvlist_free(nvl); +} + +/* * Called by proc_exit() when a zone's init exits, presumably because * it failed. As long as the given zone is still in the "running" * state, we will re-exec() init, but first we need to reset things * which are usually inherited across exec() but will break init's - * assumption that it is being exec()'d from a virgin process. Most + * assumption that it is being exec()'d from a virgin process. Most * importantly this includes closing all file descriptors (exec only * closes those marked close-on-exec) and resetting signals (exec only * resets handled signals, and we need to clear any signals which @@ -234,7 +255,7 @@ restart_init(int what, int why) siginfofree(lwp->lwp_curinfo); lwp->lwp_curinfo = NULL; } - lwp_ctmpl_clear(lwp); + lwp_ctmpl_clear(lwp, B_FALSE); /* * Reset both the process root directory and the current working @@ -286,6 +307,8 @@ restart_init(int what, int why) ASSERT(p == curproc); (void) freectty(B_TRUE); + restart_init_notify(p->p_zone); + /* * Now exec() the new init(8) on top of the current process. If we * succeed, the caller will treat this like a successful system call. @@ -320,7 +343,7 @@ exit(int why, int what) /* * If proc_exit() fails, then some other lwp in the process * got there first. We just have to call lwp_exit() to allow - * the other lwp to finish exiting the process. Otherwise we're + * the other lwp to finish exiting the process. Otherwise we're * restarting init, and should return. */ if (proc_exit(why, what) != 0) { @@ -333,7 +356,7 @@ exit(int why, int what) /* * Set the SEXITING flag on the process, after making sure /proc does - * not have it locked. This is done in more places than proc_exit(), + * not have it locked. This is done in more places than proc_exit(), * so it is a separate function. */ void @@ -380,8 +403,9 @@ zone_init_exit(zone_t *z, int why, int what) */ if (!z->zone_restart_init) { /* - * The zone has been set up to halt when init exits. + * The zone has been setup to halt when init exits. */ + z->zone_init_status = wstat(why, what); (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, zone_kcred()); z->zone_proc_initpid = -1; return (B_FALSE); @@ -421,6 +445,7 @@ zone_init_exit(zone_t *z, int why, int what) (void) zone_kadmin(A_REBOOT, 0, NULL, zone_kcred()); } + z->zone_init_status = wstat(why, what); z->zone_proc_initpid = -1; return (B_FALSE); } @@ -441,14 +466,16 @@ zone_init_exit(zone_t *z, int why, int what) /* * No restart modifiers on the zone, attempt to restart init. */ - if (restart_init(what, why) == 0) + if (restart_init(what, why) == 0) { return (B_TRUE); + } } /* * The restart failed, or the criteria for a restart are not met; * the zone will shut down. */ + z->zone_init_status = wstat(why, what); (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, zone_kcred()); z->zone_proc_initpid = -1; return (B_FALSE); @@ -483,7 +510,7 @@ proc_exit(int why, int what) /* * Stop and discard the process's lwps except for the current one, - * unless some other lwp beat us to it. If exitlwps() fails then + * unless some other lwp beat us to it. If exitlwps() fails then * return and the calling lwp will call (or continue in) lwp_exit(). */ proc_is_exiting(p); @@ -501,19 +528,6 @@ proc_exit(int why, int what) } mutex_exit(&p->p_lock); - DTRACE_PROC(lwp__exit); - DTRACE_PROC1(exit, int, why); - - /* - * Will perform any brand specific proc exit processing, since this - * is always the last lwp, will also perform lwp_exit and free brand - * data - */ - if (PROC_IS_BRANDED(p)) { - lwp_detach_brand_hdlrs(lwp); - brand_clearbrand(p, B_FALSE); - } - /* * Don't let init exit unless zone_start_init() failed its exec, or * we are shutting down the zone or the machine. @@ -527,6 +541,32 @@ proc_exit(int why, int what) return (0); } + /* + * Delay firing probes (and performing brand cleanup) until after the + * zone_proc_initpid check. Cases which result in zone shutdown or + * restart via zone_kadmin eventually result in a call back to + * proc_exit. + */ + DTRACE_PROC(lwp__exit); + DTRACE_PROC1(exit, int, why); + + /* + * Will perform any brand specific proc exit processing. Since this + * is always the last lwp, will also perform lwp exit/free and proc + * exit. Brand data will be freed when the process is reaped. + */ + if (PROC_IS_BRANDED(p)) { + BROP(p)->b_lwpexit(lwp); + BROP(p)->b_proc_exit(p); + /* + * To ensure that b_proc_exit has access to brand-specific data + * contained by the one remaining lwp, call the freelwp hook as + * the last part of this clean-up process. + */ + BROP(p)->b_freelwp(lwp); + lwp_detach_brand_hdlrs(lwp); + } + lwp_pcb_exit(); /* @@ -693,7 +733,7 @@ proc_exit(int why, int what) semexit(p); rv = wstat(why, what); - acct(rv & 0xff); + acct(rv); exacct_commit_proc(p, rv); /* @@ -786,10 +826,22 @@ proc_exit(int why, int what) if ((q = p->p_child) != NULL && p != proc_init) { struct proc *np; struct proc *initp = proc_init; + pid_t zone_initpid = 1; + struct proc *zoneinitp = NULL; boolean_t setzonetop = B_FALSE; - if (!INGLOBALZONE(curproc)) - setzonetop = B_TRUE; + if (!INGLOBALZONE(curproc)) { + zone_initpid = curproc->p_zone->zone_proc_initpid; + + ASSERT(MUTEX_HELD(&pidlock)); + zoneinitp = prfind(zone_initpid); + if (zoneinitp != NULL) { + initp = zoneinitp; + } else { + zone_initpid = 1; + setzonetop = B_TRUE; + } + } pgdetach(p); @@ -801,7 +853,8 @@ proc_exit(int why, int what) */ delete_ns(q->p_parent, q); - q->p_ppid = 1; + q->p_ppid = zone_initpid; + q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID); if (setzonetop) { mutex_enter(&q->p_lock); @@ -959,7 +1012,7 @@ proc_exit(int why, int what) * curthread's proc pointer is changed to point to the 'sched' * process for the corresponding zone, except in the case when * the exiting process is in fact a zsched instance, in which - * case the proc pointer is set to p0. We do so, so that the + * case the proc pointer is set to p0. We do so, so that the * process still points at the right zone when we call the VN_RELE() * below. * @@ -975,8 +1028,50 @@ proc_exit(int why, int what) mutex_exit(&p->p_lock); if (!evaporate) { - p->p_pidflag &= ~CLDPEND; - sigcld(p, sqp); + /* + * The brand specific code only happens when the brand has a + * function to call in place of sigcld and the parent of the + * exiting process is not the global zone init. If the parent + * is the global zone init, then the process was reparented, + * and we don't want brand code delivering possibly strange + * signals to init. Also, init is not branded, so any brand + * specific exit data will not be picked up by init anyway. + */ + if (PROC_IS_BRANDED(p) && + BROP(p)->b_exit_with_sig != NULL && + p->p_ppid != 1) { + /* + * The code for _fini that could unload the brand_t + * blocks until the count of zones using the module + * reaches zero. Zones decrement the refcount on their + * brands only after all user tasks in that zone have + * exited and been waited on. The decrement on the + * brand's refcount happen in zone_destroy(). That + * depends on zone_shutdown() having been completed. + * zone_shutdown() includes a call to zone_empty(), + * where the zone waits for itself to reach the state + * ZONE_IS_EMPTY. This state is only set in either + * zone_shutdown(), when there are no user processes as + * the zone enters this function, or in + * zone_task_rele(). zone_task_rele() is called from + * code triggered by waiting on processes, not by the + * processes exiting through proc_exit(). This means + * all the branded processes that could exist for a + * specific brand_t must exit and get reaped before the + * refcount on the brand_t can reach 0. _fini will + * never unload the corresponding brand module before + * proc_exit finishes execution for all processes + * branded with a particular brand_t, which makes the + * operation below safe to do. Brands that wish to use + * this mechanism must wait in _fini as described + * above. + */ + BROP(p)->b_exit_with_sig(p, sqp); + } else { + p->p_pidflag &= ~CLDPEND; + sigcld(p, sqp); + } + } else { /* * Do what sigcld() would do if the disposition @@ -1001,7 +1096,7 @@ proc_exit(int why, int what) /* * task_rele() may ultimately cause the zone to go away (or * may cause the last user process in a zone to go away, which - * signals zsched to go away). So prior to this call, we must + * signals zsched to go away). So prior to this call, we must * no longer point at zsched. */ t->t_procp = &p0; @@ -1055,10 +1150,9 @@ winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) int waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) { - int found; proc_t *cp, *pp; - int proc_gone; int waitflag = !(options & WNOWAIT); + boolean_t have_brand_helper = B_FALSE; /* * Obsolete flag, defined here only for binary compatibility @@ -1086,7 +1180,8 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) pp = ttoproc(curthread); /* - * lock parent mutex so that sibling chain can be searched. + * Anytime you are looking for a process, you take pidlock to prevent + * things from changing as you look. */ mutex_enter(&pidlock); @@ -1106,10 +1201,37 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) return (ECHILD); } - while (pp->p_child != NULL) { + if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) { + have_brand_helper = B_TRUE; + } + + while (pp->p_child != NULL || have_brand_helper) { + boolean_t brand_wants_wait = B_FALSE; + int proc_gone = 0; + int found = 0; - proc_gone = 0; + /* + * Give the brand a chance to return synthetic results from + * this waitid() call before we do the real thing. + */ + if (have_brand_helper) { + int ret; + if (BROP(pp)->b_waitid_helper(idtype, id, ip, options, + &brand_wants_wait, &ret) == 0) { + mutex_exit(&pidlock); + return (ret); + } + + if (pp->p_child == NULL) { + goto no_real_children; + } + } + + /* + * Look for interesting children in the newstate list. + */ + VERIFY(pp->p_child != NULL); for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) { if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID)) continue; @@ -1117,6 +1239,11 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) continue; if (idtype == P_PGID && id != cp->p_pgrp) continue; + if (PROC_IS_BRANDED(pp)) { + if (BROP(pp)->b_wait_filter != NULL && + BROP(pp)->b_wait_filter(pp, cp) == B_FALSE) + continue; + } switch (cp->p_wcode) { @@ -1161,12 +1288,16 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) * Wow! None of the threads on the p_sibling_ns list were * interesting threads. Check all the kids! */ - found = 0; for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) { if (idtype == P_PID && id != cp->p_pid) continue; if (idtype == P_PGID && id != cp->p_pgrp) continue; + if (PROC_IS_BRANDED(pp)) { + if (BROP(pp)->b_wait_filter != NULL && + BROP(pp)->b_wait_filter(pp, cp) == B_FALSE) + continue; + } switch (cp->p_wcode) { case CLD_TRAPPED: @@ -1235,11 +1366,12 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) break; } +no_real_children: /* * If we found no interesting processes at all, * break out and return ECHILD. */ - if (found + proc_gone == 0) + if (!brand_wants_wait && (found + proc_gone == 0)) break; if (options & WNOHANG) { @@ -1258,7 +1390,7 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) * change state while we wait, we don't wait at all. * Get out with ECHILD according to SVID. */ - if (found == proc_gone) + if (!brand_wants_wait && (found == proc_gone)) break; if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { @@ -1354,6 +1486,12 @@ freeproc(proc_t *p) p->p_killsqp = NULL; } + /* Clear any remaining brand data */ + if (PROC_IS_BRANDED(p)) { + brand_clearbrand(p, B_FALSE); + } + + prfree(p); /* inform /proc */ /* |