diff options
Diffstat (limited to 'usr/src')
31 files changed, 3163 insertions, 1822 deletions
diff --git a/usr/src/cmd/ptools/pflags/pflags.c b/usr/src/cmd/ptools/pflags/pflags.c index 8054a80d3c..f19a945d95 100644 --- a/usr/src/cmd/ptools/pflags/pflags.c +++ b/usr/src/cmd/ptools/pflags/pflags.c @@ -25,7 +25,7 @@ */ /* - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #include <stdio.h> @@ -469,6 +469,9 @@ prwhy(int why) case PR_SUSPENDED: str = "PR_SUSPENDED"; break; + case PR_BRAND: + str = "PR_BRAND"; + break; default: str = buf; (void) sprintf(str, "%d", why); diff --git a/usr/src/common/brand/lx/lx_syscall.h b/usr/src/common/brand/lx/lx_syscall.h index e80b0486f5..e9d06fd9bc 100644 --- a/usr/src/common/brand/lx/lx_syscall.h +++ b/usr/src/common/brand/lx/lx_syscall.h @@ -35,9 +35,6 @@ extern "C" { #define LX_WNOTHREAD 0x20000000 /* Do not wait on siblings' children */ #define LX_WALL 0x40000000 /* Wait on all children */ #define LX_WCLONE 0x80000000 /* Wait only on clone children */ -typedef struct lx_waitid_args { - int waitid_flags; -} lx_waitid_args_t; /* For arch_prctl(2) */ #define LX_ARCH_SET_GS 0x1001 @@ -45,6 +42,50 @@ typedef struct lx_waitid_args { #define LX_ARCH_GET_FS 0x1003 #define LX_ARCH_GET_GS 0x1004 +/* + * For ptrace(2): + */ +#define LX_PTRACE_TRACEME 0 +#define LX_PTRACE_PEEKTEXT 1 +#define LX_PTRACE_PEEKDATA 2 +#define LX_PTRACE_PEEKUSER 3 +#define LX_PTRACE_POKETEXT 4 +#define LX_PTRACE_POKEDATA 5 +#define LX_PTRACE_POKEUSER 6 +#define LX_PTRACE_CONT 7 +#define LX_PTRACE_KILL 8 +#define LX_PTRACE_SINGLESTEP 9 +#define LX_PTRACE_GETREGS 12 +#define LX_PTRACE_SETREGS 13 +#define LX_PTRACE_GETFPREGS 14 +#define LX_PTRACE_SETFPREGS 15 +#define LX_PTRACE_ATTACH 16 +#define LX_PTRACE_DETACH 17 +#define LX_PTRACE_GETFPXREGS 18 +#define LX_PTRACE_SETFPXREGS 19 +#define LX_PTRACE_SYSCALL 24 +#define LX_PTRACE_SETOPTIONS 0x4200 +#define LX_PTRACE_GETEVENTMSG 0x4201 + +/* + * For clone(2): + */ +#define LX_CSIGNAL 0x000000ff +#define LX_CLONE_VM 0x00000100 +#define LX_CLONE_FS 0x00000200 +#define 
LX_CLONE_FILES 0x00000400 +#define LX_CLONE_SIGHAND 0x00000800 +#define LX_CLONE_PID 0x00001000 +#define LX_CLONE_PTRACE 0x00002000 +#define LX_CLONE_VFORK 0x00004000 +#define LX_CLONE_PARENT 0x00008000 +#define LX_CLONE_THREAD 0x00010000 +#define LX_CLONE_SYSVSEM 0x00040000 +#define LX_CLONE_SETTLS 0x00080000 +#define LX_CLONE_PARENT_SETTID 0x00100000 +#define LX_CLONE_CHILD_CLEARTID 0x00200000 +#define LX_CLONE_DETACH 0x00400000 +#define LX_CLONE_CHILD_SETTID 0x01000000 #ifdef __cplusplus } diff --git a/usr/src/lib/brand/lx/lx_brand/common/clone.c b/usr/src/lib/brand/lx/lx_brand/common/clone.c index 58c84c773b..87f966cc89 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/clone.c +++ b/usr/src/lib/brand/lx/lx_brand/common/clone.c @@ -49,23 +49,7 @@ #include <sys/lx_debug.h> #include <sys/lx_thread.h> #include <sys/fork.h> - -#define LX_CSIGNAL 0x000000ff -#define LX_CLONE_VM 0x00000100 -#define LX_CLONE_FS 0x00000200 -#define LX_CLONE_FILES 0x00000400 -#define LX_CLONE_SIGHAND 0x00000800 -#define LX_CLONE_PID 0x00001000 -#define LX_CLONE_PTRACE 0x00002000 -#define LX_CLONE_VFORK 0x00004000 -#define LX_CLONE_PARENT 0x00008000 -#define LX_CLONE_THREAD 0x00010000 -#define LX_CLONE_SYSVSEM 0x00040000 -#define LX_CLONE_SETTLS 0x00080000 -#define LX_CLONE_PARENT_SETTID 0x00100000 -#define LX_CLONE_CHILD_CLEARTID 0x00200000 -#define LX_CLONE_DETACH 0x00400000 -#define LX_CLONE_CHILD_SETTID 0x01000000 +#include <lx_syscall.h> #define SHARED_AS \ (LX_CLONE_VM | LX_CLONE_FS | LX_CLONE_FILES | LX_CLONE_SIGHAND \ @@ -116,6 +100,7 @@ struct clone_state { sigset_t c_sigmask; /* signal mask */ lx_affmask_t c_affmask; /* CPU affinity mask */ volatile int *c_clone_res; /* pid/error returned to cloner */ + int c_ptrace_event; /* ptrace(2) event for child stop */ }; extern void lx_setup_clone(uintptr_t, void *, void *); @@ -147,7 +132,7 @@ lx_exit(uintptr_t p1) assert(lx_tsd != 0); - lx_tsd->lxtsd_exit = LX_EXIT; + lx_tsd->lxtsd_exit = LX_ET_EXIT; lx_tsd->lxtsd_exit_status = status; 
lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEEXIT, B_FALSE, @@ -200,7 +185,7 @@ lx_group_exit(uintptr_t p1) assert(lx_tsd != 0); - lx_tsd->lxtsd_exit = LX_EXIT_GROUP; + lx_tsd->lxtsd_exit = LX_ET_EXIT_GROUP; lx_tsd->lxtsd_exit_status = status; /* @@ -315,7 +300,7 @@ clone_start(void *arg) * Do the final stack twiddling, reset %gs, and return to the * clone(2) path. */ - if (lx_tsd.lxtsd_exit == 0) { + if (lx_tsd.lxtsd_exit == LX_ET_NONE) { if (sigprocmask(SIG_SETMASK, &cs->c_sigmask, NULL) < 0) { *(cs->c_clone_res) = -errno; @@ -329,6 +314,11 @@ clone_start(void *arg) */ *(cs->c_clone_res) = rval; + /* + * Fire the ptrace(2) event stop in the new thread: + */ + lx_ptrace_stop_if_option(cs->c_ptrace_event, B_TRUE, 0); + #if defined(_LP64) (void) syscall(SYS_brand, B_CLR_NTV_SYSC_FLAG); lx_setup_clone((uintptr_t)&cs->c_regs, cs->c_retaddr, @@ -347,12 +337,7 @@ clone_start(void *arg) * setcontext() to jump to the thread context state saved in * getcontext(), above. */ - if (lx_tsd.lxtsd_exit == LX_EXIT) - thr_exit((void *)(long)lx_tsd.lxtsd_exit_status); - else - exit(lx_tsd.lxtsd_exit_status); - - assert(0); + lx_exit_common(lx_tsd.lxtsd_exit, lx_tsd.lxtsd_exit_status); /*NOTREACHED*/ } @@ -455,6 +440,12 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, ptrace_event = ptrace_clone_event(flags); + /* + * Inform the in-kernel ptrace(2) subsystem that we are about to + * emulate a fork(2), vfork(2) or clone(2) system call. + */ + lx_ptrace_clone_begin(ptrace_event, !!(flags & LX_CLONE_PTRACE)); + /* See if this is a fork() operation or a thr_create(). */ if (IS_FORK(flags) || IS_VFORK(flags)) { if (flags & LX_CLONE_PARENT) { @@ -463,9 +454,6 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, return (-ENOTSUP); } - if (flags & LX_CLONE_PTRACE) - lx_ptrace_fork(); - if ((flags & LX_CSIGNAL) == 0) fork_flags |= FORK_NOSIGCHLD; @@ -509,7 +497,6 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, return ((rval < 0) ? 
-errno : rval); } - /* * Set up additional data in the lx_proc_data structure as * necessary. @@ -584,6 +571,7 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, cs->c_ldtinfo = ldtinfo; cs->c_ctidp = ctidp; cs->c_clone_res = &clone_res; + cs->c_ptrace_event = ptrace_event; #if defined(_LP64) /* * The AMD64 ABI says that the kernel clobbers %rcx and %r11. We @@ -649,7 +637,7 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, ; rval = clone_res; - lx_ptrace_stop_if_option(ptrace_event, B_TRUE, 0); + lx_ptrace_stop_if_option(ptrace_event, B_FALSE, (ulong_t)rval); } return (rval); diff --git a/usr/src/lib/brand/lx/lx_brand/common/fork.c b/usr/src/lib/brand/lx/lx_brand/common/fork.c index 9f2fbd6406..b0edee1adb 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/fork.c +++ b/usr/src/lib/brand/lx/lx_brand/common/fork.c @@ -41,18 +41,36 @@ long lx_fork(void) { - int ret = fork1(); + int ret; - if (ret == 0) { - if (lx_is_rpm) + /* + * Inform the in-kernel ptrace(2) subsystem that we are about to + * emulate fork(2). + */ + lx_ptrace_clone_begin(LX_PTRACE_O_TRACEFORK, B_FALSE); + + switch (ret = fork1()) { + case -1: + return (-errno); + + case 0: + /* + * Returning in the new child. + */ + if (lx_is_rpm) { (void) sleep(lx_rpm_delay); + } lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEFORK, B_TRUE, 0); - } else if (ret != -1) { + return (0); + + default: + /* + * Returning in the new parent. + */ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEFORK, B_FALSE, (ulong_t)ret); + return (ret); } - - return (ret == -1 ? -errno : ret); } /* @@ -65,14 +83,31 @@ lx_fork(void) long lx_vfork(void) { - int ret = fork1(); + int ret; - if (ret == 0) { + /* + * Inform the in-kernel ptrace(2) subsystem that we are about to + * emulate vfork(2). + */ + lx_ptrace_clone_begin(LX_PTRACE_O_TRACEVFORK, B_FALSE); + + switch (ret = fork1()) { + case -1: + return (-errno); + + case 0: + /* + * Returning in the new child. 
+ */ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEVFORK, B_TRUE, 0); - } else if (ret != -1) { + return (0); + + default: + /* + * Returning in the new parent. + */ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEVFORK, B_FALSE, (ulong_t)ret); + return (ret); } - - return (ret == -1 ? -errno : ret); } diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c index b8fdf36b42..abe015c2c4 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c +++ b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c @@ -172,6 +172,9 @@ struct lx_locale_ending { int se_size; /* solaris ending string length */ }; +__thread int lx_do_syscall_restart; +__thread int lx_had_sigchild; + #define l2s_locale(lname, sname) \ {(lname), (sname), sizeof ((lname)) - 1, sizeof ((sname)) - 1} @@ -647,6 +650,7 @@ lx_emulate(lx_regs_t *rp) } #endif /* _ILP32 */ +restart_syscall: if (s->sy_flags & LX_SYS_IKE) { lx_debug("\tsyscall %d re-vectoring to lx kernel module " "for %s()", syscall_num, s->sy_name); @@ -679,6 +683,12 @@ lx_emulate(lx_regs_t *rp) ret = -stol_errno[-ret]; } + if (lx_do_syscall_restart && ret == -stol_errno[EINTR]) { + lx_debug("restarting system call due to signal interruption"); + lx_do_syscall_restart = 0; + goto restart_syscall; + } + out: /* * For 32-bit, %eax holds the return code from the system call. For @@ -962,7 +972,7 @@ lx_init(int argc, char *argv[], char *envp[]) lx_err_fatal("Unable to initialize thread-specific exit " "context: %s", strerror(errno)); - if (lx_tsd.lxtsd_exit == 0) { + if (lx_tsd.lxtsd_exit == LX_ET_NONE) { #if defined(_LP64) /* Switch to Linux syscall mode */ (void) syscall(SYS_brand, B_CLR_NTV_SYSC_FLAG); @@ -978,17 +988,36 @@ lx_init(int argc, char *argv[], char *envp[]) * exit_group() system call. In turn the brand library did a * setcontext() to jump to the thread context state we saved above. 
*/ - if (lx_tsd.lxtsd_exit == 1) - thr_exit((void *)(long)lx_tsd.lxtsd_exit_status); - else - exit(lx_tsd.lxtsd_exit_status); - - assert(0); - + lx_exit_common(lx_tsd.lxtsd_exit, lx_tsd.lxtsd_exit_status); /*NOTREACHED*/ return (0); } +void +lx_exit_common(lx_exit_type_t exit_type, uintptr_t exit_value) +{ + int ev = 0xff & exit_value; + + switch (exit_type) { + case LX_ET_EXIT: + /* + * The native thread return value is never seen so we pass + * NULL. + */ + thr_exit(NULL); + break; + + case LX_ET_EXIT_GROUP: + exit(ev); + break; + + default: + abort(); + } + + abort(); +} + /* * Walk back through the stack until we find the lx_emulate() frame. */ diff --git a/usr/src/lib/brand/lx/lx_brand/common/misc.c b/usr/src/lib/brand/lx/lx_brand/common/misc.c index f60f3f290f..750af869a4 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/misc.c +++ b/usr/src/lib/brand/lx/lx_brand/common/misc.c @@ -572,8 +572,6 @@ lx_execve(uintptr_t p1, uintptr_t p2, uintptr_t p3) if (argv == NULL) argv = nullist; - lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEEXEC, B_FALSE, 0); - /* * Emulate PR_SET_KEEPCAPS which is reset on execve. If this is not done * the emulated capabilities could be reduced more than expected. diff --git a/usr/src/lib/brand/lx/lx_brand/common/ptrace.c b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c index 2efc64a43e..174dbe8c19 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/ptrace.c +++ b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c @@ -51,82 +51,17 @@ #include <ieeefp.h> #include <assert.h> #include <libintl.h> +#include <lx_syscall.h> /* - * Linux ptrace compatibility. - * - * The brand support for ptrace(2) is built on top of the Solaris /proc - * interfaces, mounted at /native/proc in the zone. This gets quite - * complicated due to the way ptrace works and the Solaris realization of the - * Linux threading model. - * - * ptrace can only interact with a process if we are tracing it, and it is - * currently stopped. 
There are two ways a process can begin tracing another - * process: - * - * PTRACE_TRACEME - * - * A child process can use PTRACE_TRACEME to indicate that it wants to be - * traced by the parent. This sets the ptrace compatibility flag in /proc - * which causes ths ptrace consumer to be notified through the wait(2) - * system call of events of interest. PTRACE_TRACEME is typically used by - * the debugger by forking a process, using PTRACE_TRACEME, and finally - * doing an exec of the specified program. - * - * - * PTRACE_ATTACH - * - * We can attach to a process using PTRACE_ATTACH. This is considerably - * more complicated than the previous case. On Linux, the traced process is - * effectively reparented to the ptrace consumer so that event notification - * can go through the normal wait(2) system call. Solaris has no such - * ability to reparent a process (nor should it) so some trickery was - * required. - * - * When the ptrace consumer uses PTRACE_ATTACH it forks a monitor child - * process. The monitor enables the /proc ptrace flag for itself and uses - * the native /proc mechanisms to observe the traced process and wait for - * events of interest. When the traced process stops, the monitor process - * sends itself a SIGTRAP thus rousting its parent process (the ptrace - * consumer) out of wait(2). We then translate the process id and status - * code from wait(2) to those of the traced process. - * - * To detach from the process we just have to clean up tracing flags and - * clean up the monitor. - * - * ptrace can only interact with a process if we have traced it, and it is - * currently stopped (see is_traced()). For threads, there's no way to - * distinguish whether ptrace() has been called for all threads or some - * subset. Since most clients will be tracing all threads, and erroneously - * allowing ptrace to access a non-traced thread is non-fatal (or at least - * would be fatal on linux), we ignore this aspect of the problem. 
+ * Much of the Linux ptrace(2) emulation is performed in the kernel, and there + * is a block comment in "lx_ptrace.c" that describes the facility in some + * detail. */ -#define LX_PTRACE_TRACEME 0 -#define LX_PTRACE_PEEKTEXT 1 -#define LX_PTRACE_PEEKDATA 2 -#define LX_PTRACE_PEEKUSER 3 -#define LX_PTRACE_POKETEXT 4 -#define LX_PTRACE_POKEDATA 5 -#define LX_PTRACE_POKEUSER 6 -#define LX_PTRACE_CONT 7 -#define LX_PTRACE_KILL 8 -#define LX_PTRACE_SINGLESTEP 9 -#define LX_PTRACE_GETREGS 12 -#define LX_PTRACE_SETREGS 13 -#define LX_PTRACE_GETFPREGS 14 -#define LX_PTRACE_SETFPREGS 15 -#define LX_PTRACE_ATTACH 16 -#define LX_PTRACE_DETACH 17 -#define LX_PTRACE_GETFPXREGS 18 -#define LX_PTRACE_SETFPXREGS 19 -#define LX_PTRACE_SYSCALL 24 -#define LX_PTRACE_SETOPTIONS 0x4200 -#define LX_PTRACE_GETEVENTMSG 0x4201 - /* execve syscall numbers for 64-bit vs. 32-bit */ #if defined(_LP64) -#define LX_SYS_execve 520 +#define LX_SYS_execve 59 #else #define LX_SYS_execve 11 #endif @@ -237,22 +172,12 @@ typedef struct lx_user { int lxu_debugreg[8]; } lx_user_t; -typedef struct ptrace_monitor_map { - struct ptrace_monitor_map *pmm_next; /* next pointer */ - pid_t pmm_monitor; /* monitor child process */ - pid_t pmm_target; /* traced Linux pid */ - pid_t pmm_pid; /* Solaris pid */ - lwpid_t pmm_lwpid; /* Solaris lwpid */ - uint_t pmm_exiting; /* detached */ -} ptrace_monitor_map_t; - typedef struct ptrace_state_map { struct ptrace_state_map *psm_next; /* next pointer */ pid_t psm_pid; /* Solaris pid */ uintptr_t psm_debugreg[8]; /* debug registers */ } ptrace_state_map_t; -static ptrace_monitor_map_t *ptrace_monitor_map = NULL; static ptrace_state_map_t *ptrace_state_map = NULL; static mutex_t ptrace_map_mtx = DEFAULTMUTEX; @@ -260,6 +185,8 @@ extern void *_START_; static sigset_t blockable_sigs; +static long lx_ptrace_kernel(int, pid_t, uintptr_t, uintptr_t); + void lx_ptrace_init(void) { @@ -298,24 +225,6 @@ open_lwpfile(pid_t pid, lwpid_t lwpid, int mode, const char *name) } 
static int -get_status(pid_t pid, pstatus_t *psp) -{ - int fd; - - if ((fd = open_procfile(pid, O_RDONLY, "status")) < 0) - return (-ESRCH); - - if (read(fd, psp, sizeof (pstatus_t)) != sizeof (pstatus_t)) { - (void) close(fd); - return (-EIO); - } - - (void) close(fd); - - return (0); -} - -static int get_lwpstatus(pid_t pid, lwpid_t lwpid, lwpstatus_t *lsp) { int fd; @@ -869,22 +778,6 @@ debug_registers(pid_t pid) return (p != NULL? p->psm_debugreg : NULL); } -static void -free_debug_registers(pid_t pid) -{ - ptrace_state_map_t **pp; - ptrace_state_map_t *p; - - /* ASSERT(MUTEX_HELD(&ptrace_map_mtx) */ - for (pp = &ptrace_state_map; (p = *pp) != NULL; pp = &p->psm_next) { - if (p->psm_pid == pid) { - *pp = p->psm_next; - free(p); - break; - } - } -} - static int setup_watchpoints(pid_t pid, uintptr_t *debugreg) { @@ -952,156 +845,33 @@ setup_watchpoints(pid_t pid, uintptr_t *debugreg) } /* - * Returns TRUE if the process is traced, FALSE otherwise. This is only true - * if the process is currently stopped, and has been traced using - * PTRACE_TRACEME, PTRACE_ATTACH or one of the Linux-specific trace options. + * Returns B_TRUE if the target LWP, identified by its Linux pid, is traced by + * this LWP and is waiting in "ptrace-stop". Returns B_FALSE otherwise. */ -static int -is_traced(pid_t pid) +static boolean_t +is_ptrace_stopped(pid_t lxpid) { - ptrace_monitor_map_t *p; - pstatus_t status; - uint_t curr_opts; - pid_t mypid; + ulong_t dummy; /* - * First get the stop options since that is an indication that the - * process is being traced. + * We attempt a PTRACE_GETEVENTMSG request to determine if the tracee + * is stopped appropriately. As we are not in the kernel, this is not + * an atomic check; the process is not guaranteed to remain stopped + * once we have dropped the locks protecting that state and left the + * kernel. 
*/ - if (syscall(SYS_brand, B_PTRACE_EXT_OPTS, B_PTRACE_EXT_OPTS_GET, pid, - &curr_opts) != 0) - return (0); - - mypid = getpid(); - - if (get_status(pid, &status) != 0) - return (0); - - /* - * When we look to see if we are tracing a process we have to take the - * PTRACE_SETOPTIONS handling into account. In particular, if we are - * tracing with PTRACE_O_TRACEFORK, etc. then we may be dealing with - * the child of a child that we started tracing. We can determine this - * by checking the EMUL_PTRACE_IS_TRACED flag and checking the parent - * of the parent. We cannot check for the presence of the options since - * those will be cleared during the process of detaching from a tracee. - */ - if (curr_opts & EMUL_PTRACE_IS_TRACED && status.pr_ppid != mypid) { - pstatus_t par_status; - pid_t chkpid = status.pr_ppid; - - if (get_status(status.pr_ppid, &par_status) == 0) { - chkpid = par_status.pr_ppid; - } else { - /* parent is gone, re-get our ppid */ - if (get_status(pid, &par_status) == 0) - chkpid = par_status.pr_ppid; - } - - if (chkpid == mypid) - return (1); + if (lx_ptrace_kernel(LX_PTRACE_GETEVENTMSG, lxpid, NULL, + (uintptr_t)&dummy) == 0) { + return (B_TRUE); } - if ((status.pr_flags & PR_PTRACE || - curr_opts & EMUL_PTRACE_IS_TRACED) && - (status.pr_ppid == mypid) && - (status.pr_lwp.pr_flags & PR_ISTOP)) - return (1); - - (void) mutex_lock(&ptrace_map_mtx); - for (p = ptrace_monitor_map; p != NULL; p = p->pmm_next) { - if (p->pmm_target == pid) { - (void) mutex_unlock(&ptrace_map_mtx); - return (1); - } - } - (void) mutex_unlock(&ptrace_map_mtx); - - return (0); -} - -static int -ptrace_trace_common(int fd) -{ - struct { - long cmd; - union { - long flags; - sigset_t signals; - fltset_t faults; - } arg; - } ctl; - size_t size; - - ctl.cmd = PCSTRACE; - prfillset(&ctl.arg.signals); - size = sizeof (long) + sizeof (sigset_t); - if (write(fd, &ctl, size) != size) - return (-1); - - ctl.cmd = PCSFAULT; - premptyset(&ctl.arg.faults); - size = sizeof (long) + 
sizeof (fltset_t); - if (write(fd, &ctl, size) != size) - return (-1); - - ctl.cmd = PCUNSET; - ctl.arg.flags = PR_FORK; - size = sizeof (long) + sizeof (long); - if (write(fd, &ctl, size) != size) - return (-1); - - return (0); -} - -/* - * Notify that parent that we wish to be traced. This is the equivalent of: - * - * 1. Stop on all signals, and nothing else - * 2. Turn off inherit-on-fork flag - * 3. Set ptrace compatible flag - * - * If we are not the main thread, then the client is trying to request behavior - * by which one of its own thread is to be traced. We don't support this mode - * of operation. - */ -static int -ptrace_traceme(void) -{ - int fd, ret; - int error; - long ctl[2]; - pstatus_t status; - pid_t pid = getpid(); - - if (_lwp_self() != 1) { - lx_unsupported("thread %d calling PTRACE_TRACEME is " - "unsupported", _lwp_self()); - return (-ENOTSUP); - } - - if ((ret = get_status(pid, &status)) != 0) - return (ret); - /* - * Why would a process try to do this twice? I'm not sure, but there's - * a conformance test which wants this to fail just so. + * This call should only fail with ESRCH, which tells us that the + * a tracee with that pid was not found in the stopped condition. 
*/ - if (status.pr_flags & PR_PTRACE) - return (-EPERM); - - if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) - return (-errno); + assert(errno == ESRCH); - ctl[0] = PCSET; - ctl[1] = PR_PTRACE; - error = 0; - if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl) || - ptrace_trace_common(fd) != 0) - error = -errno; - - (void) close(fd); - return (error); + return (B_FALSE); } /* @@ -1114,9 +884,6 @@ ptrace_peek(pid_t pid, uintptr_t addr, long *ret) int fd; long data; - if (!is_traced(pid)) - return (-ESRCH); - if ((fd = open_procfile(pid, O_RDONLY, "as")) < 0) return (-ESRCH); @@ -1143,9 +910,6 @@ ptrace_peek_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int *ret) uintptr_t *debugreg; int dreg; - if (!is_traced(pid)) - return (-ESRCH); - /* * The offset specified by the user is an offset into the Linux * user structure (seriously). Rather than constructing a full @@ -1239,9 +1003,6 @@ ptrace_poke(pid_t pid, uintptr_t addr, int data) { int fd; - if (!is_traced(pid)) - return (-ESRCH); - if (addr & 0x3) return (-EINVAL); @@ -1265,9 +1026,6 @@ ptrace_poke_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int data) uintptr_t *debugreg; int dreg; - if (!is_traced(pid)) - return (-ESRCH); - if (off & 0x3) return (-EINVAL); @@ -1300,187 +1058,13 @@ ptrace_poke_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int data) } static int -ptrace_cont_common(int fd, int sig, int run, int step) -{ - long ctl[1 + 1 + sizeof (siginfo_t) / sizeof (long) + 2]; - long *ctlp = ctl; - size_t size; - - assert(0 <= sig && sig <= LX_NSIG); - assert(!step || run); - - /* - * Clear the current signal. - */ - *ctlp++ = PCCSIG; - - /* - * Send a signal if one was specified. - */ - if (sig != 0 && sig != LX_SIGSTOP) { - siginfo_t *infop; - - *ctlp++ = PCSSIG; - infop = (siginfo_t *)ctlp; - bzero(infop, sizeof (siginfo_t)); - infop->si_signo = ltos_signo[sig]; - - ctlp += sizeof (siginfo_t) / sizeof (long); - } - - /* - * If run is true, set the lwp running. 
- */ - if (run) { - *ctlp++ = PCRUN; - *ctlp++ = step ? PRSTEP : 0; - } - - size = (char *)ctlp - (char *)&ctl[0]; - assert(size <= sizeof (ctl)); - - if (write(fd, ctl, size) != size) { - lx_debug("failed to continue %s", strerror(errno)); - return (-EIO); - } - - return (0); -} - -static int -ptrace_cont_monitor(ptrace_monitor_map_t *p) -{ - long ctl[2]; - int fd; - - fd = open_procfile(p->pmm_monitor, O_WRONLY, "ctl"); - if (fd < 0) { - lx_debug("failed to open monitor ctl %d", - errno); - return (-EIO); - } - - ctl[0] = PCRUN; - ctl[1] = PRCSIG; - if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) { - (void) close(fd); - return (-EIO); - } - - (void) close(fd); - - return (0); -} - -static int -ptrace_cont(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig, int step) -{ - ptrace_monitor_map_t *p; - uintptr_t *debugreg; - int fd, ret; - - if (!is_traced(pid)) - return (-ESRCH); - - if (sig < 0 || sig > LX_NSIG) - return (-EINVAL); - - if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0) - return (-ESRCH); - - if ((ret = ptrace_cont_common(fd, sig, 1, step)) != 0) { - (void) close(fd); - return (ret); - } - - (void) close(fd); - - /* kludge: use debugreg[4] to remember the single-step flag */ - if ((debugreg = debug_registers(pid)) != NULL) - debugreg[4] = step; - - /* - * Check for a monitor and get it moving if we find it. If any of the - * /proc operations fail, we're kind of sunk so just return an error. - */ - (void) mutex_lock(&ptrace_map_mtx); - for (p = ptrace_monitor_map; p != NULL; p = p->pmm_next) { - if (p->pmm_target == lxpid) { - if ((ret = ptrace_cont_monitor(p)) != 0) - return (ret); - break; - } - } - (void) mutex_unlock(&ptrace_map_mtx); - - return (0); -} - -/* - * If a monitor exists for this traced process, dispose of it. - * First turn off its ptrace flag so we won't be notified of its - * impending demise. We ignore errors for this step since they - * indicate only that the monitor has been damaged due to pilot - * error. 
Then kill the monitor, and wait for it. If the wait - * succeeds we can dispose of the corpse, otherwise another thread's - * wait call has collected it and we need to set a flag in the - * structure so that if can be picked up in wait. - */ -static void -monitor_kill(pid_t lxpid, pid_t pid) -{ - ptrace_monitor_map_t *p, **pp; - pid_t mpid; - int fd; - long ctl[2]; - - (void) mutex_lock(&ptrace_map_mtx); - free_debug_registers(pid); - for (pp = &ptrace_monitor_map; (p = *pp) != NULL; pp = &p->pmm_next) { - if (p->pmm_target == lxpid) { - mpid = p->pmm_monitor; - if ((fd = open_procfile(mpid, O_WRONLY, "ctl")) >= 0) { - ctl[0] = PCUNSET; - ctl[1] = PR_PTRACE; - (void) write(fd, ctl, sizeof (ctl)); - (void) close(fd); - } - - (void) kill(mpid, SIGKILL); - - if (waitpid(mpid, NULL, 0) == mpid) { - *pp = p->pmm_next; - free(p); - } else { - p->pmm_exiting = 1; - } - - break; - } - } - (void) mutex_unlock(&ptrace_map_mtx); -} - -static int -ptrace_kill(pid_t lxpid, pid_t pid) +ptrace_kill(pid_t pid) { int ret; - if (!is_traced(pid)) - return (-ESRCH); - ret = kill(pid, SIGKILL); - /* kill off the monitor process, if any */ - monitor_kill(lxpid, pid); - - return (ret); -} - -static int -ptrace_step(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig) -{ - return (ptrace_cont(lxpid, pid, lwpid, sig, 1)); + return (ret == 0 ? 
ret : -errno); } static int @@ -1489,9 +1073,6 @@ ptrace_getregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) lx_user_regs_t regs; int ret; - if (!is_traced(pid)) - return (-ESRCH); - if ((ret = getregs(pid, lwpid, &regs)) != 0) return (ret); @@ -1506,9 +1087,6 @@ ptrace_setregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) { lx_user_regs_t regs; - if (!is_traced(pid)) - return (-ESRCH); - if (uucopy((void *)addr, &regs, sizeof (regs)) != 0) return (-errno); @@ -1521,9 +1099,6 @@ ptrace_getfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) lx_user_fpregs_t regs; int ret; - if (!is_traced(pid)) - return (-ESRCH); - if ((ret = getfpregs(pid, lwpid, &regs)) != 0) return (ret); @@ -1538,9 +1113,6 @@ ptrace_setfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) { lx_user_fpregs_t regs; - if (!is_traced(pid)) - return (-ESRCH); - if (uucopy((void *)addr, &regs, sizeof (regs)) != 0) return (-errno); @@ -1553,9 +1125,6 @@ ptrace_getfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) lx_user_fpxregs_t regs; int ret; - if (!is_traced(pid)) - return (-ESRCH); - if ((ret = getfpxregs(pid, lwpid, &regs)) != 0) return (ret); @@ -1570,412 +1139,124 @@ ptrace_setfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) { lx_user_fpxregs_t regs; - if (!is_traced(pid)) - return (-ESRCH); - if (uucopy((void *)addr, &regs, sizeof (regs)) != 0) return (-errno); return (setfpxregs(pid, lwpid, &regs)); } -static void __NORETURN -ptrace_monitor(int fd) +void +lx_ptrace_stop_if_option(int option, boolean_t child, ulong_t msg) { - struct { - long cmd; - union { - long flags; - sigset_t signals; - fltset_t faults; - } arg; - } ctl; - size_t size; - int monfd; - int rv; - - monfd = open_procfile(getpid(), O_WRONLY, "ctl"); - - ctl.cmd = PCSTRACE; /* trace only SIGTRAP */ - premptyset(&ctl.arg.signals); - praddset(&ctl.arg.signals, SIGTRAP); - size = sizeof (long) + sizeof (sigset_t); - (void) write(monfd, &ctl, size); /* can't fail */ - - ctl.cmd = PCSFAULT; - premptyset(&ctl.arg.faults); - size = sizeof (long) + sizeof (fltset_t); - 
(void) write(monfd, &ctl, size); /* can't fail */ - - ctl.cmd = PCUNSET; - ctl.arg.flags = PR_FORK; - size = sizeof (long) + sizeof (long); - (void) write(monfd, &ctl, size); /* can't fail */ - - ctl.cmd = PCSET; /* wait()able by the parent */ - ctl.arg.flags = PR_PTRACE; - size = sizeof (long) + sizeof (long); - (void) write(monfd, &ctl, size); /* can't fail */ - - (void) close(monfd); - - ctl.cmd = PCWSTOP; - size = sizeof (long); - - for (;;) { - /* - * Wait for the traced process to stop. - */ - if (write(fd, &ctl, size) != size) { - rv = (errno == ENOENT)? 0 : 1; - lx_debug("monitor failed to wait for LWP to stop: %s", + /* + * We call into the kernel to see if we need to stop for specific + * ptrace(2) events. + */ + lx_debug("lx_ptrace_stop_if_option(%d, %s, %lu)", option, + child ? "TRUE [child]" : "FALSE [parent]", msg); + if (syscall(SYS_brand, B_PTRACE_STOP_FOR_OPT, option, child, + msg) != 0) { + if (errno != ESRCH) { + /* + * This should _only_ fail if we are not traced, or do + * not have this option set. + */ + lx_err_fatal("B_PTRACE_STOP_FOR_OPT failed: %s", strerror(errno)); - _exit(rv); } - - lx_debug("monitor caught traced LWP"); - - /* - * Pull the ptrace trigger by sending ourself a SIGTRAP. This - * will cause this, the monitor process, to stop which will - * cause the parent's waitid(2) call to return this process - * id. In lx_wait(), we remap the monitor process's pid and - * status to those of the traced LWP. When the parent process - * uses ptrace to resume the traced LWP, it will additionally - * restart this process. - */ - (void) _lwp_kill(_lwp_self(), SIGTRAP); - - lx_debug("monitor was resumed"); } } -static int -ptrace_attach_common(int fd, pid_t lxpid, pid_t pid, lwpid_t lwpid, int run) +/* + * Signal to the in-kernel ptrace(2) subsystem that the next native fork() or + * thr_create() is part of an emulated fork(2) or clone(2). If PTRACE_CLONE + * was passed to clone(2), inherit_flag should be B_TRUE. 
+ */ +void +lx_ptrace_clone_begin(int option, boolean_t inherit_flag) { - pid_t child; - ptrace_monitor_map_t *p; - sigset_t unblock; - pstatus_t status; - long ctl[1 + sizeof (sysset_t) / sizeof (long) + 2]; - long *ctlp = ctl; - size_t size; - sysset_t *sysp; - int ret; - - /* - * We're going to need this structure so better to fail now before its - * too late to turn back. - */ - if ((p = malloc(sizeof (ptrace_monitor_map_t))) == NULL) - return (-EIO); - - if ((ret = get_status(pid, &status)) != 0) { - free(p); - return (ret); + lx_debug("lx_ptrace_clone_begin(%d, %sPTRACE_CLONE)", option, + inherit_flag ? "" : "!"); + if (syscall(SYS_brand, B_PTRACE_CLONE_BEGIN, option, + inherit_flag) != 0) { + lx_err_fatal("B_PTRACE_CLONE_BEGIN failed: %s", + strerror(errno)); } - - /* - * If this process is already traced, bail. - */ - if (status.pr_flags & PR_PTRACE) { - free(p); - return (-EPERM); - } - - /* - * Turn on the appropriate tracing flags. It's exceedingly unlikely - * that this operation will fail; any failure would probably be due - * to another /proc consumer mucking around. - */ - if (ptrace_trace_common(fd) != 0) { - free(p); - return (-EIO); - } - - /* - * Native ptrace automatically catches processes when they exec so we - * have to do that explicitly here. - */ - *ctlp++ = PCSEXIT; - sysp = (sysset_t *)ctlp; - ctlp += sizeof (sysset_t) / sizeof (long); - premptyset(sysp); - praddset(sysp, SYS_execve); - if (run) { - *ctlp++ = PCRUN; - *ctlp++ = 0; - } - - size = (char *)ctlp - (char *)&ctl[0]; - - if (write(fd, ctl, size) != size) { - free(p); - return (-EIO); - } - - /* - * Spawn the monitor proceses to notify this process of events of - * interest in the traced process. We block signals here both so - * we're not interrupted during this operation and so that the - * monitor process doesn't accept signals. 
- */ - (void) sigprocmask(SIG_BLOCK, &blockable_sigs, &unblock); - if ((child = fork1()) == 0) - ptrace_monitor(fd); - (void) sigprocmask(SIG_SETMASK, &unblock, NULL); - - if (child == -1) { - lx_debug("failed to fork monitor process\n"); - free(p); - return (-EIO); - } - - p->pmm_monitor = child; - p->pmm_target = lxpid; - p->pmm_pid = pid; - p->pmm_lwpid = lwpid; - p->pmm_exiting = 0; - - (void) mutex_lock(&ptrace_map_mtx); - p->pmm_next = ptrace_monitor_map; - ptrace_monitor_map = p; - (void) mutex_unlock(&ptrace_map_mtx); - - return (0); } -static int -ptrace_attach(pid_t lxpid, pid_t pid, lwpid_t lwpid) +static long +lx_ptrace_kernel(int ptrace_op, pid_t lxpid, uintptr_t addr, uintptr_t data) { - int fd, ret; - long ctl; + int ret; /* - * Linux doesn't let you trace process 1 -- go figure. + * Call into the in-kernel ptrace(2) emulation code. */ - if (lxpid == 1) - return (-EPERM); - - if ((fd = open_lwpfile(pid, lwpid, O_WRONLY | O_EXCL, "lwpctl")) < 0) - return (errno == EBUSY ? -EPERM : -ESRCH); - - ctl = PCSTOP; - if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) { - lx_err("failed to stop %d/%d\n", (int)pid, (int)lwpid); - assert(0); + lx_debug("revectoring to B_PTRACE_KERNEL(%d, %d, %p, %p)", ptrace_op, + lxpid, addr, data); + ret = syscall(SYS_brand, B_PTRACE_KERNEL, ptrace_op, lxpid, addr, + data); + if (ret == 0) { + lx_debug("\t= %d", ret); + } else { + lx_debug("\t= %d (%s)", ret, strerror(errno)); } - ret = ptrace_attach_common(fd, lxpid, pid, lwpid, 0); - - (void) close(fd); - - return (ret); + return (ret == 0 ? 
ret : -errno); } -static int -ptrace_detach(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig) +long +lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) { - long ctl[2]; - int fd, ret; - - if (!is_traced(pid)) - return (-ESRCH); - - if (sig < 0 || sig > LX_NSIG) - return (-EINVAL); - - if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0) - return (-ESRCH); - - if (syscall(SYS_brand, B_PTRACE_EXT_OPTS, B_PTRACE_DETACH, pid, 0) != 0) - return (-ESRCH); + int ptrace_op = (int)p1; + pid_t pid, lxpid = (pid_t)p2; + lwpid_t lwpid; /* - * The /proc ptrace flag may not be set, but we clear it - * unconditionally since doing so doesn't hurt anything. + * Some PTRACE_* requests are emulated entirely in the kernel. */ - ctl[0] = PCUNSET; - ctl[1] = PR_PTRACE; - if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) { - (void) close(fd); - return (-EIO); - } - + switch (ptrace_op) { /* - * Clear the brand-specific system call tracing flag to ensure that - * the target doesn't stop unexpectedly some time in the future. + * PTRACE_TRACEME and PTRACE_ATTACH operations induce the tracing of + * one LWP by another. The target LWP must not be traced already. + * Both `data' and `addr' are ignored in both cases. */ - if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 0)) != 0) { - (void) close(fd); - return (-ret); - } + case LX_PTRACE_TRACEME: + return (lx_ptrace_kernel(ptrace_op, 0, 0, 0)); - /* kill off the monitor process, if any */ - monitor_kill(lxpid, pid); + case LX_PTRACE_ATTACH: + return (lx_ptrace_kernel(ptrace_op, lxpid, 0, 0)); /* - * Turn on the run-on-last-close flag so that all tracing flags will be - * cleared when we close the control file descriptor. + * PTRACE_DETACH, PTRACE_SYSCALL, PTRACE_SINGLESTEP and PTRACE_CONT + * are all restarting actions. They are only allowed when attached + * to the target LWP and when that target LWP is in a "ptrace-stop" + * condition. 
*/ - ctl[0] = PCSET; - ctl[1] = PR_RLC; - if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) { - (void) close(fd); - return (-EIO); + case LX_PTRACE_DETACH: + case LX_PTRACE_SYSCALL: + case LX_PTRACE_CONT: + case LX_PTRACE_SINGLESTEP: + /* + * These actions also require the LWP to be traced and stopped, but do + * not restart the target LWP. + */ + case LX_PTRACE_SETOPTIONS: + case LX_PTRACE_GETEVENTMSG: + return (lx_ptrace_kernel(ptrace_op, lxpid, p3, p4)); } /* - * Clear the current signal (if any) and possibly send the traced - * process a new signal. + * The rest of the emulated PTRACE_* actions are emulated in userland. + * They require the target LWP to be traced and in currently + * "ptrace-stop", but do not subsequently restart the target LWP. */ - ret = ptrace_cont_common(fd, sig, 0, 0); - - (void) close(fd); - - return (ret); -} - -static int -ptrace_syscall(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig) -{ - int ret; - - if (!is_traced(pid)) + if (lx_lpid_to_spair(lxpid, &pid, &lwpid) < 0 || + !is_ptrace_stopped(lxpid)) { return (-ESRCH); - - if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 1)) != 0) - return (-ret); - - return (ptrace_cont(lxpid, pid, lwpid, sig, 0)); -} - -static int -ptrace_setoptions(pid_t pid, int options) -{ - int ret; - int fd; - int error = 0; - struct { - long cmd; - union { - long flags; - sigset_t signals; - fltset_t faults; - } arg; - } ctl; - size_t size; - pstatus_t status; - - if ((ret = get_status(pid, &status)) != 0) - return (ret); - - if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) - return (-errno); - - /* since we're doing option tracing now, only catch sigtrap */ - ctl.cmd = PCSTRACE; - premptyset(&ctl.arg.signals); - praddset(&ctl.arg.signals, SIGTRAP); - size = sizeof (long) + sizeof (sigset_t); - if (write(fd, &ctl, size) != size) { - error = -errno; - } else { - /* - * If we're tracing fork, set inherit-on-fork, otherwise clear - * it. 
- */ - if (options & LX_PTRACE_O_TRACEFORK) { - ctl.cmd = PCSET; - } else { - ctl.cmd = PCUNSET; - } - ctl.arg.flags = PR_FORK; - size = sizeof (long) + sizeof (long); - if (write(fd, &ctl, size) != size) - error = -errno; } - (void) close(fd); - - if (error != 0) - return (error); - - ret = syscall(SYS_brand, B_PTRACE_EXT_OPTS, B_PTRACE_EXT_OPTS_SET, pid, - options); - - return ((ret != 0) ? -errno : 0); -} - -void -lx_ptrace_stop_if_option(int option, boolean_t child, ulong_t msg) -{ - pid_t pid; - uint_t curr_opts; - - pid = getpid(); - if (pid == 1) - pid = zoneinit_pid; - - /* first we have to see if the stop option is set for this process */ - if (syscall(SYS_brand, B_PTRACE_EXT_OPTS, B_PTRACE_EXT_OPTS_GET, pid, - &curr_opts) != 0) - return; - - if (child) { - /* - * If we just forked/cloned, then the trace flags only carry - * over to the child if the specific flag was enabled on the - * parent. For example, if only TRACEFORK is enabled and we - * clone, then we must clear the trace flags. If TRACEFORK is - * enabled and we fork, then we keep the flags. - */ - if (option == LX_PTRACE_O_TRACECLONE || - option == LX_PTRACE_O_TRACEFORK || - option == LX_PTRACE_O_TRACEVFORK) { - - if ((curr_opts & option) == 0) - (void) syscall(SYS_brand, B_PTRACE_EXT_OPTS, - B_PTRACE_EXT_OPTS_SET, pid, 0); - - /* - * Since we know we're the child we have to modify how - * we stop. Set the emulation's child flag in the - * option. - */ - option |= EMUL_PTRACE_O_CHILD; - } - } - - /* now if the option is/was set, this brand call will stop us */ - if (curr_opts & option) - (void) syscall(SYS_brand, B_PTRACE_STOP_FOR_OPT, option, msg); -} - -static int -ptrace_geteventmsg(pid_t pid, ulong_t *msgp) -{ - int ret; - - ret = syscall(SYS_brand, B_PTRACE_GETEVENTMSG, pid, msgp); - - return ((ret != 0) ? 
-errno : 0); -} - -long -lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) -{ - pid_t pid, lxpid = (pid_t)p2; - lwpid_t lwpid; - - if ((p1 != LX_PTRACE_TRACEME) && - (lx_lpid_to_spair(lxpid, &pid, &lwpid) < 0)) - return (-ESRCH); - - switch (p1) { - case LX_PTRACE_TRACEME: - return (ptrace_traceme()); - + switch (ptrace_op) { case LX_PTRACE_PEEKTEXT: case LX_PTRACE_PEEKDATA: return (ptrace_peek(pid, p3, (long *)p4)); @@ -1990,14 +1271,8 @@ lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) case LX_PTRACE_POKEUSER: return (ptrace_poke_user(pid, lwpid, p3, (int)p4)); - case LX_PTRACE_CONT: - return (ptrace_cont(lxpid, pid, lwpid, (int)p4, 0)); - case LX_PTRACE_KILL: - return (ptrace_kill(lxpid, pid)); - - case LX_PTRACE_SINGLESTEP: - return (ptrace_step(lxpid, pid, lwpid, (int)p4)); + return (ptrace_kill(pid)); case LX_PTRACE_GETREGS: return (ptrace_getregs(pid, lwpid, p4)); @@ -2011,419 +1286,13 @@ lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) case LX_PTRACE_SETFPREGS: return (ptrace_setfpregs(pid, lwpid, p4)); - case LX_PTRACE_ATTACH: - return (ptrace_attach(lxpid, pid, lwpid)); - - case LX_PTRACE_DETACH: - return (ptrace_detach(lxpid, pid, lwpid, (int)p4)); - case LX_PTRACE_GETFPXREGS: return (ptrace_getfpxregs(pid, lwpid, p4)); case LX_PTRACE_SETFPXREGS: return (ptrace_setfpxregs(pid, lwpid, p4)); - case LX_PTRACE_SYSCALL: - return (ptrace_syscall(lxpid, pid, lwpid, (int)p4)); - - case LX_PTRACE_SETOPTIONS: - return (ptrace_setoptions(pid, (int)p4)); - - case LX_PTRACE_GETEVENTMSG: - return (ptrace_geteventmsg(pid, (ulong_t *)p4)); - default: return (-EINVAL); } } - -void -lx_ptrace_fork(void) -{ - /* - * Send a special signal (that has no Linux equivalent) to indicate - * that we're in this particularly special case. The signal will be - * ignored by this process, but noticed by /proc consumers tracing - * this process. 
- */ - (void) _lwp_kill(_lwp_self(), SIGWAITING); -} - -static void -ptrace_catch_fork(pid_t pid, int monitor) -{ - long ctl[14 + 2 * sizeof (sysset_t) / sizeof (long)]; - long *ctlp; - sysset_t *sysp; - size_t size; - pstatus_t ps; - pid_t child; - int fd, err; - - /* - * If any of this fails, we're really sunk since the child - * will be stuck in the middle of lx_ptrace_fork(). - * Fortunately it's practically assured to succeed unless - * something is seriously wrong on the system. - */ - if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) { - lx_debug("lx_catch_fork: failed to control %d", - (int)pid); - return; - } - - /* - * Turn off the /proc PR_PTRACE flag so the parent doesn't get - * spurious wake ups while we're working our dark magic. Arrange to - * catch the process when it exits from fork, and turn on the /proc - * inherit-on-fork flag so we catcht the child as well. We then run - * the process, wait for it to stop on the fork1(2) call and reset - * the tracing flags to their original state. - */ - ctlp = ctl; - *ctlp++ = PCCSIG; - if (!monitor) { - *ctlp++ = PCUNSET; - *ctlp++ = PR_PTRACE; - } - *ctlp++ = PCSET; - *ctlp++ = PR_FORK; - *ctlp++ = PCSEXIT; - sysp = (sysset_t *)ctlp; - ctlp += sizeof (sysset_t) / sizeof (long); - premptyset(sysp); - praddset(sysp, SYS_forksys); /* fork1() is forksys(0, 0) */ - *ctlp++ = PCRUN; - *ctlp++ = 0; - *ctlp++ = PCWSTOP; - if (!monitor) { - *ctlp++ = PCSET; - *ctlp++ = PR_PTRACE; - } - *ctlp++ = PCUNSET; - *ctlp++ = PR_FORK; - *ctlp++ = PCSEXIT; - sysp = (sysset_t *)ctlp; - ctlp += sizeof (sysset_t) / sizeof (long); - premptyset(sysp); - if (monitor) - praddset(sysp, SYS_execve); - - size = (char *)ctlp - (char *)&ctl[0]; - assert(size <= sizeof (ctl)); - - if (write(fd, ctl, size) != size) { - (void) close(fd); - lx_debug("lx_catch_fork: failed to set %d running", - (int)pid); - return; - } - - /* - * Get the status so we can find the value returned from fork1() -- - * the child process's pid. 
- */ - if (get_status(pid, &ps) != 0) { - (void) close(fd); - lx_debug("lx_catch_fork: failed to get status for %d", - (int)pid); - return; - } - - child = (pid_t)ps.pr_lwp.pr_reg[R_R0]; - - /* - * We're done with the parent -- off you go. - */ - ctl[0] = PCRUN; - ctl[1] = 0; - size = 2 * sizeof (long); - - if (write(fd, ctl, size) != size) { - (void) close(fd); - lx_debug("lx_catch_fork: failed to set %d running", - (int)pid); - return; - } - - (void) close(fd); - - /* - * If fork1(2) failed, we're done. - */ - if (child < 0) { - lx_debug("lx_catch_fork: fork1 failed"); - return; - } - - /* - * Now we need to screw with the child process. - */ - if ((fd = open_lwpfile(child, 1, O_WRONLY, "lwpctl")) < 0) { - lx_debug("lx_catch_fork: failed to control %d", - (int)child); - return; - } - - ctlp = ctl; - *ctlp++ = PCUNSET; - *ctlp++ = PR_FORK; - *ctlp++ = PCSEXIT; - sysp = (sysset_t *)ctlp; - ctlp += sizeof (sysset_t) / sizeof (long); - premptyset(sysp); - size = (char *)ctlp - (char *)&ctl[0]; - - if (write(fd, ctl, size) != size) { - (void) close(fd); - lx_debug("lx_catch_fork: failed to clear trace flags for %d", - (int)child); - return; - } - - /* - * Now treat the child as though we had attached to it explicitly. - */ - err = ptrace_attach_common(fd, child, child, 1, 1); - assert(err == 0); - - (void) close(fd); -} - -static void -set_dr6(pid_t pid, siginfo_t *infop) -{ - uintptr_t *debugreg; - uintptr_t addr; - uintptr_t base; - size_t size = NULL; - int dr7; - int lrw; - int i; - - if ((debugreg = debug_registers(pid)) == NULL) - return; - - debugreg[6] = 0xffff0ff0; /* read as ones */ - switch (infop->si_code) { - case TRAP_TRACE: - debugreg[6] |= 0x4000; /* single-step */ - break; - case TRAP_RWATCH: - case TRAP_WWATCH: - case TRAP_XWATCH: - dr7 = debugreg[7]; - addr = (uintptr_t)infop->si_addr; - for (i = 0; i < 4; i++) { - if ((dr7 & (1 << (2 * i))) == 0) /* enabled? 
*/ - continue; - lrw = (dr7 >> (16 + (4 * i))) & 0xf; - switch (lrw >> 2) { /* length */ - case 0: size = 1; break; - case 1: size = 2; break; - case 2: size = 8; break; - case 3: size = 4; break; - } - base = debugreg[i]; - if (addr >= base && addr < base + size) - debugreg[6] |= (1 << i); - } - /* - * Were we also attempting a single-step? - * (kludge: we use debugreg[4] for this flag.) - */ - if (debugreg[4]) - debugreg[6] |= 0x4000; - break; - default: - break; - } -} - -/* - * This is called from the emulation of the wait4, waitpid and waitid system - * calls to take into account: - * - the monitor processes which we spawn to observe other processes from - * ptrace_attach(). - * - the extended si_status result we can get when extended ptrace options - * are enabled. - */ -int -lx_ptrace_wait(siginfo_t *infop) -{ - ptrace_monitor_map_t *p, **pp; - pid_t lxpid, pid = infop->si_pid; - lwpid_t lwpid; - int fd; - pstatus_t status; - - /* - * If the process observed by waitid(2) corresponds to the monitor - * process for a traced thread, we need to rewhack the siginfo_t to - * look like it came from the traced thread with the flags set - * according to the current state. - */ - (void) mutex_lock(&ptrace_map_mtx); - for (pp = &ptrace_monitor_map; (p = *pp) != NULL; pp = &p->pmm_next) { - if (p->pmm_monitor == pid) { - assert(infop->si_code == CLD_EXITED || - infop->si_code == CLD_KILLED || - infop->si_code == CLD_DUMPED || - infop->si_code == CLD_TRAPPED); - goto found; - } - } - (void) mutex_unlock(&ptrace_map_mtx); - - if (infop->si_code == CLD_TRAPPED) { - /* - * If the traced process got a SIGWAITING, we must be in the - * middle of a clone(2) with CLONE_PTRACE set. - */ - if (infop->si_status == SIGWAITING) { - ptrace_catch_fork(pid, 0); - return (-1); - } - - /* - * If the traced process got a SIGTRAP then Linux ptrace - * options might have been set, so setup the extended - * si_status to contain the (possible) event. 
Note that - * our definitions for the ptrace events (e.g. - * LX_PTRACE_EVENT_FORK) is already shifted <<8 as documented - * on the Linux ptrace(2) man page. - */ - if (infop->si_status == SIGTRAP) { - uint_t event; - - if (syscall(SYS_brand, B_PTRACE_EXT_OPTS, - B_PTRACE_EXT_OPTS_EVT, pid, &event) == 0) - infop->si_status |= event; - } - } - - if (get_status(pid, &status) == 0 && - (status.pr_lwp.pr_flags & PR_STOPPED) && - status.pr_lwp.pr_why == PR_SIGNALLED && - status.pr_lwp.pr_info.si_signo == SIGTRAP) - set_dr6(pid, &status.pr_lwp.pr_info); - - return (0); - -found: - /* - * If the monitor is in the exiting state, ignore the event and free - * the monitor structure if the monitor has exited. By returning -1 we - * indicate to the caller that this was a spurious return from - * waitid(2) and that it should ignore the result and try again. - */ - if (p->pmm_exiting) { - if (infop->si_code == CLD_EXITED || - infop->si_code == CLD_KILLED || - infop->si_code == CLD_DUMPED) { - *pp = p->pmm_next; - (void) mutex_unlock(&ptrace_map_mtx); - free(p); - } - return (-1); - } - - lxpid = p->pmm_target; - pid = p->pmm_pid; - lwpid = p->pmm_lwpid; - (void) mutex_unlock(&ptrace_map_mtx); - - /* - * If we can't find the traced process, kill off its monitor. - */ - if ((fd = open_lwpfile(pid, lwpid, O_RDONLY, "lwpstatus")) < 0) { - assert(errno == ENOENT); - monitor_kill(lxpid, pid); - infop->si_code = CLD_EXITED; - infop->si_status = 0; - infop->si_pid = lxpid; - return (0); - } - - if (read(fd, &status.pr_lwp, sizeof (status.pr_lwp)) != - sizeof (status.pr_lwp)) { - lx_err("read lwpstatus failed %d %s", fd, strerror(errno)); - assert(0); - } - - (void) close(fd); - - /* - * If the traced process isn't stopped, this is a truly spurious - * event probably caused by another /proc consumer tracing the - * monitor. 
- */ - if (!(status.pr_lwp.pr_flags & PR_STOPPED)) { - (void) ptrace_cont_monitor(p); - return (-1); - } - - switch (status.pr_lwp.pr_why) { - case PR_SIGNALLED: - /* - * If the traced process got a SIGWAITING, we must be in the - * middle of a clone(2) with CLONE_PTRACE set. - */ - if (status.pr_lwp.pr_what == SIGWAITING) { - ptrace_catch_fork(lxpid, 1); - (void) ptrace_cont_monitor(p); - return (-1); - } - infop->si_code = CLD_TRAPPED; - infop->si_status = status.pr_lwp.pr_what; - if (status.pr_lwp.pr_info.si_signo == SIGTRAP) - set_dr6(pid, &status.pr_lwp.pr_info); - break; - - case PR_REQUESTED: - /* - * Make it look like the traced process stopped on an - * event of interest. - */ - infop->si_code = CLD_TRAPPED; - infop->si_status = SIGTRAP; - break; - - case PR_JOBCONTROL: - /* - * Ignore this as it was probably caused by another /proc - * consumer tracing the monitor. - */ - (void) ptrace_cont_monitor(p); - return (-1); - - case PR_SYSEXIT: - /* - * Processes traced via a monitor (rather than using the - * native Solaris ptrace support) explicitly trace returns - * from exec system calls since it's an implicit ptrace - * trace point. Accordingly we need to present a process - * in that state as though it had reached the ptrace trace - * point. 
- */ - if (status.pr_lwp.pr_what == SYS_execve) { - infop->si_code = CLD_TRAPPED; - infop->si_status = SIGTRAP; - break; - } - - /*FALLTHROUGH*/ - - case PR_SYSENTRY: - case PR_FAULTED: - case PR_SUSPENDED: - default: - lx_err("didn't expect %d (%d %d)", status.pr_lwp.pr_why, - status.pr_lwp.pr_what, status.pr_lwp.pr_flags); - assert(0); - } - - infop->si_pid = lxpid; - - return (0); -} diff --git a/usr/src/lib/brand/lx/lx_brand/common/signal.c b/usr/src/lib/brand/lx/lx_brand/common/signal.c index b845ae5cac..9029249b10 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/signal.c +++ b/usr/src/lib/brand/lx/lx_brand/common/signal.c @@ -345,6 +345,14 @@ static int lx_sigsegv_depth = 0; #endif /* + * Setting LX_NO_ABORT_HANDLER in the environment will prevent the emulated + * Linux program from modifying the signal handling disposition for SIGSEGV or + * SIGABRT. Useful for debugging programs which fall over themselves to + * prevent useful core files being generated. + */ +static int lx_no_abort_handler = 0; + +/* * Cache result of process.max-file-descriptor to avoid calling getrctl() * for each lx_ppoll(). */ @@ -497,6 +505,29 @@ ltos_sigcode(int si_code) } } +/* + * Convert the "status" field of a SIGCLD siginfo_t. We need to extract the + * illumos signal number and convert it to a Linux signal number while leaving + * the ptrace(2) event bits intact. + */ +int +stol_status(int s) +{ + /* + * We mask out the top bit here in case PTRACE_O_TRACESYSGOOD + * is in use and 0x80 has been ORed with the signal number. + */ + int stat = stol_signo[s & 0x7f]; + assert(stat != -1); + + /* + * We must mix in the ptrace(2) event which may be stored in + * the second byte of the status code. We also re-include the + * PTRACE_O_TRACESYSGOOD bit. 
+ */ + return ((s & 0xff80) | stat); +} + int stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop) { @@ -530,7 +561,8 @@ stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop) case LX_SIGCHLD: lx_siginfo.lsi_pid = siginfop->si_pid; - lx_siginfo.lsi_status = siginfop->si_status; + lx_siginfo.lsi_status = stol_status( + siginfop->si_status); lx_siginfo.lsi_utime = siginfop->si_utime; lx_siginfo.lsi_stime = siginfop->si_stime; break; @@ -1552,6 +1584,17 @@ lx_call_user_handler(int sig, siginfo_t *sip, void *p) size_t stksize; int lx_sig; + switch (sig) { + case SIGCLD: + /* + * Signal to an interrupted waitpid() that it was interrupted + * by a SIGCLD, and should restart to grab the wait status + * this signal represented. + */ + lx_had_sigchild = 1; + break; + } + /* * If Illumos signal has no Linux equivalent, effectively ignore it. */ @@ -1568,6 +1611,18 @@ lx_call_user_handler(int sig, siginfo_t *sip, void *p) lx_debug("lxsap @ 0x%p", lxsap); /* + * If the delivery of this signal interrupted a system call, we must + * only restart it if sigaction(2) was used to set the SA_RESTART flag + * for this signal. The lx_emulate() function checks this per-thread + * variable to discover the restart disposition of the most recently + * handled signal. + * + * NOTE: this mechanism may not stand up to close scrutiny in the face + * of nested asynchronous signal delivery. + */ + lx_do_syscall_restart = !!(lxsap->lxsa_flags & LX_SA_RESTART); + + /* * Emulate vsyscall support. * * Linux magically maps a single page into the address space of each @@ -1740,6 +1795,18 @@ lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp, return (-errno); if ((sig = ltos_signo[lx_sig]) != -1) { + if (lx_no_abort_handler != 0) { + /* + * If LX_NO_ABORT_HANDLER has been set, we will + * not allow the emulated program to do + * anything hamfisted with SIGSEGV or SIGABRT + * signals. 
+ */ + if (sig == SIGSEGV || sig == SIGABRT) { + return (0); + } + } + /* * Block this signal while messing with its dispostion */ @@ -2068,6 +2135,10 @@ lx_siginit(void) sigset_t new_set, oset; int lx_sig, sig; + if (getenv("LX_NO_ABORT_HANDLER") != NULL) { + lx_no_abort_handler = 1; + } + /* * Block all signals possible while setting up the signal imposition * mechanism. diff --git a/usr/src/lib/brand/lx/lx_brand/common/wait.c b/usr/src/lib/brand/lx/lx_brand/common/wait.c index 031eb5e5cd..c3421858eb 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/wait.c +++ b/usr/src/lib/brand/lx/lx_brand/common/wait.c @@ -22,7 +22,7 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2014 Joyent, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. */ /* @@ -70,6 +70,7 @@ #include <sys/wait.h> #include <sys/lx_types.h> #include <sys/lx_signal.h> +#include <sys/lx_debug.h> #include <sys/lx_misc.h> #include <sys/lx_syscall.h> #include <sys/syscall.h> @@ -100,32 +101,23 @@ extern long max_pid; +/* + * Split the passed waitpid/waitid options into two separate variables: + * those for the native illumos waitid(2), and the extra Linux-specific + * options we will handle in our brand-specific code. + */ static int -ltos_options(uintptr_t options) +ltos_options(uintptr_t options, int *native_options, int *extra_options) { int newoptions = 0; - int rval; - lx_waitid_args_t extra; if (((options) & ~(LX_WNOHANG | LX_WUNTRACED | LX_WEXITED | LX_WCONTINUED | LX_WNOWAIT | LX_WNOTHREAD | LX_WALL | LX_WCLONE)) != 0) { return (-1); } - /* - * We use the B_STORE_ARGS command to store any of LX_WNOTHREAD, - * LX_WALL, and LX_WCLONE that have been set as options on this waitid - * call. These flags are stored as part of the lwp_brand_data, so that - * when there is a later syscall to waitid, the brand code there can - * detect that we added extra flags here and use them as appropriate. 
- * We pass them in here rather than the normal channel for flags to - * prevent polluting the namespace. - */ - extra.waitid_flags = options & (LX_WNOTHREAD | LX_WALL | LX_WCLONE); - rval = syscall(SYS_brand, B_STORE_ARGS, &extra, - sizeof (lx_waitid_args_t), NULL, NULL, NULL, NULL); - if (rval < 0) - return (rval); + + *extra_options = options & (LX_WNOTHREAD | LX_WALL | LX_WCLONE); if (options & LX_WNOHANG) newoptions |= WNOHANG; @@ -138,10 +130,13 @@ ltos_options(uintptr_t options) if (options & LX_WNOWAIT) newoptions |= WNOWAIT; - /* The trapped option is implicit on Linux */ + /* + * The trapped option is implicit on Linux. + */ newoptions |= WTRAPPED; - return (newoptions); + *native_options = newoptions; + return (0); } static int @@ -164,10 +159,7 @@ lx_wstat(int code, int status) break; case CLD_TRAPPED: case CLD_STOPPED: - stat = stol_signo[status]; - assert(stat != -1); - stat <<= 8; - stat |= WSTOPFLG; + stat = (stol_status(status) << 8) | WSTOPFLG; break; case CLD_CONTINUED: stat = WCONTFLG; @@ -177,33 +169,31 @@ lx_wstat(int code, int status) return (stat); } -/* wrapper to make solaris waitid work properly with ptrace */ static int -lx_waitid_helper(idtype_t idtype, id_t id, siginfo_t *info, int options) +lx_waitid_helper(idtype_t idtype, id_t id, siginfo_t *sip, int native_options, + int extra_options) { - do { - /* - * It's possible that we return EINVAL here if the idtype is - * P_PID or P_PGID and id is out of bounds for a valid pid or - * pgid, but Linux expects to see ECHILD. No good way occurs to - * handle this so we'll punt for now. - */ - if (waitid(idtype, id, info, options) < 0) - return (-errno); - - /* - * If the WNOHANG flag was specified and no child was found - * return 0. - */ - if ((options & WNOHANG) && info->si_pid == 0) - return (0); - - /* - * It's possible that we may have a spurious return for one of - * the child processes created by the ptrace subsystem. If - * that's the case, we simply try again. 
- */ - } while (lx_ptrace_wait(info) == -1); + /* + * Call into our in-kernel waitid() wrapper: + */ +restart: + lx_had_sigchild = 0; + if (syscall(SYS_brand, B_HELPER_WAITID, idtype, id, sip, + native_options, extra_options) != 0) { + if (errno == EINTR && (lx_had_sigchild || + lx_do_syscall_restart)) { + /* + * If we handled a SIGCLD while blocked in waitid(), + * or the SA_RESTART flag was set, we should wait + * again. + */ + lx_debug("lx_waitid_helper() restarting due to" + " interrupted system call"); + goto restart; + } + return (-1); + } + return (0); } @@ -214,11 +204,12 @@ lx_wait4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) struct rusage ru = { 0 }; idtype_t idtype; id_t id; - int options, status = 0; + int status = 0; pid_t pid = (pid_t)p1; int rval; + int native_options, extra_options; - if ((options = ltos_options(p3)) == -1) + if (ltos_options(p3, &native_options, &extra_options) == -1) return (-EINVAL); if (pid > max_pid) @@ -260,14 +251,17 @@ lx_wait4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) id = pid; } - options |= WEXITED | WTRAPPED; + native_options |= WEXITED | WTRAPPED; + + if (lx_waitid_helper(idtype, id, &info, native_options, + extra_options) == -1) { + return (-errno); + } - if ((rval = lx_waitid_helper(idtype, id, &info, options)) < 0) - return (rval); /* * If the WNOHANG flag was specified and no child was found return 0. 
*/ - if ((options & WNOHANG) && info.si_pid == 0) + if ((native_options & WNOHANG) && info.si_pid == 0) return (0); status = lx_wstat(info.si_code, info.si_status); @@ -297,9 +291,10 @@ lx_waitpid(uintptr_t p1, uintptr_t p2, uintptr_t p3) long lx_waitid(uintptr_t idtype, uintptr_t id, uintptr_t infop, uintptr_t opt) { - int rval, options; + int native_options, extra_options; siginfo_t s_info = {0}; - if ((options = ltos_options(opt)) == -1) + + if (ltos_options(opt, &native_options, &extra_options) == -1) return (-EINVAL); if (((opt) & (LX_WEXITED | LX_WSTOPPED | LX_WCONTINUED)) == 0) @@ -318,11 +313,14 @@ lx_waitid(uintptr_t idtype, uintptr_t id, uintptr_t infop, uintptr_t opt) default: return (-EINVAL); } - if ((rval = lx_waitid_helper(idtype, (id_t)id, &s_info, options)) < 0) - return (rval); + + if (lx_waitid_helper(idtype, id, &s_info, native_options, + extra_options) == -1) { + return (-errno); + } /* If the WNOHANG flag was specified and no child was found return 0. */ - if ((options & WNOHANG) && s_info.si_pid == 0) + if ((native_options & WNOHANG) && s_info.si_pid == 0) return (0); return (stol_siginfo(&s_info, (lx_siginfo_t *)infop)); diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h index 7d9c6fae0a..f50535d0c4 100644 --- a/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h @@ -55,6 +55,13 @@ extern int lx_rpm_delay; extern boolean_t lx_is_rpm; /* + * These thread-specific variables allow the signal interposition code + * to communicate restart disposition for any interrupting signals. 
+ */ +extern __thread int lx_had_sigchild; +extern __thread int lx_do_syscall_restart; + +/* * Values Linux expects for init */ #define LX_INIT_PGID 0 @@ -173,6 +180,7 @@ extern void lx_ptrace_init(); extern int lx_ptrace_wait(siginfo_t *); extern void lx_ptrace_fork(void); extern void lx_ptrace_stop_if_option(int, boolean_t, ulong_t msg); +extern void lx_ptrace_clone_begin(int, boolean_t); extern int lx_check_alloca(size_t); #define SAFE_ALLOCA(sz) (lx_check_alloca(sz) ? alloca(sz) : NULL) diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h index b4dc47faac..f3d39fca64 100644 --- a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h @@ -21,7 +21,7 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2014 Joyent, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_LX_SIGNAL_H @@ -396,6 +396,7 @@ extern void lx_sigdeliver(int, siginfo_t *, void *, size_t, void (*)(), void (*)(), uintptr_t); extern int stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop); +extern int stol_status(int); #endif /* !defined(_ASM) */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h index b4b72c78f9..3d7b9018e1 100644 --- a/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h @@ -34,6 +34,12 @@ extern "C" { #include <thread.h> +typedef enum lx_exit_type { + LX_ET_NONE = 0, + LX_ET_EXIT, + LX_ET_EXIT_GROUP +} lx_exit_type_t; + typedef struct lx_tsd { #if defined(_ILP32) /* 32-bit thread-specific Linux %gs value */ @@ -42,7 +48,7 @@ typedef struct lx_tsd { /* 64-bit thread-specific Linux %fsbase value */ uintptr_t lxtsd_fsbase; #endif - int lxtsd_exit; + lx_exit_type_t lxtsd_exit; int lxtsd_exit_status; ucontext_t lxtsd_exit_context; } lx_tsd_t; @@ -51,6 +57,8 @@ extern thread_key_t lx_tsd_key; 
extern void lx_swap_gs(long, long *); +extern void lx_exit_common(lx_exit_type_t, uintptr_t) __NORETURN; + #ifdef __cplusplus } #endif diff --git a/usr/src/lib/libproc/common/Pcontrol.c b/usr/src/lib/libproc/common/Pcontrol.c index bde48d1416..afa04c43c7 100644 --- a/usr/src/lib/libproc/common/Pcontrol.c +++ b/usr/src/lib/libproc/common/Pcontrol.c @@ -26,6 +26,7 @@ * Portions Copyright 2007 Chad Mynhier * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #include <assert.h> @@ -1758,6 +1759,9 @@ prldump(const char *caller, lwpstatus_t *lsp) case PR_SUSPENDED: dprintf("%s: SUSPENDED\n", caller); break; + case PR_BRAND: + dprintf("%s: BRANDPRIVATE (%d)\n", caller, lsp->pr_what); + break; default: dprintf("%s: Unknown\n", caller); break; @@ -1937,6 +1941,7 @@ Pstopstatus(struct ps_prochandle *P, case PR_FAULTED: case PR_JOBCONTROL: case PR_SUSPENDED: + case PR_BRAND: break; default: errno = EPROTO; @@ -3511,6 +3516,7 @@ Lstopstatus(struct ps_lwphandle *L, case PR_FAULTED: case PR_JOBCONTROL: case PR_SUSPENDED: + case PR_BRAND: break; default: errno = EPROTO; diff --git a/usr/src/man/man4/proc.4 b/usr/src/man/man4/proc.4 index e7058c410d..c0a044164a 100644 --- a/usr/src/man/man4/proc.4 +++ b/usr/src/man/man4/proc.4 @@ -665,6 +665,18 @@ the process. \fBpr_what\fR is unused in this case. .RE .sp +.ne 2 +.na +\fB\fBPR_BRAND\fR\fR +.ad +.RS 17n +indicates that the lwp stopped for a brand-specific reason. Interpretation +of the value of \fBpr_what\fR depends on which zone brand is in use. It is +not generally expected that an lwp stopped in this state will be restarted +by native \fBproc\fR(4) consumers. +.RE + +.sp .LP \fBpr_cursig\fR names the current signal, that is, the next signal to be delivered to the lwp, if any. 
\fBpr_info\fR, when the lwp is in a diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c index 139e40a5d1..4507c0303c 100644 --- a/usr/src/uts/common/brand/lx/os/lx_brand.c +++ b/usr/src/uts/common/brand/lx/os/lx_brand.c @@ -78,6 +78,10 @@ void lx_set_kern_version(zone_t *, char *); void lx_copy_procdata(proc_t *, proc_t *); extern int getsetcontext(int, void *); +extern int waitsys(idtype_t, id_t, siginfo_t *, int); +#if defined(_SYSCALL32_IMPL) +extern int waitsys32(idtype_t, id_t, siginfo_t *, int); +#endif extern void lx_proc_exit(proc_t *, klwp_t *); static void lx_psig_to_proc(proc_t *, kthread_t *, int); @@ -107,35 +111,38 @@ static int lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args, caddr_t exec_file, struct cred *cred, int brand_action); static boolean_t lx_native_exec(uint8_t, const char **); -static void lx_ptrace_exectrap(proc_t *); static uint32_t lx_map32limit(proc_t *); /* lx brand */ struct brand_ops lx_brops = { - lx_init_brand_data, - lx_free_brand_data, - lx_brandsys, - lx_setbrand, - lx_getattr, - lx_setattr, - lx_copy_procdata, - lx_proc_exit, - lx_exec, - lx_setrval, - lx_initlwp, - lx_forklwp, - lx_freelwp, - lx_exitlwp, - lx_elfexec, - NULL, - NULL, - lx_psig_to_proc, - NSIG, - lx_exit_with_sig, - lx_wait_filter, - lx_native_exec, - lx_ptrace_exectrap, - lx_map32limit + lx_init_brand_data, /* b_init_brand_data */ + lx_free_brand_data, /* b_free_brand_data */ + lx_brandsys, /* b_brandsys */ + lx_setbrand, /* b_setbrand */ + lx_getattr, /* b_getattr */ + lx_setattr, /* b_setattr */ + lx_copy_procdata, /* b_copy_procdata */ + lx_proc_exit, /* b_proc_exit */ + lx_exec, /* b_exec */ + lx_setrval, /* b_lwp_setrval */ + lx_initlwp, /* b_initlwp */ + lx_forklwp, /* b_forklwp */ + lx_freelwp, /* b_freelwp */ + lx_exitlwp, /* b_lwpexit */ + lx_elfexec, /* b_elfexec */ + NULL, /* b_sigset_native_to_brand */ + NULL, /* b_sigset_brand_to_native */ + lx_psig_to_proc, /* b_psig_to_proc */ + 
NSIG, /* b_nsig */ + lx_exit_with_sig, /* b_exit_with_sig */ + lx_wait_filter, /* b_wait_filter */ + lx_native_exec, /* b_native_exec */ + NULL, /* b_ptrace_exectrap */ + lx_map32limit, /* b_map32limit */ + lx_stop_notify, /* b_stop_notify */ + lx_waitid_helper, /* b_waitid_helper */ + lx_sigcld_repost, /* b_sigcld_repost */ + lx_issig_stop /* b_issig_stop */ }; struct brand_mach_ops lx_mops = { @@ -167,33 +174,39 @@ static struct modlinkage modlinkage = { void lx_proc_exit(proc_t *p, klwp_t *lwp) { - zone_t *z = p->p_zone; int sig = ptolxproc(p)->l_signal; - ASSERT(p->p_brand == &lx_brand); - ASSERT(p->p_brand_data != NULL); - - /* - * If init is dying and we aren't explicitly shutting down the zone - * or the system, then Solaris is about to restart init. The Linux - * init is not designed to handle a restart, which it interprets as - * a reboot. To give it a sane environment in which to run, we - * reboot the zone. - */ - if (p->p_pid == z->zone_proc_initpid) { - if (z->zone_boot_err == 0 && - z->zone_restart_init && - zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && - zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) - (void) zone_kadmin(A_REBOOT, 0, NULL, CRED()); - } + VERIFY(p->p_brand == &lx_brand); + VERIFY(p->p_brand_data != NULL); /* * We might get here if fork failed (e.g. ENOMEM) so we don't always * have an lwp (see brand_clearbrand). */ - if (lwp != NULL) + if (lwp != NULL) { + boolean_t reenter_mutex = B_FALSE; + + /* + * This brand entry point is called variously with and without + * the process p_lock held. It would be possible to refactor + * the brand infrastructure so that proc_exit() explicitly + * calls this hook (b_lwpexit/lx_exitlwp) for the last LWP in a + * process prior to detaching the brand with + * brand_clearbrand(). Absent such refactoring, we + * conditionally exit the mutex for the duration of the call. 
+ * + * The atomic replacement of both "p_brand" and "p_brand_data" + * is not affected by dropping and reacquiring the mutex here. + */ + if (mutex_owned(&p->p_lock) != 0) { + mutex_exit(&p->p_lock); + reenter_mutex = B_TRUE; + } lx_exitlwp(lwp); + if (reenter_mutex) { + mutex_enter(&p->p_lock); + } + } /* * The call path here is: @@ -261,310 +274,6 @@ lx_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize) return (-EINVAL); } -/* - * Enable/disable ptrace system call tracing for the given LWP. Enabling is - * done by both setting the flag in that LWP's brand data (in the kernel) and - * setting the process-wide trace flag (in the brand library of the traced - * process). - */ -static int -lx_ptrace_syscall_set(pid_t pid, id_t lwpid, int set) -{ - proc_t *p; - kthread_t *t; - klwp_t *lwp; - lx_proc_data_t *lpdp; - lx_lwp_data_t *lldp; - uintptr_t addr; - int ret, flag = 1; - - if ((p = sprlock(pid)) == NULL) - return (ESRCH); - - if (priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) { - sprunlock(p); - return (EPERM); - } - - if ((t = idtot(p, lwpid)) == NULL || (lwp = ttolwp(t)) == NULL) { - sprunlock(p); - return (ESRCH); - } - - if ((lpdp = ptolxproc(p)) == NULL || - (lldp = lwp->lwp_brand) == NULL) { - sprunlock(p); - return (ESRCH); - } - - if (set) { - /* - * Enable the ptrace flag for this LWP and this process. Note - * that we will turn off the LWP's ptrace flag, but we don't - * turn off the process's ptrace flag. - */ - lldp->br_ptrace = 1; - lpdp->l_ptrace = 1; - - addr = lpdp->l_traceflag; - - mutex_exit(&p->p_lock); - - /* - * This can fail only in some rare corner cases where the - * process is exiting or we're completely out of memory. In - * these cases, it's sufficient to return an error to the ptrace - * consumer and leave the process-wide flag set. 
- */ - ret = uwrite(p, &flag, sizeof (flag), addr); - - mutex_enter(&p->p_lock); - - /* - * If we couldn't set the trace flag, unset the LWP's ptrace - * flag as there ptrace consumer won't expect this LWP to stop. - */ - if (ret != 0) - lldp->br_ptrace = 0; - } else { - lldp->br_ptrace = 0; - ret = 0; - } - - sprunlock(p); - - if (ret != 0) - ret = EIO; - - return (ret); -} - -static void -lx_ptrace_fire(void) -{ - kthread_t *t = curthread; - klwp_t *lwp = ttolwp(t); - lx_lwp_data_t *lldp = lwp->lwp_brand; - - /* - * The ptrace flag only applies until the next event is encountered - * for the given LWP. If it's set, turn off the flag and poke the - * controlling process by raising a signal. - */ - if (lldp->br_ptrace) { - lldp->br_ptrace = 0; - tsignal(t, SIGTRAP); - } -} - -/* - * Supports Linux PTRACE_SETOPTIONS handling which is similar to PTRACE_TRACEME - * but return an event in the second byte of si_status. - */ -static int -lx_ptrace_ext_opts(int cmd, pid_t pid, uintptr_t val, int64_t *rval) -{ - proc_t *p; - lx_proc_data_t *lpdp; - uint_t ret; - - if ((p = sprlock(pid)) == NULL) - return (ESRCH); - - /* - * Note that priv_proc_cred_perm can disallow access to ourself if - * the proc's SNOCD p_flag is set, so we skip that check for ourself. 
- */ - if (curproc != p && - priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) { - sprunlock(p); - return (EPERM); - } - - if ((lpdp = ptolxproc(p)) == NULL) { - sprunlock(p); - return (ESRCH); - } - - switch (cmd) { - case B_PTRACE_EXT_OPTS_SET: - lpdp->l_ptrace_opts = (uint_t)val; - break; - - case B_PTRACE_EXT_OPTS_GET: - ret = lpdp->l_ptrace_opts; - if (lpdp->l_ptrace_is_traced) - ret |= EMUL_PTRACE_IS_TRACED; - break; - - case B_PTRACE_EXT_OPTS_EVT: - ret = lpdp->l_ptrace_event; - lpdp->l_ptrace_event = 0; - break; - - case B_PTRACE_DETACH: - lpdp->l_ptrace_is_traced = 0; - break; - - default: - sprunlock(p); - return (EINVAL); - } - - sprunlock(p); - - if (cmd == B_PTRACE_EXT_OPTS_GET || cmd == B_PTRACE_EXT_OPTS_EVT) { - if (copyout(&ret, (void *)val, sizeof (uint_t)) != 0) - return (EFAULT); - } - - *rval = 0; - return (0); -} - -/* - * Used to support Linux PTRACE_SETOPTIONS handling and similar to - * PTRACE_TRACEME. We signal ourselves to stop on return from this syscall and - * setup the event reason so the emulation can pull this out when someone - * 'waits' on this process. 
- */ -static void -lx_ptrace_stop_for_option(int option, ulong_t msg) -{ - proc_t *p = ttoproc(curthread); - sigqueue_t *sqp; - lx_proc_data_t *lpdp; - boolean_t child = B_FALSE; - - if ((lpdp = ptolxproc(p)) == NULL) { - /* this should never happen but just to be safe */ - return; - } - - if (option & EMUL_PTRACE_O_CHILD) { - child = B_TRUE; - option &= ~EMUL_PTRACE_O_CHILD; - } - - lpdp->l_ptrace_is_traced = 1; - - /* Track the event as the reason for stopping */ - switch (option) { - case LX_PTRACE_O_TRACEFORK: - if (!child) { - lpdp->l_ptrace_event = LX_PTRACE_EVENT_FORK; - lpdp->l_ptrace_eventmsg = msg; - } - break; - case LX_PTRACE_O_TRACEVFORK: - if (!child) { - lpdp->l_ptrace_event = LX_PTRACE_EVENT_VFORK; - lpdp->l_ptrace_eventmsg = msg; - } - break; - case LX_PTRACE_O_TRACECLONE: - if (!child) { - lpdp->l_ptrace_event = LX_PTRACE_EVENT_CLONE; - lpdp->l_ptrace_eventmsg = msg; - } - break; - case LX_PTRACE_O_TRACEEXEC: - lpdp->l_ptrace_event = LX_PTRACE_EVENT_EXEC; - break; - case LX_PTRACE_O_TRACEVFORKDONE: - lpdp->l_ptrace_event = LX_PTRACE_EVENT_VFORK_DONE; - lpdp->l_ptrace_eventmsg = msg; - break; - case LX_PTRACE_O_TRACEEXIT: - lpdp->l_ptrace_event = LX_PTRACE_EVENT_EXIT; - lpdp->l_ptrace_eventmsg = msg; - break; - case LX_PTRACE_O_TRACESECCOMP: - lpdp->l_ptrace_event = LX_PTRACE_EVENT_SECCOMP; - break; - } - - /* - * Post the required signal to ourselves so that we stop. - * - * Although Linux will send a SIGSTOP to a child process which is - * stopped due to PTRACE_O_TRACEFORK, etc., we do not send that signal - * since that leads us down the code path in the kernel which calls - * stop(PR_JOBCONTROL, SIGSTOP), which in turn means that the TS_XSTART - * flag gets turned off on the thread and this makes it complex to - * actually get this process going when the userland application wants - * to detach. Since consumers don't seem to depend on the specific - * signal, we'll just stop both the parent and child the same way. 
We - * do keep track of both the parent and child via the - * EMUL_PTRACE_O_CHILD bit, in case we need to revisit this later. - */ - psignal(p, SIGTRAP); - - /* - * Since we're stopping, we need to post the SIGCHLD to the parent. The - * code in sigcld expects p_wdata to be set to SIGTRAP before it can - * send the signal, so do that here. We also need p_wcode to be set as - * if we are ptracing, even though we're not really (see the code in - * stop() when procstop is set and p->p_proc_flag has the P_PR_PTRACE - * bit set). This is needed so that when the application calls waitid, - * it will properly retrieve the process. - */ - sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); - mutex_enter(&pidlock); - p->p_wdata = SIGTRAP; - p->p_wcode = CLD_TRAPPED; - sigcld(p, sqp); - mutex_exit(&pidlock); -} - -static int -lx_ptrace_geteventmsg(pid_t pid, ulong_t *msgp) -{ - proc_t *p; - lx_proc_data_t *lpdp; - ulong_t msg; - - if ((p = sprlock(pid)) == NULL) - return (ESRCH); - - if (curproc != p && - priv_proc_cred_perm(curproc->p_cred, p, NULL, VREAD) != 0) { - sprunlock(p); - return (EPERM); - } - - if ((lpdp = ptolxproc(p)) == NULL) { - sprunlock(p); - return (ESRCH); - } - - msg = lpdp->l_ptrace_eventmsg; - lpdp->l_ptrace_eventmsg = 0; - - sprunlock(p); - - if (copyout(&msg, (void *)msgp, sizeof (ulong_t)) != 0) - return (EFAULT); - - return (0); -} - -/* - * Brand entry to allow us to optionally generate the ptrace SIGTRAP on exec(). - * This will only be called if ptrace is enabled -- and we only generate the - * SIGTRAP if LX_PTRACE_O_TRACEEXEC hasn't been set. 
- */ -void -lx_ptrace_exectrap(proc_t *p) -{ - lx_proc_data_t *lpdp; - - if ((lpdp = ptolxproc(p)) == NULL || - !(lpdp->l_ptrace_opts & LX_PTRACE_O_TRACEEXEC)) { - psignal(p, SIGTRAP); - } -} - uint32_t lx_map32limit(proc_t *p) { @@ -719,6 +428,12 @@ lx_init_brand_data(zone_t *zone) (void) strlcpy(data->lxzd_kernel_version, "2.4.21", LX_VERS_MAX); data->lxzd_max_syscall = LX_NSYSCALLS; zone->zone_brand_data = data; + + /* + * In Linux, if the init(1) process terminates the system panics. + * The zone must reboot to simulate this behaviour. + */ + zone->zone_reboot_on_init_exit = B_TRUE; } void @@ -835,6 +550,16 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, lwpd->br_scms = 1; #endif + if (pd->l_traceflag != NULL && pd->l_ptrace != 0) { + /* + * If ptrace(2) is active on this process, it is likely + * that we just finished an emulated execve(2) in a + * traced child. The usermode traceflag will have been + * clobbered by the exec, so we set it again here: + */ + (void) suword32((void *)pd->l_traceflag, 1); + } + *rval = 0; return (0); case B_TTYMODES: @@ -934,11 +659,6 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, return (0); } - case B_PTRACE_SYSCALL: - *rval = lx_ptrace_syscall_set((pid_t)arg1, (id_t)arg2, - (int)arg3); - return (0); - case B_SYSENTRY: if (lx_systrace_enabled) { ASSERT(lx_systrace_entry_ptr != NULL); @@ -966,7 +686,7 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, #endif } - lx_ptrace_fire(); + (void) lx_ptrace_stop(LX_PR_SYSENTRY); pd = p->p_brand_data; @@ -987,7 +707,7 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, (*lx_systrace_return_ptr)(arg1, arg2, arg2, 0, 0, 0, 0); } - lx_ptrace_fire(); + (void) lx_ptrace_stop(LX_PR_SYSEXIT); pd = p->p_brand_data; @@ -1013,20 +733,55 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, */ return (lx_sched_affinity(cmd, arg1, arg2, arg3, rval)); - case B_PTRACE_EXT_OPTS: + case 
B_PTRACE_STOP_FOR_OPT: + return (lx_ptrace_stop_for_option((int)arg1, arg2 == 0 ? + B_FALSE : B_TRUE, (ulong_t)arg3)); + + case B_PTRACE_CLONE_BEGIN: + return (lx_ptrace_set_clone_inherit((int)arg1, arg2 == 0 ? + B_FALSE : B_TRUE)); + + case B_PTRACE_KERNEL: + return (lx_ptrace_kernel((int)arg1, (pid_t)arg2, arg3, arg4)); + + case B_HELPER_WAITID: { + idtype_t idtype = (idtype_t)arg1; + id_t id = (id_t)arg2; + siginfo_t *infop = (siginfo_t *)arg3; + int options = (int)arg4; + + lwpd = ttolxlwp(curthread); + + /* + * Our brand-specific waitid helper only understands a subset of + * the possible idtypes. Ensure we keep to that subset here: + */ + if (idtype != P_ALL && idtype != P_PID && idtype != P_PGID) { + return (EINVAL); + } + /* - * Set or get the ptrace extended options or get the event - * reason for the stop. + * Enable the return of emulated ptrace(2) stop conditions + * through lx_waitid_helper, and stash the Linux-specific + * extra waitid() flags. */ - return (lx_ptrace_ext_opts((int)arg1, (pid_t)arg2, arg3, rval)); + lwpd->br_waitid_emulate = B_TRUE; + lwpd->br_waitid_flags = (int)arg5; - case B_PTRACE_STOP_FOR_OPT: - lx_ptrace_stop_for_option((int)arg1, (ulong_t)arg2); - return (0); +#if defined(_SYSCALL32_IMPL) + if (get_udatamodel() != DATAMODEL_NATIVE) { + return (waitsys32(idtype, id, infop, options)); + } else +#endif + { + return (waitsys(idtype, id, infop, options)); + } + + lwpd->br_waitid_emulate = B_FALSE; + lwpd->br_waitid_flags = 0; - case B_PTRACE_GETEVENTMSG: - lx_ptrace_geteventmsg((pid_t)arg1, (ulong_t *)arg2); return (0); + } case B_UNSUPPORTED: { @@ -1702,6 +1457,7 @@ _init(void) /* for lx_futex() */ lx_futex_init(); + lx_ptrace_init(); err = mod_install(&modlinkage); if (err != 0) { @@ -1741,6 +1497,7 @@ _fini(void) if (brand_zone_count(&lx_brand)) return (EBUSY); + lx_ptrace_fini(); lx_pid_fini(); lx_ioctl_fini(); diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c index 
4c95c11100..abb0ab6e63 100644 --- a/usr/src/uts/common/brand/lx/os/lx_misc.c +++ b/usr/src/uts/common/brand/lx/os/lx_misc.c @@ -113,6 +113,13 @@ lx_exec() lx_pid_reassign(curthread); } + /* + * Inform ptrace(2) that we are processing an execve(2) call so that if + * we are traced we can post either the PTRACE_EVENT_EXEC event or the + * legacy SIGTRAP. + */ + (void) lx_ptrace_stop_for_option(LX_PTRACE_O_TRACEEXEC, B_FALSE, 0); + /* clear the fsbase values until the app. can reinitialize them */ lwpd->br_lx_fsbase = NULL; lwpd->br_ntv_fsbase = NULL; @@ -137,15 +144,21 @@ void lx_exitlwp(klwp_t *lwp) { struct lx_lwp_data *lwpd = lwptolxlwp(lwp); - proc_t *p; + proc_t *p = lwptoproc(lwp); kthread_t *t; sigqueue_t *sqp = NULL; pid_t ppid; id_t ptid; + VERIFY(MUTEX_NOT_HELD(&p->p_lock)); + if (lwpd == NULL) return; /* second time thru' */ + mutex_enter(&p->p_lock); + lx_ptrace_exit(p, lwp); + mutex_exit(&p->p_lock); + if (lwpd->br_clear_ctidp != NULL) { (void) suword32(lwpd->br_clear_ctidp, 0); (void) lx_futex((uintptr_t)lwpd->br_clear_ctidp, FUTEX_WAKE, 1, @@ -226,9 +239,17 @@ lx_freelwp(klwp_t *lwp) if (lwpd != NULL) { (void) removectx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, lx_save, NULL); - if (lwpd->br_pid != 0) + if (lwpd->br_pid != 0) { lx_pid_rele(lwptoproc(lwp)->p_pid, lwptot(lwp)->t_tid); + } + + /* + * Ensure that lx_ptrace_exit() has been called to detach + * ptrace(2) tracers and tracees. 
+ */ + VERIFY(lwpd->br_ptrace_tracer == NULL); + VERIFY(lwpd->br_ptrace_accord == NULL); lwp->lwp_brand = NULL; kmem_free(lwpd, sizeof (struct lx_lwp_data)); @@ -238,8 +259,8 @@ lx_freelwp(klwp_t *lwp) int lx_initlwp(klwp_t *lwp) { - struct lx_lwp_data *lwpd; - struct lx_lwp_data *plwpd; + lx_lwp_data_t *lwpd; + lx_lwp_data_t *plwpd = ttolxlwp(curthread); kthread_t *tp = lwptot(lwp); lwpd = kmem_zalloc(sizeof (struct lx_lwp_data), KM_SLEEP); @@ -265,8 +286,7 @@ lx_initlwp(klwp_t *lwp) if (tp->t_next == tp) { lwpd->br_ppid = tp->t_procp->p_ppid; lwpd->br_ptid = -1; - } else if (ttolxlwp(curthread) != NULL) { - plwpd = ttolxlwp(curthread); + } else if (plwpd != NULL) { bcopy(plwpd->br_tls, lwpd->br_tls, sizeof (lwpd->br_tls)); lwpd->br_ppid = plwpd->br_pid; lwpd->br_ptid = curthread->t_tid; @@ -292,6 +312,14 @@ lx_initlwp(klwp_t *lwp) installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, lx_save, NULL); + /* + * If the parent LWP has a ptrace(2) tracer, the new LWP may + * need to inherit that same tracer. + */ + if (plwpd != NULL) { + lx_ptrace_inherit_tracer(plwpd, lwpd); + } + return (0); } @@ -524,10 +552,7 @@ lx_exit_with_sig(proc_t *cp, sigqueue_t *sqp, void *brand_data) * SIGCHLD X - * * This is an XOR of __WCLONE being set, and SIGCHLD being the signal sent on - * process exit. Since (flags & __WCLONE) is not guaranteed to have the - * least-significant bit set when the flags is enabled, !! is used to place - * that bit into the least significant bit. Then, the bitwise XOR can be - * used, because there is no logical XOR in the C language. + * process exit. * * More information on wait in lx brands can be found at * usr/src/lib/brand/lx/lx_brand/common/wait.c. 
@@ -535,29 +560,45 @@ lx_exit_with_sig(proc_t *cp, sigqueue_t *sqp, void *brand_data) boolean_t lx_wait_filter(proc_t *pp, proc_t *cp) { - int flags; + lx_lwp_data_t *lwpd = ttolxlwp(curthread); + int flags = lwpd->br_waitid_flags; boolean_t ret; - if (LX_ARGS(waitid) != NULL) { - flags = LX_ARGS(waitid)->waitid_flags; - mutex_enter(&cp->p_lock); - if (flags & LX_WALL) { - ret = B_TRUE; - } else if (cp->p_stat == SZOMB || - cp->p_brand == &native_brand) { - ret = (((!!(flags & LX_WCLONE)) ^ - (stol_signo[SIGCHLD] == cp->p_exit_data)) - ? B_TRUE : B_FALSE); + if (!lwpd->br_waitid_emulate) { + return (B_TRUE); + } + + mutex_enter(&cp->p_lock); + if (flags & LX_WALL) { + ret = B_TRUE; + + } else { + int exitsig; + boolean_t is_clone, _wclone; + + /* + * Determine the exit signal for this process: + */ + if (cp->p_stat == SZOMB || cp->p_brand == &native_brand) { + exitsig = cp->p_exit_data; } else { - ret = (((!!(flags & LX_WCLONE)) ^ - (stol_signo[SIGCHLD] == ptolxproc(cp)->l_signal)) - ? B_TRUE : B_FALSE); + exitsig = ptolxproc(cp)->l_signal; } - mutex_exit(&cp->p_lock); - return (ret); - } else { - return (B_TRUE); + + /* + * To enable the bitwise XOR to stand in for the absent C + * logical XOR, we use the logical NOT operator twice to + * ensure the least significant bit is populated with the + * __WCLONE flag status. + */ + _wclone = !!(flags & LX_WCLONE); + is_clone = (stol_signo[SIGCHLD] == exitsig); + + ret = (_wclone ^ is_clone) ? B_TRUE : B_FALSE; } + mutex_exit(&cp->p_lock); + + return (ret); } void diff --git a/usr/src/uts/common/brand/lx/os/lx_pid.c b/usr/src/uts/common/brand/lx/os/lx_pid.c index aa8c751bc2..8552754c43 100644 --- a/usr/src/uts/common/brand/lx/os/lx_pid.c +++ b/usr/src/uts/common/brand/lx/os/lx_pid.c @@ -22,7 +22,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. 
*/ #include <sys/types.h> @@ -222,6 +222,28 @@ lx_lpid_to_spair(pid_t l_pid, pid_t *s_pid, id_t *s_tid) { struct lx_pid *hp; + if (l_pid == 1) { + pid_t initpid; + + /* + * We are trying to look up the Linux init process for the + * current zone, which we pretend has pid 1. + */ + if ((initpid = curzone->zone_proc_initpid) == -1) { + /* + * We could not find the init process for this zone. + */ + return (-1); + } + + if (s_pid != NULL) + *s_pid = initpid; + if (s_tid != NULL) + *s_tid = 1; + + return (0); + } + mutex_enter(&hash_lock); for (hp = ltos_pid_hash[LTOS_HASH(l_pid)]; hp; hp = hp->ltos_next) { if (l_pid == hp->l_pid) { diff --git a/usr/src/uts/common/brand/lx/os/lx_ptrace.c b/usr/src/uts/common/brand/lx/os/lx_ptrace.c new file mode 100644 index 0000000000..6e4b74531d --- /dev/null +++ b/usr/src/uts/common/brand/lx/os/lx_ptrace.c @@ -0,0 +1,2270 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * Emulation of the Linux ptrace(2) interface. + * + * OVERVIEW + * + * The Linux process model is somewhat different from the illumos native + * model. One critical difference is that each Linux thread has a unique + * identifier in the pid namespace. The lx brand assigns a pid to each LWP + * within the emulated process, giving the pid of the process itself to the + * first LWP. + * + * The Linux ptrace(2) interface allows for any LWP in a branded process to + * exert control over any other LWP within the same zone. Control is exerted + * by the use of the ptrace(2) system call itself, which accepts a number of + * request codes. 
Feedback on traced events is primarily received by the + * tracer through SIGCLD and the emulated waitpid(2) and waitid(2) system + * calls. Many of the possible ptrace(2) requests will only succeed if the + * target LWP is in a "ptrace-stop" condition. + * + * HISTORY + * + * The brand support for ptrace(2) was originally built on top of the rich + * support for debugging and tracing provided through the illumos /proc + * interfaces, mounted at /native/proc within the zone. The native legacy + * ptrace(3C) functionality was used as a starting point, but was generally + * insufficient for complete and precise emulation. The extant legacy + * interface, and indeed our native SIGCLD and waitid(2) facilities, are + * focused on _process_ level concerns -- the Linux interface has been + * extended to be aware of LWPs as well. + * + * In order to allow us to focus on providing more complete and accurate + * emulation without extensive and undesirable changes to the native + * facilities, this second generation ptrace(2) emulation is mostly separate + * from any other tracing or debugging framework in the system. + * + * ATTACHING TRACERS TO TRACEES + * + * There are several ways that a child LWP may become traced by a tracer. + * To determine which attach method caused a tracee to become attached, one + * may inspect the "br_ptrace_attach" member of the LWP-specific brand data + * with the debugger. + * + * The first attach methods to consider are the attaching ptrace(2) requests: + * + * PTRACE_TRACEME + * + * If an LWP makes a PTRACE_TRACEME call, it will be attached as a tracee + * to its parent LWP (br_ppid). Using PTRACE_TRACEME does _not_ cause the + * tracee to be held in a stop condition. It is common practice for + * consumers to raise(SIGSTOP) immediately afterward. + * + * PTRACE_ATTACH + * + * An LWP may attempt to trace any other LWP in this, or another, process. 
+ * We currently allow any attach where the process containing the tracer + * LWP has permission to write to /proc for the process containing the + * intended tracee. This action also sends a SIGSTOP to the newly attached + * tracee. + * + * The second class of attach methods are the clone(2)/fork(2) inheritance + * options that may be set on a tracee with PTRACE_SETOPTIONS: + * + * PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK and PTRACE_O_TRACECLONE + * + * If these options have been set on a tracee, then a fork(2), vfork(2) or + * clone(2) respectively will cause the newly created LWP to be traced by + * the same tracer. The same set of ptrace(2) options will also be set on + * the new child. + * + * The third class of attach method is the CLONE_PTRACE flag to clone(2). + * This flag induces the same inheritance as PTRACE_O_TRACECLONE, but is + * passed by the tracee as an argument to clone(2). + * + * DETACHING TRACEES + * + * Tracees can be detached by the tracer with the PTRACE_DETACH request. + * This request is only valid when the tracee is in a ptrace(2) stop + * condition, and is itself a restarting action. + * + * If the tracer exits without detaching all of its tracees, then all of the + * tracees are automatically detached and restarted. If a tracee was in + * "signal-delivery-stop" at the time the tracer exited, the signal will be + * released to the child unless it is a SIGSTOP. We drop this instance of + * SIGSTOP in order to prevent the child from becoming stopped by job + * control. + * + * ACCORD ALLOCATION AND MANAGEMENT + * + * The "lx_ptrace_accord_t" object tracks the agreement between a tracer LWP + * and zero or more tracee LWPs. It is explicitly illegal for a tracee to + * trace its tracer, and we block this in PTRACE_ATTACH/PTRACE_TRACEME. + * + * An LWP starts out without an accord. 
If a child of that LWP calls + * ptrace(2) with the PTRACE_TRACEME subcommand, or if the LWP itself uses + * PTRACE_ATTACH, an accord will be allocated and stored on that LWP. The + * accord structure is not released from that LWP until it arrives in + * lx_exitlwp(), as called by lwp_exit(). A new accord will not be + * allocated, even if one does not exist, once an LWP arrives in lx_exitlwp() + * and sets the LX_PTRACE_EXITING flag. An LWP will have at most one accord + * structure throughout its entire lifecycle; once it has one, it has the + * same one until death. + * + * The accord is reference counted (lxpa_refcnt), starting at a count of one + * at creation to represent the link from the tracer LWP to its accord. The + * accord is not freed until the reference count falls to zero. + * + * To make mutual exclusion between a detaching tracer and various notifying + * tracees simpler, the tracer will hold "pidlock" while it clears the + * accord members that point back to the tracer LWP and CV. + * + * SIGNALS AND JOB CONTROL + * + * Various actions, either directly ptrace(2) related or commonly associated + * with tracing, cause process- or thread-directed SIGSTOP signals to be sent + * to tracees. These signals, and indeed any signal other than SIGKILL, can + * be suppressed by the tracer when using a restarting request (including + * PTRACE_DETACH) on a child. The signal may also be substituted for a + * different signal. + * + * If a SIGSTOP (or other stopping signal) is not suppressed by the tracer, + * it will induce the regular illumos native job control stop of the entire + * traced process. This is at least passingly similar to the Linux "group + * stop" ptrace(2) condition. + * + * SYSTEM CALL TRACING + * + * The ptrace(2) interface enables the tracer to hold the tracee on entry and + * exit from system calls. 
When a stopped tracee is restarted through the + * PTRACE_SYSCALL request, the LX_PTRACE_SYSCALL flag is set until the next + * system call boundary. Whether this is a "syscall-entry-stop" or + * "syscall-exit-stop", the tracee is held and the tracer is notified via + * SIGCLD/waitpid(2) in the usual way. The LX_PTRACE_SYSCALL flag is + * cleared after each stop; for ongoing system call tracing the tracee must + * be continuously restarted with PTRACE_SYSCALL. + * + * EVENT STOPS + * + * Various events (particularly FORK, VFORK, CLONE, EXEC and EXIT) are + * enabled by the tracer through PTRACE_SETOPTIONS. Once enabled, the tracee + * will be stopped at the nominated points of interest and the tracer + * notified. The tracer may request additional information about the event, + * such as the pid of new LWPs and processes, via PTRACE_GETEVENTMSG. + * + * LOCK ORDERING RULES + * + * It is not safe, in general, to hold p_lock for two different processes at + * the same time. This constraint is the primary reason for the existence + * (and complexity) of the ptrace(2) accord mechanism. + * + * In order to facilitate looking up accords by the "pid" of a tracer LWP, + * p_lock for the tracer process may be held while entering the accord mutex + * (lxpa_lock). This mutex protects the accord flags and reference count. + * The reference count is manipulated through lx_ptrace_accord_hold() and + * lx_ptrace_accord_rele(). + * + * DO NOT interact with the accord mutex (lxpa_lock) directly. The + * lx_ptrace_accord_enter() and lx_ptrace_accord_exit() functions do various + * book-keeping and lock ordering enforcement and MUST be used. + * + * It is NOT legal to take ANY p_lock while holding the accord mutex + * (lxpa_lock). If the lxpa_tracees_lock is to be held concurrently with + * lxpa_lock, lxpa_lock MUST be taken first and dropped before taking p_lock + * of any processes from the tracee list. 
+ * + * It is NOT legal to take a tracee p_lock and then attempt to enter the + * accord mutex (or tracee list mutex) of its tracer. When running as the + * tracee LWP, the tracee's hold will prevent the accord from being freed. + * Use of the LX_PTRACE_STOPPING or LX_PTRACE_CLONING flag in the + * LWP-specific brand data prevents an exiting tracer from altering the + * tracee until the tracee has come to an orderly stop, without requiring the + * tracee to hold its own p_lock the entire time it is stopping. + * + * It is not safe, in general, to enter "pidlock" while holding the p_lock of + * any process. It is similarly illegal to hold any accord locks (lxpa_lock + * or lxpa_tracees_lock) while attempting to enter "pidlock". As "pidlock" is a + * global mutex, it should be held for the shortest possible time. + */ + +#include <sys/types.h> +#include <sys/kmem.h> +#include <sys/ksynch.h> +#include <sys/sysmacros.h> +#include <sys/procfs.h> +#include <sys/cmn_err.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/wait.h> +#include <sys/prsystm.h> +#include <sys/note.h> + +#include <sys/brand.h> +#include <sys/lx_brand.h> +#include <sys/lx_impl.h> +#include <sys/lx_misc.h> +#include <sys/lx_pid.h> +#include <lx_syscall.h> +#include <lx_signum.h> + + +typedef enum lx_ptrace_cont_flags_t { + LX_PTC_NONE = 0x00, + LX_PTC_SYSCALL = 0x01, + LX_PTC_SINGLESTEP = 0x02 +} lx_ptrace_cont_flags_t; + +/* + * Macros for checking the state of an LWP via "br_ptrace_flags": + */ +#define LX_PTRACE_BUSY \ + (LX_PTRACE_EXITING | LX_PTRACE_STOPPING | LX_PTRACE_CLONING) + +#define VISIBLE(a) (((a)->br_ptrace_flags & LX_PTRACE_EXITING) == 0) +#define TRACEE_BUSY(a) (((a)->br_ptrace_flags & LX_PTRACE_BUSY) != 0) + +#define ACCORD_HELD(a) MUTEX_HELD(&(a)->lxpa_lock) + +static kcondvar_t lx_ptrace_busy_cv; +static kmem_cache_t *lx_ptrace_accord_cache; + +/* + * Enter the accord mutex. 
+ */ +static void +lx_ptrace_accord_enter(lx_ptrace_accord_t *accord) +{ + VERIFY(MUTEX_NOT_HELD(&accord->lxpa_tracees_lock)); + + mutex_enter(&accord->lxpa_lock); +} + +/* + * Exit the accord mutex. If the reference count has dropped to zero, + * free the accord. + */ +static void +lx_ptrace_accord_exit(lx_ptrace_accord_t *accord) +{ + VERIFY(ACCORD_HELD(accord)); + + if (accord->lxpa_refcnt > 0) { + mutex_exit(&accord->lxpa_lock); + return; + } + + /* + * When the reference count drops to zero we must free the accord. + */ + VERIFY(accord->lxpa_tracer == NULL); + VERIFY(MUTEX_NOT_HELD(&accord->lxpa_tracees_lock)); + VERIFY(list_is_empty(&accord->lxpa_tracees)); + VERIFY(accord->lxpa_flags & LX_ACC_TOMBSTONE); + + mutex_destroy(&accord->lxpa_lock); + mutex_destroy(&accord->lxpa_tracees_lock); + + kmem_cache_free(lx_ptrace_accord_cache, accord); +} + +/* + * Drop our reference to this accord. If this drops the reference count + * to zero, the next lx_ptrace_accord_exit() will free the accord. + */ +static void +lx_ptrace_accord_rele(lx_ptrace_accord_t *accord) +{ + VERIFY(ACCORD_HELD(accord)); + + VERIFY(accord->lxpa_refcnt > 0); + accord->lxpa_refcnt--; +} + +/* + * Place an additional hold on an accord. + */ +static void +lx_ptrace_accord_hold(lx_ptrace_accord_t *accord) +{ + VERIFY(ACCORD_HELD(accord)); + + accord->lxpa_refcnt++; +} + +/* + * Fetch the accord for this LWP. If one has not yet been created, and the + * process is not exiting, allocate it now. Must be called with p_lock held + * for the process containing the target LWP. + * + * If successful, we return holding the accord lock (lxpa_lock). + */ +static int +lx_ptrace_accord_get_locked(klwp_t *lwp, lx_ptrace_accord_t **accordp, + boolean_t allocate_one) +{ + lx_ptrace_accord_t *lxpa; + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + proc_t *p = lwptoproc(lwp); + + VERIFY(MUTEX_HELD(&p->p_lock)); + + /* + * If this LWP does not have an accord, we wish to allocate + * and install one. 
+ */ + if ((lxpa = lwpd->br_ptrace_accord) == NULL) { + if (!allocate_one || !VISIBLE(lwpd)) { + /* + * Either we do not wish to allocate an accord, or this + * LWP has already begun exiting from a ptrace + * perspective. + */ + *accordp = NULL; + return (ESRCH); + } + + lxpa = kmem_cache_alloc(lx_ptrace_accord_cache, KM_SLEEP); + bzero(lxpa, sizeof (*lxpa)); + + /* + * The initial reference count is 1 because we are referencing + * it in from the soon-to-be tracer LWP. + */ + lxpa->lxpa_refcnt = 1; + mutex_init(&lxpa->lxpa_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&lxpa->lxpa_tracees_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&lxpa->lxpa_tracees, sizeof (lx_lwp_data_t), + offsetof(lx_lwp_data_t, br_ptrace_linkage)); + lxpa->lxpa_cvp = &p->p_cv; + + lxpa->lxpa_tracer = lwpd; + lwpd->br_ptrace_accord = lxpa; + } + + /* + * Lock the accord before returning it to the caller. + */ + lx_ptrace_accord_enter(lxpa); + + /* + * There should be at least one active reference to this accord, + * otherwise it should have been freed. + */ + VERIFY(lxpa->lxpa_refcnt > 0); + + *accordp = lxpa; + return (0); +} + +/* + * Accords belong to the tracer LWP. Get the accord for this tracer or return + * an error if it was not possible. To prevent deadlocks, the caller MUST NOT + * hold p_lock on its own or any other process. + * + * If successful, we return holding the accord lock (lxpa_lock). + */ +static int +lx_ptrace_accord_get_by_pid(pid_t lxpid, lx_ptrace_accord_t **accordp) +{ + int ret = ESRCH; + pid_t apid; + id_t atid; + proc_t *aproc; + kthread_t *athr; + klwp_t *alwp; + lx_lwp_data_t *alwpd; + + VERIFY(MUTEX_NOT_HELD(&curproc->p_lock)); + + /* + * Locate the process containing the tracer LWP based on its Linux pid + * and lock it. + */ + if (lx_lpid_to_spair(lxpid, &apid, &atid) != 0 || + (aproc = sprlock(apid)) == NULL) { + return (ESRCH); + } + + /* + * Locate the tracer LWP itself and ensure that it is visible to + * ptrace(2). 
+ */ + if ((athr = idtot(aproc, atid)) == NULL || + (alwp = ttolwp(athr)) == NULL || + (alwpd = lwptolxlwp(alwp)) == NULL || + !VISIBLE(alwpd)) { + sprunlock(aproc); + return (ESRCH); + } + + /* + * We should not fetch our own accord this way. + */ + if (athr == curthread) { + sprunlock(aproc); + return (EPERM); + } + + /* + * Fetch (or allocate) the accord owned by this tracer LWP: + */ + ret = lx_ptrace_accord_get_locked(alwp, accordp, B_TRUE); + + /* + * Unlock the process and return. + */ + sprunlock(aproc); + return (ret); +} + +/* + * Get (or allocate) the ptrace(2) accord for the current LWP, acting as a + * tracer. The caller MUST NOT currently hold p_lock on the process containing + * this LWP. + * + * If successful, we return holding the accord lock (lxpa_lock). + */ +static int +lx_ptrace_accord_get(lx_ptrace_accord_t **accordp, boolean_t allocate_one) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = lwptoproc(lwp); + int ret; + + VERIFY(MUTEX_NOT_HELD(&p->p_lock)); + + /* + * Lock the tracer (this LWP). + */ + mutex_enter(&p->p_lock); + + /* + * Fetch (or allocate) the accord for this LWP: + */ + ret = lx_ptrace_accord_get_locked(lwp, accordp, allocate_one); + + mutex_exit(&p->p_lock); + + return (ret); +} + +/* + * Restart an LWP if it is in "ptrace-stop". This function may induce sleep, + * so the caller MUST NOT hold any mutexes other than p_lock for the process + * containing the LWP. + */ +static void +lx_ptrace_restart_lwp(klwp_t *lwp) +{ + kthread_t *rt = lwptot(lwp); + proc_t *rproc = lwptoproc(lwp); + lx_lwp_data_t *rlwpd = lwptolxlwp(lwp); + + VERIFY(rt != curthread); + VERIFY(MUTEX_HELD(&rproc->p_lock)); + + /* + * Exclude potential meddling from procfs. + */ + prbarrier(rproc); + + /* + * Check that the LWP is still in "ptrace-stop" and, if so, restart it. + */ + thread_lock(rt); + if (BSTOPPED(rt) && rt->t_whystop == PR_BRAND) { + rt->t_schedflag |= TS_BSTART; + setrun_locked(rt); + + /* + * Clear stop reason. 
+ */ + rlwpd->br_ptrace_whystop = 0; + rlwpd->br_ptrace_whatstop = 0; + rlwpd->br_ptrace_flags &= ~LX_PTRACE_CLDPEND; + } + thread_unlock(rt); +} + +static void +lx_winfo(lx_lwp_data_t *remote, k_siginfo_t *ip, boolean_t waitflag, + pid_t *event_ppid, pid_t *event_pid) +{ + int signo; + + /* + * Populate our k_siginfo_t with data about this "ptrace-stop" + * condition: + */ + bzero(ip, sizeof (*ip)); + ip->si_signo = SIGCLD; + ip->si_pid = remote->br_pid; + ip->si_code = CLD_TRAPPED; + + switch (remote->br_ptrace_whatstop) { + case LX_PR_SYSENTRY: + case LX_PR_SYSEXIT: + ip->si_status = SIGTRAP; + if (remote->br_ptrace_options & LX_PTRACE_O_TRACESYSGOOD) { + ip->si_status |= 0x80; + } + break; + + case LX_PR_SIGNALLED: + signo = remote->br_ptrace_stopsig; + if (signo < 1 || signo >= LX_NSIG) { + /* + * If this signal number is not valid, pretend it + * was a SIGTRAP. + */ + ip->si_status = SIGTRAP; + } else { + ip->si_status = ltos_signo[signo]; + } + break; + + case LX_PR_EVENT: + ip->si_status = SIGTRAP | remote->br_ptrace_event; + /* + * Record the Linux pid of both this LWP and the create + * event we are dispatching. We will use this information + * to unblock any subsequent ptrace(2) events that depend + * on this one. + */ + if (event_ppid != NULL) + *event_ppid = remote->br_pid; + if (event_pid != NULL) + *event_pid = (pid_t)remote->br_ptrace_eventmsg; + break; + + default: + cmn_err(CE_PANIC, "unxpected stop subreason: %d", + remote->br_ptrace_whatstop); + } + + /* + * If WNOWAIT was specified, do not mark the event as posted + * so that it may be re-fetched on another call to waitid(). + */ + if (waitflag) { + remote->br_ptrace_whystop = 0; + remote->br_ptrace_whatstop = 0; + remote->br_ptrace_flags &= ~LX_PTRACE_CLDPEND; + } +} + +/* + * Receive notification from stop() of a PR_BRAND stop. 
+ */ +void +lx_stop_notify(proc_t *p, klwp_t *lwp, ushort_t why, ushort_t what) +{ + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + lx_ptrace_accord_t *accord; + klwp_t *plwp = NULL; + proc_t *pp = NULL; + lx_lwp_data_t *parent; + boolean_t cldpend = B_TRUE; + boolean_t cldpost = B_FALSE; + sigqueue_t *sqp = NULL; + + /* + * We currently only care about LX-specific stop reasons. + */ + if (why != PR_BRAND) + return; + + switch (what) { + case LX_PR_SYSENTRY: + case LX_PR_SYSEXIT: + case LX_PR_SIGNALLED: + case LX_PR_EVENT: + break; + default: + cmn_err(CE_PANIC, "unexpected subreason for PR_BRAND" + " stop: %d", (int)what); + } + + /* + * We should be holding the lock on our containing process. The + * STOPPING flag should have been set by lx_ptrace_stop() for all + * PR_BRAND stops. + */ + VERIFY(MUTEX_HELD(&p->p_lock)); + VERIFY(lwpd->br_ptrace_flags & LX_PTRACE_STOPPING); + VERIFY((accord = lwpd->br_ptrace_tracer) != NULL); + + /* + * We must drop our process lock to take "pidlock". The + * LX_PTRACE_STOPPING flag protects us from an exiting tracer. + */ + mutex_exit(&p->p_lock); + + /* + * Allocate before we enter any mutexes. + */ + sqp = kmem_zalloc(sizeof (*sqp), KM_SLEEP); + + /* + * We take pidlock now, which excludes all callers of waitid() and + * prevents a detaching tracer from clearing critical accord members. + */ + mutex_enter(&pidlock); + mutex_enter(&p->p_lock); + + /* + * Get the ptrace(2) "parent" process, to which we may send + * a SIGCLD signal later. + */ + if ((parent = accord->lxpa_tracer) != NULL && + (plwp = parent->br_lwp) != NULL) { + pp = lwptoproc(plwp); + } + + /* + * Our tracer should not have been modified in our absence; the + * LX_PTRACE_STOPPING flag prevents it. + */ + VERIFY(lwpd->br_ptrace_tracer == accord); + + /* + * Stash data for this stop condition in the LWP data while we hold + * both pidlock and our p_lock. 
+ */ + lwpd->br_ptrace_whystop = why; + lwpd->br_ptrace_whatstop = what; + + /* + * If this event does not depend on an event from the parent LWP, + * populate the siginfo_t for the event pending on this tracee LWP. + */ + if (!(lwpd->br_ptrace_flags & LX_PTRACE_PARENT_WAIT) && pp != NULL) { + cldpost = B_TRUE; + lx_winfo(lwpd, &sqp->sq_info, B_FALSE, NULL, NULL); + } + + /* + * Drop our p_lock so that we may lock the tracer. + */ + mutex_exit(&p->p_lock); + if (cldpost && pp != NULL) { + /* + * Post the SIGCLD to the tracer. + */ + mutex_enter(&pp->p_lock); + if (!sigismember(&pp->p_sig, SIGCLD)) { + sigaddqa(pp, plwp->lwp_thread, sqp); + cldpend = B_FALSE; + sqp = NULL; + } + mutex_exit(&pp->p_lock); + } + + /* + * We re-take our process lock now. The lock will be held until + * the thread is actually marked stopped, so we will not race with + * lx_ptrace_lock_if_stopped() or lx_waitid_helper(). + */ + mutex_enter(&p->p_lock); + + /* + * We clear the STOPPING flag; stop() continues to hold our p_lock + * until our thread stop state is visible. + */ + lwpd->br_ptrace_flags &= ~LX_PTRACE_STOPPING; + lwpd->br_ptrace_flags |= LX_PTRACE_STOPPED; + if (cldpend) { + /* + * We sent the SIGCLD for this new wait condition already. + */ + lwpd->br_ptrace_flags |= LX_PTRACE_CLDPEND; + } + + /* + * If lx_ptrace_exit_tracer() is trying to detach our tracer, it will + * be sleeping on this CV until LX_PTRACE_STOPPING is clear. Wake it + * now. + */ + cv_broadcast(&lx_ptrace_busy_cv); + + /* + * While still holding pidlock, we attempt to wake our tracer from a + * potential waitid() slumber. + */ + if (accord->lxpa_cvp != NULL) { + cv_broadcast(accord->lxpa_cvp); + } + + /* + * We release pidlock and return as we were called: with our p_lock + * held. + */ + mutex_exit(&pidlock); + + if (sqp != NULL) { + kmem_free(sqp, sizeof (*sqp)); + } +} + +/* + * For any restarting action (e.g. 
PTRACE_CONT, PTRACE_SYSCALL or + * PTRACE_DETACH) to be allowed, the tracee LWP must be in "ptrace-stop". This + * check must ONLY be run on tracees of the current LWP. If the check is + * successful, we return with the tracee p_lock held. + */ +static int +lx_ptrace_lock_if_stopped(lx_ptrace_accord_t *accord, lx_lwp_data_t *remote) +{ + klwp_t *rlwp = remote->br_lwp; + proc_t *rproc = lwptoproc(rlwp); + kthread_t *rt = lwptot(rlwp); + + /* + * We must never check that we, ourselves, are stopped. We must also + * have the accord tracee list locked while we lock our tracees. + */ + VERIFY(curthread != rt); + VERIFY(MUTEX_HELD(&accord->lxpa_tracees_lock)); + VERIFY(accord->lxpa_tracer == ttolxlwp(curthread)); + + /* + * Lock the process containing the tracee LWP. + */ + mutex_enter(&rproc->p_lock); + if (!VISIBLE(remote)) { + /* + * The tracee LWP is currently detaching itself as it exits. + * It is no longer visible to ptrace(2). + */ + mutex_exit(&rproc->p_lock); + return (ESRCH); + } + + /* + * We must only check whether tracees of the current LWP are stopped. + * We check this condition after confirming visibility as an exiting + * tracee may no longer be completely consistent. + */ + VERIFY(remote->br_ptrace_tracer == accord); + + if (!(remote->br_ptrace_flags & LX_PTRACE_STOPPED)) { + /* + * The tracee is not in "ptrace-stop", so we release the + * process. + */ + mutex_exit(&rproc->p_lock); + return (ESRCH); + } + + /* + * The tracee is stopped. We return holding its process lock so that + * the caller may manipulate it. + */ + return (0); +} + +static int +lx_ptrace_setoptions(lx_lwp_data_t *remote, uintptr_t options) +{ + /* + * Check for valid options. + */ + if ((options & ~LX_PTRACE_O_ALL) != 0) { + return (EINVAL); + } + + /* + * Set ptrace options on the target LWP. 
+ */ + remote->br_ptrace_options = (lx_ptrace_options_t)options; + + return (0); +} + +static int +lx_ptrace_geteventmsg(lx_lwp_data_t *remote, void *umsgp) +{ + int error; + +#if defined(_SYSCALL32_IMPL) + if (get_udatamodel() != DATAMODEL_NATIVE) { + uint32_t tmp = remote->br_ptrace_eventmsg; + + error = copyout(&tmp, umsgp, sizeof (uint32_t)); + } else +#endif + { + error = copyout(&remote->br_ptrace_eventmsg, umsgp, + sizeof (ulong_t)); + } + + return (error); +} + +/* + * Implements the PTRACE_CONT subcommand of the Linux ptrace(2) interface. + */ +static int +lx_ptrace_cont(lx_lwp_data_t *remote, lx_ptrace_cont_flags_t flags, int signo) +{ + klwp_t *lwp = remote->br_lwp; + + if (flags & LX_PTC_SINGLESTEP) { + /* + * We do not currently support single-stepping. + */ + lx_unsupported("PTRACE_SINGLESTEP not currently implemented"); + return (EINVAL); + } + + /* + * The tracer may choose to suppress the delivery of a signal, or + * select an alternative signal for delivery. If this is an + * appropriate ptrace(2) "signal-delivery-stop", br_ptrace_stopsig + * will be used as the new signal number. + * + * As with so many other aspects of the Linux ptrace(2) interface, this + * may fail silently if the state machine is not aligned correctly. + */ + remote->br_ptrace_stopsig = signo; + + /* + * Handle the syscall-stop flag if this is a PTRACE_SYSCALL restart: + */ + if (flags & LX_PTC_SYSCALL) { + remote->br_ptrace_flags |= LX_PTRACE_SYSCALL; + } else { + remote->br_ptrace_flags &= ~LX_PTRACE_SYSCALL; + } + + lx_ptrace_restart_lwp(lwp); + + return (0); +} + +/* + * Implements the PTRACE_DETACH subcommand of the Linux ptrace(2) interface. + * + * The LWP identified by the Linux pid "lx_pid" will, if it as a tracee of the + * current LWP, be detached and set runnable. If the specified LWP is not + * currently in the "ptrace-stop" state, the routine will return ESRCH as if + * the LWP did not exist at all. + * + * The caller must not hold p_lock on any process. 
+ */ +static int +lx_ptrace_detach(lx_ptrace_accord_t *accord, lx_lwp_data_t *remote, int signo, + boolean_t *release_hold) +{ + klwp_t *rlwp; + + rlwp = remote->br_lwp; + + /* + * The tracee LWP was in "ptrace-stop" and we now hold its p_lock. + * Detach the LWP from the accord and set it running. + */ + VERIFY(!TRACEE_BUSY(remote)); + remote->br_ptrace_flags &= ~(LX_PTRACE_SYSCALL | LX_PTRACE_INHERIT); + VERIFY(list_link_active(&remote->br_ptrace_linkage)); + list_remove(&accord->lxpa_tracees, remote); + + remote->br_ptrace_attach = LX_PTA_NONE; + remote->br_ptrace_tracer = NULL; + remote->br_ptrace_flags = 0; + *release_hold = B_TRUE; + + /* + * The tracer may, as described in lx_ptrace_cont(), choose to suppress + * or modify the delivered signal. + */ + remote->br_ptrace_stopsig = signo; + + lx_ptrace_restart_lwp(rlwp); + + return (0); +} + +/* + * This routine implements the PTRACE_ATTACH operation of the Linux ptrace(2) + * interface. + * + * This LWP is requesting to be attached as a tracer to another LWP -- the + * tracee. If a ptrace accord to track the list of tracees has not yet been + * allocated, one will be allocated and attached to this LWP now. + * + * The "br_ptrace_tracer" on the tracee LWP is set to this accord, and the + * tracee LWP is then added to the "lxpa_tracees" list in the accord. We drop + * locks between these two phases; the only consumer of trace events from this + * accord is this LWP, which obviously cannot be running waitpid(2) at the same + * time as this call to ptrace(2). + */ +static int +lx_ptrace_attach(pid_t lx_pid) +{ + int error = ESRCH; + int32_t one = 1; + /* + * Our (Tracer) LWP: + */ + lx_ptrace_accord_t *accord; + lx_lwp_data_t *lwpd = ttolxlwp(curthread); + /* + * Remote (Tracee) LWP: + */ + pid_t rpid; + id_t rtid; + proc_t *rproc; + kthread_t *rthr; + klwp_t *rlwp; + lx_lwp_data_t *rlwpd; + + if (lwpd->br_pid == lx_pid) { + /* + * We cannot trace ourselves. 
+ */ + return (EPERM); + } + + /* + * Ensure that we have an accord and obtain a lock on it. This + * routine should not fail because the LWP cannot make ptrace(2) system + * calls after it has begun exiting. + */ + VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_EXITING); + VERIFY(lx_ptrace_accord_get(&accord, B_TRUE) == 0); + + /* + * Place speculative hold in case the attach is successful. + */ + lx_ptrace_accord_hold(accord); + lx_ptrace_accord_exit(accord); + + /* + * Locate the process containing the tracee LWP based on its Linux pid + * and lock it. + */ + if (lx_lpid_to_spair(lx_pid, &rpid, &rtid) != 0 || + (rproc = sprlock(rpid)) == NULL) { + /* + * We could not find the target process. + */ + goto errout; + } + + /* + * Locate the tracee LWP. + */ + if ((rthr = idtot(rproc, rtid)) == NULL || + (rlwp = ttolwp(rthr)) == NULL || + (rlwpd = lwptolxlwp(rlwp)) == NULL || + !VISIBLE(rlwpd)) { + /* + * The LWP could not be found, was not branded, or is not + * visible to ptrace(2) at this time. + */ + goto unlock_errout; + } + + /* + * We now hold the lock on the tracee. Attempt to install ourselves + * as the tracer. + */ + if (curproc != rproc && priv_proc_cred_perm(curproc->p_cred, rproc, + NULL, VWRITE) != 0) { + /* + * This process does not have permission to trace the remote + * process. + */ + error = EPERM; + } else if (rlwpd->br_ptrace_tracer != NULL) { + /* + * This LWP is already being traced. + */ + VERIFY(list_link_active(&rlwpd->br_ptrace_linkage)); + VERIFY(rlwpd->br_ptrace_attach != LX_PTA_NONE); + error = EPERM; + } else { + lx_proc_data_t *rprocd; + + /* + * Bond the tracee to the accord. + */ + VERIFY0(rlwpd->br_ptrace_flags & LX_PTRACE_EXITING); + VERIFY(rlwpd->br_ptrace_attach == LX_PTA_NONE); + rlwpd->br_ptrace_attach = LX_PTA_ATTACH; + rlwpd->br_ptrace_tracer = accord; + + /* + * We had no tracer, and are thus not in the tracees list. + * It is safe to take the tracee list lock while we insert + * ourselves. 
+ */ + mutex_enter(&accord->lxpa_tracees_lock); + VERIFY(!list_link_active(&rlwpd->br_ptrace_linkage)); + list_insert_tail(&accord->lxpa_tracees, rlwpd); + mutex_exit(&accord->lxpa_tracees_lock); + + /* + * Send a thread-directed SIGSTOP. + */ + sigtoproc(rproc, rthr, SIGSTOP); + + /* + * Set the in-kernel process-wide ptrace(2) enable flag. + * Attempt also to write the usermode trace flag so that the + * process knows to enter the kernel for potential ptrace(2) + * syscall-stops. + */ + rprocd = ttolxproc(rthr); + rprocd->l_ptrace = 1; + mutex_exit(&rproc->p_lock); + (void) uwrite(rproc, &one, sizeof (one), rprocd->l_traceflag); + mutex_enter(&rproc->p_lock); + + error = 0; + } + +unlock_errout: + /* + * Unlock the process containing the tracee LWP and the accord. + */ + sprunlock(rproc); + +errout: + if (error != 0) { + /* + * The attach was not successful. Remove our speculative + * hold. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); + } + + return (error); +} + +int +lx_ptrace_set_clone_inherit(int option, boolean_t inherit_flag) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = lwptoproc(lwp); + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + + mutex_enter(&p->p_lock); + + switch (option) { + case LX_PTRACE_O_TRACEFORK: + case LX_PTRACE_O_TRACEVFORK: + case LX_PTRACE_O_TRACECLONE: + lwpd->br_ptrace_clone_option = option; + break; + + default: + return (EINVAL); + } + + if (inherit_flag) { + lwpd->br_ptrace_flags |= LX_PTRACE_INHERIT; + } else { + lwpd->br_ptrace_flags &= ~LX_PTRACE_INHERIT; + } + + mutex_exit(&p->p_lock); + return (0); +} + +/* + * If the parent LWP is being traced, we want to attach ourselves to the + * same accord. 
+ */ +void +lx_ptrace_inherit_tracer(lx_lwp_data_t *src, lx_lwp_data_t *dst) +{ + proc_t *srcp = lwptoproc(src->br_lwp); + proc_t *dstp = lwptoproc(dst->br_lwp); + lx_ptrace_accord_t *accord; + boolean_t unlock = B_FALSE; + + if (srcp == dstp) { + /* + * This is syslwp_create(), so the process p_lock is already + * held. + */ + VERIFY(MUTEX_HELD(&srcp->p_lock)); + } else { + unlock = B_TRUE; + mutex_enter(&srcp->p_lock); + } + + if ((accord = src->br_ptrace_tracer) == NULL) { + /* + * The source LWP does not have a tracer to inherit. + */ + goto out; + } + + /* + * There are two conditions to check when determining if the new + * child should inherit the same tracer (and tracing options) as its + * parent. Either condition is sufficient to trigger inheritance. + */ + dst->br_ptrace_attach = LX_PTA_NONE; + if ((src->br_ptrace_options & src->br_ptrace_clone_option) != 0) { + /* + * Condition 1: + * The clone(2), fork(2) and vfork(2) emulated system calls + * populate "br_ptrace_clone_option" with the specific + * ptrace(2) SETOPTIONS option that applies to this + * operation. If the relevant option has been enabled by the + * tracer then we inherit. + */ + dst->br_ptrace_attach |= LX_PTA_INHERIT_OPTIONS; + + } else if ((src->br_ptrace_flags & LX_PTRACE_INHERIT) != 0) { + /* + * Condition 2: + * If the caller opted in to inheritance with the + * PTRACE_CLONE flag to clone(2), the LX_PTRACE_INHERIT flag + * will be set and we inherit. + */ + dst->br_ptrace_attach |= LX_PTA_INHERIT_CLONE; + } + + /* + * These values only apply for the duration of a single clone(2), et + * al, system call. + */ + src->br_ptrace_flags &= ~LX_PTRACE_INHERIT; + src->br_ptrace_clone_option = 0; + + if (dst->br_ptrace_attach == LX_PTA_NONE) { + /* + * No condition triggered inheritance. + */ + goto out; + } + + /* + * Set the LX_PTRACE_CLONING flag to prevent us from being detached + * while our p_lock is dropped. 
+ */ + src->br_ptrace_flags |= LX_PTRACE_CLONING; + mutex_exit(&srcp->p_lock); + + /* + * Hold the accord for the new LWP. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_hold(accord); + lx_ptrace_accord_exit(accord); + + /* + * Install the tracer and copy the current PTRACE_SETOPTIONS options. + */ + dst->br_ptrace_tracer = accord; + dst->br_ptrace_options = src->br_ptrace_options; + + /* + * This flag prevents waitid() from seeing events for the new child + * until the parent is able to post the relevant ptrace event to + * the tracer. + */ + dst->br_ptrace_flags |= LX_PTRACE_PARENT_WAIT; + + mutex_enter(&accord->lxpa_tracees_lock); + VERIFY(list_link_active(&src->br_ptrace_linkage)); + VERIFY(!list_link_active(&dst->br_ptrace_linkage)); + list_insert_tail(&accord->lxpa_tracees, dst); + mutex_exit(&accord->lxpa_tracees_lock); + + /* + * Relock our process and clear our busy flag. + */ + mutex_enter(&srcp->p_lock); + src->br_ptrace_flags &= ~LX_PTRACE_CLONING; + + /* + * If lx_ptrace_exit_tracer() is trying to detach our tracer, it will + * be sleeping on this CV until LX_PTRACE_CLONING is clear. Wake it + * now. + */ + cv_broadcast(&lx_ptrace_busy_cv); + +out: + if (unlock) { + mutex_exit(&srcp->p_lock); + } +} + +static int +lx_ptrace_traceme(void) +{ + int error; + boolean_t did_attach = B_FALSE; + /* + * Our (Tracee) LWP: + */ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = lwptoproc(lwp); + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + /* + * Remote (Tracer) LWP: + */ + lx_ptrace_accord_t *accord; + + /* + * We are intending to be the tracee. Fetch (or allocate) the accord + * for our parent LWP. + */ + if ((error = lx_ptrace_accord_get_by_pid(lx_lwp_ppid(lwp, NULL, + NULL), &accord)) != 0) { + /* + * Could not determine the Linux pid of the parent LWP, or + * could not get the accord for that LWP. + */ + return (error); + } + + /* + * We now hold the accord lock. 
+ */ + if (accord->lxpa_flags & LX_ACC_TOMBSTONE) { + /* + * The accord is marked for death; give up now. + */ + lx_ptrace_accord_exit(accord); + return (ESRCH); + } + + /* + * Bump the reference count so that the accord is not freed. We need + * to drop the accord lock before we take our own p_lock. + */ + lx_ptrace_accord_hold(accord); + lx_ptrace_accord_exit(accord); + + /* + * We now lock _our_ process and determine if we can install our parent + * as our tracer. + */ + mutex_enter(&p->p_lock); + if (lwpd->br_ptrace_tracer != NULL) { + /* + * This LWP is already being traced. + */ + VERIFY(lwpd->br_ptrace_attach != LX_PTA_NONE); + error = EPERM; + } else { + /* + * Bond ourselves to the accord. We already bumped the accord + * reference count. + */ + VERIFY(lwpd->br_ptrace_attach == LX_PTA_NONE); + lwpd->br_ptrace_attach = LX_PTA_TRACEME; + lwpd->br_ptrace_tracer = accord; + did_attach = B_TRUE; + error = 0; + } + mutex_exit(&p->p_lock); + + /* + * Lock the accord tracee list and add this LWP. Once we are in the + * tracee list, it is the responsibility of the tracer to detach us. + */ + if (error == 0) { + lx_ptrace_accord_enter(accord); + mutex_enter(&accord->lxpa_tracees_lock); + + if (!(accord->lxpa_flags & LX_ACC_TOMBSTONE)) { + lx_proc_data_t *procd = ttolxproc(curthread); + + /* + * Put ourselves in the tracee list for this accord. + */ + VERIFY(!list_link_active(&lwpd->br_ptrace_linkage)); + list_insert_tail(&accord->lxpa_tracees, lwpd); + mutex_exit(&accord->lxpa_tracees_lock); + lx_ptrace_accord_exit(accord); + + /* + * Set the in-kernel process-wide ptrace(2) enable + * flag. Attempt also to write the usermode trace flag + * so that the process knows to enter the kernel for + * potential ptrace(2) syscall-stops. + */ + procd->l_ptrace = 1; + (void) suword32((void *)procd->l_traceflag, 1); + + return (0); + } + mutex_exit(&accord->lxpa_tracees_lock); + + /* + * The accord has been marked for death. We must + * untrace ourselves. 
+ */ + error = ESRCH; + lx_ptrace_accord_exit(accord); + } + + /* + * Our optimism was unjustified: We were unable to attach. We need to + * lock the process containing this LWP again in order to remove the + * tracer. + */ + VERIFY(error != 0); + mutex_enter(&p->p_lock); + if (did_attach) { + /* + * Verify that things were as we left them: + */ + VERIFY(!list_link_active(&lwpd->br_ptrace_linkage)); + VERIFY(lwpd->br_ptrace_tracer == accord); + + lwpd->br_ptrace_attach = LX_PTA_NONE; + lwpd->br_ptrace_tracer = NULL; + } + mutex_exit(&p->p_lock); + + /* + * Remove our speculative hold on the accord, possibly causing it to be + * freed in the process. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); + + return (error); +} + +static boolean_t +lx_ptrace_stop_common(proc_t *p, lx_lwp_data_t *lwpd, ushort_t what) +{ + VERIFY(MUTEX_HELD(&p->p_lock)); + + /* + * Mark this LWP as stopping and call stop() to enter "ptrace-stop". + */ + VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_STOPPING); + lwpd->br_ptrace_flags |= LX_PTRACE_STOPPING; + stop(PR_BRAND, what); + + /* + * We are back from "ptrace-stop" with our process lock held. + */ + lwpd->br_ptrace_flags &= ~(LX_PTRACE_STOPPING | LX_PTRACE_STOPPED | + LX_PTRACE_CLDPEND); + cv_broadcast(&lx_ptrace_busy_cv); + mutex_exit(&p->p_lock); + + return (B_TRUE); +} + +int +lx_ptrace_stop_for_option(int option, boolean_t child, ulong_t msg) +{ + kthread_t *t = curthread; + klwp_t *lwp = ttolwp(t); + proc_t *p = lwptoproc(lwp); + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + + mutex_enter(&p->p_lock); + if (lwpd->br_ptrace_tracer == NULL) { + mutex_exit(&p->p_lock); + return (ESRCH); + } + + if (!child) { + /* + * Only the first event posted by a new process is to be held + * until the matching parent event is dispatched, and only if + * it is a "child" event. This is not a child event, so we + * clear the wait flag. 
+ */ + lwpd->br_ptrace_flags &= ~LX_PTRACE_PARENT_WAIT; + } + + if (!(lwpd->br_ptrace_options & option)) { + if (option == LX_PTRACE_O_TRACEEXEC) { + /* + * Without PTRACE_O_TRACEEXEC, the Linux kernel will + * send SIGTRAP to the process. + */ + sigtoproc(p, t, SIGTRAP); + mutex_exit(&p->p_lock); + return (0); + } + + /* + * The flag for this trace event is not enabled, so we will not + * stop. + */ + mutex_exit(&p->p_lock); + return (ESRCH); + } + + if (child) { + switch (option) { + case LX_PTRACE_O_TRACECLONE: + case LX_PTRACE_O_TRACEFORK: + case LX_PTRACE_O_TRACEVFORK: + /* + * Send the child LWP a directed SIGSTOP. + */ + sigtoproc(p, t, SIGSTOP); + mutex_exit(&p->p_lock); + return (0); + default: + goto nostop; + } + } + + lwpd->br_ptrace_eventmsg = msg; + + switch (option) { + case LX_PTRACE_O_TRACECLONE: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_CLONE; + break; + case LX_PTRACE_O_TRACEEXEC: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_EXEC; + lwpd->br_ptrace_eventmsg = 0; + break; + case LX_PTRACE_O_TRACEEXIT: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_EXIT; + break; + case LX_PTRACE_O_TRACEFORK: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_FORK; + break; + case LX_PTRACE_O_TRACEVFORK: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_VFORK; + break; + case LX_PTRACE_O_TRACEVFORKDONE: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_VFORK_DONE; + lwpd->br_ptrace_eventmsg = 0; + break; + default: + goto nostop; + } + + /* + * p_lock for the process containing the tracee will be dropped by + * lx_ptrace_stop_common(). + */ + return (lx_ptrace_stop_common(p, lwpd, LX_PR_EVENT) ? 
0 : ESRCH); + +nostop: + lwpd->br_ptrace_event = 0; + lwpd->br_ptrace_eventmsg = 0; + mutex_exit(&p->p_lock); + return (ESRCH); +} + +boolean_t +lx_ptrace_stop(ushort_t what) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = lwptoproc(lwp); + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + + VERIFY(what == LX_PR_SYSENTRY || what == LX_PR_SYSEXIT || + what == LX_PR_SIGNALLED); + + /* + * If we do not have an accord, bail out early. + */ + if (lwpd->br_ptrace_tracer == NULL) + return (B_FALSE); + + /* + * Lock this process and re-check the condition. + */ + mutex_enter(&p->p_lock); + if (lwpd->br_ptrace_tracer == NULL) { + VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_SYSCALL); + mutex_exit(&p->p_lock); + return (B_FALSE); + } + + if (what == LX_PR_SYSENTRY || what == LX_PR_SYSEXIT) { + /* + * This is a syscall-entry-stop or syscall-exit-stop point. + */ + if (!(lwpd->br_ptrace_flags & LX_PTRACE_SYSCALL)) { + /* + * A system call stop has not been requested. + */ + mutex_exit(&p->p_lock); + return (B_FALSE); + } + + /* + * The PTRACE_SYSCALL restart command applies only to the next + * system call entry or exit. The tracer must restart us with + * PTRACE_SYSCALL while we are in ptrace-stop for us to fire + * again at the next system call boundary. + */ + lwpd->br_ptrace_flags &= ~LX_PTRACE_SYSCALL; + } + + /* + * p_lock for the process containing the tracee will be dropped by + * lx_ptrace_stop_common(). + */ + return (lx_ptrace_stop_common(p, lwpd, what)); +} + +int +lx_issig_stop(proc_t *p, klwp_t *lwp) +{ + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + int lx_sig; + + VERIFY(MUTEX_HELD(&p->p_lock)); + + /* + * If we do not have an accord, bail out now. Additionally, if there + * is no valid signal then we have no reason to stop. 
+ * Native signals are numbered 1 through NSIG - 1, so NSIG itself is + * rejected before lwp_cursig is used to index stol_signo[]. + */ + if (lwpd->br_ptrace_tracer == NULL || lwp->lwp_cursig == SIGKILL || + (lwp->lwp_cursig == 0 || lwp->lwp_cursig >= NSIG) || + (lx_sig = stol_signo[lwp->lwp_cursig]) < 1) { + return (0); + } + + /* + * We stash the signal on the LWP where our waitid_helper will find it + * and enter the ptrace "signal-delivery-stop" condition. + */ + lwpd->br_ptrace_stopsig = lx_sig; + (void) lx_ptrace_stop_common(p, lwpd, LX_PR_SIGNALLED); + mutex_enter(&p->p_lock); + + /* + * When we return, the signal may have been altered or suppressed. + */ + if (lwpd->br_ptrace_stopsig != lx_sig) { + int native_sig; + lx_sig = lwpd->br_ptrace_stopsig; + + if (lx_sig >= LX_NSIG) { + lx_sig = 0; + } + + /* + * Translate signal from Linux signal number back to + * an illumos native signal. + */ + if (lx_sig >= LX_NSIG || lx_sig < 0 || (native_sig = + ltos_signo[lx_sig]) < 1) { + /* + * The signal is not deliverable. + */ + lwp->lwp_cursig = 0; + lwp->lwp_extsig = 0; + if (lwp->lwp_curinfo) { + siginfofree(lwp->lwp_curinfo); + lwp->lwp_curinfo = NULL; + } + } else { + /* + * Alter the currently dispatching signal. + */ + if (native_sig == SIGKILL) { + /* + * We mark ourselves the victim and request + * a restart of signal processing. + */ + p->p_flag |= SKILLED; + p->p_flag &= ~SEXTKILLED; + return (-1); + } + lwp->lwp_cursig = native_sig; + lwp->lwp_extsig = 0; + if (lwp->lwp_curinfo != NULL) { + lwp->lwp_curinfo->sq_info.si_signo = native_sig; + } + } + } + + lwpd->br_ptrace_stopsig = 0; + return (0); +} + +static void +lx_ptrace_exit_tracer(proc_t *p, lx_lwp_data_t *lwpd, + lx_ptrace_accord_t *accord) +{ + VERIFY(MUTEX_NOT_HELD(&p->p_lock)); + + lx_ptrace_accord_enter(accord); + /* + * Mark this accord for death. This means no new tracees can be + * attached to this accord. 
+ */ + VERIFY0(accord->lxpa_flags & LX_ACC_TOMBSTONE); + accord->lxpa_flags |= LX_ACC_TOMBSTONE; + lx_ptrace_accord_exit(accord); + + /* + * Walk the list of tracees, detaching them and setting them runnable + * if they are stopped. + */ + for (;;) { + klwp_t *rlwp; + proc_t *rproc; + lx_lwp_data_t *remote; + kmutex_t *rmp; + + mutex_enter(&accord->lxpa_tracees_lock); + if (list_is_empty(&accord->lxpa_tracees)) { + mutex_exit(&accord->lxpa_tracees_lock); + break; + } + + /* + * Fetch the first tracee LWP in the list and lock the process + * which contains it. + */ + remote = list_head(&accord->lxpa_tracees); + rlwp = remote->br_lwp; + rproc = lwptoproc(rlwp); + /* + * The p_lock mutex persists beyond the life of the process + * itself. We save the address, here, to prevent the need to + * dereference the proc_t after awaking from sleep. + */ + rmp = &rproc->p_lock; + mutex_enter(rmp); + + if (TRACEE_BUSY(remote)) { + /* + * This LWP is currently detaching itself on exit, or + * mid-way through stop(). We must wait for this + * action to be completed. While we wait on the CV, we + * must drop the accord tracee list lock. + */ + mutex_exit(&accord->lxpa_tracees_lock); + cv_wait(&lx_ptrace_busy_cv, rmp); + + /* + * While we were waiting, some state may have changed. + * Restart the walk to be sure we don't miss anything. + */ + mutex_exit(rmp); + continue; + } + + /* + * We now hold p_lock on the process. Remove the tracee from + * the list. + */ + VERIFY(list_link_active(&remote->br_ptrace_linkage)); + list_remove(&accord->lxpa_tracees, remote); + + /* + * Unlink the accord and clear our trace flags. + */ + remote->br_ptrace_attach = LX_PTA_NONE; + remote->br_ptrace_tracer = NULL; + remote->br_ptrace_flags = 0; + + /* + * Let go of the list lock before we restart the LWP. We must + * not hold any locks other than the process p_lock when + * we call lx_ptrace_restart_lwp() as it will thread_lock + * the tracee. 
+ */ + mutex_exit(&accord->lxpa_tracees_lock); + + /* + * Ensure that the LWP is not stopped on our account. + */ + lx_ptrace_restart_lwp(rlwp); + + /* + * Unlock the former tracee. + */ + mutex_exit(rmp); + + /* + * Drop the hold this tracee had on the accord. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); + } + + mutex_enter(&p->p_lock); + lwpd->br_ptrace_accord = NULL; + mutex_exit(&p->p_lock); + + /* + * Clean up and release our hold on the accord. If we completely + * detached all tracee LWPs, this will free the accord. Otherwise, it + * will be freed when they complete their cleanup. + * + * We hold "pidlock" while clearing these members for easy exclusion of + * waitid(), etc. + */ + mutex_enter(&pidlock); + lx_ptrace_accord_enter(accord); + accord->lxpa_cvp = NULL; + accord->lxpa_tracer = NULL; + mutex_exit(&pidlock); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); +} + +static void +lx_ptrace_exit_tracee(proc_t *p, lx_lwp_data_t *lwpd, + lx_ptrace_accord_t *accord) +{ + VERIFY(MUTEX_NOT_HELD(&p->p_lock)); + + /* + * We are the tracee LWP. Lock the accord tracee list and then our + * containing process. + */ + mutex_enter(&accord->lxpa_tracees_lock); + mutex_enter(&p->p_lock); + + /* + * Remove our reference to the accord. We will release our hold + * later. + */ + VERIFY(lwpd->br_ptrace_tracer == accord); + lwpd->br_ptrace_attach = LX_PTA_NONE; + lwpd->br_ptrace_tracer = NULL; + + /* + * Remove this LWP from the accord tracee list: + */ + VERIFY(list_link_active(&lwpd->br_ptrace_linkage)); + list_remove(&accord->lxpa_tracees, lwpd); + + /* + * Wake up any tracers waiting for us to detach from the accord. + */ + cv_broadcast(&lx_ptrace_busy_cv); + mutex_exit(&p->p_lock); + mutex_exit(&accord->lxpa_tracees_lock); + + /* + * Grab "pidlock" and wake the tracer if it is blocked in waitid(). 
+ */ + mutex_enter(&pidlock); + if (accord->lxpa_cvp != NULL) { + cv_broadcast(accord->lxpa_cvp); + } + mutex_exit(&pidlock); + + /* + * Release our hold on the accord. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); +} + +/* + * This routine is called from lx_exitlwp() when an LWP is ready to exit. If + * this LWP is being traced, it will be detached from the tracer's accord. The + * routine will also detach any LWPs being traced by this LWP. + */ +void +lx_ptrace_exit(proc_t *p, klwp_t *lwp) +{ + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + lx_ptrace_accord_t *accord; + + VERIFY(MUTEX_HELD(&p->p_lock)); + + /* + * Mark our LWP as exiting from a ptrace perspective. This will + * prevent a new accord from being allocated if one does not exist + * already, and will make us invisible to PTRACE_ATTACH/PTRACE_TRACEME. + */ + VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_EXITING); + lwpd->br_ptrace_flags |= LX_PTRACE_EXITING; + + if ((accord = lwpd->br_ptrace_tracer) != NULL) { + /* + * We are traced by another LWP and must detach ourselves. + */ + mutex_exit(&p->p_lock); + lx_ptrace_exit_tracee(p, lwpd, accord); + mutex_enter(&p->p_lock); + } + + if ((accord = lwpd->br_ptrace_accord) != NULL) { + /* + * We have been tracing other LWPs, and must detach from + * them and clean up our accord. + */ + mutex_exit(&p->p_lock); + lx_ptrace_exit_tracer(p, lwpd, accord); + mutex_enter(&p->p_lock); + } +} + +/* + * Called when a SIGCLD signal is dispatched so that we may enqueue another. + * Return 0 if we enqueued a signal, or -1 if not. 
+ */ +int +lx_sigcld_repost(proc_t *pp, sigqueue_t *sqp) +{ + klwp_t *lwp = ttolwp(curthread); + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + lx_ptrace_accord_t *accord; + lx_lwp_data_t *remote; + klwp_t *rlwp; + proc_t *rproc; + boolean_t found = B_FALSE; + + VERIFY(MUTEX_HELD(&pidlock)); + VERIFY(MUTEX_NOT_HELD(&pp->p_lock)); + VERIFY(lwptoproc(lwp) == pp); + + mutex_enter(&pp->p_lock); + if ((accord = lwpd->br_ptrace_accord) == NULL) { + /* + * This LWP is not a tracer LWP, so there will be no + * SIGCLD. + */ + mutex_exit(&pp->p_lock); + return (-1); + } + mutex_exit(&pp->p_lock); + + mutex_enter(&accord->lxpa_tracees_lock); + for (remote = list_head(&accord->lxpa_tracees); remote != NULL; + remote = list_next(&accord->lxpa_tracees, remote)) { + rlwp = remote->br_lwp; + rproc = lwptoproc(rlwp); + + /* + * Check if this LWP is in "ptrace-stop". If in the correct + * stop condition, lock the process containing the tracee LWP. + */ + if (lx_ptrace_lock_if_stopped(accord, remote) != 0) { + continue; + } + + if (remote->br_ptrace_flags & LX_PTRACE_PARENT_WAIT) { + /* + * This event depends on waitid() clearing out the + * event of another LWP. Skip it for now. + */ + mutex_exit(&rproc->p_lock); + continue; + } + + if (!(remote->br_ptrace_flags & LX_PTRACE_CLDPEND)) { + /* + * No SIGCLD is required for this LWP. + */ + mutex_exit(&rproc->p_lock); + continue; + } + + if (remote->br_ptrace_whystop == 0 || + remote->br_ptrace_whatstop == 0) { + /* + * No (new) stop reason to post for this LWP. + */ + mutex_exit(&rproc->p_lock); + continue; + } + + /* + * We found a process of interest. Leave the process + * containing the tracee LWP locked and break out of the loop. + */ + found = B_TRUE; + break; + } + mutex_exit(&accord->lxpa_tracees_lock); + + if (!found) { + return (-1); + } + + /* + * Generate siginfo for this tracee LWP. 
+ */ + lx_winfo(remote, &sqp->sq_info, B_FALSE, NULL, NULL); + remote->br_ptrace_flags &= ~LX_PTRACE_CLDPEND; + mutex_exit(&rproc->p_lock); + + mutex_enter(&pp->p_lock); + if (sigismember(&pp->p_sig, SIGCLD)) { + mutex_exit(&pp->p_lock); + + mutex_enter(&rproc->p_lock); + remote->br_ptrace_flags |= LX_PTRACE_CLDPEND; + mutex_exit(&rproc->p_lock); + + return (-1); + } + sigaddqa(pp, curthread, sqp); + mutex_exit(&pp->p_lock); + + return (0); +} + +/* + * Consume the next available ptrace(2) event queued against the accord for + * this LWP. The event will be emitted as if through waitid(), and converted + * by lx_waitpid() and friends before the return to usermode. + */ +int +lx_waitid_helper(idtype_t idtype, id_t id, k_siginfo_t *ip, int options, + boolean_t *brand_wants_wait, int *rval) +{ + lx_ptrace_accord_t *accord; + klwp_t *lwp = ttolwp(curthread); + proc_t *p = lwptoproc(lwp); + lx_lwp_data_t *local = lwptolxlwp(lwp); + lx_lwp_data_t *remote; + boolean_t found = B_FALSE; + klwp_t *rlwp = NULL; + proc_t *rproc = NULL; + pid_t event_pid = 0, event_ppid = 0; + boolean_t waitflag = !(options & WNOWAIT); + + VERIFY(MUTEX_HELD(&pidlock)); + VERIFY(MUTEX_NOT_HELD(&p->p_lock)); + + /* + * By default, we do not expect waitid() to block on our account. + */ + *brand_wants_wait = B_FALSE; + + if (!local->br_waitid_emulate) { + /* + * This waitid() call is not expecting emulated results. + */ + return (-1); + } + + switch (idtype) { + case P_ALL: + case P_PID: + case P_PGID: + break; + default: + /* + * This idtype has no power here. + */ + return (-1); + } + + if (lx_ptrace_accord_get(&accord, B_FALSE) != 0) { + /* + * This LWP does not have an accord; it cannot be tracing. + */ + return (-1); + } + + /* + * We do not need an additional hold on the accord as it belongs to + * the running, tracer, LWP. 
+ */ + lx_ptrace_accord_exit(accord); + + mutex_enter(&accord->lxpa_tracees_lock); + if (list_is_empty(&accord->lxpa_tracees)) { + /* + * Though it has an accord, there are currently no tracees in + * the list for this LWP. + */ + mutex_exit(&accord->lxpa_tracees_lock); + return (-1); + } + + /* + * Walk the list of tracees and determine if any of them have events to + * report. + */ + for (remote = list_head(&accord->lxpa_tracees); remote != NULL; + remote = list_next(&accord->lxpa_tracees, remote)) { + rlwp = remote->br_lwp; + rproc = lwptoproc(rlwp); + + /* + * If the __WALL option was passed, we unconditionally consider + * every possible child. + */ + if (!(local->br_waitid_flags & LX_WALL)) { + /* + * Otherwise, we check to see if this LWP matches an + * id we are waiting for. + */ + switch (idtype) { + case P_ALL: + break; + case P_PID: + if (remote->br_pid != id) + continue; + break; + case P_PGID: + if (rproc->p_pgrp != id) + continue; + break; + default: + cmn_err(CE_PANIC, "unexpected idtype: %d", + idtype); + } + } + + /* + * Check if this LWP is in "ptrace-stop". If in the correct + * stop condition, lock the process containing the tracee LWP. + */ + if (lx_ptrace_lock_if_stopped(accord, remote) != 0) { + continue; + } + + if (remote->br_ptrace_flags & LX_PTRACE_PARENT_WAIT) { + /* + * This event depends on waitid() clearing out the + * event of another LWP. Skip it for now. + */ + mutex_exit(&rproc->p_lock); + continue; + } + + if (remote->br_ptrace_whystop == 0 || + remote->br_ptrace_whatstop == 0) { + /* + * No (new) stop reason to post for this LWP. + */ + mutex_exit(&rproc->p_lock); + continue; + } + + /* + * We found a process of interest. Leave the process + * containing the tracee LWP locked and break out of the loop. + */ + found = B_TRUE; + break; + } + mutex_exit(&accord->lxpa_tracees_lock); + + if (!found) { + /* + * There were no events of interest, but we have tracees. 
+ * Signal to waitid() that it should block if the provided + * flags allow for it. + */ + *brand_wants_wait = B_TRUE; + return (-1); + } + + /* + * Populate the signal information. + */ + lx_winfo(remote, ip, waitflag, &event_ppid, &event_pid); + + /* + * Unlock the tracee. + */ + mutex_exit(&rproc->p_lock); + + if (event_pid != 0 && event_ppid != 0) { + /* + * We need to do another pass around the tracee list and + * unblock any events that have a "happens after" relationship + * with this event. + */ + mutex_enter(&accord->lxpa_tracees_lock); + for (remote = list_head(&accord->lxpa_tracees); remote != NULL; + remote = list_next(&accord->lxpa_tracees, remote)) { + rlwp = remote->br_lwp; + rproc = lwptoproc(rlwp); + + mutex_enter(&rproc->p_lock); + + if (remote->br_pid != event_pid || + remote->br_ppid != event_ppid) { + mutex_exit(&rproc->p_lock); + continue; + } + + remote->br_ptrace_flags &= ~LX_PTRACE_PARENT_WAIT; + + mutex_exit(&rproc->p_lock); + } + mutex_exit(&accord->lxpa_tracees_lock); + } + + /* + * If we are consuming this wait state, we remove the SIGCLD from + * the queue and post another. + */ + if (waitflag) { + mutex_exit(&pidlock); + sigcld_delete(ip); + sigcld_repost(); + mutex_enter(&pidlock); + } + + *rval = 0; + return (0); +} + +/* + * Some PTRACE_* requests are handled in-kernel by this function. It is called + * through brandsys() via the B_PTRACE_KERNEL subcommand. + */ +int +lx_ptrace_kernel(int ptrace_op, pid_t lxpid, uintptr_t addr, uintptr_t data) +{ + lx_lwp_data_t *local = ttolxlwp(curthread); + lx_ptrace_accord_t *accord; + lx_lwp_data_t *remote; + klwp_t *rlwp; + proc_t *rproc; + int error; + boolean_t found = B_FALSE; + boolean_t release_hold = B_FALSE; + + _NOTE(ARGUNUSED(addr)); + + /* + * These actions do not require the target LWP to be traced or stopped. 
+ */ + switch (ptrace_op) { + case LX_PTRACE_TRACEME: + return (lx_ptrace_traceme()); + + case LX_PTRACE_ATTACH: + return (lx_ptrace_attach(lxpid)); + } + + /* + * Ensure that we have an accord and obtain a lock on it. This routine + * should not fail because the LWP cannot make ptrace(2) system calls + * after it has begun exiting. + */ + VERIFY0(local->br_ptrace_flags & LX_PTRACE_EXITING); + VERIFY(lx_ptrace_accord_get(&accord, B_TRUE) == 0); + + /* + * The accord belongs to this (the tracer) LWP, and we have a hold on + * it. We drop the lock so that we can take other locks. + */ + lx_ptrace_accord_exit(accord); + + /* + * Does the tracee list contain the pid in question? + */ + mutex_enter(&accord->lxpa_tracees_lock); + for (remote = list_head(&accord->lxpa_tracees); remote != NULL; + remote = list_next(&accord->lxpa_tracees, remote)) { + if (remote->br_pid == lxpid) { + found = B_TRUE; + break; + } + } + if (!found) { + /* + * The requested pid does not appear in the tracee list. + */ + mutex_exit(&accord->lxpa_tracees_lock); + return (ESRCH); + } + + /* + * Attempt to lock the target LWP. + */ + if ((error = lx_ptrace_lock_if_stopped(accord, remote)) != 0) { + /* + * The LWP was not in "ptrace-stop". + */ + mutex_exit(&accord->lxpa_tracees_lock); + return (error); + } + + /* + * The target LWP is in "ptrace-stop". We have the containing process + * locked. 
+ */ + rlwp = remote->br_lwp; + rproc = lwptoproc(rlwp); + + /* + * Process the ptrace(2) request: + */ + switch (ptrace_op) { + case LX_PTRACE_DETACH: + error = lx_ptrace_detach(accord, remote, (int)data, + &release_hold); + break; + + case LX_PTRACE_CONT: + error = lx_ptrace_cont(remote, LX_PTC_NONE, (int)data); + break; + + case LX_PTRACE_SYSCALL: + error = lx_ptrace_cont(remote, LX_PTC_SYSCALL, (int)data); + break; + + case LX_PTRACE_SINGLESTEP: + error = lx_ptrace_cont(remote, LX_PTC_SINGLESTEP, (int)data); + break; + + case LX_PTRACE_SETOPTIONS: + error = lx_ptrace_setoptions(remote, data); + break; + + case LX_PTRACE_GETEVENTMSG: + error = lx_ptrace_geteventmsg(remote, (void *)data); + break; + + default: + error = EINVAL; + } + + /* + * Drop the lock on both the tracee process and the tracee list. + */ + mutex_exit(&rproc->p_lock); + mutex_exit(&accord->lxpa_tracees_lock); + + if (release_hold) { + /* + * Release a hold from the accord. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); + } + + return (error); +} + +void +lx_ptrace_init(void) +{ + cv_init(&lx_ptrace_busy_cv, NULL, CV_DEFAULT, NULL); + + lx_ptrace_accord_cache = kmem_cache_create("lx_ptrace_accord", + sizeof (lx_ptrace_accord_t), 0, NULL, NULL, NULL, NULL, NULL, 0); +} + +void +lx_ptrace_fini(void) +{ + cv_destroy(&lx_ptrace_busy_cv); + + kmem_cache_destroy(lx_ptrace_accord_cache); +} diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h index cda0f7f82b..e7f5ee9867 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_brand.h +++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h @@ -80,10 +80,10 @@ extern "C" { #define B_LPID_TO_SPAIR 128 #define B_SYSENTRY 129 #define B_SYSRETURN 130 -#define B_PTRACE_SYSCALL 131 +#define B_PTRACE_KERNEL 131 #define B_SET_AFFINITY_MASK 132 #define B_GET_AFFINITY_MASK 133 -#define B_PTRACE_EXT_OPTS 134 +#define B_PTRACE_CLONE_BEGIN 134 #define B_PTRACE_STOP_FOR_OPT 135 
#define B_UNSUPPORTED 136 #define B_STORE_ARGS 137 @@ -91,37 +91,31 @@ extern "C" { #define B_SIGNAL_RETURN 139 #define B_UNWIND_NTV_SYSC_FLAG 140 #define B_EXIT_AS_SIG 141 -#define B_PTRACE_GETEVENTMSG 142 +#define B_HELPER_WAITID 142 #define B_IKE_SYSCALL 192 -/* B_PTRACE_EXT_OPTS subcommands */ -#define B_PTRACE_EXT_OPTS_SET 1 -#define B_PTRACE_EXT_OPTS_GET 2 -#define B_PTRACE_EXT_OPTS_EVT 3 -#define B_PTRACE_DETACH 4 - +#ifndef _ASM /* * Support for Linux PTRACE_SETOPTIONS handling. */ -#define LX_PTRACE_O_TRACESYSGOOD 0x0001 -#define LX_PTRACE_O_TRACEFORK 0x0002 -#define LX_PTRACE_O_TRACEVFORK 0x0004 -#define LX_PTRACE_O_TRACECLONE 0x0008 -#define LX_PTRACE_O_TRACEEXEC 0x0010 -#define LX_PTRACE_O_TRACEVFORKDONE 0x0020 -#define LX_PTRACE_O_TRACEEXIT 0x0040 -#define LX_PTRACE_O_TRACESECCOMP 0x0080 -/* - * lx emulation-specific flag to indicate this is a child process being stopped - * due to one of the PTRACE_SETOPTIONS above. - */ -#define EMUL_PTRACE_O_CHILD 0x8000 -/* - * lx emulation-specific flag to determine via B_PTRACE_EXT_OPTS_GET if a - * process is being traced because of one of the PTRACE_SETOPTIONS above. 
- */ -#define EMUL_PTRACE_IS_TRACED 0x8000 +typedef enum lx_ptrace_options { + LX_PTRACE_O_TRACESYSGOOD = 0x0001, + LX_PTRACE_O_TRACEFORK = 0x0002, + LX_PTRACE_O_TRACEVFORK = 0x0004, + LX_PTRACE_O_TRACECLONE = 0x0008, + LX_PTRACE_O_TRACEEXEC = 0x0010, + LX_PTRACE_O_TRACEVFORKDONE = 0x0020, + LX_PTRACE_O_TRACEEXIT = 0x0040, + LX_PTRACE_O_TRACESECCOMP = 0x0080 +} lx_ptrace_options_t; + +#define LX_PTRACE_O_ALL \ + (LX_PTRACE_O_TRACESYSGOOD | LX_PTRACE_O_TRACEFORK | \ + LX_PTRACE_O_TRACEVFORK | LX_PTRACE_O_TRACECLONE | \ + LX_PTRACE_O_TRACEEXEC | LX_PTRACE_O_TRACEVFORKDONE | \ + LX_PTRACE_O_TRACEEXIT | LX_PTRACE_O_TRACESECCOMP) +#endif /* !_ASM */ /* siginfo si_status for traced events */ #define LX_PTRACE_EVENT_FORK 0x100 @@ -132,6 +126,17 @@ extern "C" { #define LX_PTRACE_EVENT_EXIT 0x600 #define LX_PTRACE_EVENT_SECCOMP 0x700 +/* + * Brand-private values for the "pr_what" member of lwpstatus, for use with the + * PR_BRAND stop reason. These reasons are validated in lx_stop_notify(); + * update it if you add new reasons here. 
+ */ +#define LX_PR_SYSENTRY 1 +#define LX_PR_SYSEXIT 2 +#define LX_PR_SIGNALLED 3 +#define LX_PR_EVENT 4 + + #define LX_VERSION_1 1 #define LX_VERSION LX_VERSION_1 @@ -257,10 +262,6 @@ typedef struct lx_proc_data { uintptr_t l_traceflag; /* address of 32-bit tracing flag */ pid_t l_ppid; /* pid of originating parent proc */ uint64_t l_ptrace; /* process being observed with ptrace */ - uint_t l_ptrace_opts; /* process's extended ptrace options */ - uint_t l_ptrace_event; /* extended ptrace option trap event */ - uint_t l_ptrace_is_traced; /* set if traced due to ptrace setoptions */ - ulong_t l_ptrace_eventmsg; /* extended ptrace event msg */ lx_elf_data_t l_elf_data; /* ELF data for linux executable */ int l_signal; /* signal to deliver to parent when this */ /* thread group dies */ @@ -282,10 +283,70 @@ typedef ulong_t lx_affmask_t[LX_AFF_ULONGS]; #ifdef _KERNEL +typedef struct lx_lwp_data lx_lwp_data_t; + +/* + * Flag values for "lxpa_flags" on a ptrace(2) accord. + */ +typedef enum lx_accord_flags { + LX_ACC_TOMBSTONE = 0x01 +} lx_accord_flags_t; + +/* + * Flags values for "br_ptrace_flags" in the LWP-specific data. + */ +typedef enum lx_ptrace_state { + LX_PTRACE_SYSCALL = 0x01, + LX_PTRACE_EXITING = 0x02, + LX_PTRACE_STOPPING = 0x04, + LX_PTRACE_INHERIT = 0x08, + LX_PTRACE_STOPPED = 0x10, + LX_PTRACE_PARENT_WAIT = 0x20, + LX_PTRACE_CLDPEND = 0x40, + LX_PTRACE_CLONING = 0x80 +} lx_ptrace_state_t; + +/* + * A ptrace(2) accord represents the relationship between a tracer LWP and the + * set of LWPs that it is tracing: the tracees. This data structure belongs + * primarily to the tracer, but is reference counted so that it may be freed by + * whoever references it last. + */ +typedef struct lx_ptrace_accord { + kmutex_t lxpa_lock; + uint_t lxpa_refcnt; + lx_accord_flags_t lxpa_flags; + + /* + * The tracer must hold "pidlock" while clearing these fields for + * exclusion of waitid(), etc. 
+ */ + lx_lwp_data_t *lxpa_tracer; + kcondvar_t *lxpa_cvp; + + /* + * The "lxpa_tracees_lock" mutex protects the tracee list. + */ + kmutex_t lxpa_tracees_lock; + list_t lxpa_tracees; +} lx_ptrace_accord_t; + +/* + * These values are stored in the per-LWP data for a tracee when it is attached + * to a tracer. They record the method that was used to attach. + */ +typedef enum lx_ptrace_attach { + LX_PTA_NONE = 0x00, /* not attached */ + LX_PTA_ATTACH = 0x01, /* due to tracer using PTRACE_ATTACH */ + LX_PTA_TRACEME = 0x02, /* due to child using PTRACE_TRACEME */ + LX_PTA_INHERIT_CLONE = 0x04, /* due to PTRACE_CLONE clone(2) flag */ + LX_PTA_INHERIT_OPTIONS = 0x08 /* due to PTRACE_SETOPTIONS options */ +} lx_ptrace_attach_t; + /* * lx-specific data in the klwp_t */ -typedef struct lx_lwp_data { +struct lx_lwp_data { uint_t br_ntv_syscall; /* 1 = syscall from native libc */ uint_t br_lwp_flags; /* misc. flags */ klwp_t *br_lwp; /* back pointer to container lwp */ @@ -319,8 +380,26 @@ typedef struct lx_lwp_data { void *br_scall_args; int br_args_size; /* size in bytes of br_scall_args */ - uint_t br_ptrace; /* ptrace is active for this LWP */ -} lx_lwp_data_t; + boolean_t br_waitid_emulate; + int br_waitid_flags; + + lx_ptrace_state_t br_ptrace_flags; /* ptrace state for this LWP */ + lx_ptrace_options_t br_ptrace_options; /* PTRACE_SETOPTIONS options */ + lx_ptrace_options_t br_ptrace_clone_option; /* current clone(2) type */ + + lx_ptrace_attach_t br_ptrace_attach; /* how did we get attached */ + lx_ptrace_accord_t *br_ptrace_accord; /* accord for this tracer LWP */ + lx_ptrace_accord_t *br_ptrace_tracer; /* accord tracing this LWP */ + list_node_t br_ptrace_linkage; /* linkage for lxpa_tracees list */ + + ushort_t br_ptrace_whystop; /* stop reason, 0 for no stop */ + ushort_t br_ptrace_whatstop; /* stop sub-reason */ + + int32_t br_ptrace_stopsig; /* stop signal, 0 for no signal */ + + uint_t br_ptrace_event; + ulong_t br_ptrace_eventmsg; +}; /* * Upper limit on 
br_args_size, low because this value can persist until diff --git a/usr/src/uts/common/brand/lx/sys/lx_misc.h b/usr/src/uts/common/brand/lx/sys/lx_misc.h index 56b5bb4047..7b77789c56 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_misc.h +++ b/usr/src/uts/common/brand/lx/sys/lx_misc.h @@ -46,6 +46,20 @@ extern boolean_t lx_wait_filter(proc_t *, proc_t *); extern void lx_ifname_convert(char *, int); +extern boolean_t lx_ptrace_stop(ushort_t); +extern void lx_stop_notify(proc_t *, klwp_t *, ushort_t, ushort_t); +extern void lx_ptrace_init(void); +extern void lx_ptrace_fini(void); +extern int lx_ptrace_kernel(int, pid_t, uintptr_t, uintptr_t); +extern int lx_waitid_helper(idtype_t, id_t, k_siginfo_t *, int, boolean_t *, + int *); +extern void lx_ptrace_exit(proc_t *, klwp_t *); +extern void lx_ptrace_inherit_tracer(lx_lwp_data_t *, lx_lwp_data_t *); +extern int lx_ptrace_stop_for_option(int, boolean_t, ulong_t); +extern int lx_ptrace_set_clone_inherit(int, boolean_t); +extern int lx_sigcld_repost(proc_t *, sigqueue_t *); +extern int lx_issig_stop(proc_t *, klwp_t *); + #endif #ifdef __cplusplus diff --git a/usr/src/uts/common/brand/lx/syscall/lx_clone.c b/usr/src/uts/common/brand/lx/syscall/lx_clone.c index 949db3a73b..d73c5f100b 100644 --- a/usr/src/uts/common/brand/lx/syscall/lx_clone.c +++ b/usr/src/uts/common/brand/lx/syscall/lx_clone.c @@ -21,7 +21,7 @@ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2014 Joyent, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. 
*/ #include <sys/types.h> @@ -32,25 +32,10 @@ #include <sys/lx_ldt.h> #include <sys/lx_misc.h> #include <lx_signum.h> +#include <lx_syscall.h> #include <sys/x86_archext.h> #include <sys/controlregs.h> -#define LX_CSIGNAL 0x000000ff -#define LX_CLONE_VM 0x00000100 -#define LX_CLONE_FS 0x00000200 -#define LX_CLONE_FILES 0x00000400 -#define LX_CLONE_SIGHAND 0x00000800 -#define LX_CLONE_PID 0x00001000 -#define LX_CLONE_PTRACE 0x00002000 -#define LX_CLONE_PARENT 0x00008000 -#define LX_CLONE_THREAD 0x00010000 -#define LX_CLONE_SYSVSEM 0x00040000 -#define LX_CLONE_SETTLS 0x00080000 -#define LX_CLONE_PARENT_SETTID 0x00100000 -#define LX_CLONE_CHILD_CLEARTID 0x00200000 -#define LX_CLONE_DETACH 0x00400000 -#define LX_CLONE_CHILD_SETTID 0x01000000 - /* * Our lwp has already been created at this point, so this routine is * responsible for setting up all the state needed to track this as a diff --git a/usr/src/uts/common/fs/proc/prcontrol.c b/usr/src/uts/common/fs/proc/prcontrol.c index a5679a8afb..7e99d23b97 100644 --- a/usr/src/uts/common/fs/proc/prcontrol.c +++ b/usr/src/uts/common/fs/proc/prcontrol.c @@ -25,7 +25,7 @@ */ /* - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #include <sys/types.h> @@ -1481,7 +1481,7 @@ pr_setsig(prnode_t *pnp, siginfo_t *sip) } else if (t->t_state == TS_STOPPED && sig == SIGKILL) { /* If SIGKILL, set stopped lwp running */ p->p_stopsig = 0; - t->t_schedflag |= TS_XSTART | TS_PSTART; + t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART; t->t_dtrace_stop = 0; setrun_locked(t); } diff --git a/usr/src/uts/common/fs/proc/prsubr.c b/usr/src/uts/common/fs/proc/prsubr.c index 7801fd0ac8..284bf8cb88 100644 --- a/usr/src/uts/common/fs/proc/prsubr.c +++ b/usr/src/uts/common/fs/proc/prsubr.c @@ -21,7 +21,7 @@ /* * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. 
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -201,6 +201,7 @@ prchoose(proc_t *p) case PR_SYSEXIT: case PR_SIGNALLED: case PR_FAULTED: + case PR_BRAND: /* * Make an lwp calling exit() be the * last lwp seen in the process. diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c index 6a27544201..02844cef07 100644 --- a/usr/src/uts/common/os/exit.c +++ b/usr/src/uts/common/os/exit.c @@ -400,14 +400,36 @@ proc_exit(int why, int what) if (z->zone_boot_err == 0 && zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) { - if (z->zone_restart_init == B_TRUE) { - if (restart_init(what, why) == 0) - return (0); - } - z->zone_init_status = wstat(why, what); - (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, - zone_kcred()); + /* + * If the init process should be restarted, the + * "zone_restart_init" member will be set. Some init + * programs in branded zones do not tolerate a restart + * in the traditional manner; setting the + * "zone_reboot_on_init_exit" member will cause the + * entire zone to be rebooted instead. If neither of + * these flags is set the zone will shut down. + */ + if (z->zone_reboot_on_init_exit == B_TRUE && + z->zone_restart_init == B_TRUE) { + /* + * Trigger a zone reboot and continue + * with exit processing. 
+ */ + z->zone_init_status = wstat(why, what); + (void) zone_kadmin(A_REBOOT, 0, NULL, + zone_kcred()); + + } else { + if (z->zone_restart_init == B_TRUE) { + if (restart_init(what, why) == 0) + return (0); + } + + z->zone_init_status = wstat(why, what); + (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, + zone_kcred()); + } } /* @@ -995,10 +1017,9 @@ winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) int waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) { - int found; proc_t *cp, *pp; - int proc_gone; int waitflag = !(options & WNOWAIT); + boolean_t have_brand_helper = B_FALSE; /* * Obsolete flag, defined here only for binary compatibility @@ -1047,10 +1068,37 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) return (ECHILD); } - while (pp->p_child != NULL) { + if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) { + have_brand_helper = B_TRUE; + } + + while (pp->p_child != NULL || have_brand_helper) { + boolean_t brand_wants_wait = B_FALSE; + int proc_gone = 0; + int found = 0; + + /* + * Give the brand a chance to return synthetic results from + * this waitid() call before we do the real thing. + */ + if (have_brand_helper) { + int ret; - proc_gone = 0; + if (BROP(pp)->b_waitid_helper(idtype, id, ip, options, + &brand_wants_wait, &ret) == 0) { + mutex_exit(&pidlock); + return (ret); + } + if (pp->p_child == NULL) { + goto no_real_children; + } + } + + /* + * Look for interesting children in the newstate list. + */ + VERIFY(pp->p_child != NULL); for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) { if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID)) continue; @@ -1107,7 +1155,6 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) * Wow! None of the threads on the p_sibling_ns list were * interesting threads. Check all the kids! 
*/ - found = 0; for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) { if (idtype == P_PID && id != cp->p_pid) continue; @@ -1186,11 +1233,12 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) break; } +no_real_children: /* * If we found no interesting processes at all, * break out and return ECHILD. */ - if (found + proc_gone == 0) + if (!brand_wants_wait && (found + proc_gone == 0)) break; if (options & WNOHANG) { @@ -1209,7 +1257,7 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) * change state while we wait, we don't wait at all. * Get out with ECHILD according to SVID. */ - if (found == proc_gone) + if (!brand_wants_wait && (found == proc_gone)) break; if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { diff --git a/usr/src/uts/common/os/sig.c b/usr/src/uts/common/os/sig.c index b117bf3584..ae643c280e 100644 --- a/usr/src/uts/common/os/sig.c +++ b/usr/src/uts/common/os/sig.c @@ -22,7 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2014, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -194,7 +194,7 @@ eat_signal(kthread_t *t, int sig) !(ttoproc(t)->p_proc_flag & P_PR_LOCK)) { ttoproc(t)->p_stopsig = 0; t->t_dtrace_stop = 0; - t->t_schedflag |= TS_XSTART | TS_PSTART; + t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART; setrun_locked(t); } else if (t != curthread && t->t_state == TS_ONPROC) { aston(t); /* make it do issig promptly */ @@ -608,6 +608,21 @@ issig_forreal(void) } /* + * Allow the brand the chance to alter (or suppress) delivery + * of this signal. + */ + if (PROC_IS_BRANDED(p) && BROP(p)->b_issig_stop != NULL) { + /* + * The brand hook will return 0 if it would like + * us to drive on, or -1 if we should restart + * the loop to check other conditions. 
+ */ + if (BROP(p)->b_issig_stop(p, lwp) != 0) { + continue; + } + } + + /* * Honor requested stop before dealing with the * current signal; a debugger may change it. * Do not want to go back to loop here since this is a special @@ -939,6 +954,16 @@ stop(int why, int what) } break; + case PR_BRAND: + /* + * We have been stopped by the brand code for a brand-private + * reason. This is an asynchronous stop affecting only this + * LWP. + */ + VERIFY(PROC_IS_BRANDED(p)); + flags &= ~TS_BSTART; + break; + default: /* /proc stop */ flags &= ~TS_PSTART; /* @@ -1050,7 +1075,7 @@ stop(int why, int what) } } - if (why != PR_JOBCONTROL && why != PR_CHECKPOINT) { + if (why != PR_JOBCONTROL && why != PR_CHECKPOINT && why != PR_BRAND) { /* * Do process-level notification when all lwps are * either stopped on events of interest to /proc @@ -1156,6 +1181,13 @@ stop(int why, int what) if (why == PR_CHECKPOINT) del_one_utstop(); + /* + * Allow the brand to post notification of this stop condition. + */ + if (PROC_IS_BRANDED(p) && BROP(p)->b_stop_notify != NULL) { + BROP(p)->b_stop_notify(p, lwp, why, what); + } + thread_lock(t); ASSERT((t->t_schedflag & TS_ALLSTART) == 0); t->t_schedflag |= flags; @@ -1177,7 +1209,7 @@ stop(int why, int what) (p->p_flag & (SEXITLWPS|SKILLED))) { p->p_stopsig = 0; thread_lock(t); - t->t_schedflag |= TS_XSTART | TS_PSTART; + t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART; setrun_locked(t); thread_unlock_nopreempt(t); } else if (why == PR_JOBCONTROL) { @@ -1795,6 +1827,15 @@ sigcld_repost() sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); mutex_enter(&pidlock); + if (PROC_IS_BRANDED(pp) && BROP(pp)->b_sigcld_repost != NULL) { + /* + * Allow the brand to inject synthetic SIGCLD signals. 
+ */ + if (BROP(pp)->b_sigcld_repost(pp, sqp) == 0) { + mutex_exit(&pidlock); + return; + } + } for (cp = pp->p_child; cp; cp = cp->p_sibling) { if (cp->p_pidflag & CLDPEND) { post_sigcld(cp, sqp); diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index 145ad10bb5..347a90a022 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -2624,6 +2624,7 @@ zone_init(void) zone0.zone_ntasks = 1; mutex_exit(&p0.p_lock); zone0.zone_restart_init = B_TRUE; + zone0.zone_reboot_on_init_exit = B_FALSE; zone0.zone_init_status = -1; zone0.zone_brand = &native_brand; rctl_prealloc_destroy(gp); @@ -4820,6 +4821,7 @@ zone_create(const char *zone_name, const char *zone_root, zone->zone_ncpus = 0; zone->zone_ncpus_online = 0; zone->zone_restart_init = B_TRUE; + zone->zone_reboot_on_init_exit = B_FALSE; zone->zone_init_status = -1; zone->zone_brand = &native_brand; zone->zone_initname = NULL; diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h index 3486ae864d..b3abada863 100644 --- a/usr/src/uts/common/sys/brand.h +++ b/usr/src/uts/common/sys/brand.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. 
*/ #ifndef _SYS_BRAND_H @@ -132,6 +132,11 @@ struct brand_ops { boolean_t (*b_native_exec)(uint8_t, const char **); void (*b_ptrace_exectrap)(proc_t *); uint32_t (*b_map32limit)(proc_t *); + void (*b_stop_notify)(proc_t *, klwp_t *, ushort_t, ushort_t); + int (*b_waitid_helper)(idtype_t, id_t, k_siginfo_t *, int, + boolean_t *, int *); + int (*b_sigcld_repost)(proc_t *, sigqueue_t *); + int (*b_issig_stop)(proc_t *, klwp_t *); }; /* diff --git a/usr/src/uts/common/sys/procfs.h b/usr/src/uts/common/sys/procfs.h index f592fd9dcf..501af712ef 100644 --- a/usr/src/uts/common/sys/procfs.h +++ b/usr/src/uts/common/sys/procfs.h @@ -25,6 +25,7 @@ */ /* * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #ifndef _SYS_PROCFS_H @@ -233,6 +234,7 @@ typedef struct pstatus { #define PR_FAULTED 6 #define PR_SUSPENDED 7 #define PR_CHECKPOINT 8 +#define PR_BRAND 9 /* * lwp ps(1) information file. /proc/<pid>/lwp/<lwpid>/lwpsinfo diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h index 9f2e166fea..41ea2331df 100644 --- a/usr/src/uts/common/sys/thread.h +++ b/usr/src/uts/common/sys/thread.h @@ -419,8 +419,9 @@ typedef struct _kthread { #define TS_RESUME 0x1000 /* setrun() by CPR resume process */ #define TS_CREATE 0x2000 /* setrun() by syslwp_create() */ #define TS_RUNQMATCH 0x4000 /* exact run queue balancing by setbackdq() */ +#define TS_BSTART 0x8000 /* setrun() by brand */ #define TS_ALLSTART \ - (TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE) + (TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE|TS_BSTART) #define TS_ANYWAITQ (TS_PROJWAITQ|TS_ZONEWAITQ) /* @@ -448,6 +449,10 @@ typedef struct _kthread { #define ISTOPPED(t) ((t)->t_state == TS_STOPPED && \ !((t)->t_schedflag & TS_PSTART)) +/* True if thread is stopped for a brand-specific reason */ +#define BSTOPPED(t) ((t)->t_state == TS_STOPPED && \ + !((t)->t_schedflag & TS_BSTART)) + /* True if thread is asleep and wakeable */ 
#define ISWAKEABLE(t) (((t)->t_state == TS_SLEEP && \ ((t)->t_flag & T_WAKEABLE))) diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 7ab9377e16..a5d1610842 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -594,6 +594,7 @@ typedef struct zone { tsol_mlp_list_t zone_mlps; /* MLPs on zone-private addresses */ boolean_t zone_restart_init; /* Restart init if it dies? */ + boolean_t zone_reboot_on_init_exit; /* Reboot if init dies? */ struct brand *zone_brand; /* zone's brand */ void *zone_brand_data; /* store brand specific data */ id_t zone_defaultcid; /* dflt scheduling class id */ diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files index a4d6b7e309..0f058f262d 100644 --- a/usr/src/uts/intel/Makefile.files +++ b/usr/src/uts/intel/Makefile.files @@ -21,7 +21,7 @@ # # Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright (c) 2014, Joyent, Inc. All rights reserved. +# Copyright 2015, Joyent, Inc. # # @@ -289,6 +289,7 @@ LX_BRAND_OBJS = \ lx_modify_ldt.o \ lx_pid.o \ lx_pipe.o \ + lx_ptrace.o \ lx_rw.o \ lx_sched.o \ lx_signum.o \ |