summary refs log tree commit diff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r-- usr/src/cmd/ptools/pflags/pflags.c 5
-rw-r--r-- usr/src/common/brand/lx/lx_syscall.h 47
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/clone.c 50
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/fork.c 57
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/lx_brand.c 45
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/misc.c 2
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/ptrace.c 1339
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/signal.c 73
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/wait.c 118
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h 8
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h 3
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h 10
-rw-r--r-- usr/src/lib/libproc/common/Pcontrol.c 6
-rw-r--r-- usr/src/man/man4/proc.4 12
-rw-r--r-- usr/src/uts/common/brand/lx/os/lx_brand.c 485
-rw-r--r-- usr/src/uts/common/brand/lx/os/lx_misc.c 97
-rw-r--r-- usr/src/uts/common/brand/lx/os/lx_pid.c 24
-rw-r--r-- usr/src/uts/common/brand/lx/os/lx_ptrace.c 2270
-rw-r--r-- usr/src/uts/common/brand/lx/sys/lx_brand.h 147
-rw-r--r-- usr/src/uts/common/brand/lx/sys/lx_misc.h 14
-rw-r--r-- usr/src/uts/common/brand/lx/syscall/lx_clone.c 19
-rw-r--r-- usr/src/uts/common/fs/proc/prcontrol.c 4
-rw-r--r-- usr/src/uts/common/fs/proc/prsubr.c 3
-rw-r--r-- usr/src/uts/common/os/exit.c 76
-rw-r--r-- usr/src/uts/common/os/sig.c 49
-rw-r--r-- usr/src/uts/common/os/zone.c 2
-rw-r--r-- usr/src/uts/common/sys/brand.h 7
-rw-r--r-- usr/src/uts/common/sys/procfs.h 2
-rw-r--r-- usr/src/uts/common/sys/thread.h 7
-rw-r--r-- usr/src/uts/common/sys/zone.h 1
-rw-r--r-- usr/src/uts/intel/Makefile.files 3
31 files changed, 3163 insertions, 1822 deletions
diff --git a/usr/src/cmd/ptools/pflags/pflags.c b/usr/src/cmd/ptools/pflags/pflags.c
index 8054a80d3c..f19a945d95 100644
--- a/usr/src/cmd/ptools/pflags/pflags.c
+++ b/usr/src/cmd/ptools/pflags/pflags.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <stdio.h>
@@ -469,6 +469,9 @@ prwhy(int why)
case PR_SUSPENDED:
str = "PR_SUSPENDED";
break;
+ case PR_BRAND:
+ str = "PR_BRAND";
+ break;
default:
str = buf;
(void) sprintf(str, "%d", why);
diff --git a/usr/src/common/brand/lx/lx_syscall.h b/usr/src/common/brand/lx/lx_syscall.h
index e80b0486f5..e9d06fd9bc 100644
--- a/usr/src/common/brand/lx/lx_syscall.h
+++ b/usr/src/common/brand/lx/lx_syscall.h
@@ -35,9 +35,6 @@ extern "C" {
#define LX_WNOTHREAD 0x20000000 /* Do not wait on siblings' children */
#define LX_WALL 0x40000000 /* Wait on all children */
#define LX_WCLONE 0x80000000 /* Wait only on clone children */
-typedef struct lx_waitid_args {
- int waitid_flags;
-} lx_waitid_args_t;
/* For arch_prctl(2) */
#define LX_ARCH_SET_GS 0x1001
@@ -45,6 +42,50 @@ typedef struct lx_waitid_args {
#define LX_ARCH_GET_FS 0x1003
#define LX_ARCH_GET_GS 0x1004
+/*
+ * For ptrace(2):
+ */
+#define LX_PTRACE_TRACEME 0
+#define LX_PTRACE_PEEKTEXT 1
+#define LX_PTRACE_PEEKDATA 2
+#define LX_PTRACE_PEEKUSER 3
+#define LX_PTRACE_POKETEXT 4
+#define LX_PTRACE_POKEDATA 5
+#define LX_PTRACE_POKEUSER 6
+#define LX_PTRACE_CONT 7
+#define LX_PTRACE_KILL 8
+#define LX_PTRACE_SINGLESTEP 9
+#define LX_PTRACE_GETREGS 12
+#define LX_PTRACE_SETREGS 13
+#define LX_PTRACE_GETFPREGS 14
+#define LX_PTRACE_SETFPREGS 15
+#define LX_PTRACE_ATTACH 16
+#define LX_PTRACE_DETACH 17
+#define LX_PTRACE_GETFPXREGS 18
+#define LX_PTRACE_SETFPXREGS 19
+#define LX_PTRACE_SYSCALL 24
+#define LX_PTRACE_SETOPTIONS 0x4200
+#define LX_PTRACE_GETEVENTMSG 0x4201
+
+/*
+ * For clone(2):
+ */
+#define LX_CSIGNAL 0x000000ff
+#define LX_CLONE_VM 0x00000100
+#define LX_CLONE_FS 0x00000200
+#define LX_CLONE_FILES 0x00000400
+#define LX_CLONE_SIGHAND 0x00000800
+#define LX_CLONE_PID 0x00001000
+#define LX_CLONE_PTRACE 0x00002000
+#define LX_CLONE_VFORK 0x00004000
+#define LX_CLONE_PARENT 0x00008000
+#define LX_CLONE_THREAD 0x00010000
+#define LX_CLONE_SYSVSEM 0x00040000
+#define LX_CLONE_SETTLS 0x00080000
+#define LX_CLONE_PARENT_SETTID 0x00100000
+#define LX_CLONE_CHILD_CLEARTID 0x00200000
+#define LX_CLONE_DETACH 0x00400000
+#define LX_CLONE_CHILD_SETTID 0x01000000
#ifdef __cplusplus
}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/clone.c b/usr/src/lib/brand/lx/lx_brand/common/clone.c
index 58c84c773b..87f966cc89 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/clone.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/clone.c
@@ -49,23 +49,7 @@
#include <sys/lx_debug.h>
#include <sys/lx_thread.h>
#include <sys/fork.h>
-
-#define LX_CSIGNAL 0x000000ff
-#define LX_CLONE_VM 0x00000100
-#define LX_CLONE_FS 0x00000200
-#define LX_CLONE_FILES 0x00000400
-#define LX_CLONE_SIGHAND 0x00000800
-#define LX_CLONE_PID 0x00001000
-#define LX_CLONE_PTRACE 0x00002000
-#define LX_CLONE_VFORK 0x00004000
-#define LX_CLONE_PARENT 0x00008000
-#define LX_CLONE_THREAD 0x00010000
-#define LX_CLONE_SYSVSEM 0x00040000
-#define LX_CLONE_SETTLS 0x00080000
-#define LX_CLONE_PARENT_SETTID 0x00100000
-#define LX_CLONE_CHILD_CLEARTID 0x00200000
-#define LX_CLONE_DETACH 0x00400000
-#define LX_CLONE_CHILD_SETTID 0x01000000
+#include <lx_syscall.h>
#define SHARED_AS \
(LX_CLONE_VM | LX_CLONE_FS | LX_CLONE_FILES | LX_CLONE_SIGHAND \
@@ -116,6 +100,7 @@ struct clone_state {
sigset_t c_sigmask; /* signal mask */
lx_affmask_t c_affmask; /* CPU affinity mask */
volatile int *c_clone_res; /* pid/error returned to cloner */
+ int c_ptrace_event; /* ptrace(2) event for child stop */
};
extern void lx_setup_clone(uintptr_t, void *, void *);
@@ -147,7 +132,7 @@ lx_exit(uintptr_t p1)
assert(lx_tsd != 0);
- lx_tsd->lxtsd_exit = LX_EXIT;
+ lx_tsd->lxtsd_exit = LX_ET_EXIT;
lx_tsd->lxtsd_exit_status = status;
lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEEXIT, B_FALSE,
@@ -200,7 +185,7 @@ lx_group_exit(uintptr_t p1)
assert(lx_tsd != 0);
- lx_tsd->lxtsd_exit = LX_EXIT_GROUP;
+ lx_tsd->lxtsd_exit = LX_ET_EXIT_GROUP;
lx_tsd->lxtsd_exit_status = status;
/*
@@ -315,7 +300,7 @@ clone_start(void *arg)
* Do the final stack twiddling, reset %gs, and return to the
* clone(2) path.
*/
- if (lx_tsd.lxtsd_exit == 0) {
+ if (lx_tsd.lxtsd_exit == LX_ET_NONE) {
if (sigprocmask(SIG_SETMASK, &cs->c_sigmask, NULL) < 0) {
*(cs->c_clone_res) = -errno;
@@ -329,6 +314,11 @@ clone_start(void *arg)
*/
*(cs->c_clone_res) = rval;
+ /*
+ * Fire the ptrace(2) event stop in the new thread:
+ */
+ lx_ptrace_stop_if_option(cs->c_ptrace_event, B_TRUE, 0);
+
#if defined(_LP64)
(void) syscall(SYS_brand, B_CLR_NTV_SYSC_FLAG);
lx_setup_clone((uintptr_t)&cs->c_regs, cs->c_retaddr,
@@ -347,12 +337,7 @@ clone_start(void *arg)
* setcontext() to jump to the thread context state saved in
* getcontext(), above.
*/
- if (lx_tsd.lxtsd_exit == LX_EXIT)
- thr_exit((void *)(long)lx_tsd.lxtsd_exit_status);
- else
- exit(lx_tsd.lxtsd_exit_status);
-
- assert(0);
+ lx_exit_common(lx_tsd.lxtsd_exit, lx_tsd.lxtsd_exit_status);
/*NOTREACHED*/
}
@@ -455,6 +440,12 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
ptrace_event = ptrace_clone_event(flags);
+ /*
+ * Inform the in-kernel ptrace(2) subsystem that we are about to
+ * emulate a fork(2), vfork(2) or clone(2) system call.
+ */
+ lx_ptrace_clone_begin(ptrace_event, !!(flags & LX_CLONE_PTRACE));
+
/* See if this is a fork() operation or a thr_create(). */
if (IS_FORK(flags) || IS_VFORK(flags)) {
if (flags & LX_CLONE_PARENT) {
@@ -463,9 +454,6 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
return (-ENOTSUP);
}
- if (flags & LX_CLONE_PTRACE)
- lx_ptrace_fork();
-
if ((flags & LX_CSIGNAL) == 0)
fork_flags |= FORK_NOSIGCHLD;
@@ -509,7 +497,6 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
return ((rval < 0) ? -errno : rval);
}
-
/*
* Set up additional data in the lx_proc_data structure as
* necessary.
@@ -584,6 +571,7 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
cs->c_ldtinfo = ldtinfo;
cs->c_ctidp = ctidp;
cs->c_clone_res = &clone_res;
+ cs->c_ptrace_event = ptrace_event;
#if defined(_LP64)
/*
* The AMD64 ABI says that the kernel clobbers %rcx and %r11. We
@@ -649,7 +637,7 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
;
rval = clone_res;
- lx_ptrace_stop_if_option(ptrace_event, B_TRUE, 0);
+ lx_ptrace_stop_if_option(ptrace_event, B_FALSE, (ulong_t)rval);
}
return (rval);
diff --git a/usr/src/lib/brand/lx/lx_brand/common/fork.c b/usr/src/lib/brand/lx/lx_brand/common/fork.c
index 9f2fbd6406..b0edee1adb 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/fork.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/fork.c
@@ -41,18 +41,36 @@
long
lx_fork(void)
{
- int ret = fork1();
+ int ret;
- if (ret == 0) {
- if (lx_is_rpm)
+ /*
+ * Inform the in-kernel ptrace(2) subsystem that we are about to
+ * emulate fork(2).
+ */
+ lx_ptrace_clone_begin(LX_PTRACE_O_TRACEFORK, B_FALSE);
+
+ switch (ret = fork1()) {
+ case -1:
+ return (-errno);
+
+ case 0:
+ /*
+ * Returning in the new child.
+ */
+ if (lx_is_rpm) {
(void) sleep(lx_rpm_delay);
+ }
lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEFORK, B_TRUE, 0);
- } else if (ret != -1) {
+ return (0);
+
+ default:
+ /*
+ * Returning in the new parent.
+ */
lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEFORK, B_FALSE,
(ulong_t)ret);
+ return (ret);
}
-
- return (ret == -1 ? -errno : ret);
}
/*
@@ -65,14 +83,31 @@ lx_fork(void)
long
lx_vfork(void)
{
- int ret = fork1();
+ int ret;
- if (ret == 0) {
+ /*
+ * Inform the in-kernel ptrace(2) subsystem that we are about to
+ * emulate vfork(2).
+ */
+ lx_ptrace_clone_begin(LX_PTRACE_O_TRACEVFORK, B_FALSE);
+
+ switch (ret = fork1()) {
+ case -1:
+ return (-errno);
+
+ case 0:
+ /*
+ * Returning in the new child.
+ */
lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEVFORK, B_TRUE, 0);
- } else if (ret != -1) {
+ return (0);
+
+ default:
+ /*
+ * Returning in the new parent.
+ */
lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEVFORK, B_FALSE,
(ulong_t)ret);
+ return (ret);
}
-
- return (ret == -1 ? -errno : ret);
}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c
index b8fdf36b42..abe015c2c4 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c
@@ -172,6 +172,9 @@ struct lx_locale_ending {
int se_size; /* solaris ending string length */
};
+__thread int lx_do_syscall_restart;
+__thread int lx_had_sigchild;
+
#define l2s_locale(lname, sname) \
{(lname), (sname), sizeof ((lname)) - 1, sizeof ((sname)) - 1}
@@ -647,6 +650,7 @@ lx_emulate(lx_regs_t *rp)
}
#endif /* _ILP32 */
+restart_syscall:
if (s->sy_flags & LX_SYS_IKE) {
lx_debug("\tsyscall %d re-vectoring to lx kernel module "
"for %s()", syscall_num, s->sy_name);
@@ -679,6 +683,12 @@ lx_emulate(lx_regs_t *rp)
ret = -stol_errno[-ret];
}
+ if (lx_do_syscall_restart && ret == -stol_errno[EINTR]) {
+ lx_debug("restarting system call due to signal interruption");
+ lx_do_syscall_restart = 0;
+ goto restart_syscall;
+ }
+
out:
/*
* For 32-bit, %eax holds the return code from the system call. For
@@ -962,7 +972,7 @@ lx_init(int argc, char *argv[], char *envp[])
lx_err_fatal("Unable to initialize thread-specific exit "
"context: %s", strerror(errno));
- if (lx_tsd.lxtsd_exit == 0) {
+ if (lx_tsd.lxtsd_exit == LX_ET_NONE) {
#if defined(_LP64)
/* Switch to Linux syscall mode */
(void) syscall(SYS_brand, B_CLR_NTV_SYSC_FLAG);
@@ -978,17 +988,36 @@ lx_init(int argc, char *argv[], char *envp[])
* exit_group() system call. In turn the brand library did a
* setcontext() to jump to the thread context state we saved above.
*/
- if (lx_tsd.lxtsd_exit == 1)
- thr_exit((void *)(long)lx_tsd.lxtsd_exit_status);
- else
- exit(lx_tsd.lxtsd_exit_status);
-
- assert(0);
-
+ lx_exit_common(lx_tsd.lxtsd_exit, lx_tsd.lxtsd_exit_status);
/*NOTREACHED*/
return (0);
}
+void
+lx_exit_common(lx_exit_type_t exit_type, uintptr_t exit_value)
+{
+ int ev = 0xff & exit_value;
+
+ switch (exit_type) {
+ case LX_ET_EXIT:
+ /*
+ * The native thread return value is never seen so we pass
+ * NULL.
+ */
+ thr_exit(NULL);
+ break;
+
+ case LX_ET_EXIT_GROUP:
+ exit(ev);
+ break;
+
+ default:
+ abort();
+ }
+
+ abort();
+}
+
/*
* Walk back through the stack until we find the lx_emulate() frame.
*/
diff --git a/usr/src/lib/brand/lx/lx_brand/common/misc.c b/usr/src/lib/brand/lx/lx_brand/common/misc.c
index f60f3f290f..750af869a4 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/misc.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/misc.c
@@ -572,8 +572,6 @@ lx_execve(uintptr_t p1, uintptr_t p2, uintptr_t p3)
if (argv == NULL)
argv = nullist;
- lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEEXEC, B_FALSE, 0);
-
/*
* Emulate PR_SET_KEEPCAPS which is reset on execve. If this is not done
* the emulated capabilities could be reduced more than expected.
diff --git a/usr/src/lib/brand/lx/lx_brand/common/ptrace.c b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c
index 2efc64a43e..174dbe8c19 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/ptrace.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c
@@ -51,82 +51,17 @@
#include <ieeefp.h>
#include <assert.h>
#include <libintl.h>
+#include <lx_syscall.h>
/*
- * Linux ptrace compatibility.
- *
- * The brand support for ptrace(2) is built on top of the Solaris /proc
- * interfaces, mounted at /native/proc in the zone. This gets quite
- * complicated due to the way ptrace works and the Solaris realization of the
- * Linux threading model.
- *
- * ptrace can only interact with a process if we are tracing it, and it is
- * currently stopped. There are two ways a process can begin tracing another
- * process:
- *
- * PTRACE_TRACEME
- *
- * A child process can use PTRACE_TRACEME to indicate that it wants to be
- * traced by the parent. This sets the ptrace compatibility flag in /proc
- * which causes ths ptrace consumer to be notified through the wait(2)
- * system call of events of interest. PTRACE_TRACEME is typically used by
- * the debugger by forking a process, using PTRACE_TRACEME, and finally
- * doing an exec of the specified program.
- *
- *
- * PTRACE_ATTACH
- *
- * We can attach to a process using PTRACE_ATTACH. This is considerably
- * more complicated than the previous case. On Linux, the traced process is
- * effectively reparented to the ptrace consumer so that event notification
- * can go through the normal wait(2) system call. Solaris has no such
- * ability to reparent a process (nor should it) so some trickery was
- * required.
- *
- * When the ptrace consumer uses PTRACE_ATTACH it forks a monitor child
- * process. The monitor enables the /proc ptrace flag for itself and uses
- * the native /proc mechanisms to observe the traced process and wait for
- * events of interest. When the traced process stops, the monitor process
- * sends itself a SIGTRAP thus rousting its parent process (the ptrace
- * consumer) out of wait(2). We then translate the process id and status
- * code from wait(2) to those of the traced process.
- *
- * To detach from the process we just have to clean up tracing flags and
- * clean up the monitor.
- *
- * ptrace can only interact with a process if we have traced it, and it is
- * currently stopped (see is_traced()). For threads, there's no way to
- * distinguish whether ptrace() has been called for all threads or some
- * subset. Since most clients will be tracing all threads, and erroneously
- * allowing ptrace to access a non-traced thread is non-fatal (or at least
- * would be fatal on linux), we ignore this aspect of the problem.
+ * Much of the Linux ptrace(2) emulation is performed in the kernel, and there
+ * is a block comment in "lx_ptrace.c" that describes the facility in some
+ * detail.
*/
-#define LX_PTRACE_TRACEME 0
-#define LX_PTRACE_PEEKTEXT 1
-#define LX_PTRACE_PEEKDATA 2
-#define LX_PTRACE_PEEKUSER 3
-#define LX_PTRACE_POKETEXT 4
-#define LX_PTRACE_POKEDATA 5
-#define LX_PTRACE_POKEUSER 6
-#define LX_PTRACE_CONT 7
-#define LX_PTRACE_KILL 8
-#define LX_PTRACE_SINGLESTEP 9
-#define LX_PTRACE_GETREGS 12
-#define LX_PTRACE_SETREGS 13
-#define LX_PTRACE_GETFPREGS 14
-#define LX_PTRACE_SETFPREGS 15
-#define LX_PTRACE_ATTACH 16
-#define LX_PTRACE_DETACH 17
-#define LX_PTRACE_GETFPXREGS 18
-#define LX_PTRACE_SETFPXREGS 19
-#define LX_PTRACE_SYSCALL 24
-#define LX_PTRACE_SETOPTIONS 0x4200
-#define LX_PTRACE_GETEVENTMSG 0x4201
-
/* execve syscall numbers for 64-bit vs. 32-bit */
#if defined(_LP64)
-#define LX_SYS_execve 520
+#define LX_SYS_execve 59
#else
#define LX_SYS_execve 11
#endif
@@ -237,22 +172,12 @@ typedef struct lx_user {
int lxu_debugreg[8];
} lx_user_t;
-typedef struct ptrace_monitor_map {
- struct ptrace_monitor_map *pmm_next; /* next pointer */
- pid_t pmm_monitor; /* monitor child process */
- pid_t pmm_target; /* traced Linux pid */
- pid_t pmm_pid; /* Solaris pid */
- lwpid_t pmm_lwpid; /* Solaris lwpid */
- uint_t pmm_exiting; /* detached */
-} ptrace_monitor_map_t;
-
typedef struct ptrace_state_map {
struct ptrace_state_map *psm_next; /* next pointer */
pid_t psm_pid; /* Solaris pid */
uintptr_t psm_debugreg[8]; /* debug registers */
} ptrace_state_map_t;
-static ptrace_monitor_map_t *ptrace_monitor_map = NULL;
static ptrace_state_map_t *ptrace_state_map = NULL;
static mutex_t ptrace_map_mtx = DEFAULTMUTEX;
@@ -260,6 +185,8 @@ extern void *_START_;
static sigset_t blockable_sigs;
+static long lx_ptrace_kernel(int, pid_t, uintptr_t, uintptr_t);
+
void
lx_ptrace_init(void)
{
@@ -298,24 +225,6 @@ open_lwpfile(pid_t pid, lwpid_t lwpid, int mode, const char *name)
}
static int
-get_status(pid_t pid, pstatus_t *psp)
-{
- int fd;
-
- if ((fd = open_procfile(pid, O_RDONLY, "status")) < 0)
- return (-ESRCH);
-
- if (read(fd, psp, sizeof (pstatus_t)) != sizeof (pstatus_t)) {
- (void) close(fd);
- return (-EIO);
- }
-
- (void) close(fd);
-
- return (0);
-}
-
-static int
get_lwpstatus(pid_t pid, lwpid_t lwpid, lwpstatus_t *lsp)
{
int fd;
@@ -869,22 +778,6 @@ debug_registers(pid_t pid)
return (p != NULL? p->psm_debugreg : NULL);
}
-static void
-free_debug_registers(pid_t pid)
-{
- ptrace_state_map_t **pp;
- ptrace_state_map_t *p;
-
- /* ASSERT(MUTEX_HELD(&ptrace_map_mtx) */
- for (pp = &ptrace_state_map; (p = *pp) != NULL; pp = &p->psm_next) {
- if (p->psm_pid == pid) {
- *pp = p->psm_next;
- free(p);
- break;
- }
- }
-}
-
static int
setup_watchpoints(pid_t pid, uintptr_t *debugreg)
{
@@ -952,156 +845,33 @@ setup_watchpoints(pid_t pid, uintptr_t *debugreg)
}
/*
- * Returns TRUE if the process is traced, FALSE otherwise. This is only true
- * if the process is currently stopped, and has been traced using
- * PTRACE_TRACEME, PTRACE_ATTACH or one of the Linux-specific trace options.
+ * Returns B_TRUE if the target LWP, identified by its Linux pid, is traced by
+ * this LWP and is waiting in "ptrace-stop". Returns B_FALSE otherwise.
*/
-static int
-is_traced(pid_t pid)
+static boolean_t
+is_ptrace_stopped(pid_t lxpid)
{
- ptrace_monitor_map_t *p;
- pstatus_t status;
- uint_t curr_opts;
- pid_t mypid;
+ ulong_t dummy;
/*
- * First get the stop options since that is an indication that the
- * process is being traced.
+ * We attempt a PTRACE_GETEVENTMSG request to determine if the tracee
+ * is stopped appropriately. As we are not in the kernel, this is not
+ * an atomic check; the process is not guaranteed to remain stopped
+ * once we have dropped the locks protecting that state and left the
+ * kernel.
*/
- if (syscall(SYS_brand, B_PTRACE_EXT_OPTS, B_PTRACE_EXT_OPTS_GET, pid,
- &curr_opts) != 0)
- return (0);
-
- mypid = getpid();
-
- if (get_status(pid, &status) != 0)
- return (0);
-
- /*
- * When we look to see if we are tracing a process we have to take the
- * PTRACE_SETOPTIONS handling into account. In particular, if we are
- * tracing with PTRACE_O_TRACEFORK, etc. then we may be dealing with
- * the child of a child that we started tracing. We can determine this
- * by checking the EMUL_PTRACE_IS_TRACED flag and checking the parent
- * of the parent. We cannot check for the presence of the options since
- * those will be cleared during the process of detaching from a tracee.
- */
- if (curr_opts & EMUL_PTRACE_IS_TRACED && status.pr_ppid != mypid) {
- pstatus_t par_status;
- pid_t chkpid = status.pr_ppid;
-
- if (get_status(status.pr_ppid, &par_status) == 0) {
- chkpid = par_status.pr_ppid;
- } else {
- /* parent is gone, re-get our ppid */
- if (get_status(pid, &par_status) == 0)
- chkpid = par_status.pr_ppid;
- }
-
- if (chkpid == mypid)
- return (1);
+ if (lx_ptrace_kernel(LX_PTRACE_GETEVENTMSG, lxpid, NULL,
+ (uintptr_t)&dummy) == 0) {
+ return (B_TRUE);
}
- if ((status.pr_flags & PR_PTRACE ||
- curr_opts & EMUL_PTRACE_IS_TRACED) &&
- (status.pr_ppid == mypid) &&
- (status.pr_lwp.pr_flags & PR_ISTOP))
- return (1);
-
- (void) mutex_lock(&ptrace_map_mtx);
- for (p = ptrace_monitor_map; p != NULL; p = p->pmm_next) {
- if (p->pmm_target == pid) {
- (void) mutex_unlock(&ptrace_map_mtx);
- return (1);
- }
- }
- (void) mutex_unlock(&ptrace_map_mtx);
-
- return (0);
-}
-
-static int
-ptrace_trace_common(int fd)
-{
- struct {
- long cmd;
- union {
- long flags;
- sigset_t signals;
- fltset_t faults;
- } arg;
- } ctl;
- size_t size;
-
- ctl.cmd = PCSTRACE;
- prfillset(&ctl.arg.signals);
- size = sizeof (long) + sizeof (sigset_t);
- if (write(fd, &ctl, size) != size)
- return (-1);
-
- ctl.cmd = PCSFAULT;
- premptyset(&ctl.arg.faults);
- size = sizeof (long) + sizeof (fltset_t);
- if (write(fd, &ctl, size) != size)
- return (-1);
-
- ctl.cmd = PCUNSET;
- ctl.arg.flags = PR_FORK;
- size = sizeof (long) + sizeof (long);
- if (write(fd, &ctl, size) != size)
- return (-1);
-
- return (0);
-}
-
-/*
- * Notify that parent that we wish to be traced. This is the equivalent of:
- *
- * 1. Stop on all signals, and nothing else
- * 2. Turn off inherit-on-fork flag
- * 3. Set ptrace compatible flag
- *
- * If we are not the main thread, then the client is trying to request behavior
- * by which one of its own thread is to be traced. We don't support this mode
- * of operation.
- */
-static int
-ptrace_traceme(void)
-{
- int fd, ret;
- int error;
- long ctl[2];
- pstatus_t status;
- pid_t pid = getpid();
-
- if (_lwp_self() != 1) {
- lx_unsupported("thread %d calling PTRACE_TRACEME is "
- "unsupported", _lwp_self());
- return (-ENOTSUP);
- }
-
- if ((ret = get_status(pid, &status)) != 0)
- return (ret);
-
/*
- * Why would a process try to do this twice? I'm not sure, but there's
- * a conformance test which wants this to fail just so.
+ * This call should only fail with ESRCH, which tells us that a
+ * tracee with that pid was not found in the stopped condition.
*/
- if (status.pr_flags & PR_PTRACE)
- return (-EPERM);
-
- if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0)
- return (-errno);
+ assert(errno == ESRCH);
- ctl[0] = PCSET;
- ctl[1] = PR_PTRACE;
- error = 0;
- if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl) ||
- ptrace_trace_common(fd) != 0)
- error = -errno;
-
- (void) close(fd);
- return (error);
+ return (B_FALSE);
}
/*
@@ -1114,9 +884,6 @@ ptrace_peek(pid_t pid, uintptr_t addr, long *ret)
int fd;
long data;
- if (!is_traced(pid))
- return (-ESRCH);
-
if ((fd = open_procfile(pid, O_RDONLY, "as")) < 0)
return (-ESRCH);
@@ -1143,9 +910,6 @@ ptrace_peek_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int *ret)
uintptr_t *debugreg;
int dreg;
- if (!is_traced(pid))
- return (-ESRCH);
-
/*
* The offset specified by the user is an offset into the Linux
* user structure (seriously). Rather than constructing a full
@@ -1239,9 +1003,6 @@ ptrace_poke(pid_t pid, uintptr_t addr, int data)
{
int fd;
- if (!is_traced(pid))
- return (-ESRCH);
-
if (addr & 0x3)
return (-EINVAL);
@@ -1265,9 +1026,6 @@ ptrace_poke_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int data)
uintptr_t *debugreg;
int dreg;
- if (!is_traced(pid))
- return (-ESRCH);
-
if (off & 0x3)
return (-EINVAL);
@@ -1300,187 +1058,13 @@ ptrace_poke_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int data)
}
static int
-ptrace_cont_common(int fd, int sig, int run, int step)
-{
- long ctl[1 + 1 + sizeof (siginfo_t) / sizeof (long) + 2];
- long *ctlp = ctl;
- size_t size;
-
- assert(0 <= sig && sig <= LX_NSIG);
- assert(!step || run);
-
- /*
- * Clear the current signal.
- */
- *ctlp++ = PCCSIG;
-
- /*
- * Send a signal if one was specified.
- */
- if (sig != 0 && sig != LX_SIGSTOP) {
- siginfo_t *infop;
-
- *ctlp++ = PCSSIG;
- infop = (siginfo_t *)ctlp;
- bzero(infop, sizeof (siginfo_t));
- infop->si_signo = ltos_signo[sig];
-
- ctlp += sizeof (siginfo_t) / sizeof (long);
- }
-
- /*
- * If run is true, set the lwp running.
- */
- if (run) {
- *ctlp++ = PCRUN;
- *ctlp++ = step ? PRSTEP : 0;
- }
-
- size = (char *)ctlp - (char *)&ctl[0];
- assert(size <= sizeof (ctl));
-
- if (write(fd, ctl, size) != size) {
- lx_debug("failed to continue %s", strerror(errno));
- return (-EIO);
- }
-
- return (0);
-}
-
-static int
-ptrace_cont_monitor(ptrace_monitor_map_t *p)
-{
- long ctl[2];
- int fd;
-
- fd = open_procfile(p->pmm_monitor, O_WRONLY, "ctl");
- if (fd < 0) {
- lx_debug("failed to open monitor ctl %d",
- errno);
- return (-EIO);
- }
-
- ctl[0] = PCRUN;
- ctl[1] = PRCSIG;
- if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) {
- (void) close(fd);
- return (-EIO);
- }
-
- (void) close(fd);
-
- return (0);
-}
-
-static int
-ptrace_cont(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig, int step)
-{
- ptrace_monitor_map_t *p;
- uintptr_t *debugreg;
- int fd, ret;
-
- if (!is_traced(pid))
- return (-ESRCH);
-
- if (sig < 0 || sig > LX_NSIG)
- return (-EINVAL);
-
- if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0)
- return (-ESRCH);
-
- if ((ret = ptrace_cont_common(fd, sig, 1, step)) != 0) {
- (void) close(fd);
- return (ret);
- }
-
- (void) close(fd);
-
- /* kludge: use debugreg[4] to remember the single-step flag */
- if ((debugreg = debug_registers(pid)) != NULL)
- debugreg[4] = step;
-
- /*
- * Check for a monitor and get it moving if we find it. If any of the
- * /proc operations fail, we're kind of sunk so just return an error.
- */
- (void) mutex_lock(&ptrace_map_mtx);
- for (p = ptrace_monitor_map; p != NULL; p = p->pmm_next) {
- if (p->pmm_target == lxpid) {
- if ((ret = ptrace_cont_monitor(p)) != 0)
- return (ret);
- break;
- }
- }
- (void) mutex_unlock(&ptrace_map_mtx);
-
- return (0);
-}
-
-/*
- * If a monitor exists for this traced process, dispose of it.
- * First turn off its ptrace flag so we won't be notified of its
- * impending demise. We ignore errors for this step since they
- * indicate only that the monitor has been damaged due to pilot
- * error. Then kill the monitor, and wait for it. If the wait
- * succeeds we can dispose of the corpse, otherwise another thread's
- * wait call has collected it and we need to set a flag in the
- * structure so that if can be picked up in wait.
- */
-static void
-monitor_kill(pid_t lxpid, pid_t pid)
-{
- ptrace_monitor_map_t *p, **pp;
- pid_t mpid;
- int fd;
- long ctl[2];
-
- (void) mutex_lock(&ptrace_map_mtx);
- free_debug_registers(pid);
- for (pp = &ptrace_monitor_map; (p = *pp) != NULL; pp = &p->pmm_next) {
- if (p->pmm_target == lxpid) {
- mpid = p->pmm_monitor;
- if ((fd = open_procfile(mpid, O_WRONLY, "ctl")) >= 0) {
- ctl[0] = PCUNSET;
- ctl[1] = PR_PTRACE;
- (void) write(fd, ctl, sizeof (ctl));
- (void) close(fd);
- }
-
- (void) kill(mpid, SIGKILL);
-
- if (waitpid(mpid, NULL, 0) == mpid) {
- *pp = p->pmm_next;
- free(p);
- } else {
- p->pmm_exiting = 1;
- }
-
- break;
- }
- }
- (void) mutex_unlock(&ptrace_map_mtx);
-}
-
-static int
-ptrace_kill(pid_t lxpid, pid_t pid)
+ptrace_kill(pid_t pid)
{
int ret;
- if (!is_traced(pid))
- return (-ESRCH);
-
ret = kill(pid, SIGKILL);
- /* kill off the monitor process, if any */
- monitor_kill(lxpid, pid);
-
- return (ret);
-}
-
-static int
-ptrace_step(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig)
-{
- return (ptrace_cont(lxpid, pid, lwpid, sig, 1));
+ return (ret == 0 ? ret : -errno);
}
static int
@@ -1489,9 +1073,6 @@ ptrace_getregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
lx_user_regs_t regs;
int ret;
- if (!is_traced(pid))
- return (-ESRCH);
-
if ((ret = getregs(pid, lwpid, &regs)) != 0)
return (ret);
@@ -1506,9 +1087,6 @@ ptrace_setregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
{
lx_user_regs_t regs;
- if (!is_traced(pid))
- return (-ESRCH);
-
if (uucopy((void *)addr, &regs, sizeof (regs)) != 0)
return (-errno);
@@ -1521,9 +1099,6 @@ ptrace_getfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
lx_user_fpregs_t regs;
int ret;
- if (!is_traced(pid))
- return (-ESRCH);
-
if ((ret = getfpregs(pid, lwpid, &regs)) != 0)
return (ret);
@@ -1538,9 +1113,6 @@ ptrace_setfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
{
lx_user_fpregs_t regs;
- if (!is_traced(pid))
- return (-ESRCH);
-
if (uucopy((void *)addr, &regs, sizeof (regs)) != 0)
return (-errno);
@@ -1553,9 +1125,6 @@ ptrace_getfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
lx_user_fpxregs_t regs;
int ret;
- if (!is_traced(pid))
- return (-ESRCH);
-
if ((ret = getfpxregs(pid, lwpid, &regs)) != 0)
return (ret);
@@ -1570,412 +1139,124 @@ ptrace_setfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
{
lx_user_fpxregs_t regs;
- if (!is_traced(pid))
- return (-ESRCH);
-
if (uucopy((void *)addr, &regs, sizeof (regs)) != 0)
return (-errno);
return (setfpxregs(pid, lwpid, &regs));
}
-static void __NORETURN
-ptrace_monitor(int fd)
+void
+lx_ptrace_stop_if_option(int option, boolean_t child, ulong_t msg)
{
- struct {
- long cmd;
- union {
- long flags;
- sigset_t signals;
- fltset_t faults;
- } arg;
- } ctl;
- size_t size;
- int monfd;
- int rv;
-
- monfd = open_procfile(getpid(), O_WRONLY, "ctl");
-
- ctl.cmd = PCSTRACE; /* trace only SIGTRAP */
- premptyset(&ctl.arg.signals);
- praddset(&ctl.arg.signals, SIGTRAP);
- size = sizeof (long) + sizeof (sigset_t);
- (void) write(monfd, &ctl, size); /* can't fail */
-
- ctl.cmd = PCSFAULT;
- premptyset(&ctl.arg.faults);
- size = sizeof (long) + sizeof (fltset_t);
- (void) write(monfd, &ctl, size); /* can't fail */
-
- ctl.cmd = PCUNSET;
- ctl.arg.flags = PR_FORK;
- size = sizeof (long) + sizeof (long);
- (void) write(monfd, &ctl, size); /* can't fail */
-
- ctl.cmd = PCSET; /* wait()able by the parent */
- ctl.arg.flags = PR_PTRACE;
- size = sizeof (long) + sizeof (long);
- (void) write(monfd, &ctl, size); /* can't fail */
-
- (void) close(monfd);
-
- ctl.cmd = PCWSTOP;
- size = sizeof (long);
-
- for (;;) {
- /*
- * Wait for the traced process to stop.
- */
- if (write(fd, &ctl, size) != size) {
- rv = (errno == ENOENT)? 0 : 1;
- lx_debug("monitor failed to wait for LWP to stop: %s",
+ /*
+ * We call into the kernel to see if we need to stop for specific
+ * ptrace(2) events.
+ */
+ lx_debug("lx_ptrace_stop_if_option(%d, %s, %lu)", option,
+ child ? "TRUE [child]" : "FALSE [parent]", msg);
+ if (syscall(SYS_brand, B_PTRACE_STOP_FOR_OPT, option, child,
+ msg) != 0) {
+ if (errno != ESRCH) {
+ /*
+ * This should _only_ fail if we are not traced, or do
+ * not have this option set.
+ */
+ lx_err_fatal("B_PTRACE_STOP_FOR_OPT failed: %s",
strerror(errno));
- _exit(rv);
}
-
- lx_debug("monitor caught traced LWP");
-
- /*
- * Pull the ptrace trigger by sending ourself a SIGTRAP. This
- * will cause this, the monitor process, to stop which will
- * cause the parent's waitid(2) call to return this process
- * id. In lx_wait(), we remap the monitor process's pid and
- * status to those of the traced LWP. When the parent process
- * uses ptrace to resume the traced LWP, it will additionally
- * restart this process.
- */
- (void) _lwp_kill(_lwp_self(), SIGTRAP);
-
- lx_debug("monitor was resumed");
}
}
-static int
-ptrace_attach_common(int fd, pid_t lxpid, pid_t pid, lwpid_t lwpid, int run)
+/*
+ * Signal to the in-kernel ptrace(2) subsystem that the next native fork() or
+ * thr_create() is part of an emulated fork(2) or clone(2). If CLONE_PTRACE
+ * was passed to clone(2), inherit_flag should be B_TRUE.
+ */
+void
+lx_ptrace_clone_begin(int option, boolean_t inherit_flag)
{
- pid_t child;
- ptrace_monitor_map_t *p;
- sigset_t unblock;
- pstatus_t status;
- long ctl[1 + sizeof (sysset_t) / sizeof (long) + 2];
- long *ctlp = ctl;
- size_t size;
- sysset_t *sysp;
- int ret;
-
- /*
- * We're going to need this structure so better to fail now before its
- * too late to turn back.
- */
- if ((p = malloc(sizeof (ptrace_monitor_map_t))) == NULL)
- return (-EIO);
-
- if ((ret = get_status(pid, &status)) != 0) {
- free(p);
- return (ret);
+ lx_debug("lx_ptrace_clone_begin(%d, %sPTRACE_CLONE)", option,
+ inherit_flag ? "" : "!");
+ if (syscall(SYS_brand, B_PTRACE_CLONE_BEGIN, option,
+ inherit_flag) != 0) {
+ lx_err_fatal("B_PTRACE_CLONE_BEGIN failed: %s",
+ strerror(errno));
}
-
- /*
- * If this process is already traced, bail.
- */
- if (status.pr_flags & PR_PTRACE) {
- free(p);
- return (-EPERM);
- }
-
- /*
- * Turn on the appropriate tracing flags. It's exceedingly unlikely
- * that this operation will fail; any failure would probably be due
- * to another /proc consumer mucking around.
- */
- if (ptrace_trace_common(fd) != 0) {
- free(p);
- return (-EIO);
- }
-
- /*
- * Native ptrace automatically catches processes when they exec so we
- * have to do that explicitly here.
- */
- *ctlp++ = PCSEXIT;
- sysp = (sysset_t *)ctlp;
- ctlp += sizeof (sysset_t) / sizeof (long);
- premptyset(sysp);
- praddset(sysp, SYS_execve);
- if (run) {
- *ctlp++ = PCRUN;
- *ctlp++ = 0;
- }
-
- size = (char *)ctlp - (char *)&ctl[0];
-
- if (write(fd, ctl, size) != size) {
- free(p);
- return (-EIO);
- }
-
- /*
- * Spawn the monitor proceses to notify this process of events of
- * interest in the traced process. We block signals here both so
- * we're not interrupted during this operation and so that the
- * monitor process doesn't accept signals.
- */
- (void) sigprocmask(SIG_BLOCK, &blockable_sigs, &unblock);
- if ((child = fork1()) == 0)
- ptrace_monitor(fd);
- (void) sigprocmask(SIG_SETMASK, &unblock, NULL);
-
- if (child == -1) {
- lx_debug("failed to fork monitor process\n");
- free(p);
- return (-EIO);
- }
-
- p->pmm_monitor = child;
- p->pmm_target = lxpid;
- p->pmm_pid = pid;
- p->pmm_lwpid = lwpid;
- p->pmm_exiting = 0;
-
- (void) mutex_lock(&ptrace_map_mtx);
- p->pmm_next = ptrace_monitor_map;
- ptrace_monitor_map = p;
- (void) mutex_unlock(&ptrace_map_mtx);
-
- return (0);
}
-static int
-ptrace_attach(pid_t lxpid, pid_t pid, lwpid_t lwpid)
+static long
+lx_ptrace_kernel(int ptrace_op, pid_t lxpid, uintptr_t addr, uintptr_t data)
{
- int fd, ret;
- long ctl;
+ int ret;
/*
- * Linux doesn't let you trace process 1 -- go figure.
+ * Call into the in-kernel ptrace(2) emulation code.
*/
- if (lxpid == 1)
- return (-EPERM);
-
- if ((fd = open_lwpfile(pid, lwpid, O_WRONLY | O_EXCL, "lwpctl")) < 0)
- return (errno == EBUSY ? -EPERM : -ESRCH);
-
- ctl = PCSTOP;
- if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
- lx_err("failed to stop %d/%d\n", (int)pid, (int)lwpid);
- assert(0);
+ lx_debug("revectoring to B_PTRACE_KERNEL(%d, %d, %p, %p)", ptrace_op,
+ lxpid, addr, data);
+ ret = syscall(SYS_brand, B_PTRACE_KERNEL, ptrace_op, lxpid, addr,
+ data);
+ if (ret == 0) {
+ lx_debug("\t= %d", ret);
+ } else {
+ lx_debug("\t= %d (%s)", ret, strerror(errno));
}
- ret = ptrace_attach_common(fd, lxpid, pid, lwpid, 0);
-
- (void) close(fd);
-
- return (ret);
+ return (ret == 0 ? ret : -errno);
}
-static int
-ptrace_detach(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig)
+long
+lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
{
- long ctl[2];
- int fd, ret;
-
- if (!is_traced(pid))
- return (-ESRCH);
-
- if (sig < 0 || sig > LX_NSIG)
- return (-EINVAL);
-
- if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0)
- return (-ESRCH);
-
- if (syscall(SYS_brand, B_PTRACE_EXT_OPTS, B_PTRACE_DETACH, pid, 0) != 0)
- return (-ESRCH);
+ int ptrace_op = (int)p1;
+ pid_t pid, lxpid = (pid_t)p2;
+ lwpid_t lwpid;
/*
- * The /proc ptrace flag may not be set, but we clear it
- * unconditionally since doing so doesn't hurt anything.
+ * Some PTRACE_* requests are emulated entirely in the kernel.
*/
- ctl[0] = PCUNSET;
- ctl[1] = PR_PTRACE;
- if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) {
- (void) close(fd);
- return (-EIO);
- }
-
+ switch (ptrace_op) {
/*
- * Clear the brand-specific system call tracing flag to ensure that
- * the target doesn't stop unexpectedly some time in the future.
+ * PTRACE_TRACEME and PTRACE_ATTACH operations induce the tracing of
+ * one LWP by another. The target LWP must not be traced already.
+ * Both `data' and `addr' are ignored in both cases.
*/
- if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 0)) != 0) {
- (void) close(fd);
- return (-ret);
- }
+ case LX_PTRACE_TRACEME:
+ return (lx_ptrace_kernel(ptrace_op, 0, 0, 0));
- /* kill off the monitor process, if any */
- monitor_kill(lxpid, pid);
+ case LX_PTRACE_ATTACH:
+ return (lx_ptrace_kernel(ptrace_op, lxpid, 0, 0));
/*
- * Turn on the run-on-last-close flag so that all tracing flags will be
- * cleared when we close the control file descriptor.
+ * PTRACE_DETACH, PTRACE_SYSCALL, PTRACE_SINGLESTEP and PTRACE_CONT
+ * are all restarting actions. They are only allowed when attached
+ * to the target LWP and when that target LWP is in a "ptrace-stop"
+ * condition.
*/
- ctl[0] = PCSET;
- ctl[1] = PR_RLC;
- if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) {
- (void) close(fd);
- return (-EIO);
+ case LX_PTRACE_DETACH:
+ case LX_PTRACE_SYSCALL:
+ case LX_PTRACE_CONT:
+ case LX_PTRACE_SINGLESTEP:
+ /*
+ * These actions also require the LWP to be traced and stopped, but do
+ * not restart the target LWP.
+ */
+ case LX_PTRACE_SETOPTIONS:
+ case LX_PTRACE_GETEVENTMSG:
+ return (lx_ptrace_kernel(ptrace_op, lxpid, p3, p4));
}
/*
- * Clear the current signal (if any) and possibly send the traced
- * process a new signal.
+ * The rest of the emulated PTRACE_* actions are emulated in userland.
+ * They require the target LWP to be traced and currently in
+ * "ptrace-stop", but do not subsequently restart the target LWP.
*/
- ret = ptrace_cont_common(fd, sig, 0, 0);
-
- (void) close(fd);
-
- return (ret);
-}
-
-static int
-ptrace_syscall(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig)
-{
- int ret;
-
- if (!is_traced(pid))
+ if (lx_lpid_to_spair(lxpid, &pid, &lwpid) < 0 ||
+ !is_ptrace_stopped(lxpid)) {
return (-ESRCH);
-
- if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 1)) != 0)
- return (-ret);
-
- return (ptrace_cont(lxpid, pid, lwpid, sig, 0));
-}
-
-static int
-ptrace_setoptions(pid_t pid, int options)
-{
- int ret;
- int fd;
- int error = 0;
- struct {
- long cmd;
- union {
- long flags;
- sigset_t signals;
- fltset_t faults;
- } arg;
- } ctl;
- size_t size;
- pstatus_t status;
-
- if ((ret = get_status(pid, &status)) != 0)
- return (ret);
-
- if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0)
- return (-errno);
-
- /* since we're doing option tracing now, only catch sigtrap */
- ctl.cmd = PCSTRACE;
- premptyset(&ctl.arg.signals);
- praddset(&ctl.arg.signals, SIGTRAP);
- size = sizeof (long) + sizeof (sigset_t);
- if (write(fd, &ctl, size) != size) {
- error = -errno;
- } else {
- /*
- * If we're tracing fork, set inherit-on-fork, otherwise clear
- * it.
- */
- if (options & LX_PTRACE_O_TRACEFORK) {
- ctl.cmd = PCSET;
- } else {
- ctl.cmd = PCUNSET;
- }
- ctl.arg.flags = PR_FORK;
- size = sizeof (long) + sizeof (long);
- if (write(fd, &ctl, size) != size)
- error = -errno;
}
- (void) close(fd);
-
- if (error != 0)
- return (error);
-
- ret = syscall(SYS_brand, B_PTRACE_EXT_OPTS, B_PTRACE_EXT_OPTS_SET, pid,
- options);
-
- return ((ret != 0) ? -errno : 0);
-}
-
-void
-lx_ptrace_stop_if_option(int option, boolean_t child, ulong_t msg)
-{
- pid_t pid;
- uint_t curr_opts;
-
- pid = getpid();
- if (pid == 1)
- pid = zoneinit_pid;
-
- /* first we have to see if the stop option is set for this process */
- if (syscall(SYS_brand, B_PTRACE_EXT_OPTS, B_PTRACE_EXT_OPTS_GET, pid,
- &curr_opts) != 0)
- return;
-
- if (child) {
- /*
- * If we just forked/cloned, then the trace flags only carry
- * over to the child if the specific flag was enabled on the
- * parent. For example, if only TRACEFORK is enabled and we
- * clone, then we must clear the trace flags. If TRACEFORK is
- * enabled and we fork, then we keep the flags.
- */
- if (option == LX_PTRACE_O_TRACECLONE ||
- option == LX_PTRACE_O_TRACEFORK ||
- option == LX_PTRACE_O_TRACEVFORK) {
-
- if ((curr_opts & option) == 0)
- (void) syscall(SYS_brand, B_PTRACE_EXT_OPTS,
- B_PTRACE_EXT_OPTS_SET, pid, 0);
-
- /*
- * Since we know we're the child we have to modify how
- * we stop. Set the emulation's child flag in the
- * option.
- */
- option |= EMUL_PTRACE_O_CHILD;
- }
- }
-
- /* now if the option is/was set, this brand call will stop us */
- if (curr_opts & option)
- (void) syscall(SYS_brand, B_PTRACE_STOP_FOR_OPT, option, msg);
-}
-
-static int
-ptrace_geteventmsg(pid_t pid, ulong_t *msgp)
-{
- int ret;
-
- ret = syscall(SYS_brand, B_PTRACE_GETEVENTMSG, pid, msgp);
-
- return ((ret != 0) ? -errno : 0);
-}
-
-long
-lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
-{
- pid_t pid, lxpid = (pid_t)p2;
- lwpid_t lwpid;
-
- if ((p1 != LX_PTRACE_TRACEME) &&
- (lx_lpid_to_spair(lxpid, &pid, &lwpid) < 0))
- return (-ESRCH);
-
- switch (p1) {
- case LX_PTRACE_TRACEME:
- return (ptrace_traceme());
-
+ switch (ptrace_op) {
case LX_PTRACE_PEEKTEXT:
case LX_PTRACE_PEEKDATA:
return (ptrace_peek(pid, p3, (long *)p4));
@@ -1990,14 +1271,8 @@ lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
case LX_PTRACE_POKEUSER:
return (ptrace_poke_user(pid, lwpid, p3, (int)p4));
- case LX_PTRACE_CONT:
- return (ptrace_cont(lxpid, pid, lwpid, (int)p4, 0));
-
case LX_PTRACE_KILL:
- return (ptrace_kill(lxpid, pid));
-
- case LX_PTRACE_SINGLESTEP:
- return (ptrace_step(lxpid, pid, lwpid, (int)p4));
+ return (ptrace_kill(pid));
case LX_PTRACE_GETREGS:
return (ptrace_getregs(pid, lwpid, p4));
@@ -2011,419 +1286,13 @@ lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
case LX_PTRACE_SETFPREGS:
return (ptrace_setfpregs(pid, lwpid, p4));
- case LX_PTRACE_ATTACH:
- return (ptrace_attach(lxpid, pid, lwpid));
-
- case LX_PTRACE_DETACH:
- return (ptrace_detach(lxpid, pid, lwpid, (int)p4));
-
case LX_PTRACE_GETFPXREGS:
return (ptrace_getfpxregs(pid, lwpid, p4));
case LX_PTRACE_SETFPXREGS:
return (ptrace_setfpxregs(pid, lwpid, p4));
- case LX_PTRACE_SYSCALL:
- return (ptrace_syscall(lxpid, pid, lwpid, (int)p4));
-
- case LX_PTRACE_SETOPTIONS:
- return (ptrace_setoptions(pid, (int)p4));
-
- case LX_PTRACE_GETEVENTMSG:
- return (ptrace_geteventmsg(pid, (ulong_t *)p4));
-
default:
return (-EINVAL);
}
}
-
-void
-lx_ptrace_fork(void)
-{
- /*
- * Send a special signal (that has no Linux equivalent) to indicate
- * that we're in this particularly special case. The signal will be
- * ignored by this process, but noticed by /proc consumers tracing
- * this process.
- */
- (void) _lwp_kill(_lwp_self(), SIGWAITING);
-}
-
-static void
-ptrace_catch_fork(pid_t pid, int monitor)
-{
- long ctl[14 + 2 * sizeof (sysset_t) / sizeof (long)];
- long *ctlp;
- sysset_t *sysp;
- size_t size;
- pstatus_t ps;
- pid_t child;
- int fd, err;
-
- /*
- * If any of this fails, we're really sunk since the child
- * will be stuck in the middle of lx_ptrace_fork().
- * Fortunately it's practically assured to succeed unless
- * something is seriously wrong on the system.
- */
- if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) {
- lx_debug("lx_catch_fork: failed to control %d",
- (int)pid);
- return;
- }
-
- /*
- * Turn off the /proc PR_PTRACE flag so the parent doesn't get
- * spurious wake ups while we're working our dark magic. Arrange to
- * catch the process when it exits from fork, and turn on the /proc
- * inherit-on-fork flag so we catcht the child as well. We then run
- * the process, wait for it to stop on the fork1(2) call and reset
- * the tracing flags to their original state.
- */
- ctlp = ctl;
- *ctlp++ = PCCSIG;
- if (!monitor) {
- *ctlp++ = PCUNSET;
- *ctlp++ = PR_PTRACE;
- }
- *ctlp++ = PCSET;
- *ctlp++ = PR_FORK;
- *ctlp++ = PCSEXIT;
- sysp = (sysset_t *)ctlp;
- ctlp += sizeof (sysset_t) / sizeof (long);
- premptyset(sysp);
- praddset(sysp, SYS_forksys); /* fork1() is forksys(0, 0) */
- *ctlp++ = PCRUN;
- *ctlp++ = 0;
- *ctlp++ = PCWSTOP;
- if (!monitor) {
- *ctlp++ = PCSET;
- *ctlp++ = PR_PTRACE;
- }
- *ctlp++ = PCUNSET;
- *ctlp++ = PR_FORK;
- *ctlp++ = PCSEXIT;
- sysp = (sysset_t *)ctlp;
- ctlp += sizeof (sysset_t) / sizeof (long);
- premptyset(sysp);
- if (monitor)
- praddset(sysp, SYS_execve);
-
- size = (char *)ctlp - (char *)&ctl[0];
- assert(size <= sizeof (ctl));
-
- if (write(fd, ctl, size) != size) {
- (void) close(fd);
- lx_debug("lx_catch_fork: failed to set %d running",
- (int)pid);
- return;
- }
-
- /*
- * Get the status so we can find the value returned from fork1() --
- * the child process's pid.
- */
- if (get_status(pid, &ps) != 0) {
- (void) close(fd);
- lx_debug("lx_catch_fork: failed to get status for %d",
- (int)pid);
- return;
- }
-
- child = (pid_t)ps.pr_lwp.pr_reg[R_R0];
-
- /*
- * We're done with the parent -- off you go.
- */
- ctl[0] = PCRUN;
- ctl[1] = 0;
- size = 2 * sizeof (long);
-
- if (write(fd, ctl, size) != size) {
- (void) close(fd);
- lx_debug("lx_catch_fork: failed to set %d running",
- (int)pid);
- return;
- }
-
- (void) close(fd);
-
- /*
- * If fork1(2) failed, we're done.
- */
- if (child < 0) {
- lx_debug("lx_catch_fork: fork1 failed");
- return;
- }
-
- /*
- * Now we need to screw with the child process.
- */
- if ((fd = open_lwpfile(child, 1, O_WRONLY, "lwpctl")) < 0) {
- lx_debug("lx_catch_fork: failed to control %d",
- (int)child);
- return;
- }
-
- ctlp = ctl;
- *ctlp++ = PCUNSET;
- *ctlp++ = PR_FORK;
- *ctlp++ = PCSEXIT;
- sysp = (sysset_t *)ctlp;
- ctlp += sizeof (sysset_t) / sizeof (long);
- premptyset(sysp);
- size = (char *)ctlp - (char *)&ctl[0];
-
- if (write(fd, ctl, size) != size) {
- (void) close(fd);
- lx_debug("lx_catch_fork: failed to clear trace flags for %d",
- (int)child);
- return;
- }
-
- /*
- * Now treat the child as though we had attached to it explicitly.
- */
- err = ptrace_attach_common(fd, child, child, 1, 1);
- assert(err == 0);
-
- (void) close(fd);
-}
-
-static void
-set_dr6(pid_t pid, siginfo_t *infop)
-{
- uintptr_t *debugreg;
- uintptr_t addr;
- uintptr_t base;
- size_t size = NULL;
- int dr7;
- int lrw;
- int i;
-
- if ((debugreg = debug_registers(pid)) == NULL)
- return;
-
- debugreg[6] = 0xffff0ff0; /* read as ones */
- switch (infop->si_code) {
- case TRAP_TRACE:
- debugreg[6] |= 0x4000; /* single-step */
- break;
- case TRAP_RWATCH:
- case TRAP_WWATCH:
- case TRAP_XWATCH:
- dr7 = debugreg[7];
- addr = (uintptr_t)infop->si_addr;
- for (i = 0; i < 4; i++) {
- if ((dr7 & (1 << (2 * i))) == 0) /* enabled? */
- continue;
- lrw = (dr7 >> (16 + (4 * i))) & 0xf;
- switch (lrw >> 2) { /* length */
- case 0: size = 1; break;
- case 1: size = 2; break;
- case 2: size = 8; break;
- case 3: size = 4; break;
- }
- base = debugreg[i];
- if (addr >= base && addr < base + size)
- debugreg[6] |= (1 << i);
- }
- /*
- * Were we also attempting a single-step?
- * (kludge: we use debugreg[4] for this flag.)
- */
- if (debugreg[4])
- debugreg[6] |= 0x4000;
- break;
- default:
- break;
- }
-}
-
-/*
- * This is called from the emulation of the wait4, waitpid and waitid system
- * calls to take into account:
- * - the monitor processes which we spawn to observe other processes from
- * ptrace_attach().
- * - the extended si_status result we can get when extended ptrace options
- * are enabled.
- */
-int
-lx_ptrace_wait(siginfo_t *infop)
-{
- ptrace_monitor_map_t *p, **pp;
- pid_t lxpid, pid = infop->si_pid;
- lwpid_t lwpid;
- int fd;
- pstatus_t status;
-
- /*
- * If the process observed by waitid(2) corresponds to the monitor
- * process for a traced thread, we need to rewhack the siginfo_t to
- * look like it came from the traced thread with the flags set
- * according to the current state.
- */
- (void) mutex_lock(&ptrace_map_mtx);
- for (pp = &ptrace_monitor_map; (p = *pp) != NULL; pp = &p->pmm_next) {
- if (p->pmm_monitor == pid) {
- assert(infop->si_code == CLD_EXITED ||
- infop->si_code == CLD_KILLED ||
- infop->si_code == CLD_DUMPED ||
- infop->si_code == CLD_TRAPPED);
- goto found;
- }
- }
- (void) mutex_unlock(&ptrace_map_mtx);
-
- if (infop->si_code == CLD_TRAPPED) {
- /*
- * If the traced process got a SIGWAITING, we must be in the
- * middle of a clone(2) with CLONE_PTRACE set.
- */
- if (infop->si_status == SIGWAITING) {
- ptrace_catch_fork(pid, 0);
- return (-1);
- }
-
- /*
- * If the traced process got a SIGTRAP then Linux ptrace
- * options might have been set, so setup the extended
- * si_status to contain the (possible) event. Note that
- * our definitions for the ptrace events (e.g.
- * LX_PTRACE_EVENT_FORK) is already shifted <<8 as documented
- * on the Linux ptrace(2) man page.
- */
- if (infop->si_status == SIGTRAP) {
- uint_t event;
-
- if (syscall(SYS_brand, B_PTRACE_EXT_OPTS,
- B_PTRACE_EXT_OPTS_EVT, pid, &event) == 0)
- infop->si_status |= event;
- }
- }
-
- if (get_status(pid, &status) == 0 &&
- (status.pr_lwp.pr_flags & PR_STOPPED) &&
- status.pr_lwp.pr_why == PR_SIGNALLED &&
- status.pr_lwp.pr_info.si_signo == SIGTRAP)
- set_dr6(pid, &status.pr_lwp.pr_info);
-
- return (0);
-
-found:
- /*
- * If the monitor is in the exiting state, ignore the event and free
- * the monitor structure if the monitor has exited. By returning -1 we
- * indicate to the caller that this was a spurious return from
- * waitid(2) and that it should ignore the result and try again.
- */
- if (p->pmm_exiting) {
- if (infop->si_code == CLD_EXITED ||
- infop->si_code == CLD_KILLED ||
- infop->si_code == CLD_DUMPED) {
- *pp = p->pmm_next;
- (void) mutex_unlock(&ptrace_map_mtx);
- free(p);
- }
- return (-1);
- }
-
- lxpid = p->pmm_target;
- pid = p->pmm_pid;
- lwpid = p->pmm_lwpid;
- (void) mutex_unlock(&ptrace_map_mtx);
-
- /*
- * If we can't find the traced process, kill off its monitor.
- */
- if ((fd = open_lwpfile(pid, lwpid, O_RDONLY, "lwpstatus")) < 0) {
- assert(errno == ENOENT);
- monitor_kill(lxpid, pid);
- infop->si_code = CLD_EXITED;
- infop->si_status = 0;
- infop->si_pid = lxpid;
- return (0);
- }
-
- if (read(fd, &status.pr_lwp, sizeof (status.pr_lwp)) !=
- sizeof (status.pr_lwp)) {
- lx_err("read lwpstatus failed %d %s", fd, strerror(errno));
- assert(0);
- }
-
- (void) close(fd);
-
- /*
- * If the traced process isn't stopped, this is a truly spurious
- * event probably caused by another /proc consumer tracing the
- * monitor.
- */
- if (!(status.pr_lwp.pr_flags & PR_STOPPED)) {
- (void) ptrace_cont_monitor(p);
- return (-1);
- }
-
- switch (status.pr_lwp.pr_why) {
- case PR_SIGNALLED:
- /*
- * If the traced process got a SIGWAITING, we must be in the
- * middle of a clone(2) with CLONE_PTRACE set.
- */
- if (status.pr_lwp.pr_what == SIGWAITING) {
- ptrace_catch_fork(lxpid, 1);
- (void) ptrace_cont_monitor(p);
- return (-1);
- }
- infop->si_code = CLD_TRAPPED;
- infop->si_status = status.pr_lwp.pr_what;
- if (status.pr_lwp.pr_info.si_signo == SIGTRAP)
- set_dr6(pid, &status.pr_lwp.pr_info);
- break;
-
- case PR_REQUESTED:
- /*
- * Make it look like the traced process stopped on an
- * event of interest.
- */
- infop->si_code = CLD_TRAPPED;
- infop->si_status = SIGTRAP;
- break;
-
- case PR_JOBCONTROL:
- /*
- * Ignore this as it was probably caused by another /proc
- * consumer tracing the monitor.
- */
- (void) ptrace_cont_monitor(p);
- return (-1);
-
- case PR_SYSEXIT:
- /*
- * Processes traced via a monitor (rather than using the
- * native Solaris ptrace support) explicitly trace returns
- * from exec system calls since it's an implicit ptrace
- * trace point. Accordingly we need to present a process
- * in that state as though it had reached the ptrace trace
- * point.
- */
- if (status.pr_lwp.pr_what == SYS_execve) {
- infop->si_code = CLD_TRAPPED;
- infop->si_status = SIGTRAP;
- break;
- }
-
- /*FALLTHROUGH*/
-
- case PR_SYSENTRY:
- case PR_FAULTED:
- case PR_SUSPENDED:
- default:
- lx_err("didn't expect %d (%d %d)", status.pr_lwp.pr_why,
- status.pr_lwp.pr_what, status.pr_lwp.pr_flags);
- assert(0);
- }
-
- infop->si_pid = lxpid;
-
- return (0);
-}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/signal.c b/usr/src/lib/brand/lx/lx_brand/common/signal.c
index b845ae5cac..9029249b10 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/signal.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/signal.c
@@ -345,6 +345,14 @@ static int lx_sigsegv_depth = 0;
#endif
/*
+ * Setting LX_NO_ABORT_HANDLER in the environment will prevent the emulated
+ * Linux program from modifying the signal handling disposition for SIGSEGV or
+ * SIGABRT. Useful for debugging programs which fall over themselves to
+ * prevent useful core files being generated.
+ */
+static int lx_no_abort_handler = 0;
+
+/*
* Cache result of process.max-file-descriptor to avoid calling getrctl()
* for each lx_ppoll().
*/
@@ -497,6 +505,29 @@ ltos_sigcode(int si_code)
}
}
+/*
+ * Convert the "status" field of a SIGCLD siginfo_t. We need to extract the
+ * illumos signal number and convert it to a Linux signal number while leaving
+ * the ptrace(2) event bits intact.
+ */
+int
+stol_status(int s)
+{
+ /*
+ * We mask out the top bit here in case PTRACE_O_TRACESYSGOOD
+ * is in use and 0x80 has been ORed with the signal number.
+ */
+ int stat = stol_signo[s & 0x7f];
+ assert(stat != -1);
+
+ /*
+ * We must mix in the ptrace(2) event which may be stored in
+ * the second byte of the status code. We also re-include the
+ * PTRACE_O_TRACESYSGOOD bit.
+ */
+ return ((s & 0xff80) | stat);
+}
+
int
stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop)
{
@@ -530,7 +561,8 @@ stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop)
case LX_SIGCHLD:
lx_siginfo.lsi_pid = siginfop->si_pid;
- lx_siginfo.lsi_status = siginfop->si_status;
+ lx_siginfo.lsi_status = stol_status(
+ siginfop->si_status);
lx_siginfo.lsi_utime = siginfop->si_utime;
lx_siginfo.lsi_stime = siginfop->si_stime;
break;
@@ -1552,6 +1584,17 @@ lx_call_user_handler(int sig, siginfo_t *sip, void *p)
size_t stksize;
int lx_sig;
+ switch (sig) {
+ case SIGCLD:
+ /*
+ * Signal to an interrupted waitpid() that it was interrupted
+ * by a SIGCLD, and should restart to grab the wait status
+ * this signal represented.
+ */
+ lx_had_sigchild = 1;
+ break;
+ }
+
/*
* If Illumos signal has no Linux equivalent, effectively ignore it.
*/
@@ -1568,6 +1611,18 @@ lx_call_user_handler(int sig, siginfo_t *sip, void *p)
lx_debug("lxsap @ 0x%p", lxsap);
/*
+ * If the delivery of this signal interrupted a system call, we must
+ * only restart it if sigaction(2) was used to set the SA_RESTART flag
+ * for this signal. The lx_emulate() function checks this per-thread
+ * variable to discover the restart disposition of the most recently
+ * handled signal.
+ *
+ * NOTE: this mechanism may not stand up to close scrutiny in the face
+ * of nested asynchronous signal delivery.
+ */
+ lx_do_syscall_restart = !!(lxsap->lxsa_flags & LX_SA_RESTART);
+
+ /*
* Emulate vsyscall support.
*
* Linux magically maps a single page into the address space of each
@@ -1740,6 +1795,18 @@ lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp,
return (-errno);
if ((sig = ltos_signo[lx_sig]) != -1) {
+ if (lx_no_abort_handler != 0) {
+ /*
+ * If LX_NO_ABORT_HANDLER has been set, we will
+ * not allow the emulated program to do
+ * anything hamfisted with SIGSEGV or SIGABRT
+ * signals.
+ */
+ if (sig == SIGSEGV || sig == SIGABRT) {
+ return (0);
+ }
+ }
+
/*
* Block this signal while messing with its dispostion
*/
@@ -2068,6 +2135,10 @@ lx_siginit(void)
sigset_t new_set, oset;
int lx_sig, sig;
+ if (getenv("LX_NO_ABORT_HANDLER") != NULL) {
+ lx_no_abort_handler = 1;
+ }
+
/*
* Block all signals possible while setting up the signal imposition
* mechanism.
diff --git a/usr/src/lib/brand/lx/lx_brand/common/wait.c b/usr/src/lib/brand/lx/lx_brand/common/wait.c
index 031eb5e5cd..c3421858eb 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/wait.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/wait.c
@@ -22,7 +22,7 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2014 Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
/*
@@ -70,6 +70,7 @@
#include <sys/wait.h>
#include <sys/lx_types.h>
#include <sys/lx_signal.h>
+#include <sys/lx_debug.h>
#include <sys/lx_misc.h>
#include <sys/lx_syscall.h>
#include <sys/syscall.h>
@@ -100,32 +101,23 @@
extern long max_pid;
+/*
+ * Split the passed waitpid/waitid options into two separate variables:
+ * those for the native illumos waitid(2), and the extra Linux-specific
+ * options we will handle in our brand-specific code.
+ */
static int
-ltos_options(uintptr_t options)
+ltos_options(uintptr_t options, int *native_options, int *extra_options)
{
int newoptions = 0;
- int rval;
- lx_waitid_args_t extra;
if (((options) & ~(LX_WNOHANG | LX_WUNTRACED | LX_WEXITED |
LX_WCONTINUED | LX_WNOWAIT | LX_WNOTHREAD | LX_WALL |
LX_WCLONE)) != 0) {
return (-1);
}
- /*
- * We use the B_STORE_ARGS command to store any of LX_WNOTHREAD,
- * LX_WALL, and LX_WCLONE that have been set as options on this waitid
- * call. These flags are stored as part of the lwp_brand_data, so that
- * when there is a later syscall to waitid, the brand code there can
- * detect that we added extra flags here and use them as appropriate.
- * We pass them in here rather than the normal channel for flags to
- * prevent polluting the namespace.
- */
- extra.waitid_flags = options & (LX_WNOTHREAD | LX_WALL | LX_WCLONE);
- rval = syscall(SYS_brand, B_STORE_ARGS, &extra,
- sizeof (lx_waitid_args_t), NULL, NULL, NULL, NULL);
- if (rval < 0)
- return (rval);
+
+ *extra_options = options & (LX_WNOTHREAD | LX_WALL | LX_WCLONE);
if (options & LX_WNOHANG)
newoptions |= WNOHANG;
@@ -138,10 +130,13 @@ ltos_options(uintptr_t options)
if (options & LX_WNOWAIT)
newoptions |= WNOWAIT;
- /* The trapped option is implicit on Linux */
+ /*
+ * The trapped option is implicit on Linux.
+ */
newoptions |= WTRAPPED;
- return (newoptions);
+ *native_options = newoptions;
+ return (0);
}
static int
@@ -164,10 +159,7 @@ lx_wstat(int code, int status)
break;
case CLD_TRAPPED:
case CLD_STOPPED:
- stat = stol_signo[status];
- assert(stat != -1);
- stat <<= 8;
- stat |= WSTOPFLG;
+ stat = (stol_status(status) << 8) | WSTOPFLG;
break;
case CLD_CONTINUED:
stat = WCONTFLG;
@@ -177,33 +169,31 @@ lx_wstat(int code, int status)
return (stat);
}
-/* wrapper to make solaris waitid work properly with ptrace */
static int
-lx_waitid_helper(idtype_t idtype, id_t id, siginfo_t *info, int options)
+lx_waitid_helper(idtype_t idtype, id_t id, siginfo_t *sip, int native_options,
+ int extra_options)
{
- do {
- /*
- * It's possible that we return EINVAL here if the idtype is
- * P_PID or P_PGID and id is out of bounds for a valid pid or
- * pgid, but Linux expects to see ECHILD. No good way occurs to
- * handle this so we'll punt for now.
- */
- if (waitid(idtype, id, info, options) < 0)
- return (-errno);
-
- /*
- * If the WNOHANG flag was specified and no child was found
- * return 0.
- */
- if ((options & WNOHANG) && info->si_pid == 0)
- return (0);
-
- /*
- * It's possible that we may have a spurious return for one of
- * the child processes created by the ptrace subsystem. If
- * that's the case, we simply try again.
- */
- } while (lx_ptrace_wait(info) == -1);
+ /*
+ * Call into our in-kernel waitid() wrapper:
+ */
+restart:
+ lx_had_sigchild = 0;
+ if (syscall(SYS_brand, B_HELPER_WAITID, idtype, id, sip,
+ native_options, extra_options) != 0) {
+ if (errno == EINTR && (lx_had_sigchild ||
+ lx_do_syscall_restart)) {
+ /*
+ * If we handled a SIGCLD while blocked in waitid(),
+ * or the SA_RESTART flag was set, we should wait
+ * again.
+ */
+ lx_debug("lx_waitid_helper() restarting due to"
+ " interrupted system call");
+ goto restart;
+ }
+ return (-1);
+ }
+
return (0);
}
@@ -214,11 +204,12 @@ lx_wait4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
struct rusage ru = { 0 };
idtype_t idtype;
id_t id;
- int options, status = 0;
+ int status = 0;
pid_t pid = (pid_t)p1;
int rval;
+ int native_options, extra_options;
- if ((options = ltos_options(p3)) == -1)
+ if (ltos_options(p3, &native_options, &extra_options) == -1)
return (-EINVAL);
if (pid > max_pid)
@@ -260,14 +251,17 @@ lx_wait4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
id = pid;
}
- options |= WEXITED | WTRAPPED;
+ native_options |= WEXITED | WTRAPPED;
+
+ if (lx_waitid_helper(idtype, id, &info, native_options,
+ extra_options) == -1) {
+ return (-errno);
+ }
- if ((rval = lx_waitid_helper(idtype, id, &info, options)) < 0)
- return (rval);
/*
* If the WNOHANG flag was specified and no child was found return 0.
*/
- if ((options & WNOHANG) && info.si_pid == 0)
+ if ((native_options & WNOHANG) && info.si_pid == 0)
return (0);
status = lx_wstat(info.si_code, info.si_status);
@@ -297,9 +291,10 @@ lx_waitpid(uintptr_t p1, uintptr_t p2, uintptr_t p3)
long
lx_waitid(uintptr_t idtype, uintptr_t id, uintptr_t infop, uintptr_t opt)
{
- int rval, options;
+ int native_options, extra_options;
siginfo_t s_info = {0};
- if ((options = ltos_options(opt)) == -1)
+
+ if (ltos_options(opt, &native_options, &extra_options) == -1)
return (-EINVAL);
if (((opt) & (LX_WEXITED | LX_WSTOPPED | LX_WCONTINUED)) == 0)
@@ -318,11 +313,14 @@ lx_waitid(uintptr_t idtype, uintptr_t id, uintptr_t infop, uintptr_t opt)
default:
return (-EINVAL);
}
- if ((rval = lx_waitid_helper(idtype, (id_t)id, &s_info, options)) < 0)
- return (rval);
+
+ if (lx_waitid_helper(idtype, id, &s_info, native_options,
+ extra_options) == -1) {
+ return (-errno);
+ }
/* If the WNOHANG flag was specified and no child was found return 0. */
- if ((options & WNOHANG) && s_info.si_pid == 0)
+ if ((native_options & WNOHANG) && s_info.si_pid == 0)
return (0);
return (stol_siginfo(&s_info, (lx_siginfo_t *)infop));
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h
index 7d9c6fae0a..f50535d0c4 100644
--- a/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h
@@ -55,6 +55,13 @@ extern int lx_rpm_delay;
extern boolean_t lx_is_rpm;
/*
+ * These thread-specific variables allow the signal interposition code
+ * to communicate restart disposition for any interrupting signals.
+ */
+extern __thread int lx_had_sigchild;
+extern __thread int lx_do_syscall_restart;
+
+/*
* Values Linux expects for init
*/
#define LX_INIT_PGID 0
@@ -173,6 +180,7 @@ extern void lx_ptrace_init();
extern int lx_ptrace_wait(siginfo_t *);
extern void lx_ptrace_fork(void);
extern void lx_ptrace_stop_if_option(int, boolean_t, ulong_t msg);
+extern void lx_ptrace_clone_begin(int, boolean_t);
extern int lx_check_alloca(size_t);
#define SAFE_ALLOCA(sz) (lx_check_alloca(sz) ? alloca(sz) : NULL)
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h
index b4dc47faac..f3d39fca64 100644
--- a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h
@@ -21,7 +21,7 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2014 Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_LX_SIGNAL_H
@@ -396,6 +396,7 @@ extern void lx_sigdeliver(int, siginfo_t *, void *, size_t, void (*)(),
void (*)(), uintptr_t);
extern int stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop);
+extern int stol_status(int);
#endif /* !defined(_ASM) */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h
index b4b72c78f9..3d7b9018e1 100644
--- a/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h
@@ -34,6 +34,12 @@ extern "C" {
#include <thread.h>
+typedef enum lx_exit_type {
+ LX_ET_NONE = 0,
+ LX_ET_EXIT,
+ LX_ET_EXIT_GROUP
+} lx_exit_type_t;
+
typedef struct lx_tsd {
#if defined(_ILP32)
/* 32-bit thread-specific Linux %gs value */
@@ -42,7 +48,7 @@ typedef struct lx_tsd {
/* 64-bit thread-specific Linux %fsbase value */
uintptr_t lxtsd_fsbase;
#endif
- int lxtsd_exit;
+ lx_exit_type_t lxtsd_exit;
int lxtsd_exit_status;
ucontext_t lxtsd_exit_context;
} lx_tsd_t;
@@ -51,6 +57,8 @@ extern thread_key_t lx_tsd_key;
extern void lx_swap_gs(long, long *);
+extern void lx_exit_common(lx_exit_type_t, uintptr_t) __NORETURN;
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/lib/libproc/common/Pcontrol.c b/usr/src/lib/libproc/common/Pcontrol.c
index bde48d1416..afa04c43c7 100644
--- a/usr/src/lib/libproc/common/Pcontrol.c
+++ b/usr/src/lib/libproc/common/Pcontrol.c
@@ -26,6 +26,7 @@
* Portions Copyright 2007 Chad Mynhier
* Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <assert.h>
@@ -1758,6 +1759,9 @@ prldump(const char *caller, lwpstatus_t *lsp)
case PR_SUSPENDED:
dprintf("%s: SUSPENDED\n", caller);
break;
+ case PR_BRAND:
+ dprintf("%s: BRANDPRIVATE (%d)\n", caller, lsp->pr_what);
+ break;
default:
dprintf("%s: Unknown\n", caller);
break;
@@ -1937,6 +1941,7 @@ Pstopstatus(struct ps_prochandle *P,
case PR_FAULTED:
case PR_JOBCONTROL:
case PR_SUSPENDED:
+ case PR_BRAND:
break;
default:
errno = EPROTO;
@@ -3511,6 +3516,7 @@ Lstopstatus(struct ps_lwphandle *L,
case PR_FAULTED:
case PR_JOBCONTROL:
case PR_SUSPENDED:
+ case PR_BRAND:
break;
default:
errno = EPROTO;
diff --git a/usr/src/man/man4/proc.4 b/usr/src/man/man4/proc.4
index e7058c410d..c0a044164a 100644
--- a/usr/src/man/man4/proc.4
+++ b/usr/src/man/man4/proc.4
@@ -665,6 +665,18 @@ the process. \fBpr_what\fR is unused in this case.
.RE
.sp
+.ne 2
+.na
+\fB\fBPR_BRAND\fR\fR
+.ad
+.RS 17n
+indicates that the lwp stopped for a brand-specific reason. Interpretation
+of the value of \fBpr_what\fR depends on which zone brand is in use. It is
+not generally expected that an lwp stopped in this state will be restarted
+by native \fBproc\fR(4) consumers.
+.RE
+
+.sp
.LP
\fBpr_cursig\fR names the current signal, that is, the next signal to be
delivered to the lwp, if any. \fBpr_info\fR, when the lwp is in a
diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c
index 139e40a5d1..4507c0303c 100644
--- a/usr/src/uts/common/brand/lx/os/lx_brand.c
+++ b/usr/src/uts/common/brand/lx/os/lx_brand.c
@@ -78,6 +78,10 @@ void lx_set_kern_version(zone_t *, char *);
void lx_copy_procdata(proc_t *, proc_t *);
extern int getsetcontext(int, void *);
+extern int waitsys(idtype_t, id_t, siginfo_t *, int);
+#if defined(_SYSCALL32_IMPL)
+extern int waitsys32(idtype_t, id_t, siginfo_t *, int);
+#endif
extern void lx_proc_exit(proc_t *, klwp_t *);
static void lx_psig_to_proc(proc_t *, kthread_t *, int);
@@ -107,35 +111,38 @@ static int lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
caddr_t exec_file, struct cred *cred, int brand_action);
static boolean_t lx_native_exec(uint8_t, const char **);
-static void lx_ptrace_exectrap(proc_t *);
static uint32_t lx_map32limit(proc_t *);
/* lx brand */
struct brand_ops lx_brops = {
- lx_init_brand_data,
- lx_free_brand_data,
- lx_brandsys,
- lx_setbrand,
- lx_getattr,
- lx_setattr,
- lx_copy_procdata,
- lx_proc_exit,
- lx_exec,
- lx_setrval,
- lx_initlwp,
- lx_forklwp,
- lx_freelwp,
- lx_exitlwp,
- lx_elfexec,
- NULL,
- NULL,
- lx_psig_to_proc,
- NSIG,
- lx_exit_with_sig,
- lx_wait_filter,
- lx_native_exec,
- lx_ptrace_exectrap,
- lx_map32limit
+ lx_init_brand_data, /* b_init_brand_data */
+ lx_free_brand_data, /* b_free_brand_data */
+ lx_brandsys, /* b_brandsys */
+ lx_setbrand, /* b_setbrand */
+ lx_getattr, /* b_getattr */
+ lx_setattr, /* b_setattr */
+ lx_copy_procdata, /* b_copy_procdata */
+ lx_proc_exit, /* b_proc_exit */
+ lx_exec, /* b_exec */
+ lx_setrval, /* b_lwp_setrval */
+ lx_initlwp, /* b_initlwp */
+ lx_forklwp, /* b_forklwp */
+ lx_freelwp, /* b_freelwp */
+ lx_exitlwp, /* b_lwpexit */
+ lx_elfexec, /* b_elfexec */
+ NULL, /* b_sigset_native_to_brand */
+ NULL, /* b_sigset_brand_to_native */
+ lx_psig_to_proc, /* b_psig_to_proc */
+ NSIG, /* b_nsig */
+ lx_exit_with_sig, /* b_exit_with_sig */
+ lx_wait_filter, /* b_wait_filter */
+ lx_native_exec, /* b_native_exec */
+ NULL, /* b_ptrace_exectrap */
+ lx_map32limit, /* b_map32limit */
+ lx_stop_notify, /* b_stop_notify */
+ lx_waitid_helper, /* b_waitid_helper */
+ lx_sigcld_repost, /* b_sigcld_repost */
+ lx_issig_stop /* b_issig_stop */
};
struct brand_mach_ops lx_mops = {
@@ -167,33 +174,39 @@ static struct modlinkage modlinkage = {
void
lx_proc_exit(proc_t *p, klwp_t *lwp)
{
- zone_t *z = p->p_zone;
int sig = ptolxproc(p)->l_signal;
- ASSERT(p->p_brand == &lx_brand);
- ASSERT(p->p_brand_data != NULL);
-
- /*
- * If init is dying and we aren't explicitly shutting down the zone
- * or the system, then Solaris is about to restart init. The Linux
- * init is not designed to handle a restart, which it interprets as
- * a reboot. To give it a sane environment in which to run, we
- * reboot the zone.
- */
- if (p->p_pid == z->zone_proc_initpid) {
- if (z->zone_boot_err == 0 &&
- z->zone_restart_init &&
- zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
- zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN)
- (void) zone_kadmin(A_REBOOT, 0, NULL, CRED());
- }
+ VERIFY(p->p_brand == &lx_brand);
+ VERIFY(p->p_brand_data != NULL);
/*
* We might get here if fork failed (e.g. ENOMEM) so we don't always
* have an lwp (see brand_clearbrand).
*/
- if (lwp != NULL)
+ if (lwp != NULL) {
+ boolean_t reenter_mutex = B_FALSE;
+
+ /*
+ * This brand entry point is called variously with and without
+ * the process p_lock held. It would be possible to refactor
+ * the brand infrastructure so that proc_exit() explicitly
+ * calls this hook (b_lwpexit/lx_exitlwp) for the last LWP in a
+ * process prior to detaching the brand with
+ * brand_clearbrand(). Absent such refactoring, we
+ * conditionally exit the mutex for the duration of the call.
+ *
+ * The atomic replacement of both "p_brand" and "p_brand_data"
+ * is not affected by dropping and reacquiring the mutex here.
+ */
+ if (mutex_owned(&p->p_lock) != 0) {
+ mutex_exit(&p->p_lock);
+ reenter_mutex = B_TRUE;
+ }
lx_exitlwp(lwp);
+ if (reenter_mutex) {
+ mutex_enter(&p->p_lock);
+ }
+ }
/*
* The call path here is:
@@ -261,310 +274,6 @@ lx_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
return (-EINVAL);
}
-/*
- * Enable/disable ptrace system call tracing for the given LWP. Enabling is
- * done by both setting the flag in that LWP's brand data (in the kernel) and
- * setting the process-wide trace flag (in the brand library of the traced
- * process).
- */
-static int
-lx_ptrace_syscall_set(pid_t pid, id_t lwpid, int set)
-{
- proc_t *p;
- kthread_t *t;
- klwp_t *lwp;
- lx_proc_data_t *lpdp;
- lx_lwp_data_t *lldp;
- uintptr_t addr;
- int ret, flag = 1;
-
- if ((p = sprlock(pid)) == NULL)
- return (ESRCH);
-
- if (priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) {
- sprunlock(p);
- return (EPERM);
- }
-
- if ((t = idtot(p, lwpid)) == NULL || (lwp = ttolwp(t)) == NULL) {
- sprunlock(p);
- return (ESRCH);
- }
-
- if ((lpdp = ptolxproc(p)) == NULL ||
- (lldp = lwp->lwp_brand) == NULL) {
- sprunlock(p);
- return (ESRCH);
- }
-
- if (set) {
- /*
- * Enable the ptrace flag for this LWP and this process. Note
- * that we will turn off the LWP's ptrace flag, but we don't
- * turn off the process's ptrace flag.
- */
- lldp->br_ptrace = 1;
- lpdp->l_ptrace = 1;
-
- addr = lpdp->l_traceflag;
-
- mutex_exit(&p->p_lock);
-
- /*
- * This can fail only in some rare corner cases where the
- * process is exiting or we're completely out of memory. In
- * these cases, it's sufficient to return an error to the ptrace
- * consumer and leave the process-wide flag set.
- */
- ret = uwrite(p, &flag, sizeof (flag), addr);
-
- mutex_enter(&p->p_lock);
-
- /*
- * If we couldn't set the trace flag, unset the LWP's ptrace
- * flag as there ptrace consumer won't expect this LWP to stop.
- */
- if (ret != 0)
- lldp->br_ptrace = 0;
- } else {
- lldp->br_ptrace = 0;
- ret = 0;
- }
-
- sprunlock(p);
-
- if (ret != 0)
- ret = EIO;
-
- return (ret);
-}
-
-static void
-lx_ptrace_fire(void)
-{
- kthread_t *t = curthread;
- klwp_t *lwp = ttolwp(t);
- lx_lwp_data_t *lldp = lwp->lwp_brand;
-
- /*
- * The ptrace flag only applies until the next event is encountered
- * for the given LWP. If it's set, turn off the flag and poke the
- * controlling process by raising a signal.
- */
- if (lldp->br_ptrace) {
- lldp->br_ptrace = 0;
- tsignal(t, SIGTRAP);
- }
-}
-
-/*
- * Supports Linux PTRACE_SETOPTIONS handling which is similar to PTRACE_TRACEME
- * but return an event in the second byte of si_status.
- */
-static int
-lx_ptrace_ext_opts(int cmd, pid_t pid, uintptr_t val, int64_t *rval)
-{
- proc_t *p;
- lx_proc_data_t *lpdp;
- uint_t ret;
-
- if ((p = sprlock(pid)) == NULL)
- return (ESRCH);
-
- /*
- * Note that priv_proc_cred_perm can disallow access to ourself if
- * the proc's SNOCD p_flag is set, so we skip that check for ourself.
- */
- if (curproc != p &&
- priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) {
- sprunlock(p);
- return (EPERM);
- }
-
- if ((lpdp = ptolxproc(p)) == NULL) {
- sprunlock(p);
- return (ESRCH);
- }
-
- switch (cmd) {
- case B_PTRACE_EXT_OPTS_SET:
- lpdp->l_ptrace_opts = (uint_t)val;
- break;
-
- case B_PTRACE_EXT_OPTS_GET:
- ret = lpdp->l_ptrace_opts;
- if (lpdp->l_ptrace_is_traced)
- ret |= EMUL_PTRACE_IS_TRACED;
- break;
-
- case B_PTRACE_EXT_OPTS_EVT:
- ret = lpdp->l_ptrace_event;
- lpdp->l_ptrace_event = 0;
- break;
-
- case B_PTRACE_DETACH:
- lpdp->l_ptrace_is_traced = 0;
- break;
-
- default:
- sprunlock(p);
- return (EINVAL);
- }
-
- sprunlock(p);
-
- if (cmd == B_PTRACE_EXT_OPTS_GET || cmd == B_PTRACE_EXT_OPTS_EVT) {
- if (copyout(&ret, (void *)val, sizeof (uint_t)) != 0)
- return (EFAULT);
- }
-
- *rval = 0;
- return (0);
-}
-
-/*
- * Used to support Linux PTRACE_SETOPTIONS handling and similar to
- * PTRACE_TRACEME. We signal ourselves to stop on return from this syscall and
- * setup the event reason so the emulation can pull this out when someone
- * 'waits' on this process.
- */
-static void
-lx_ptrace_stop_for_option(int option, ulong_t msg)
-{
- proc_t *p = ttoproc(curthread);
- sigqueue_t *sqp;
- lx_proc_data_t *lpdp;
- boolean_t child = B_FALSE;
-
- if ((lpdp = ptolxproc(p)) == NULL) {
- /* this should never happen but just to be safe */
- return;
- }
-
- if (option & EMUL_PTRACE_O_CHILD) {
- child = B_TRUE;
- option &= ~EMUL_PTRACE_O_CHILD;
- }
-
- lpdp->l_ptrace_is_traced = 1;
-
- /* Track the event as the reason for stopping */
- switch (option) {
- case LX_PTRACE_O_TRACEFORK:
- if (!child) {
- lpdp->l_ptrace_event = LX_PTRACE_EVENT_FORK;
- lpdp->l_ptrace_eventmsg = msg;
- }
- break;
- case LX_PTRACE_O_TRACEVFORK:
- if (!child) {
- lpdp->l_ptrace_event = LX_PTRACE_EVENT_VFORK;
- lpdp->l_ptrace_eventmsg = msg;
- }
- break;
- case LX_PTRACE_O_TRACECLONE:
- if (!child) {
- lpdp->l_ptrace_event = LX_PTRACE_EVENT_CLONE;
- lpdp->l_ptrace_eventmsg = msg;
- }
- break;
- case LX_PTRACE_O_TRACEEXEC:
- lpdp->l_ptrace_event = LX_PTRACE_EVENT_EXEC;
- break;
- case LX_PTRACE_O_TRACEVFORKDONE:
- lpdp->l_ptrace_event = LX_PTRACE_EVENT_VFORK_DONE;
- lpdp->l_ptrace_eventmsg = msg;
- break;
- case LX_PTRACE_O_TRACEEXIT:
- lpdp->l_ptrace_event = LX_PTRACE_EVENT_EXIT;
- lpdp->l_ptrace_eventmsg = msg;
- break;
- case LX_PTRACE_O_TRACESECCOMP:
- lpdp->l_ptrace_event = LX_PTRACE_EVENT_SECCOMP;
- break;
- }
-
- /*
- * Post the required signal to ourselves so that we stop.
- *
- * Although Linux will send a SIGSTOP to a child process which is
- * stopped due to PTRACE_O_TRACEFORK, etc., we do not send that signal
- * since that leads us down the code path in the kernel which calls
- * stop(PR_JOBCONTROL, SIGSTOP), which in turn means that the TS_XSTART
- * flag gets turned off on the thread and this makes it complex to
- * actually get this process going when the userland application wants
- * to detach. Since consumers don't seem to depend on the specific
- * signal, we'll just stop both the parent and child the same way. We
- * do keep track of both the parent and child via the
- * EMUL_PTRACE_O_CHILD bit, in case we need to revisit this later.
- */
- psignal(p, SIGTRAP);
-
- /*
- * Since we're stopping, we need to post the SIGCHLD to the parent. The
- * code in sigcld expects p_wdata to be set to SIGTRAP before it can
- * send the signal, so do that here. We also need p_wcode to be set as
- * if we are ptracing, even though we're not really (see the code in
- * stop() when procstop is set and p->p_proc_flag has the P_PR_PTRACE
- * bit set). This is needed so that when the application calls waitid,
- * it will properly retrieve the process.
- */
- sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
- mutex_enter(&pidlock);
- p->p_wdata = SIGTRAP;
- p->p_wcode = CLD_TRAPPED;
- sigcld(p, sqp);
- mutex_exit(&pidlock);
-}
-
-static int
-lx_ptrace_geteventmsg(pid_t pid, ulong_t *msgp)
-{
- proc_t *p;
- lx_proc_data_t *lpdp;
- ulong_t msg;
-
- if ((p = sprlock(pid)) == NULL)
- return (ESRCH);
-
- if (curproc != p &&
- priv_proc_cred_perm(curproc->p_cred, p, NULL, VREAD) != 0) {
- sprunlock(p);
- return (EPERM);
- }
-
- if ((lpdp = ptolxproc(p)) == NULL) {
- sprunlock(p);
- return (ESRCH);
- }
-
- msg = lpdp->l_ptrace_eventmsg;
- lpdp->l_ptrace_eventmsg = 0;
-
- sprunlock(p);
-
- if (copyout(&msg, (void *)msgp, sizeof (ulong_t)) != 0)
- return (EFAULT);
-
- return (0);
-}
-
-/*
- * Brand entry to allow us to optionally generate the ptrace SIGTRAP on exec().
- * This will only be called if ptrace is enabled -- and we only generate the
- * SIGTRAP if LX_PTRACE_O_TRACEEXEC hasn't been set.
- */
-void
-lx_ptrace_exectrap(proc_t *p)
-{
- lx_proc_data_t *lpdp;
-
- if ((lpdp = ptolxproc(p)) == NULL ||
- !(lpdp->l_ptrace_opts & LX_PTRACE_O_TRACEEXEC)) {
- psignal(p, SIGTRAP);
- }
-}
-
uint32_t
lx_map32limit(proc_t *p)
{
@@ -719,6 +428,12 @@ lx_init_brand_data(zone_t *zone)
(void) strlcpy(data->lxzd_kernel_version, "2.4.21", LX_VERS_MAX);
data->lxzd_max_syscall = LX_NSYSCALLS;
zone->zone_brand_data = data;
+
+ /*
+ * In Linux, if the init(1) process terminates, the system panics.
+ * The zone must reboot to simulate this behaviour.
+ */
+ zone->zone_reboot_on_init_exit = B_TRUE;
}
void
@@ -835,6 +550,16 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
lwpd->br_scms = 1;
#endif
+ if (pd->l_traceflag != NULL && pd->l_ptrace != 0) {
+ /*
+ * If ptrace(2) is active on this process, it is likely
+ * that we just finished an emulated execve(2) in a
+ * traced child. The usermode traceflag will have been
+ * clobbered by the exec, so we set it again here:
+ */
+ (void) suword32((void *)pd->l_traceflag, 1);
+ }
+
*rval = 0;
return (0);
case B_TTYMODES:
@@ -934,11 +659,6 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
return (0);
}
- case B_PTRACE_SYSCALL:
- *rval = lx_ptrace_syscall_set((pid_t)arg1, (id_t)arg2,
- (int)arg3);
- return (0);
-
case B_SYSENTRY:
if (lx_systrace_enabled) {
ASSERT(lx_systrace_entry_ptr != NULL);
@@ -966,7 +686,7 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
#endif
}
- lx_ptrace_fire();
+ (void) lx_ptrace_stop(LX_PR_SYSENTRY);
pd = p->p_brand_data;
@@ -987,7 +707,7 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
(*lx_systrace_return_ptr)(arg1, arg2, arg2, 0, 0, 0, 0);
}
- lx_ptrace_fire();
+ (void) lx_ptrace_stop(LX_PR_SYSEXIT);
pd = p->p_brand_data;
@@ -1013,20 +733,55 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
*/
return (lx_sched_affinity(cmd, arg1, arg2, arg3, rval));
- case B_PTRACE_EXT_OPTS:
+ case B_PTRACE_STOP_FOR_OPT:
+ return (lx_ptrace_stop_for_option((int)arg1, arg2 == 0 ?
+ B_FALSE : B_TRUE, (ulong_t)arg3));
+
+ case B_PTRACE_CLONE_BEGIN:
+ return (lx_ptrace_set_clone_inherit((int)arg1, arg2 == 0 ?
+ B_FALSE : B_TRUE));
+
+ case B_PTRACE_KERNEL:
+ return (lx_ptrace_kernel((int)arg1, (pid_t)arg2, arg3, arg4));
+
+ case B_HELPER_WAITID: {
+ idtype_t idtype = (idtype_t)arg1;
+ id_t id = (id_t)arg2;
+ siginfo_t *infop = (siginfo_t *)arg3;
+ int options = (int)arg4;
+ int werr;
+
+ lwpd = ttolxlwp(curthread);
+
+ /*
+ * Our brand-specific waitid helper only understands a subset of
+ * the possible idtypes. Ensure we keep to that subset here:
+ */
+ if (idtype != P_ALL && idtype != P_PID && idtype != P_PGID) {
+ return (EINVAL);
+ }
+
/*
- * Set or get the ptrace extended options or get the event
- * reason for the stop.
+ * Enable emulated ptrace(2) stop conditions via lx_waitid_helper
+ * and stash the Linux-specific extra waitid() flags. Both are
+ * cleared again once the wait returns, whatever its result.
*/
- return (lx_ptrace_ext_opts((int)arg1, (pid_t)arg2, arg3, rval));
+ lwpd->br_waitid_emulate = B_TRUE;
+ lwpd->br_waitid_flags = (int)arg5;
- case B_PTRACE_STOP_FOR_OPT:
- lx_ptrace_stop_for_option((int)arg1, (ulong_t)arg2);
- return (0);
+#if defined(_SYSCALL32_IMPL)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ werr = waitsys32(idtype, id, infop, options);
+ } else
+#endif
+ {
+ werr = waitsys(idtype, id, infop, options);
+ }
+ lwpd->br_waitid_emulate = B_FALSE;
+ lwpd->br_waitid_flags = 0;
- case B_PTRACE_GETEVENTMSG:
- lx_ptrace_geteventmsg((pid_t)arg1, (ulong_t *)arg2);
- return (0);
+ return (werr);
+ }
case B_UNSUPPORTED:
{
@@ -1702,6 +1457,7 @@ _init(void)
/* for lx_futex() */
lx_futex_init();
+ lx_ptrace_init();
err = mod_install(&modlinkage);
if (err != 0) {
@@ -1741,6 +1497,7 @@ _fini(void)
if (brand_zone_count(&lx_brand))
return (EBUSY);
+ lx_ptrace_fini();
lx_pid_fini();
lx_ioctl_fini();
diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c
index 4c95c11100..abb0ab6e63 100644
--- a/usr/src/uts/common/brand/lx/os/lx_misc.c
+++ b/usr/src/uts/common/brand/lx/os/lx_misc.c
@@ -113,6 +113,13 @@ lx_exec()
lx_pid_reassign(curthread);
}
+ /*
+ * Inform ptrace(2) that we are processing an execve(2) call so that if
+ * we are traced we can post either the PTRACE_EVENT_EXEC event or the
+ * legacy SIGTRAP.
+ */
+ (void) lx_ptrace_stop_for_option(LX_PTRACE_O_TRACEEXEC, B_FALSE, 0);
+
/* clear the fsbase values until the app. can reinitialize them */
lwpd->br_lx_fsbase = NULL;
lwpd->br_ntv_fsbase = NULL;
@@ -137,15 +144,21 @@ void
lx_exitlwp(klwp_t *lwp)
{
struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
- proc_t *p;
+ proc_t *p = lwptoproc(lwp);
kthread_t *t;
sigqueue_t *sqp = NULL;
pid_t ppid;
id_t ptid;
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
if (lwpd == NULL)
return; /* second time thru' */
+ mutex_enter(&p->p_lock);
+ lx_ptrace_exit(p, lwp);
+ mutex_exit(&p->p_lock);
+
if (lwpd->br_clear_ctidp != NULL) {
(void) suword32(lwpd->br_clear_ctidp, 0);
(void) lx_futex((uintptr_t)lwpd->br_clear_ctidp, FUTEX_WAKE, 1,
@@ -226,9 +239,17 @@ lx_freelwp(klwp_t *lwp)
if (lwpd != NULL) {
(void) removectx(lwptot(lwp), lwp, lx_save, lx_restore,
NULL, NULL, lx_save, NULL);
- if (lwpd->br_pid != 0)
+ if (lwpd->br_pid != 0) {
lx_pid_rele(lwptoproc(lwp)->p_pid,
lwptot(lwp)->t_tid);
+ }
+
+ /*
+ * Ensure that lx_ptrace_exit() has been called to detach
+ * ptrace(2) tracers and tracees.
+ */
+ VERIFY(lwpd->br_ptrace_tracer == NULL);
+ VERIFY(lwpd->br_ptrace_accord == NULL);
lwp->lwp_brand = NULL;
kmem_free(lwpd, sizeof (struct lx_lwp_data));
@@ -238,8 +259,8 @@ lx_freelwp(klwp_t *lwp)
int
lx_initlwp(klwp_t *lwp)
{
- struct lx_lwp_data *lwpd;
- struct lx_lwp_data *plwpd;
+ lx_lwp_data_t *lwpd;
+ lx_lwp_data_t *plwpd = ttolxlwp(curthread);
kthread_t *tp = lwptot(lwp);
lwpd = kmem_zalloc(sizeof (struct lx_lwp_data), KM_SLEEP);
@@ -265,8 +286,7 @@ lx_initlwp(klwp_t *lwp)
if (tp->t_next == tp) {
lwpd->br_ppid = tp->t_procp->p_ppid;
lwpd->br_ptid = -1;
- } else if (ttolxlwp(curthread) != NULL) {
- plwpd = ttolxlwp(curthread);
+ } else if (plwpd != NULL) {
bcopy(plwpd->br_tls, lwpd->br_tls, sizeof (lwpd->br_tls));
lwpd->br_ppid = plwpd->br_pid;
lwpd->br_ptid = curthread->t_tid;
@@ -292,6 +312,14 @@ lx_initlwp(klwp_t *lwp)
installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL,
lx_save, NULL);
+ /*
+ * If the parent LWP has a ptrace(2) tracer, the new LWP may
+ * need to inherit that same tracer.
+ */
+ if (plwpd != NULL) {
+ lx_ptrace_inherit_tracer(plwpd, lwpd);
+ }
+
return (0);
}
@@ -524,10 +552,7 @@ lx_exit_with_sig(proc_t *cp, sigqueue_t *sqp, void *brand_data)
* SIGCHLD X -
*
* This is an XOR of __WCLONE being set, and SIGCHLD being the signal sent on
- * process exit. Since (flags & __WCLONE) is not guaranteed to have the
- * least-significant bit set when the flags is enabled, !! is used to place
- * that bit into the least significant bit. Then, the bitwise XOR can be
- * used, because there is no logical XOR in the C language.
+ * process exit.
*
* More information on wait in lx brands can be found at
* usr/src/lib/brand/lx/lx_brand/common/wait.c.
@@ -535,29 +560,45 @@ lx_exit_with_sig(proc_t *cp, sigqueue_t *sqp, void *brand_data)
boolean_t
lx_wait_filter(proc_t *pp, proc_t *cp)
{
- int flags;
+ lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+ int flags = lwpd->br_waitid_flags;
boolean_t ret;
- if (LX_ARGS(waitid) != NULL) {
- flags = LX_ARGS(waitid)->waitid_flags;
- mutex_enter(&cp->p_lock);
- if (flags & LX_WALL) {
- ret = B_TRUE;
- } else if (cp->p_stat == SZOMB ||
- cp->p_brand == &native_brand) {
- ret = (((!!(flags & LX_WCLONE)) ^
- (stol_signo[SIGCHLD] == cp->p_exit_data))
- ? B_TRUE : B_FALSE);
+ if (!lwpd->br_waitid_emulate) {
+ return (B_TRUE);
+ }
+
+ mutex_enter(&cp->p_lock);
+ if (flags & LX_WALL) {
+ ret = B_TRUE;
+
+ } else {
+ int exitsig;
+ boolean_t sigchld_exit, _wclone;
+
+ /*
+ * Determine the exit signal for this process:
+ */
+ if (cp->p_stat == SZOMB || cp->p_brand == &native_brand) {
+ exitsig = cp->p_exit_data;
} else {
- ret = (((!!(flags & LX_WCLONE)) ^
- (stol_signo[SIGCHLD] == ptolxproc(cp)->l_signal))
- ? B_TRUE : B_FALSE);
+ exitsig = ptolxproc(cp)->l_signal;
}
- mutex_exit(&cp->p_lock);
- return (ret);
- } else {
- return (B_TRUE);
+
+ /*
+ * A "clone" child is one whose exit signal is not SIGCHLD;
+ * __WCLONE selects only those, its absence only the others.
+ * The double logical NOT normalises the flag to 0 or 1 so that
+ * bitwise XOR can stand in for the absent C logical XOR.
+ */
+ _wclone = !!(flags & LX_WCLONE);
+ sigchld_exit = (stol_signo[SIGCHLD] == exitsig);
+
+ ret = (_wclone ^ sigchld_exit) ? B_TRUE : B_FALSE;
}
+ mutex_exit(&cp->p_lock);
+
+ return (ret);
}
void
diff --git a/usr/src/uts/common/brand/lx/os/lx_pid.c b/usr/src/uts/common/brand/lx/os/lx_pid.c
index aa8c751bc2..8552754c43 100644
--- a/usr/src/uts/common/brand/lx/os/lx_pid.c
+++ b/usr/src/uts/common/brand/lx/os/lx_pid.c
@@ -22,7 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -222,6 +222,28 @@ lx_lpid_to_spair(pid_t l_pid, pid_t *s_pid, id_t *s_tid)
{
struct lx_pid *hp;
+ if (l_pid == 1) {
+ pid_t initpid;
+
+ /*
+ * We are trying to look up the Linux init process for the
+ * current zone, which we pretend has pid 1.
+ */
+ if ((initpid = curzone->zone_proc_initpid) == -1) {
+ /*
+ * We could not find the init process for this zone.
+ */
+ return (-1);
+ }
+
+ if (s_pid != NULL)
+ *s_pid = initpid;
+ if (s_tid != NULL)
+ *s_tid = 1;
+
+ return (0);
+ }
+
mutex_enter(&hash_lock);
for (hp = ltos_pid_hash[LTOS_HASH(l_pid)]; hp; hp = hp->ltos_next) {
if (l_pid == hp->l_pid) {
diff --git a/usr/src/uts/common/brand/lx/os/lx_ptrace.c b/usr/src/uts/common/brand/lx/os/lx_ptrace.c
new file mode 100644
index 0000000000..6e4b74531d
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_ptrace.c
@@ -0,0 +1,2270 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Emulation of the Linux ptrace(2) interface.
+ *
+ * OVERVIEW
+ *
+ * The Linux process model is somewhat different from the illumos native
+ * model. One critical difference is that each Linux thread has a unique
+ * identifier in the pid namespace. The lx brand assigns a pid to each LWP
+ * within the emulated process, giving the pid of the process itself to the
+ * first LWP.
+ *
+ * The Linux ptrace(2) interface allows for any LWP in a branded process to
+ * exert control over any other LWP within the same zone. Control is exerted
+ * by the use of the ptrace(2) system call itself, which accepts a number of
+ * request codes. Feedback on traced events is primarily received by the
+ * tracer through SIGCLD and the emulated waitpid(2) and waitid(2) system
+ * calls. Many of the possible ptrace(2) requests will only succeed if the
+ * target LWP is in a "ptrace-stop" condition.
+ *
+ * HISTORY
+ *
+ * The brand support for ptrace(2) was originally built on top of the rich
+ * support for debugging and tracing provided through the illumos /proc
+ * interfaces, mounted at /native/proc within the zone. The native legacy
+ * ptrace(3C) functionality was used as a starting point, but was generally
+ * insufficient for complete and precise emulation. The extant legacy
+ * interface, and indeed our native SIGCLD and waitid(2) facilities, are
+ * focused on _process_ level concerns -- the Linux interface has been
+ * extended to be aware of LWPs as well.
+ *
+ * In order to allow us to focus on providing more complete and accurate
+ * emulation without extensive and undesirable changes to the native
+ * facilities, this second generation ptrace(2) emulation is mostly separate
+ * from any other tracing or debugging framework in the system.
+ *
+ * ATTACHING TRACERS TO TRACEES
+ *
+ * There are several ways that a child LWP may become traced by a tracer.
+ * To determine which attach method caused a tracee to become attached, one
+ * may inspect the "br_ptrace_attach" member of the LWP-specific brand data
+ * with the debugger.
+ *
+ * The first attach methods to consider are the attaching ptrace(2) requests:
+ *
+ * PTRACE_TRACEME
+ *
+ * If an LWP makes a PTRACE_TRACEME call, it will be attached as a tracee
+ * to its parent LWP (br_ppid). Using PTRACE_TRACEME does _not_ cause the
+ * tracee to be held in a stop condition. It is common practice for
+ * consumers to raise(SIGSTOP) immediately afterward.
+ *
+ * PTRACE_ATTACH
+ *
+ * An LWP may attempt to trace any other LWP in this, or another, process.
+ * We currently allow any attach where the process containing the tracer
+ * LWP has permission to write to /proc for the process containing the
+ * intended tracee. This action also sends a SIGSTOP to the newly attached
+ * tracee.
+ *
+ * The second class of attach methods are the clone(2)/fork(2) inheritance
+ * options that may be set on a tracee with PTRACE_SETOPTIONS:
+ *
+ * PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK and PTRACE_O_TRACECLONE
+ *
+ * If these options have been set on a tracee, then a fork(2), vfork(2) or
+ * clone(2) respectively will cause the newly created LWP to be traced by
+ * the same tracer. The same set of ptrace(2) options will also be set on
+ * the new child.
+ *
+ * The third class of attach method is the PTRACE_CLONE flag to clone(2).
+ * This flag induces the same inheritance as PTRACE_O_TRACECLONE, but is
+ * passed by the tracee as an argument to clone(2).
+ *
+ * DETACHING TRACEES
+ *
+ * Tracees can be detached by the tracer with the PTRACE_DETACH request.
+ * This request is only valid when the tracee is in a ptrace(2) stop
+ * condition, and is itself a restarting action.
+ *
+ * If the tracer exits without detaching all of its tracees, then all of the
+ * tracees are automatically detached and restarted. If a tracee was in
+ * "signal-delivery-stop" at the time the tracer exited, the signal will be
+ * released to the child unless it is a SIGSTOP. We drop this instance of
+ * SIGSTOP in order to prevent the child from becoming stopped by job
+ * control.
+ *
+ * ACCORD ALLOCATION AND MANAGEMENT
+ *
+ * The "lx_ptrace_accord_t" object tracks the agreement between a tracer LWP
+ * and zero or more tracee LWPs. It is explicitly illegal for a tracee to
+ * trace its tracer, and we block this in PTRACE_ATTACH/PTRACE_TRACEME.
+ *
+ * An LWP starts out without an accord. If a child of that LWP calls
+ * ptrace(2) with the PTRACE_TRACEME subcommand, or if the LWP itself uses
+ * PTRACE_ATTACH, an accord will be allocated and stored on that LWP. The
+ * accord structure is not released from that LWP until it arrives in
+ * lx_exitlwp(), as called by lwp_exit(). A new accord will not be
+ * allocated, even if one does not exist, once an LWP arrives in lx_exitlwp()
+ * and sets the LX_PTRACE_EXITING flag. An LWP will have at most one accord
+ * structure throughout its entire lifecycle; once it has one, it has the
+ * same one until death.
+ *
+ * The accord is reference counted (lxpa_refcnt), starting at a count of one
+ * at creation to represent the link from the tracer LWP to its accord. The
+ * accord is not freed until the reference count falls to zero.
+ *
+ * To make mutual exclusion between a detaching tracer and various notifying
+ * tracees simpler, the tracer will hold "pidlock" while it clears the
+ * accord members that point back to the tracer LWP and CV.
+ *
+ * SIGNALS AND JOB CONTROL
+ *
+ * Various actions, either directly ptrace(2) related or commonly associated
+ * with tracing, cause process- or thread-directed SIGSTOP signals to be sent
+ * to tracees. These signals, and indeed any signal other than SIGKILL, can
+ * be suppressed by the tracer when using a restarting request (including
+ * PTRACE_DETACH) on a child. The signal may also be substituted for a
+ * different signal.
+ *
+ * If a SIGSTOP (or other stopping signal) is not suppressed by the tracer,
+ * it will induce the regular illumos native job control stop of the entire
+ * traced process. This is at least passingly similar to the Linux "group
+ * stop" ptrace(2) condition.
+ *
+ * SYSTEM CALL TRACING
+ *
+ * The ptrace(2) interface enables the tracer to hold the tracee on entry and
+ * exit from system calls. When a stopped tracee is restarted through the
+ * PTRACE_SYSCALL request, the LX_PTRACE_SYSCALL flag is set until the next
+ * system call boundary. Whether this is a "syscall-entry-stop" or
+ * "syscall-exit-stop", the tracee is held and the tracer is notified via
+ * SIGCLD/waitpid(2) in the usual way. The LX_PTRACE_SYSCALL flag is
+ * cleared after each stop; for ongoing system call tracing the tracee must
+ * be continuously restarted with PTRACE_SYSCALL.
+ *
+ * EVENT STOPS
+ *
+ * Various events (particularly FORK, VFORK, CLONE, EXEC and EXIT) are
+ * enabled by the tracer through PTRACE_SETOPTIONS. Once enabled, the tracee
+ * will be stopped at the nominated points of interest and the tracer
+ * notified. The tracer may request additional information about the event,
+ * such as the pid of new LWPs and processes, via PTRACE_GETEVENTMSG.
+ *
+ * LOCK ORDERING RULES
+ *
+ * It is not safe, in general, to hold p_lock for two different processes at
+ * the same time. This constraint is the primary reason for the existence
+ * (and complexity) of the ptrace(2) accord mechanism.
+ *
+ * In order to facilitate looking up accords by the "pid" of a tracer LWP,
+ * p_lock for the tracer process may be held while entering the accord mutex
+ * (lxpa_lock). This mutex protects the accord flags and reference count.
+ * The reference count is manipulated through lx_ptrace_accord_hold() and
+ * lx_ptrace_accord_rele().
+ *
+ * DO NOT interact with the accord mutex (lxpa_lock) directly. The
+ * lx_ptrace_accord_enter() and lx_ptrace_accord_exit() functions do various
+ * book-keeping and lock ordering enforcement and MUST be used.
+ *
+ * It is NOT legal to take ANY p_lock while holding the accord mutex
+ * (lxpa_lock). If the lxpa_tracees_lock is to be held concurrently with
+ * lxpa_lock, lxpa_lock MUST be taken first and dropped before taking p_lock
+ * of any processes from the tracee list.
+ *
+ * It is NOT legal to take a tracee p_lock and then attempt to enter the
+ * accord mutex (or tracee list mutex) of its tracer. When running as the
+ * tracee LWP, the tracee's hold will prevent the accord from being freed.
+ * Use of the LX_PTRACE_STOPPING or LX_PTRACE_CLONING flag in the
+ * LWP-specific brand data prevents an exiting tracer from altering the
+ * tracee until the tracee has come to an orderly stop, without requiring the
+ * tracee to hold its own p_lock the entire time it is stopping.
+ *
+ * It is not safe, in general, to enter "pidlock" while holding the p_lock of
+ * any process. It is similarly illegal to hold any accord locks (lxpa_lock
+ * or lxpa_tracees_lock) while attempting to enter "pidlock". As "pidlock"
+ * is a global mutex, it should be held for the shortest possible time.
+ */
+
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/ksynch.h>
+#include <sys/sysmacros.h>
+#include <sys/procfs.h>
+#include <sys/cmn_err.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/wait.h>
+#include <sys/prsystm.h>
+#include <sys/note.h>
+
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_impl.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_pid.h>
+#include <lx_syscall.h>
+#include <lx_signum.h>
+
+
+typedef enum lx_ptrace_cont_flags_t {
+ LX_PTC_NONE = 0x00, /* plain restart of the tracee */
+ LX_PTC_SYSCALL = 0x01, /* restart with LX_PTRACE_SYSCALL set */
+ LX_PTC_SINGLESTEP = 0x02 /* rejected by lx_ptrace_cont() with EINVAL */
+} lx_ptrace_cont_flags_t;
+
+/*
+ * Macros for checking the state of an LWP via "br_ptrace_flags":
+ *
+ * LX_PTRACE_BUSY is the mask of the EXITING, STOPPING and CLONING flags.
+ * VISIBLE(a) is true while LX_PTRACE_EXITING is not set on LWP data "a".
+ * TRACEE_BUSY(a) is true while any LX_PTRACE_BUSY flag is set on "a".
+ *
+ * ACCORD_HELD(a) is unrelated to the flags: it is true while the calling
+ * thread holds the accord mutex (lxpa_lock) of accord "a".
+ */
+#define LX_PTRACE_BUSY \
+ (LX_PTRACE_EXITING | LX_PTRACE_STOPPING | LX_PTRACE_CLONING)
+
+#define VISIBLE(a) (((a)->br_ptrace_flags & LX_PTRACE_EXITING) == 0)
+#define TRACEE_BUSY(a) (((a)->br_ptrace_flags & LX_PTRACE_BUSY) != 0)
+
+#define ACCORD_HELD(a) MUTEX_HELD(&(a)->lxpa_lock)
+
+static kcondvar_t lx_ptrace_busy_cv; /* broadcast when STOPPING is cleared */
+static kmem_cache_t *lx_ptrace_accord_cache; /* lx_ptrace_accord_t objects */
+
+/*
+ * Enter the accord mutex (lxpa_lock).
+ *
+ * The caller must not already hold the tracee list lock
+ * (lxpa_tracees_lock) of this accord: lxpa_lock has to be taken first, and
+ * the VERIFY below enforces that ordering before the mutex is entered.
+ */
+static void
+lx_ptrace_accord_enter(lx_ptrace_accord_t *accord)
+{
+ VERIFY(MUTEX_NOT_HELD(&accord->lxpa_tracees_lock));
+
+ mutex_enter(&accord->lxpa_lock);
+}
+
+/*
+ * Leave the accord mutex. The caller must hold lxpa_lock on entry.
+ *
+ * While references remain this is a plain unlock. Once the reference count
+ * has reached zero the accord is torn down instead: its mutexes are
+ * destroyed and the object is handed back to the accord cache, so the
+ * caller must not touch it again.
+ */
+static void
+lx_ptrace_accord_exit(lx_ptrace_accord_t *accord)
+{
+	VERIFY(ACCORD_HELD(accord));
+
+	if (!(accord->lxpa_refcnt > 0)) {
+		/*
+		 * Nobody references this accord any longer. By now it must
+		 * be detached from its tracer, have no tracees left, and
+		 * have been marked as a tombstone.
+		 */
+		VERIFY(accord->lxpa_tracer == NULL);
+		VERIFY(MUTEX_NOT_HELD(&accord->lxpa_tracees_lock));
+		VERIFY(list_is_empty(&accord->lxpa_tracees));
+		VERIFY(accord->lxpa_flags & LX_ACC_TOMBSTONE);
+
+		mutex_destroy(&accord->lxpa_lock);
+		mutex_destroy(&accord->lxpa_tracees_lock);
+
+		kmem_cache_free(lx_ptrace_accord_cache, accord);
+		return;
+	}
+
+	/*
+	 * The accord is still referenced; simply drop the lock.
+	 */
+	mutex_exit(&accord->lxpa_lock);
+}
+
+/*
+ * Drop our reference to this accord. If this drops the reference count
+ * to zero, the next lx_ptrace_accord_exit() will free the accord.
+ *
+ * The caller must hold the accord mutex (lxpa_lock). Releasing a reference
+ * that was never taken (a count that is already zero) trips the VERIFY
+ * below. Nothing is freed here; only the count is decremented.
+ */
+static void
+lx_ptrace_accord_rele(lx_ptrace_accord_t *accord)
+{
+ VERIFY(ACCORD_HELD(accord));
+
+ VERIFY(accord->lxpa_refcnt > 0);
+ accord->lxpa_refcnt--;
+}
+
+/*
+ * Place an additional hold on an accord.
+ *
+ * The caller must hold the accord mutex (lxpa_lock). Each hold is expected
+ * to be balanced by a later lx_ptrace_accord_rele().
+ */
+static void
+lx_ptrace_accord_hold(lx_ptrace_accord_t *accord)
+{
+ VERIFY(ACCORD_HELD(accord));
+
+ accord->lxpa_refcnt++;
+}
+
+/*
+ * Fetch the accord for this LWP. If one has not yet been created, and the
+ * process is not exiting, allocate it now. Must be called with p_lock held
+ * for the process containing the target LWP.
+ *
+ * If successful, we return holding the accord lock (lxpa_lock).
+ *
+ * Returns 0 and stores the locked accord in *accordp on success. Returns
+ * ESRCH, with *accordp set to NULL, when the LWP has no accord and either
+ * "allocate_one" is false or the LWP is no longer VISIBLE().
+ *
+ * A freshly allocated accord starts with a reference count of one, points
+ * back at this LWP through lxpa_tracer, and uses the p_cv of the containing
+ * process as its wakeup CV (lxpa_cvp).
+ *
+ * NOTE(review): the allocation below uses KM_SLEEP while p_lock is held --
+ * confirm that blocking with this lock held is acceptable here.
+ */
+static int
+lx_ptrace_accord_get_locked(klwp_t *lwp, lx_ptrace_accord_t **accordp,
+ boolean_t allocate_one)
+{
+ lx_ptrace_accord_t *lxpa;
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ proc_t *p = lwptoproc(lwp);
+
+ VERIFY(MUTEX_HELD(&p->p_lock));
+
+ /*
+ * If this LWP does not have an accord, we wish to allocate
+ * and install one.
+ */
+ if ((lxpa = lwpd->br_ptrace_accord) == NULL) {
+ if (!allocate_one || !VISIBLE(lwpd)) {
+ /*
+ * Either we do not wish to allocate an accord, or this
+ * LWP has already begun exiting from a ptrace
+ * perspective.
+ */
+ *accordp = NULL;
+ return (ESRCH);
+ }
+
+ lxpa = kmem_cache_alloc(lx_ptrace_accord_cache, KM_SLEEP);
+ bzero(lxpa, sizeof (*lxpa));
+
+ /*
+ * The initial reference count is 1 because we are referencing
+ * it in from the soon-to-be tracer LWP.
+ */
+ lxpa->lxpa_refcnt = 1;
+ mutex_init(&lxpa->lxpa_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&lxpa->lxpa_tracees_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&lxpa->lxpa_tracees, sizeof (lx_lwp_data_t),
+ offsetof(lx_lwp_data_t, br_ptrace_linkage));
+ lxpa->lxpa_cvp = &p->p_cv;
+
+ lxpa->lxpa_tracer = lwpd;
+ lwpd->br_ptrace_accord = lxpa;
+ }
+
+ /*
+ * Lock the accord before returning it to the caller.
+ */
+ lx_ptrace_accord_enter(lxpa);
+
+ /*
+ * There should be at least one active reference to this accord,
+ * otherwise it should have been freed.
+ */
+ VERIFY(lxpa->lxpa_refcnt > 0);
+
+ *accordp = lxpa;
+ return (0);
+}
+
+/*
+ * Accords belong to the tracer LWP. Get the accord for this tracer or return
+ * an error if it was not possible. To prevent deadlocks, the caller MUST NOT
+ * hold p_lock on its own or any other process.
+ *
+ * If successful, we return holding the accord lock (lxpa_lock).
+ *
+ * "lxpid" is the Linux pid of the tracer LWP. Returns ESRCH if the pid
+ * cannot be mapped to a process and LWP, if the process cannot be locked
+ * with sprlock(), or if the LWP cannot be found or is no longer VISIBLE().
+ * Returns EPERM if the pid resolves to the calling thread itself. Otherwise
+ * returns the result of lx_ptrace_accord_get_locked(), which is always asked
+ * to allocate an accord if the tracer does not have one yet.
+ */
+static int
+lx_ptrace_accord_get_by_pid(pid_t lxpid, lx_ptrace_accord_t **accordp)
+{
+ int ret = ESRCH;
+ pid_t apid;
+ id_t atid;
+ proc_t *aproc;
+ kthread_t *athr;
+ klwp_t *alwp;
+ lx_lwp_data_t *alwpd;
+
+ VERIFY(MUTEX_NOT_HELD(&curproc->p_lock));
+
+ /*
+ * Locate the process containing the tracer LWP based on its Linux pid
+ * and lock it.
+ */
+ if (lx_lpid_to_spair(lxpid, &apid, &atid) != 0 ||
+ (aproc = sprlock(apid)) == NULL) {
+ return (ESRCH);
+ }
+
+ /*
+ * Locate the tracer LWP itself and ensure that it is visible to
+ * ptrace(2).
+ */
+ if ((athr = idtot(aproc, atid)) == NULL ||
+ (alwp = ttolwp(athr)) == NULL ||
+ (alwpd = lwptolxlwp(alwp)) == NULL ||
+ !VISIBLE(alwpd)) {
+ sprunlock(aproc);
+ return (ESRCH);
+ }
+
+ /*
+ * We should not fetch our own accord this way.
+ */
+ if (athr == curthread) {
+ sprunlock(aproc);
+ return (EPERM);
+ }
+
+ /*
+ * Fetch (or allocate) the accord owned by this tracer LWP:
+ */
+ ret = lx_ptrace_accord_get_locked(alwp, accordp, B_TRUE);
+
+ /*
+ * Unlock the process and return.
+ */
+ sprunlock(aproc);
+ return (ret);
+}
+
+/*
+ * Look up the ptrace(2) accord owned by the calling LWP in its role as a
+ * tracer, optionally creating it when "allocate_one" is set. The p_lock of
+ * the calling process is taken internally, so the caller MUST NOT hold it.
+ *
+ * On success the accord is returned through "accordp" with the accord lock
+ * (lxpa_lock) held. The return value is whatever
+ * lx_ptrace_accord_get_locked() reports.
+ */
+static int
+lx_ptrace_accord_get(lx_ptrace_accord_t **accordp, boolean_t allocate_one)
+{
+	klwp_t *self = ttolwp(curthread);
+	proc_t *selfproc = lwptoproc(self);
+	int err;
+
+	VERIFY(MUTEX_NOT_HELD(&selfproc->p_lock));
+
+	/*
+	 * The lookup (and any allocation) must happen under our own p_lock.
+	 */
+	mutex_enter(&selfproc->p_lock);
+	err = lx_ptrace_accord_get_locked(self, accordp, allocate_one);
+	mutex_exit(&selfproc->p_lock);
+
+	return (err);
+}
+
+/*
+ * Restart an LWP if it is in "ptrace-stop". This function may induce sleep,
+ * so the caller MUST NOT hold any mutexes other than p_lock for the process
+ * containing the LWP.
+ *
+ * The target must not be the calling thread, and the p_lock of the process
+ * containing it must be held; both are verified on entry. If the target
+ * is not stopped for PR_BRAND, nothing is changed. When it is restarted, its
+ * recorded stop reason and the LX_PTRACE_CLDPEND flag are cleared as well.
+ */
+static void
+lx_ptrace_restart_lwp(klwp_t *lwp)
+{
+ kthread_t *rt = lwptot(lwp);
+ proc_t *rproc = lwptoproc(lwp);
+ lx_lwp_data_t *rlwpd = lwptolxlwp(lwp);
+
+ VERIFY(rt != curthread);
+ VERIFY(MUTEX_HELD(&rproc->p_lock));
+
+ /*
+ * Exclude potential meddling from procfs.
+ */
+ prbarrier(rproc);
+
+ /*
+ * Check that the LWP is still in "ptrace-stop" and, if so, restart it.
+ */
+ thread_lock(rt);
+ if (BSTOPPED(rt) && rt->t_whystop == PR_BRAND) {
+ rt->t_schedflag |= TS_BSTART;
+ setrun_locked(rt);
+
+ /*
+ * Clear stop reason.
+ */
+ rlwpd->br_ptrace_whystop = 0;
+ rlwpd->br_ptrace_whatstop = 0;
+ rlwpd->br_ptrace_flags &= ~LX_PTRACE_CLDPEND;
+ }
+ thread_unlock(rt);
+}
+
+/*
+ * Build the SIGCLD siginfo describing the "ptrace-stop" condition that is
+ * currently recorded on the tracee "remote".
+ *
+ * remote	tracee LWP data; br_ptrace_whatstop selects the kind of stop
+ * ip		siginfo to populate; it is zeroed before being filled in
+ * waitflag	when true the stop is consumed: the recorded stop reason
+ *		and the LX_PTRACE_CLDPEND flag are cleared on the tracee
+ * event_ppid	optional; for event stops receives the Linux pid of "remote"
+ * event_pid	optional; for event stops receives the pid carried in the
+ *		event message
+ *
+ * The two optional outputs are left untouched for any other kind of stop.
+ * The system panics if the recorded stop subreason is not one of the
+ * LX_PR_* values handled below.
+ */
+static void
+lx_winfo(lx_lwp_data_t *remote, k_siginfo_t *ip, boolean_t waitflag,
+    pid_t *event_ppid, pid_t *event_pid)
+{
+	int signo;
+
+	/*
+	 * Populate our k_siginfo_t with data about this "ptrace-stop"
+	 * condition:
+	 */
+	bzero(ip, sizeof (*ip));
+	ip->si_signo = SIGCLD;
+	ip->si_pid = remote->br_pid;
+	ip->si_code = CLD_TRAPPED;
+
+	switch (remote->br_ptrace_whatstop) {
+	case LX_PR_SYSENTRY:
+	case LX_PR_SYSEXIT:
+		/*
+		 * Syscall stops report SIGTRAP, with bit 0x80 added when the
+		 * tracer enabled PTRACE_O_TRACESYSGOOD.
+		 */
+		ip->si_status = SIGTRAP;
+		if (remote->br_ptrace_options & LX_PTRACE_O_TRACESYSGOOD) {
+			ip->si_status |= 0x80;
+		}
+		break;
+
+	case LX_PR_SIGNALLED:
+		signo = remote->br_ptrace_stopsig;
+		if (signo < 1 || signo >= LX_NSIG) {
+			/*
+			 * If this signal number is not valid, pretend it
+			 * was a SIGTRAP.
+			 */
+			ip->si_status = SIGTRAP;
+		} else {
+			ip->si_status = ltos_signo[signo];
+		}
+		break;
+
+	case LX_PR_EVENT:
+		ip->si_status = SIGTRAP | remote->br_ptrace_event;
+		/*
+		 * Record the Linux pid of both this LWP and the create
+		 * event we are dispatching. We will use this information
+		 * to unblock any subsequent ptrace(2) events that depend
+		 * on this one.
+		 */
+		if (event_ppid != NULL)
+			*event_ppid = remote->br_pid;
+		if (event_pid != NULL)
+			*event_pid = (pid_t)remote->br_ptrace_eventmsg;
+		break;
+
+	default:
+		cmn_err(CE_PANIC, "unexpected stop subreason: %d",
+		    remote->br_ptrace_whatstop);
+	}
+
+	/*
+	 * If WNOWAIT was specified, do not mark the event as posted
+	 * so that it may be re-fetched on another call to waitid().
+	 */
+	if (waitflag) {
+		remote->br_ptrace_whystop = 0;
+		remote->br_ptrace_whatstop = 0;
+		remote->br_ptrace_flags &= ~LX_PTRACE_CLDPEND;
+	}
+}
+
+/*
+ * Receive notification from stop() of a PR_BRAND stop.
+ *
+ * Called, and returns, with p->p_lock held; the lock is dropped and retaken
+ * several times in between so that "pidlock" and the tracer's p_lock can be
+ * taken in a legal order. Stops with a reason other than PR_BRAND are
+ * ignored. An unknown PR_BRAND subreason panics the system.
+ *
+ * The routine records the stop reason on the LWP, tries to queue a SIGCLD
+ * to the tracer (unless the event must wait for one from the parent LWP, or
+ * the tracer is gone), moves the LWP from STOPPING to STOPPED, and wakes
+ * both an exiting tracer and a tracer sleeping in waitid().
+ */
+void
+lx_stop_notify(proc_t *p, klwp_t *lwp, ushort_t why, ushort_t what)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ lx_ptrace_accord_t *accord;
+ klwp_t *plwp = NULL;
+ proc_t *pp = NULL;
+ lx_lwp_data_t *parent;
+ boolean_t cldpend = B_TRUE; /* stays true unless SIGCLD is queued */
+ boolean_t cldpost = B_FALSE; /* true once sq_info has been filled in */
+ sigqueue_t *sqp = NULL;
+
+ /*
+ * We currently only care about LX-specific stop reasons.
+ */
+ if (why != PR_BRAND)
+ return;
+
+ switch (what) {
+ case LX_PR_SYSENTRY:
+ case LX_PR_SYSEXIT:
+ case LX_PR_SIGNALLED:
+ case LX_PR_EVENT:
+ break;
+ default:
+ cmn_err(CE_PANIC, "unexpected subreason for PR_BRAND"
+ " stop: %d", (int)what);
+ }
+
+ /*
+ * We should be holding the lock on our containing process. The
+ * STOPPING flag should have been set by lx_ptrace_stop() for all
+ * PR_BRAND stops.
+ */
+ VERIFY(MUTEX_HELD(&p->p_lock));
+ VERIFY(lwpd->br_ptrace_flags & LX_PTRACE_STOPPING);
+ VERIFY((accord = lwpd->br_ptrace_tracer) != NULL);
+
+ /*
+ * We must drop our process lock to take "pidlock". The
+ * LX_PTRACE_STOPPING flag protects us from an exiting tracer.
+ */
+ mutex_exit(&p->p_lock);
+
+ /*
+ * Allocate before we enter any mutexes.
+ */
+ sqp = kmem_zalloc(sizeof (*sqp), KM_SLEEP);
+
+ /*
+ * We take pidlock now, which excludes all callers of waitid() and
+ * prevents a detaching tracer from clearing critical accord members.
+ */
+ mutex_enter(&pidlock);
+ mutex_enter(&p->p_lock);
+
+ /*
+ * Get the ptrace(2) "parent" process, to which we may send
+ * a SIGCLD signal later.
+ */
+ if ((parent = accord->lxpa_tracer) != NULL &&
+ (plwp = parent->br_lwp) != NULL) {
+ pp = lwptoproc(plwp);
+ }
+
+ /*
+ * Our tracer should not have been modified in our absence; the
+ * LX_PTRACE_STOPPING flag prevents it.
+ */
+ VERIFY(lwpd->br_ptrace_tracer == accord);
+
+ /*
+ * Stash data for this stop condition in the LWP data while we hold
+ * both pidlock and our p_lock.
+ */
+ lwpd->br_ptrace_whystop = why;
+ lwpd->br_ptrace_whatstop = what;
+
+ /*
+ * If this event does not depend on an event from the parent LWP,
+ * populate the siginfo_t for the event pending on this tracee LWP.
+ */
+ if (!(lwpd->br_ptrace_flags & LX_PTRACE_PARENT_WAIT) && pp != NULL) {
+ cldpost = B_TRUE;
+ lx_winfo(lwpd, &sqp->sq_info, B_FALSE, NULL, NULL);
+ }
+
+ /*
+ * Drop our p_lock so that we may lock the tracer.
+ */
+ mutex_exit(&p->p_lock);
+ if (cldpost && pp != NULL) {
+ /*
+ * Post the SIGCLD to the tracer. If a SIGCLD is already
+ * pending there, nothing is queued and "sqp" stays ours to
+ * free at the end of this function.
+ */
+ mutex_enter(&pp->p_lock);
+ if (!sigismember(&pp->p_sig, SIGCLD)) {
+ sigaddqa(pp, plwp->lwp_thread, sqp);
+ cldpend = B_FALSE;
+ sqp = NULL;
+ }
+ mutex_exit(&pp->p_lock);
+ }
+
+ /*
+ * We re-take our process lock now. The lock will be held until
+ * the thread is actually marked stopped, so we will not race with
+ * lx_ptrace_lock_if_stopped() or lx_waitid_helper().
+ */
+ mutex_enter(&p->p_lock);
+
+ /*
+ * We clear the STOPPING flag; stop() continues to hold our p_lock
+ * until our thread stop state is visible.
+ */
+ lwpd->br_ptrace_flags &= ~LX_PTRACE_STOPPING;
+ lwpd->br_ptrace_flags |= LX_PTRACE_STOPPED;
+ if (cldpend) {
+ /*
+ * The SIGCLD for this new wait condition was not queued
+ * above; record on this LWP that it is still pending.
+ */
+ lwpd->br_ptrace_flags |= LX_PTRACE_CLDPEND;
+ }
+
+ /*
+ * If lx_ptrace_exit_tracer() is trying to detach our tracer, it will
+ * be sleeping on this CV until LX_PTRACE_STOPPING is clear. Wake it
+ * now.
+ */
+ cv_broadcast(&lx_ptrace_busy_cv);
+
+ /*
+ * While still holding pidlock, we attempt to wake our tracer from a
+ * potential waitid() slumber.
+ */
+ if (accord->lxpa_cvp != NULL) {
+ cv_broadcast(accord->lxpa_cvp);
+ }
+
+ /*
+ * We release pidlock and return as we were called: with our p_lock
+ * held.
+ */
+ mutex_exit(&pidlock);
+
+ if (sqp != NULL) {
+ kmem_free(sqp, sizeof (*sqp));
+ }
+}
+
+/*
+ * Gatekeeper for restarting requests (e.g. PTRACE_CONT, PTRACE_SYSCALL or
+ * PTRACE_DETACH), which are only permitted on a tracee that is in
+ * "ptrace-stop". This must ONLY be used on tracees of the current LWP, and
+ * the caller must hold the tracee list lock of the accord.
+ *
+ * Returns 0 with the tracee's p_lock held if the tracee is stopped.
+ * Returns ESRCH, with no p_lock held, if the tracee is exiting or is not
+ * currently stopped.
+ */
+static int
+lx_ptrace_lock_if_stopped(lx_ptrace_accord_t *accord, lx_lwp_data_t *remote)
+{
+	klwp_t *tlwp = remote->br_lwp;
+	proc_t *tproc = lwptoproc(tlwp);
+	kthread_t *tthr = lwptot(tlwp);
+
+	/*
+	 * Sanity: the target is never the calling thread, the tracee list
+	 * is locked, and the accord really is ours.
+	 */
+	VERIFY(curthread != tthr);
+	VERIFY(MUTEX_HELD(&accord->lxpa_tracees_lock));
+	VERIFY(accord->lxpa_tracer == ttolxlwp(curthread));
+
+	mutex_enter(&tproc->p_lock);
+
+	if (VISIBLE(remote)) {
+		/*
+		 * Only a tracee that has not begun exiting is guaranteed to
+		 * be consistent, so the ownership check is made here rather
+		 * than before the visibility test.
+		 */
+		VERIFY(remote->br_ptrace_tracer == accord);
+
+		if ((remote->br_ptrace_flags & LX_PTRACE_STOPPED) != 0) {
+			/*
+			 * In "ptrace-stop": hand the locked process back to
+			 * the caller.
+			 */
+			return (0);
+		}
+	}
+
+	/*
+	 * Either the tracee is detaching itself as it exits, or it is not
+	 * in "ptrace-stop". In both cases it is reported as not found.
+	 */
+	mutex_exit(&tproc->p_lock);
+	return (ESRCH);
+}
+
+/*
+ * Replace the PTRACE_SETOPTIONS option set of the tracee "remote" with
+ * "options". Returns EINVAL, leaving the tracee untouched, if any bit
+ * outside LX_PTRACE_O_ALL is set; returns 0 otherwise.
+ */
+static int
+lx_ptrace_setoptions(lx_lwp_data_t *remote, uintptr_t options)
+{
+	if ((options & ~LX_PTRACE_O_ALL) == 0) {
+		/*
+		 * Every requested bit is a known option; install the set.
+		 */
+		remote->br_ptrace_options = (lx_ptrace_options_t)options;
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Copy the event message of the tracee "remote" out to the user address
+ * "umsgp". A caller that is not using the native data model receives the
+ * value as a uint32_t; otherwise a ulong_t is written. The return value is
+ * the result of copyout().
+ */
+static int
+lx_ptrace_geteventmsg(lx_lwp_data_t *remote, void *umsgp)
+{
+#if defined(_SYSCALL32_IMPL)
+	if (get_udatamodel() != DATAMODEL_NATIVE) {
+		uint32_t msg32 = remote->br_ptrace_eventmsg;
+
+		return (copyout(&msg32, umsgp, sizeof (msg32)));
+	}
+#endif
+
+	return (copyout(&remote->br_ptrace_eventmsg, umsgp,
+	    sizeof (ulong_t)));
+}
+
+/*
+ * Implements the restarting requests of the Linux ptrace(2) interface that
+ * are built on PTRACE_CONT. "flags" selects the variant and "signo" is the
+ * signal the tracer wants delivered in place of the one that stopped the
+ * tracee.
+ *
+ * Returns EINVAL for single-step requests, which are not supported, and 0
+ * otherwise.
+ */
+static int
+lx_ptrace_cont(lx_lwp_data_t *remote, lx_ptrace_cont_flags_t flags, int signo)
+{
+	if ((flags & LX_PTC_SINGLESTEP) != 0) {
+		/*
+		 * Single-stepping has not been implemented.
+		 */
+		lx_unsupported("PTRACE_SINGLESTEP not currently implemented");
+		return (EINVAL);
+	}
+
+	/*
+	 * Record the tracer's choice of signal. If the tracee is in a
+	 * "signal-delivery-stop", br_ptrace_stopsig becomes the signal that
+	 * is delivered, which lets the tracer suppress or substitute it.
+	 * In any other state this may silently have no effect, as is the way
+	 * of the Linux ptrace(2) state machine.
+	 */
+	remote->br_ptrace_stopsig = signo;
+
+	/*
+	 * Only a PTRACE_SYSCALL restart arms the syscall-stop flag; every
+	 * other restart disarms it.
+	 */
+	if ((flags & LX_PTC_SYSCALL) == 0) {
+		remote->br_ptrace_flags &= ~LX_PTRACE_SYSCALL;
+	} else {
+		remote->br_ptrace_flags |= LX_PTRACE_SYSCALL;
+	}
+
+	lx_ptrace_restart_lwp(remote->br_lwp);
+
+	return (0);
+}
+
+/*
+ * Implements the PTRACE_DETACH subcommand of the Linux ptrace(2) interface.
+ *
+ * The tracee "remote" is removed from the tracee list of "accord", its
+ * ptrace(2) attachment state and flags are reset, and it is set runnable
+ * through lx_ptrace_restart_lwp(). "signo" is stored as the signal to be
+ * delivered on restart, as described in lx_ptrace_cont().
+ *
+ * The caller must already have established that the tracee is in
+ * "ptrace-stop", and must hold the p_lock of the process containing it;
+ * lx_ptrace_restart_lwp() verifies the latter. NOTE(review): the tracee
+ * list is modified here without taking lxpa_tracees_lock, so presumably the
+ * caller holds that lock as well -- confirm against the caller.
+ *
+ * On return *release_hold is set to B_TRUE; presumably this tells the
+ * caller to drop the hold that the tracee had on the accord. This routine
+ * always returns 0.
+ */
+static int
+lx_ptrace_detach(lx_ptrace_accord_t *accord, lx_lwp_data_t *remote, int signo,
+ boolean_t *release_hold)
+{
+ klwp_t *rlwp;
+
+ rlwp = remote->br_lwp;
+
+ /*
+ * The tracee LWP was in "ptrace-stop" and we now hold its p_lock.
+ * Detach the LWP from the accord and set it running.
+ */
+ VERIFY(!TRACEE_BUSY(remote));
+ remote->br_ptrace_flags &= ~(LX_PTRACE_SYSCALL | LX_PTRACE_INHERIT);
+ VERIFY(list_link_active(&remote->br_ptrace_linkage));
+ list_remove(&accord->lxpa_tracees, remote);
+
+ remote->br_ptrace_attach = LX_PTA_NONE;
+ remote->br_ptrace_tracer = NULL;
+ remote->br_ptrace_flags = 0; /* supersedes the partial clear above */
+ *release_hold = B_TRUE;
+
+ /*
+ * The tracer may, as described in lx_ptrace_cont(), choose to suppress
+ * or modify the delivered signal.
+ */
+ remote->br_ptrace_stopsig = signo;
+
+ lx_ptrace_restart_lwp(rlwp);
+
+ return (0);
+}
+
+/*
+ * This routine implements the PTRACE_ATTACH operation of the Linux ptrace(2)
+ * interface.
+ *
+ * This LWP is requesting to be attached as a tracer to another LWP -- the
+ * tracee. If a ptrace accord to track the list of tracees has not yet been
+ * allocated, one will be allocated and attached to this LWP now.
+ *
+ * The "br_ptrace_tracer" on the tracee LWP is set to this accord, and the
+ * tracee LWP is then added to the "lxpa_tracees" list in the accord. We drop
+ * locks between these two phases; the only consumer of trace events from this
+ * accord is this LWP, which obviously cannot be running waitpid(2) at the same
+ * time as this call to ptrace(2).
+ *
+ * "lx_pid" is the Linux pid of the intended tracee. Returns 0 on success.
+ * Returns EPERM if the caller names itself, lacks permission to trace the
+ * target process, or the target is already being traced. Returns ESRCH if
+ * the target process or LWP cannot be found or is not visible to ptrace(2).
+ *
+ * A hold is placed on the accord before the tracee is looked up; it is kept
+ * on success and released again on any failure.
+ */
+static int
+lx_ptrace_attach(pid_t lx_pid)
+{
+ int error = ESRCH;
+ int32_t one = 1;
+ /*
+ * Our (Tracer) LWP:
+ */
+ lx_ptrace_accord_t *accord;
+ lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+ /*
+ * Remote (Tracee) LWP:
+ */
+ pid_t rpid;
+ id_t rtid;
+ proc_t *rproc;
+ kthread_t *rthr;
+ klwp_t *rlwp;
+ lx_lwp_data_t *rlwpd;
+
+ if (lwpd->br_pid == lx_pid) {
+ /*
+ * We cannot trace ourselves.
+ */
+ return (EPERM);
+ }
+
+ /*
+ * Ensure that we have an accord and obtain a lock on it. This
+ * routine should not fail because the LWP cannot make ptrace(2) system
+ * calls after it has begun exiting.
+ */
+ VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_EXITING);
+ VERIFY(lx_ptrace_accord_get(&accord, B_TRUE) == 0);
+
+ /*
+ * Place speculative hold in case the attach is successful.
+ */
+ lx_ptrace_accord_hold(accord);
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * Locate the process containing the tracee LWP based on its Linux pid
+ * and lock it.
+ */
+ if (lx_lpid_to_spair(lx_pid, &rpid, &rtid) != 0 ||
+ (rproc = sprlock(rpid)) == NULL) {
+ /*
+ * We could not find the target process.
+ */
+ goto errout;
+ }
+
+ /*
+ * Locate the tracee LWP.
+ */
+ if ((rthr = idtot(rproc, rtid)) == NULL ||
+ (rlwp = ttolwp(rthr)) == NULL ||
+ (rlwpd = lwptolxlwp(rlwp)) == NULL ||
+ !VISIBLE(rlwpd)) {
+ /*
+ * The LWP could not be found, was not branded, or is not
+ * visible to ptrace(2) at this time.
+ */
+ goto unlock_errout;
+ }
+
+ /*
+ * We now hold the lock on the tracee. Attempt to install ourselves
+ * as the tracer.
+ */
+ if (curproc != rproc && priv_proc_cred_perm(curproc->p_cred, rproc,
+ NULL, VWRITE) != 0) {
+ /*
+ * This process does not have permission to trace the remote
+ * process.
+ */
+ error = EPERM;
+ } else if (rlwpd->br_ptrace_tracer != NULL) {
+ /*
+ * This LWP is already being traced.
+ */
+ VERIFY(list_link_active(&rlwpd->br_ptrace_linkage));
+ VERIFY(rlwpd->br_ptrace_attach != LX_PTA_NONE);
+ error = EPERM;
+ } else {
+ lx_proc_data_t *rprocd;
+
+ /*
+ * Bond the tracee to the accord.
+ */
+ VERIFY0(rlwpd->br_ptrace_flags & LX_PTRACE_EXITING);
+ VERIFY(rlwpd->br_ptrace_attach == LX_PTA_NONE);
+ rlwpd->br_ptrace_attach = LX_PTA_ATTACH;
+ rlwpd->br_ptrace_tracer = accord;
+
+ /*
+ * We had no tracer, and are thus not in the tracees list.
+ * It is safe to take the tracee list lock while we insert
+ * ourselves.
+ */
+ mutex_enter(&accord->lxpa_tracees_lock);
+ VERIFY(!list_link_active(&rlwpd->br_ptrace_linkage));
+ list_insert_tail(&accord->lxpa_tracees, rlwpd);
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * Send a thread-directed SIGSTOP.
+ */
+ sigtoproc(rproc, rthr, SIGSTOP);
+
+ /*
+ * Set the in-kernel process-wide ptrace(2) enable flag.
+ * Attempt also to write the usermode trace flag so that the
+ * process knows to enter the kernel for potential ptrace(2)
+ * syscall-stops. The tracee p_lock is dropped around the
+ * write to user memory; the result of that write is ignored.
+ */
+ rprocd = ttolxproc(rthr);
+ rprocd->l_ptrace = 1;
+ mutex_exit(&rproc->p_lock);
+ (void) uwrite(rproc, &one, sizeof (one), rprocd->l_traceflag);
+ mutex_enter(&rproc->p_lock);
+
+ error = 0;
+ }
+
+unlock_errout:
+ /*
+ * Unlock the process containing the tracee LWP.
+ */
+ sprunlock(rproc);
+
+errout:
+ if (error != 0) {
+ /*
+ * The attach was not successful. Remove our speculative
+ * hold.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+ }
+
+ return (error);
+}
+
+/*
+ * Record, on the current LWP, which ptrace(2) option applies to the
+ * clone(2), fork(2) or vfork(2) call that is about to be made, and whether
+ * the caller explicitly asked for its tracer to be inherited.
+ *
+ * option	one of LX_PTRACE_O_TRACEFORK, LX_PTRACE_O_TRACEVFORK or
+ *		LX_PTRACE_O_TRACECLONE
+ * inherit_flag	if true LX_PTRACE_INHERIT is set on the LWP, otherwise it
+ *		is cleared
+ *
+ * Returns 0 on success, or EINVAL (leaving the LWP untouched) if "option"
+ * is not one of the values above. The caller must not hold p_lock for the
+ * current process; it is taken and released here.
+ */
+int
+lx_ptrace_set_clone_inherit(int option, boolean_t inherit_flag)
+{
+	klwp_t *lwp = ttolwp(curthread);
+	proc_t *p = lwptoproc(lwp);
+	lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+
+	/*
+	 * Validate the option before taking p_lock, so that the error path
+	 * can never return with the lock still held.
+	 */
+	switch (option) {
+	case LX_PTRACE_O_TRACEFORK:
+	case LX_PTRACE_O_TRACEVFORK:
+	case LX_PTRACE_O_TRACECLONE:
+		break;
+
+	default:
+		return (EINVAL);
+	}
+
+	mutex_enter(&p->p_lock);
+
+	lwpd->br_ptrace_clone_option = option;
+
+	if (inherit_flag) {
+		lwpd->br_ptrace_flags |= LX_PTRACE_INHERIT;
+	} else {
+		lwpd->br_ptrace_flags &= ~LX_PTRACE_INHERIT;
+	}
+
+	mutex_exit(&p->p_lock);
+	return (0);
+}
+
+/*
+ * If the parent LWP is being traced, we want to attach ourselves to the
+ * same accord.
+ *
+ * "src" is the brand data of the parent LWP and "dst" is the brand data of
+ * the new child LWP.  When both LWPs belong to the same process the caller
+ * must already hold that process's p_lock; otherwise this routine takes and
+ * releases the parent's p_lock itself.
+ *
+ * Note that when inheritance is triggered, the parent's p_lock is dropped
+ * and reacquired part-way through, even when the caller held it on entry.
+ * The LX_PTRACE_CLONING flag covers that window.
+ *
+ * The one-shot LX_PTRACE_INHERIT flag and "br_ptrace_clone_option" value on
+ * the parent are always reset if the parent has a tracer.
+ */
+void
+lx_ptrace_inherit_tracer(lx_lwp_data_t *src, lx_lwp_data_t *dst)
+{
+ proc_t *srcp = lwptoproc(src->br_lwp);
+ proc_t *dstp = lwptoproc(dst->br_lwp);
+ lx_ptrace_accord_t *accord;
+ boolean_t unlock = B_FALSE;
+
+ if (srcp == dstp) {
+ /*
+ * This is syslwp_create(), so the process p_lock is already
+ * held.
+ */
+ VERIFY(MUTEX_HELD(&srcp->p_lock));
+ } else {
+ unlock = B_TRUE;
+ mutex_enter(&srcp->p_lock);
+ }
+
+ if ((accord = src->br_ptrace_tracer) == NULL) {
+ /*
+ * The source LWP does not have a tracer to inherit.
+ */
+ goto out;
+ }
+
+ /*
+ * There are two conditions to check when determining if the new
+ * child should inherit the same tracer (and tracing options) as its
+ * parent. Either condition is sufficient to trigger inheritance.
+ */
+ dst->br_ptrace_attach = LX_PTA_NONE;
+ if ((src->br_ptrace_options & src->br_ptrace_clone_option) != 0) {
+ /*
+ * Condition 1:
+ * The clone(2), fork(2) and vfork(2) emulated system calls
+ * populate "br_ptrace_clone_option" with the specific
+ * ptrace(2) SETOPTIONS option that applies to this
+ * operation. If the relevant option has been enabled by the
+ * tracer then we inherit.
+ */
+ dst->br_ptrace_attach |= LX_PTA_INHERIT_OPTIONS;
+
+ } else if ((src->br_ptrace_flags & LX_PTRACE_INHERIT) != 0) {
+ /*
+ * Condition 2:
+ * If the caller opted in to inheritance with the
+ * PTRACE_CLONE flag to clone(2), the LX_PTRACE_INHERIT flag
+ * will be set and we inherit.
+ */
+ dst->br_ptrace_attach |= LX_PTA_INHERIT_CLONE;
+ }
+
+ /*
+ * These values only apply for the duration of a single clone(2), et
+ * al, system call.
+ */
+ src->br_ptrace_flags &= ~LX_PTRACE_INHERIT;
+ src->br_ptrace_clone_option = 0;
+
+ if (dst->br_ptrace_attach == LX_PTA_NONE) {
+ /*
+ * No condition triggered inheritance.
+ */
+ goto out;
+ }
+
+ /*
+ * Set the LX_PTRACE_CLONING flag to prevent us from being detached
+ * while our p_lock is dropped.
+ */
+ src->br_ptrace_flags |= LX_PTRACE_CLONING;
+ mutex_exit(&srcp->p_lock);
+
+ /*
+ * Hold the accord for the new LWP.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_hold(accord);
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * Install the tracer and copy the current PTRACE_SETOPTIONS options.
+ */
+ dst->br_ptrace_tracer = accord;
+ dst->br_ptrace_options = src->br_ptrace_options;
+
+ /*
+ * This flag prevents waitid() from seeing events for the new child
+ * until the parent is able to post the relevant ptrace event to
+ * the tracer.
+ */
+ dst->br_ptrace_flags |= LX_PTRACE_PARENT_WAIT;
+
+ mutex_enter(&accord->lxpa_tracees_lock);
+ VERIFY(list_link_active(&src->br_ptrace_linkage));
+ VERIFY(!list_link_active(&dst->br_ptrace_linkage));
+ list_insert_tail(&accord->lxpa_tracees, dst);
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * Relock our process and clear our busy flag.
+ */
+ mutex_enter(&srcp->p_lock);
+ src->br_ptrace_flags &= ~LX_PTRACE_CLONING;
+
+ /*
+ * If lx_ptrace_exit_tracer() is trying to detach our tracer, it will
+ * be sleeping on this CV until LX_PTRACE_CLONING is clear. Wake it
+ * now.
+ */
+ cv_broadcast(&lx_ptrace_busy_cv);
+
+out:
+ if (unlock) {
+ mutex_exit(&srcp->p_lock);
+ }
+}
+
+/*
+ * Handle PTRACE_TRACEME: make the calling LWP a tracee of its parent LWP.
+ *
+ * The routine optimistically takes a hold on the parent's accord and
+ * installs it as our tracer, then attempts to add us to the accord's tracee
+ * list.  If the accord was marked for death in the meantime, the attachment
+ * is rolled back and the speculative hold is released.
+ *
+ * Returns 0 on success; EPERM if this LWP is already being traced; ESRCH if
+ * the parent's accord is marked for death; or the error returned by
+ * lx_ptrace_accord_get_by_pid() if the accord could not be obtained.
+ */
+static int
+lx_ptrace_traceme(void)
+{
+ int error;
+ boolean_t did_attach = B_FALSE;
+ /*
+ * Our (Tracee) LWP:
+ */
+ klwp_t *lwp = ttolwp(curthread);
+ proc_t *p = lwptoproc(lwp);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ /*
+ * Remote (Tracer) LWP:
+ */
+ lx_ptrace_accord_t *accord;
+
+ /*
+ * We are intending to be the tracee. Fetch (or allocate) the accord
+ * for our parent LWP.
+ */
+ if ((error = lx_ptrace_accord_get_by_pid(lx_lwp_ppid(lwp, NULL,
+ NULL), &accord)) != 0) {
+ /*
+ * Could not determine the Linux pid of the parent LWP, or
+ * could not get the accord for that LWP.
+ */
+ return (error);
+ }
+
+ /*
+ * We now hold the accord lock.
+ */
+ if (accord->lxpa_flags & LX_ACC_TOMBSTONE) {
+ /*
+ * The accord is marked for death; give up now.
+ */
+ lx_ptrace_accord_exit(accord);
+ return (ESRCH);
+ }
+
+ /*
+ * Bump the reference count so that the accord is not freed. We need
+ * to drop the accord lock before we take our own p_lock.
+ */
+ lx_ptrace_accord_hold(accord);
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * We now lock _our_ process and determine if we can install our parent
+ * as our tracer.
+ */
+ mutex_enter(&p->p_lock);
+ if (lwpd->br_ptrace_tracer != NULL) {
+ /*
+ * This LWP is already being traced.
+ */
+ VERIFY(lwpd->br_ptrace_attach != LX_PTA_NONE);
+ error = EPERM;
+ } else {
+ /*
+ * Bond ourselves to the accord. We already bumped the accord
+ * reference count.
+ */
+ VERIFY(lwpd->br_ptrace_attach == LX_PTA_NONE);
+ lwpd->br_ptrace_attach = LX_PTA_TRACEME;
+ lwpd->br_ptrace_tracer = accord;
+ did_attach = B_TRUE;
+ error = 0;
+ }
+ mutex_exit(&p->p_lock);
+
+ /*
+ * Lock the accord tracee list and add this LWP. Once we are in the
+ * tracee list, it is the responsibility of the tracer to detach us.
+ */
+ if (error == 0) {
+ lx_ptrace_accord_enter(accord);
+ mutex_enter(&accord->lxpa_tracees_lock);
+
+ if (!(accord->lxpa_flags & LX_ACC_TOMBSTONE)) {
+ lx_proc_data_t *procd = ttolxproc(curthread);
+
+ /*
+ * Put ourselves in the tracee list for this accord.
+ */
+ VERIFY(!list_link_active(&lwpd->br_ptrace_linkage));
+ list_insert_tail(&accord->lxpa_tracees, lwpd);
+ mutex_exit(&accord->lxpa_tracees_lock);
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * Set the in-kernel process-wide ptrace(2) enable
+ * flag. Attempt also to write the usermode trace flag
+ * so that the process knows to enter the kernel for
+ * potential ptrace(2) syscall-stops.
+ */
+ procd->l_ptrace = 1;
+ (void) suword32((void *)procd->l_traceflag, 1);
+
+ return (0);
+ }
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * The accord has been marked for death. We must
+ * untrace ourselves.
+ */
+ error = ESRCH;
+ lx_ptrace_accord_exit(accord);
+ }
+
+ /*
+ * Our optimism was unjustified: We were unable to attach. We need to
+ * lock the process containing this LWP again in order to remove the
+ * tracer.
+ */
+ VERIFY(error != 0);
+ mutex_enter(&p->p_lock);
+ if (did_attach) {
+ /*
+ * Verify that things were as we left them:
+ */
+ VERIFY(!list_link_active(&lwpd->br_ptrace_linkage));
+ VERIFY(lwpd->br_ptrace_tracer == accord);
+
+ lwpd->br_ptrace_attach = LX_PTA_NONE;
+ lwpd->br_ptrace_tracer = NULL;
+ }
+ mutex_exit(&p->p_lock);
+
+ /*
+ * Remove our speculative hold on the accord, possibly causing it to be
+ * freed in the process.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+
+ return (error);
+}
+
+/*
+ * Common code for placing an LWP into "ptrace-stop".
+ *
+ * The caller must hold p->p_lock on entry; it is released before this
+ * routine returns.  "what" is passed to stop() as the detail for the
+ * PR_BRAND stop reason.  On resumption the STOPPING, STOPPED and CLDPEND
+ * flags are cleared and any waiters on lx_ptrace_busy_cv are woken.
+ *
+ * Always returns B_TRUE.
+ */
+static boolean_t
+lx_ptrace_stop_common(proc_t *p, lx_lwp_data_t *lwpd, ushort_t what)
+{
+ VERIFY(MUTEX_HELD(&p->p_lock));
+
+ /*
+ * Mark this LWP as stopping and call stop() to enter "ptrace-stop".
+ */
+ VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_STOPPING);
+ lwpd->br_ptrace_flags |= LX_PTRACE_STOPPING;
+ stop(PR_BRAND, what);
+
+ /*
+ * We are back from "ptrace-stop" with our process lock held.
+ */
+ lwpd->br_ptrace_flags &= ~(LX_PTRACE_STOPPING | LX_PTRACE_STOPPED |
+ LX_PTRACE_CLDPEND);
+ cv_broadcast(&lx_ptrace_busy_cv);
+ mutex_exit(&p->p_lock);
+
+ return (B_TRUE);
+}
+
+/*
+ * Report a PTRACE_SETOPTIONS event (fork, vfork, clone, exec, exit, ...) on
+ * behalf of the calling LWP.
+ *
+ * "option" is the LX_PTRACE_O_* option corresponding to the event, "child"
+ * is B_TRUE when the caller is the newly created child rather than the
+ * parent, and "msg" is the value to expose through PTRACE_GETEVENTMSG.
+ *
+ * Returns 0 if the LWP entered ptrace-stop or was sent a signal in lieu of
+ * one; returns ESRCH if the LWP is not traced, the option is not enabled, or
+ * the option does not describe a stop for this caller.
+ */
+int
+lx_ptrace_stop_for_option(int option, boolean_t child, ulong_t msg)
+{
+	kthread_t *t = curthread;
+	klwp_t *lwp = ttolwp(t);
+	proc_t *p = lwptoproc(lwp);
+	lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+	int ret = ESRCH;
+
+	mutex_enter(&p->p_lock);
+
+	/*
+	 * Without a tracer there is nobody to report the event to.
+	 */
+	if (lwpd->br_ptrace_tracer == NULL)
+		goto out;
+
+	/*
+	 * Only the first event posted by a new process is held back until
+	 * the matching parent event has been dispatched, and only when that
+	 * first event is a "child" event.  Any other event lifts the wait.
+	 */
+	if (!child)
+		lwpd->br_ptrace_flags &= ~LX_PTRACE_PARENT_WAIT;
+
+	if ((lwpd->br_ptrace_options & option) == 0) {
+		/*
+		 * The tracer did not ask for this event, so there is no
+		 * stop.  The exception is exec: without PTRACE_O_TRACEEXEC
+		 * the Linux kernel sends SIGTRAP to the process instead.
+		 */
+		if (option == LX_PTRACE_O_TRACEEXEC) {
+			sigtoproc(p, t, SIGTRAP);
+			ret = 0;
+		}
+		goto out;
+	}
+
+	if (child) {
+		/*
+		 * A freshly created child does not post an event stop; for
+		 * the clone family it receives a directed SIGSTOP instead.
+		 */
+		if (option == LX_PTRACE_O_TRACECLONE ||
+		    option == LX_PTRACE_O_TRACEFORK ||
+		    option == LX_PTRACE_O_TRACEVFORK) {
+			sigtoproc(p, t, SIGSTOP);
+			ret = 0;
+			goto out;
+		}
+		goto clear;
+	}
+
+	/*
+	 * Translate the option into the event to report.  The exec and
+	 * vfork-done events carry no event message.
+	 */
+	switch (option) {
+	case LX_PTRACE_O_TRACEFORK:
+		lwpd->br_ptrace_event = LX_PTRACE_EVENT_FORK;
+		break;
+	case LX_PTRACE_O_TRACEVFORK:
+		lwpd->br_ptrace_event = LX_PTRACE_EVENT_VFORK;
+		break;
+	case LX_PTRACE_O_TRACECLONE:
+		lwpd->br_ptrace_event = LX_PTRACE_EVENT_CLONE;
+		break;
+	case LX_PTRACE_O_TRACEEXIT:
+		lwpd->br_ptrace_event = LX_PTRACE_EVENT_EXIT;
+		break;
+	case LX_PTRACE_O_TRACEEXEC:
+		lwpd->br_ptrace_event = LX_PTRACE_EVENT_EXEC;
+		msg = 0;
+		break;
+	case LX_PTRACE_O_TRACEVFORKDONE:
+		lwpd->br_ptrace_event = LX_PTRACE_EVENT_VFORK_DONE;
+		msg = 0;
+		break;
+	default:
+		goto clear;
+	}
+	lwpd->br_ptrace_eventmsg = msg;
+
+	/*
+	 * lx_ptrace_stop_common() drops p_lock for us before it returns.
+	 */
+	return (lx_ptrace_stop_common(p, lwpd, LX_PR_EVENT) ? 0 : ESRCH);
+
+clear:
+	lwpd->br_ptrace_event = 0;
+	lwpd->br_ptrace_eventmsg = 0;
+out:
+	mutex_exit(&p->p_lock);
+	return (ret);
+}
+
+/*
+ * Enter ptrace-stop on the calling LWP for a syscall-entry, syscall-exit or
+ * signal-delivery stop point, if the LWP is traced and a stop is warranted.
+ *
+ * Returns B_FALSE if no stop took place, or the result of
+ * lx_ptrace_stop_common() once the LWP has been resumed.
+ */
+boolean_t
+lx_ptrace_stop(ushort_t what)
+{
+	klwp_t *lwp = ttolwp(curthread);
+	proc_t *p = lwptoproc(lwp);
+	lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+	boolean_t syscall_stop;
+
+	VERIFY(what == LX_PR_SYSENTRY || what == LX_PR_SYSEXIT ||
+	    what == LX_PR_SIGNALLED);
+
+	syscall_stop = (what == LX_PR_SYSENTRY || what == LX_PR_SYSEXIT) ?
+	    B_TRUE : B_FALSE;
+
+	/*
+	 * Cheap unlocked test first: with no accord there is nothing to do.
+	 */
+	if (lwpd->br_ptrace_tracer == NULL) {
+		return (B_FALSE);
+	}
+
+	/*
+	 * Repeat the test with the process locked, as the tracer may have
+	 * gone away in the meantime.
+	 */
+	mutex_enter(&p->p_lock);
+	if (lwpd->br_ptrace_tracer == NULL) {
+		VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_SYSCALL);
+		goto nostop;
+	}
+
+	if (syscall_stop) {
+		/*
+		 * System call boundaries only stop when the tracer asked for
+		 * it via PTRACE_SYSCALL.
+		 */
+		if ((lwpd->br_ptrace_flags & LX_PTRACE_SYSCALL) == 0) {
+			goto nostop;
+		}
+
+		/*
+		 * The request is good for exactly one system call entry or
+		 * exit.  To stop again at the next boundary, the tracer must
+		 * restart us with PTRACE_SYSCALL while we are in ptrace-stop.
+		 */
+		lwpd->br_ptrace_flags &= ~LX_PTRACE_SYSCALL;
+	}
+
+	/*
+	 * lx_ptrace_stop_common() drops p_lock for us before it returns.
+	 */
+	return (lx_ptrace_stop_common(p, lwpd, what));
+
+nostop:
+	mutex_exit(&p->p_lock);
+	return (B_FALSE);
+}
+
+/*
+ * Enter ptrace "signal-delivery-stop" for the signal currently being
+ * dispatched to this LWP, then apply whatever the tracer did to the signal
+ * while we were stopped.
+ *
+ * The caller must hold p->p_lock; it is held again on return.
+ *
+ * Returns 0 in the common case (no stop was needed, or the signal was left
+ * alone, replaced or suppressed).  Returns -1 if the tracer replaced the
+ * signal with SIGKILL, requesting a restart of signal processing.
+ */
+int
+lx_issig_stop(proc_t *p, klwp_t *lwp)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ int lx_sig;
+
+ VERIFY(MUTEX_HELD(&p->p_lock));
+
+ /*
+ * If we do not have an accord, bail out now. Additionally, if there
+ * is no valid signal then we have no reason to stop.
+ *
+ * NOTE(review): the range check admits lwp_cursig == NSIG; confirm
+ * that stol_signo[] has at least NSIG + 1 entries.
+ */
+ if (lwpd->br_ptrace_tracer == NULL || lwp->lwp_cursig == SIGKILL ||
+ (lwp->lwp_cursig == 0 || lwp->lwp_cursig > NSIG) ||
+ (lx_sig = stol_signo[lwp->lwp_cursig]) < 1) {
+ return (0);
+ }
+
+ /*
+ * We stash the signal on the LWP where our waitid_helper will find it
+ * and enter the ptrace "signal-delivery-stop" condition.
+ */
+ lwpd->br_ptrace_stopsig = lx_sig;
+ (void) lx_ptrace_stop_common(p, lwpd, LX_PR_SIGNALLED);
+ /*
+ * lx_ptrace_stop_common() dropped p_lock; take it again.
+ */
+ mutex_enter(&p->p_lock);
+
+ /*
+ * When we return, the signal may have been altered or suppressed.
+ */
+ if (lwpd->br_ptrace_stopsig != lx_sig) {
+ int native_sig;
+ lx_sig = lwpd->br_ptrace_stopsig;
+
+ if (lx_sig >= LX_NSIG) {
+ lx_sig = 0;
+ }
+
+ /*
+ * Translate signal from Linux signal number back to
+ * an illumos native signal.
+ */
+ if (lx_sig >= LX_NSIG || lx_sig < 0 || (native_sig =
+ ltos_signo[lx_sig]) < 1) {
+ /*
+ * The signal is not deliverable.
+ */
+ lwp->lwp_cursig = 0;
+ lwp->lwp_extsig = 0;
+ if (lwp->lwp_curinfo) {
+ siginfofree(lwp->lwp_curinfo);
+ lwp->lwp_curinfo = NULL;
+ }
+ } else {
+ /*
+ * Alter the currently dispatching signal.
+ */
+ if (native_sig == SIGKILL) {
+ /*
+ * We mark ourselves the victim and request
+ * a restart of signal processing.
+ *
+ * NOTE(review): br_ptrace_stopsig is left
+ * set on this path; confirm that this is
+ * intentional.
+ */
+ p->p_flag |= SKILLED;
+ p->p_flag &= ~SEXTKILLED;
+ return (-1);
+ }
+ lwp->lwp_cursig = native_sig;
+ lwp->lwp_extsig = 0;
+ if (lwp->lwp_curinfo != NULL) {
+ lwp->lwp_curinfo->sq_info.si_signo = native_sig;
+ }
+ }
+ }
+
+ lwpd->br_ptrace_stopsig = 0;
+ return (0);
+}
+
+/*
+ * Tear down the accord owned by an exiting tracer LWP.
+ *
+ * The accord is first marked with LX_ACC_TOMBSTONE so that no new tracees
+ * can attach.  Each tracee is then removed from the tracee list, has its
+ * tracing state cleared, is restarted if it was stopped on our account, and
+ * has its hold on the accord released.  Finally the tracer's own reference
+ * to the accord is cleared and its hold released.
+ *
+ * The caller must NOT hold p->p_lock.
+ */
+static void
+lx_ptrace_exit_tracer(proc_t *p, lx_lwp_data_t *lwpd,
+ lx_ptrace_accord_t *accord)
+{
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
+ lx_ptrace_accord_enter(accord);
+ /*
+ * Mark this accord for death. This means no new tracees can be
+ * attached to this accord.
+ */
+ VERIFY0(accord->lxpa_flags & LX_ACC_TOMBSTONE);
+ accord->lxpa_flags |= LX_ACC_TOMBSTONE;
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * Walk the list of tracees, detaching them and setting them runnable
+ * if they are stopped.
+ */
+ for (;;) {
+ klwp_t *rlwp;
+ proc_t *rproc;
+ lx_lwp_data_t *remote;
+ kmutex_t *rmp;
+
+ mutex_enter(&accord->lxpa_tracees_lock);
+ if (list_is_empty(&accord->lxpa_tracees)) {
+ mutex_exit(&accord->lxpa_tracees_lock);
+ break;
+ }
+
+ /*
+ * Fetch the first tracee LWP in the list and lock the process
+ * which contains it.
+ */
+ remote = list_head(&accord->lxpa_tracees);
+ rlwp = remote->br_lwp;
+ rproc = lwptoproc(rlwp);
+ /*
+ * The p_lock mutex persists beyond the life of the process
+ * itself. We save the address, here, to prevent the need to
+ * dereference the proc_t after awaking from sleep.
+ */
+ rmp = &rproc->p_lock;
+ mutex_enter(rmp);
+
+ if (TRACEE_BUSY(remote)) {
+ /*
+ * This LWP is currently detaching itself on exit, or
+ * mid-way through stop(). We must wait for this
+ * action to be completed. While we wait on the CV, we
+ * must drop the accord tracee list lock.
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+ cv_wait(&lx_ptrace_busy_cv, rmp);
+
+ /*
+ * While we were waiting, some state may have changed.
+ * Restart the walk to be sure we don't miss anything.
+ */
+ mutex_exit(rmp);
+ continue;
+ }
+
+ /*
+ * We now hold p_lock on the process. Remove the tracee from
+ * the list.
+ */
+ VERIFY(list_link_active(&remote->br_ptrace_linkage));
+ list_remove(&accord->lxpa_tracees, remote);
+
+ /*
+ * Unlink the accord and clear our trace flags.
+ */
+ remote->br_ptrace_attach = LX_PTA_NONE;
+ remote->br_ptrace_tracer = NULL;
+ remote->br_ptrace_flags = 0;
+
+ /*
+ * Let go of the list lock before we restart the LWP. We must
+ * not hold any locks other than the process p_lock when
+ * we call lx_ptrace_restart_lwp() as it will thread_lock
+ * the tracee.
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * Ensure that the LWP is not stopped on our account.
+ */
+ lx_ptrace_restart_lwp(rlwp);
+
+ /*
+ * Unlock the former tracee.
+ */
+ mutex_exit(rmp);
+
+ /*
+ * Drop the hold this tracee had on the accord.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+ }
+
+ /*
+ * Forget our own accord, under our own process lock.
+ */
+ mutex_enter(&p->p_lock);
+ lwpd->br_ptrace_accord = NULL;
+ mutex_exit(&p->p_lock);
+
+ /*
+ * Clean up and release our hold on the accord. If we completely
+ * detached all tracee LWPs, this will free the accord. Otherwise, it
+ * will be freed when they complete their cleanup.
+ *
+ * We hold "pidlock" while clearing these members for easy exclusion of
+ * waitid(), etc.
+ */
+ mutex_enter(&pidlock);
+ lx_ptrace_accord_enter(accord);
+ accord->lxpa_cvp = NULL;
+ accord->lxpa_tracer = NULL;
+ mutex_exit(&pidlock);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+}
+
+/*
+ * Detach an exiting tracee LWP from the accord of its tracer.
+ *
+ * The LWP's tracer reference is cleared and it is removed from the accord's
+ * tracee list.  Waiters on lx_ptrace_busy_cv are woken, the tracer is woken
+ * in case it is blocked in waitid(), and the tracee's hold on the accord is
+ * released.
+ *
+ * The caller must NOT hold p->p_lock.  Lock order here is the accord tracee
+ * list lock first, then p_lock.
+ */
+static void
+lx_ptrace_exit_tracee(proc_t *p, lx_lwp_data_t *lwpd,
+ lx_ptrace_accord_t *accord)
+{
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
+ /*
+ * We are the tracee LWP. Lock the accord tracee list and then our
+ * containing process.
+ */
+ mutex_enter(&accord->lxpa_tracees_lock);
+ mutex_enter(&p->p_lock);
+
+ /*
+ * Remove our reference to the accord. We will release our hold
+ * later.
+ */
+ VERIFY(lwpd->br_ptrace_tracer == accord);
+ lwpd->br_ptrace_attach = LX_PTA_NONE;
+ lwpd->br_ptrace_tracer = NULL;
+
+ /*
+ * Remove this LWP from the accord tracee list:
+ */
+ VERIFY(list_link_active(&lwpd->br_ptrace_linkage));
+ list_remove(&accord->lxpa_tracees, lwpd);
+
+ /*
+ * Wake up any tracers waiting for us to detach from the accord.
+ */
+ cv_broadcast(&lx_ptrace_busy_cv);
+ mutex_exit(&p->p_lock);
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * Grab "pidlock" and wake the tracer if it is blocked in waitid().
+ */
+ mutex_enter(&pidlock);
+ if (accord->lxpa_cvp != NULL) {
+ cv_broadcast(accord->lxpa_cvp);
+ }
+ mutex_exit(&pidlock);
+
+ /*
+ * Release our hold on the accord.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+}
+
+/*
+ * ptrace(2) cleanup for an LWP that is about to exit; called from
+ * lx_exitlwp().  An LWP can play two roles, and both are unwound here: as a
+ * tracee it is detached from its tracer's accord, and as a tracer every LWP
+ * it was tracing is detached and its own accord is cleaned up.
+ *
+ * The caller holds p->p_lock.  The lock is dropped around each of the two
+ * detach operations and is held again on return.
+ */
+void
+lx_ptrace_exit(proc_t *p, klwp_t *lwp)
+{
+	lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+	lx_ptrace_accord_t *tracer_accord;
+	lx_ptrace_accord_t *own_accord;
+
+	VERIFY(MUTEX_HELD(&p->p_lock));
+
+	/*
+	 * Flag the LWP as exiting as far as ptrace is concerned.  From here
+	 * on no new accord will be allocated for it if it lacks one, and
+	 * PTRACE_ATTACH/PTRACE_TRACEME can no longer see it.
+	 */
+	VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_EXITING);
+	lwpd->br_ptrace_flags |= LX_PTRACE_EXITING;
+
+	/*
+	 * Role 1: we are a tracee of some other LWP.
+	 */
+	tracer_accord = lwpd->br_ptrace_tracer;
+	if (tracer_accord != NULL) {
+		mutex_exit(&p->p_lock);
+		lx_ptrace_exit_tracee(p, lwpd, tracer_accord);
+		mutex_enter(&p->p_lock);
+	}
+
+	/*
+	 * Role 2: we are the tracer of other LWPs.  The accord pointer is
+	 * sampled only now, after p_lock has been retaken.
+	 */
+	own_accord = lwpd->br_ptrace_accord;
+	if (own_accord != NULL) {
+		mutex_exit(&p->p_lock);
+		lx_ptrace_exit_tracer(p, lwpd, own_accord);
+		mutex_enter(&p->p_lock);
+	}
+}
+
+/*
+ * Called when a SIGCLD signal is dispatched so that we may enqueue another.
+ * Return 0 if we enqueued a signal, or -1 if not.
+ *
+ * "pp" is the process containing the calling (tracer) LWP and "sqp" is the
+ * signal queue entry to fill in and enqueue.  The caller must hold
+ * "pidlock" and must NOT hold pp->p_lock.
+ *
+ * The tracee list of the caller's accord is searched for an LWP in
+ * ptrace-stop that still has LX_PTRACE_CLDPEND set and has a stop reason to
+ * report.  If SIGCLD is already pending on "pp", nothing is enqueued and
+ * LX_PTRACE_CLDPEND is restored on the tracee.
+ */
+int
+lx_sigcld_repost(proc_t *pp, sigqueue_t *sqp)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ lx_ptrace_accord_t *accord;
+ lx_lwp_data_t *remote;
+ klwp_t *rlwp;
+ proc_t *rproc;
+ boolean_t found = B_FALSE;
+
+ VERIFY(MUTEX_HELD(&pidlock));
+ VERIFY(MUTEX_NOT_HELD(&pp->p_lock));
+ VERIFY(lwptoproc(lwp) == pp);
+
+ mutex_enter(&pp->p_lock);
+ if ((accord = lwpd->br_ptrace_accord) == NULL) {
+ /*
+ * This LWP is not a tracer LWP, so there will be no
+ * SIGCLD.
+ */
+ mutex_exit(&pp->p_lock);
+ return (-1);
+ }
+ mutex_exit(&pp->p_lock);
+
+ mutex_enter(&accord->lxpa_tracees_lock);
+ for (remote = list_head(&accord->lxpa_tracees); remote != NULL;
+ remote = list_next(&accord->lxpa_tracees, remote)) {
+ rlwp = remote->br_lwp;
+ rproc = lwptoproc(rlwp);
+
+ /*
+ * Check if this LWP is in "ptrace-stop". If in the correct
+ * stop condition, lock the process containing the tracee LWP.
+ */
+ if (lx_ptrace_lock_if_stopped(accord, remote) != 0) {
+ continue;
+ }
+
+ if (remote->br_ptrace_flags & LX_PTRACE_PARENT_WAIT) {
+ /*
+ * This event depends on waitid() clearing out the
+ * event of another LWP. Skip it for now.
+ */
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ if (!(remote->br_ptrace_flags & LX_PTRACE_CLDPEND)) {
+ /*
+ * No SIGCLD is required for this LWP.
+ */
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ if (remote->br_ptrace_whystop == 0 ||
+ remote->br_ptrace_whatstop == 0) {
+ /*
+ * No (new) stop reason to post for this LWP.
+ */
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ /*
+ * We found a process of interest. Leave the process
+ * containing the tracee LWP locked and break out of the loop.
+ */
+ found = B_TRUE;
+ break;
+ }
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ if (!found) {
+ return (-1);
+ }
+
+ /*
+ * Generate siginfo for this tracee LWP.
+ */
+ lx_winfo(remote, &sqp->sq_info, B_FALSE, NULL, NULL);
+ remote->br_ptrace_flags &= ~LX_PTRACE_CLDPEND;
+ mutex_exit(&rproc->p_lock);
+
+ mutex_enter(&pp->p_lock);
+ if (sigismember(&pp->p_sig, SIGCLD)) {
+ mutex_exit(&pp->p_lock);
+
+ /*
+ * A SIGCLD is already pending, so we cannot post ours now.
+ * Put the pending marker back on the tracee.
+ */
+ mutex_enter(&rproc->p_lock);
+ remote->br_ptrace_flags |= LX_PTRACE_CLDPEND;
+ mutex_exit(&rproc->p_lock);
+
+ return (-1);
+ }
+ sigaddqa(pp, curthread, sqp);
+ mutex_exit(&pp->p_lock);
+
+ return (0);
+}
+
+/*
+ * Consume the next available ptrace(2) event queued against the accord for
+ * this LWP. The event will be emitted as if through waitid(), and converted
+ * by lx_waitpid() and friends before the return to usermode.
+ *
+ * "idtype" and "id" select which tracees are of interest (P_ALL, P_PID or
+ * P_PGID; anything else is ignored), "ip" receives the signal information
+ * for the event, and "options" are the waitid() options; the event is only
+ * consumed when WNOWAIT is not set.
+ *
+ * "*brand_wants_wait" is set to B_TRUE when this LWP has tracees but none
+ * has an event to report, telling waitid() that it may block.
+ *
+ * Returns 0, with "*rval" set to 0, if an event was found; returns -1 if
+ * there is nothing for the brand to report.
+ *
+ * The caller must hold "pidlock" and must NOT hold p_lock for the calling
+ * process.  "pidlock" is dropped and reacquired when an event is consumed.
+ */
+int
+lx_waitid_helper(idtype_t idtype, id_t id, k_siginfo_t *ip, int options,
+ boolean_t *brand_wants_wait, int *rval)
+{
+ lx_ptrace_accord_t *accord;
+ klwp_t *lwp = ttolwp(curthread);
+ proc_t *p = lwptoproc(lwp);
+ lx_lwp_data_t *local = lwptolxlwp(lwp);
+ lx_lwp_data_t *remote;
+ boolean_t found = B_FALSE;
+ klwp_t *rlwp = NULL;
+ proc_t *rproc = NULL;
+ pid_t event_pid = 0, event_ppid = 0;
+ boolean_t waitflag = !(options & WNOWAIT);
+
+ VERIFY(MUTEX_HELD(&pidlock));
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
+ /*
+ * By default, we do not expect waitid() to block on our account.
+ */
+ *brand_wants_wait = B_FALSE;
+
+ if (!local->br_waitid_emulate) {
+ /*
+ * This waitid() call is not expecting emulated results.
+ */
+ return (-1);
+ }
+
+ switch (idtype) {
+ case P_ALL:
+ case P_PID:
+ case P_PGID:
+ break;
+ default:
+ /*
+ * This idtype has no power here.
+ */
+ return (-1);
+ }
+
+ if (lx_ptrace_accord_get(&accord, B_FALSE) != 0) {
+ /*
+ * This LWP does not have an accord; it cannot be tracing.
+ */
+ return (-1);
+ }
+
+ /*
+ * We do not need an additional hold on the accord as it belongs to
+ * the running, tracer, LWP.
+ */
+ lx_ptrace_accord_exit(accord);
+
+ mutex_enter(&accord->lxpa_tracees_lock);
+ if (list_is_empty(&accord->lxpa_tracees)) {
+ /*
+ * Though it has an accord, there are currently no tracees in
+ * the list for this LWP.
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+ return (-1);
+ }
+
+ /*
+ * Walk the list of tracees and determine if any of them have events to
+ * report.
+ */
+ for (remote = list_head(&accord->lxpa_tracees); remote != NULL;
+ remote = list_next(&accord->lxpa_tracees, remote)) {
+ rlwp = remote->br_lwp;
+ rproc = lwptoproc(rlwp);
+
+ /*
+ * If the __WALL option was passed, we unconditionally consider
+ * every possible child.
+ */
+ if (!(local->br_waitid_flags & LX_WALL)) {
+ /*
+ * Otherwise, we check to see if this LWP matches an
+ * id we are waiting for.
+ */
+ switch (idtype) {
+ case P_ALL:
+ break;
+ case P_PID:
+ if (remote->br_pid != id)
+ continue;
+ break;
+ case P_PGID:
+ if (rproc->p_pgrp != id)
+ continue;
+ break;
+ default:
+ cmn_err(CE_PANIC, "unexpected idtype: %d",
+ idtype);
+ }
+ }
+
+ /*
+ * Check if this LWP is in "ptrace-stop". If in the correct
+ * stop condition, lock the process containing the tracee LWP.
+ */
+ if (lx_ptrace_lock_if_stopped(accord, remote) != 0) {
+ continue;
+ }
+
+ if (remote->br_ptrace_flags & LX_PTRACE_PARENT_WAIT) {
+ /*
+ * This event depends on waitid() clearing out the
+ * event of another LWP. Skip it for now.
+ */
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ if (remote->br_ptrace_whystop == 0 ||
+ remote->br_ptrace_whatstop == 0) {
+ /*
+ * No (new) stop reason to post for this LWP.
+ */
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ /*
+ * We found a process of interest. Leave the process
+ * containing the tracee LWP locked and break out of the loop.
+ */
+ found = B_TRUE;
+ break;
+ }
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ if (!found) {
+ /*
+ * There were no events of interest, but we have tracees.
+ * Signal to waitid() that it should block if the provided
+ * flags allow for it.
+ */
+ *brand_wants_wait = B_TRUE;
+ return (-1);
+ }
+
+ /*
+ * Populate the signal information.
+ */
+ lx_winfo(remote, ip, waitflag, &event_ppid, &event_pid);
+
+ /*
+ * Unlock the tracee.
+ */
+ mutex_exit(&rproc->p_lock);
+
+ if (event_pid != 0 && event_ppid != 0) {
+ /*
+ * We need to do another pass around the tracee list and
+ * unblock any events that have a "happens after" relationship
+ * with this event.
+ */
+ mutex_enter(&accord->lxpa_tracees_lock);
+ for (remote = list_head(&accord->lxpa_tracees); remote != NULL;
+ remote = list_next(&accord->lxpa_tracees, remote)) {
+ rlwp = remote->br_lwp;
+ rproc = lwptoproc(rlwp);
+
+ mutex_enter(&rproc->p_lock);
+
+ if (remote->br_pid != event_pid ||
+ remote->br_ppid != event_ppid) {
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ remote->br_ptrace_flags &= ~LX_PTRACE_PARENT_WAIT;
+
+ mutex_exit(&rproc->p_lock);
+ }
+ mutex_exit(&accord->lxpa_tracees_lock);
+ }
+
+ /*
+ * If we are consuming this wait state, we remove the SIGCLD from
+ * the queue and post another.
+ */
+ if (waitflag) {
+ mutex_exit(&pidlock);
+ sigcld_delete(ip);
+ sigcld_repost();
+ mutex_enter(&pidlock);
+ }
+
+ *rval = 0;
+ return (0);
+}
+
+/*
+ * Some PTRACE_* requests are handled in-kernel by this function. It is called
+ * through brandsys() via the B_PTRACE_KERNEL subcommand.
+ *
+ * "ptrace_op" is the LX_PTRACE_* request, "lxpid" identifies the target LWP,
+ * and "data" is the request-specific argument.  "addr" is currently unused.
+ *
+ * LX_PTRACE_TRACEME and LX_PTRACE_ATTACH are dispatched immediately.  Every
+ * other request requires that the target be in this LWP's tracee list and
+ * in "ptrace-stop".
+ *
+ * Returns 0 on success; ESRCH if "lxpid" is not one of our tracees; EINVAL
+ * for an unrecognised request; otherwise the error from the request handler
+ * or from lx_ptrace_lock_if_stopped().
+ */
+int
+lx_ptrace_kernel(int ptrace_op, pid_t lxpid, uintptr_t addr, uintptr_t data)
+{
+ lx_lwp_data_t *local = ttolxlwp(curthread);
+ lx_ptrace_accord_t *accord;
+ lx_lwp_data_t *remote;
+ klwp_t *rlwp;
+ proc_t *rproc;
+ int error;
+ boolean_t found = B_FALSE;
+ boolean_t release_hold = B_FALSE;
+
+ _NOTE(ARGUNUSED(addr));
+
+ /*
+ * These actions do not require the target LWP to be traced or stopped.
+ */
+ switch (ptrace_op) {
+ case LX_PTRACE_TRACEME:
+ return (lx_ptrace_traceme());
+
+ case LX_PTRACE_ATTACH:
+ return (lx_ptrace_attach(lxpid));
+ }
+
+ /*
+ * Ensure that we have an accord and obtain a lock on it. This routine
+ * should not fail because the LWP cannot make ptrace(2) system calls
+ * after it has begun exiting.
+ */
+ VERIFY0(local->br_ptrace_flags & LX_PTRACE_EXITING);
+ VERIFY(lx_ptrace_accord_get(&accord, B_TRUE) == 0);
+
+ /*
+ * The accord belongs to this (the tracer) LWP, and we have a hold on
+ * it. We drop the lock so that we can take other locks.
+ */
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * Does the tracee list contain the pid in question?
+ */
+ mutex_enter(&accord->lxpa_tracees_lock);
+ for (remote = list_head(&accord->lxpa_tracees); remote != NULL;
+ remote = list_next(&accord->lxpa_tracees, remote)) {
+ if (remote->br_pid == lxpid) {
+ found = B_TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ /*
+ * The requested pid does not appear in the tracee list.
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+ return (ESRCH);
+ }
+
+ /*
+ * Attempt to lock the target LWP.
+ */
+ if ((error = lx_ptrace_lock_if_stopped(accord, remote)) != 0) {
+ /*
+ * The LWP was not in "ptrace-stop".
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+ return (error);
+ }
+
+ /*
+ * The target LWP is in "ptrace-stop". We have the containing process
+ * locked.
+ */
+ rlwp = remote->br_lwp;
+ rproc = lwptoproc(rlwp);
+
+ /*
+ * Process the ptrace(2) request:
+ */
+ switch (ptrace_op) {
+ case LX_PTRACE_DETACH:
+ error = lx_ptrace_detach(accord, remote, (int)data,
+ &release_hold);
+ break;
+
+ case LX_PTRACE_CONT:
+ error = lx_ptrace_cont(remote, LX_PTC_NONE, (int)data);
+ break;
+
+ case LX_PTRACE_SYSCALL:
+ error = lx_ptrace_cont(remote, LX_PTC_SYSCALL, (int)data);
+ break;
+
+ case LX_PTRACE_SINGLESTEP:
+ error = lx_ptrace_cont(remote, LX_PTC_SINGLESTEP, (int)data);
+ break;
+
+ case LX_PTRACE_SETOPTIONS:
+ error = lx_ptrace_setoptions(remote, data);
+ break;
+
+ case LX_PTRACE_GETEVENTMSG:
+ error = lx_ptrace_geteventmsg(remote, (void *)data);
+ break;
+
+ default:
+ error = EINVAL;
+ }
+
+ /*
+ * Drop the lock on both the tracee process and the tracee list.
+ */
+ mutex_exit(&rproc->p_lock);
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ if (release_hold) {
+ /*
+ * Release a hold from the accord.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+ }
+
+ return (error);
+}
+
+/*
+ * Set up the global state of the ptrace(2) emulation: the kmem cache that
+ * accords are allocated from, and the condition variable that is broadcast
+ * whenever a tracee stops being busy.
+ */
+void
+lx_ptrace_init(void)
+{
+	lx_ptrace_accord_cache = kmem_cache_create("lx_ptrace_accord",
+	    sizeof (lx_ptrace_accord_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+
+	cv_init(&lx_ptrace_busy_cv, NULL, CV_DEFAULT, NULL);
+}
+
+
+/*
+ * Tear down the global state created by lx_ptrace_init().
+ */
+void
+lx_ptrace_fini(void)
+{
+	kmem_cache_destroy(lx_ptrace_accord_cache);
+
+	cv_destroy(&lx_ptrace_busy_cv);
+}
diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h
index cda0f7f82b..e7f5ee9867 100644
--- a/usr/src/uts/common/brand/lx/sys/lx_brand.h
+++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h
@@ -80,10 +80,10 @@ extern "C" {
#define B_LPID_TO_SPAIR 128
#define B_SYSENTRY 129
#define B_SYSRETURN 130
-#define B_PTRACE_SYSCALL 131
+#define B_PTRACE_KERNEL 131
#define B_SET_AFFINITY_MASK 132
#define B_GET_AFFINITY_MASK 133
-#define B_PTRACE_EXT_OPTS 134
+#define B_PTRACE_CLONE_BEGIN 134
#define B_PTRACE_STOP_FOR_OPT 135
#define B_UNSUPPORTED 136
#define B_STORE_ARGS 137
@@ -91,37 +91,31 @@ extern "C" {
#define B_SIGNAL_RETURN 139
#define B_UNWIND_NTV_SYSC_FLAG 140
#define B_EXIT_AS_SIG 141
-#define B_PTRACE_GETEVENTMSG 142
+#define B_HELPER_WAITID 142
#define B_IKE_SYSCALL 192
-/* B_PTRACE_EXT_OPTS subcommands */
-#define B_PTRACE_EXT_OPTS_SET 1
-#define B_PTRACE_EXT_OPTS_GET 2
-#define B_PTRACE_EXT_OPTS_EVT 3
-#define B_PTRACE_DETACH 4
-
+#ifndef _ASM
/*
* Support for Linux PTRACE_SETOPTIONS handling.
*/
-#define LX_PTRACE_O_TRACESYSGOOD 0x0001
-#define LX_PTRACE_O_TRACEFORK 0x0002
-#define LX_PTRACE_O_TRACEVFORK 0x0004
-#define LX_PTRACE_O_TRACECLONE 0x0008
-#define LX_PTRACE_O_TRACEEXEC 0x0010
-#define LX_PTRACE_O_TRACEVFORKDONE 0x0020
-#define LX_PTRACE_O_TRACEEXIT 0x0040
-#define LX_PTRACE_O_TRACESECCOMP 0x0080
-/*
- * lx emulation-specific flag to indicate this is a child process being stopped
- * due to one of the PTRACE_SETOPTIONS above.
- */
-#define EMUL_PTRACE_O_CHILD 0x8000
-/*
- * lx emulation-specific flag to determine via B_PTRACE_EXT_OPTS_GET if a
- * process is being traced because of one of the PTRACE_SETOPTIONS above.
- */
-#define EMUL_PTRACE_IS_TRACED 0x8000
+typedef enum lx_ptrace_options {
+ LX_PTRACE_O_TRACESYSGOOD = 0x0001,
+ LX_PTRACE_O_TRACEFORK = 0x0002,
+ LX_PTRACE_O_TRACEVFORK = 0x0004,
+ LX_PTRACE_O_TRACECLONE = 0x0008,
+ LX_PTRACE_O_TRACEEXEC = 0x0010,
+ LX_PTRACE_O_TRACEVFORKDONE = 0x0020,
+ LX_PTRACE_O_TRACEEXIT = 0x0040,
+ LX_PTRACE_O_TRACESECCOMP = 0x0080
+} lx_ptrace_options_t;
+
+#define LX_PTRACE_O_ALL \
+ (LX_PTRACE_O_TRACESYSGOOD | LX_PTRACE_O_TRACEFORK | \
+ LX_PTRACE_O_TRACEVFORK | LX_PTRACE_O_TRACECLONE | \
+ LX_PTRACE_O_TRACEEXEC | LX_PTRACE_O_TRACEVFORKDONE | \
+ LX_PTRACE_O_TRACEEXIT | LX_PTRACE_O_TRACESECCOMP)
+#endif /* !_ASM */
/* siginfo si_status for traced events */
#define LX_PTRACE_EVENT_FORK 0x100
@@ -132,6 +126,17 @@ extern "C" {
#define LX_PTRACE_EVENT_EXIT 0x600
#define LX_PTRACE_EVENT_SECCOMP 0x700
+/*
+ * Brand-private values for the "pr_what" member of lwpstatus, for use with the
+ * PR_BRAND stop reason. These reasons are validated in lx_stop_notify();
+ * update it if you add new reasons here.
+ */
+#define LX_PR_SYSENTRY 1
+#define LX_PR_SYSEXIT 2
+#define LX_PR_SIGNALLED 3
+#define LX_PR_EVENT 4
+
+
#define LX_VERSION_1 1
#define LX_VERSION LX_VERSION_1
@@ -257,10 +262,6 @@ typedef struct lx_proc_data {
uintptr_t l_traceflag; /* address of 32-bit tracing flag */
pid_t l_ppid; /* pid of originating parent proc */
uint64_t l_ptrace; /* process being observed with ptrace */
- uint_t l_ptrace_opts; /* process's extended ptrace options */
- uint_t l_ptrace_event; /* extended ptrace option trap event */
- uint_t l_ptrace_is_traced; /* set if traced due to ptrace setoptions */
- ulong_t l_ptrace_eventmsg; /* extended ptrace event msg */
lx_elf_data_t l_elf_data; /* ELF data for linux executable */
int l_signal; /* signal to deliver to parent when this */
/* thread group dies */
@@ -282,10 +283,70 @@ typedef ulong_t lx_affmask_t[LX_AFF_ULONGS];
#ifdef _KERNEL
+typedef struct lx_lwp_data lx_lwp_data_t;
+
+/*
+ * Flag values for "lxpa_flags" on a ptrace(2) accord.
+ */
+typedef enum lx_accord_flags {
+ LX_ACC_TOMBSTONE = 0x01
+} lx_accord_flags_t;
+
+/*
+ * Flag values for "br_ptrace_flags" in the LWP-specific data.
+ */
+typedef enum lx_ptrace_state {
+ LX_PTRACE_SYSCALL = 0x01,
+ LX_PTRACE_EXITING = 0x02,
+ LX_PTRACE_STOPPING = 0x04,
+ LX_PTRACE_INHERIT = 0x08,
+ LX_PTRACE_STOPPED = 0x10,
+ LX_PTRACE_PARENT_WAIT = 0x20,
+ LX_PTRACE_CLDPEND = 0x40,
+ LX_PTRACE_CLONING = 0x80
+} lx_ptrace_state_t;
+
+/*
+ * A ptrace(2) accord represents the relationship between a tracer LWP and the
+ * set of LWPs that it is tracing: the tracees. This data structure belongs
+ * primarily to the tracer, but is reference counted so that it may be freed by
+ * whoever references it last.
+ */
+typedef struct lx_ptrace_accord {
+ kmutex_t lxpa_lock;
+ uint_t lxpa_refcnt;
+ lx_accord_flags_t lxpa_flags;
+
+ /*
+ * The tracer must hold "pidlock" while clearing these fields for
+ * exclusion of waitid(), etc.
+ */
+ lx_lwp_data_t *lxpa_tracer;
+ kcondvar_t *lxpa_cvp;
+
+ /*
+ * The "lxpa_tracees_lock" mutex protects the tracee list.
+ */
+ kmutex_t lxpa_tracees_lock;
+ list_t lxpa_tracees;
+} lx_ptrace_accord_t;
+
+/*
+ * These values are stored in the per-LWP data for a tracee when it is attached
+ * to a tracer. They record the method that was used to attach.
+ */
+typedef enum lx_ptrace_attach {
+ LX_PTA_NONE = 0x00, /* not attached */
+ LX_PTA_ATTACH = 0x01, /* due to tracer using PTRACE_ATTACH */
+ LX_PTA_TRACEME = 0x02, /* due to child using PTRACE_TRACEME */
+ LX_PTA_INHERIT_CLONE = 0x04, /* due to CLONE_PTRACE clone(2) flag */
+ LX_PTA_INHERIT_OPTIONS = 0x08 /* due to PTRACE_SETOPTIONS options */
+} lx_ptrace_attach_t;
+
/*
* lx-specific data in the klwp_t
*/
-typedef struct lx_lwp_data {
+struct lx_lwp_data {
uint_t br_ntv_syscall; /* 1 = syscall from native libc */
uint_t br_lwp_flags; /* misc. flags */
klwp_t *br_lwp; /* back pointer to container lwp */
@@ -319,8 +380,26 @@ typedef struct lx_lwp_data {
void *br_scall_args;
int br_args_size; /* size in bytes of br_scall_args */
- uint_t br_ptrace; /* ptrace is active for this LWP */
-} lx_lwp_data_t;
+ boolean_t br_waitid_emulate;
+ int br_waitid_flags;
+
+ lx_ptrace_state_t br_ptrace_flags; /* ptrace state for this LWP */
+ lx_ptrace_options_t br_ptrace_options; /* PTRACE_SETOPTIONS options */
+ lx_ptrace_options_t br_ptrace_clone_option; /* current clone(2) type */
+
+ lx_ptrace_attach_t br_ptrace_attach; /* how did we get attached */
+ lx_ptrace_accord_t *br_ptrace_accord; /* accord for this tracer LWP */
+ lx_ptrace_accord_t *br_ptrace_tracer; /* accord tracing this LWP */
+ list_node_t br_ptrace_linkage; /* linkage for lxpa_tracees list */
+
+ ushort_t br_ptrace_whystop; /* stop reason, 0 for no stop */
+ ushort_t br_ptrace_whatstop; /* stop sub-reason */
+
+ int32_t br_ptrace_stopsig; /* stop signal, 0 for no signal */
+
+ uint_t br_ptrace_event;
+ ulong_t br_ptrace_eventmsg;
+};
/*
* Upper limit on br_args_size, low because this value can persist until
diff --git a/usr/src/uts/common/brand/lx/sys/lx_misc.h b/usr/src/uts/common/brand/lx/sys/lx_misc.h
index 56b5bb4047..7b77789c56 100644
--- a/usr/src/uts/common/brand/lx/sys/lx_misc.h
+++ b/usr/src/uts/common/brand/lx/sys/lx_misc.h
@@ -46,6 +46,20 @@ extern boolean_t lx_wait_filter(proc_t *, proc_t *);
extern void lx_ifname_convert(char *, int);
+extern boolean_t lx_ptrace_stop(ushort_t);
+extern void lx_stop_notify(proc_t *, klwp_t *, ushort_t, ushort_t);
+extern void lx_ptrace_init(void);
+extern void lx_ptrace_fini(void);
+extern int lx_ptrace_kernel(int, pid_t, uintptr_t, uintptr_t);
+extern int lx_waitid_helper(idtype_t, id_t, k_siginfo_t *, int, boolean_t *,
+ int *);
+extern void lx_ptrace_exit(proc_t *, klwp_t *);
+extern void lx_ptrace_inherit_tracer(lx_lwp_data_t *, lx_lwp_data_t *);
+extern int lx_ptrace_stop_for_option(int, boolean_t, ulong_t);
+extern int lx_ptrace_set_clone_inherit(int, boolean_t);
+extern int lx_sigcld_repost(proc_t *, sigqueue_t *);
+extern int lx_issig_stop(proc_t *, klwp_t *);
+
#endif
#ifdef __cplusplus
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_clone.c b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
index 949db3a73b..d73c5f100b 100644
--- a/usr/src/uts/common/brand/lx/syscall/lx_clone.c
+++ b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
@@ -21,7 +21,7 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2014 Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#include <sys/types.h>
@@ -32,25 +32,10 @@
#include <sys/lx_ldt.h>
#include <sys/lx_misc.h>
#include <lx_signum.h>
+#include <lx_syscall.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
-#define LX_CSIGNAL 0x000000ff
-#define LX_CLONE_VM 0x00000100
-#define LX_CLONE_FS 0x00000200
-#define LX_CLONE_FILES 0x00000400
-#define LX_CLONE_SIGHAND 0x00000800
-#define LX_CLONE_PID 0x00001000
-#define LX_CLONE_PTRACE 0x00002000
-#define LX_CLONE_PARENT 0x00008000
-#define LX_CLONE_THREAD 0x00010000
-#define LX_CLONE_SYSVSEM 0x00040000
-#define LX_CLONE_SETTLS 0x00080000
-#define LX_CLONE_PARENT_SETTID 0x00100000
-#define LX_CLONE_CHILD_CLEARTID 0x00200000
-#define LX_CLONE_DETACH 0x00400000
-#define LX_CLONE_CHILD_SETTID 0x01000000
-
/*
* Our lwp has already been created at this point, so this routine is
* responsible for setting up all the state needed to track this as a
diff --git a/usr/src/uts/common/fs/proc/prcontrol.c b/usr/src/uts/common/fs/proc/prcontrol.c
index a5679a8afb..7e99d23b97 100644
--- a/usr/src/uts/common/fs/proc/prcontrol.c
+++ b/usr/src/uts/common/fs/proc/prcontrol.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -1481,7 +1481,7 @@ pr_setsig(prnode_t *pnp, siginfo_t *sip)
} else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
/* If SIGKILL, set stopped lwp running */
p->p_stopsig = 0;
- t->t_schedflag |= TS_XSTART | TS_PSTART;
+ t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART;
t->t_dtrace_stop = 0;
setrun_locked(t);
}
diff --git a/usr/src/uts/common/fs/proc/prsubr.c b/usr/src/uts/common/fs/proc/prsubr.c
index 7801fd0ac8..284bf8cb88 100644
--- a/usr/src/uts/common/fs/proc/prsubr.c
+++ b/usr/src/uts/common/fs/proc/prsubr.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -201,6 +201,7 @@ prchoose(proc_t *p)
case PR_SYSEXIT:
case PR_SIGNALLED:
case PR_FAULTED:
+ case PR_BRAND:
/*
* Make an lwp calling exit() be the
* last lwp seen in the process.
diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c
index 6a27544201..02844cef07 100644
--- a/usr/src/uts/common/os/exit.c
+++ b/usr/src/uts/common/os/exit.c
@@ -400,14 +400,36 @@ proc_exit(int why, int what)
if (z->zone_boot_err == 0 &&
zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
- if (z->zone_restart_init == B_TRUE) {
- if (restart_init(what, why) == 0)
- return (0);
- }
- z->zone_init_status = wstat(why, what);
- (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
- zone_kcred());
+ /*
+ * If the init process should be restarted, the
+ * "zone_restart_init" member will be set. Some init
+ * programs in branded zones do not tolerate a restart
+ * in the traditional manner; also setting the
+ * "zone_reboot_on_init_exit" member will cause the
+ * entire zone to be rebooted instead. If
+ * "zone_restart_init" is not set, the zone shuts down.
+ */
+ if (z->zone_reboot_on_init_exit == B_TRUE &&
+ z->zone_restart_init == B_TRUE) {
+ /*
+ * Trigger a zone reboot and continue
+ * with exit processing.
+ */
+ z->zone_init_status = wstat(why, what);
+ (void) zone_kadmin(A_REBOOT, 0, NULL,
+ zone_kcred());
+
+ } else {
+ if (z->zone_restart_init == B_TRUE) {
+ if (restart_init(what, why) == 0)
+ return (0);
+ }
+
+ z->zone_init_status = wstat(why, what);
+ (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
+ zone_kcred());
+ }
}
/*
@@ -995,10 +1017,9 @@ winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
int
waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
{
- int found;
proc_t *cp, *pp;
- int proc_gone;
int waitflag = !(options & WNOWAIT);
+ boolean_t have_brand_helper = B_FALSE;
/*
* Obsolete flag, defined here only for binary compatibility
@@ -1047,10 +1068,37 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
return (ECHILD);
}
- while (pp->p_child != NULL) {
+ if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
+ have_brand_helper = B_TRUE;
+ }
+
+ while (pp->p_child != NULL || have_brand_helper) {
+ boolean_t brand_wants_wait = B_FALSE;
+ int proc_gone = 0;
+ int found = 0;
+
+ /*
+ * Give the brand a chance to return synthetic results from
+ * this waitid() call before we do the real thing.
+ */
+ if (have_brand_helper) {
+ int ret;
- proc_gone = 0;
+ if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
+ &brand_wants_wait, &ret) == 0) {
+ mutex_exit(&pidlock);
+ return (ret);
+ }
+ if (pp->p_child == NULL) {
+ goto no_real_children;
+ }
+ }
+
+ /*
+ * Look for interesting children in the newstate list.
+ */
+ VERIFY(pp->p_child != NULL);
for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
continue;
@@ -1107,7 +1155,6 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
* Wow! None of the threads on the p_sibling_ns list were
* interesting threads. Check all the kids!
*/
- found = 0;
for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
if (idtype == P_PID && id != cp->p_pid)
continue;
@@ -1186,11 +1233,12 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
break;
}
+no_real_children:
/*
* If we found no interesting processes at all,
* break out and return ECHILD.
*/
- if (found + proc_gone == 0)
+ if (!brand_wants_wait && (found + proc_gone == 0))
break;
if (options & WNOHANG) {
@@ -1209,7 +1257,7 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
* change state while we wait, we don't wait at all.
* Get out with ECHILD according to SVID.
*/
- if (found == proc_gone)
+ if (!brand_wants_wait && (found == proc_gone))
break;
if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
diff --git a/usr/src/uts/common/os/sig.c b/usr/src/uts/common/os/sig.c
index b117bf3584..ae643c280e 100644
--- a/usr/src/uts/common/os/sig.c
+++ b/usr/src/uts/common/os/sig.c
@@ -22,7 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -194,7 +194,7 @@ eat_signal(kthread_t *t, int sig)
!(ttoproc(t)->p_proc_flag & P_PR_LOCK)) {
ttoproc(t)->p_stopsig = 0;
t->t_dtrace_stop = 0;
- t->t_schedflag |= TS_XSTART | TS_PSTART;
+ t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART;
setrun_locked(t);
} else if (t != curthread && t->t_state == TS_ONPROC) {
aston(t); /* make it do issig promptly */
@@ -608,6 +608,21 @@ issig_forreal(void)
}
/*
+ * Allow the brand the chance to alter (or suppress) delivery
+ * of this signal.
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_issig_stop != NULL) {
+ /*
+ * The brand hook will return 0 if it would like
+ * us to drive on, or -1 if we should restart
+ * the loop to check other conditions.
+ */
+ if (BROP(p)->b_issig_stop(p, lwp) != 0) {
+ continue;
+ }
+ }
+
+ /*
* Honor requested stop before dealing with the
* current signal; a debugger may change it.
* Do not want to go back to loop here since this is a special
@@ -939,6 +954,16 @@ stop(int why, int what)
}
break;
+ case PR_BRAND:
+ /*
+ * We have been stopped by the brand code for a brand-private
+ * reason. This is an asynchronous stop affecting only this
+ * LWP.
+ */
+ VERIFY(PROC_IS_BRANDED(p));
+ flags &= ~TS_BSTART;
+ break;
+
default: /* /proc stop */
flags &= ~TS_PSTART;
/*
@@ -1050,7 +1075,7 @@ stop(int why, int what)
}
}
- if (why != PR_JOBCONTROL && why != PR_CHECKPOINT) {
+ if (why != PR_JOBCONTROL && why != PR_CHECKPOINT && why != PR_BRAND) {
/*
* Do process-level notification when all lwps are
* either stopped on events of interest to /proc
@@ -1156,6 +1181,13 @@ stop(int why, int what)
if (why == PR_CHECKPOINT)
del_one_utstop();
+ /*
+ * Allow the brand to post notification of this stop condition.
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_stop_notify != NULL) {
+ BROP(p)->b_stop_notify(p, lwp, why, what);
+ }
+
thread_lock(t);
ASSERT((t->t_schedflag & TS_ALLSTART) == 0);
t->t_schedflag |= flags;
@@ -1177,7 +1209,7 @@ stop(int why, int what)
(p->p_flag & (SEXITLWPS|SKILLED))) {
p->p_stopsig = 0;
thread_lock(t);
- t->t_schedflag |= TS_XSTART | TS_PSTART;
+ t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART;
setrun_locked(t);
thread_unlock_nopreempt(t);
} else if (why == PR_JOBCONTROL) {
@@ -1795,6 +1827,15 @@ sigcld_repost()
sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
mutex_enter(&pidlock);
+ if (PROC_IS_BRANDED(pp) && BROP(pp)->b_sigcld_repost != NULL) {
+ /*
+ * Allow the brand to inject synthetic SIGCLD signals.
+ */
+ if (BROP(pp)->b_sigcld_repost(pp, sqp) == 0) {
+ mutex_exit(&pidlock);
+ return;
+ }
+ }
for (cp = pp->p_child; cp; cp = cp->p_sibling) {
if (cp->p_pidflag & CLDPEND) {
post_sigcld(cp, sqp);
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index 145ad10bb5..347a90a022 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -2624,6 +2624,7 @@ zone_init(void)
zone0.zone_ntasks = 1;
mutex_exit(&p0.p_lock);
zone0.zone_restart_init = B_TRUE;
+ zone0.zone_reboot_on_init_exit = B_FALSE;
zone0.zone_init_status = -1;
zone0.zone_brand = &native_brand;
rctl_prealloc_destroy(gp);
@@ -4820,6 +4821,7 @@ zone_create(const char *zone_name, const char *zone_root,
zone->zone_ncpus = 0;
zone->zone_ncpus_online = 0;
zone->zone_restart_init = B_TRUE;
+ zone->zone_reboot_on_init_exit = B_FALSE;
zone->zone_init_status = -1;
zone->zone_brand = &native_brand;
zone->zone_initname = NULL;
diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h
index 3486ae864d..b3abada863 100644
--- a/usr/src/uts/common/sys/brand.h
+++ b/usr/src/uts/common/sys/brand.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _SYS_BRAND_H
@@ -132,6 +132,11 @@ struct brand_ops {
boolean_t (*b_native_exec)(uint8_t, const char **);
void (*b_ptrace_exectrap)(proc_t *);
uint32_t (*b_map32limit)(proc_t *);
+ void (*b_stop_notify)(proc_t *, klwp_t *, ushort_t, ushort_t);
+ int (*b_waitid_helper)(idtype_t, id_t, k_siginfo_t *, int,
+ boolean_t *, int *);
+ int (*b_sigcld_repost)(proc_t *, sigqueue_t *);
+ int (*b_issig_stop)(proc_t *, klwp_t *);
};
/*
diff --git a/usr/src/uts/common/sys/procfs.h b/usr/src/uts/common/sys/procfs.h
index f592fd9dcf..501af712ef 100644
--- a/usr/src/uts/common/sys/procfs.h
+++ b/usr/src/uts/common/sys/procfs.h
@@ -25,6 +25,7 @@
*/
/*
* Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _SYS_PROCFS_H
@@ -233,6 +234,7 @@ typedef struct pstatus {
#define PR_FAULTED 6
#define PR_SUSPENDED 7
#define PR_CHECKPOINT 8
+#define PR_BRAND 9
/*
* lwp ps(1) information file. /proc/<pid>/lwp/<lwpid>/lwpsinfo
diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h
index 9f2e166fea..41ea2331df 100644
--- a/usr/src/uts/common/sys/thread.h
+++ b/usr/src/uts/common/sys/thread.h
@@ -419,8 +419,9 @@ typedef struct _kthread {
#define TS_RESUME 0x1000 /* setrun() by CPR resume process */
#define TS_CREATE 0x2000 /* setrun() by syslwp_create() */
#define TS_RUNQMATCH 0x4000 /* exact run queue balancing by setbackdq() */
+#define TS_BSTART 0x8000 /* setrun() by brand */
#define TS_ALLSTART \
- (TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE)
+ (TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE|TS_BSTART)
#define TS_ANYWAITQ (TS_PROJWAITQ|TS_ZONEWAITQ)
/*
@@ -448,6 +449,10 @@ typedef struct _kthread {
#define ISTOPPED(t) ((t)->t_state == TS_STOPPED && \
!((t)->t_schedflag & TS_PSTART))
+/* True if thread is stopped for a brand-specific reason */
+#define BSTOPPED(t) ((t)->t_state == TS_STOPPED && \
+ !((t)->t_schedflag & TS_BSTART))
+
/* True if thread is asleep and wakeable */
#define ISWAKEABLE(t) (((t)->t_state == TS_SLEEP && \
((t)->t_flag & T_WAKEABLE)))
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 7ab9377e16..a5d1610842 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -594,6 +594,7 @@ typedef struct zone {
tsol_mlp_list_t zone_mlps; /* MLPs on zone-private addresses */
boolean_t zone_restart_init; /* Restart init if it dies? */
+ boolean_t zone_reboot_on_init_exit; /* Reboot if init dies? */
struct brand *zone_brand; /* zone's brand */
void *zone_brand_data; /* store brand specific data */
id_t zone_defaultcid; /* dflt scheduling class id */
diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files
index a4d6b7e309..0f058f262d 100644
--- a/usr/src/uts/intel/Makefile.files
+++ b/usr/src/uts/intel/Makefile.files
@@ -21,7 +21,7 @@
#
# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2014, Joyent, Inc. All rights reserved.
+# Copyright 2015, Joyent, Inc.
#
#
@@ -289,6 +289,7 @@ LX_BRAND_OBJS = \
lx_modify_ldt.o \
lx_pid.o \
lx_pipe.o \
+ lx_ptrace.o \
lx_rw.o \
lx_sched.o \
lx_signum.o \