diff options
Diffstat (limited to 'usr/src/lib')
30 files changed, 1175 insertions, 1468 deletions
diff --git a/usr/src/lib/brand/lx/lx_brand/common/aio.c b/usr/src/lib/brand/lx/lx_brand/common/aio.c index 1e46041f19..36c7cf3afb 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/aio.c +++ b/usr/src/lib/brand/lx/lx_brand/common/aio.c @@ -299,7 +299,7 @@ lx_io_getevents(lx_aio_context_t *ctx, long min_nr, long nr, assert(ctx->lxaio_waiters > 0); ctx->lxaio_waiters--; - if (rval == -1 || nget == 0 || + if ((rval == -1 && err != ETIME) || nget == 0 || (nget == 1 && list[0].portev_source == PORT_SOURCE_ALERT)) { /* * If we're being destroyed, kick our waiter and clear out with @@ -312,7 +312,7 @@ lx_io_getevents(lx_aio_context_t *ctx, long min_nr, long nr, mutex_unlock(&ctx->lxaio_lock); - return (nget == 0 || err == ETIME ? 0 : -err); + return (nget == 0 ? 0 : -err); } out = SAFE_ALLOCA(nget * sizeof (lx_io_event_t)); diff --git a/usr/src/lib/brand/lx/lx_brand/common/clone.c b/usr/src/lib/brand/lx/lx_brand/common/clone.c index 58c84c773b..87f966cc89 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/clone.c +++ b/usr/src/lib/brand/lx/lx_brand/common/clone.c @@ -49,23 +49,7 @@ #include <sys/lx_debug.h> #include <sys/lx_thread.h> #include <sys/fork.h> - -#define LX_CSIGNAL 0x000000ff -#define LX_CLONE_VM 0x00000100 -#define LX_CLONE_FS 0x00000200 -#define LX_CLONE_FILES 0x00000400 -#define LX_CLONE_SIGHAND 0x00000800 -#define LX_CLONE_PID 0x00001000 -#define LX_CLONE_PTRACE 0x00002000 -#define LX_CLONE_VFORK 0x00004000 -#define LX_CLONE_PARENT 0x00008000 -#define LX_CLONE_THREAD 0x00010000 -#define LX_CLONE_SYSVSEM 0x00040000 -#define LX_CLONE_SETTLS 0x00080000 -#define LX_CLONE_PARENT_SETTID 0x00100000 -#define LX_CLONE_CHILD_CLEARTID 0x00200000 -#define LX_CLONE_DETACH 0x00400000 -#define LX_CLONE_CHILD_SETTID 0x01000000 +#include <lx_syscall.h> #define SHARED_AS \ (LX_CLONE_VM | LX_CLONE_FS | LX_CLONE_FILES | LX_CLONE_SIGHAND \ @@ -116,6 +100,7 @@ struct clone_state { sigset_t c_sigmask; /* signal mask */ lx_affmask_t c_affmask; /* CPU affinity mask */ volatile int 
*c_clone_res; /* pid/error returned to cloner */ + int c_ptrace_event; /* ptrace(2) event for child stop */ }; extern void lx_setup_clone(uintptr_t, void *, void *); @@ -147,7 +132,7 @@ lx_exit(uintptr_t p1) assert(lx_tsd != 0); - lx_tsd->lxtsd_exit = LX_EXIT; + lx_tsd->lxtsd_exit = LX_ET_EXIT; lx_tsd->lxtsd_exit_status = status; lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEEXIT, B_FALSE, @@ -200,7 +185,7 @@ lx_group_exit(uintptr_t p1) assert(lx_tsd != 0); - lx_tsd->lxtsd_exit = LX_EXIT_GROUP; + lx_tsd->lxtsd_exit = LX_ET_EXIT_GROUP; lx_tsd->lxtsd_exit_status = status; /* @@ -315,7 +300,7 @@ clone_start(void *arg) * Do the final stack twiddling, reset %gs, and return to the * clone(2) path. */ - if (lx_tsd.lxtsd_exit == 0) { + if (lx_tsd.lxtsd_exit == LX_ET_NONE) { if (sigprocmask(SIG_SETMASK, &cs->c_sigmask, NULL) < 0) { *(cs->c_clone_res) = -errno; @@ -329,6 +314,11 @@ clone_start(void *arg) */ *(cs->c_clone_res) = rval; + /* + * Fire the ptrace(2) event stop in the new thread: + */ + lx_ptrace_stop_if_option(cs->c_ptrace_event, B_TRUE, 0); + #if defined(_LP64) (void) syscall(SYS_brand, B_CLR_NTV_SYSC_FLAG); lx_setup_clone((uintptr_t)&cs->c_regs, cs->c_retaddr, @@ -347,12 +337,7 @@ clone_start(void *arg) * setcontext() to jump to the thread context state saved in * getcontext(), above. */ - if (lx_tsd.lxtsd_exit == LX_EXIT) - thr_exit((void *)(long)lx_tsd.lxtsd_exit_status); - else - exit(lx_tsd.lxtsd_exit_status); - - assert(0); + lx_exit_common(lx_tsd.lxtsd_exit, lx_tsd.lxtsd_exit_status); /*NOTREACHED*/ } @@ -455,6 +440,12 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, ptrace_event = ptrace_clone_event(flags); + /* + * Inform the in-kernel ptrace(2) subsystem that we are about to + * emulate a fork(2), vfork(2) or clone(2) system call. + */ + lx_ptrace_clone_begin(ptrace_event, !!(flags & LX_CLONE_PTRACE)); + /* See if this is a fork() operation or a thr_create(). 
*/ if (IS_FORK(flags) || IS_VFORK(flags)) { if (flags & LX_CLONE_PARENT) { @@ -463,9 +454,6 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, return (-ENOTSUP); } - if (flags & LX_CLONE_PTRACE) - lx_ptrace_fork(); - if ((flags & LX_CSIGNAL) == 0) fork_flags |= FORK_NOSIGCHLD; @@ -509,7 +497,6 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, return ((rval < 0) ? -errno : rval); } - /* * Set up additional data in the lx_proc_data structure as * necessary. @@ -584,6 +571,7 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, cs->c_ldtinfo = ldtinfo; cs->c_ctidp = ctidp; cs->c_clone_res = &clone_res; + cs->c_ptrace_event = ptrace_event; #if defined(_LP64) /* * The AMD64 ABI says that the kernel clobbers %rcx and %r11. We @@ -649,7 +637,7 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, ; rval = clone_res; - lx_ptrace_stop_if_option(ptrace_event, B_TRUE, 0); + lx_ptrace_stop_if_option(ptrace_event, B_FALSE, (ulong_t)rval); } return (rval); diff --git a/usr/src/lib/brand/lx/lx_brand/common/fork.c b/usr/src/lib/brand/lx/lx_brand/common/fork.c index 9f2fbd6406..b0edee1adb 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/fork.c +++ b/usr/src/lib/brand/lx/lx_brand/common/fork.c @@ -41,18 +41,36 @@ long lx_fork(void) { - int ret = fork1(); + int ret; - if (ret == 0) { - if (lx_is_rpm) + /* + * Inform the in-kernel ptrace(2) subsystem that we are about to + * emulate fork(2). + */ + lx_ptrace_clone_begin(LX_PTRACE_O_TRACEFORK, B_FALSE); + + switch (ret = fork1()) { + case -1: + return (-errno); + + case 0: + /* + * Returning in the new child. + */ + if (lx_is_rpm) { (void) sleep(lx_rpm_delay); + } lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEFORK, B_TRUE, 0); - } else if (ret != -1) { + return (0); + + default: + /* + * Returning in the new parent. + */ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEFORK, B_FALSE, (ulong_t)ret); + return (ret); } - - return (ret == -1 ? 
-errno : ret); } /* @@ -65,14 +83,31 @@ lx_fork(void) long lx_vfork(void) { - int ret = fork1(); + int ret; - if (ret == 0) { + /* + * Inform the in-kernel ptrace(2) subsystem that we are about to + * emulate vfork(2). + */ + lx_ptrace_clone_begin(LX_PTRACE_O_TRACEVFORK, B_FALSE); + + switch (ret = fork1()) { + case -1: + return (-errno); + + case 0: + /* + * Returning in the new child. + */ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEVFORK, B_TRUE, 0); - } else if (ret != -1) { + return (0); + + default: + /* + * Returning in the new parent. + */ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEVFORK, B_FALSE, (ulong_t)ret); + return (ret); } - - return (ret == -1 ? -errno : ret); } diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c index b8fdf36b42..abe015c2c4 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c +++ b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c @@ -172,6 +172,9 @@ struct lx_locale_ending { int se_size; /* solaris ending string length */ }; +__thread int lx_do_syscall_restart; +__thread int lx_had_sigchild; + #define l2s_locale(lname, sname) \ {(lname), (sname), sizeof ((lname)) - 1, sizeof ((sname)) - 1} @@ -647,6 +650,7 @@ lx_emulate(lx_regs_t *rp) } #endif /* _ILP32 */ +restart_syscall: if (s->sy_flags & LX_SYS_IKE) { lx_debug("\tsyscall %d re-vectoring to lx kernel module " "for %s()", syscall_num, s->sy_name); @@ -679,6 +683,12 @@ lx_emulate(lx_regs_t *rp) ret = -stol_errno[-ret]; } + if (lx_do_syscall_restart && ret == -stol_errno[EINTR]) { + lx_debug("restarting system call due to signal interruption"); + lx_do_syscall_restart = 0; + goto restart_syscall; + } + out: /* * For 32-bit, %eax holds the return code from the system call. 
For @@ -962,7 +972,7 @@ lx_init(int argc, char *argv[], char *envp[]) lx_err_fatal("Unable to initialize thread-specific exit " "context: %s", strerror(errno)); - if (lx_tsd.lxtsd_exit == 0) { + if (lx_tsd.lxtsd_exit == LX_ET_NONE) { #if defined(_LP64) /* Switch to Linux syscall mode */ (void) syscall(SYS_brand, B_CLR_NTV_SYSC_FLAG); @@ -978,17 +988,36 @@ lx_init(int argc, char *argv[], char *envp[]) * exit_group() system call. In turn the brand library did a * setcontext() to jump to the thread context state we saved above. */ - if (lx_tsd.lxtsd_exit == 1) - thr_exit((void *)(long)lx_tsd.lxtsd_exit_status); - else - exit(lx_tsd.lxtsd_exit_status); - - assert(0); - + lx_exit_common(lx_tsd.lxtsd_exit, lx_tsd.lxtsd_exit_status); /*NOTREACHED*/ return (0); } +void +lx_exit_common(lx_exit_type_t exit_type, uintptr_t exit_value) +{ + int ev = 0xff & exit_value; + + switch (exit_type) { + case LX_ET_EXIT: + /* + * The native thread return value is never seen so we pass + * NULL. + */ + thr_exit(NULL); + break; + + case LX_ET_EXIT_GROUP: + exit(ev); + break; + + default: + abort(); + } + + abort(); +} + /* * Walk back through the stack until we find the lx_emulate() frame. */ diff --git a/usr/src/lib/brand/lx/lx_brand/common/misc.c b/usr/src/lib/brand/lx/lx_brand/common/misc.c index f60f3f290f..750af869a4 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/misc.c +++ b/usr/src/lib/brand/lx/lx_brand/common/misc.c @@ -572,8 +572,6 @@ lx_execve(uintptr_t p1, uintptr_t p2, uintptr_t p3) if (argv == NULL) argv = nullist; - lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEEXEC, B_FALSE, 0); - /* * Emulate PR_SET_KEEPCAPS which is reset on execve. If this is not done * the emulated capabilities could be reduced more than expected. 
diff --git a/usr/src/lib/brand/lx/lx_brand/common/ptrace.c b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c index 2efc64a43e..174dbe8c19 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/ptrace.c +++ b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c @@ -51,82 +51,17 @@ #include <ieeefp.h> #include <assert.h> #include <libintl.h> +#include <lx_syscall.h> /* - * Linux ptrace compatibility. - * - * The brand support for ptrace(2) is built on top of the Solaris /proc - * interfaces, mounted at /native/proc in the zone. This gets quite - * complicated due to the way ptrace works and the Solaris realization of the - * Linux threading model. - * - * ptrace can only interact with a process if we are tracing it, and it is - * currently stopped. There are two ways a process can begin tracing another - * process: - * - * PTRACE_TRACEME - * - * A child process can use PTRACE_TRACEME to indicate that it wants to be - * traced by the parent. This sets the ptrace compatibility flag in /proc - * which causes ths ptrace consumer to be notified through the wait(2) - * system call of events of interest. PTRACE_TRACEME is typically used by - * the debugger by forking a process, using PTRACE_TRACEME, and finally - * doing an exec of the specified program. - * - * - * PTRACE_ATTACH - * - * We can attach to a process using PTRACE_ATTACH. This is considerably - * more complicated than the previous case. On Linux, the traced process is - * effectively reparented to the ptrace consumer so that event notification - * can go through the normal wait(2) system call. Solaris has no such - * ability to reparent a process (nor should it) so some trickery was - * required. - * - * When the ptrace consumer uses PTRACE_ATTACH it forks a monitor child - * process. The monitor enables the /proc ptrace flag for itself and uses - * the native /proc mechanisms to observe the traced process and wait for - * events of interest. 
When the traced process stops, the monitor process - * sends itself a SIGTRAP thus rousting its parent process (the ptrace - * consumer) out of wait(2). We then translate the process id and status - * code from wait(2) to those of the traced process. - * - * To detach from the process we just have to clean up tracing flags and - * clean up the monitor. - * - * ptrace can only interact with a process if we have traced it, and it is - * currently stopped (see is_traced()). For threads, there's no way to - * distinguish whether ptrace() has been called for all threads or some - * subset. Since most clients will be tracing all threads, and erroneously - * allowing ptrace to access a non-traced thread is non-fatal (or at least - * would be fatal on linux), we ignore this aspect of the problem. + * Much of the Linux ptrace(2) emulation is performed in the kernel, and there + * is a block comment in "lx_ptrace.c" that describes the facility in some + * detail. */ -#define LX_PTRACE_TRACEME 0 -#define LX_PTRACE_PEEKTEXT 1 -#define LX_PTRACE_PEEKDATA 2 -#define LX_PTRACE_PEEKUSER 3 -#define LX_PTRACE_POKETEXT 4 -#define LX_PTRACE_POKEDATA 5 -#define LX_PTRACE_POKEUSER 6 -#define LX_PTRACE_CONT 7 -#define LX_PTRACE_KILL 8 -#define LX_PTRACE_SINGLESTEP 9 -#define LX_PTRACE_GETREGS 12 -#define LX_PTRACE_SETREGS 13 -#define LX_PTRACE_GETFPREGS 14 -#define LX_PTRACE_SETFPREGS 15 -#define LX_PTRACE_ATTACH 16 -#define LX_PTRACE_DETACH 17 -#define LX_PTRACE_GETFPXREGS 18 -#define LX_PTRACE_SETFPXREGS 19 -#define LX_PTRACE_SYSCALL 24 -#define LX_PTRACE_SETOPTIONS 0x4200 -#define LX_PTRACE_GETEVENTMSG 0x4201 - /* execve syscall numbers for 64-bit vs. 
32-bit */ #if defined(_LP64) -#define LX_SYS_execve 520 +#define LX_SYS_execve 59 #else #define LX_SYS_execve 11 #endif @@ -237,22 +172,12 @@ typedef struct lx_user { int lxu_debugreg[8]; } lx_user_t; -typedef struct ptrace_monitor_map { - struct ptrace_monitor_map *pmm_next; /* next pointer */ - pid_t pmm_monitor; /* monitor child process */ - pid_t pmm_target; /* traced Linux pid */ - pid_t pmm_pid; /* Solaris pid */ - lwpid_t pmm_lwpid; /* Solaris lwpid */ - uint_t pmm_exiting; /* detached */ -} ptrace_monitor_map_t; - typedef struct ptrace_state_map { struct ptrace_state_map *psm_next; /* next pointer */ pid_t psm_pid; /* Solaris pid */ uintptr_t psm_debugreg[8]; /* debug registers */ } ptrace_state_map_t; -static ptrace_monitor_map_t *ptrace_monitor_map = NULL; static ptrace_state_map_t *ptrace_state_map = NULL; static mutex_t ptrace_map_mtx = DEFAULTMUTEX; @@ -260,6 +185,8 @@ extern void *_START_; static sigset_t blockable_sigs; +static long lx_ptrace_kernel(int, pid_t, uintptr_t, uintptr_t); + void lx_ptrace_init(void) { @@ -298,24 +225,6 @@ open_lwpfile(pid_t pid, lwpid_t lwpid, int mode, const char *name) } static int -get_status(pid_t pid, pstatus_t *psp) -{ - int fd; - - if ((fd = open_procfile(pid, O_RDONLY, "status")) < 0) - return (-ESRCH); - - if (read(fd, psp, sizeof (pstatus_t)) != sizeof (pstatus_t)) { - (void) close(fd); - return (-EIO); - } - - (void) close(fd); - - return (0); -} - -static int get_lwpstatus(pid_t pid, lwpid_t lwpid, lwpstatus_t *lsp) { int fd; @@ -869,22 +778,6 @@ debug_registers(pid_t pid) return (p != NULL? 
p->psm_debugreg : NULL); } -static void -free_debug_registers(pid_t pid) -{ - ptrace_state_map_t **pp; - ptrace_state_map_t *p; - - /* ASSERT(MUTEX_HELD(&ptrace_map_mtx) */ - for (pp = &ptrace_state_map; (p = *pp) != NULL; pp = &p->psm_next) { - if (p->psm_pid == pid) { - *pp = p->psm_next; - free(p); - break; - } - } -} - static int setup_watchpoints(pid_t pid, uintptr_t *debugreg) { @@ -952,156 +845,33 @@ setup_watchpoints(pid_t pid, uintptr_t *debugreg) } /* - * Returns TRUE if the process is traced, FALSE otherwise. This is only true - * if the process is currently stopped, and has been traced using - * PTRACE_TRACEME, PTRACE_ATTACH or one of the Linux-specific trace options. + * Returns B_TRUE if the target LWP, identified by its Linux pid, is traced by + * this LWP and is waiting in "ptrace-stop". Returns B_FALSE otherwise. */ -static int -is_traced(pid_t pid) +static boolean_t +is_ptrace_stopped(pid_t lxpid) { - ptrace_monitor_map_t *p; - pstatus_t status; - uint_t curr_opts; - pid_t mypid; + ulong_t dummy; /* - * First get the stop options since that is an indication that the - * process is being traced. + * We attempt a PTRACE_GETEVENTMSG request to determine if the tracee + * is stopped appropriately. As we are not in the kernel, this is not + * an atomic check; the process is not guaranteed to remain stopped + * once we have dropped the locks protecting that state and left the + * kernel. */ - if (syscall(SYS_brand, B_PTRACE_EXT_OPTS, B_PTRACE_EXT_OPTS_GET, pid, - &curr_opts) != 0) - return (0); - - mypid = getpid(); - - if (get_status(pid, &status) != 0) - return (0); - - /* - * When we look to see if we are tracing a process we have to take the - * PTRACE_SETOPTIONS handling into account. In particular, if we are - * tracing with PTRACE_O_TRACEFORK, etc. then we may be dealing with - * the child of a child that we started tracing. We can determine this - * by checking the EMUL_PTRACE_IS_TRACED flag and checking the parent - * of the parent. 
We cannot check for the presence of the options since - * those will be cleared during the process of detaching from a tracee. - */ - if (curr_opts & EMUL_PTRACE_IS_TRACED && status.pr_ppid != mypid) { - pstatus_t par_status; - pid_t chkpid = status.pr_ppid; - - if (get_status(status.pr_ppid, &par_status) == 0) { - chkpid = par_status.pr_ppid; - } else { - /* parent is gone, re-get our ppid */ - if (get_status(pid, &par_status) == 0) - chkpid = par_status.pr_ppid; - } - - if (chkpid == mypid) - return (1); + if (lx_ptrace_kernel(LX_PTRACE_GETEVENTMSG, lxpid, NULL, + (uintptr_t)&dummy) == 0) { + return (B_TRUE); } - if ((status.pr_flags & PR_PTRACE || - curr_opts & EMUL_PTRACE_IS_TRACED) && - (status.pr_ppid == mypid) && - (status.pr_lwp.pr_flags & PR_ISTOP)) - return (1); - - (void) mutex_lock(&ptrace_map_mtx); - for (p = ptrace_monitor_map; p != NULL; p = p->pmm_next) { - if (p->pmm_target == pid) { - (void) mutex_unlock(&ptrace_map_mtx); - return (1); - } - } - (void) mutex_unlock(&ptrace_map_mtx); - - return (0); -} - -static int -ptrace_trace_common(int fd) -{ - struct { - long cmd; - union { - long flags; - sigset_t signals; - fltset_t faults; - } arg; - } ctl; - size_t size; - - ctl.cmd = PCSTRACE; - prfillset(&ctl.arg.signals); - size = sizeof (long) + sizeof (sigset_t); - if (write(fd, &ctl, size) != size) - return (-1); - - ctl.cmd = PCSFAULT; - premptyset(&ctl.arg.faults); - size = sizeof (long) + sizeof (fltset_t); - if (write(fd, &ctl, size) != size) - return (-1); - - ctl.cmd = PCUNSET; - ctl.arg.flags = PR_FORK; - size = sizeof (long) + sizeof (long); - if (write(fd, &ctl, size) != size) - return (-1); - - return (0); -} - -/* - * Notify that parent that we wish to be traced. This is the equivalent of: - * - * 1. Stop on all signals, and nothing else - * 2. Turn off inherit-on-fork flag - * 3. 
Set ptrace compatible flag - * - * If we are not the main thread, then the client is trying to request behavior - * by which one of its own thread is to be traced. We don't support this mode - * of operation. - */ -static int -ptrace_traceme(void) -{ - int fd, ret; - int error; - long ctl[2]; - pstatus_t status; - pid_t pid = getpid(); - - if (_lwp_self() != 1) { - lx_unsupported("thread %d calling PTRACE_TRACEME is " - "unsupported", _lwp_self()); - return (-ENOTSUP); - } - - if ((ret = get_status(pid, &status)) != 0) - return (ret); - /* - * Why would a process try to do this twice? I'm not sure, but there's - * a conformance test which wants this to fail just so. + * This call should only fail with ESRCH, which tells us that + * a tracee with that pid was not found in the stopped condition. */ - if (status.pr_flags & PR_PTRACE) - return (-EPERM); - - if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) - return (-errno); + assert(errno == ESRCH); - ctl[0] = PCSET; - ctl[1] = PR_PTRACE; - error = 0; - if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl) || - ptrace_trace_common(fd) != 0) - error = -errno; - - (void) close(fd); - return (error); + return (B_FALSE); } /* @@ -1114,9 +884,6 @@ ptrace_peek(pid_t pid, uintptr_t addr, long *ret) int fd; long data; - if (!is_traced(pid)) - return (-ESRCH); - if ((fd = open_procfile(pid, O_RDONLY, "as")) < 0) return (-ESRCH); @@ -1143,9 +910,6 @@ ptrace_peek_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int *ret) uintptr_t *debugreg; int dreg; - if (!is_traced(pid)) - return (-ESRCH); - /* * The offset specified by the user is an offset into the Linux * user structure (seriously).
Rather than constructing a full @@ -1239,9 +1003,6 @@ ptrace_poke(pid_t pid, uintptr_t addr, int data) { int fd; - if (!is_traced(pid)) - return (-ESRCH); - if (addr & 0x3) return (-EINVAL); @@ -1265,9 +1026,6 @@ ptrace_poke_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int data) uintptr_t *debugreg; int dreg; - if (!is_traced(pid)) - return (-ESRCH); - if (off & 0x3) return (-EINVAL); @@ -1300,187 +1058,13 @@ ptrace_poke_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int data) } static int -ptrace_cont_common(int fd, int sig, int run, int step) -{ - long ctl[1 + 1 + sizeof (siginfo_t) / sizeof (long) + 2]; - long *ctlp = ctl; - size_t size; - - assert(0 <= sig && sig <= LX_NSIG); - assert(!step || run); - - /* - * Clear the current signal. - */ - *ctlp++ = PCCSIG; - - /* - * Send a signal if one was specified. - */ - if (sig != 0 && sig != LX_SIGSTOP) { - siginfo_t *infop; - - *ctlp++ = PCSSIG; - infop = (siginfo_t *)ctlp; - bzero(infop, sizeof (siginfo_t)); - infop->si_signo = ltos_signo[sig]; - - ctlp += sizeof (siginfo_t) / sizeof (long); - } - - /* - * If run is true, set the lwp running. - */ - if (run) { - *ctlp++ = PCRUN; - *ctlp++ = step ? 
PRSTEP : 0; - } - - size = (char *)ctlp - (char *)&ctl[0]; - assert(size <= sizeof (ctl)); - - if (write(fd, ctl, size) != size) { - lx_debug("failed to continue %s", strerror(errno)); - return (-EIO); - } - - return (0); -} - -static int -ptrace_cont_monitor(ptrace_monitor_map_t *p) -{ - long ctl[2]; - int fd; - - fd = open_procfile(p->pmm_monitor, O_WRONLY, "ctl"); - if (fd < 0) { - lx_debug("failed to open monitor ctl %d", - errno); - return (-EIO); - } - - ctl[0] = PCRUN; - ctl[1] = PRCSIG; - if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) { - (void) close(fd); - return (-EIO); - } - - (void) close(fd); - - return (0); -} - -static int -ptrace_cont(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig, int step) -{ - ptrace_monitor_map_t *p; - uintptr_t *debugreg; - int fd, ret; - - if (!is_traced(pid)) - return (-ESRCH); - - if (sig < 0 || sig > LX_NSIG) - return (-EINVAL); - - if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0) - return (-ESRCH); - - if ((ret = ptrace_cont_common(fd, sig, 1, step)) != 0) { - (void) close(fd); - return (ret); - } - - (void) close(fd); - - /* kludge: use debugreg[4] to remember the single-step flag */ - if ((debugreg = debug_registers(pid)) != NULL) - debugreg[4] = step; - - /* - * Check for a monitor and get it moving if we find it. If any of the - * /proc operations fail, we're kind of sunk so just return an error. - */ - (void) mutex_lock(&ptrace_map_mtx); - for (p = ptrace_monitor_map; p != NULL; p = p->pmm_next) { - if (p->pmm_target == lxpid) { - if ((ret = ptrace_cont_monitor(p)) != 0) - return (ret); - break; - } - } - (void) mutex_unlock(&ptrace_map_mtx); - - return (0); -} - -/* - * If a monitor exists for this traced process, dispose of it. - * First turn off its ptrace flag so we won't be notified of its - * impending demise. We ignore errors for this step since they - * indicate only that the monitor has been damaged due to pilot - * error. Then kill the monitor, and wait for it. 
If the wait - * succeeds we can dispose of the corpse, otherwise another thread's - * wait call has collected it and we need to set a flag in the - * structure so that if can be picked up in wait. - */ -static void -monitor_kill(pid_t lxpid, pid_t pid) -{ - ptrace_monitor_map_t *p, **pp; - pid_t mpid; - int fd; - long ctl[2]; - - (void) mutex_lock(&ptrace_map_mtx); - free_debug_registers(pid); - for (pp = &ptrace_monitor_map; (p = *pp) != NULL; pp = &p->pmm_next) { - if (p->pmm_target == lxpid) { - mpid = p->pmm_monitor; - if ((fd = open_procfile(mpid, O_WRONLY, "ctl")) >= 0) { - ctl[0] = PCUNSET; - ctl[1] = PR_PTRACE; - (void) write(fd, ctl, sizeof (ctl)); - (void) close(fd); - } - - (void) kill(mpid, SIGKILL); - - if (waitpid(mpid, NULL, 0) == mpid) { - *pp = p->pmm_next; - free(p); - } else { - p->pmm_exiting = 1; - } - - break; - } - } - (void) mutex_unlock(&ptrace_map_mtx); -} - -static int -ptrace_kill(pid_t lxpid, pid_t pid) +ptrace_kill(pid_t pid) { int ret; - if (!is_traced(pid)) - return (-ESRCH); - ret = kill(pid, SIGKILL); - /* kill off the monitor process, if any */ - monitor_kill(lxpid, pid); - - return (ret); -} - -static int -ptrace_step(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig) -{ - return (ptrace_cont(lxpid, pid, lwpid, sig, 1)); + return (ret == 0 ? 
ret : -errno); } static int @@ -1489,9 +1073,6 @@ ptrace_getregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) lx_user_regs_t regs; int ret; - if (!is_traced(pid)) - return (-ESRCH); - if ((ret = getregs(pid, lwpid, &regs)) != 0) return (ret); @@ -1506,9 +1087,6 @@ ptrace_setregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) { lx_user_regs_t regs; - if (!is_traced(pid)) - return (-ESRCH); - if (uucopy((void *)addr, &regs, sizeof (regs)) != 0) return (-errno); @@ -1521,9 +1099,6 @@ ptrace_getfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) lx_user_fpregs_t regs; int ret; - if (!is_traced(pid)) - return (-ESRCH); - if ((ret = getfpregs(pid, lwpid, &regs)) != 0) return (ret); @@ -1538,9 +1113,6 @@ ptrace_setfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) { lx_user_fpregs_t regs; - if (!is_traced(pid)) - return (-ESRCH); - if (uucopy((void *)addr, &regs, sizeof (regs)) != 0) return (-errno); @@ -1553,9 +1125,6 @@ ptrace_getfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) lx_user_fpxregs_t regs; int ret; - if (!is_traced(pid)) - return (-ESRCH); - if ((ret = getfpxregs(pid, lwpid, &regs)) != 0) return (ret); @@ -1570,412 +1139,124 @@ ptrace_setfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) { lx_user_fpxregs_t regs; - if (!is_traced(pid)) - return (-ESRCH); - if (uucopy((void *)addr, &regs, sizeof (regs)) != 0) return (-errno); return (setfpxregs(pid, lwpid, &regs)); } -static void __NORETURN -ptrace_monitor(int fd) +void +lx_ptrace_stop_if_option(int option, boolean_t child, ulong_t msg) { - struct { - long cmd; - union { - long flags; - sigset_t signals; - fltset_t faults; - } arg; - } ctl; - size_t size; - int monfd; - int rv; - - monfd = open_procfile(getpid(), O_WRONLY, "ctl"); - - ctl.cmd = PCSTRACE; /* trace only SIGTRAP */ - premptyset(&ctl.arg.signals); - praddset(&ctl.arg.signals, SIGTRAP); - size = sizeof (long) + sizeof (sigset_t); - (void) write(monfd, &ctl, size); /* can't fail */ - - ctl.cmd = PCSFAULT; - premptyset(&ctl.arg.faults); - size = sizeof (long) + sizeof (fltset_t); - 
(void) write(monfd, &ctl, size); /* can't fail */ - - ctl.cmd = PCUNSET; - ctl.arg.flags = PR_FORK; - size = sizeof (long) + sizeof (long); - (void) write(monfd, &ctl, size); /* can't fail */ - - ctl.cmd = PCSET; /* wait()able by the parent */ - ctl.arg.flags = PR_PTRACE; - size = sizeof (long) + sizeof (long); - (void) write(monfd, &ctl, size); /* can't fail */ - - (void) close(monfd); - - ctl.cmd = PCWSTOP; - size = sizeof (long); - - for (;;) { - /* - * Wait for the traced process to stop. - */ - if (write(fd, &ctl, size) != size) { - rv = (errno == ENOENT)? 0 : 1; - lx_debug("monitor failed to wait for LWP to stop: %s", + /* + * We call into the kernel to see if we need to stop for specific + * ptrace(2) events. + */ + lx_debug("lx_ptrace_stop_if_option(%d, %s, %lu)", option, + child ? "TRUE [child]" : "FALSE [parent]", msg); + if (syscall(SYS_brand, B_PTRACE_STOP_FOR_OPT, option, child, + msg) != 0) { + if (errno != ESRCH) { + /* + * This should _only_ fail if we are not traced, or do + * not have this option set. + */ + lx_err_fatal("B_PTRACE_STOP_FOR_OPT failed: %s", strerror(errno)); - _exit(rv); } - - lx_debug("monitor caught traced LWP"); - - /* - * Pull the ptrace trigger by sending ourself a SIGTRAP. This - * will cause this, the monitor process, to stop which will - * cause the parent's waitid(2) call to return this process - * id. In lx_wait(), we remap the monitor process's pid and - * status to those of the traced LWP. When the parent process - * uses ptrace to resume the traced LWP, it will additionally - * restart this process. - */ - (void) _lwp_kill(_lwp_self(), SIGTRAP); - - lx_debug("monitor was resumed"); } } -static int -ptrace_attach_common(int fd, pid_t lxpid, pid_t pid, lwpid_t lwpid, int run) +/* + * Signal to the in-kernel ptrace(2) subsystem that the next native fork() or + * thr_create() is part of an emulated fork(2) or clone(2). If PTRACE_CLONE + * was passed to clone(2), inherit_flag should be B_TRUE. 
+ */ +void +lx_ptrace_clone_begin(int option, boolean_t inherit_flag) { - pid_t child; - ptrace_monitor_map_t *p; - sigset_t unblock; - pstatus_t status; - long ctl[1 + sizeof (sysset_t) / sizeof (long) + 2]; - long *ctlp = ctl; - size_t size; - sysset_t *sysp; - int ret; - - /* - * We're going to need this structure so better to fail now before its - * too late to turn back. - */ - if ((p = malloc(sizeof (ptrace_monitor_map_t))) == NULL) - return (-EIO); - - if ((ret = get_status(pid, &status)) != 0) { - free(p); - return (ret); + lx_debug("lx_ptrace_clone_begin(%d, %sPTRACE_CLONE)", option, + inherit_flag ? "" : "!"); + if (syscall(SYS_brand, B_PTRACE_CLONE_BEGIN, option, + inherit_flag) != 0) { + lx_err_fatal("B_PTRACE_CLONE_BEGIN failed: %s", + strerror(errno)); } - - /* - * If this process is already traced, bail. - */ - if (status.pr_flags & PR_PTRACE) { - free(p); - return (-EPERM); - } - - /* - * Turn on the appropriate tracing flags. It's exceedingly unlikely - * that this operation will fail; any failure would probably be due - * to another /proc consumer mucking around. - */ - if (ptrace_trace_common(fd) != 0) { - free(p); - return (-EIO); - } - - /* - * Native ptrace automatically catches processes when they exec so we - * have to do that explicitly here. - */ - *ctlp++ = PCSEXIT; - sysp = (sysset_t *)ctlp; - ctlp += sizeof (sysset_t) / sizeof (long); - premptyset(sysp); - praddset(sysp, SYS_execve); - if (run) { - *ctlp++ = PCRUN; - *ctlp++ = 0; - } - - size = (char *)ctlp - (char *)&ctl[0]; - - if (write(fd, ctl, size) != size) { - free(p); - return (-EIO); - } - - /* - * Spawn the monitor proceses to notify this process of events of - * interest in the traced process. We block signals here both so - * we're not interrupted during this operation and so that the - * monitor process doesn't accept signals. 
- */ - (void) sigprocmask(SIG_BLOCK, &blockable_sigs, &unblock); - if ((child = fork1()) == 0) - ptrace_monitor(fd); - (void) sigprocmask(SIG_SETMASK, &unblock, NULL); - - if (child == -1) { - lx_debug("failed to fork monitor process\n"); - free(p); - return (-EIO); - } - - p->pmm_monitor = child; - p->pmm_target = lxpid; - p->pmm_pid = pid; - p->pmm_lwpid = lwpid; - p->pmm_exiting = 0; - - (void) mutex_lock(&ptrace_map_mtx); - p->pmm_next = ptrace_monitor_map; - ptrace_monitor_map = p; - (void) mutex_unlock(&ptrace_map_mtx); - - return (0); } -static int -ptrace_attach(pid_t lxpid, pid_t pid, lwpid_t lwpid) +static long +lx_ptrace_kernel(int ptrace_op, pid_t lxpid, uintptr_t addr, uintptr_t data) { - int fd, ret; - long ctl; + int ret; /* - * Linux doesn't let you trace process 1 -- go figure. + * Call into the in-kernel ptrace(2) emulation code. */ - if (lxpid == 1) - return (-EPERM); - - if ((fd = open_lwpfile(pid, lwpid, O_WRONLY | O_EXCL, "lwpctl")) < 0) - return (errno == EBUSY ? -EPERM : -ESRCH); - - ctl = PCSTOP; - if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) { - lx_err("failed to stop %d/%d\n", (int)pid, (int)lwpid); - assert(0); + lx_debug("revectoring to B_PTRACE_KERNEL(%d, %d, %p, %p)", ptrace_op, + lxpid, addr, data); + ret = syscall(SYS_brand, B_PTRACE_KERNEL, ptrace_op, lxpid, addr, + data); + if (ret == 0) { + lx_debug("\t= %d", ret); + } else { + lx_debug("\t= %d (%s)", ret, strerror(errno)); } - ret = ptrace_attach_common(fd, lxpid, pid, lwpid, 0); - - (void) close(fd); - - return (ret); + return (ret == 0 ? 
ret : -errno); } -static int -ptrace_detach(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig) +long +lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) { - long ctl[2]; - int fd, ret; - - if (!is_traced(pid)) - return (-ESRCH); - - if (sig < 0 || sig > LX_NSIG) - return (-EINVAL); - - if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0) - return (-ESRCH); - - if (syscall(SYS_brand, B_PTRACE_EXT_OPTS, B_PTRACE_DETACH, pid, 0) != 0) - return (-ESRCH); + int ptrace_op = (int)p1; + pid_t pid, lxpid = (pid_t)p2; + lwpid_t lwpid; /* - * The /proc ptrace flag may not be set, but we clear it - * unconditionally since doing so doesn't hurt anything. + * Some PTRACE_* requests are emulated entirely in the kernel. */ - ctl[0] = PCUNSET; - ctl[1] = PR_PTRACE; - if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) { - (void) close(fd); - return (-EIO); - } - + switch (ptrace_op) { /* - * Clear the brand-specific system call tracing flag to ensure that - * the target doesn't stop unexpectedly some time in the future. + * PTRACE_TRACEME and PTRACE_ATTACH operations induce the tracing of + * one LWP by another. The target LWP must not be traced already. + * Both `data' and `addr' are ignored in both cases. */ - if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 0)) != 0) { - (void) close(fd); - return (-ret); - } + case LX_PTRACE_TRACEME: + return (lx_ptrace_kernel(ptrace_op, 0, 0, 0)); - /* kill off the monitor process, if any */ - monitor_kill(lxpid, pid); + case LX_PTRACE_ATTACH: + return (lx_ptrace_kernel(ptrace_op, lxpid, 0, 0)); /* - * Turn on the run-on-last-close flag so that all tracing flags will be - * cleared when we close the control file descriptor. + * PTRACE_DETACH, PTRACE_SYSCALL, PTRACE_SINGLESTEP and PTRACE_CONT + * are all restarting actions. They are only allowed when attached + * to the target LWP and when that target LWP is in a "ptrace-stop" + * condition. 
*/ - ctl[0] = PCSET; - ctl[1] = PR_RLC; - if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) { - (void) close(fd); - return (-EIO); + case LX_PTRACE_DETACH: + case LX_PTRACE_SYSCALL: + case LX_PTRACE_CONT: + case LX_PTRACE_SINGLESTEP: + /* + * These actions also require the LWP to be traced and stopped, but do + * not restart the target LWP. + */ + case LX_PTRACE_SETOPTIONS: + case LX_PTRACE_GETEVENTMSG: + return (lx_ptrace_kernel(ptrace_op, lxpid, p3, p4)); } /* - * Clear the current signal (if any) and possibly send the traced - * process a new signal. + * The rest of the emulated PTRACE_* actions are emulated in userland. + * They require the target LWP to be traced and in currently + * "ptrace-stop", but do not subsequently restart the target LWP. */ - ret = ptrace_cont_common(fd, sig, 0, 0); - - (void) close(fd); - - return (ret); -} - -static int -ptrace_syscall(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig) -{ - int ret; - - if (!is_traced(pid)) + if (lx_lpid_to_spair(lxpid, &pid, &lwpid) < 0 || + !is_ptrace_stopped(lxpid)) { return (-ESRCH); - - if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 1)) != 0) - return (-ret); - - return (ptrace_cont(lxpid, pid, lwpid, sig, 0)); -} - -static int -ptrace_setoptions(pid_t pid, int options) -{ - int ret; - int fd; - int error = 0; - struct { - long cmd; - union { - long flags; - sigset_t signals; - fltset_t faults; - } arg; - } ctl; - size_t size; - pstatus_t status; - - if ((ret = get_status(pid, &status)) != 0) - return (ret); - - if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) - return (-errno); - - /* since we're doing option tracing now, only catch sigtrap */ - ctl.cmd = PCSTRACE; - premptyset(&ctl.arg.signals); - praddset(&ctl.arg.signals, SIGTRAP); - size = sizeof (long) + sizeof (sigset_t); - if (write(fd, &ctl, size) != size) { - error = -errno; - } else { - /* - * If we're tracing fork, set inherit-on-fork, otherwise clear - * it. 
- */ - if (options & LX_PTRACE_O_TRACEFORK) { - ctl.cmd = PCSET; - } else { - ctl.cmd = PCUNSET; - } - ctl.arg.flags = PR_FORK; - size = sizeof (long) + sizeof (long); - if (write(fd, &ctl, size) != size) - error = -errno; } - (void) close(fd); - - if (error != 0) - return (error); - - ret = syscall(SYS_brand, B_PTRACE_EXT_OPTS, B_PTRACE_EXT_OPTS_SET, pid, - options); - - return ((ret != 0) ? -errno : 0); -} - -void -lx_ptrace_stop_if_option(int option, boolean_t child, ulong_t msg) -{ - pid_t pid; - uint_t curr_opts; - - pid = getpid(); - if (pid == 1) - pid = zoneinit_pid; - - /* first we have to see if the stop option is set for this process */ - if (syscall(SYS_brand, B_PTRACE_EXT_OPTS, B_PTRACE_EXT_OPTS_GET, pid, - &curr_opts) != 0) - return; - - if (child) { - /* - * If we just forked/cloned, then the trace flags only carry - * over to the child if the specific flag was enabled on the - * parent. For example, if only TRACEFORK is enabled and we - * clone, then we must clear the trace flags. If TRACEFORK is - * enabled and we fork, then we keep the flags. - */ - if (option == LX_PTRACE_O_TRACECLONE || - option == LX_PTRACE_O_TRACEFORK || - option == LX_PTRACE_O_TRACEVFORK) { - - if ((curr_opts & option) == 0) - (void) syscall(SYS_brand, B_PTRACE_EXT_OPTS, - B_PTRACE_EXT_OPTS_SET, pid, 0); - - /* - * Since we know we're the child we have to modify how - * we stop. Set the emulation's child flag in the - * option. - */ - option |= EMUL_PTRACE_O_CHILD; - } - } - - /* now if the option is/was set, this brand call will stop us */ - if (curr_opts & option) - (void) syscall(SYS_brand, B_PTRACE_STOP_FOR_OPT, option, msg); -} - -static int -ptrace_geteventmsg(pid_t pid, ulong_t *msgp) -{ - int ret; - - ret = syscall(SYS_brand, B_PTRACE_GETEVENTMSG, pid, msgp); - - return ((ret != 0) ? 
-errno : 0); -} - -long -lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) -{ - pid_t pid, lxpid = (pid_t)p2; - lwpid_t lwpid; - - if ((p1 != LX_PTRACE_TRACEME) && - (lx_lpid_to_spair(lxpid, &pid, &lwpid) < 0)) - return (-ESRCH); - - switch (p1) { - case LX_PTRACE_TRACEME: - return (ptrace_traceme()); - + switch (ptrace_op) { case LX_PTRACE_PEEKTEXT: case LX_PTRACE_PEEKDATA: return (ptrace_peek(pid, p3, (long *)p4)); @@ -1990,14 +1271,8 @@ lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) case LX_PTRACE_POKEUSER: return (ptrace_poke_user(pid, lwpid, p3, (int)p4)); - case LX_PTRACE_CONT: - return (ptrace_cont(lxpid, pid, lwpid, (int)p4, 0)); - case LX_PTRACE_KILL: - return (ptrace_kill(lxpid, pid)); - - case LX_PTRACE_SINGLESTEP: - return (ptrace_step(lxpid, pid, lwpid, (int)p4)); + return (ptrace_kill(pid)); case LX_PTRACE_GETREGS: return (ptrace_getregs(pid, lwpid, p4)); @@ -2011,419 +1286,13 @@ lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) case LX_PTRACE_SETFPREGS: return (ptrace_setfpregs(pid, lwpid, p4)); - case LX_PTRACE_ATTACH: - return (ptrace_attach(lxpid, pid, lwpid)); - - case LX_PTRACE_DETACH: - return (ptrace_detach(lxpid, pid, lwpid, (int)p4)); - case LX_PTRACE_GETFPXREGS: return (ptrace_getfpxregs(pid, lwpid, p4)); case LX_PTRACE_SETFPXREGS: return (ptrace_setfpxregs(pid, lwpid, p4)); - case LX_PTRACE_SYSCALL: - return (ptrace_syscall(lxpid, pid, lwpid, (int)p4)); - - case LX_PTRACE_SETOPTIONS: - return (ptrace_setoptions(pid, (int)p4)); - - case LX_PTRACE_GETEVENTMSG: - return (ptrace_geteventmsg(pid, (ulong_t *)p4)); - default: return (-EINVAL); } } - -void -lx_ptrace_fork(void) -{ - /* - * Send a special signal (that has no Linux equivalent) to indicate - * that we're in this particularly special case. The signal will be - * ignored by this process, but noticed by /proc consumers tracing - * this process. 
- */ - (void) _lwp_kill(_lwp_self(), SIGWAITING); -} - -static void -ptrace_catch_fork(pid_t pid, int monitor) -{ - long ctl[14 + 2 * sizeof (sysset_t) / sizeof (long)]; - long *ctlp; - sysset_t *sysp; - size_t size; - pstatus_t ps; - pid_t child; - int fd, err; - - /* - * If any of this fails, we're really sunk since the child - * will be stuck in the middle of lx_ptrace_fork(). - * Fortunately it's practically assured to succeed unless - * something is seriously wrong on the system. - */ - if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) { - lx_debug("lx_catch_fork: failed to control %d", - (int)pid); - return; - } - - /* - * Turn off the /proc PR_PTRACE flag so the parent doesn't get - * spurious wake ups while we're working our dark magic. Arrange to - * catch the process when it exits from fork, and turn on the /proc - * inherit-on-fork flag so we catcht the child as well. We then run - * the process, wait for it to stop on the fork1(2) call and reset - * the tracing flags to their original state. - */ - ctlp = ctl; - *ctlp++ = PCCSIG; - if (!monitor) { - *ctlp++ = PCUNSET; - *ctlp++ = PR_PTRACE; - } - *ctlp++ = PCSET; - *ctlp++ = PR_FORK; - *ctlp++ = PCSEXIT; - sysp = (sysset_t *)ctlp; - ctlp += sizeof (sysset_t) / sizeof (long); - premptyset(sysp); - praddset(sysp, SYS_forksys); /* fork1() is forksys(0, 0) */ - *ctlp++ = PCRUN; - *ctlp++ = 0; - *ctlp++ = PCWSTOP; - if (!monitor) { - *ctlp++ = PCSET; - *ctlp++ = PR_PTRACE; - } - *ctlp++ = PCUNSET; - *ctlp++ = PR_FORK; - *ctlp++ = PCSEXIT; - sysp = (sysset_t *)ctlp; - ctlp += sizeof (sysset_t) / sizeof (long); - premptyset(sysp); - if (monitor) - praddset(sysp, SYS_execve); - - size = (char *)ctlp - (char *)&ctl[0]; - assert(size <= sizeof (ctl)); - - if (write(fd, ctl, size) != size) { - (void) close(fd); - lx_debug("lx_catch_fork: failed to set %d running", - (int)pid); - return; - } - - /* - * Get the status so we can find the value returned from fork1() -- - * the child process's pid. 
- */ - if (get_status(pid, &ps) != 0) { - (void) close(fd); - lx_debug("lx_catch_fork: failed to get status for %d", - (int)pid); - return; - } - - child = (pid_t)ps.pr_lwp.pr_reg[R_R0]; - - /* - * We're done with the parent -- off you go. - */ - ctl[0] = PCRUN; - ctl[1] = 0; - size = 2 * sizeof (long); - - if (write(fd, ctl, size) != size) { - (void) close(fd); - lx_debug("lx_catch_fork: failed to set %d running", - (int)pid); - return; - } - - (void) close(fd); - - /* - * If fork1(2) failed, we're done. - */ - if (child < 0) { - lx_debug("lx_catch_fork: fork1 failed"); - return; - } - - /* - * Now we need to screw with the child process. - */ - if ((fd = open_lwpfile(child, 1, O_WRONLY, "lwpctl")) < 0) { - lx_debug("lx_catch_fork: failed to control %d", - (int)child); - return; - } - - ctlp = ctl; - *ctlp++ = PCUNSET; - *ctlp++ = PR_FORK; - *ctlp++ = PCSEXIT; - sysp = (sysset_t *)ctlp; - ctlp += sizeof (sysset_t) / sizeof (long); - premptyset(sysp); - size = (char *)ctlp - (char *)&ctl[0]; - - if (write(fd, ctl, size) != size) { - (void) close(fd); - lx_debug("lx_catch_fork: failed to clear trace flags for %d", - (int)child); - return; - } - - /* - * Now treat the child as though we had attached to it explicitly. - */ - err = ptrace_attach_common(fd, child, child, 1, 1); - assert(err == 0); - - (void) close(fd); -} - -static void -set_dr6(pid_t pid, siginfo_t *infop) -{ - uintptr_t *debugreg; - uintptr_t addr; - uintptr_t base; - size_t size = NULL; - int dr7; - int lrw; - int i; - - if ((debugreg = debug_registers(pid)) == NULL) - return; - - debugreg[6] = 0xffff0ff0; /* read as ones */ - switch (infop->si_code) { - case TRAP_TRACE: - debugreg[6] |= 0x4000; /* single-step */ - break; - case TRAP_RWATCH: - case TRAP_WWATCH: - case TRAP_XWATCH: - dr7 = debugreg[7]; - addr = (uintptr_t)infop->si_addr; - for (i = 0; i < 4; i++) { - if ((dr7 & (1 << (2 * i))) == 0) /* enabled? 
*/ - continue; - lrw = (dr7 >> (16 + (4 * i))) & 0xf; - switch (lrw >> 2) { /* length */ - case 0: size = 1; break; - case 1: size = 2; break; - case 2: size = 8; break; - case 3: size = 4; break; - } - base = debugreg[i]; - if (addr >= base && addr < base + size) - debugreg[6] |= (1 << i); - } - /* - * Were we also attempting a single-step? - * (kludge: we use debugreg[4] for this flag.) - */ - if (debugreg[4]) - debugreg[6] |= 0x4000; - break; - default: - break; - } -} - -/* - * This is called from the emulation of the wait4, waitpid and waitid system - * calls to take into account: - * - the monitor processes which we spawn to observe other processes from - * ptrace_attach(). - * - the extended si_status result we can get when extended ptrace options - * are enabled. - */ -int -lx_ptrace_wait(siginfo_t *infop) -{ - ptrace_monitor_map_t *p, **pp; - pid_t lxpid, pid = infop->si_pid; - lwpid_t lwpid; - int fd; - pstatus_t status; - - /* - * If the process observed by waitid(2) corresponds to the monitor - * process for a traced thread, we need to rewhack the siginfo_t to - * look like it came from the traced thread with the flags set - * according to the current state. - */ - (void) mutex_lock(&ptrace_map_mtx); - for (pp = &ptrace_monitor_map; (p = *pp) != NULL; pp = &p->pmm_next) { - if (p->pmm_monitor == pid) { - assert(infop->si_code == CLD_EXITED || - infop->si_code == CLD_KILLED || - infop->si_code == CLD_DUMPED || - infop->si_code == CLD_TRAPPED); - goto found; - } - } - (void) mutex_unlock(&ptrace_map_mtx); - - if (infop->si_code == CLD_TRAPPED) { - /* - * If the traced process got a SIGWAITING, we must be in the - * middle of a clone(2) with CLONE_PTRACE set. - */ - if (infop->si_status == SIGWAITING) { - ptrace_catch_fork(pid, 0); - return (-1); - } - - /* - * If the traced process got a SIGTRAP then Linux ptrace - * options might have been set, so setup the extended - * si_status to contain the (possible) event. 
Note that - * our definitions for the ptrace events (e.g. - * LX_PTRACE_EVENT_FORK) is already shifted <<8 as documented - * on the Linux ptrace(2) man page. - */ - if (infop->si_status == SIGTRAP) { - uint_t event; - - if (syscall(SYS_brand, B_PTRACE_EXT_OPTS, - B_PTRACE_EXT_OPTS_EVT, pid, &event) == 0) - infop->si_status |= event; - } - } - - if (get_status(pid, &status) == 0 && - (status.pr_lwp.pr_flags & PR_STOPPED) && - status.pr_lwp.pr_why == PR_SIGNALLED && - status.pr_lwp.pr_info.si_signo == SIGTRAP) - set_dr6(pid, &status.pr_lwp.pr_info); - - return (0); - -found: - /* - * If the monitor is in the exiting state, ignore the event and free - * the monitor structure if the monitor has exited. By returning -1 we - * indicate to the caller that this was a spurious return from - * waitid(2) and that it should ignore the result and try again. - */ - if (p->pmm_exiting) { - if (infop->si_code == CLD_EXITED || - infop->si_code == CLD_KILLED || - infop->si_code == CLD_DUMPED) { - *pp = p->pmm_next; - (void) mutex_unlock(&ptrace_map_mtx); - free(p); - } - return (-1); - } - - lxpid = p->pmm_target; - pid = p->pmm_pid; - lwpid = p->pmm_lwpid; - (void) mutex_unlock(&ptrace_map_mtx); - - /* - * If we can't find the traced process, kill off its monitor. - */ - if ((fd = open_lwpfile(pid, lwpid, O_RDONLY, "lwpstatus")) < 0) { - assert(errno == ENOENT); - monitor_kill(lxpid, pid); - infop->si_code = CLD_EXITED; - infop->si_status = 0; - infop->si_pid = lxpid; - return (0); - } - - if (read(fd, &status.pr_lwp, sizeof (status.pr_lwp)) != - sizeof (status.pr_lwp)) { - lx_err("read lwpstatus failed %d %s", fd, strerror(errno)); - assert(0); - } - - (void) close(fd); - - /* - * If the traced process isn't stopped, this is a truly spurious - * event probably caused by another /proc consumer tracing the - * monitor. 
- */ - if (!(status.pr_lwp.pr_flags & PR_STOPPED)) { - (void) ptrace_cont_monitor(p); - return (-1); - } - - switch (status.pr_lwp.pr_why) { - case PR_SIGNALLED: - /* - * If the traced process got a SIGWAITING, we must be in the - * middle of a clone(2) with CLONE_PTRACE set. - */ - if (status.pr_lwp.pr_what == SIGWAITING) { - ptrace_catch_fork(lxpid, 1); - (void) ptrace_cont_monitor(p); - return (-1); - } - infop->si_code = CLD_TRAPPED; - infop->si_status = status.pr_lwp.pr_what; - if (status.pr_lwp.pr_info.si_signo == SIGTRAP) - set_dr6(pid, &status.pr_lwp.pr_info); - break; - - case PR_REQUESTED: - /* - * Make it look like the traced process stopped on an - * event of interest. - */ - infop->si_code = CLD_TRAPPED; - infop->si_status = SIGTRAP; - break; - - case PR_JOBCONTROL: - /* - * Ignore this as it was probably caused by another /proc - * consumer tracing the monitor. - */ - (void) ptrace_cont_monitor(p); - return (-1); - - case PR_SYSEXIT: - /* - * Processes traced via a monitor (rather than using the - * native Solaris ptrace support) explicitly trace returns - * from exec system calls since it's an implicit ptrace - * trace point. Accordingly we need to present a process - * in that state as though it had reached the ptrace trace - * point. 
- */ - if (status.pr_lwp.pr_what == SYS_execve) { - infop->si_code = CLD_TRAPPED; - infop->si_status = SIGTRAP; - break; - } - - /*FALLTHROUGH*/ - - case PR_SYSENTRY: - case PR_FAULTED: - case PR_SUSPENDED: - default: - lx_err("didn't expect %d (%d %d)", status.pr_lwp.pr_why, - status.pr_lwp.pr_what, status.pr_lwp.pr_flags); - assert(0); - } - - infop->si_pid = lxpid; - - return (0); -} diff --git a/usr/src/lib/brand/lx/lx_brand/common/signal.c b/usr/src/lib/brand/lx/lx_brand/common/signal.c index b71d712591..9029249b10 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/signal.c +++ b/usr/src/lib/brand/lx/lx_brand/common/signal.c @@ -345,6 +345,14 @@ static int lx_sigsegv_depth = 0; #endif /* + * Setting LX_NO_ABORT_HANDLER in the environment will prevent the emulated + * Linux program from modifying the signal handling disposition for SIGSEGV or + * SIGABRT. Useful for debugging programs which fall over themselves to + * prevent useful core files being generated. + */ +static int lx_no_abort_handler = 0; + +/* * Cache result of process.max-file-descriptor to avoid calling getrctl() * for each lx_ppoll(). */ @@ -497,6 +505,29 @@ ltos_sigcode(int si_code) } } +/* + * Convert the "status" field of a SIGCLD siginfo_t. We need to extract the + * illumos signal number and convert it to a Linux signal number while leaving + * the ptrace(2) event bits intact. + */ +int +stol_status(int s) +{ + /* + * We mask out the top bit here in case PTRACE_O_TRACESYSGOOD + * is in use and 0x80 has been ORed with the signal number. + */ + int stat = stol_signo[s & 0x7f]; + assert(stat != -1); + + /* + * We must mix in the ptrace(2) event which may be stored in + * the second byte of the status code. We also re-include the + * PTRACE_O_TRACESYSGOOD bit. 
+ */ + return ((s & 0xff80) | stat); +} + int stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop) { @@ -530,7 +561,8 @@ stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop) case LX_SIGCHLD: lx_siginfo.lsi_pid = siginfop->si_pid; - lx_siginfo.lsi_status = siginfop->si_status; + lx_siginfo.lsi_status = stol_status( + siginfop->si_status); lx_siginfo.lsi_utime = siginfop->si_utime; lx_siginfo.lsi_stime = siginfop->si_stime; break; @@ -789,6 +821,19 @@ lx_sigprocmask_common(uintptr_t how, uintptr_t l_setp, uintptr_t l_osetp, if (err != 0) return (err); + +#if defined(_LP64) + /* + * To assure that vsyscall operates correctly, we must never + * block SIGSEGV. (Fortunately, SIGSEGV is a synchronous + * signal for which the default disposition is to nuke the + * process -- it would be hard for a correctly-written program + * to rely upon its ability to block SIGSEGV.) + */ + if (how == SIG_BLOCK || how == SIG_SETMASK) + sigdelset(s_setp, SIGSEGV); +#endif + } s_osetp = (l_osetp ? &oset : NULL); @@ -1510,6 +1555,13 @@ lx_vsyscall_return(long ret, ucontext_t *ucp) &ucp->uc_mcontext.gregs[REG_RIP], sizeof (void*)); lx_debug("\tvsyscall return to %p", ucp->uc_mcontext.gregs[REG_RIP]); ucp->uc_mcontext.gregs[REG_RSP] += sizeof (void*); + + /* + * Make sure that libc's ul_sigmask reflects what the sigmask is about + * to become. + */ + thr_sigsetmask(SIG_SETMASK, &ucp->uc_sigmask, NULL); + (void) syscall(SYS_brand, B_SIGNAL_RETURN, ucp); } #endif @@ -1532,6 +1584,17 @@ lx_call_user_handler(int sig, siginfo_t *sip, void *p) size_t stksize; int lx_sig; + switch (sig) { + case SIGCLD: + /* + * Signal to an interrupted waitpid() that it was interrupted + * by a SIGCLD, and should restart to grab the wait status + * this signal represented. + */ + lx_had_sigchild = 1; + break; + } + /* * If Illumos signal has no Linux equivalent, effectively ignore it. 
*/ @@ -1548,6 +1611,18 @@ lx_call_user_handler(int sig, siginfo_t *sip, void *p) lx_debug("lxsap @ 0x%p", lxsap); /* + * If the delivery of this signal interrupted a system call, we must + * only restart it if sigaction(2) was used to set the SA_RESTART flag + * for this signal. The lx_emulate() function checks this per-thread + * variable to discover the restart disposition of the most recently + * handled signal. + * + * NOTE: this mechanism may not stand up to close scrutiny in the face + * of nested asynchronous signal delivery. + */ + lx_do_syscall_restart = !!(lxsap->lxsa_flags & LX_SA_RESTART); + + /* * Emulate vsyscall support. * * Linux magically maps a single page into the address space of each @@ -1605,9 +1680,12 @@ lx_call_user_handler(int sig, siginfo_t *sip, void *p) * 64bit for vsyscall emulation, there are certain cases * where a SIGSEGV is ignored or forces an exit. */ - if (lxsap->lxsa_handler == SIG_IGN) { + if (lxsap->lxsa_handler == SIG_IGN && + sip->si_code == SI_USER) { + /* Safely ignore a user-sent signal */ return; } else if (lxsap->lxsa_handler == SIG_DFL || + lxsap->lxsa_handler == SIG_IGN || ((lxsap->lxsa_flags & LX_SA_NODEFER) == 0 && lx_sigsegv_depth > 0)) { (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGSEGV); @@ -1717,6 +1795,18 @@ lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp, return (-errno); if ((sig = ltos_signo[lx_sig]) != -1) { + if (lx_no_abort_handler != 0) { + /* + * If LX_NO_ABORT_HANDLER has been set, we will + * not allow the emulated program to do + * anything hamfisted with SIGSEGV or SIGABRT + * signals. + */ + if (sig == SIGSEGV || sig == SIGABRT) { + return (0); + } + } + /* * Block this signal while messing with its dispostion */ @@ -1802,6 +1892,15 @@ lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp, return (-err); } +#if defined(_LP64) + /* + * To assure that vsyscall can operate properly + * from within signal handlers, we (implicitly) + * disallow blocking SEGV in any signal handler. 
+ */ + sigdelset(&sa.sa_mask, SIGSEGV); +#endif + lx_debug("interposing handler @ 0x%p for " "signal %d (lx %d), flags 0x%x", lxsa.lxsa_handler, sig, lx_sig, @@ -1816,6 +1915,16 @@ lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp, NULL); return (-err); } +#if defined(_LP64) + } else if (sig == SIGSEGV) { + /* + * If user code attempts to set SIGSEGV to + * SIG_IGN or SIG_DFL, the interposing handler + * is still required to handle vsyscalls. + */ + lx_debug("interposing handler maintained " + "for SIGSEGV"); +#endif } else if ((sig != SIGPWR) || ((sig == SIGPWR) && (lxsa.lxsa_handler == SIG_IGN))) { @@ -2026,6 +2135,10 @@ lx_siginit(void) sigset_t new_set, oset; int lx_sig, sig; + if (getenv("LX_NO_ABORT_HANDLER") != NULL) { + lx_no_abort_handler = 1; + } + /* * Block all signals possible while setting up the signal imposition * mechanism. @@ -2236,7 +2349,7 @@ lx_ppoll(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, uintptr_t p5) sfds[i].revents = 0; } - if ((rval = ppoll(sfds, nfds, tsp, sp) < 0)) + if ((rval = ppoll(sfds, nfds, tsp, sp)) < 0) return (-errno); /* Convert the Illumos revents bitmask into the Linux equivalent */ diff --git a/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c b/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c index 7eb6a6cd12..665d4ce0a7 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c +++ b/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c @@ -21,7 +21,7 @@ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2014 Joyent, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. All rights reserved. 
*/ #include <errno.h> @@ -48,7 +48,7 @@ #define SLOT_MSG 2 static int -get_rctlval(rctlblk_t *rblk, char *name) +get_rctlval(rctlblk_t *rblk, char *name, ulong_t limit, uint64_t *val) { rctl_qty_t r; @@ -56,9 +56,11 @@ get_rctlval(rctlblk_t *rblk, char *name) return (-errno); r = rctlblk_get_value(rblk); - if (r > MAXINT) + if (r > limit) return (-EOVERFLOW); - return (r); + + *val = r; + return (0); } /* @@ -206,18 +208,26 @@ lx_semctl_ipcinfo(void *buf) int rblksz; uint_t nids; int idbuf; + int err; + uint64_t val; rblksz = rctlblk_size(); if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL) return (-ENOMEM); bzero(&i, sizeof (i)); - if ((i.semmni = get_rctlval(rblk, "project.max-sem-ids")) < 0) - return (i.semmni); - if ((i.semmsl = get_rctlval(rblk, "process.max-sem-nsems")) < 0) - return (i.semmsl); - if ((i.semopm = get_rctlval(rblk, "process.max-sem-ops")) < 0) - return (i.semopm); + err = get_rctlval(rblk, "project.max-sem-ids", (ulong_t)MAXINT, &val); + if (err < 0) + return (err); + i.semmni = (int)val; + err = get_rctlval(rblk, "process.max-sem-nsems", (ulong_t)MAXINT, &val); + if (err < 0) + return (err); + i.semmsl = (int)val; + err = get_rctlval(rblk, "process.max-sem-ops", (ulong_t)MAXINT, &val); + if (err < 0) + return (err); + i.semopm = (int)val; /* * We don't have corresponding rctls for these fields. 
The values @@ -516,21 +526,30 @@ lx_msgctl_ipcinfo(int cmd, void *buf) int idbuf, rblksz, msgseg, maxmsgs; uint_t nids; int rval; + int err; + uint64_t val; rblksz = rctlblk_size(); if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL) return (-ENOMEM); bzero(&m, sizeof (m)); - if ((m.msgmni = get_rctlval(rblk, "project.max-msg-ids")) < 0) - return (m.msgmni); - if ((m.msgmnb = get_rctlval(rblk, "process.max-msg-qbytes")) < 0) - return (m.msgmnb); + err = get_rctlval(rblk, "project.max-msg-ids", (ulong_t)MAXINT, &val); + if (err < 0) + return (err); + m.msgmni = (int)val; + err = get_rctlval(rblk, "process.max-msg-qbytes", (ulong_t)MAXINT, + &val); + if (err < 0) + return (err); + m.msgmnb = (int)val; if (cmd == LX_IPC_INFO) { - if ((maxmsgs = get_rctlval(rblk, - "process.max-msg-messages")) < 0) - return (maxmsgs); + err = get_rctlval(rblk, "process.max-msg-messages", + (ulong_t)MAXINT, &val); + if (err < 0) + return (err); + maxmsgs = (int)val; m.msgtql = maxmsgs * m.msgmni; m.msgmap = m.msgmnb; m.msgpool = m.msgmax * m.msgmnb; @@ -693,16 +712,22 @@ lx_shmctl_ipcinfo(void *buf) struct lx_shminfo s; rctlblk_t *rblk; int rblksz; + int err; + uint64_t val; rblksz = rctlblk_size(); if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL) return (-ENOMEM); bzero(&s, sizeof (s)); - if ((s.shmmni = get_rctlval(rblk, "project.max-shm-ids")) < 0) - return (s.shmmni); - if ((s.shmmax = get_rctlval(rblk, "project.max-shm-memory")) < 0) - return (s.shmmax); + err = get_rctlval(rblk, "project.max-shm-ids", ULONG_MAX, &val); + if (err < 0) + return (err); + s.shmmni = val; + err = get_rctlval(rblk, "project.max-shm-memory", ULONG_MAX, &val); + if (err < 0) + return (err); + s.shmmax = val; /* * We don't have corresponding rctls for these fields. The values @@ -711,7 +736,7 @@ lx_shmctl_ipcinfo(void *buf) * coherent about it. 
*/ s.shmmin = 1; - s.shmseg = INT_MAX; + s.shmseg = ULONG_MAX; s.shmall = s.shmmax / getpagesize(); if (uucopy(&s, buf, sizeof (s))) diff --git a/usr/src/lib/brand/lx/lx_brand/common/wait.c b/usr/src/lib/brand/lx/lx_brand/common/wait.c index 031eb5e5cd..c3421858eb 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/wait.c +++ b/usr/src/lib/brand/lx/lx_brand/common/wait.c @@ -22,7 +22,7 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2014 Joyent, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. */ /* @@ -70,6 +70,7 @@ #include <sys/wait.h> #include <sys/lx_types.h> #include <sys/lx_signal.h> +#include <sys/lx_debug.h> #include <sys/lx_misc.h> #include <sys/lx_syscall.h> #include <sys/syscall.h> @@ -100,32 +101,23 @@ extern long max_pid; +/* + * Split the passed waitpid/waitid options into two separate variables: + * those for the native illumos waitid(2), and the extra Linux-specific + * options we will handle in our brand-specific code. + */ static int -ltos_options(uintptr_t options) +ltos_options(uintptr_t options, int *native_options, int *extra_options) { int newoptions = 0; - int rval; - lx_waitid_args_t extra; if (((options) & ~(LX_WNOHANG | LX_WUNTRACED | LX_WEXITED | LX_WCONTINUED | LX_WNOWAIT | LX_WNOTHREAD | LX_WALL | LX_WCLONE)) != 0) { return (-1); } - /* - * We use the B_STORE_ARGS command to store any of LX_WNOTHREAD, - * LX_WALL, and LX_WCLONE that have been set as options on this waitid - * call. These flags are stored as part of the lwp_brand_data, so that - * when there is a later syscall to waitid, the brand code there can - * detect that we added extra flags here and use them as appropriate. - * We pass them in here rather than the normal channel for flags to - * prevent polluting the namespace. 
- */ - extra.waitid_flags = options & (LX_WNOTHREAD | LX_WALL | LX_WCLONE); - rval = syscall(SYS_brand, B_STORE_ARGS, &extra, - sizeof (lx_waitid_args_t), NULL, NULL, NULL, NULL); - if (rval < 0) - return (rval); + + *extra_options = options & (LX_WNOTHREAD | LX_WALL | LX_WCLONE); if (options & LX_WNOHANG) newoptions |= WNOHANG; @@ -138,10 +130,13 @@ ltos_options(uintptr_t options) if (options & LX_WNOWAIT) newoptions |= WNOWAIT; - /* The trapped option is implicit on Linux */ + /* + * The trapped option is implicit on Linux. + */ newoptions |= WTRAPPED; - return (newoptions); + *native_options = newoptions; + return (0); } static int @@ -164,10 +159,7 @@ lx_wstat(int code, int status) break; case CLD_TRAPPED: case CLD_STOPPED: - stat = stol_signo[status]; - assert(stat != -1); - stat <<= 8; - stat |= WSTOPFLG; + stat = (stol_status(status) << 8) | WSTOPFLG; break; case CLD_CONTINUED: stat = WCONTFLG; @@ -177,33 +169,31 @@ lx_wstat(int code, int status) return (stat); } -/* wrapper to make solaris waitid work properly with ptrace */ static int -lx_waitid_helper(idtype_t idtype, id_t id, siginfo_t *info, int options) +lx_waitid_helper(idtype_t idtype, id_t id, siginfo_t *sip, int native_options, + int extra_options) { - do { - /* - * It's possible that we return EINVAL here if the idtype is - * P_PID or P_PGID and id is out of bounds for a valid pid or - * pgid, but Linux expects to see ECHILD. No good way occurs to - * handle this so we'll punt for now. - */ - if (waitid(idtype, id, info, options) < 0) - return (-errno); - - /* - * If the WNOHANG flag was specified and no child was found - * return 0. - */ - if ((options & WNOHANG) && info->si_pid == 0) - return (0); - - /* - * It's possible that we may have a spurious return for one of - * the child processes created by the ptrace subsystem. If - * that's the case, we simply try again. 
- */ - } while (lx_ptrace_wait(info) == -1); + /* + * Call into our in-kernel waitid() wrapper: + */ +restart: + lx_had_sigchild = 0; + if (syscall(SYS_brand, B_HELPER_WAITID, idtype, id, sip, + native_options, extra_options) != 0) { + if (errno == EINTR && (lx_had_sigchild || + lx_do_syscall_restart)) { + /* + * If we handled a SIGCLD while blocked in waitid(), + * or the SA_RESTART flag was set, we should wait + * again. + */ + lx_debug("lx_waitid_helper() restarting due to" + " interrupted system call"); + goto restart; + } + return (-1); + } + return (0); } @@ -214,11 +204,12 @@ lx_wait4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) struct rusage ru = { 0 }; idtype_t idtype; id_t id; - int options, status = 0; + int status = 0; pid_t pid = (pid_t)p1; int rval; + int native_options, extra_options; - if ((options = ltos_options(p3)) == -1) + if (ltos_options(p3, &native_options, &extra_options) == -1) return (-EINVAL); if (pid > max_pid) @@ -260,14 +251,17 @@ lx_wait4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) id = pid; } - options |= WEXITED | WTRAPPED; + native_options |= WEXITED | WTRAPPED; + + if (lx_waitid_helper(idtype, id, &info, native_options, + extra_options) == -1) { + return (-errno); + } - if ((rval = lx_waitid_helper(idtype, id, &info, options)) < 0) - return (rval); /* * If the WNOHANG flag was specified and no child was found return 0. 
*/ - if ((options & WNOHANG) && info.si_pid == 0) + if ((native_options & WNOHANG) && info.si_pid == 0) return (0); status = lx_wstat(info.si_code, info.si_status); @@ -297,9 +291,10 @@ lx_waitpid(uintptr_t p1, uintptr_t p2, uintptr_t p3) long lx_waitid(uintptr_t idtype, uintptr_t id, uintptr_t infop, uintptr_t opt) { - int rval, options; + int native_options, extra_options; siginfo_t s_info = {0}; - if ((options = ltos_options(opt)) == -1) + + if (ltos_options(opt, &native_options, &extra_options) == -1) return (-EINVAL); if (((opt) & (LX_WEXITED | LX_WSTOPPED | LX_WCONTINUED)) == 0) @@ -318,11 +313,14 @@ lx_waitid(uintptr_t idtype, uintptr_t id, uintptr_t infop, uintptr_t opt) default: return (-EINVAL); } - if ((rval = lx_waitid_helper(idtype, (id_t)id, &s_info, options)) < 0) - return (rval); + + if (lx_waitid_helper(idtype, id, &s_info, native_options, + extra_options) == -1) { + return (-errno); + } /* If the WNOHANG flag was specified and no child was found return 0. */ - if ((options & WNOHANG) && s_info.si_pid == 0) + if ((native_options & WNOHANG) && s_info.si_pid == 0) return (0); return (stol_siginfo(&s_info, (lx_siginfo_t *)infop)); diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h index 7d9c6fae0a..f50535d0c4 100644 --- a/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h @@ -55,6 +55,13 @@ extern int lx_rpm_delay; extern boolean_t lx_is_rpm; /* + * These thread-specific variables allow the signal interposition code + * to communicate restart disposition for any interrupting signals. 
+ */ +extern __thread int lx_had_sigchild; +extern __thread int lx_do_syscall_restart; + +/* * Values Linux expects for init */ #define LX_INIT_PGID 0 @@ -173,6 +180,7 @@ extern void lx_ptrace_init(); extern int lx_ptrace_wait(siginfo_t *); extern void lx_ptrace_fork(void); extern void lx_ptrace_stop_if_option(int, boolean_t, ulong_t msg); +extern void lx_ptrace_clone_begin(int, boolean_t); extern int lx_check_alloca(size_t); #define SAFE_ALLOCA(sz) (lx_check_alloca(sz) ? alloca(sz) : NULL) diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h index b4dc47faac..f3d39fca64 100644 --- a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h @@ -21,7 +21,7 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2014 Joyent, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_LX_SIGNAL_H @@ -396,6 +396,7 @@ extern void lx_sigdeliver(int, siginfo_t *, void *, size_t, void (*)(), void (*)(), uintptr_t); extern int stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop); +extern int stol_status(int); #endif /* !defined(_ASM) */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h index 9cd9cdedb7..f9f49598b7 100644 --- a/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h @@ -22,7 +22,7 @@ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2014 Joyent, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. All rights reserved. 
*/ #ifndef _LX_SYSV_IPC_H @@ -208,11 +208,11 @@ struct lx_shm_info { }; struct lx_shminfo { - long shmmax; - long shmmin; - long shmmni; - long shmseg; - long shmall; + ulong_t shmmax; + ulong_t shmmin; + ulong_t shmmni; + ulong_t shmseg; + ulong_t shmall; }; #ifdef __cplusplus diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h index b4b72c78f9..3d7b9018e1 100644 --- a/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h @@ -34,6 +34,12 @@ extern "C" { #include <thread.h> +typedef enum lx_exit_type { + LX_ET_NONE = 0, + LX_ET_EXIT, + LX_ET_EXIT_GROUP +} lx_exit_type_t; + typedef struct lx_tsd { #if defined(_ILP32) /* 32-bit thread-specific Linux %gs value */ @@ -42,7 +48,7 @@ typedef struct lx_tsd { /* 64-bit thread-specific Linux %fsbase value */ uintptr_t lxtsd_fsbase; #endif - int lxtsd_exit; + lx_exit_type_t lxtsd_exit; int lxtsd_exit_status; ucontext_t lxtsd_exit_context; } lx_tsd_t; @@ -51,6 +57,8 @@ extern thread_key_t lx_tsd_key; extern void lx_swap_gs(long, long *); +extern void lx_exit_common(lx_exit_type_t, uintptr_t) __NORETURN; + #ifdef __cplusplus } #endif diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h index f69ed45820..d98c8bc586 100644 --- a/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h @@ -21,7 +21,7 @@ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2014 Joyent, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. All rights reserved. 
*/ #ifndef _SYS_LX_TYPES_H @@ -40,8 +40,10 @@ extern "C" { #if defined(_LP64) #define LONG_MAX 9223372036854775807L +#define ULONG_MAX 18446744073709551615UL #else #define LONG_MAX 2147483647L /* max value of a 32-bit "long int" */ +#define ULONG_MAX 4294967295UL /* max value of a 32-bit "ulong int" */ #endif #define LX_SYS_UTS_LN 65 diff --git a/usr/src/lib/brand/lx/testing/ltp_skiplist b/usr/src/lib/brand/lx/testing/ltp_skiplist index 6f1f5f0318..ae23505fcf 100644 --- a/usr/src/lib/brand/lx/testing/ltp_skiplist +++ b/usr/src/lib/brand/lx/testing/ltp_skiplist @@ -216,6 +216,7 @@ setxattr03 sgetmask01 shmget05 sighold02 +signal_test_05 signalfd01 signalfd4_01 signalfd4_02 diff --git a/usr/src/lib/brand/lx/zone/Makefile b/usr/src/lib/brand/lx/zone/Makefile index d72586f9e1..f013697b4e 100644 --- a/usr/src/lib/brand/lx/zone/Makefile +++ b/usr/src/lib/brand/lx/zone/Makefile @@ -21,13 +21,12 @@ # # Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Copyright 2014 Joyent, Inc. All rights reserved. -# Use is subject to license terms. +# Copyright 2015 Joyent, Inc. All rights reserved. # PROGS = lx_install lx_distro_install lx_init_zone lx_boot PROGS += lx_init_zone_debian lx_init_zone_redhat lx_init_zone_ubuntu -PROGS += lx_networking lx_boot_zone_ubuntu +PROGS += lx_networking lx_boot_zone_redhat lx_boot_zone_ubuntu SUBDIRS = distros XMLDOCS = config.xml platform.xml TEMPLATES = SUNWlx.xml SUNWlx26.xml diff --git a/usr/src/lib/brand/lx/zone/lx_boot.ksh b/usr/src/lib/brand/lx/zone/lx_boot.ksh index 9fc53240ca..9604a46ce2 100644 --- a/usr/src/lib/brand/lx/zone/lx_boot.ksh +++ b/usr/src/lib/brand/lx/zone/lx_boot.ksh @@ -21,7 +21,7 @@ # # # Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright 2014, Joyent, Inc. All rights reserved. +# Copyright 2015, Joyent, Inc. # # lx boot script. # @@ -60,15 +60,12 @@ EXIT_CODE=1 # kernel runs the linker at /lib/ld.so.1, which doesn't exist in an lx zone. 
# In lx, the linker is ld-linux.so.N. Hence when we run the native executable # from the wrappers, we explicitly specify /native/lib/ld.so.1 as our 32-bit -# linker (or /native/lib/64/ld.so.1 as our 64-bit linker). - -# Setup a native command which uses the thunk library to access the Linux -# nameservices within the zone. +# linker (or /native/lib/64/ld.so.1 as our 64-bit linker). # # $1 is lx cmd, $2 is native cmd, $3 is an optional inclusion in the script # the lx cmd path must have already be verified with safe_dir # -setup_native_thunk_cmd() { +setup_native_chroot_cmd() { cmd_name=$ZONEROOT/$1 if [ -h $cmd_name -o \( -e $cmd_name -a ! -f $cmd_name \) ]; then @@ -80,10 +77,9 @@ setup_native_thunk_cmd() { #!/bin/sh $3 - exec /native/usr/lib/brand/lx/lx_native \ - /native/lib/ld.so.1 -e LD_NOENVIRON=1 -e LD_NOCONFIG=1 \ - -e LD_PRELOAD_32=/native/usr/lib/brand/lx/lx_thunk.so.1 \ - -e LD_LIBRARY_PATH_32="/native/lib:/native/usr/lib" $2 "\$@" + exec /native/usr/sbin/chroot /native \ + /lib/ld.so.1 -e LD_NOENVIRON=1 -e LD_NOCONFIG=1 \ + $2 "\$@" DONE chmod 755 $ZONEROOT/$1 @@ -180,16 +176,12 @@ safe_opt_dir /etc/update-motd.d # # Replace Linux binaries with native binaries. # -# XXX The lx_thunk code will perform a chroot, so these commands will not work -# if they are run by a non-privileged user. 
-# -setup_native_thunk_cmd /sbin/ipmgmtd /native/lib/inet/ipmgmtd \ +setup_native_chroot_cmd /sbin/ipmgmtd /lib/inet/ipmgmtd \ "export SMF_FMRI=\"svc:/network/ip-interface-management:default\"" -setup_native_thunk_cmd /sbin/ifconfig-native /native/sbin/ifconfig -setup_native_thunk_cmd /sbin/dladm /native/usr/sbin/dladm +setup_native_chroot_cmd /sbin/dladm /usr/sbin/dladm +setup_native_chroot_cmd /sbin/ifconfig-native /sbin/ifconfig setup_native_cmd /sbin/route /native/usr/sbin/route -setup_native_cmd /bin/netstat /native/usr/bin/netstat # # STEP THREE diff --git a/usr/src/lib/brand/lx/zone/lx_boot_zone_redhat.ksh b/usr/src/lib/brand/lx/zone/lx_boot_zone_redhat.ksh new file mode 100644 index 0000000000..57b879cfd0 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/lx_boot_zone_redhat.ksh @@ -0,0 +1,399 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. All rights reserved. +# Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved. +# + +# +# Since CentOS, Red Hat Enterprise Linux, and Fedora all use approximately +# the same source, this file should be good for all three. +# +# Currently, this file assumed a pre-systemd existence, so this should be +# CentOS 6.x or earlier. Testing has been done on CentOS 6.6. +# + +# This script was taken from an earlier file. Initialize some variables here. +install_aborted="Install aborted" +disable_failed="Disable failed" +create_failed="Create failed" +tag=lx-redhat.$$ +tmpfile=/tmp/$tag +cmd2_failed=lx_boot_zone_redhat + +# Function for setting up networking in the zone. 
+# Generate the /etc/rc.d/init,d/network rc script +setup_net() +{ + zonecfg -z $ZONENAME info net >/tmp/$ZONENAME.$$ + zonecfg -z $ZONENAME info attr name=resolvers >>/tmp/$ZONENAME.$$ + rm -f $ZONEROOT/tmp/.lx_net_up + + awk ' + BEGIN { + printf("#! /bin/bash \n\n") + printf("# network Bring up/down networking\n#\n") + printf("### BEGIN INIT INFO\n"); + printf("# Provides: $network\n"); + printf("# Should-Start: iptables ip6tables\n"); + printf("# Short-Description: Bring up/down networking\n"); + printf("# Description: Bring up/down networking\n"); + printf("### END INIT INFO\n\n"); + + printf("case \"\$1\" in\n") + printf(" start)\n") + printf(" [ \"\$EUID\" != \"0\" ] && exit 4\n") + printf(" [ -f /tmp/.lx_net_up ] && exit 0\n") + printf(" touch /tmp/.lx_net_up\n\n") + printf(" /sbin/ipmgmtd || true\n") + printf(" /sbin/ifconfig-native lo0 plumb\n") + printf(" /sbin/ifconfig-native lo0 up\n") + printf(" /sbin/ifconfig-native lo0 inet6 plumb\n") + printf(" /sbin/ifconfig-native lo0 inet6 up\n") + + } { + if ($1 == "net:") { + in_net = 1 + in_attr = 0 + next + } else if ($1 == "attr:") { + in_net = 0 + in_attr = 1 + next + } + + if (in_net == 1) { + if ($1 == "physical:") { + phys = $2 + } else if ($1 == "property:") { + split($2, a, ",") + split(a[1], k, "=") + split(a[2], v, "=") + + val = substr(v[2], 2) + val = substr(val, 1, length(val) - 2) + + if (k[2] == "ip") + ip = val + else if (k[2] == "netmask") + mask = val + else if (k[2] == "primary") + prim = val + else if (k[2] == "gateway") + gw = val + } + + if ($1 == "net:" && phys != "") { + printf(" /sbin/ifconfig-native %s plumb || true\n", phys) + printf(" /sbin/ifconfig-native %s %s netmask %s up || true\n", + phys, ip, mask) + printf(" /sbin/ifconfig-native %s inet6 plumb up || true\n", phys) + if (prim == "true" && length(gw) > 0) + + printf(" /sbin/route add default %s || true\n", gw) + + phys = "" + prim = "" + gw = "" + } + } else if (in_attr == 1) { + if ($1 == "value:") { + nres = split($2, 
resolvers, ",") + } + } + } + END { + printf(" /sbin/ifconfig-native %s plumb || true\n", phys) + printf(" /sbin/ifconfig-native %s %s netmask %s up || true\n", + phys, ip, mask) + printf(" /sbin/ifconfig-native %s inet6 plumb up || true\n", phys) + if (prim == "true" && length(gw) > 0) + printf(" /sbin/route add default %s || true\n", gw) + + printf(" rm -f /etc/resolv.conf\n") + for (i = 1; i <= nres; i++) + printf(" echo \"nameserver %s\" >> %s\n", resolvers[i], + "/etc/resolv.conf") + + printf(" touch /var/lock/subsys/network\n") + printf(" rc=0\n") + printf(" ;;\n") + printf(" stop)\n") + printf(" [ \"\$EUID\" != \"0\" ] && exit 4\n\n") + printf(" rm -f /var/lock/subsys/network\n") + printf(" rc=0\n") + printf(" ;;\n") + printf(" status)\n") + printf(" echo \"Configured devices:\"\n") + printf(" echo \"lo \$(cd /dev/net; ls)\"\n") + printf(" echo \"Currently active devices:\"\n") + printf(" echo \$(/sbin/ip -o link show up | awk -F \": \" %s{ print \$2 }%s)\n", "\047", "\047") + printf(" rc=0\n") + printf(" ;;\n") + printf(" restart|reload|force-reload)\n") + printf(" cd \"\$CWD\"\n") + printf(" \$0 stop\n") + printf(" \$0 start\n") + printf(" rc=\$?\n") + printf(" ;;\n") + printf(" *)\n") + printf(" echo \"Usage: \$0 {start|stop|status|restart|reload|force-reload}\"\n") + printf(" exit 2\n") + printf("esac\n\n") + printf("exit \$rc\n") + + }' /tmp/$ZONENAME.$$ > $fnm + chmod +x $fnm + + rm -f /tmp/$ZONENAME.$$ +} + +# +# The default /etc/inittab might spawn mingetty on each of the virtual consoles +# as well as xdm on the X console. Since we don't have virtual consoles nor +# an X console, spawn a single mingetty on /dev/console instead. +# +# Don't bother changing the file if it looks like we already did. +# +if ! 
egrep -s "Modified by lx brand" $ZONEROOT/etc/inittab; then + sed 's/^[1-6]:/# Disabled by lx brand: &/ + s/^id:5:initdefault:/id:3:initdefault: &/' \ + $ZONEROOT/etc/inittab > $tmpfile + echo "# Modified by lx brand" >> $tmpfile + + # + # Attempt to save off the original inittab + # before moving over the modified version. + # + mv -f $ZONEROOT/etc/inittab $ZONEROOT/etc/inittab.$tag 2>/dev/null + mv -f $tmpfile $ZONEROOT/etc/inittab + chmod 644 $ZONEROOT/etc/inittab +fi + +# +# We use our own way of bringing up networking, so don't let the init system +# try. +# + +mv -f $ZONEROOT/etc/sysconfig/network $ZONEROOT/etc/sysconfig/network.$tag \ + 2>/dev/null + +cat > $ZONEROOT/etc/sysconfig/network <<- EOF + NETWORKING="no" + # + # To enable networking, change the "no" above to "yes" and + # uncomment and fill in the following parameters. + # + # If you are specifying a hostname by name rather than by IP address, + # be sure the system can resolve the name properly via the use of a + # name service and/or the proper name files, as specified by + # nsswitch.conf. See nsswitch.conf(5) for further details. + # + # HOSTNAME=your_hostname_here + # +EOF + +# +# SELinux must be disabled otherwise we won't get past init. +# +egrep -s "^SELINUX=enforcing|^SELINUX=permissive" $ZONEROOT/etc/selinux/config +if [[ $? -eq 0 ]]; then + tmpfile=/tmp/selinux_config.$$ + + sed 's/^SELINUX=.*$/SELINUX=disabled/' \ + $ZONEROOT/etc/selinux/config > $tmpfile + + mv -f $ZONEROOT/etc/selinux/config \ + $ZONEROOT/etc/selinux/config.$tag 2>/dev/null + mv -f $tmpfile $ZONEROOT/etc/selinux/config + chmod 644 $ZONEROOT/etc/selinux/config +fi + +# +# /etc/rc.d/init.d/keytable tries to load a physical keyboard map, which won't +# work in a zone. If we remove etc/sysconfig/keyboard, it won't try this at all. 
+# +mv -f $ZONEROOT/etc/sysconfig/keyboard $ZONEROOT/etc/sysconfig/keyboard.$tag \ + 2>/dev/null + +# +# The following scripts attempt to start services or otherwise configure +# the system in ways incompatible with zones, so don't execute them at boot +# time. +# +unsupported_rc_services=" + auditd + gpm + hpoj + ip6tables + iptables + irda + irqbalance + iscsi + isdn + kudzu + mdmpd + mdmonitor + microcode_ctl + netdump + pcmcia + psacct + random + rawdevices + smartd +" + +for file in $unsupported_rc_services; do + if [[ -a "$ZONEROOT/etc/rc.d/init.d/$file" ]]; then + mv -f "$ZONEROOT/etc/rc.d/init.d/$file" \ + "$ZONEROOT/etc/rc.d/init.d/$file.$tag" + fi + + rc_files="$(echo $ZONEROOT/etc/rc.d/rc[0-6].d/[SK]+([0-9])$file)" + + if [[ "$rc_files" != \ + "$ZONEROOT/etc/rc.d/rc[0-6].d/[SK]+([0-9])$file" ]]; then + for file in $rc_files; do + rm -f "$file" + done + fi +done + +disable_svc() +{ + # XXX - TBD does this work like on Ubuntu? + # + # fnm=$ZONEROOT/etc/init/$1.override + # [[ -h $fnm || -f $fnm ]] && return + # echo "manual" > $fnm + + fnm=$ZONEROOT/etc/init/$1.conf + rm -f $fnm +} + +RMSVCS="ttyS0" + +# +# Now customize upstart +# + +for f in $RMSVCS +do + disable_svc $f +done + +if [[ ! -f $ZONEROOT/etc/init/tty.override ]]; then + cat > $ZONEROOT/etc/init/tty.override <<- EOF + # tty - getty + # + # This service maintains a getty on the console. + + stop on runlevel [S016] + + respawn + instance console + exec /sbin/mingetty console +EOF +fi + +if [[ ! -f $ZONEROOT/etc/init/start-ttys.override ]]; then + cat > $ZONEROOT/etc/init/start-ttys.override <<- EOF + # This service starts the configured number of gettys. + # + + start on stopped rc RUNLEVEL=[2345] + + task + script + initctl start tty + end script +EOF +fi + +# +# There is a lot of stuff in the standard halt and reboot scripts that we +# have no business running in a zone. Fortunately, the stuff we want to +# skip is all in one contiguous chunk. 
+# +# Don't bother to modify the file if it looks like we already did. +# +if ! egrep -s "Disabled by lx brand" $ZONEROOT/etc/rc.d/init.d/halt; then + awk 'BEGIN {skip = ""} + /^# Save mixer/ {skip = "# Disabled by lx brand: "} + /halt.local/ {skip = ""} + /./ {print skip $0}' $ZONEROOT/etc/rc.d/init.d/halt > /tmp/halt.$$ + + if [[ $? -eq 0 ]]; then + mv -f $ZONEROOT/etc/rc.d/init.d/halt \ + $ZONEROOT/etc/rc.d/init.d/halt.$tag 2>/dev/null + mv -f /tmp/halt.$$ $ZONEROOT/etc/rc.d/init.d/halt + chmod 755 $ZONEROOT/etc/rc.d/init.d/halt + fi +fi + +# +# Fix up /etc/rc.d/rc.sysinit: +# +# 1) /sbin/hwclock requires the iopl() system call, which BrandZ won't support. +# Since the hardware clock cannot be set from within a zone, we comment out +# the line. +# +# 2) Disable dmesg commands, since we don't implement klogctl +# +# 3) Disable initlog and the mount of /dev/pts +# +# 4) Don't touch /dev/tty* in order to start virtual terminals, as that won't +# work from within a zone. +# +# 5) Don't try to check the root filesystem (/) as there is no associated +# physical device, and any attempt to run fsck will fail. +# +# Don't modify the rc.sysinit file if it looks like we already did. +# +if ! egrep -s "Disabled by lx brand" $ZONEROOT/etc/rc.d/rc.sysinit; then + tmpfile=/tmp/lx_rc.sysinit.$$ + + sed 's@^/sbin/hwclock@# Disabled by lx brand: &@ + s@^HOSTTYPE=@HOSTTYPE=\"s390\" # Spoofed for lx brand: &@ + s@/bin/dmesg -n@: # Disabled by lx brand: &@ + s@^dmesg -s@# Disabled by lx brand: &@ + s@initlog -c \"fsck@: # Disabled by lx brand: &@ + s@^.*mount .* /dev/pts$@# Disabled by lx brand: &@' \ + $ZONEROOT/etc/rc.d/rc.sysinit > $tmpfile + + # + # Attempt to save off the original rc.sysinit + # before moving over the modified version. 
+ # + mv -f $ZONEROOT/etc/rc.d/rc.sysinit \ + $ZONEROOT/etc/rc.d/rc.sysinit.$tag 2>/dev/null + mv -f $tmpfile $ZONEROOT/etc/rc.d/rc.sysinit + chmod 755 $ZONEROOT/etc/rc.d/rc.sysinit +fi + + +# NOTE: The networking setup assumes an exclusive-stack zone. +iptype=`/usr/sbin/zonecfg -z $ZONENAME info ip-type | cut -f2 -d' '` + +if [[ "$iptype" == "exclusive" ]]; then + fnm=$ZONEROOT/etc/rc.d/init.d/network + if [[ ! -h $fnm && -f $fnm ]] then + setup_net + fi +fi + +# +# upstart modifications are complete +# + +# Hand control back to lx_boot diff --git a/usr/src/lib/brand/lx/zone/platform.xml b/usr/src/lib/brand/lx/zone/platform.xml index e6a2ef46e3..bb689d7c58 100644 --- a/usr/src/lib/brand/lx/zone/platform.xml +++ b/usr/src/lib/brand/lx/zone/platform.xml @@ -22,7 +22,7 @@ Copyright 2007 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. - Copyright 2014 Sun Microsystems, Inc. All rights reserved. + Copyright 2015 Joyent, Inc. DO NOT EDIT THIS FILE. --> @@ -47,7 +47,7 @@ opt="ro" type="lofs" /> <global_mount special="/usr/lib/brand/lx/etc_default_nfs" directory="/native/etc/default/nfs" type="lofs" opt="ro" /> - <global_mount special="/usr/lib/brand/lx/etc_netconfig" + <global_mount special="/etc/netconfig" directory="/native/etc/netconfig" type="lofs" opt="ro" /> <global_mount special="/sbin" directory="/native/sbin" opt="ro,nodevices" type="lofs" /> @@ -137,6 +137,7 @@ <symlink source="stdout" target="../proc/self/fd/1" /> <symlink source="systty" target="console" /> <symlink source="kmsg" target="console" /> + <symlink source="conslog" target="log" /> <!-- Create a mount point for for the /dev/initctl fifo --> <device match="null" name="initctl" /> diff --git a/usr/src/lib/libm/amd64/src/locallibm.il b/usr/src/lib/libm/amd64/src/locallibm.il index 65921d3c97..375720c84d 100644 --- a/usr/src/lib/libm/amd64/src/locallibm.il +++ b/usr/src/lib/libm/amd64/src/locallibm.il @@ -91,7 +91,7 @@ 1: movl %edi,%eax .end - .inline __copysign,0 + .inline 
copysign,0 movq $0x7fffffffffffffff,%rax movdq %rax,%xmm2 andpd %xmm2,%xmm0 @@ -99,19 +99,19 @@ orpd %xmm2,%xmm0 .end - .inline __fabs,0 + .inline fabs,0 movq $0x7fffffffffffffff,%rax movdq %rax,%xmm1 andpd %xmm1,%xmm0 .end - .inline __fabsf,0 + .inline fabsf,0 movl $0x7fffffff,%eax movdl %eax,%xmm1 andps %xmm1,%xmm0 .end - .inline _finite,0 + .inline finite,0 subq $16,%rsp movlpd %xmm0,(%rsp) movq (%rsp),%rcx @@ -123,16 +123,16 @@ addq $16,%rsp .end - .inline __signbit,0 + .inline signbit,0 movmskpd %xmm0,%eax andq $1,%rax .end - .inline __sqrt,0 + .inline sqrt,0 sqrtsd %xmm0,%xmm0 .end - .inline __sqrtf,0 + .inline sqrtf,0 sqrtss %xmm0,%xmm0 .end diff --git a/usr/src/lib/libm/i386/src/locallibm.il b/usr/src/lib/libm/i386/src/locallibm.il index ca79724f86..bca43cb8e5 100644 --- a/usr/src/lib/libm/i386/src/locallibm.il +++ b/usr/src/lib/libm/i386/src/locallibm.il @@ -127,7 +127,7 @@ addl $8,%esp .end - .inline __ceil,0 + .inline ceil,0 subl $8,%esp fstcw (%esp) fldl 8(%esp) /// @@ -148,7 +148,7 @@ addl $8,%esp .end - .inline __copysign,0 + .inline copysign,0 movl 4(%esp),%eax /// eax <-- hi_32(x) movl 12(%esp),%ecx /// ecx <-- hi_32(y) andl $0x7fffffff,%eax / eax <-- hi_32(abs(x)) @@ -170,17 +170,17 @@ fsqrt .end - .inline __fabs,0 + .inline fabs,0 fldl (%esp) /// fabs .end - .inline __fabsf,0 + .inline fabsf,0 flds (%esp) fabs .end - .inline __fabsl,0 + .inline fabsl,0 fldt (%esp) fabs .end @@ -188,7 +188,7 @@ / / branchless _finite / - .inline _finite,0 + .inline finite,0 movl 4(%esp),%eax /// eax <-- hi_32(x) notl %eax / not(bexp) = 0 iff bexp = all 1's andl $0x7ff00000,%eax @@ -196,7 +196,7 @@ shrl $31,%eax .end - .inline __floor,0 + .inline floor,0 subl $8,%esp fstcw (%esp) fldl 8(%esp) /// @@ -217,7 +217,7 @@ addl $8,%esp .end - .inline __isnanf,0 + .inline isnanf,0 movl (%esp),%eax andl $0x7fffffff,%eax negl %eax @@ -278,7 +278,7 @@ fsqrt .end - .inline __rint,0 + .inline rint,0 fldl (%esp) movl 4(%esp),%eax andl $0x7fffffff,%eax @@ -289,39 +289,39 @@ fwait 
/ in case we jumped around frndint .end - .inline __scalbn,0 + .inline scalbn,0 fildl 8(%esp) /// convert N to extended fldl (%esp) /// push x fscale fstp %st(1) .end - .inline __signbit,0 + .inline signbit,0 movl 4(%esp),%eax /// high part of x shrl $31,%eax .end - .inline __signbitf,0 + .inline signbitf,0 movl (%esp),%eax shrl $31,%eax .end - .inline __sqrt,0 + .inline sqrt,0 fldl (%esp) fsqrt .end - .inline __sqrtf,0 + .inline sqrtf,0 flds (%esp) fsqrt .end - .inline __sqrtl,0 + .inline sqrtl,0 fldt (%esp) fsqrt .end - .inline __isnanl,0 + .inline isnanl,0 movl 8(%esp),%eax / ax <-- sign bit and __exp andl $0x00007fff,%eax jz 1f / jump if __exp is all 0 diff --git a/usr/src/lib/libm/sparc/src/locallibm.il b/usr/src/lib/libm/sparc/src/locallibm.il index 3822f5f92d..b6fa0adfc5 100644 --- a/usr/src/lib/libm/sparc/src/locallibm.il +++ b/usr/src/lib/libm/sparc/src/locallibm.il @@ -311,13 +311,13 @@ fsqrtd %f0,%f0 .end - .inline __sqrtf,1 + .inline sqrtf,1 st %o0,[%sp+0x44] ld [%sp+0x44],%f0 fsqrts %f0,%f0 .end - .inline __sqrt,2 + .inline sqrt,2 std %o0,[%sp+0x48] ! store to 8-aligned address ldd [%sp+0x48],%f0 fsqrtd %f0,%f0 @@ -814,7 +814,7 @@ .nonvolatile .end - .inline __fp_class,2 + .inline fp_class,2 sethi %hi(0x80000000),%o2 ! o2 gets 80000000 andn %o0,%o2,%o0 ! o0-o1 gets abs(x) orcc %o0,%o1,%g0 ! 
set cc as x is zero/nonzero @@ -859,7 +859,7 @@ 2: .end - .inline __fp_classf,1 + .inline fp_classf,1 sethi %hi(0x80000000),%o2 andncc %o0,%o2,%o0 bne 1f @@ -1229,14 +1229,14 @@ sub %o0,%o1,%o0 .end - .inline __fabs,2 + .inline fabs,2 st %o0,[%sp+0x48] st %o1,[%sp+0x4c] ldd [%sp+0x48],%f0 fabsd %f0,%f0 .end - .inline __fabsf,1 + .inline fabsf,1 st %o0,[%sp+0x44] ld [%sp+0x44],%f0 fabss %f0,%f0 diff --git a/usr/src/lib/libm/sparcv9/src/locallibm.il b/usr/src/lib/libm/sparcv9/src/locallibm.il index dcef23826a..2cd9e21470 100644 --- a/usr/src/lib/libm/sparcv9/src/locallibm.il +++ b/usr/src/lib/libm/sparcv9/src/locallibm.il @@ -39,11 +39,11 @@ fsqrtd %f0,%f0 .end - .inline __sqrtf,1 + .inline sqrtf,1 fsqrts %f1,%f0 .end - .inline __sqrt,1 + .inline sqrt,1 fsqrtd %f0,%f0 .end @@ -420,7 +420,7 @@ .nonvolatile .end - .inline __fp_class,1 + .inline fp_class,1 fabsd %f0,%f0 std %f0,[%sp+0x87f] ldx [%sp+0x87f],%o0 @@ -460,7 +460,7 @@ 2: .end - .inline __fp_classf,1 + .inline fp_classf,1 fabss %f1,%f1 st %f1,[%sp+0x87f] ld [%sp+0x87f],%o0 @@ -676,11 +676,11 @@ sra %o0,0,%o0 .end - .inline __fabs,1 + .inline fabs,1 fabsd %f0,%f0 .end - .inline __fabsf,1 + .inline fabsf,1 fabss %f1,%f0 .end ! diff --git a/usr/src/lib/libmvec/Makefile.com b/usr/src/lib/libmvec/Makefile.com index b9f39e2279..e574b0df22 100644 --- a/usr/src/lib/libmvec/Makefile.com +++ b/usr/src/lib/libmvec/Makefile.com @@ -179,7 +179,6 @@ FLTRPATH = $(FLTRPATH_$(TARGET_ARCH)) sparc_CFLAGS += -_cc=-W0,-xintrinsic sparcv9_CFLAGS += -_cc=-W0,-xintrinsic -CPPFLAGS_i386 += -Dfabs=__fabs SRCS_mvec_i386 = ../common/__vsqrtf.c diff --git a/usr/src/lib/libnsl/nsl/_utility.c b/usr/src/lib/libnsl/nsl/_utility.c index f01cea5c6e..994e3158ee 100644 --- a/usr/src/lib/libnsl/nsl/_utility.c +++ b/usr/src/lib/libnsl/nsl/_utility.c @@ -26,6 +26,9 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 
+ */ #include "mt.h" #include <stdlib.h> @@ -82,15 +85,14 @@ _t_checkfd(int fd, int force_sync, int api_semantics) t_errno = TBADF; return (NULL); } - tiptr = NULL; - sig_mutex_lock(&_ti_userlock); - if ((tiptr = find_tilink(fd)) != NULL) { - if (!force_sync) { - sig_mutex_unlock(&_ti_userlock); + + if (!force_sync) { + sig_mutex_lock(&_ti_userlock); + tiptr = find_tilink(fd); + sig_mutex_unlock(&_ti_userlock); + if (tiptr != NULL) return (tiptr); - } } - sig_mutex_unlock(&_ti_userlock); /* * Not found or a forced sync is required. @@ -270,7 +272,7 @@ _t_register_lookevent( * signals are deferred, calls to malloc() are safe. */ if ((tlbs->tl_next = malloc(sizeof (struct _ti_lookbufs))) == - NULL) + NULL) return (-1); /* error */ tlbs = tlbs->tl_next; /* @@ -485,9 +487,9 @@ _t_do_ioctl(int fd, char *buf, int size, int cmd, int *retlenp) } if (retval > 0) { - t_errno = retval&0xff; + t_errno = retval & 0xff; if (t_errno == TSYSERR) - errno = (retval >> 8)&0xff; + errno = (retval >> 8) & 0xff; return (-1); } if (retlenp) @@ -689,7 +691,7 @@ add_tilink(int s) * duplicate entry or the end. 
*/ for (curptr = hash_bucket[x]; curptr != NULL; - curptr = curptr->ti_next) { + curptr = curptr->ti_next) { if (curptr->ti_fd == s) { /* * This can happen when the user has close(2)'ed @@ -964,7 +966,6 @@ _t_create(int fd, struct t_info *info, int api_semantics, int *t_capreq_failed) errno = ENOMEM; return (NULL); } - sig_mutex_lock(&ntiptr->ti_lock); /* * Allocate buffers for the new descriptor @@ -973,7 +974,6 @@ _t_create(int fd, struct t_info *info, int api_semantics, int *t_capreq_failed) sv_errno = errno; (void) _t_delete_tilink(fd); t_errno = TSYSERR; - sig_mutex_unlock(&ntiptr->ti_lock); errno = sv_errno; return (NULL); } @@ -1018,7 +1018,6 @@ _t_create(int fd, struct t_info *info, int api_semantics, int *t_capreq_failed) if ((rstate = _t_adjust_state(fd, T_IDLE)) < 0) { sv_errno = errno; (void) _t_delete_tilink(fd); - sig_mutex_unlock(&ntiptr->ti_lock); errno = sv_errno; return (NULL); } @@ -1037,7 +1036,6 @@ _t_create(int fd, struct t_info *info, int api_semantics, int *t_capreq_failed) if ((rstate = _t_adjust_state(fd, T_DATAXFER)) < 0) { sv_errno = errno; (void) _t_delete_tilink(fd); - sig_mutex_unlock(&ntiptr->ti_lock); errno = sv_errno; return (NULL); } @@ -1052,7 +1050,6 @@ _t_create(int fd, struct t_info *info, int api_semantics, int *t_capreq_failed) if ((rstate = _t_adjust_state(fd, T_INREL)) < 0) { sv_errno = errno; (void) _t_delete_tilink(fd); - sig_mutex_unlock(&ntiptr->ti_lock); errno = sv_errno; return (NULL); } @@ -1061,7 +1058,6 @@ _t_create(int fd, struct t_info *info, int api_semantics, int *t_capreq_failed) default: t_errno = TSTATECHNG; (void) _t_delete_tilink(fd); - sig_mutex_unlock(&ntiptr->ti_lock); return (NULL); } @@ -1078,7 +1074,6 @@ _t_create(int fd, struct t_info *info, int api_semantics, int *t_capreq_failed) sv_errno = errno; (void) _t_delete_tilink(fd); t_errno = TSYSERR; - sig_mutex_unlock(&ntiptr->ti_lock); errno = sv_errno; return (NULL); } @@ -1092,7 +1087,6 @@ _t_create(int fd, struct t_info *info, int api_semantics, 
int *t_capreq_failed) sv_errno = errno; (void) _t_delete_tilink(fd); t_errno = TSYSERR; - sig_mutex_unlock(&ntiptr->ti_lock); errno = sv_errno; return (NULL); } @@ -1101,7 +1095,7 @@ _t_create(int fd, struct t_info *info, int api_semantics, int *t_capreq_failed) tsap->tsa_qlen = 0; /* not needed for TLI */ ntiptr->ti_qlen = tsap->tsa_qlen; - sig_mutex_unlock(&ntiptr->ti_lock); + return (ntiptr); } @@ -1162,8 +1156,8 @@ _t_adjust_state(int fd, int instate) * from the stream head. */ if ((arg.ctlbuf.len == 4) && - /* LINTED pointer cast */ - ((*(int32_t *)arg.ctlbuf.buf) == T_CONN_CON)) + /* LINTED pointer cast */ + ((*(int32_t *)arg.ctlbuf.buf) == T_CONN_CON)) outstate = T_OUTCON; break; case T_INREL: @@ -1375,7 +1369,7 @@ _t_acquire_ctlbuf( * allocate new buffer and free after use. */ if ((ctlbufp->maxlen = _t_cbuf_alloc(tiptr, - &ctlbufp->buf)) < 0) { + &ctlbufp->buf)) < 0) { t_errno = TSYSERR; return (-1); } @@ -1419,7 +1413,7 @@ _t_acquire_databuf( * allocate new buffer and free after use. */ if ((databufp->maxlen = _t_rbuf_alloc(tiptr, - &databufp->buf)) < 0) { + &databufp->buf)) < 0) { t_errno = TSYSERR; return (-1); } diff --git a/usr/src/lib/libproc/common/Pcontrol.c b/usr/src/lib/libproc/common/Pcontrol.c index bde48d1416..afa04c43c7 100644 --- a/usr/src/lib/libproc/common/Pcontrol.c +++ b/usr/src/lib/libproc/common/Pcontrol.c @@ -26,6 +26,7 @@ * Portions Copyright 2007 Chad Mynhier * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright 2015, Joyent, Inc. 
*/ #include <assert.h> @@ -1758,6 +1759,9 @@ prldump(const char *caller, lwpstatus_t *lsp) case PR_SUSPENDED: dprintf("%s: SUSPENDED\n", caller); break; + case PR_BRAND: + dprintf("%s: BRANDPRIVATE (%d)\n", caller, lsp->pr_what); + break; default: dprintf("%s: Unknown\n", caller); break; @@ -1937,6 +1941,7 @@ Pstopstatus(struct ps_prochandle *P, case PR_FAULTED: case PR_JOBCONTROL: case PR_SUSPENDED: + case PR_BRAND: break; default: errno = EPROTO; @@ -3511,6 +3516,7 @@ Lstopstatus(struct ps_lwphandle *L, case PR_FAULTED: case PR_JOBCONTROL: case PR_SUSPENDED: + case PR_BRAND: break; default: errno = EPROTO; diff --git a/usr/src/lib/libproc/common/Pcontrol.h b/usr/src/lib/libproc/common/Pcontrol.h index 6697d5736b..9e3aa1ac7d 100644 --- a/usr/src/lib/libproc/common/Pcontrol.h +++ b/usr/src/lib/libproc/common/Pcontrol.h @@ -24,7 +24,7 @@ */ /* * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright (c) 2015, Joyent, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. 
*/ @@ -96,6 +96,7 @@ typedef struct file_info { /* symbol information for a mapped file */ struct map_info *file_map; /* primary (text) mapping */ int file_ref; /* references from map_info_t structures */ int file_fd; /* file descriptor for the mapped file */ + int file_dbgfile; /* file descriptor for the debug file */ int file_init; /* 0: initialization yet to be performed */ GElf_Half file_etype; /* ELF e_type from ehdr */ GElf_Half file_class; /* ELF e_ident[EI_CLASS] from ehdr */ @@ -105,6 +106,7 @@ typedef struct file_info { /* symbol information for a mapped file */ char *file_rname; /* resolved on-disk object pathname */ char *file_rbase; /* pointer to basename of file_rname */ Elf *file_elf; /* ELF handle so we can close */ + Elf *file_dbgelf; /* Debug ELF handle so we can close */ void *file_elfmem; /* data for faked-up ELF handle */ sym_tbl_t file_symtab; /* symbol table */ sym_tbl_t file_dynsym; /* dynamic symbol table */ diff --git a/usr/src/lib/libproc/common/Pcore.c b/usr/src/lib/libproc/common/Pcore.c index c899ee1b20..454360a8b4 100644 --- a/usr/src/lib/libproc/common/Pcore.c +++ b/usr/src/lib/libproc/common/Pcore.c @@ -2695,6 +2695,7 @@ Pfgrab_core(int core_fd, const char *aout_path, int *perr) fp->file_ref = 1; fp->file_fd = -1; + fp->file_dbgfile = -1; fp->file_lo = malloc(sizeof (rd_loadobj_t)); fp->file_lname = strdup(execname); diff --git a/usr/src/lib/libproc/common/Pidle.c b/usr/src/lib/libproc/common/Pidle.c index 3191f4fa7e..c69bcaf860 100644 --- a/usr/src/lib/libproc/common/Pidle.c +++ b/usr/src/lib/libproc/common/Pidle.c @@ -226,6 +226,7 @@ Pgrab_file(const char *fname, int *perr) } fp->file_fd = fd; + fp->file_dbgfile = -1; fp->file_lo->rl_lmident = LM_ID_BASE; if ((fp->file_lname = strdup(fp->file_pname)) == NULL) { *perr = G_STRANGE; diff --git a/usr/src/lib/libproc/common/Psymtab.c b/usr/src/lib/libproc/common/Psymtab.c index 62354f9a7b..41f41f3dde 100644 --- a/usr/src/lib/libproc/common/Psymtab.c +++ 
b/usr/src/lib/libproc/common/Psymtab.c @@ -21,7 +21,7 @@ /* * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2015, Joyent, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. */ @@ -43,6 +43,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <sys/sysmacros.h> +#include <sys/crc32.h> #include "libproc.h" #include "Pcontrol.h" @@ -61,6 +62,7 @@ static int read_ehdr32(struct ps_prochandle *, Elf32_Ehdr *, uint_t *, static int read_ehdr64(struct ps_prochandle *, Elf64_Ehdr *, uint_t *, uintptr_t); #endif +static uint32_t psym_crc32[] = { CRC32_TABLE }; #define DATA_TYPES \ ((1 << STT_OBJECT) | (1 << STT_FUNC) | \ @@ -184,6 +186,7 @@ file_info_new(struct ps_prochandle *P, map_info_t *mptr) mptr->map_file = fptr; fptr->file_ref = 1; fptr->file_fd = -1; + fptr->file_dbgfile = -1; P->num_files++; /* @@ -274,6 +277,10 @@ file_info_free(struct ps_prochandle *P, file_info_t *fptr) free(fptr->file_elfmem); if (fptr->file_fd >= 0) (void) close(fptr->file_fd); + if (fptr->file_dbgelf) + (void) elf_end(fptr->file_dbgelf); + if (fptr->file_dbgfile >= 0) + (void) close(fptr->file_dbgfile); if (fptr->file_ctfp) { ctf_close(fptr->file_ctfp); free(fptr->file_ctf_buf); @@ -1567,6 +1574,170 @@ build_fake_elf(struct ps_prochandle *P, file_info_t *fptr, GElf_Ehdr *ehdr, } /* + * Try and find the file described by path in the file system and validate that + * it matches our CRC before we try and process it for symbol information. + * + * Before we validate the CRC, we check to ensure that it's a normal file + * and not anything else.
+ */
+static boolean_t
+build_alt_debug(file_info_t *fptr, const char *path, uint32_t crc)
+{
+	int fd;
+	struct stat st;
+	Elf *elf;
+	Elf_Scn *scn;
+	GElf_Shdr symshdr, strshdr;
+	Elf_Data *symdata, *strdata;
+	uint32_t c = -1U;
+
+	/*
+	 * Returns B_TRUE once the symbol table from path has been recorded in
+	 * fptr->file_symtab. On every failure path, whatever was opened here
+	 * is released again and B_FALSE is returned with fptr untouched.
+	 */
+	if ((fd = open(path, O_RDONLY)) < 0)
+		return (B_FALSE);
+
+	if (fstat(fd, &st) != 0) {
+		(void) close(fd);
+		return (B_FALSE);
+	}
+
+	if (S_ISREG(st.st_mode) == 0) {
+		(void) close(fd);
+		return (B_FALSE);
+	}
+
+	/*
+	 * Checksum the whole file. The running value starts out as all ones
+	 * and is inverted at end-of-file before it is compared against the
+	 * CRC our caller handed us; a mismatch rejects the file.
+	 */
+	for (;;) {
+		char buf[4096];
+		ssize_t ret = read(fd, buf, sizeof (buf));
+		if (ret == -1) {
+			/*
+			 * An interrupted read is reported through errno, not
+			 * through the return value, so that is what we must
+			 * test before retrying.
+			 */
+			if (errno == EINTR)
+				continue;
+			(void) close(fd);
+			return (B_FALSE);
+		}
+		if (ret == 0) {
+			c = ~c;
+			if (c != crc) {
+				dprintf("crc mismatch, found: 0x%x "
+				    "expected 0x%x\n", c, crc);
+				(void) close(fd);
+				return (B_FALSE);
+			}
+			break;
+		}
+		CRC32(c, buf, ret, c, psym_crc32);
+	}
+
+	elf = elf_begin(fd, ELF_C_READ, NULL);
+	if (elf == NULL) {
+		(void) close(fd);
+		return (B_FALSE);
+	}
+
+	if (elf_kind(elf) != ELF_K_ELF) {
+		goto fail;
+	}
+
+	/*
+	 * Do two passes, first see if we have a symbol header, then see if we
+	 * can find the corresponding linked string table.
+	 */
+	scn = NULL;
+	for (scn = elf_nextscn(elf, scn); scn != NULL;
+	    scn = elf_nextscn(elf, scn)) {
+
+		if (gelf_getshdr(scn, &symshdr) == NULL)
+			goto fail;
+
+		if (symshdr.sh_type != SHT_SYMTAB)
+			continue;
+
+		if ((symdata = elf_getdata(scn, NULL)) == NULL)
+			goto fail;
+
+		break;
+	}
+	if (scn == NULL)
+		goto fail;
+
+	/*
+	 * The symbol count below is sh_size / sh_entsize. A file with a zero
+	 * entry size would make that a division by zero, so treat it as
+	 * unusable rather than trusting it.
+	 */
+	if (symshdr.sh_entsize == 0)
+		goto fail;
+
+	if ((scn = elf_getscn(elf, symshdr.sh_link)) == NULL)
+		goto fail;
+
+	if (gelf_getshdr(scn, &strshdr) == NULL)
+		goto fail;
+
+	if ((strdata = elf_getdata(scn, NULL)) == NULL)
+		goto fail;
+
+	fptr->file_symtab.sym_data_pri = symdata;
+	fptr->file_symtab.sym_symn += symshdr.sh_size / symshdr.sh_entsize;
+	fptr->file_symtab.sym_strs = strdata->d_buf;
+	fptr->file_symtab.sym_strsz = strdata->d_size;
+	fptr->file_symtab.sym_hdr_pri = symshdr;
+	fptr->file_symtab.sym_strhdr = strshdr;
+
+	dprintf("successfully loaded additional debug symbols for %s from %s\n",
+	    fptr->file_rname, path);
+
+	/*
+	 * Leave both the descriptor and the ELF handle open and remember them
+	 * in fptr; they are released when the file_info_t is torn down.
+	 */
+	fptr->file_dbgfile = fd;
+	fptr->file_dbgelf = elf;
+	return (B_TRUE);
+fail:
+	(void) elf_end(elf);
+	(void) close(fd);
+	return (B_FALSE);
+}
+
+/*
+ * We're here because the object in question has no symbol information, that's a
+ * bit unfortunate. However, we've found that there's a .gnu_debuglink sitting
+ * around. By convention that means that given the current location of the
+ * object on disk, and the debug name that we found in the binary we need to
+ * search the following locations for a matching file.
+ *
+ * <dirname>/.debug/<debug-name>
+ * /usr/lib/debug/<dirname>/<debug-name>
+ *
+ * In the future, we should consider supporting looking in the prefix's
+ * lib/debug directory for a matching object.
+ */
+static void
+find_alt_debug(file_info_t *fptr, const char *name, uint32_t crc)
+{
+	char *rcopy, *dir, *cand = NULL;
+	boolean_t loaded = B_FALSE;
+
+	dprintf("find_alt_debug: looking for %s, crc 0x%x\n", name, crc);
+	if (fptr->file_rname == NULL) {
+		dprintf("find_alt_debug: encountered null file_rname\n");
+		return;
+	}
+
+	/* dirname() is handed a private copy of the resolved path. */
+	if ((rcopy = strdup(fptr->file_rname)) == NULL)
+		return;
+	dir = dirname(rcopy);
+
+	/* First candidate: a .debug directory next to the object itself. */
+	if (asprintf(&cand, "%s/.debug/%s", dir, name) != -1) {
+		dprintf("attempting to load alternate debug information "
+		    "from %s\n", cand);
+		loaded = build_alt_debug(fptr, cand, crc);
+		free(cand);
+	}
+
+	/*
+	 * Second candidate, tried only when the first one did not pan out:
+	 * the same directory mirrored underneath /usr/lib/debug.
+	 */
+	if (loaded != B_TRUE &&
+	    asprintf(&cand, "/usr/lib/debug/%s/%s", dir, name) != -1) {
+		dprintf("attempting to load alternate debug information "
+		    "from %s\n", cand);
+		(void) build_alt_debug(fptr, cand, crc);
+		free(cand);
+	}
+
+	free(rcopy);
+}
+
+/*
  * Build the symbol table for the given mapped file.
  */
 void
@@ -1587,7 +1758,8 @@ Pbuild_file_symtab(struct ps_prochandle *P, file_info_t *fptr)
 		GElf_Shdr c_shdr;
 		Elf_Data *c_data;
 		const char *c_name;
-	} *cp, *cache = NULL, *dyn = NULL, *plt = NULL, *ctf = NULL;
+	} *cp, *cache = NULL, *dyn = NULL, *plt = NULL, *ctf = NULL,
+	    *dbglink = NULL;
 
 	if (fptr->file_init)
 		return;	/* We've already processed this file */
@@ -1813,10 +1985,70 @@ Pbuild_file_symtab(struct ps_prochandle *P, file_info_t *fptr)
 				continue;
 			}
 			ctf = cp;
+		} else if (strcmp(cp->c_name, ".gnu_debuglink") == 0) {
+			dprintf("found .gnu_debuglink section for %s\n",
+			    fptr->file_rname);
+			/*
+			 * Let's make sure of a few things before we do this.
+			 */
+			if (cp->c_shdr.sh_type == SHT_PROGBITS &&
+			    cp->c_data->d_buf != NULL) {
+				dprintf(".gnu_debuglink pases initial "
+				    "sanity\n");
+				dbglink = cp;
+			}
 		}
 	}
 
 	/*
+	 * If we haven't found any symbol table information and we have found a
+	 * .gnu_debuglink, it's time to try and figure out where we might find
+	 * this.
To do so, we're going to first verify that the elf data seems
+	 * somewhat sane, eg. the elf data should be a string, so we want to
+	 * verify we have a null-terminator.
+	 */
+	if (fptr->file_symtab.sym_data_pri == NULL && dbglink != NULL) {
+		char *c = dbglink->c_data->d_buf;
+		size_t i;
+		boolean_t found = B_FALSE;
+		Elf_Data *ed = dbglink->c_data;
+		uint32_t crc = 0;
+
+		for (i = 0; i < ed->d_size; i++) {
+			if (c[i] == '\0') {
+				dprintf("got .gnu_debuglink terminator at "
+				    "offset %lu\n", (unsigned long)i);
+				/*
+				 * After the null terminator, there should be
+				 * padding, followed by a 4 byte CRC of the
+				 * file. If we don't see this, we're going to
+				 * assume this is bogus.
+				 *
+				 * The CRC begins at the first 4-byte boundary
+				 * past the terminator, so move i from the
+				 * terminator's index up to that boundary. A
+				 * terminator that itself sits on a boundary
+				 * is followed by three bytes of padding, which
+				 * is why that case advances by a full word.
+				 */
+				i += sizeof (uint32_t) -
+				    (i % sizeof (uint32_t));
+				if (i + sizeof (uint32_t) == ed->d_size) {
+					found = B_TRUE;
+					/*
+					 * Copy the CRC out byte-wise rather
+					 * than dereferencing a cast pointer
+					 * into the section data.
+					 */
+					(void) memcpy(&crc, c + i,
+					    sizeof (crc));
+				} else {
+					dprintf(".gnu_debuglink size mismatch, "
+					    "expected: %lu, found: %lu\n",
+					    (unsigned long)i,
+					    (unsigned long)ed->d_size);
+				}
+				break;
+			}
+		}
+
+		if (found == B_TRUE)
+			find_alt_debug(fptr, dbglink->c_data->d_buf, crc);
+	}
+
+	/*
 	 * At this point, we've found all the symbol tables we're ever going
 	 * to find: the ones in the loop above and possibly the symtab that
 	 * was included in the core file. Before we perform any lookups, we
@@ -1943,7 +2175,13 @@ bad:
 		fptr->file_elfmem = NULL;
 	}
 	(void) close(fptr->file_fd);
+	if (fptr->file_dbgelf != NULL)
+		(void) elf_end(fptr->file_dbgelf);
+	fptr->file_dbgelf = NULL;
+	if (fptr->file_dbgfile >= 0)
+		(void) close(fptr->file_dbgfile);
 	fptr->file_fd = -1;
+	fptr->file_dbgfile = -1;
 }
 
 /* |
