diff options
Diffstat (limited to 'usr/src/uts/common')
45 files changed, 3941 insertions, 1075 deletions
diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c index b964aab1d3..4507c0303c 100644 --- a/usr/src/uts/common/brand/lx/os/lx_brand.c +++ b/usr/src/uts/common/brand/lx/os/lx_brand.c @@ -62,6 +62,7 @@ #include <sys/sdt.h> #include <sys/x86_archext.h> #include <sys/controlregs.h> +#include <sys/core.h> #include <lx_signum.h> int lx_debug = 0; @@ -77,6 +78,10 @@ void lx_set_kern_version(zone_t *, char *); void lx_copy_procdata(proc_t *, proc_t *); extern int getsetcontext(int, void *); +extern int waitsys(idtype_t, id_t, siginfo_t *, int); +#if defined(_SYSCALL32_IMPL) +extern int waitsys32(idtype_t, id_t, siginfo_t *, int); +#endif extern void lx_proc_exit(proc_t *, klwp_t *); static void lx_psig_to_proc(proc_t *, kthread_t *, int); @@ -106,35 +111,38 @@ static int lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args, caddr_t exec_file, struct cred *cred, int brand_action); static boolean_t lx_native_exec(uint8_t, const char **); -static void lx_ptrace_exectrap(proc_t *); static uint32_t lx_map32limit(proc_t *); /* lx brand */ struct brand_ops lx_brops = { - lx_init_brand_data, - lx_free_brand_data, - lx_brandsys, - lx_setbrand, - lx_getattr, - lx_setattr, - lx_copy_procdata, - lx_proc_exit, - lx_exec, - lx_setrval, - lx_initlwp, - lx_forklwp, - lx_freelwp, - lx_exitlwp, - lx_elfexec, - NULL, - NULL, - lx_psig_to_proc, - NSIG, - lx_exit_with_sig, - lx_wait_filter, - lx_native_exec, - lx_ptrace_exectrap, - lx_map32limit + lx_init_brand_data, /* b_init_brand_data */ + lx_free_brand_data, /* b_free_brand_data */ + lx_brandsys, /* b_brandsys */ + lx_setbrand, /* b_setbrand */ + lx_getattr, /* b_getattr */ + lx_setattr, /* b_setattr */ + lx_copy_procdata, /* b_copy_procdata */ + lx_proc_exit, /* b_proc_exit */ + lx_exec, /* b_exec */ + lx_setrval, /* b_lwp_setrval */ + lx_initlwp, /* b_initlwp */ + lx_forklwp, /* b_forklwp */ + lx_freelwp, /* b_freelwp */ + lx_exitlwp, /* b_lwpexit */ + lx_elfexec, /* b_elfexec 
*/ + NULL, /* b_sigset_native_to_brand */ + NULL, /* b_sigset_brand_to_native */ + lx_psig_to_proc, /* b_psig_to_proc */ + NSIG, /* b_nsig */ + lx_exit_with_sig, /* b_exit_with_sig */ + lx_wait_filter, /* b_wait_filter */ + lx_native_exec, /* b_native_exec */ + NULL, /* b_ptrace_exectrap */ + lx_map32limit, /* b_map32limit */ + lx_stop_notify, /* b_stop_notify */ + lx_waitid_helper, /* b_waitid_helper */ + lx_sigcld_repost, /* b_sigcld_repost */ + lx_issig_stop /* b_issig_stop */ }; struct brand_mach_ops lx_mops = { @@ -166,33 +174,39 @@ static struct modlinkage modlinkage = { void lx_proc_exit(proc_t *p, klwp_t *lwp) { - zone_t *z = p->p_zone; int sig = ptolxproc(p)->l_signal; - ASSERT(p->p_brand != NULL); - ASSERT(p->p_brand_data != NULL); - - /* - * If init is dying and we aren't explicitly shutting down the zone - * or the system, then Solaris is about to restart init. The Linux - * init is not designed to handle a restart, which it interprets as - * a reboot. To give it a sane environment in which to run, we - * reboot the zone. - */ - if (p->p_pid == z->zone_proc_initpid) { - if (z->zone_boot_err == 0 && - z->zone_restart_init && - zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && - zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) - (void) zone_kadmin(A_REBOOT, 0, NULL, CRED()); - } + VERIFY(p->p_brand == &lx_brand); + VERIFY(p->p_brand_data != NULL); /* * We might get here if fork failed (e.g. ENOMEM) so we don't always * have an lwp (see brand_clearbrand). */ - if (lwp != NULL) + if (lwp != NULL) { + boolean_t reenter_mutex = B_FALSE; + + /* + * This brand entry point is called variously with and without + * the process p_lock held. It would be possible to refactor + * the brand infrastructure so that proc_exit() explicitly + * calls this hook (b_lwpexit/lx_exitlwp) for the last LWP in a + * process prior to detaching the brand with + * brand_clearbrand(). Absent such refactoring, we + * conditionally exit the mutex for the duration of the call. 
+ * + * The atomic replacement of both "p_brand" and "p_brand_data" + * is not affected by dropping and reacquiring the mutex here. + */ + if (mutex_owned(&p->p_lock) != 0) { + mutex_exit(&p->p_lock); + reenter_mutex = B_TRUE; + } lx_exitlwp(lwp); + if (reenter_mutex) { + mutex_enter(&p->p_lock); + } + } /* * The call path here is: @@ -260,310 +274,6 @@ lx_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize) return (-EINVAL); } -/* - * Enable/disable ptrace system call tracing for the given LWP. Enabling is - * done by both setting the flag in that LWP's brand data (in the kernel) and - * setting the process-wide trace flag (in the brand library of the traced - * process). - */ -static int -lx_ptrace_syscall_set(pid_t pid, id_t lwpid, int set) -{ - proc_t *p; - kthread_t *t; - klwp_t *lwp; - lx_proc_data_t *lpdp; - lx_lwp_data_t *lldp; - uintptr_t addr; - int ret, flag = 1; - - if ((p = sprlock(pid)) == NULL) - return (ESRCH); - - if (priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) { - sprunlock(p); - return (EPERM); - } - - if ((t = idtot(p, lwpid)) == NULL || (lwp = ttolwp(t)) == NULL) { - sprunlock(p); - return (ESRCH); - } - - if ((lpdp = p->p_brand_data) == NULL || - (lldp = lwp->lwp_brand) == NULL) { - sprunlock(p); - return (ESRCH); - } - - if (set) { - /* - * Enable the ptrace flag for this LWP and this process. Note - * that we will turn off the LWP's ptrace flag, but we don't - * turn off the process's ptrace flag. - */ - lldp->br_ptrace = 1; - lpdp->l_ptrace = 1; - - addr = lpdp->l_traceflag; - - mutex_exit(&p->p_lock); - - /* - * This can fail only in some rare corner cases where the - * process is exiting or we're completely out of memory. In - * these cases, it's sufficient to return an error to the ptrace - * consumer and leave the process-wide flag set. 
- */ - ret = uwrite(p, &flag, sizeof (flag), addr); - - mutex_enter(&p->p_lock); - - /* - * If we couldn't set the trace flag, unset the LWP's ptrace - * flag as there ptrace consumer won't expect this LWP to stop. - */ - if (ret != 0) - lldp->br_ptrace = 0; - } else { - lldp->br_ptrace = 0; - ret = 0; - } - - sprunlock(p); - - if (ret != 0) - ret = EIO; - - return (ret); -} - -static void -lx_ptrace_fire(void) -{ - kthread_t *t = curthread; - klwp_t *lwp = ttolwp(t); - lx_lwp_data_t *lldp = lwp->lwp_brand; - - /* - * The ptrace flag only applies until the next event is encountered - * for the given LWP. If it's set, turn off the flag and poke the - * controlling process by raising a signal. - */ - if (lldp->br_ptrace) { - lldp->br_ptrace = 0; - tsignal(t, SIGTRAP); - } -} - -/* - * Supports Linux PTRACE_SETOPTIONS handling which is similar to PTRACE_TRACEME - * but return an event in the second byte of si_status. - */ -static int -lx_ptrace_ext_opts(int cmd, pid_t pid, uintptr_t val, int64_t *rval) -{ - proc_t *p; - lx_proc_data_t *lpdp; - uint_t ret; - - if ((p = sprlock(pid)) == NULL) - return (ESRCH); - - /* - * Note that priv_proc_cred_perm can disallow access to ourself if - * the proc's SNOCD p_flag is set, so we skip that check for ourself. 
- */ - if (curproc != p && - priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) { - sprunlock(p); - return (EPERM); - } - - if ((lpdp = p->p_brand_data) == NULL) { - sprunlock(p); - return (ESRCH); - } - - switch (cmd) { - case B_PTRACE_EXT_OPTS_SET: - lpdp->l_ptrace_opts = (uint_t)val; - break; - - case B_PTRACE_EXT_OPTS_GET: - ret = lpdp->l_ptrace_opts; - if (lpdp->l_ptrace_is_traced) - ret |= EMUL_PTRACE_IS_TRACED; - break; - - case B_PTRACE_EXT_OPTS_EVT: - ret = lpdp->l_ptrace_event; - lpdp->l_ptrace_event = 0; - break; - - case B_PTRACE_DETACH: - lpdp->l_ptrace_is_traced = 0; - break; - - default: - sprunlock(p); - return (EINVAL); - } - - sprunlock(p); - - if (cmd == B_PTRACE_EXT_OPTS_GET || cmd == B_PTRACE_EXT_OPTS_EVT) { - if (copyout(&ret, (void *)val, sizeof (uint_t)) != 0) - return (EFAULT); - } - - *rval = 0; - return (0); -} - -/* - * Used to support Linux PTRACE_SETOPTIONS handling and similar to - * PTRACE_TRACEME. We signal ourselves to stop on return from this syscall and - * setup the event reason so the emulation can pull this out when someone - * 'waits' on this process. 
- */ -static void -lx_ptrace_stop_for_option(int option, ulong_t msg) -{ - proc_t *p = ttoproc(curthread); - sigqueue_t *sqp; - lx_proc_data_t *lpdp; - boolean_t child = B_FALSE; - - if ((lpdp = p->p_brand_data) == NULL) { - /* this should never happen but just to be safe */ - return; - } - - if (option & EMUL_PTRACE_O_CHILD) { - child = B_TRUE; - option &= ~EMUL_PTRACE_O_CHILD; - } - - lpdp->l_ptrace_is_traced = 1; - - /* Track the event as the reason for stopping */ - switch (option) { - case LX_PTRACE_O_TRACEFORK: - if (!child) { - lpdp->l_ptrace_event = LX_PTRACE_EVENT_FORK; - lpdp->l_ptrace_eventmsg = msg; - } - break; - case LX_PTRACE_O_TRACEVFORK: - if (!child) { - lpdp->l_ptrace_event = LX_PTRACE_EVENT_VFORK; - lpdp->l_ptrace_eventmsg = msg; - } - break; - case LX_PTRACE_O_TRACECLONE: - if (!child) { - lpdp->l_ptrace_event = LX_PTRACE_EVENT_CLONE; - lpdp->l_ptrace_eventmsg = msg; - } - break; - case LX_PTRACE_O_TRACEEXEC: - lpdp->l_ptrace_event = LX_PTRACE_EVENT_EXEC; - break; - case LX_PTRACE_O_TRACEVFORKDONE: - lpdp->l_ptrace_event = LX_PTRACE_EVENT_VFORK_DONE; - lpdp->l_ptrace_eventmsg = msg; - break; - case LX_PTRACE_O_TRACEEXIT: - lpdp->l_ptrace_event = LX_PTRACE_EVENT_EXIT; - lpdp->l_ptrace_eventmsg = msg; - break; - case LX_PTRACE_O_TRACESECCOMP: - lpdp->l_ptrace_event = LX_PTRACE_EVENT_SECCOMP; - break; - } - - /* - * Post the required signal to ourselves so that we stop. - * - * Although Linux will send a SIGSTOP to a child process which is - * stopped due to PTRACE_O_TRACEFORK, etc., we do not send that signal - * since that leads us down the code path in the kernel which calls - * stop(PR_JOBCONTROL, SIGSTOP), which in turn means that the TS_XSTART - * flag gets turned off on the thread and this makes it complex to - * actually get this process going when the userland application wants - * to detach. Since consumers don't seem to depend on the specific - * signal, we'll just stop both the parent and child the same way. 
We - * do keep track of both the parent and child via the - * EMUL_PTRACE_O_CHILD bit, in case we need to revisit this later. - */ - psignal(p, SIGTRAP); - - /* - * Since we're stopping, we need to post the SIGCHLD to the parent. The - * code in sigcld expects p_wdata to be set to SIGTRAP before it can - * send the signal, so do that here. We also need p_wcode to be set as - * if we are ptracing, even though we're not really (see the code in - * stop() when procstop is set and p->p_proc_flag has the P_PR_PTRACE - * bit set). This is needed so that when the application calls waitid, - * it will properly retrieve the process. - */ - sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); - mutex_enter(&pidlock); - p->p_wdata = SIGTRAP; - p->p_wcode = CLD_TRAPPED; - sigcld(p, sqp); - mutex_exit(&pidlock); -} - -static int -lx_ptrace_geteventmsg(pid_t pid, ulong_t *msgp) -{ - proc_t *p; - lx_proc_data_t *lpdp; - ulong_t msg; - - if ((p = sprlock(pid)) == NULL) - return (ESRCH); - - if (curproc != p && - priv_proc_cred_perm(curproc->p_cred, p, NULL, VREAD) != 0) { - sprunlock(p); - return (EPERM); - } - - if ((lpdp = p->p_brand_data) == NULL) { - sprunlock(p); - return (ESRCH); - } - - msg = lpdp->l_ptrace_eventmsg; - lpdp->l_ptrace_eventmsg = 0; - - sprunlock(p); - - if (copyout(&msg, (void *)msgp, sizeof (ulong_t)) != 0) - return (EFAULT); - - return (0); -} - -/* - * Brand entry to allow us to optionally generate the ptrace SIGTRAP on exec(). - * This will only be called if ptrace is enabled -- and we only generate the - * SIGTRAP if LX_PTRACE_O_TRACEEXEC hasn't been set. 
- */ -void -lx_ptrace_exectrap(proc_t *p) -{ - lx_proc_data_t *lpdp; - - if ((lpdp = p->p_brand_data) == NULL || - !(lpdp->l_ptrace_opts & LX_PTRACE_O_TRACEEXEC)) { - psignal(p, SIGTRAP); - } -} - uint32_t lx_map32limit(proc_t *p) { @@ -718,6 +428,12 @@ lx_init_brand_data(zone_t *zone) (void) strlcpy(data->lxzd_kernel_version, "2.4.21", LX_VERS_MAX); data->lxzd_max_syscall = LX_NSYSCALLS; zone->zone_brand_data = data; + + /* + * In Linux, if the init(1) process terminates the system panics. + * The zone must reboot to simulate this behaviour. + */ + zone->zone_reboot_on_init_exit = B_TRUE; } void @@ -752,6 +468,8 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, struct termios *termios; uint_t termios_len; int error; + int code; + int sig; lx_brand_registration_t reg; lx_lwp_data_t *lwpd; @@ -832,6 +550,16 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, lwpd->br_scms = 1; #endif + if (pd->l_traceflag != NULL && pd->l_ptrace != 0) { + /* + * If ptrace(2) is active on this process, it is likely + * that we just finished an emulated execve(2) in a + * traced child. 
The usermode traceflag will have been + * clobbered by the exec, so we set it again here: + */ + (void) suword32((void *)pd->l_traceflag, 1); + } + *rval = 0; return (0); case B_TTYMODES: @@ -931,11 +659,6 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, return (0); } - case B_PTRACE_SYSCALL: - *rval = lx_ptrace_syscall_set((pid_t)arg1, (id_t)arg2, - (int)arg3); - return (0); - case B_SYSENTRY: if (lx_systrace_enabled) { ASSERT(lx_systrace_entry_ptr != NULL); @@ -963,7 +686,7 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, #endif } - lx_ptrace_fire(); + (void) lx_ptrace_stop(LX_PR_SYSENTRY); pd = p->p_brand_data; @@ -984,7 +707,7 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, (*lx_systrace_return_ptr)(arg1, arg2, arg2, 0, 0, 0, 0); } - lx_ptrace_fire(); + (void) lx_ptrace_stop(LX_PR_SYSEXIT); pd = p->p_brand_data; @@ -1010,20 +733,55 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, */ return (lx_sched_affinity(cmd, arg1, arg2, arg3, rval)); - case B_PTRACE_EXT_OPTS: + case B_PTRACE_STOP_FOR_OPT: + return (lx_ptrace_stop_for_option((int)arg1, arg2 == 0 ? + B_FALSE : B_TRUE, (ulong_t)arg3)); + + case B_PTRACE_CLONE_BEGIN: + return (lx_ptrace_set_clone_inherit((int)arg1, arg2 == 0 ? + B_FALSE : B_TRUE)); + + case B_PTRACE_KERNEL: + return (lx_ptrace_kernel((int)arg1, (pid_t)arg2, arg3, arg4)); + + case B_HELPER_WAITID: { + idtype_t idtype = (idtype_t)arg1; + id_t id = (id_t)arg2; + siginfo_t *infop = (siginfo_t *)arg3; + int options = (int)arg4; + + lwpd = ttolxlwp(curthread); + /* - * Set or get the ptrace extended options or get the event - * reason for the stop. + * Our brand-specific waitid helper only understands a subset of + * the possible idtypes. 
Ensure we keep to that subset here: */ - return (lx_ptrace_ext_opts((int)arg1, (pid_t)arg2, arg3, rval)); + if (idtype != P_ALL && idtype != P_PID && idtype != P_PGID) { + return (EINVAL); + } - case B_PTRACE_STOP_FOR_OPT: - lx_ptrace_stop_for_option((int)arg1, (ulong_t)arg2); - return (0); + /* + * Enable the return of emulated ptrace(2) stop conditions + * through lx_waitid_helper, and stash the Linux-specific + * extra waitid() flags. + */ + lwpd->br_waitid_emulate = B_TRUE; + lwpd->br_waitid_flags = (int)arg5; + +#if defined(_SYSCALL32_IMPL) + if (get_udatamodel() != DATAMODEL_NATIVE) { + return (waitsys32(idtype, id, infop, options)); + } else +#endif + { + return (waitsys(idtype, id, infop, options)); + } + + lwpd->br_waitid_emulate = B_FALSE; + lwpd->br_waitid_flags = 0; - case B_PTRACE_GETEVENTMSG: - lx_ptrace_geteventmsg((pid_t)arg1, (ulong_t *)arg2); return (0); + } case B_UNSUPPORTED: { @@ -1186,7 +944,19 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, return (0); case B_EXIT_AS_SIG: - exit(CLD_KILLED, (int)arg1); + code = CLD_KILLED; + sig = (int)arg1; + proc_is_exiting(p); + if (exitlwps(1) != 0) { + mutex_enter(&p->p_lock); + lwp_exit(); + } + ttolwp(curthread)->lwp_cursig = sig; + if (sig == SIGSEGV) { + if (core(sig, 0) == 0) + code = CLD_DUMPED; + } + exit(code, sig); /* NOTREACHED */ break; @@ -1254,6 +1024,7 @@ lx_copy_procdata(proc_t *child, proc_t *parent) ppd = parent->p_brand_data; ASSERT(ppd != NULL); + ASSERT(parent->p_brand == &lx_brand); cpd = kmem_alloc(sizeof (lx_proc_data_t), KM_SLEEP); *cpd = *ppd; @@ -1322,13 +1093,14 @@ lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args, struct execenv origenv; stack_t orig_sigaltstack; struct user *up = PTOU(ttoproc(curthread)); - lx_elf_data_t *edp = - &((lx_proc_data_t *)ttoproc(curthread)->p_brand_data)->l_elf_data; + lx_elf_data_t *edp; char *lib_path = NULL; ASSERT(ttoproc(curthread)->p_brand == &lx_brand); ASSERT(ttoproc(curthread)->p_brand_data != NULL); 
+ edp = &ttolxproc(curthread)->l_elf_data; + if (args->to_model == DATAMODEL_NATIVE) { lib_path = LX_LIB_PATH; } @@ -1685,6 +1457,7 @@ _init(void) /* for lx_futex() */ lx_futex_init(); + lx_ptrace_init(); err = mod_install(&modlinkage); if (err != 0) { @@ -1724,6 +1497,7 @@ _fini(void) if (brand_zone_count(&lx_brand)) return (EBUSY); + lx_ptrace_fini(); lx_pid_fini(); lx_ioctl_fini(); diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c index c550ecf9af..abb0ab6e63 100644 --- a/usr/src/uts/common/brand/lx/os/lx_misc.c +++ b/usr/src/uts/common/brand/lx/os/lx_misc.c @@ -80,7 +80,7 @@ lx_exec() klwp_t *lwp = ttolwp(curthread); struct lx_lwp_data *lwpd = lwptolxlwp(lwp); proc_t *p = ttoproc(curthread); - lx_proc_data_t *pd = p->p_brand_data; + lx_proc_data_t *pd = ptolxproc(p); int err; /* @@ -113,6 +113,13 @@ lx_exec() lx_pid_reassign(curthread); } + /* + * Inform ptrace(2) that we are processing an execve(2) call so that if + * we are traced we can post either the PTRACE_EVENT_EXEC event or the + * legacy SIGTRAP. + */ + (void) lx_ptrace_stop_for_option(LX_PTRACE_O_TRACEEXEC, B_FALSE, 0); + /* clear the fsbase values until the app. 
can reinitialize them */ lwpd->br_lx_fsbase = NULL; lwpd->br_ntv_fsbase = NULL; @@ -137,15 +144,21 @@ void lx_exitlwp(klwp_t *lwp) { struct lx_lwp_data *lwpd = lwptolxlwp(lwp); - proc_t *p; + proc_t *p = lwptoproc(lwp); kthread_t *t; sigqueue_t *sqp = NULL; pid_t ppid; id_t ptid; + VERIFY(MUTEX_NOT_HELD(&p->p_lock)); + if (lwpd == NULL) return; /* second time thru' */ + mutex_enter(&p->p_lock); + lx_ptrace_exit(p, lwp); + mutex_exit(&p->p_lock); + if (lwpd->br_clear_ctidp != NULL) { (void) suword32(lwpd->br_clear_ctidp, 0); (void) lx_futex((uintptr_t)lwpd->br_clear_ctidp, FUTEX_WAKE, 1, @@ -226,9 +239,17 @@ lx_freelwp(klwp_t *lwp) if (lwpd != NULL) { (void) removectx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, lx_save, NULL); - if (lwpd->br_pid != 0) + if (lwpd->br_pid != 0) { lx_pid_rele(lwptoproc(lwp)->p_pid, lwptot(lwp)->t_tid); + } + + /* + * Ensure that lx_ptrace_exit() has been called to detach + * ptrace(2) tracers and tracees. + */ + VERIFY(lwpd->br_ptrace_tracer == NULL); + VERIFY(lwpd->br_ptrace_accord == NULL); lwp->lwp_brand = NULL; kmem_free(lwpd, sizeof (struct lx_lwp_data)); @@ -238,8 +259,8 @@ lx_freelwp(klwp_t *lwp) int lx_initlwp(klwp_t *lwp) { - struct lx_lwp_data *lwpd; - struct lx_lwp_data *plwpd; + lx_lwp_data_t *lwpd; + lx_lwp_data_t *plwpd = ttolxlwp(curthread); kthread_t *tp = lwptot(lwp); lwpd = kmem_zalloc(sizeof (struct lx_lwp_data), KM_SLEEP); @@ -265,8 +286,7 @@ lx_initlwp(klwp_t *lwp) if (tp->t_next == tp) { lwpd->br_ppid = tp->t_procp->p_ppid; lwpd->br_ptid = -1; - } else if (ttolxlwp(curthread) != NULL) { - plwpd = ttolxlwp(curthread); + } else if (plwpd != NULL) { bcopy(plwpd->br_tls, lwpd->br_tls, sizeof (lwpd->br_tls)); lwpd->br_ppid = plwpd->br_pid; lwpd->br_ptid = curthread->t_tid; @@ -292,6 +312,14 @@ lx_initlwp(klwp_t *lwp) installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, lx_save, NULL); + /* + * If the parent LWP has a ptrace(2) tracer, the new LWP may + * need to inherit that same tracer. 
+ */ + if (plwpd != NULL) { + lx_ptrace_inherit_tracer(plwpd, lwpd); + } + return (0); } @@ -524,10 +552,7 @@ lx_exit_with_sig(proc_t *cp, sigqueue_t *sqp, void *brand_data) * SIGCHLD X - * * This is an XOR of __WCLONE being set, and SIGCHLD being the signal sent on - * process exit. Since (flags & __WCLONE) is not guaranteed to have the - * least-significant bit set when the flags is enabled, !! is used to place - * that bit into the least significant bit. Then, the bitwise XOR can be - * used, because there is no logical XOR in the C language. + * process exit. * * More information on wait in lx brands can be found at * usr/src/lib/brand/lx/lx_brand/common/wait.c. @@ -535,29 +560,45 @@ lx_exit_with_sig(proc_t *cp, sigqueue_t *sqp, void *brand_data) boolean_t lx_wait_filter(proc_t *pp, proc_t *cp) { - int flags; + lx_lwp_data_t *lwpd = ttolxlwp(curthread); + int flags = lwpd->br_waitid_flags; boolean_t ret; - if (LX_ARGS(waitid) != NULL) { - flags = LX_ARGS(waitid)->waitid_flags; - mutex_enter(&cp->p_lock); - if (flags & LX_WALL) { - ret = B_TRUE; - } else if (cp->p_stat == SZOMB || - cp->p_brand == &native_brand) { - ret = (((!!(flags & LX_WCLONE)) ^ - (stol_signo[SIGCHLD] == cp->p_exit_data)) - ? B_TRUE : B_FALSE); + if (!lwpd->br_waitid_emulate) { + return (B_TRUE); + } + + mutex_enter(&cp->p_lock); + if (flags & LX_WALL) { + ret = B_TRUE; + + } else { + int exitsig; + boolean_t is_clone, _wclone; + + /* + * Determine the exit signal for this process: + */ + if (cp->p_stat == SZOMB || cp->p_brand == &native_brand) { + exitsig = cp->p_exit_data; } else { - ret = (((!!(flags & LX_WCLONE)) ^ - (stol_signo[SIGCHLD] == ptolxproc(cp)->l_signal)) - ? 
B_TRUE : B_FALSE); + exitsig = ptolxproc(cp)->l_signal; } - mutex_exit(&cp->p_lock); - return (ret); - } else { - return (B_TRUE); + + /* + * To enable the bitwise XOR to stand in for the absent C + * logical XOR, we use the logical NOT operator twice to + * ensure the least significant bit is populated with the + * __WCLONE flag status. + */ + _wclone = !!(flags & LX_WCLONE); + is_clone = (stol_signo[SIGCHLD] == exitsig); + + ret = (_wclone ^ is_clone) ? B_TRUE : B_FALSE; } + mutex_exit(&cp->p_lock); + + return (ret); } void diff --git a/usr/src/uts/common/brand/lx/os/lx_pid.c b/usr/src/uts/common/brand/lx/os/lx_pid.c index aa8c751bc2..8552754c43 100644 --- a/usr/src/uts/common/brand/lx/os/lx_pid.c +++ b/usr/src/uts/common/brand/lx/os/lx_pid.c @@ -22,7 +22,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #include <sys/types.h> @@ -222,6 +222,28 @@ lx_lpid_to_spair(pid_t l_pid, pid_t *s_pid, id_t *s_tid) { struct lx_pid *hp; + if (l_pid == 1) { + pid_t initpid; + + /* + * We are trying to look up the Linux init process for the + * current zone, which we pretend has pid 1. + */ + if ((initpid = curzone->zone_proc_initpid) == -1) { + /* + * We could not find the init process for this zone. + */ + return (-1); + } + + if (s_pid != NULL) + *s_pid = initpid; + if (s_tid != NULL) + *s_tid = 1; + + return (0); + } + mutex_enter(&hash_lock); for (hp = ltos_pid_hash[LTOS_HASH(l_pid)]; hp; hp = hp->ltos_next) { if (l_pid == hp->l_pid) { diff --git a/usr/src/uts/common/brand/lx/os/lx_ptrace.c b/usr/src/uts/common/brand/lx/os/lx_ptrace.c new file mode 100644 index 0000000000..6e4b74531d --- /dev/null +++ b/usr/src/uts/common/brand/lx/os/lx_ptrace.c @@ -0,0 +1,2270 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. 
+ * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * Emulation of the Linux ptrace(2) interface. + * + * OVERVIEW + * + * The Linux process model is somewhat different from the illumos native + * model. One critical difference is that each Linux thread has a unique + * identifier in the pid namespace. The lx brand assigns a pid to each LWP + * within the emulated process, giving the pid of the process itself to the + * first LWP. + * + * The Linux ptrace(2) interface allows for any LWP in a branded process to + * exert control over any other LWP within the same zone. Control is exerted + * by the use of the ptrace(2) system call itself, which accepts a number of + * request codes. Feedback on traced events is primarily received by the + * tracer through SIGCLD and the emulated waitpid(2) and waitid(2) system + * calls. Many of the possible ptrace(2) requests will only succeed if the + * target LWP is in a "ptrace-stop" condition. + * + * HISTORY + * + * The brand support for ptrace(2) was originally built on top of the rich + * support for debugging and tracing provided through the illumos /proc + * interfaces, mounted at /native/proc within the zone. The native legacy + * ptrace(3C) functionality was used as a starting point, but was generally + * insufficient for complete and precise emulation. The extant legacy + * interface, and indeed our native SIGCLD and waitid(2) facilities, are + * focused on _process_ level concerns -- the Linux interface has been + * extended to be aware of LWPs as well. 
+ * + * In order to allow us to focus on providing more complete and accurate + * emulation without extensive and undesirable changes to the native + * facilities, this second generation ptrace(2) emulation is mostly separate + * from any other tracing or debugging framework in the system. + * + * ATTACHING TRACERS TO TRACEES + * + * There are several ways that a child LWP may becomed traced by a tracer. + * To determine which attach method caused a tracee to become attached, one + * may inspect the "br_ptrace_attach" member of the LWP-specific brand data + * with the debugger. + * + * The first attach methods to consider are the attaching ptrace(2) requests: + * + * PTRACE_TRACEME + * + * If an LWP makes a PTRACE_TRACEME call, it will be attached as a tracee + * to its parent LWP (br_ppid). Using PTRACE_TRACEME does _not_ cause the + * tracee to be held in a stop condition. It is common practice for + * consumers to raise(SIGSTOP) immediately afterward. + * + * PTRACE_ATTACH + * + * An LWP may attempt to trace any other LWP in this, or another, process. + * We currently allow any attach where the process containing the tracer + * LWP has permission to write to /proc for the process containing the + * intended tracer. This action also sends a SIGSTOP to the newly attached + * tracee. + * + * The second class of attach methods are the clone(2)/fork(2) inheritance + * options that may be set on a tracee with PTRACE_SETOPTIONS: + * + * PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK and PTRACE_O_TRACECLONE + * + * If these options have been set on a tracee, then a fork(2), vfork(2) or + * clone(2) respectively will cause the newly created LWP to be traced by + * the same tracer. The same set of ptrace(2) options will also be set on + * the new child. + * + * The third class of attach method is the PTRACE_CLONE flag to clone(2). + * This flag induces the same inheritance as PTRACE_O_TRACECLONE, but is + * passed by the tracee as an argument to clone(2). 
+ * + * DETACHING TRACEES + * + * Tracees can be detached by the tracer with the PTRACE_DETACH request. + * This request is only valid when the tracee is in a ptrace(2) stop + * condition, and is itself a restarting action. + * + * If the tracer exits without detaching all of its tracees, then all of the + * tracees are automatically detached and restarted. If a tracee was in + * "signal-delivery-stop" at the time the tracer exited, the signal will be + * released to the child unless it is a SIGSTOP. We drop this instance of + * SIGSTOP in order to prevent the child from becoming stopped by job + * control. + * + * ACCORD ALLOCATION AND MANAGEMENT + * + * The "lx_ptrace_accord_t" object tracks the agreement between a tracer LWP + * and zero or more tracee LWPs. It is explicitly illegal for a tracee to + * trace its tracer, and we block this in PTRACE_ATTACH/PTRACE_TRACEME. + * + * An LWP starts out without an accord. If a child of that LWP calls + * ptrace(2) with the PTRACE_TRACEME subcommand, or if the LWP itself uses + * PTRACE_ATTACH, an accord will be allocated and stored on that LWP. The + * accord structure is not released from that LWP until it arrives in + * lx_exitlwp(), as called by lwp_exit(). A new accord will not be + * allocated, even if one does not exist, once an LWP arrives in lx_exitlwp() + * and sets the LX_PTRACE_EXITING flag. An LWP will have at most one accord + * structure throughout its entire lifecycle; once it has one, it has the + * same one until death. + * + * The accord is reference counted (lxpa_refcnt), starting at a count of one + * at creation to represent the link from the tracer LWP to its accord. The + * accord is not freed until the reference count falls to zero. + * + * To make mutual exclusion between a detaching tracer and various notifying + * tracees simpler, the tracer will hold "pidlock" while it clears the + * accord members that point back to the tracer LWP and CV. 
+ * + * SIGNALS AND JOB CONTROL + * + * Various actions, either directly ptrace(2) related or commonly associated + * with tracing, cause process- or thread-directed SIGSTOP signals to be sent + * to tracees. These signals, and indeed any signal other than SIGKILL, can + * be suppressed by the tracer when using a restarting request (including + * PTRACE_DETACH) on a child. The signal may also be substituted for a + * different signal. + * + * If a SIGSTOP (or other stopping signal) is not suppressed by the tracer, + * it will induce the regular illumos native job control stop of the entire + * traced process. This is at least passingly similar to the Linux "group + * stop" ptrace(2) condition. + * + * SYSTEM CALL TRACING + * + * The ptrace(2) interface enables the tracer to hold the tracee on entry and + * exit from system calls. When a stopped tracee is restarted through the + * PTRACE_SYSCALL request, the LX_PTRACE_SYSCALL flag is set until the next + * system call boundary. Whether this is a "syscall-entry-stop" or + * "syscall-exit-stop", the tracee is held and the tracer is notified via + * SIGCLD/waitpid(2) in the usual way. The flag LX_PTRACE_SYSCALL flag is + * cleared after each stop; for ongoing system call tracing the tracee must + * be continuously restarted with PTRACE_SYSCALL. + * + * EVENT STOPS + * + * Various events (particularly FORK, VFORK, CLONE, EXEC and EXIT) are + * enabled by the tracer through PTRACE_SETOPTIONS. Once enabled, the tracee + * will be stopped at the nominated points of interest and the tracer + * notified. The tracer may request additional information about the event, + * such as the pid of new LWPs and processes, via PTRACE_GETEVENTMSG. + * + * LOCK ORDERING RULES + * + * It is not safe, in general, to hold p_lock for two different processes at + * the same time. This constraint is the primary reason for the existence + * (and complexity) of the ptrace(2) accord mechanism. 
+ * + * In order to facilitate looking up accords by the "pid" of a tracer LWP, + * p_lock for the tracer process may be held while entering the accord mutex + * (lxpa_lock). This mutex protects the accord flags and reference count. + * The reference count is manipulated through lx_ptrace_accord_hold() and + * lx_ptrace_accord_rele(). + * + * DO NOT interact with the accord mutex (lxpa_lock) directly. The + * lx_ptrace_accord_enter() and lx_ptrace_accord_exit() functions do various + * book-keeping and lock ordering enforcement and MUST be used. + * + * It is NOT legal to take ANY p_lock while holding the accord mutex + * (lxpa_lock). If the lxpa_tracees_lock is to be held concurrently with + * lxpa_lock, lxpa_lock MUST be taken first and dropped before taking p_lock + * of any processes from the tracee list. + * + * It is NOT legal to take a tracee p_lock and then attempt to enter the + * accord mutex (or tracee list mutex) of its tracer. When running as the + * tracee LWP, the tracee's hold will prevent the accord from being freed. + * Use of the LX_PTRACE_STOPPING or LX_PTRACE_CLONING flag in the + * LWP-specific brand data prevents an exiting tracer from altering the + * tracee until the tracee has come to an orderly stop, without requiring the + * tracee to hold its own p_lock the entire time it is stopping. + * + * It is not safe, in general, to enter "pidlock" while holding the p_lock of + * any process. It is similarly illegal to hold any accord locks (lxpa_lock + * or lxpa_sublock) while attempting to enter "pidlock". As "pidlock" is a + * global mutex, it should be held for the shortest possible time. 
+ */ + +#include <sys/types.h> +#include <sys/kmem.h> +#include <sys/ksynch.h> +#include <sys/sysmacros.h> +#include <sys/procfs.h> +#include <sys/cmn_err.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/wait.h> +#include <sys/prsystm.h> +#include <sys/note.h> + +#include <sys/brand.h> +#include <sys/lx_brand.h> +#include <sys/lx_impl.h> +#include <sys/lx_misc.h> +#include <sys/lx_pid.h> +#include <lx_syscall.h> +#include <lx_signum.h> + + +typedef enum lx_ptrace_cont_flags_t { + LX_PTC_NONE = 0x00, + LX_PTC_SYSCALL = 0x01, + LX_PTC_SINGLESTEP = 0x02 +} lx_ptrace_cont_flags_t; + +/* + * Macros for checking the state of an LWP via "br_ptrace_flags": + */ +#define LX_PTRACE_BUSY \ + (LX_PTRACE_EXITING | LX_PTRACE_STOPPING | LX_PTRACE_CLONING) + +#define VISIBLE(a) (((a)->br_ptrace_flags & LX_PTRACE_EXITING) == 0) +#define TRACEE_BUSY(a) (((a)->br_ptrace_flags & LX_PTRACE_BUSY) != 0) + +#define ACCORD_HELD(a) MUTEX_HELD(&(a)->lxpa_lock) + +static kcondvar_t lx_ptrace_busy_cv; +static kmem_cache_t *lx_ptrace_accord_cache; + +/* + * Enter the accord mutex. + */ +static void +lx_ptrace_accord_enter(lx_ptrace_accord_t *accord) +{ + VERIFY(MUTEX_NOT_HELD(&accord->lxpa_tracees_lock)); + + mutex_enter(&accord->lxpa_lock); +} + +/* + * Exit the accord mutex. If the reference count has dropped to zero, + * free the accord. + */ +static void +lx_ptrace_accord_exit(lx_ptrace_accord_t *accord) +{ + VERIFY(ACCORD_HELD(accord)); + + if (accord->lxpa_refcnt > 0) { + mutex_exit(&accord->lxpa_lock); + return; + } + + /* + * When the reference count drops to zero we must free the accord. + */ + VERIFY(accord->lxpa_tracer == NULL); + VERIFY(MUTEX_NOT_HELD(&accord->lxpa_tracees_lock)); + VERIFY(list_is_empty(&accord->lxpa_tracees)); + VERIFY(accord->lxpa_flags & LX_ACC_TOMBSTONE); + + mutex_destroy(&accord->lxpa_lock); + mutex_destroy(&accord->lxpa_tracees_lock); + + kmem_cache_free(lx_ptrace_accord_cache, accord); +} + +/* + * Drop our reference to this accord. 
If this drops the reference count + * to zero, the next lx_ptrace_accord_exit() will free the accord. + */ +static void +lx_ptrace_accord_rele(lx_ptrace_accord_t *accord) +{ + VERIFY(ACCORD_HELD(accord)); + + VERIFY(accord->lxpa_refcnt > 0); + accord->lxpa_refcnt--; +} + +/* + * Place an additional hold on an accord. + */ +static void +lx_ptrace_accord_hold(lx_ptrace_accord_t *accord) +{ + VERIFY(ACCORD_HELD(accord)); + + accord->lxpa_refcnt++; +} + +/* + * Fetch the accord for this LWP. If one has not yet been created, and the + * process is not exiting, allocate it now. Must be called with p_lock held + * for the process containing the target LWP. + * + * If successful, we return holding the accord lock (lxpa_lock). + */ +static int +lx_ptrace_accord_get_locked(klwp_t *lwp, lx_ptrace_accord_t **accordp, + boolean_t allocate_one) +{ + lx_ptrace_accord_t *lxpa; + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + proc_t *p = lwptoproc(lwp); + + VERIFY(MUTEX_HELD(&p->p_lock)); + + /* + * If this LWP does not have an accord, we wish to allocate + * and install one. + */ + if ((lxpa = lwpd->br_ptrace_accord) == NULL) { + if (!allocate_one || !VISIBLE(lwpd)) { + /* + * Either we do not wish to allocate an accord, or this + * LWP has already begun exiting from a ptrace + * perspective. + */ + *accordp = NULL; + return (ESRCH); + } + + lxpa = kmem_cache_alloc(lx_ptrace_accord_cache, KM_SLEEP); + bzero(lxpa, sizeof (*lxpa)); + + /* + * The initial reference count is 1 because we are referencing + * it in from the soon-to-be tracer LWP. + */ + lxpa->lxpa_refcnt = 1; + mutex_init(&lxpa->lxpa_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&lxpa->lxpa_tracees_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&lxpa->lxpa_tracees, sizeof (lx_lwp_data_t), + offsetof(lx_lwp_data_t, br_ptrace_linkage)); + lxpa->lxpa_cvp = &p->p_cv; + + lxpa->lxpa_tracer = lwpd; + lwpd->br_ptrace_accord = lxpa; + } + + /* + * Lock the accord before returning it to the caller. 
+ */ + lx_ptrace_accord_enter(lxpa); + + /* + * There should be at least one active reference to this accord, + * otherwise it should have been freed. + */ + VERIFY(lxpa->lxpa_refcnt > 0); + + *accordp = lxpa; + return (0); +} + +/* + * Accords belong to the tracer LWP. Get the accord for this tracer or return + * an error if it was not possible. To prevent deadlocks, the caller MUST NOT + * hold p_lock on its own or any other process. + * + * If successful, we return holding the accord lock (lxpa_lock). + */ +static int +lx_ptrace_accord_get_by_pid(pid_t lxpid, lx_ptrace_accord_t **accordp) +{ + int ret = ESRCH; + pid_t apid; + id_t atid; + proc_t *aproc; + kthread_t *athr; + klwp_t *alwp; + lx_lwp_data_t *alwpd; + + VERIFY(MUTEX_NOT_HELD(&curproc->p_lock)); + + /* + * Locate the process containing the tracer LWP based on its Linux pid + * and lock it. + */ + if (lx_lpid_to_spair(lxpid, &apid, &atid) != 0 || + (aproc = sprlock(apid)) == NULL) { + return (ESRCH); + } + + /* + * Locate the tracer LWP itself and ensure that it is visible to + * ptrace(2). + */ + if ((athr = idtot(aproc, atid)) == NULL || + (alwp = ttolwp(athr)) == NULL || + (alwpd = lwptolxlwp(alwp)) == NULL || + !VISIBLE(alwpd)) { + sprunlock(aproc); + return (ESRCH); + } + + /* + * We should not fetch our own accord this way. + */ + if (athr == curthread) { + sprunlock(aproc); + return (EPERM); + } + + /* + * Fetch (or allocate) the accord owned by this tracer LWP: + */ + ret = lx_ptrace_accord_get_locked(alwp, accordp, B_TRUE); + + /* + * Unlock the process and return. + */ + sprunlock(aproc); + return (ret); +} + +/* + * Get (or allocate) the ptrace(2) accord for the current LWP, acting as a + * tracer. The caller MUST NOT currently hold p_lock on the process containing + * this LWP. + * + * If successful, we return holding the accord lock (lxpa_lock). 
+ */ +static int +lx_ptrace_accord_get(lx_ptrace_accord_t **accordp, boolean_t allocate_one) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = lwptoproc(lwp); + int ret; + + VERIFY(MUTEX_NOT_HELD(&p->p_lock)); + + /* + * Lock the tracer (this LWP). + */ + mutex_enter(&p->p_lock); + + /* + * Fetch (or allocate) the accord for this LWP: + */ + ret = lx_ptrace_accord_get_locked(lwp, accordp, allocate_one); + + mutex_exit(&p->p_lock); + + return (ret); +} + +/* + * Restart an LWP if it is in "ptrace-stop". This function may induce sleep, + * so the caller MUST NOT hold any mutexes other than p_lock for the process + * containing the LWP. + */ +static void +lx_ptrace_restart_lwp(klwp_t *lwp) +{ + kthread_t *rt = lwptot(lwp); + proc_t *rproc = lwptoproc(lwp); + lx_lwp_data_t *rlwpd = lwptolxlwp(lwp); + + VERIFY(rt != curthread); + VERIFY(MUTEX_HELD(&rproc->p_lock)); + + /* + * Exclude potential meddling from procfs. + */ + prbarrier(rproc); + + /* + * Check that the LWP is still in "ptrace-stop" and, if so, restart it. + */ + thread_lock(rt); + if (BSTOPPED(rt) && rt->t_whystop == PR_BRAND) { + rt->t_schedflag |= TS_BSTART; + setrun_locked(rt); + + /* + * Clear stop reason. 
+ */ + rlwpd->br_ptrace_whystop = 0; + rlwpd->br_ptrace_whatstop = 0; + rlwpd->br_ptrace_flags &= ~LX_PTRACE_CLDPEND; + } + thread_unlock(rt); +} + +static void +lx_winfo(lx_lwp_data_t *remote, k_siginfo_t *ip, boolean_t waitflag, + pid_t *event_ppid, pid_t *event_pid) +{ + int signo; + + /* + * Populate our k_siginfo_t with data about this "ptrace-stop" + * condition: + */ + bzero(ip, sizeof (*ip)); + ip->si_signo = SIGCLD; + ip->si_pid = remote->br_pid; + ip->si_code = CLD_TRAPPED; + + switch (remote->br_ptrace_whatstop) { + case LX_PR_SYSENTRY: + case LX_PR_SYSEXIT: + ip->si_status = SIGTRAP; + if (remote->br_ptrace_options & LX_PTRACE_O_TRACESYSGOOD) { + ip->si_status |= 0x80; + } + break; + + case LX_PR_SIGNALLED: + signo = remote->br_ptrace_stopsig; + if (signo < 1 || signo >= LX_NSIG) { + /* + * If this signal number is not valid, pretend it + * was a SIGTRAP. + */ + ip->si_status = SIGTRAP; + } else { + ip->si_status = ltos_signo[signo]; + } + break; + + case LX_PR_EVENT: + ip->si_status = SIGTRAP | remote->br_ptrace_event; + /* + * Record the Linux pid of both this LWP and the create + * event we are dispatching. We will use this information + * to unblock any subsequent ptrace(2) events that depend + * on this one. + */ + if (event_ppid != NULL) + *event_ppid = remote->br_pid; + if (event_pid != NULL) + *event_pid = (pid_t)remote->br_ptrace_eventmsg; + break; + + default: + cmn_err(CE_PANIC, "unxpected stop subreason: %d", + remote->br_ptrace_whatstop); + } + + /* + * If WNOWAIT was specified, do not mark the event as posted + * so that it may be re-fetched on another call to waitid(). + */ + if (waitflag) { + remote->br_ptrace_whystop = 0; + remote->br_ptrace_whatstop = 0; + remote->br_ptrace_flags &= ~LX_PTRACE_CLDPEND; + } +} + +/* + * Receive notification from stop() of a PR_BRAND stop. 
+ */ +void +lx_stop_notify(proc_t *p, klwp_t *lwp, ushort_t why, ushort_t what) +{ + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + lx_ptrace_accord_t *accord; + klwp_t *plwp = NULL; + proc_t *pp = NULL; + lx_lwp_data_t *parent; + boolean_t cldpend = B_TRUE; + boolean_t cldpost = B_FALSE; + sigqueue_t *sqp = NULL; + + /* + * We currently only care about LX-specific stop reasons. + */ + if (why != PR_BRAND) + return; + + switch (what) { + case LX_PR_SYSENTRY: + case LX_PR_SYSEXIT: + case LX_PR_SIGNALLED: + case LX_PR_EVENT: + break; + default: + cmn_err(CE_PANIC, "unexpected subreason for PR_BRAND" + " stop: %d", (int)what); + } + + /* + * We should be holding the lock on our containing process. The + * STOPPING flag should have been set by lx_ptrace_stop() for all + * PR_BRAND stops. + */ + VERIFY(MUTEX_HELD(&p->p_lock)); + VERIFY(lwpd->br_ptrace_flags & LX_PTRACE_STOPPING); + VERIFY((accord = lwpd->br_ptrace_tracer) != NULL); + + /* + * We must drop our process lock to take "pidlock". The + * LX_PTRACE_STOPPING flag protects us from an exiting tracer. + */ + mutex_exit(&p->p_lock); + + /* + * Allocate before we enter any mutexes. + */ + sqp = kmem_zalloc(sizeof (*sqp), KM_SLEEP); + + /* + * We take pidlock now, which excludes all callers of waitid() and + * prevents a detaching tracer from clearing critical accord members. + */ + mutex_enter(&pidlock); + mutex_enter(&p->p_lock); + + /* + * Get the ptrace(2) "parent" process, to which we may send + * a SIGCLD signal later. + */ + if ((parent = accord->lxpa_tracer) != NULL && + (plwp = parent->br_lwp) != NULL) { + pp = lwptoproc(plwp); + } + + /* + * Our tracer should not have been modified in our absence; the + * LX_PTRACE_STOPPING flag prevents it. + */ + VERIFY(lwpd->br_ptrace_tracer == accord); + + /* + * Stash data for this stop condition in the LWP data while we hold + * both pidlock and our p_lock. 
+ */ + lwpd->br_ptrace_whystop = why; + lwpd->br_ptrace_whatstop = what; + + /* + * If this event does not depend on an event from the parent LWP, + * populate the siginfo_t for the event pending on this tracee LWP. + */ + if (!(lwpd->br_ptrace_flags & LX_PTRACE_PARENT_WAIT) && pp != NULL) { + cldpost = B_TRUE; + lx_winfo(lwpd, &sqp->sq_info, B_FALSE, NULL, NULL); + } + + /* + * Drop our p_lock so that we may lock the tracer. + */ + mutex_exit(&p->p_lock); + if (cldpost && pp != NULL) { + /* + * Post the SIGCLD to the tracer. + */ + mutex_enter(&pp->p_lock); + if (!sigismember(&pp->p_sig, SIGCLD)) { + sigaddqa(pp, plwp->lwp_thread, sqp); + cldpend = B_FALSE; + sqp = NULL; + } + mutex_exit(&pp->p_lock); + } + + /* + * We re-take our process lock now. The lock will be held until + * the thread is actually marked stopped, so we will not race with + * lx_ptrace_lock_if_stopped() or lx_waitid_helper(). + */ + mutex_enter(&p->p_lock); + + /* + * We clear the STOPPING flag; stop() continues to hold our p_lock + * until our thread stop state is visible. + */ + lwpd->br_ptrace_flags &= ~LX_PTRACE_STOPPING; + lwpd->br_ptrace_flags |= LX_PTRACE_STOPPED; + if (cldpend) { + /* + * We sent the SIGCLD for this new wait condition already. + */ + lwpd->br_ptrace_flags |= LX_PTRACE_CLDPEND; + } + + /* + * If lx_ptrace_exit_tracer() is trying to detach our tracer, it will + * be sleeping on this CV until LX_PTRACE_STOPPING is clear. Wake it + * now. + */ + cv_broadcast(&lx_ptrace_busy_cv); + + /* + * While still holding pidlock, we attempt to wake our tracer from a + * potential waitid() slumber. + */ + if (accord->lxpa_cvp != NULL) { + cv_broadcast(accord->lxpa_cvp); + } + + /* + * We release pidlock and return as we were called: with our p_lock + * held. + */ + mutex_exit(&pidlock); + + if (sqp != NULL) { + kmem_free(sqp, sizeof (*sqp)); + } +} + +/* + * For any restarting action (e.g. 
PTRACE_CONT, PTRACE_SYSCALL or + * PTRACE_DETACH) to be allowed, the tracee LWP must be in "ptrace-stop". This + * check must ONLY be run on tracees of the current LWP. If the check is + * successful, we return with the tracee p_lock held. + */ +static int +lx_ptrace_lock_if_stopped(lx_ptrace_accord_t *accord, lx_lwp_data_t *remote) +{ + klwp_t *rlwp = remote->br_lwp; + proc_t *rproc = lwptoproc(rlwp); + kthread_t *rt = lwptot(rlwp); + + /* + * We must never check that we, ourselves, are stopped. We must also + * have the accord tracee list locked while we lock our tracees. + */ + VERIFY(curthread != rt); + VERIFY(MUTEX_HELD(&accord->lxpa_tracees_lock)); + VERIFY(accord->lxpa_tracer == ttolxlwp(curthread)); + + /* + * Lock the process containing the tracee LWP. + */ + mutex_enter(&rproc->p_lock); + if (!VISIBLE(remote)) { + /* + * The tracee LWP is currently detaching itself as it exits. + * It is no longer visible to ptrace(2). + */ + mutex_exit(&rproc->p_lock); + return (ESRCH); + } + + /* + * We must only check whether tracees of the current LWP are stopped. + * We check this condition after confirming visibility as an exiting + * tracee may no longer be completely consistent. + */ + VERIFY(remote->br_ptrace_tracer == accord); + + if (!(remote->br_ptrace_flags & LX_PTRACE_STOPPED)) { + /* + * The tracee is not in "ptrace-stop", so we release the + * process. + */ + mutex_exit(&rproc->p_lock); + return (ESRCH); + } + + /* + * The tracee is stopped. We return holding its process lock so that + * the caller may manipulate it. + */ + return (0); +} + +static int +lx_ptrace_setoptions(lx_lwp_data_t *remote, uintptr_t options) +{ + /* + * Check for valid options. + */ + if ((options & ~LX_PTRACE_O_ALL) != 0) { + return (EINVAL); + } + + /* + * Set ptrace options on the target LWP. 
+ */ + remote->br_ptrace_options = (lx_ptrace_options_t)options; + + return (0); +} + +static int +lx_ptrace_geteventmsg(lx_lwp_data_t *remote, void *umsgp) +{ + int error; + +#if defined(_SYSCALL32_IMPL) + if (get_udatamodel() != DATAMODEL_NATIVE) { + uint32_t tmp = remote->br_ptrace_eventmsg; + + error = copyout(&tmp, umsgp, sizeof (uint32_t)); + } else +#endif + { + error = copyout(&remote->br_ptrace_eventmsg, umsgp, + sizeof (ulong_t)); + } + + return (error); +} + +/* + * Implements the PTRACE_CONT subcommand of the Linux ptrace(2) interface. + */ +static int +lx_ptrace_cont(lx_lwp_data_t *remote, lx_ptrace_cont_flags_t flags, int signo) +{ + klwp_t *lwp = remote->br_lwp; + + if (flags & LX_PTC_SINGLESTEP) { + /* + * We do not currently support single-stepping. + */ + lx_unsupported("PTRACE_SINGLESTEP not currently implemented"); + return (EINVAL); + } + + /* + * The tracer may choose to suppress the delivery of a signal, or + * select an alternative signal for delivery. If this is an + * appropriate ptrace(2) "signal-delivery-stop", br_ptrace_stopsig + * will be used as the new signal number. + * + * As with so many other aspects of the Linux ptrace(2) interface, this + * may fail silently if the state machine is not aligned correctly. + */ + remote->br_ptrace_stopsig = signo; + + /* + * Handle the syscall-stop flag if this is a PTRACE_SYSCALL restart: + */ + if (flags & LX_PTC_SYSCALL) { + remote->br_ptrace_flags |= LX_PTRACE_SYSCALL; + } else { + remote->br_ptrace_flags &= ~LX_PTRACE_SYSCALL; + } + + lx_ptrace_restart_lwp(lwp); + + return (0); +} + +/* + * Implements the PTRACE_DETACH subcommand of the Linux ptrace(2) interface. + * + * The LWP identified by the Linux pid "lx_pid" will, if it as a tracee of the + * current LWP, be detached and set runnable. If the specified LWP is not + * currently in the "ptrace-stop" state, the routine will return ESRCH as if + * the LWP did not exist at all. + * + * The caller must not hold p_lock on any process. 
+ */ +static int +lx_ptrace_detach(lx_ptrace_accord_t *accord, lx_lwp_data_t *remote, int signo, + boolean_t *release_hold) +{ + klwp_t *rlwp; + + rlwp = remote->br_lwp; + + /* + * The tracee LWP was in "ptrace-stop" and we now hold its p_lock. + * Detach the LWP from the accord and set it running. + */ + VERIFY(!TRACEE_BUSY(remote)); + remote->br_ptrace_flags &= ~(LX_PTRACE_SYSCALL | LX_PTRACE_INHERIT); + VERIFY(list_link_active(&remote->br_ptrace_linkage)); + list_remove(&accord->lxpa_tracees, remote); + + remote->br_ptrace_attach = LX_PTA_NONE; + remote->br_ptrace_tracer = NULL; + remote->br_ptrace_flags = 0; + *release_hold = B_TRUE; + + /* + * The tracer may, as described in lx_ptrace_cont(), choose to suppress + * or modify the delivered signal. + */ + remote->br_ptrace_stopsig = signo; + + lx_ptrace_restart_lwp(rlwp); + + return (0); +} + +/* + * This routine implements the PTRACE_ATTACH operation of the Linux ptrace(2) + * interface. + * + * This LWP is requesting to be attached as a tracer to another LWP -- the + * tracee. If a ptrace accord to track the list of tracees has not yet been + * allocated, one will be allocated and attached to this LWP now. + * + * The "br_ptrace_tracer" on the tracee LWP is set to this accord, and the + * tracee LWP is then added to the "lxpa_tracees" list in the accord. We drop + * locks between these two phases; the only consumer of trace events from this + * accord is this LWP, which obviously cannot be running waitpid(2) at the same + * time as this call to ptrace(2). + */ +static int +lx_ptrace_attach(pid_t lx_pid) +{ + int error = ESRCH; + int32_t one = 1; + /* + * Our (Tracer) LWP: + */ + lx_ptrace_accord_t *accord; + lx_lwp_data_t *lwpd = ttolxlwp(curthread); + /* + * Remote (Tracee) LWP: + */ + pid_t rpid; + id_t rtid; + proc_t *rproc; + kthread_t *rthr; + klwp_t *rlwp; + lx_lwp_data_t *rlwpd; + + if (lwpd->br_pid == lx_pid) { + /* + * We cannot trace ourselves. 
+ */ + return (EPERM); + } + + /* + * Ensure that we have an accord and obtain a lock on it. This + * routine should not fail because the LWP cannot make ptrace(2) system + * calls after it has begun exiting. + */ + VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_EXITING); + VERIFY(lx_ptrace_accord_get(&accord, B_TRUE) == 0); + + /* + * Place speculative hold in case the attach is successful. + */ + lx_ptrace_accord_hold(accord); + lx_ptrace_accord_exit(accord); + + /* + * Locate the process containing the tracee LWP based on its Linux pid + * and lock it. + */ + if (lx_lpid_to_spair(lx_pid, &rpid, &rtid) != 0 || + (rproc = sprlock(rpid)) == NULL) { + /* + * We could not find the target process. + */ + goto errout; + } + + /* + * Locate the tracee LWP. + */ + if ((rthr = idtot(rproc, rtid)) == NULL || + (rlwp = ttolwp(rthr)) == NULL || + (rlwpd = lwptolxlwp(rlwp)) == NULL || + !VISIBLE(rlwpd)) { + /* + * The LWP could not be found, was not branded, or is not + * visible to ptrace(2) at this time. + */ + goto unlock_errout; + } + + /* + * We now hold the lock on the tracee. Attempt to install ourselves + * as the tracer. + */ + if (curproc != rproc && priv_proc_cred_perm(curproc->p_cred, rproc, + NULL, VWRITE) != 0) { + /* + * This process does not have permission to trace the remote + * process. + */ + error = EPERM; + } else if (rlwpd->br_ptrace_tracer != NULL) { + /* + * This LWP is already being traced. + */ + VERIFY(list_link_active(&rlwpd->br_ptrace_linkage)); + VERIFY(rlwpd->br_ptrace_attach != LX_PTA_NONE); + error = EPERM; + } else { + lx_proc_data_t *rprocd; + + /* + * Bond the tracee to the accord. + */ + VERIFY0(rlwpd->br_ptrace_flags & LX_PTRACE_EXITING); + VERIFY(rlwpd->br_ptrace_attach == LX_PTA_NONE); + rlwpd->br_ptrace_attach = LX_PTA_ATTACH; + rlwpd->br_ptrace_tracer = accord; + + /* + * We had no tracer, and are thus not in the tracees list. + * It is safe to take the tracee list lock while we insert + * ourselves. 
+ */ + mutex_enter(&accord->lxpa_tracees_lock); + VERIFY(!list_link_active(&rlwpd->br_ptrace_linkage)); + list_insert_tail(&accord->lxpa_tracees, rlwpd); + mutex_exit(&accord->lxpa_tracees_lock); + + /* + * Send a thread-directed SIGSTOP. + */ + sigtoproc(rproc, rthr, SIGSTOP); + + /* + * Set the in-kernel process-wide ptrace(2) enable flag. + * Attempt also to write the usermode trace flag so that the + * process knows to enter the kernel for potential ptrace(2) + * syscall-stops. + */ + rprocd = ttolxproc(rthr); + rprocd->l_ptrace = 1; + mutex_exit(&rproc->p_lock); + (void) uwrite(rproc, &one, sizeof (one), rprocd->l_traceflag); + mutex_enter(&rproc->p_lock); + + error = 0; + } + +unlock_errout: + /* + * Unlock the process containing the tracee LWP and the accord. + */ + sprunlock(rproc); + +errout: + if (error != 0) { + /* + * The attach was not successful. Remove our speculative + * hold. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); + } + + return (error); +} + +int +lx_ptrace_set_clone_inherit(int option, boolean_t inherit_flag) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = lwptoproc(lwp); + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + + mutex_enter(&p->p_lock); + + switch (option) { + case LX_PTRACE_O_TRACEFORK: + case LX_PTRACE_O_TRACEVFORK: + case LX_PTRACE_O_TRACECLONE: + lwpd->br_ptrace_clone_option = option; + break; + + default: + return (EINVAL); + } + + if (inherit_flag) { + lwpd->br_ptrace_flags |= LX_PTRACE_INHERIT; + } else { + lwpd->br_ptrace_flags &= ~LX_PTRACE_INHERIT; + } + + mutex_exit(&p->p_lock); + return (0); +} + +/* + * If the parent LWP is being traced, we want to attach ourselves to the + * same accord. 
+ */ +void +lx_ptrace_inherit_tracer(lx_lwp_data_t *src, lx_lwp_data_t *dst) +{ + proc_t *srcp = lwptoproc(src->br_lwp); + proc_t *dstp = lwptoproc(dst->br_lwp); + lx_ptrace_accord_t *accord; + boolean_t unlock = B_FALSE; + + if (srcp == dstp) { + /* + * This is syslwp_create(), so the process p_lock is already + * held. + */ + VERIFY(MUTEX_HELD(&srcp->p_lock)); + } else { + unlock = B_TRUE; + mutex_enter(&srcp->p_lock); + } + + if ((accord = src->br_ptrace_tracer) == NULL) { + /* + * The source LWP does not have a tracer to inherit. + */ + goto out; + } + + /* + * There are two conditions to check when determining if the new + * child should inherit the same tracer (and tracing options) as its + * parent. Either condition is sufficient to trigger inheritance. + */ + dst->br_ptrace_attach = LX_PTA_NONE; + if ((src->br_ptrace_options & src->br_ptrace_clone_option) != 0) { + /* + * Condition 1: + * The clone(2), fork(2) and vfork(2) emulated system calls + * populate "br_ptrace_clone_option" with the specific + * ptrace(2) SETOPTIONS option that applies to this + * operation. If the relevant option has been enabled by the + * tracer then we inherit. + */ + dst->br_ptrace_attach |= LX_PTA_INHERIT_OPTIONS; + + } else if ((src->br_ptrace_flags & LX_PTRACE_INHERIT) != 0) { + /* + * Condition 2: + * If the caller opted in to inheritance with the + * PTRACE_CLONE flag to clone(2), the LX_PTRACE_INHERIT flag + * will be set and we inherit. + */ + dst->br_ptrace_attach |= LX_PTA_INHERIT_CLONE; + } + + /* + * These values only apply for the duration of a single clone(2), et + * al, system call. + */ + src->br_ptrace_flags &= ~LX_PTRACE_INHERIT; + src->br_ptrace_clone_option = 0; + + if (dst->br_ptrace_attach == LX_PTA_NONE) { + /* + * No condition triggered inheritance. + */ + goto out; + } + + /* + * Set the LX_PTRACE_CLONING flag to prevent us from being detached + * while our p_lock is dropped. 
+ */ + src->br_ptrace_flags |= LX_PTRACE_CLONING; + mutex_exit(&srcp->p_lock); + + /* + * Hold the accord for the new LWP. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_hold(accord); + lx_ptrace_accord_exit(accord); + + /* + * Install the tracer and copy the current PTRACE_SETOPTIONS options. + */ + dst->br_ptrace_tracer = accord; + dst->br_ptrace_options = src->br_ptrace_options; + + /* + * This flag prevents waitid() from seeing events for the new child + * until the parent is able to post the relevant ptrace event to + * the tracer. + */ + dst->br_ptrace_flags |= LX_PTRACE_PARENT_WAIT; + + mutex_enter(&accord->lxpa_tracees_lock); + VERIFY(list_link_active(&src->br_ptrace_linkage)); + VERIFY(!list_link_active(&dst->br_ptrace_linkage)); + list_insert_tail(&accord->lxpa_tracees, dst); + mutex_exit(&accord->lxpa_tracees_lock); + + /* + * Relock our process and clear our busy flag. + */ + mutex_enter(&srcp->p_lock); + src->br_ptrace_flags &= ~LX_PTRACE_CLONING; + + /* + * If lx_ptrace_exit_tracer() is trying to detach our tracer, it will + * be sleeping on this CV until LX_PTRACE_CLONING is clear. Wake it + * now. + */ + cv_broadcast(&lx_ptrace_busy_cv); + +out: + if (unlock) { + mutex_exit(&srcp->p_lock); + } +} + +static int +lx_ptrace_traceme(void) +{ + int error; + boolean_t did_attach = B_FALSE; + /* + * Our (Tracee) LWP: + */ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = lwptoproc(lwp); + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + /* + * Remote (Tracer) LWP: + */ + lx_ptrace_accord_t *accord; + + /* + * We are intending to be the tracee. Fetch (or allocate) the accord + * for our parent LWP. + */ + if ((error = lx_ptrace_accord_get_by_pid(lx_lwp_ppid(lwp, NULL, + NULL), &accord)) != 0) { + /* + * Could not determine the Linux pid of the parent LWP, or + * could not get the accord for that LWP. + */ + return (error); + } + + /* + * We now hold the accord lock. 
+ */ + if (accord->lxpa_flags & LX_ACC_TOMBSTONE) { + /* + * The accord is marked for death; give up now. + */ + lx_ptrace_accord_exit(accord); + return (ESRCH); + } + + /* + * Bump the reference count so that the accord is not freed. We need + * to drop the accord lock before we take our own p_lock. + */ + lx_ptrace_accord_hold(accord); + lx_ptrace_accord_exit(accord); + + /* + * We now lock _our_ process and determine if we can install our parent + * as our tracer. + */ + mutex_enter(&p->p_lock); + if (lwpd->br_ptrace_tracer != NULL) { + /* + * This LWP is already being traced. + */ + VERIFY(lwpd->br_ptrace_attach != LX_PTA_NONE); + error = EPERM; + } else { + /* + * Bond ourselves to the accord. We already bumped the accord + * reference count. + */ + VERIFY(lwpd->br_ptrace_attach == LX_PTA_NONE); + lwpd->br_ptrace_attach = LX_PTA_TRACEME; + lwpd->br_ptrace_tracer = accord; + did_attach = B_TRUE; + error = 0; + } + mutex_exit(&p->p_lock); + + /* + * Lock the accord tracee list and add this LWP. Once we are in the + * tracee list, it is the responsibility of the tracer to detach us. + */ + if (error == 0) { + lx_ptrace_accord_enter(accord); + mutex_enter(&accord->lxpa_tracees_lock); + + if (!(accord->lxpa_flags & LX_ACC_TOMBSTONE)) { + lx_proc_data_t *procd = ttolxproc(curthread); + + /* + * Put ourselves in the tracee list for this accord. + */ + VERIFY(!list_link_active(&lwpd->br_ptrace_linkage)); + list_insert_tail(&accord->lxpa_tracees, lwpd); + mutex_exit(&accord->lxpa_tracees_lock); + lx_ptrace_accord_exit(accord); + + /* + * Set the in-kernel process-wide ptrace(2) enable + * flag. Attempt also to write the usermode trace flag + * so that the process knows to enter the kernel for + * potential ptrace(2) syscall-stops. + */ + procd->l_ptrace = 1; + (void) suword32((void *)procd->l_traceflag, 1); + + return (0); + } + mutex_exit(&accord->lxpa_tracees_lock); + + /* + * The accord has been marked for death. We must + * untrace ourselves. 
+ */ + error = ESRCH; + lx_ptrace_accord_exit(accord); + } + + /* + * Our optimism was unjustified: We were unable to attach. We need to + * lock the process containing this LWP again in order to remove the + * tracer. + */ + VERIFY(error != 0); + mutex_enter(&p->p_lock); + if (did_attach) { + /* + * Verify that things were as we left them: + */ + VERIFY(!list_link_active(&lwpd->br_ptrace_linkage)); + VERIFY(lwpd->br_ptrace_tracer == accord); + + lwpd->br_ptrace_attach = LX_PTA_NONE; + lwpd->br_ptrace_tracer = NULL; + } + mutex_exit(&p->p_lock); + + /* + * Remove our speculative hold on the accord, possibly causing it to be + * freed in the process. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); + + return (error); +} + +static boolean_t +lx_ptrace_stop_common(proc_t *p, lx_lwp_data_t *lwpd, ushort_t what) +{ + VERIFY(MUTEX_HELD(&p->p_lock)); + + /* + * Mark this LWP as stopping and call stop() to enter "ptrace-stop". + */ + VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_STOPPING); + lwpd->br_ptrace_flags |= LX_PTRACE_STOPPING; + stop(PR_BRAND, what); + + /* + * We are back from "ptrace-stop" with our process lock held. + */ + lwpd->br_ptrace_flags &= ~(LX_PTRACE_STOPPING | LX_PTRACE_STOPPED | + LX_PTRACE_CLDPEND); + cv_broadcast(&lx_ptrace_busy_cv); + mutex_exit(&p->p_lock); + + return (B_TRUE); +} + +int +lx_ptrace_stop_for_option(int option, boolean_t child, ulong_t msg) +{ + kthread_t *t = curthread; + klwp_t *lwp = ttolwp(t); + proc_t *p = lwptoproc(lwp); + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + + mutex_enter(&p->p_lock); + if (lwpd->br_ptrace_tracer == NULL) { + mutex_exit(&p->p_lock); + return (ESRCH); + } + + if (!child) { + /* + * Only the first event posted by a new process is to be held + * until the matching parent event is dispatched, and only if + * it is a "child" event. This is not a child event, so we + * clear the wait flag. 
+ */ + lwpd->br_ptrace_flags &= ~LX_PTRACE_PARENT_WAIT; + } + + if (!(lwpd->br_ptrace_options & option)) { + if (option == LX_PTRACE_O_TRACEEXEC) { + /* + * Without PTRACE_O_TRACEEXEC, the Linux kernel will + * send SIGTRAP to the process. + */ + sigtoproc(p, t, SIGTRAP); + mutex_exit(&p->p_lock); + return (0); + } + + /* + * The flag for this trace event is not enabled, so we will not + * stop. + */ + mutex_exit(&p->p_lock); + return (ESRCH); + } + + if (child) { + switch (option) { + case LX_PTRACE_O_TRACECLONE: + case LX_PTRACE_O_TRACEFORK: + case LX_PTRACE_O_TRACEVFORK: + /* + * Send the child LWP a directed SIGSTOP. + */ + sigtoproc(p, t, SIGSTOP); + mutex_exit(&p->p_lock); + return (0); + default: + goto nostop; + } + } + + lwpd->br_ptrace_eventmsg = msg; + + switch (option) { + case LX_PTRACE_O_TRACECLONE: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_CLONE; + break; + case LX_PTRACE_O_TRACEEXEC: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_EXEC; + lwpd->br_ptrace_eventmsg = 0; + break; + case LX_PTRACE_O_TRACEEXIT: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_EXIT; + break; + case LX_PTRACE_O_TRACEFORK: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_FORK; + break; + case LX_PTRACE_O_TRACEVFORK: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_VFORK; + break; + case LX_PTRACE_O_TRACEVFORKDONE: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_VFORK_DONE; + lwpd->br_ptrace_eventmsg = 0; + break; + default: + goto nostop; + } + + /* + * p_lock for the process containing the tracee will be dropped by + * lx_ptrace_stop_common(). + */ + return (lx_ptrace_stop_common(p, lwpd, LX_PR_EVENT) ? 
0 : ESRCH); + +nostop: + lwpd->br_ptrace_event = 0; + lwpd->br_ptrace_eventmsg = 0; + mutex_exit(&p->p_lock); + return (ESRCH); +} + +boolean_t +lx_ptrace_stop(ushort_t what) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = lwptoproc(lwp); + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + + VERIFY(what == LX_PR_SYSENTRY || what == LX_PR_SYSEXIT || + what == LX_PR_SIGNALLED); + + /* + * If we do not have an accord, bail out early. + */ + if (lwpd->br_ptrace_tracer == NULL) + return (B_FALSE); + + /* + * Lock this process and re-check the condition. + */ + mutex_enter(&p->p_lock); + if (lwpd->br_ptrace_tracer == NULL) { + VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_SYSCALL); + mutex_exit(&p->p_lock); + return (B_FALSE); + } + + if (what == LX_PR_SYSENTRY || what == LX_PR_SYSEXIT) { + /* + * This is a syscall-entry-stop or syscall-exit-stop point. + */ + if (!(lwpd->br_ptrace_flags & LX_PTRACE_SYSCALL)) { + /* + * A system call stop has not been requested. + */ + mutex_exit(&p->p_lock); + return (B_FALSE); + } + + /* + * The PTRACE_SYSCALL restart command applies only to the next + * system call entry or exit. The tracer must restart us with + * PTRACE_SYSCALL while we are in ptrace-stop for us to fire + * again at the next system call boundary. + */ + lwpd->br_ptrace_flags &= ~LX_PTRACE_SYSCALL; + } + + /* + * p_lock for the process containing the tracee will be dropped by + * lx_ptrace_stop_common(). + */ + return (lx_ptrace_stop_common(p, lwpd, what)); +} + +int +lx_issig_stop(proc_t *p, klwp_t *lwp) +{ + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + int lx_sig; + + VERIFY(MUTEX_HELD(&p->p_lock)); + + /* + * If we do not have an accord, bail out now. Additionally, if there + * is no valid signal then we have no reason to stop. 
+ */ + if (lwpd->br_ptrace_tracer == NULL || lwp->lwp_cursig == SIGKILL || + (lwp->lwp_cursig == 0 || lwp->lwp_cursig > NSIG) || + (lx_sig = stol_signo[lwp->lwp_cursig]) < 1) { + return (0); + } + + /* + * We stash the signal on the LWP where our waitid_helper will find it + * and enter the ptrace "signal-delivery-stop" condition. + */ + lwpd->br_ptrace_stopsig = lx_sig; + (void) lx_ptrace_stop_common(p, lwpd, LX_PR_SIGNALLED); + mutex_enter(&p->p_lock); + + /* + * When we return, the signal may have been altered or suppressed. + */ + if (lwpd->br_ptrace_stopsig != lx_sig) { + int native_sig; + lx_sig = lwpd->br_ptrace_stopsig; + + if (lx_sig >= LX_NSIG) { + lx_sig = 0; + } + + /* + * Translate signal from Linux signal number back to + * an illumos native signal. + */ + if (lx_sig >= LX_NSIG || lx_sig < 0 || (native_sig = + ltos_signo[lx_sig]) < 1) { + /* + * The signal is not deliverable. + */ + lwp->lwp_cursig = 0; + lwp->lwp_extsig = 0; + if (lwp->lwp_curinfo) { + siginfofree(lwp->lwp_curinfo); + lwp->lwp_curinfo = NULL; + } + } else { + /* + * Alter the currently dispatching signal. + */ + if (native_sig == SIGKILL) { + /* + * We mark ourselves the victim and request + * a restart of signal processing. + */ + p->p_flag |= SKILLED; + p->p_flag &= ~SEXTKILLED; + return (-1); + } + lwp->lwp_cursig = native_sig; + lwp->lwp_extsig = 0; + if (lwp->lwp_curinfo != NULL) { + lwp->lwp_curinfo->sq_info.si_signo = native_sig; + } + } + } + + lwpd->br_ptrace_stopsig = 0; + return (0); +} + +static void +lx_ptrace_exit_tracer(proc_t *p, lx_lwp_data_t *lwpd, + lx_ptrace_accord_t *accord) +{ + VERIFY(MUTEX_NOT_HELD(&p->p_lock)); + + lx_ptrace_accord_enter(accord); + /* + * Mark this accord for death. This means no new tracees can be + * attached to this accord. 
+ */ + VERIFY0(accord->lxpa_flags & LX_ACC_TOMBSTONE); + accord->lxpa_flags |= LX_ACC_TOMBSTONE; + lx_ptrace_accord_exit(accord); + + /* + * Walk the list of tracees, detaching them and setting them runnable + * if they are stopped. + */ + for (;;) { + klwp_t *rlwp; + proc_t *rproc; + lx_lwp_data_t *remote; + kmutex_t *rmp; + + mutex_enter(&accord->lxpa_tracees_lock); + if (list_is_empty(&accord->lxpa_tracees)) { + mutex_exit(&accord->lxpa_tracees_lock); + break; + } + + /* + * Fetch the first tracee LWP in the list and lock the process + * which contains it. + */ + remote = list_head(&accord->lxpa_tracees); + rlwp = remote->br_lwp; + rproc = lwptoproc(rlwp); + /* + * The p_lock mutex persists beyond the life of the process + * itself. We save the address, here, to prevent the need to + * dereference the proc_t after awaking from sleep. + */ + rmp = &rproc->p_lock; + mutex_enter(rmp); + + if (TRACEE_BUSY(remote)) { + /* + * This LWP is currently detaching itself on exit, or + * mid-way through stop(). We must wait for this + * action to be completed. While we wait on the CV, we + * must drop the accord tracee list lock. + */ + mutex_exit(&accord->lxpa_tracees_lock); + cv_wait(&lx_ptrace_busy_cv, rmp); + + /* + * While we were waiting, some state may have changed. + * Restart the walk to be sure we don't miss anything. + */ + mutex_exit(rmp); + continue; + } + + /* + * We now hold p_lock on the process. Remove the tracee from + * the list. + */ + VERIFY(list_link_active(&remote->br_ptrace_linkage)); + list_remove(&accord->lxpa_tracees, remote); + + /* + * Unlink the accord and clear our trace flags. + */ + remote->br_ptrace_attach = LX_PTA_NONE; + remote->br_ptrace_tracer = NULL; + remote->br_ptrace_flags = 0; + + /* + * Let go of the list lock before we restart the LWP. We must + * not hold any locks other than the process p_lock when + * we call lx_ptrace_restart_lwp() as it will thread_lock + * the tracee. 
+ */ + mutex_exit(&accord->lxpa_tracees_lock); + + /* + * Ensure that the LWP is not stopped on our account. + */ + lx_ptrace_restart_lwp(rlwp); + + /* + * Unlock the former tracee. + */ + mutex_exit(rmp); + + /* + * Drop the hold this tracee had on the accord. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); + } + + mutex_enter(&p->p_lock); + lwpd->br_ptrace_accord = NULL; + mutex_exit(&p->p_lock); + + /* + * Clean up and release our hold on the accord. If we completely + * detached all tracee LWPs, this will free the accord. Otherwise, it + * will be freed when they complete their cleanup. + * + * We hold "pidlock" while clearing these members for easy exclusion of + * waitid(), etc. + */ + mutex_enter(&pidlock); + lx_ptrace_accord_enter(accord); + accord->lxpa_cvp = NULL; + accord->lxpa_tracer = NULL; + mutex_exit(&pidlock); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); +} + +static void +lx_ptrace_exit_tracee(proc_t *p, lx_lwp_data_t *lwpd, + lx_ptrace_accord_t *accord) +{ + VERIFY(MUTEX_NOT_HELD(&p->p_lock)); + + /* + * We are the tracee LWP. Lock the accord tracee list and then our + * containing process. + */ + mutex_enter(&accord->lxpa_tracees_lock); + mutex_enter(&p->p_lock); + + /* + * Remove our reference to the accord. We will release our hold + * later. + */ + VERIFY(lwpd->br_ptrace_tracer == accord); + lwpd->br_ptrace_attach = LX_PTA_NONE; + lwpd->br_ptrace_tracer = NULL; + + /* + * Remove this LWP from the accord tracee list: + */ + VERIFY(list_link_active(&lwpd->br_ptrace_linkage)); + list_remove(&accord->lxpa_tracees, lwpd); + + /* + * Wake up any tracers waiting for us to detach from the accord. + */ + cv_broadcast(&lx_ptrace_busy_cv); + mutex_exit(&p->p_lock); + mutex_exit(&accord->lxpa_tracees_lock); + + /* + * Grab "pidlock" and wake the tracer if it is blocked in waitid(). 
+ */ + mutex_enter(&pidlock); + if (accord->lxpa_cvp != NULL) { + cv_broadcast(accord->lxpa_cvp); + } + mutex_exit(&pidlock); + + /* + * Release our hold on the accord. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); +} + +/* + * This routine is called from lx_exitlwp() when an LWP is ready to exit. If + * this LWP is being traced, it will be detached from the tracer's accord. The + * routine will also detach any LWPs being traced by this LWP. + */ +void +lx_ptrace_exit(proc_t *p, klwp_t *lwp) +{ + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + lx_ptrace_accord_t *accord; + + VERIFY(MUTEX_HELD(&p->p_lock)); + + /* + * Mark our LWP as exiting from a ptrace perspective. This will + * prevent a new accord from being allocated if one does not exist + * already, and will make us invisible to PTRACE_ATTACH/PTRACE_TRACEME. + */ + VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_EXITING); + lwpd->br_ptrace_flags |= LX_PTRACE_EXITING; + + if ((accord = lwpd->br_ptrace_tracer) != NULL) { + /* + * We are traced by another LWP and must detach ourselves. + */ + mutex_exit(&p->p_lock); + lx_ptrace_exit_tracee(p, lwpd, accord); + mutex_enter(&p->p_lock); + } + + if ((accord = lwpd->br_ptrace_accord) != NULL) { + /* + * We have been tracing other LWPs, and must detach from + * them and clean up our accord. + */ + mutex_exit(&p->p_lock); + lx_ptrace_exit_tracer(p, lwpd, accord); + mutex_enter(&p->p_lock); + } +} + +/* + * Called when a SIGCLD signal is dispatched so that we may enqueue another. + * Return 0 if we enqueued a signal, or -1 if not. 
+ */ +int +lx_sigcld_repost(proc_t *pp, sigqueue_t *sqp) +{ + klwp_t *lwp = ttolwp(curthread); + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + lx_ptrace_accord_t *accord; + lx_lwp_data_t *remote; + klwp_t *rlwp; + proc_t *rproc; + boolean_t found = B_FALSE; + + VERIFY(MUTEX_HELD(&pidlock)); + VERIFY(MUTEX_NOT_HELD(&pp->p_lock)); + VERIFY(lwptoproc(lwp) == pp); + + mutex_enter(&pp->p_lock); + if ((accord = lwpd->br_ptrace_accord) == NULL) { + /* + * This LWP is not a tracer LWP, so there will be no + * SIGCLD. + */ + mutex_exit(&pp->p_lock); + return (-1); + } + mutex_exit(&pp->p_lock); + + mutex_enter(&accord->lxpa_tracees_lock); + for (remote = list_head(&accord->lxpa_tracees); remote != NULL; + remote = list_next(&accord->lxpa_tracees, remote)) { + rlwp = remote->br_lwp; + rproc = lwptoproc(rlwp); + + /* + * Check if this LWP is in "ptrace-stop". If in the correct + * stop condition, lock the process containing the tracee LWP. + */ + if (lx_ptrace_lock_if_stopped(accord, remote) != 0) { + continue; + } + + if (remote->br_ptrace_flags & LX_PTRACE_PARENT_WAIT) { + /* + * This event depends on waitid() clearing out the + * event of another LWP. Skip it for now. + */ + mutex_exit(&rproc->p_lock); + continue; + } + + if (!(remote->br_ptrace_flags & LX_PTRACE_CLDPEND)) { + /* + * No SIGCLD is required for this LWP. + */ + mutex_exit(&rproc->p_lock); + continue; + } + + if (remote->br_ptrace_whystop == 0 || + remote->br_ptrace_whatstop == 0) { + /* + * No (new) stop reason to post for this LWP. + */ + mutex_exit(&rproc->p_lock); + continue; + } + + /* + * We found a process of interest. Leave the process + * containing the tracee LWP locked and break out of the loop. + */ + found = B_TRUE; + break; + } + mutex_exit(&accord->lxpa_tracees_lock); + + if (!found) { + return (-1); + } + + /* + * Generate siginfo for this tracee LWP. 
+ */ + lx_winfo(remote, &sqp->sq_info, B_FALSE, NULL, NULL); + remote->br_ptrace_flags &= ~LX_PTRACE_CLDPEND; + mutex_exit(&rproc->p_lock); + + mutex_enter(&pp->p_lock); + if (sigismember(&pp->p_sig, SIGCLD)) { + mutex_exit(&pp->p_lock); + + mutex_enter(&rproc->p_lock); + remote->br_ptrace_flags |= LX_PTRACE_CLDPEND; + mutex_exit(&rproc->p_lock); + + return (-1); + } + sigaddqa(pp, curthread, sqp); + mutex_exit(&pp->p_lock); + + return (0); +} + +/* + * Consume the next available ptrace(2) event queued against the accord for + * this LWP. The event will be emitted as if through waitid(), and converted + * by lx_waitpid() and friends before the return to usermode. + */ +int +lx_waitid_helper(idtype_t idtype, id_t id, k_siginfo_t *ip, int options, + boolean_t *brand_wants_wait, int *rval) +{ + lx_ptrace_accord_t *accord; + klwp_t *lwp = ttolwp(curthread); + proc_t *p = lwptoproc(lwp); + lx_lwp_data_t *local = lwptolxlwp(lwp); + lx_lwp_data_t *remote; + boolean_t found = B_FALSE; + klwp_t *rlwp = NULL; + proc_t *rproc = NULL; + pid_t event_pid = 0, event_ppid = 0; + boolean_t waitflag = !(options & WNOWAIT); + + VERIFY(MUTEX_HELD(&pidlock)); + VERIFY(MUTEX_NOT_HELD(&p->p_lock)); + + /* + * By default, we do not expect waitid() to block on our account. + */ + *brand_wants_wait = B_FALSE; + + if (!local->br_waitid_emulate) { + /* + * This waitid() call is not expecting emulated results. + */ + return (-1); + } + + switch (idtype) { + case P_ALL: + case P_PID: + case P_PGID: + break; + default: + /* + * This idtype has no power here. + */ + return (-1); + } + + if (lx_ptrace_accord_get(&accord, B_FALSE) != 0) { + /* + * This LWP does not have an accord; it cannot be tracing. + */ + return (-1); + } + + /* + * We do not need an additional hold on the accord as it belongs to + * the running, tracer, LWP. 
+ */ + lx_ptrace_accord_exit(accord); + + mutex_enter(&accord->lxpa_tracees_lock); + if (list_is_empty(&accord->lxpa_tracees)) { + /* + * Though it has an accord, there are currently no tracees in + * the list for this LWP. + */ + mutex_exit(&accord->lxpa_tracees_lock); + return (-1); + } + + /* + * Walk the list of tracees and determine if any of them have events to + * report. + */ + for (remote = list_head(&accord->lxpa_tracees); remote != NULL; + remote = list_next(&accord->lxpa_tracees, remote)) { + rlwp = remote->br_lwp; + rproc = lwptoproc(rlwp); + + /* + * If the __WALL option was passed, we unconditionally consider + * every possible child. + */ + if (!(local->br_waitid_flags & LX_WALL)) { + /* + * Otherwise, we check to see if this LWP matches an + * id we are waiting for. + */ + switch (idtype) { + case P_ALL: + break; + case P_PID: + if (remote->br_pid != id) + continue; + break; + case P_PGID: + if (rproc->p_pgrp != id) + continue; + break; + default: + cmn_err(CE_PANIC, "unexpected idtype: %d", + idtype); + } + } + + /* + * Check if this LWP is in "ptrace-stop". If in the correct + * stop condition, lock the process containing the tracee LWP. + */ + if (lx_ptrace_lock_if_stopped(accord, remote) != 0) { + continue; + } + + if (remote->br_ptrace_flags & LX_PTRACE_PARENT_WAIT) { + /* + * This event depends on waitid() clearing out the + * event of another LWP. Skip it for now. + */ + mutex_exit(&rproc->p_lock); + continue; + } + + if (remote->br_ptrace_whystop == 0 || + remote->br_ptrace_whatstop == 0) { + /* + * No (new) stop reason to post for this LWP. + */ + mutex_exit(&rproc->p_lock); + continue; + } + + /* + * We found a process of interest. Leave the process + * containing the tracee LWP locked and break out of the loop. + */ + found = B_TRUE; + break; + } + mutex_exit(&accord->lxpa_tracees_lock); + + if (!found) { + /* + * There were no events of interest, but we have tracees. 
+ * Signal to waitid() that it should block if the provided + * flags allow for it. + */ + *brand_wants_wait = B_TRUE; + return (-1); + } + + /* + * Populate the signal information. + */ + lx_winfo(remote, ip, waitflag, &event_ppid, &event_pid); + + /* + * Unlock the tracee. + */ + mutex_exit(&rproc->p_lock); + + if (event_pid != 0 && event_ppid != 0) { + /* + * We need to do another pass around the tracee list and + * unblock any events that have a "happens after" relationship + * with this event. + */ + mutex_enter(&accord->lxpa_tracees_lock); + for (remote = list_head(&accord->lxpa_tracees); remote != NULL; + remote = list_next(&accord->lxpa_tracees, remote)) { + rlwp = remote->br_lwp; + rproc = lwptoproc(rlwp); + + mutex_enter(&rproc->p_lock); + + if (remote->br_pid != event_pid || + remote->br_ppid != event_ppid) { + mutex_exit(&rproc->p_lock); + continue; + } + + remote->br_ptrace_flags &= ~LX_PTRACE_PARENT_WAIT; + + mutex_exit(&rproc->p_lock); + } + mutex_exit(&accord->lxpa_tracees_lock); + } + + /* + * If we are consuming this wait state, we remove the SIGCLD from + * the queue and post another. + */ + if (waitflag) { + mutex_exit(&pidlock); + sigcld_delete(ip); + sigcld_repost(); + mutex_enter(&pidlock); + } + + *rval = 0; + return (0); +} + +/* + * Some PTRACE_* requests are handled in-kernel by this function. It is called + * through brandsys() via the B_PTRACE_KERNEL subcommand. + */ +int +lx_ptrace_kernel(int ptrace_op, pid_t lxpid, uintptr_t addr, uintptr_t data) +{ + lx_lwp_data_t *local = ttolxlwp(curthread); + lx_ptrace_accord_t *accord; + lx_lwp_data_t *remote; + klwp_t *rlwp; + proc_t *rproc; + int error; + boolean_t found = B_FALSE; + boolean_t release_hold = B_FALSE; + + _NOTE(ARGUNUSED(addr)); + + /* + * These actions do not require the target LWP to be traced or stopped. 
+ */ + switch (ptrace_op) { + case LX_PTRACE_TRACEME: + return (lx_ptrace_traceme()); + + case LX_PTRACE_ATTACH: + return (lx_ptrace_attach(lxpid)); + } + + /* + * Ensure that we have an accord and obtain a lock on it. This routine + * should not fail because the LWP cannot make ptrace(2) system calls + * after it has begun exiting. + */ + VERIFY0(local->br_ptrace_flags & LX_PTRACE_EXITING); + VERIFY(lx_ptrace_accord_get(&accord, B_TRUE) == 0); + + /* + * The accord belongs to this (the tracer) LWP, and we have a hold on + * it. We drop the lock so that we can take other locks. + */ + lx_ptrace_accord_exit(accord); + + /* + * Does the tracee list contain the pid in question? + */ + mutex_enter(&accord->lxpa_tracees_lock); + for (remote = list_head(&accord->lxpa_tracees); remote != NULL; + remote = list_next(&accord->lxpa_tracees, remote)) { + if (remote->br_pid == lxpid) { + found = B_TRUE; + break; + } + } + if (!found) { + /* + * The requested pid does not appear in the tracee list. + */ + mutex_exit(&accord->lxpa_tracees_lock); + return (ESRCH); + } + + /* + * Attempt to lock the target LWP. + */ + if ((error = lx_ptrace_lock_if_stopped(accord, remote)) != 0) { + /* + * The LWP was not in "ptrace-stop". + */ + mutex_exit(&accord->lxpa_tracees_lock); + return (error); + } + + /* + * The target LWP is in "ptrace-stop". We have the containing process + * locked. 
+ */ + rlwp = remote->br_lwp; + rproc = lwptoproc(rlwp); + + /* + * Process the ptrace(2) request: + */ + switch (ptrace_op) { + case LX_PTRACE_DETACH: + error = lx_ptrace_detach(accord, remote, (int)data, + &release_hold); + break; + + case LX_PTRACE_CONT: + error = lx_ptrace_cont(remote, LX_PTC_NONE, (int)data); + break; + + case LX_PTRACE_SYSCALL: + error = lx_ptrace_cont(remote, LX_PTC_SYSCALL, (int)data); + break; + + case LX_PTRACE_SINGLESTEP: + error = lx_ptrace_cont(remote, LX_PTC_SINGLESTEP, (int)data); + break; + + case LX_PTRACE_SETOPTIONS: + error = lx_ptrace_setoptions(remote, data); + break; + + case LX_PTRACE_GETEVENTMSG: + error = lx_ptrace_geteventmsg(remote, (void *)data); + break; + + default: + error = EINVAL; + } + + /* + * Drop the lock on both the tracee process and the tracee list. + */ + mutex_exit(&rproc->p_lock); + mutex_exit(&accord->lxpa_tracees_lock); + + if (release_hold) { + /* + * Release a hold from the accord. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); + } + + return (error); +} + +void +lx_ptrace_init(void) +{ + cv_init(&lx_ptrace_busy_cv, NULL, CV_DEFAULT, NULL); + + lx_ptrace_accord_cache = kmem_cache_create("lx_ptrace_accord", + sizeof (lx_ptrace_accord_t), 0, NULL, NULL, NULL, NULL, NULL, 0); +} + +void +lx_ptrace_fini(void) +{ + cv_destroy(&lx_ptrace_busy_cv); + + kmem_cache_destroy(lx_ptrace_accord_cache); +} diff --git a/usr/src/uts/common/brand/lx/procfs/lx_proc.h b/usr/src/uts/common/brand/lx/procfs/lx_proc.h index 184a5211db..a5c2391c95 100644 --- a/usr/src/uts/common/brand/lx/procfs/lx_proc.h +++ b/usr/src/uts/common/brand/lx/procfs/lx_proc.h @@ -138,6 +138,7 @@ typedef enum lxpr_nodetype { LXPR_NET_IGMP, /* /proc/net/igmp */ LXPR_NET_IP_MR_CACHE, /* /proc/net/ip_mr_cache */ LXPR_NET_IP_MR_VIF, /* /proc/net/ip_mr_vif */ + LXPR_NET_IPV6_ROUTE, /* /proc/net/ipv6_route */ LXPR_NET_MCFILTER, /* /proc/net/mcfilter */ LXPR_NET_NETSTAT, /* /proc/net/netstat */ 
LXPR_NET_RAW, /* /proc/net/raw */ @@ -250,4 +251,11 @@ void lxpr_unlock(proc_t *); } #endif +#ifndef islower +#define islower(x) (((unsigned)(x) >= 'a') && ((unsigned)(x) <= 'z')) +#endif +#ifndef toupper +#define toupper(x) (islower(x) ? (x) - 'a' + 'A' : (x)) +#endif + #endif /* _LXPROC_H */ diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c index a15d852793..3d96a1ceb2 100644 --- a/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c +++ b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c @@ -482,8 +482,8 @@ lxpr_getnode(vnode_t *dp, lxpr_nodetype_t type, proc_t *p, int fd) case LXPR_PID_FD_FD: ASSERT(p != NULL); /* lxpr_realvp is set after we return */ - vp->v_type = VLNK; lxpnp->lxpr_mode = 0700; /* read-write-exe owner only */ + vp->v_type = VLNK; break; case LXPR_PID_FDDIR: diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c index df2a4d7fb5..758a9192d7 100644 --- a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c +++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c @@ -79,6 +79,8 @@ #include <inet/tcp.h> #include <inet/udp_impl.h> #include <inet/ipclassifier.h> +#include <sys/socketvar.h> +#include <fs/sockfs/socktpi.h> /* Dependent on procfs */ extern kthread_t *prchoose(proc_t *); @@ -108,6 +110,7 @@ static int lxpr_lookup(vnode_t *, char *, vnode_t **, static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *, caller_context_t *, int); static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *); +static int lxpr_readlink_pid_fd(lxpr_node_t *lxpnp, char *bp, size_t len); static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *); static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *); static int lxpr_sync(void); @@ -163,6 +166,7 @@ static void lxpr_read_net_if_inet6(lxpr_node_t *, lxpr_uiobuf_t *); static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *); static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, 
lxpr_uiobuf_t *); static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_ipv6_route(lxpr_node_t *, lxpr_uiobuf_t *); static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *); static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *); static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *); @@ -320,6 +324,7 @@ static lxpr_dirent_t netdir[] = { { LXPR_NET_IGMP, "igmp" }, { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" }, { LXPR_NET_IP_MR_VIF, "ip_mr_vif" }, + { LXPR_NET_IPV6_ROUTE, "ipv6_route" }, { LXPR_NET_MCFILTER, "mcfilter" }, { LXPR_NET_NETSTAT, "netstat" }, { LXPR_NET_RAW, "raw" }, @@ -502,6 +507,7 @@ static void (*lxpr_read_function[LXPR_NFILES])() = { lxpr_read_net_igmp, /* /proc/net/igmp */ lxpr_read_net_ip_mr_cache, /* /proc/net/ip_mr_cache */ lxpr_read_net_ip_mr_vif, /* /proc/net/ip_mr_vif */ + lxpr_read_net_ipv6_route, /* /proc/net/ipv6_route */ lxpr_read_net_mcfilter, /* /proc/net/mcfilter */ lxpr_read_net_netstat, /* /proc/net/netstat */ lxpr_read_net_raw, /* /proc/net/raw */ @@ -579,6 +585,7 @@ static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = { lxpr_lookup_not_a_dir, /* /proc/net/igmp */ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */ + lxpr_lookup_not_a_dir, /* /proc/net/ipv6_route */ lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */ lxpr_lookup_not_a_dir, /* /proc/net/netstat */ lxpr_lookup_not_a_dir, /* /proc/net/raw */ @@ -656,6 +663,7 @@ static int (*lxpr_readdir_function[LXPR_NFILES])() = { lxpr_readdir_not_a_dir, /* /proc/net/igmp */ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */ + lxpr_readdir_not_a_dir, /* /proc/net/ipv6_route */ lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */ lxpr_readdir_not_a_dir, /* /proc/net/netstat */ lxpr_readdir_not_a_dir, /* /proc/net/raw */ @@ -976,7 +984,7 @@ lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) int maj = 0; int min = 0; - 
u_longlong_t inode = 0; + ino_t inode = 0; *buf = '\0'; if (pbuf->vp != NULL) { @@ -993,12 +1001,12 @@ lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) if (p->p_model == DATAMODEL_LP64) { lxpr_uiobuf_printf(uiobuf, - "%016llx-%16llx %s %016llx %02d:%03d %lld%s%s\n", + "%08llx-%08llx %s %08llx %02x:%02x %llu%s%s\n", pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset, maj, min, inode, *buf != '\0' ? " " : "", buf); } else { lxpr_uiobuf_printf(uiobuf, - "%08x-%08x %s %08x %02d:%03d %lld%s%s\n", + "%08x-%08x %s %08x %02x:%02x %llu%s%s\n", (uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr, pbuf->prot, (uint32_t)pbuf->offset, maj, min, inode, *buf != '\0' ? " " : "", buf); @@ -1768,9 +1776,9 @@ lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) } static void -lxpr_inet6_out(in6_addr_t addr, char buf[33]) +lxpr_inet6_out(const in6_addr_t *addr, char buf[33]) { - uint8_t *ip = addr.s6_addr; + const uint8_t *ip = addr->s6_addr; char digits[] = "0123456789abcdef"; int i; for (i = 0; i < 16; i++) { @@ -1811,7 +1819,7 @@ lxpr_read_net_if_inet6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) ipif_get_name(ipif, ifname, sizeof (ifname)); lx_ifname_convert(ifname, LX_IFNAME_FROMNATIVE); - lxpr_inet6_out(ipif->ipif_v6lcl_addr, ip6out); + lxpr_inet6_out(&ipif->ipif_v6lcl_addr, ip6out); /* Scope output is shifted on Linux */ scope = scope << 4; @@ -1841,6 +1849,66 @@ lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { } +static void +lxpr_format_route_ipv6(ire_t *ire, lxpr_uiobuf_t *uiobuf) +{ + uint32_t flags; + char name[IFNAMSIZ]; + char ipv6addr[33]; + + lxpr_inet6_out(&ire->ire_addr_v6, ipv6addr); + lxpr_uiobuf_printf(uiobuf, "%s %02x ", ipv6addr, + ip_mask_to_plen_v6(&ire->ire_mask_v6)); + + /* punt on this for now */ + lxpr_uiobuf_printf(uiobuf, "%s %02x ", + "00000000000000000000000000000000", 0); + + lxpr_inet6_out(&ire->ire_gateway_addr_v6, ipv6addr); + lxpr_uiobuf_printf(uiobuf, "%s", ipv6addr); + + flags = ire->ire_flags & + 
(RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED); + /* Linux's RTF_LOCAL equivalent */ + if (ire->ire_metrics.iulp_local) + flags |= 0x80000000; + + if (ire->ire_ill != NULL) { + ill_get_name(ire->ire_ill, name, sizeof (name)); + lx_ifname_convert(name, LX_IFNAME_FROMNATIVE); + } else { + name[0] = '\0'; + } + + lxpr_uiobuf_printf(uiobuf, " %08x %08x %08x %08x %8s\n", + 0, /* metric */ + ire->ire_refcnt, + 0, + flags, + name); +} + +/* ARGSUSED */ +static void +lxpr_read_net_ipv6_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + netstack_t *ns; + ip_stack_t *ipst; + + ns = netstack_get_current(); + if (ns == NULL) + return; + ipst = ns->netstack_ip; + + /* + * LX branded zones are expected to have exclusive IP stack, hence + * using ALL_ZONES as the zoneid filter. + */ + ire_walk_v6(&lxpr_format_route_ipv6, uiobuf, ALL_ZONES, ipst); + + netstack_rele(ns); +} + /* ARGSUSED */ static void lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) @@ -1859,10 +1927,97 @@ lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { } +#define LXPR_SKIP_ROUTE(type) \ + (((IRE_IF_CLONE | IRE_BROADCAST | IRE_MULTICAST | \ + IRE_NOROUTE | IRE_LOOPBACK | IRE_LOCAL) & type) != 0) + +static void +lxpr_format_route_ipv4(ire_t *ire, lxpr_uiobuf_t *uiobuf) +{ + uint32_t flags; + char name[IFNAMSIZ]; + ill_t *ill; + ire_t *nire; + ipif_t *ipif; + ipaddr_t gateway; + + if (LXPR_SKIP_ROUTE(ire->ire_type) || ire->ire_testhidden != 0) + return; + + /* These route flags have direct Linux equivalents */ + flags = ire->ire_flags & + (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED); + + /* + * Search for a suitable IRE for naming purposes. + * On Linux, the default route is typically associated with the + * interface used to access gateway. The default IRE on Illumos + * typically lacks an ill reference but its parent might have one. 
+ */ + nire = ire; + do { + ill = nire->ire_ill; + nire = nire->ire_dep_parent; + } while (ill == NULL && nire != NULL); + if (ill != NULL) { + ill_get_name(ill, name, sizeof (name)); + lx_ifname_convert(name, LX_IFNAME_FROMNATIVE); + } else { + name[0] = '*'; + name[1] = '\0'; + } + + /* + * Linux suppresses the gateway address for directly connected + * interface networks. To emulate this behavior, we walk all addresses + * of a given route interface. If one matches the gateway, it is + * displayed as NULL. + */ + gateway = ire->ire_gateway_addr; + if ((ill = ire->ire_ill) != NULL) { + for (ipif = ill->ill_ipif; ipif != NULL; + ipif = ipif->ipif_next) { + if (ipif->ipif_lcl_addr == gateway) { + gateway = 0; + break; + } + } + } + + lxpr_uiobuf_printf(uiobuf, "%s\t%08X\t%08X\t%04X\t%d\t%u\t" + "%d\t%08X\t%d\t%u\t%u\n", + name, + ire->ire_addr, + gateway, + flags, 0, 0, + 0, /* priority */ + ire->ire_mask, + 0, 0, /* mss, window */ + ire->ire_metrics.iulp_rtt); +} + /* ARGSUSED */ static void lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { + netstack_t *ns; + ip_stack_t *ipst; + + lxpr_uiobuf_printf(uiobuf, "Iface\tDestination\tGateway \tFlags\t" + "RefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n"); + + ns = netstack_get_current(); + if (ns == NULL) + return; + ipst = ns->netstack_ip; + + /* + * LX branded zones are expected to have exclusive IP stack, hence + * using ALL_ZONES as the zoneid filter. 
+ */ + ire_walk_v4(&lxpr_format_route_ipv4, uiobuf, ALL_ZONES, ipst); + + netstack_rele(ns); } /* ARGSUSED */ @@ -1883,10 +2038,146 @@ lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { } +typedef struct lxpr_snmp_table { + const char *lst_proto; + const char *lst_fields[]; +} lxpr_snmp_table_t; + +static lxpr_snmp_table_t lxpr_snmp_ip = { "ip", + { + "forwarding", "defaultTTL", "inReceives", "inHdrErrors", + "inAddrErrors", "forwDatagrams", "inUnknownProtos", "inDiscards", + "inDelivers", "outRequests", "outDiscards", "outNoRoutes", + "reasmTimeout", "reasmReqds", "reasmOKs", "reasmFails", "fragOKs", + "fragFails", "fragCreates", + NULL + } +}; +static lxpr_snmp_table_t lxpr_snmp_icmp = { "icmp", + { + "inMsgs", "inErrors", "inCsumErrors", "inDestUnreachs", "inTimeExcds", + "inParmProbs", "inSrcQuenchs", "inRedirects", "inEchos", "inEchoReps", + "inTimestamps", "inTimestampReps", "inAddrMasks", "inAddrMaskReps", + "outMsgs", "outErrors", "outDestUnreachs", "outTimeExcds", + "outParmProbs", "outSrcQuenchs", "outRedirects", "outEchos", + "outEchoReps", "outTimestamps", "outTimestampReps", "outAddrMasks", + "outAddrMaskReps", + NULL + } +}; +static lxpr_snmp_table_t lxpr_snmp_tcp = { "tcp", + { + "rtoAlgorithm", "rtoMin", "rtoMax", "maxConn", "activeOpens", + "passiveOpens", "attemptFails", "estabResets", "currEstab", "inSegs", + "outSegs", "retransSegs", "inErrs", "outRsts", "inCsumErrors", + NULL + } +}; +static lxpr_snmp_table_t lxpr_snmp_udp = { "udp", + { + "inDatagrams", "noPorts", "inErrors", "outDatagrams", "rcvbufErrors", + "sndbufErrors", "inCsumErrors", + NULL + } +}; + +static lxpr_snmp_table_t *lxpr_net_snmptab[] = { + &lxpr_snmp_ip, + &lxpr_snmp_icmp, + &lxpr_snmp_tcp, + &lxpr_snmp_udp, + NULL +}; + +static void +lxpr_kstat_print_tab(lxpr_uiobuf_t *uiobuf, lxpr_snmp_table_t *table, + kstat_t *kn) +{ + kstat_named_t *klist; + char upname[KSTAT_STRLEN], upfield[KSTAT_STRLEN]; + int i, j, num; + size_t size; + + klist = (kstat_named_t 
*)lxpr_kstat_read(kn, B_TRUE, &size, &num); + if (klist == NULL) + return; + + /* Print the header line, fields capitalized */ + (void) strncpy(upname, table->lst_proto, KSTAT_STRLEN); + upname[0] = toupper(upname[0]); + lxpr_uiobuf_printf(uiobuf, "%s:", upname); + for (i = 0; table->lst_fields[i] != NULL; i++) { + (void) strncpy(upfield, table->lst_fields[i], KSTAT_STRLEN); + upfield[0] = toupper(upfield[0]); + lxpr_uiobuf_printf(uiobuf, " %s", upfield); + } + lxpr_uiobuf_printf(uiobuf, "\n%s:", upname); + + /* Then loop back through to print the value line. */ + for (i = 0; table->lst_fields[i] != NULL; i++) { + kstat_named_t *kpoint = NULL; + for (j = 0; j < num; j++) { + if (strncmp(klist[j].name, table->lst_fields[i], + KSTAT_STRLEN) == 0) { + kpoint = &klist[j]; + break; + } + } + if (kpoint == NULL) { + /* Output 0 for unknown fields */ + lxpr_uiobuf_printf(uiobuf, " 0"); + } else { + switch (kpoint->data_type) { + case KSTAT_DATA_INT32: + lxpr_uiobuf_printf(uiobuf, " %d", + kpoint->value.i32); + break; + case KSTAT_DATA_UINT32: + lxpr_uiobuf_printf(uiobuf, " %u", + kpoint->value.ui32); + break; + case KSTAT_DATA_INT64: + lxpr_uiobuf_printf(uiobuf, " %ld", + kpoint->value.l); + break; + case KSTAT_DATA_UINT64: + lxpr_uiobuf_printf(uiobuf, " %lu", + kpoint->value.ul); + break; + } + } + } + lxpr_uiobuf_printf(uiobuf, "\n"); + kmem_free(klist, size); +} + /* ARGSUSED */ static void lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { + kstat_t *ksr; + kstat_t ks0; + lxpr_snmp_table_t **table = lxpr_net_snmptab; + int i, t, nidx; + size_t sidx; + + ks0.ks_kid = 0; + ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx); + if (ksr == NULL) + return; + + for (t = 0; table[t] != NULL; t++) { + for (i = 0; i < nidx; i++) { + if (strncmp(ksr[i].ks_class, "mib2", KSTAT_STRLEN) != 0) + continue; + if (strncmp(ksr[i].ks_name, table[t]->lst_proto, + KSTAT_STRLEN) == 0) { + lxpr_kstat_print_tab(uiobuf, table[t], &ksr[i]); + break; + } + } + } + 
kmem_free(ksr, sidx); } /* ARGSUSED */ @@ -1963,13 +2254,13 @@ lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) * - tx_queue * - rx_queue * - uid + * - inode * * Omitted/invalid fields * - tr * - tm->when * - retrnsmt * - timeout - * - inode */ ns = netstack_get_current(); @@ -1983,6 +2274,9 @@ lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) while ((connp = ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) { tcp_t *tcp; + vattr_t attr; + sonode_t *so = (sonode_t *)connp->conn_upper_handle; + vnode_t *vp = (so != NULL) ? so->so_vnode : NULL; if (connp->conn_ipversion != ipver) continue; tcp = connp->conn_tcp; @@ -2010,9 +2304,15 @@ lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) connp->conn_faddr_v6.s6_addr32[3], ntohs(connp->conn_fport)); } + + /* fetch the simulated inode for the socket */ + if (vp == NULL || + VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0) + attr.va_nodeid = 0; + lxpr_uiobuf_printf(uiobuf, "%02X %08X:%08X %02X:%08X %08X " - "%5u %8d %u %d %p %u %u %u %u %d\n", + "%5u %8d %lu %d %p %u %u %u %u %d\n", lxpr_convert_tcp_state(tcp->tcp_state), tcp->tcp_rcv_cnt, tcp->tcp_unsent, /* rx/tx queue */ 0, 0, /* tr, when */ @@ -2020,7 +2320,7 @@ lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) connp->conn_cred->cr_uid, 0, /* timeout */ /* inode + more */ - 0, 0, NULL, 0, 0, 0, 0, 0); + (ino_t)attr.va_nodeid, 0, NULL, 0, 0, 0, 0, 0); } } netstack_rele(ns); @@ -2093,6 +2393,9 @@ lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) ipcl_get_next_conn(connfp, connp, IPCL_UDPCONN)) != NULL) { udp_t *udp; int state = 0; + vattr_t attr; + sonode_t *so = (sonode_t *)connp->conn_upper_handle; + vnode_t *vp = (so != NULL) ? 
so->so_vnode : NULL; if (connp->conn_ipversion != ipver) continue; udp = connp->conn_udp; @@ -2120,6 +2423,7 @@ lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) connp->conn_faddr_v6.s6_addr32[3], ntohs(connp->conn_fport)); } + switch (udp->udp_state) { case TS_UNBND: case TS_IDLE: @@ -2129,9 +2433,15 @@ lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) state = 1; break; } + + /* fetch the simulated inode for the socket */ + if (vp == NULL || + VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0) + attr.va_nodeid = 0; + lxpr_uiobuf_printf(uiobuf, "%02X %08X:%08X %02X:%08X %08X " - "%5u %8d %u %d %p %d\n", + "%5u %8d %lu %d %p %d\n", state, 0, 0, /* rx/tx queue */ 0, 0, /* tr, when */ @@ -2139,7 +2449,7 @@ lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) connp->conn_cred->cr_uid, 0, /* timeout */ /* inode, ref, pointer, drops */ - 0, 0, NULL, 0); + (ino_t)attr.va_nodeid, 0, NULL, 0); } } netstack_rele(ns); @@ -2163,6 +2473,95 @@ lxpr_read_net_udp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) static void lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { + sonode_t *so; + zoneid_t zoneid = getzoneid(); + + lxpr_uiobuf_printf(uiobuf, "Num RefCount Protocol Flags Type " + "St Inode Path\n"); + + mutex_enter(&socklist.sl_lock); + for (so = socklist.sl_list; so != NULL; + so = _SOTOTPI(so)->sti_next_so) { + vnode_t *vp = so->so_vnode; + vattr_t attr; + sotpi_info_t *sti; + const char *name = NULL; + int status = 0; + int type = 0; + int flags = 0; + + /* Only process active sonodes in this zone */ + if (so->so_count == 0 || so->so_zoneid != zoneid) + continue; + + /* + * Grab the inode, if possible. + * This must be done before entering so_lock. 
+ */ + if (vp == NULL || + VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0) + attr.va_nodeid = 0; + + mutex_enter(&so->so_lock); + sti = _SOTOTPI(so); + + if (sti->sti_laddr_sa != NULL) + name = sti->sti_laddr_sa->sa_data; + else if (sti->sti_faddr_sa != NULL) + name = sti->sti_faddr_sa->sa_data; + + /* + * Derived from enum values in Linux kernel source: + * include/uapi/linux/net.h + */ + if ((so->so_state & SS_ISDISCONNECTING) != 0) { + status = 4; + } else if ((so->so_state & SS_ISCONNECTING) != 0) { + status = 2; + } else if ((so->so_state & SS_ISCONNECTED) != 0) { + status = 3; + } else { + status = 1; + /* Add ACC flag for stream-type server sockets */ + if (so->so_type != SOCK_DGRAM && + sti->sti_laddr_sa != NULL) + flags |= 0x10000; + } + + /* Convert to Linux type */ + switch (so->so_type) { + case SOCK_DGRAM: + type = 2; + break; + case SOCK_SEQPACKET: + type = 5; + break; + default: + type = 1; + } + + lxpr_uiobuf_printf(uiobuf, "%p: %08X %08X %08X %04X %02X %5llu", + so, + so->so_count, + 0, /* proto, always 0 */ + flags, + type, + status, + (ino_t)attr.va_nodeid); + + /* + * Due to shortcomings in the abstract socket emulation, they + * cannot be properly represented here (as @<path>). + * + * This will be the case until they are better implemented. + */ + if (name != NULL) + lxpr_uiobuf_printf(uiobuf, " %s\n", name); + else + lxpr_uiobuf_printf(uiobuf, "\n"); + mutex_exit(&so->so_lock); + } + mutex_exit(&socklist.sl_lock); } /* @@ -3170,6 +3569,13 @@ lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, vap->va_uid = crgetruid(curproc->p_cred); vap->va_gid = crgetrgid(curproc->p_cred); break; + case LXPR_PID_FD_FD: + /* + * Restore VLNK type for lstat-type activity. + * See lxpr_readlink for more details. 
+ */ + if ((flags & FOLLOW) == 0) + vap->va_type = VLNK; default: break; } @@ -3451,17 +3857,15 @@ lxpr_lookup_fddir(vnode_t *dp, char *comp) */ lxpnp->lxpr_realvp = vp; VN_HOLD(lxpnp->lxpr_realvp); - if (lxpnp->lxpr_realvp->v_type == VFIFO) { - /* - * lxpr_getnode initially sets the type to be VLNK for - * the LXPR_PID_FD_FD option, but that breaks fifo - * file descriptors (which are unlinked named pipes). - * We set this as a regular file so that open.2 comes - * into lxpr_open so we can do more work. - */ - dp = LXPTOV(lxpnp); - dp->v_type = VREG; - } + /* + * For certain entries (sockets, pipes, etc), Linux expects a + * bogus-named symlink. If that's the case, report the type as + * VNON to bypass link-following elsewhere in the vfs system. + * + * See lxpr_readlink for more details. + */ + if (lxpr_readlink_pid_fd(lxpnp, NULL, 0) == 0) + LXPTOV(lxpnp)->v_type = VNON; } mutex_enter(&p->p_lock); @@ -4053,16 +4457,41 @@ lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct) pid_t pid; int error = 0; - /* must be a symbolic link file */ - if (vp->v_type != VLNK) + /* + * Linux does something very "clever" for /proc/<pid>/fd/<num> entries. + * Open FDs are represented as symlinks, the link contents + * corresponding to the open resource. For plain files or devices, + * this isn't absurd since one can dereference the symlink to query + * the underlying resource. For sockets or pipes, it becomes ugly in a + * hurry. To maintain this human-readable output, those FD symlinks + * point to bogus targets such as "socket:[<inodenum>]". This requires + * circumventing vfs since the stat/lstat behavior on those FD entries + * will be unusual. (A stat must retrieve information about the open + * socket or pipe. It cannot fail because the link contents point to + * an absent file.) + * + * To accomplish this, lxpr_getnode returns an vnode typed VNON for FD + * entries. 
This bypasses code paths which would normally + * short-circuit on symlinks and allows us to emulate the vfs behavior + * expected by /proc consumers. + */ + if (vp->v_type != VLNK && lxpnp->lxpr_type != LXPR_PID_FD_FD) return (EINVAL); /* Try to produce a symlink name for anything that has a realvp */ if (rvp != NULL) { if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0) return (error); - if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) - return (error); + if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) { + /* + * Special handling possible for /proc/<pid>/fd/<num> + * Generate <type>:[<inode>] links, if allowed. + */ + if (lxpnp->lxpr_type != LXPR_PID_FD_FD || + lxpr_readlink_pid_fd(lxpnp, bp, buflen) != 0) { + return (error); + } + } } else { switch (lxpnp->lxpr_type) { case LXPR_SELF: @@ -4104,6 +4533,37 @@ lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct) } /* + * Attempt to create Linux-proc-style fake symlinks contents for supported + * /proc/<pid>/fd/<#> entries. + */ +static int +lxpr_readlink_pid_fd(lxpr_node_t *lxpnp, char *bp, size_t len) +{ + const char *format; + vnode_t *rvp = lxpnp->lxpr_realvp; + vattr_t attr; + + switch (rvp->v_type) { + case VSOCK: + format = "socket:[%lu]"; + break; + case VFIFO: + format = "pipe:[%lu]"; + break; + default: + return (-1); + } + + /* Fetch the inode of the underlying vnode */ + if (VOP_GETATTR(rvp, &attr, 0, CRED(), NULL) != 0) + return (-1); + + if (bp != NULL) + (void) snprintf(bp, len, format, (ino_t)attr.va_nodeid); + return (0); +} + +/* * lxpr_inactive(): Vnode operation for VOP_INACTIVE() * Vnode is no longer referenced, deallocate the file * and all its resources. 
diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h index 942a6e3b44..e7f5ee9867 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_brand.h +++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h @@ -80,10 +80,10 @@ extern "C" { #define B_LPID_TO_SPAIR 128 #define B_SYSENTRY 129 #define B_SYSRETURN 130 -#define B_PTRACE_SYSCALL 131 +#define B_PTRACE_KERNEL 131 #define B_SET_AFFINITY_MASK 132 #define B_GET_AFFINITY_MASK 133 -#define B_PTRACE_EXT_OPTS 134 +#define B_PTRACE_CLONE_BEGIN 134 #define B_PTRACE_STOP_FOR_OPT 135 #define B_UNSUPPORTED 136 #define B_STORE_ARGS 137 @@ -91,37 +91,31 @@ extern "C" { #define B_SIGNAL_RETURN 139 #define B_UNWIND_NTV_SYSC_FLAG 140 #define B_EXIT_AS_SIG 141 -#define B_PTRACE_GETEVENTMSG 142 +#define B_HELPER_WAITID 142 #define B_IKE_SYSCALL 192 -/* B_PTRACE_EXT_OPTS subcommands */ -#define B_PTRACE_EXT_OPTS_SET 1 -#define B_PTRACE_EXT_OPTS_GET 2 -#define B_PTRACE_EXT_OPTS_EVT 3 -#define B_PTRACE_DETACH 4 - +#ifndef _ASM /* * Support for Linux PTRACE_SETOPTIONS handling. */ -#define LX_PTRACE_O_TRACESYSGOOD 0x0001 -#define LX_PTRACE_O_TRACEFORK 0x0002 -#define LX_PTRACE_O_TRACEVFORK 0x0004 -#define LX_PTRACE_O_TRACECLONE 0x0008 -#define LX_PTRACE_O_TRACEEXEC 0x0010 -#define LX_PTRACE_O_TRACEVFORKDONE 0x0020 -#define LX_PTRACE_O_TRACEEXIT 0x0040 -#define LX_PTRACE_O_TRACESECCOMP 0x0080 -/* - * lx emulation-specific flag to indicate this is a child process being stopped - * due to one of the PTRACE_SETOPTIONS above. - */ -#define EMUL_PTRACE_O_CHILD 0x8000 -/* - * lx emulation-specific flag to determine via B_PTRACE_EXT_OPTS_GET if a - * process is being traced because of one of the PTRACE_SETOPTIONS above. 
- */ -#define EMUL_PTRACE_IS_TRACED 0x8000 +typedef enum lx_ptrace_options { + LX_PTRACE_O_TRACESYSGOOD = 0x0001, + LX_PTRACE_O_TRACEFORK = 0x0002, + LX_PTRACE_O_TRACEVFORK = 0x0004, + LX_PTRACE_O_TRACECLONE = 0x0008, + LX_PTRACE_O_TRACEEXEC = 0x0010, + LX_PTRACE_O_TRACEVFORKDONE = 0x0020, + LX_PTRACE_O_TRACEEXIT = 0x0040, + LX_PTRACE_O_TRACESECCOMP = 0x0080 +} lx_ptrace_options_t; + +#define LX_PTRACE_O_ALL \ + (LX_PTRACE_O_TRACESYSGOOD | LX_PTRACE_O_TRACEFORK | \ + LX_PTRACE_O_TRACEVFORK | LX_PTRACE_O_TRACECLONE | \ + LX_PTRACE_O_TRACEEXEC | LX_PTRACE_O_TRACEVFORKDONE | \ + LX_PTRACE_O_TRACEEXIT | LX_PTRACE_O_TRACESECCOMP) +#endif /* !_ASM */ /* siginfo si_status for traced events */ #define LX_PTRACE_EVENT_FORK 0x100 @@ -132,6 +126,17 @@ extern "C" { #define LX_PTRACE_EVENT_EXIT 0x600 #define LX_PTRACE_EVENT_SECCOMP 0x700 +/* + * Brand-private values for the "pr_what" member of lwpstatus, for use with the + * PR_BRAND stop reason. These reasons are validated in lx_stop_notify(); + * update it if you add new reasons here. 
+ */ +#define LX_PR_SYSENTRY 1 +#define LX_PR_SYSEXIT 2 +#define LX_PR_SIGNALLED 3 +#define LX_PR_EVENT 4 + + #define LX_VERSION_1 1 #define LX_VERSION LX_VERSION_1 @@ -154,6 +159,8 @@ extern "C" { #ifndef _ASM +extern struct brand lx_brand; + typedef struct lx_brand_registration { uint_t lxbr_version; /* version number */ void *lxbr_handler; /* base address of handler */ @@ -255,10 +262,6 @@ typedef struct lx_proc_data { uintptr_t l_traceflag; /* address of 32-bit tracing flag */ pid_t l_ppid; /* pid of originating parent proc */ uint64_t l_ptrace; /* process being observed with ptrace */ - uint_t l_ptrace_opts; /* process's extended ptrace options */ - uint_t l_ptrace_event; /* extended ptrace option trap event */ - uint_t l_ptrace_is_traced; /* set if traced due to ptrace setoptions */ - ulong_t l_ptrace_eventmsg; /* extended ptrace event msg */ lx_elf_data_t l_elf_data; /* ELF data for linux executable */ int l_signal; /* signal to deliver to parent when this */ /* thread group dies */ @@ -280,10 +283,70 @@ typedef ulong_t lx_affmask_t[LX_AFF_ULONGS]; #ifdef _KERNEL +typedef struct lx_lwp_data lx_lwp_data_t; + +/* + * Flag values for "lxpa_flags" on a ptrace(2) accord. + */ +typedef enum lx_accord_flags { + LX_ACC_TOMBSTONE = 0x01 +} lx_accord_flags_t; + +/* + * Flags values for "br_ptrace_flags" in the LWP-specific data. + */ +typedef enum lx_ptrace_state { + LX_PTRACE_SYSCALL = 0x01, + LX_PTRACE_EXITING = 0x02, + LX_PTRACE_STOPPING = 0x04, + LX_PTRACE_INHERIT = 0x08, + LX_PTRACE_STOPPED = 0x10, + LX_PTRACE_PARENT_WAIT = 0x20, + LX_PTRACE_CLDPEND = 0x40, + LX_PTRACE_CLONING = 0x80 +} lx_ptrace_state_t; + +/* + * A ptrace(2) accord represents the relationship between a tracer LWP and the + * set of LWPs that it is tracing: the tracees. This data structure belongs + * primarily to the tracer, but is reference counted so that it may be freed by + * whoever references it last. 
+ */ +typedef struct lx_ptrace_accord { + kmutex_t lxpa_lock; + uint_t lxpa_refcnt; + lx_accord_flags_t lxpa_flags; + + /* + * The tracer must hold "pidlock" while clearing these fields for + * exclusion of waitid(), etc. + */ + lx_lwp_data_t *lxpa_tracer; + kcondvar_t *lxpa_cvp; + + /* + * The "lxpa_tracees_lock" mutex protects the tracee list. + */ + kmutex_t lxpa_tracees_lock; + list_t lxpa_tracees; +} lx_ptrace_accord_t; + +/* + * These values are stored in the per-LWP data for a tracee when it is attached + * to a tracer. They record the method that was used to attach. + */ +typedef enum lx_ptrace_attach { + LX_PTA_NONE = 0x00, /* not attached */ + LX_PTA_ATTACH = 0x01, /* due to tracer using PTRACE_ATTACH */ + LX_PTA_TRACEME = 0x02, /* due to child using PTRACE_TRACEME */ + LX_PTA_INHERIT_CLONE = 0x04, /* due to PTRACE_CLONE clone(2) flag */ + LX_PTA_INHERIT_OPTIONS = 0x08 /* due to PTRACE_SETOPTIONS options */ +} lx_ptrace_attach_t; + /* * lx-specific data in the klwp_t */ -typedef struct lx_lwp_data { +struct lx_lwp_data { uint_t br_ntv_syscall; /* 1 = syscall from native libc */ uint_t br_lwp_flags; /* misc. 
flags */ klwp_t *br_lwp; /* back pointer to container lwp */ @@ -317,8 +380,26 @@ typedef struct lx_lwp_data { void *br_scall_args; int br_args_size; /* size in bytes of br_scall_args */ - uint_t br_ptrace; /* ptrace is active for this LWP */ -} lx_lwp_data_t; + boolean_t br_waitid_emulate; + int br_waitid_flags; + + lx_ptrace_state_t br_ptrace_flags; /* ptrace state for this LWP */ + lx_ptrace_options_t br_ptrace_options; /* PTRACE_SETOPTIONS options */ + lx_ptrace_options_t br_ptrace_clone_option; /* current clone(2) type */ + + lx_ptrace_attach_t br_ptrace_attach; /* how did we get attached */ + lx_ptrace_accord_t *br_ptrace_accord; /* accord for this tracer LWP */ + lx_ptrace_accord_t *br_ptrace_tracer; /* accord tracing this LWP */ + list_node_t br_ptrace_linkage; /* linkage for lxpa_tracees list */ + + ushort_t br_ptrace_whystop; /* stop reason, 0 for no stop */ + ushort_t br_ptrace_whatstop; /* stop sub-reason */ + + int32_t br_ptrace_stopsig; /* stop signal, 0 for no signal */ + + uint_t br_ptrace_event; + ulong_t br_ptrace_eventmsg; +}; /* * Upper limit on br_args_size, low because this value can persist until @@ -336,8 +417,13 @@ typedef struct lx_zone_data { #define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t)) #define lwptolxlwp(l) ((struct lx_lwp_data *)lwptolwpbrand(l)) -#define ttolxproc(t) ((struct lx_proc_data *)(t)->t_procp->p_brand_data) -#define ptolxproc(p) ((struct lx_proc_data *)(p)->p_brand_data) +#define ttolxproc(t) \ + (((t)->t_procp->p_brand == &lx_brand) ? \ + (struct lx_proc_data *)(t)->t_procp->p_brand_data : NULL) +#define ptolxproc(p) \ + (((p)->p_brand == &lx_brand) ? \ + (struct lx_proc_data *)(p)->p_brand_data : NULL) + /* Macro for converting to system call arguments. 
*/ #define LX_ARGS(scall) ((struct lx_##scall##_args *)\ (ttolxlwp(curthread)->br_scall_args)) diff --git a/usr/src/uts/common/brand/lx/sys/lx_misc.h b/usr/src/uts/common/brand/lx/sys/lx_misc.h index 56b5bb4047..7b77789c56 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_misc.h +++ b/usr/src/uts/common/brand/lx/sys/lx_misc.h @@ -46,6 +46,20 @@ extern boolean_t lx_wait_filter(proc_t *, proc_t *); extern void lx_ifname_convert(char *, int); +extern boolean_t lx_ptrace_stop(ushort_t); +extern void lx_stop_notify(proc_t *, klwp_t *, ushort_t, ushort_t); +extern void lx_ptrace_init(void); +extern void lx_ptrace_fini(void); +extern int lx_ptrace_kernel(int, pid_t, uintptr_t, uintptr_t); +extern int lx_waitid_helper(idtype_t, id_t, k_siginfo_t *, int, boolean_t *, + int *); +extern void lx_ptrace_exit(proc_t *, klwp_t *); +extern void lx_ptrace_inherit_tracer(lx_lwp_data_t *, lx_lwp_data_t *); +extern int lx_ptrace_stop_for_option(int, boolean_t, ulong_t); +extern int lx_ptrace_set_clone_inherit(int, boolean_t); +extern int lx_sigcld_repost(proc_t *, sigqueue_t *); +extern int lx_issig_stop(proc_t *, klwp_t *); + #endif #ifdef __cplusplus diff --git a/usr/src/uts/common/brand/lx/syscall/lx_clone.c b/usr/src/uts/common/brand/lx/syscall/lx_clone.c index 949db3a73b..d73c5f100b 100644 --- a/usr/src/uts/common/brand/lx/syscall/lx_clone.c +++ b/usr/src/uts/common/brand/lx/syscall/lx_clone.c @@ -21,7 +21,7 @@ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2014 Joyent, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. 
*/ #include <sys/types.h> @@ -32,25 +32,10 @@ #include <sys/lx_ldt.h> #include <sys/lx_misc.h> #include <lx_signum.h> +#include <lx_syscall.h> #include <sys/x86_archext.h> #include <sys/controlregs.h> -#define LX_CSIGNAL 0x000000ff -#define LX_CLONE_VM 0x00000100 -#define LX_CLONE_FS 0x00000200 -#define LX_CLONE_FILES 0x00000400 -#define LX_CLONE_SIGHAND 0x00000800 -#define LX_CLONE_PID 0x00001000 -#define LX_CLONE_PTRACE 0x00002000 -#define LX_CLONE_PARENT 0x00008000 -#define LX_CLONE_THREAD 0x00010000 -#define LX_CLONE_SYSVSEM 0x00040000 -#define LX_CLONE_SETTLS 0x00080000 -#define LX_CLONE_PARENT_SETTID 0x00100000 -#define LX_CLONE_CHILD_CLEARTID 0x00200000 -#define LX_CLONE_DETACH 0x00400000 -#define LX_CLONE_CHILD_SETTID 0x01000000 - /* * Our lwp has already been created at this point, so this routine is * responsible for setting up all the state needed to track this as a diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c index cfc4c99f64..ae6c5eef16 100644 --- a/usr/src/uts/common/disp/thread.c +++ b/usr/src/uts/common/disp/thread.c @@ -87,7 +87,7 @@ struct kmem_cache *turnstile_cache; /* cache of free turnstiles */ * allthreads is only for use by kmem_readers. All kernel loops can use * the current thread as a start/end point. */ -static kthread_t *allthreads = &t0; /* circular list of all threads */ +kthread_t *allthreads = &t0; /* circular list of all threads */ static kcondvar_t reaper_cv; /* synchronization var */ kthread_t *thread_deathrow; /* circular list of reapable threads */ diff --git a/usr/src/uts/common/fs/lookup.c b/usr/src/uts/common/fs/lookup.c index 6819509d00..55ffb94805 100644 --- a/usr/src/uts/common/fs/lookup.c +++ b/usr/src/uts/common/fs/lookup.c @@ -20,6 +20,7 @@ */ /* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ @@ -217,7 +218,6 @@ lookuppnvp( cred_t *cr) /* user's credential */ { vnode_t *cvp; /* current component vp */ - vnode_t *tvp; /* addressable temp ptr */ char component[MAXNAMELEN]; /* buffer for component (incl null) */ int error; int nlink; @@ -373,7 +373,7 @@ checkforroot: /* * Perform a lookup in the current directory. */ - error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags, + error = VOP_LOOKUP(vp, component, &cvp, pnp, lookup_flags, rootvp, cr, NULL, NULL, pp); /* @@ -391,10 +391,9 @@ checkforroot: * directory inside NFS FS. */ if ((error == EACCES) && retry_with_kcred) - error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags, + error = VOP_LOOKUP(vp, component, &cvp, pnp, lookup_flags, rootvp, zone_kcred(), NULL, NULL, pp); - cvp = tvp; if (error) { cvp = NULL; /* @@ -440,20 +439,8 @@ checkforroot: * be atomic!) */ if (vn_mountedvfs(cvp) != NULL) { - tvp = cvp; - if ((error = traverse(&tvp)) != 0) { - /* - * It is required to assign cvp here, because - * traverse() will return a held vnode which - * may different than the vnode that was passed - * in (even in the error case). If traverse() - * changes the vnode it releases the original, - * and holds the new one. - */ - cvp = tvp; + if ((error = traverse(&cvp)) != 0) goto bad; - } - cvp = tvp; } /* diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv.c b/usr/src/uts/common/fs/nfs/nfs4_srv.c index 127d9e3f29..fe1a10b966 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv.c @@ -18,10 +18,11 @@ * * CDDL HEADER END */ + /* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 
*/ /* @@ -869,7 +870,7 @@ static nfsstat4 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) { int error, different_export = 0; - vnode_t *dvp, *vp, *tvp; + vnode_t *dvp, *vp; struct exportinfo *exi = NULL; fid_t fid; uint_t count, i; @@ -950,14 +951,12 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) * If it's a mountpoint, then traverse it. */ if (vn_ismntpt(vp)) { - tvp = vp; - if ((error = traverse(&tvp)) != 0) { + if ((error = traverse(&vp)) != 0) { VN_RELE(vp); return (puterrno4(error)); } /* remember that we had to traverse mountpoint */ did_traverse = TRUE; - vp = tvp; different_export = 1; } else if (vp->v_vfsp != dvp->v_vfsp) { /* @@ -2610,7 +2609,7 @@ do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs) { int error; int different_export = 0; - vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL; + vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL; struct exportinfo *exi = NULL, *pre_exi = NULL; nfsstat4 stat; fid_t fid; @@ -2708,13 +2707,11 @@ do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs) * need pre_tvp below if checkexport4 fails */ VN_HOLD(pre_tvp); - tvp = vp; - if ((error = traverse(&tvp)) != 0) { + if ((error = traverse(&vp)) != 0) { VN_RELE(vp); VN_RELE(pre_tvp); return (puterrno4(error)); } - vp = tvp; different_export = 1; } else if (vp->v_vfsp != cs->vp->v_vfsp) { /* diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c b/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c index 3069a98835..276d3b4f19 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c @@ -18,6 +18,11 @@ * * CDDL HEADER END */ + +/* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + */ + /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
@@ -149,6 +154,7 @@ nfs4_readdir_getvp(vnode_t *dvp, char *d_name, vnode_t **vpp, VN_HOLD(pre_tvp); if ((error = traverse(&vp)) != 0) { + VN_RELE(vp); VN_RELE(pre_tvp); return (error); } diff --git a/usr/src/uts/common/fs/proc/prcontrol.c b/usr/src/uts/common/fs/proc/prcontrol.c index a5679a8afb..7e99d23b97 100644 --- a/usr/src/uts/common/fs/proc/prcontrol.c +++ b/usr/src/uts/common/fs/proc/prcontrol.c @@ -25,7 +25,7 @@ */ /* - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #include <sys/types.h> @@ -1481,7 +1481,7 @@ pr_setsig(prnode_t *pnp, siginfo_t *sip) } else if (t->t_state == TS_STOPPED && sig == SIGKILL) { /* If SIGKILL, set stopped lwp running */ p->p_stopsig = 0; - t->t_schedflag |= TS_XSTART | TS_PSTART; + t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART; t->t_dtrace_stop = 0; setrun_locked(t); } diff --git a/usr/src/uts/common/fs/proc/prsubr.c b/usr/src/uts/common/fs/proc/prsubr.c index 7801fd0ac8..284bf8cb88 100644 --- a/usr/src/uts/common/fs/proc/prsubr.c +++ b/usr/src/uts/common/fs/proc/prsubr.c @@ -21,7 +21,7 @@ /* * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -201,6 +201,7 @@ prchoose(proc_t *p) case PR_SYSEXIT: case PR_SIGNALLED: case PR_FAULTED: + case PR_BRAND: /* * Make an lwp calling exit() be the * last lwp seen in the process. 
diff --git a/usr/src/uts/common/fs/smbsrv/smb_common_open.c b/usr/src/uts/common/fs/smbsrv/smb_common_open.c index 3fa43d43cb..5eaa5865c6 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_common_open.c +++ b/usr/src/uts/common/fs/smbsrv/smb_common_open.c @@ -820,8 +820,8 @@ smb_open_subr(smb_request_t *sr) status = NT_STATUS_SUCCESS; - of = smb_ofile_open(sr->tid_tree, node, sr->smb_pid, op, SMB_FTYPE_DISK, - uniq_fid, &err); + of = smb_ofile_open(sr, node, sr->smb_pid, op, SMB_FTYPE_DISK, uniq_fid, + &err); if (of == NULL) { smbsr_error(sr, err.status, err.errcls, err.errcode); status = err.status; diff --git a/usr/src/uts/common/fs/smbsrv/smb_delete.c b/usr/src/uts/common/fs/smbsrv/smb_delete.c index 4930f741ef..14eff73896 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_delete.c +++ b/usr/src/uts/common/fs/smbsrv/smb_delete.c @@ -297,7 +297,7 @@ smb_delete_multiple_files(smb_request_t *sr, smb_error_t *err) if (odid == 0) return (-1); - if ((od = smb_tree_lookup_odir(sr->tid_tree, odid)) == NULL) + if ((od = smb_tree_lookup_odir(sr, odid)) == NULL) return (-1); for (;;) { diff --git a/usr/src/uts/common/fs/smbsrv/smb_dispatch.c b/usr/src/uts/common/fs/smbsrv/smb_dispatch.c index 1afcf18b28..9b1fed6f9a 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_dispatch.c +++ b/usr/src/uts/common/fs/smbsrv/smb_dispatch.c @@ -20,8 +20,8 @@ */ /* - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 
*/ /* @@ -694,16 +694,13 @@ andx_more: } sr->user_cr = smb_user_getcred(sr->uid_user); - - if (!(sdd->sdt_flags & SDDF_SUPPRESS_TID) && - (sr->tid_tree == NULL)) { - sr->tid_tree = smb_user_lookup_tree( - sr->uid_user, sr->smb_tid); - if (sr->tid_tree == NULL) { - smbsr_error(sr, 0, ERRSRV, ERRinvnid); - smbsr_cleanup(sr); - goto report_error; - } + } + if (!(sdd->sdt_flags & SDDF_SUPPRESS_TID) && (sr->tid_tree == NULL)) { + sr->tid_tree = smb_session_lookup_tree(session, sr->smb_tid); + if (sr->tid_tree == NULL) { + smbsr_error(sr, 0, ERRSRV, ERRinvnid); + smbsr_cleanup(sr); + goto report_error; } } @@ -1116,8 +1113,7 @@ void smbsr_lookup_file(smb_request_t *sr) { if (sr->fid_ofile == NULL) - sr->fid_ofile = smb_ofile_lookup_by_fid(sr->tid_tree, - sr->smb_fid); + sr->fid_ofile = smb_ofile_lookup_by_fid(sr, sr->smb_fid); } static int diff --git a/usr/src/uts/common/fs/smbsrv/smb_find.c b/usr/src/uts/common/fs/smbsrv/smb_find.c index 1dae4e8cb5..eecbeff4df 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_find.c +++ b/usr/src/uts/common/fs/smbsrv/smb_find.c @@ -306,7 +306,7 @@ smb_com_search(smb_request_t *sr) } } - od = smb_tree_lookup_odir(sr->tid_tree, odid); + od = smb_tree_lookup_odir(sr, odid); if (od == NULL) { smbsr_error(sr, NT_STATUS_INVALID_HANDLE, ERRDOS, ERROR_INVALID_HANDLE); @@ -452,7 +452,7 @@ smb_com_find(smb_request_t *sr) } } - od = smb_tree_lookup_odir(sr->tid_tree, odid); + od = smb_tree_lookup_odir(sr, odid); if (od == NULL) { smbsr_error(sr, NT_STATUS_INVALID_HANDLE, ERRDOS, ERROR_INVALID_HANDLE); @@ -575,7 +575,7 @@ smb_com_find_close(smb_request_t *sr) return (SDRC_ERROR); } - od = smb_tree_lookup_odir(sr->tid_tree, odid); + od = smb_tree_lookup_odir(sr, odid); if (od == NULL) { smbsr_error(sr, NT_STATUS_INVALID_HANDLE, ERRDOS, ERROR_INVALID_HANDLE); @@ -649,7 +649,7 @@ smb_com_find_unique(struct smb_request *sr) odid = smb_odir_open(sr, pn->pn_path, sattr, 0); if (odid == 0) return (SDRC_ERROR); - od = smb_tree_lookup_odir(sr->tid_tree, odid); 
+ od = smb_tree_lookup_odir(sr, odid); if (od == NULL) return (SDRC_ERROR); diff --git a/usr/src/uts/common/fs/smbsrv/smb_fsops.c b/usr/src/uts/common/fs/smbsrv/smb_fsops.c index 2f4545e966..c64313fdbf 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_fsops.c +++ b/usr/src/uts/common/fs/smbsrv/smb_fsops.c @@ -805,7 +805,7 @@ smb_fsop_remove_streams(smb_request_t *sr, cred_t *cr, smb_node_t *fnode) return (-1); } - if ((od = smb_tree_lookup_odir(sr->tid_tree, odid)) == NULL) { + if ((od = smb_tree_lookup_odir(sr, odid)) == NULL) { smbsr_errno(sr, ENOENT); return (-1); } diff --git a/usr/src/uts/common/fs/smbsrv/smb_nt_create_andx.c b/usr/src/uts/common/fs/smbsrv/smb_nt_create_andx.c index c77c175fc1..037c1373b5 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_nt_create_andx.c +++ b/usr/src/uts/common/fs/smbsrv/smb_nt_create_andx.c @@ -264,8 +264,7 @@ smb_com_nt_create_andx(struct smb_request *sr) if (op->rootdirfid == 0) { op->fqi.fq_dnode = sr->tid_tree->t_snode; } else { - op->dir = smb_ofile_lookup_by_fid(sr->tid_tree, - (uint16_t)op->rootdirfid); + op->dir = smb_ofile_lookup_by_fid(sr, (uint16_t)op->rootdirfid); if (op->dir == NULL) { smbsr_error(sr, NT_STATUS_INVALID_HANDLE, ERRDOS, ERRbadfid); diff --git a/usr/src/uts/common/fs/smbsrv/smb_nt_transact_create.c b/usr/src/uts/common/fs/smbsrv/smb_nt_transact_create.c index fcc12f2fc8..dcfa469617 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_nt_transact_create.c +++ b/usr/src/uts/common/fs/smbsrv/smb_nt_transact_create.c @@ -173,8 +173,7 @@ smb_nt_transact_create(smb_request_t *sr, smb_xa_t *xa) if (op->rootdirfid == 0) { op->fqi.fq_dnode = sr->tid_tree->t_snode; } else { - op->dir = smb_ofile_lookup_by_fid(sr->tid_tree, - (uint16_t)op->rootdirfid); + op->dir = smb_ofile_lookup_by_fid(sr, (uint16_t)op->rootdirfid); if (op->dir == NULL) { smbsr_error(sr, NT_STATUS_INVALID_HANDLE, ERRDOS, ERRbadfid); diff --git a/usr/src/uts/common/fs/smbsrv/smb_odir.c b/usr/src/uts/common/fs/smbsrv/smb_odir.c index b8435d191a..16fffa6692 
100644 --- a/usr/src/uts/common/fs/smbsrv/smb_odir.c +++ b/usr/src/uts/common/fs/smbsrv/smb_odir.c @@ -39,15 +39,15 @@ * +-------------------+ +-------------------+ +-------------------+ * | SESSION |<----->| SESSION |......| SESSION | * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v - * +-------------------+ +-------------------+ +-------------------+ - * | USER |<----->| USER |......| USER | - * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v + * | | + * | | + * | v + * | +-------------------+ +-------------------+ +-------------------+ + * | | USER |<--->| USER |...| USER | + * | +-------------------+ +-------------------+ +-------------------+ + * | + * | + * v * +-------------------+ +-------------------+ +-------------------+ * | TREE |<----->| TREE |......| TREE | * +-------------------+ +-------------------+ +-------------------+ @@ -153,7 +153,7 @@ * and add it into the tree's list of odirs. * Return an identifier (odid) uniquely identifying the created odir. * - * smb_odir_t *odir = smb_tree_lookup_odir(odid) + * smb_odir_t *odir = smb_tree_lookup_odir(..., odid) * Find the odir corresponding to the specified odid in the tree's * list of odirs. Place a hold on the odir. 
* @@ -312,9 +312,9 @@ smb_odir_open(smb_request_t *sr, char *path, uint16_t sattr, uint32_t flags) } if (flags & SMB_ODIR_OPENF_BACKUP_INTENT) - cr = smb_user_getprivcred(tree->t_user); + cr = smb_user_getprivcred(sr->uid_user); else - cr = tree->t_user->u_cred; + cr = sr->uid_user->u_cred; odid = smb_odir_create(sr, dnode, pattern, sattr, cr); smb_node_release(dnode); @@ -888,6 +888,12 @@ smb_odir_create(smb_request_t *sr, smb_node_t *dnode, od->d_opened_by_pid = sr->smb_pid; od->d_session = tree->t_session; od->d_cred = cr; + /* + * grab a ref for od->d_user + * released in smb_odir_delete() + */ + smb_user_hold_internal(sr->uid_user); + od->d_user = sr->uid_user; od->d_tree = tree; od->d_dnode = dnode; smb_node_ref(dnode); @@ -947,6 +953,7 @@ smb_odir_delete(void *arg) od->d_magic = 0; smb_node_release(od->d_dnode); + smb_user_release(od->d_user); mutex_destroy(&od->d_mutex); kmem_cache_free(od->d_tree->t_server->si_cache_odir, od); } diff --git a/usr/src/uts/common/fs/smbsrv/smb_ofile.c b/usr/src/uts/common/fs/smbsrv/smb_ofile.c index 8987da2950..ee45f13c8b 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_ofile.c +++ b/usr/src/uts/common/fs/smbsrv/smb_ofile.c @@ -39,15 +39,15 @@ * +-------------------+ +-------------------+ +-------------------+ * | SESSION |<----->| SESSION |......| SESSION | * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v - * +-------------------+ +-------------------+ +-------------------+ - * | USER |<----->| USER |......| USER | - * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v + * | | + * | | + * | v + * | +-------------------+ +-------------------+ +-------------------+ + * | | USER |<--->| USER |...| USER | + * | +-------------------+ +-------------------+ +-------------------+ + * | + * | + * v * +-------------------+ +-------------------+ +-------------------+ * | TREE |<----->| TREE |......| TREE | * +-------------------+ +-------------------+ 
+-------------------+ @@ -175,7 +175,7 @@ static void smb_ofile_netinfo_fini(smb_netfileinfo_t *); */ smb_ofile_t * smb_ofile_open( - smb_tree_t *tree, + smb_request_t *sr, smb_node_t *node, uint16_t pid, struct open_param *op, @@ -183,10 +183,13 @@ smb_ofile_open( uint32_t uniqid, smb_error_t *err) { + smb_tree_t *tree = sr->tid_tree; smb_ofile_t *of; uint16_t fid; smb_attr_t attr; int rc; + enum errstates { EMPTY, FIDALLOC, CRHELD, MUTEXINIT }; + enum errstates state = EMPTY; if (smb_idpool_alloc(&tree->t_fid_pool, &fid)) { err->status = NT_STATUS_TOO_MANY_OPENED_FILES; @@ -194,6 +197,7 @@ smb_ofile_open( err->errcode = ERROR_TOO_MANY_OPEN_FILES; return (NULL); } + state = FIDALLOC; of = kmem_cache_alloc(tree->t_server->si_cache_ofile, KM_SLEEP); bzero(of, sizeof (smb_ofile_t)); @@ -206,16 +210,23 @@ smb_ofile_open( of->f_share_access = op->share_access; of->f_create_options = op->create_options; of->f_cr = (op->create_options & FILE_OPEN_FOR_BACKUP_INTENT) ? - smb_user_getprivcred(tree->t_user) : tree->t_user->u_cred; + smb_user_getprivcred(sr->uid_user) : sr->uid_user->u_cred; crhold(of->f_cr); + state = CRHELD; of->f_ftype = ftype; of->f_server = tree->t_server; - of->f_session = tree->t_user->u_session; - of->f_user = tree->t_user; + of->f_session = tree->t_session; + /* + * grab a ref for of->f_user + * released in smb_ofile_delete() + */ + smb_user_hold_internal(sr->uid_user); + of->f_user = sr->uid_user; of->f_tree = tree; of->f_node = node; mutex_init(&of->f_mutex, NULL, MUTEX_DEFAULT, NULL); + state = MUTEXINIT; of->f_state = SMB_OFILE_STATE_OPEN; if (ftype == SMB_FTYPE_MESG_PIPE) { @@ -232,15 +243,10 @@ smb_ofile_open( attr.sa_mask = SMB_AT_UID | SMB_AT_DOSATTR; rc = smb_node_getattr(NULL, node, of->f_cr, NULL, &attr); if (rc != 0) { - of->f_magic = 0; - mutex_destroy(&of->f_mutex); - crfree(of->f_cr); - smb_idpool_free(&tree->t_fid_pool, of->f_fid); - kmem_cache_free(tree->t_server->si_cache_ofile, of); err->status = NT_STATUS_INTERNAL_ERROR; 
err->errcls = ERRDOS; err->errcode = ERROR_INTERNAL_ERROR; - return (NULL); + goto errout; } if (crgetuid(of->f_cr) == attr.sa_vattr.va_uid) { /* @@ -254,16 +260,10 @@ smb_ofile_open( of->f_mode = smb_fsop_amask_to_omode(of->f_granted_access); if (smb_fsop_open(node, of->f_mode, of->f_cr) != 0) { - of->f_magic = 0; - mutex_destroy(&of->f_mutex); - crfree(of->f_cr); - smb_idpool_free(&tree->t_fid_pool, of->f_fid); - kmem_cache_free(tree->t_server->si_cache_ofile, - of); err->status = NT_STATUS_ACCESS_DENIED; err->errcls = ERRDOS; err->errcode = ERROR_ACCESS_DENIED; - return (NULL); + goto errout; } } @@ -290,6 +290,25 @@ smb_ofile_open( atomic_inc_32(&tree->t_open_files); atomic_inc_32(&of->f_session->s_file_cnt); return (of); + +errout: + switch (state) { + case MUTEXINIT: + mutex_destroy(&of->f_mutex); + smb_user_release(of->f_user); + /*FALLTHROUGH*/ + case CRHELD: + crfree(of->f_cr); + of->f_magic = 0; + kmem_cache_free(tree->t_server->si_cache_ofile, of); + /*FALLTHROUGH*/ + case FIDALLOC: + smb_idpool_free(&tree->t_fid_pool, fid); + /*FALLTHROUGH*/ + case EMPTY: + break; + } + return (NULL); } /* @@ -601,9 +620,10 @@ smb_ofile_request_complete(smb_ofile_t *of) */ smb_ofile_t * smb_ofile_lookup_by_fid( - smb_tree_t *tree, + smb_request_t *sr, uint16_t fid) { + smb_tree_t *tree = sr->tid_tree; smb_llist_t *of_list; smb_ofile_t *of; @@ -616,19 +636,32 @@ smb_ofile_lookup_by_fid( while (of) { ASSERT(of->f_magic == SMB_OFILE_MAGIC); ASSERT(of->f_tree == tree); - if (of->f_fid == fid) { - mutex_enter(&of->f_mutex); - if (of->f_state != SMB_OFILE_STATE_OPEN) { - mutex_exit(&of->f_mutex); - smb_llist_exit(of_list); - return (NULL); - } - of->f_refcnt++; - mutex_exit(&of->f_mutex); + if (of->f_fid == fid) break; - } of = smb_llist_next(of_list, of); } + if (of == NULL) + goto out; + + /* + * Only allow use of a given FID with the same UID that + * was used to open it. 
MS-CIFS 3.3.5.14 + */ + if (of->f_user != sr->uid_user) { + of = NULL; + goto out; + } + + mutex_enter(&of->f_mutex); + if (of->f_state != SMB_OFILE_STATE_OPEN) { + mutex_exit(&of->f_mutex); + of = NULL; + goto out; + } + of->f_refcnt++; + mutex_exit(&of->f_mutex); + +out: smb_llist_exit(of_list); return (of); } @@ -921,6 +954,7 @@ smb_ofile_delete(void *arg) of->f_magic = (uint32_t)~SMB_OFILE_MAGIC; mutex_destroy(&of->f_mutex); crfree(of->f_cr); + smb_user_release(of->f_user); kmem_cache_free(of->f_tree->t_server->si_cache_ofile, of); } diff --git a/usr/src/uts/common/fs/smbsrv/smb_opipe.c b/usr/src/uts/common/fs/smbsrv/smb_opipe.c index bb178f3952..90cb25aaa0 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_opipe.c +++ b/usr/src/uts/common/fs/smbsrv/smb_opipe.c @@ -130,8 +130,8 @@ smb_opipe_open(smb_request_t *sr) op->create_options = 0; - of = smb_ofile_open(sr->tid_tree, NULL, sr->smb_pid, op, - SMB_FTYPE_MESG_PIPE, SMB_UNIQ_FID(), &err); + of = smb_ofile_open(sr, NULL, sr->smb_pid, op, SMB_FTYPE_MESG_PIPE, + SMB_UNIQ_FID(), &err); if (of == NULL) return (err.status); diff --git a/usr/src/uts/common/fs/smbsrv/smb_process_exit.c b/usr/src/uts/common/fs/smbsrv/smb_process_exit.c index b8c835cd57..2839ca2807 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_process_exit.c +++ b/usr/src/uts/common/fs/smbsrv/smb_process_exit.c @@ -85,11 +85,11 @@ smb_com_process_exit(smb_request_t *sr) * to be the only thing that sends this request these days and * it doesn't provide a TID. */ - sr->tid_tree = smb_user_lookup_tree(sr->uid_user, sr->smb_tid); + sr->tid_tree = smb_session_lookup_tree(sr->session, sr->smb_tid); if (sr->tid_tree != NULL) smb_tree_close_pid(sr->tid_tree, sr->smb_pid); else - smb_user_close_pid(sr->uid_user, sr->smb_pid); + smb_session_close_pid(sr->session, sr->smb_pid); rc = smbsr_encode_empty_result(sr); return ((rc == 0) ? 
SDRC_SUCCESS : SDRC_ERROR); diff --git a/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c b/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c index bef69e7f61..70ac2e7b24 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c +++ b/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c @@ -662,7 +662,7 @@ smb_encode_stream_info(smb_request_t *sr, smb_xa_t *xa, smb_queryinfo_t *qinfo) odid = smb_odir_openat(sr, fnode); if (odid != 0) - od = smb_tree_lookup_odir(sr->tid_tree, odid); + od = smb_tree_lookup_odir(sr, odid); if (od != NULL) rc = smb_odir_read_streaminfo(sr, od, sinfo, &eos); diff --git a/usr/src/uts/common/fs/smbsrv/smb_server.c b/usr/src/uts/common/fs/smbsrv/smb_server.c index 3654744569..8687d42b18 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_server.c +++ b/usr/src/uts/common/fs/smbsrv/smb_server.c @@ -240,7 +240,8 @@ static void smb_event_cancel(smb_server_t *, uint32_t); static uint32_t smb_event_alloc_txid(void); static void smb_server_disconnect_share(smb_llist_t *, const char *); -static void smb_server_enum_private(smb_llist_t *, smb_svcenum_t *); +static void smb_server_enum_users(smb_llist_t *, smb_svcenum_t *); +static void smb_server_enum_trees(smb_llist_t *, smb_svcenum_t *); static int smb_server_session_disconnect(smb_llist_t *, const char *, const char *); static int smb_server_fclose(smb_llist_t *, uint32_t); @@ -833,15 +834,6 @@ smb_server_enum(smb_ioc_svcenum_t *ioc) smb_server_t *sv; int rc; - switch (svcenum->se_type) { - case SMB_SVCENUM_TYPE_USER: - case SMB_SVCENUM_TYPE_TREE: - case SMB_SVCENUM_TYPE_FILE: - break; - default: - return (EINVAL); - } - if ((rc = smb_server_lookup(&sv)) != 0) return (rc); @@ -849,11 +841,26 @@ smb_server_enum(smb_ioc_svcenum_t *ioc) svcenum->se_bused = 0; svcenum->se_nitems = 0; - smb_server_enum_private(&sv->sv_nbt_daemon.ld_session_list, svcenum); - smb_server_enum_private(&sv->sv_tcp_daemon.ld_session_list, svcenum); + switch (svcenum->se_type) { + case SMB_SVCENUM_TYPE_USER: + 
smb_server_enum_users(&sv->sv_nbt_daemon.ld_session_list, + svcenum); + smb_server_enum_users(&sv->sv_tcp_daemon.ld_session_list, + svcenum); + break; + case SMB_SVCENUM_TYPE_TREE: + case SMB_SVCENUM_TYPE_FILE: + smb_server_enum_trees(&sv->sv_nbt_daemon.ld_session_list, + svcenum); + smb_server_enum_trees(&sv->sv_tcp_daemon.ld_session_list, + svcenum); + break; + default: + rc = EINVAL; + } smb_server_release(sv); - return (0); + return (rc); } /* @@ -1694,7 +1701,7 @@ smb_server_release(smb_server_t *sv) * Enumerate the users associated with a session list. */ static void -smb_server_enum_private(smb_llist_t *ll, smb_svcenum_t *svcenum) +smb_server_enum_users(smb_llist_t *ll, smb_svcenum_t *svcenum) { smb_session_t *sn; smb_llist_t *ulist; @@ -1714,6 +1721,8 @@ smb_server_enum_private(smb_llist_t *ll, smb_svcenum_t *svcenum) if (smb_user_hold(user)) { rc = smb_user_enum(user, svcenum); smb_user_release(user); + if (rc != 0) + break; } user = smb_llist_next(ulist, user); @@ -1731,6 +1740,48 @@ smb_server_enum_private(smb_llist_t *ll, smb_svcenum_t *svcenum) } /* + * Enumerate the trees/files associated with a session list. + */ +static void +smb_server_enum_trees(smb_llist_t *ll, smb_svcenum_t *svcenum) +{ + smb_session_t *sn; + smb_llist_t *tlist; + smb_tree_t *tree; + int rc = 0; + + smb_llist_enter(ll, RW_READER); + sn = smb_llist_head(ll); + + while (sn != NULL) { + SMB_SESSION_VALID(sn); + tlist = &sn->s_tree_list; + smb_llist_enter(tlist, RW_READER); + tree = smb_llist_head(tlist); + + while (tree != NULL) { + if (smb_tree_hold(tree)) { + rc = smb_tree_enum(tree, svcenum); + smb_tree_release(tree); + if (rc != 0) + break; + } + + tree = smb_llist_next(tlist, tree); + } + + smb_llist_exit(tlist); + + if (rc != 0) + break; + + sn = smb_llist_next(ll, sn); + } + + smb_llist_exit(ll); +} + +/* * Disconnect sessions associated with the specified client and username. * Empty strings are treated as wildcards. 
*/ @@ -1796,8 +1847,8 @@ static int smb_server_fclose(smb_llist_t *ll, uint32_t uniqid) { smb_session_t *sn; - smb_llist_t *ulist; - smb_user_t *user; + smb_llist_t *tlist; + smb_tree_t *tree; int rc = ENOENT; smb_llist_enter(ll, RW_READER); @@ -1805,20 +1856,20 @@ smb_server_fclose(smb_llist_t *ll, uint32_t uniqid) while ((sn != NULL) && (rc == ENOENT)) { SMB_SESSION_VALID(sn); - ulist = &sn->s_user_list; - smb_llist_enter(ulist, RW_READER); - user = smb_llist_head(ulist); - - while ((user != NULL) && (rc == ENOENT)) { - if (smb_user_hold(user)) { - rc = smb_user_fclose(user, uniqid); - smb_user_release(user); + tlist = &sn->s_tree_list; + smb_llist_enter(tlist, RW_READER); + tree = smb_llist_head(tlist); + + while ((tree != NULL) && (rc == ENOENT)) { + if (smb_tree_hold(tree)) { + rc = smb_tree_fclose(tree, uniqid); + smb_tree_release(tree); } - user = smb_llist_next(ulist, user); + tree = smb_llist_next(tlist, tree); } - smb_llist_exit(ulist); + smb_llist_exit(tlist); sn = smb_llist_next(ll, sn); } diff --git a/usr/src/uts/common/fs/smbsrv/smb_session.c b/usr/src/uts/common/fs/smbsrv/smb_session.c index 0fdac10ca6..b8284b372f 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_session.c +++ b/usr/src/uts/common/fs/smbsrv/smb_session.c @@ -43,6 +43,7 @@ static int smb_session_message(smb_session_t *); static int smb_session_xprt_puthdr(smb_session_t *, smb_xprt_t *, uint8_t *, size_t); static smb_user_t *smb_session_lookup_user(smb_session_t *, char *, char *); +static smb_tree_t *smb_session_get_tree(smb_session_t *, smb_tree_t *); static void smb_session_logoff(smb_session_t *); static void smb_request_init_command_mbuf(smb_request_t *sr); void dump_smb_inaddr(smb_inaddr_t *ipaddr); @@ -624,6 +625,11 @@ smb_session_create(ksocket_t new_so, uint16_t port, smb_server_t *sv, kmem_cache_free(sv->si_cache_session, session); return (NULL); } + if (smb_idpool_constructor(&session->s_tid_pool)) { + smb_idpool_destructor(&session->s_uid_pool); + 
kmem_cache_free(sv->si_cache_session, session); + return (NULL); + } now = ddi_get_lbolt64(); @@ -642,6 +648,9 @@ smb_session_create(ksocket_t new_so, uint16_t port, smb_server_t *sv, smb_llist_constructor(&session->s_user_list, sizeof (smb_user_t), offsetof(smb_user_t, u_lnd)); + smb_llist_constructor(&session->s_tree_list, sizeof (smb_tree_t), + offsetof(smb_tree_t, t_lnd)); + smb_llist_constructor(&session->s_xa_list, sizeof (smb_xa_t), offsetof(smb_xa_t, xa_lnd)); @@ -719,6 +728,7 @@ smb_session_delete(smb_session_t *session) list_destroy(&session->s_oplock_brkreqs); smb_slist_destructor(&session->s_req_list); + smb_llist_destructor(&session->s_tree_list); smb_llist_destructor(&session->s_user_list); smb_llist_destructor(&session->s_xa_list); @@ -726,6 +736,7 @@ smb_session_delete(smb_session_t *session) ASSERT(session->s_file_cnt == 0); ASSERT(session->s_dir_cnt == 0); + smb_idpool_destructor(&session->s_tid_pool); smb_idpool_destructor(&session->s_uid_pool); if (session->sock != NULL) { if (session->s_local_port == IPPORT_NETBIOS_SSN) @@ -928,45 +939,306 @@ smb_session_post_user(smb_session_t *session, smb_user_t *user) } /* - * Logoff all users associated with the specified session. + * Find a tree by tree-id. 
*/ -static void -smb_session_logoff(smb_session_t *session) +smb_tree_t * +smb_session_lookup_tree( + smb_session_t *session, + uint16_t tid) + { - smb_user_t *user; + smb_tree_t *tree; SMB_SESSION_VALID(session); - smb_llist_enter(&session->s_user_list, RW_READER); + smb_llist_enter(&session->s_tree_list, RW_READER); + tree = smb_llist_head(&session->s_tree_list); - user = smb_llist_head(&session->s_user_list); - while (user) { - SMB_USER_VALID(user); - ASSERT(user->u_session == session); + while (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); - if (smb_user_hold(user)) { - smb_user_logoff(user); - smb_user_release(user); + if (tree->t_tid == tid) { + if (smb_tree_hold(tree)) { + smb_llist_exit(&session->s_tree_list); + return (tree); + } else { + smb_llist_exit(&session->s_tree_list); + return (NULL); + } } - user = smb_llist_next(&session->s_user_list, user); + tree = smb_llist_next(&session->s_tree_list, tree); } - smb_llist_exit(&session->s_user_list); + smb_llist_exit(&session->s_tree_list); + return (NULL); +} + +/* + * Find the first connected tree that matches the specified sharename. + * If the specified tree is NULL the search starts from the beginning of + * the user's tree list. If a tree is provided the search starts just + * after that tree. 
+ */ +smb_tree_t * +smb_session_lookup_share( + smb_session_t *session, + const char *sharename, + smb_tree_t *tree) +{ + SMB_SESSION_VALID(session); + ASSERT(sharename); + + smb_llist_enter(&session->s_tree_list, RW_READER); + + if (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); + tree = smb_llist_next(&session->s_tree_list, tree); + } else { + tree = smb_llist_head(&session->s_tree_list); + } + + while (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); + if (smb_strcasecmp(tree->t_sharename, sharename, 0) == 0) { + if (smb_tree_hold(tree)) { + smb_llist_exit(&session->s_tree_list); + return (tree); + } + } + tree = smb_llist_next(&session->s_tree_list, tree); + } + + smb_llist_exit(&session->s_tree_list); + return (NULL); +} + +/* + * Find the first connected tree that matches the specified volume name. + * If the specified tree is NULL the search starts from the beginning of + * the user's tree list. If a tree is provided the search starts just + * after that tree. + */ +smb_tree_t * +smb_session_lookup_volume( + smb_session_t *session, + const char *name, + smb_tree_t *tree) +{ + SMB_SESSION_VALID(session); + ASSERT(name); + + smb_llist_enter(&session->s_tree_list, RW_READER); + + if (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); + tree = smb_llist_next(&session->s_tree_list, tree); + } else { + tree = smb_llist_head(&session->s_tree_list); + } + + while (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); + + if (smb_strcasecmp(tree->t_volume, name, 0) == 0) { + if (smb_tree_hold(tree)) { + smb_llist_exit(&session->s_tree_list); + return (tree); + } + } + + tree = smb_llist_next(&session->s_tree_list, tree); + } + + smb_llist_exit(&session->s_tree_list); + return (NULL); +} + +/* + * Disconnect all trees that match the specified client process-id. 
+ */ +void +smb_session_close_pid( + smb_session_t *session, + uint16_t pid) +{ + smb_tree_t *tree; + + SMB_SESSION_VALID(session); + + tree = smb_session_get_tree(session, NULL); + while (tree) { + smb_tree_t *next; + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); + smb_tree_close_pid(tree, pid); + next = smb_session_get_tree(session, tree); + smb_tree_release(tree); + tree = next; + } +} + +static void +smb_session_tree_dtor(void *t) +{ + smb_tree_t *tree = (smb_tree_t *)t; + + smb_tree_disconnect(tree, B_TRUE); + /* release the ref acquired during the traversal loop */ + smb_tree_release(tree); } + /* - * Disconnect any trees associated with the specified share. - * Iterate through the users on this session and tell each user - * to disconnect from the share. + * Disconnect all trees that this user has connected. */ void -smb_session_disconnect_share(smb_session_t *session, const char *sharename) +smb_session_disconnect_owned_trees( + smb_session_t *session, + smb_user_t *owner) +{ + smb_tree_t *tree; + smb_llist_t *tree_list = &session->s_tree_list; + + SMB_SESSION_VALID(session); + SMB_USER_VALID(owner); + + smb_llist_enter(tree_list, RW_READER); + + tree = smb_llist_head(tree_list); + while (tree) { + if ((tree->t_owner == owner) && + smb_tree_hold(tree)) { + /* + * smb_tree_hold() succeeded, hence we are in state + * SMB_TREE_STATE_CONNECTED; schedule this tree + * for asynchronous disconnect, which will fire + * after we drop the llist traversal lock. + */ + smb_llist_post(tree_list, tree, smb_session_tree_dtor); + } + tree = smb_llist_next(tree_list, tree); + } + + /* drop the lock and flush the dtor queue */ + smb_llist_exit(tree_list); +} + +/* + * Disconnect all trees that this user has connected. 
+ */ +void +smb_session_disconnect_trees( + smb_session_t *session) +{ + smb_tree_t *tree; + + SMB_SESSION_VALID(session); + + tree = smb_session_get_tree(session, NULL); + while (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); + smb_tree_disconnect(tree, B_TRUE); + smb_tree_release(tree); + tree = smb_session_get_tree(session, NULL); + } +} + +/* + * Disconnect all trees that match the specified share name. + */ +void +smb_session_disconnect_share( + smb_session_t *session, + const char *sharename) +{ + smb_tree_t *tree; + smb_tree_t *next; + + SMB_SESSION_VALID(session); + + tree = smb_session_lookup_share(session, sharename, NULL); + while (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); + smb_session_cancel_requests(session, tree, NULL); + smb_tree_disconnect(tree, B_TRUE); + next = smb_session_lookup_share(session, sharename, tree); + smb_tree_release(tree); + tree = next; + } +} + +void +smb_session_post_tree(smb_session_t *session, smb_tree_t *tree) +{ + SMB_SESSION_VALID(session); + SMB_TREE_VALID(tree); + ASSERT0(tree->t_refcnt); + ASSERT(tree->t_state == SMB_TREE_STATE_DISCONNECTED); + ASSERT(tree->t_session == session); + + smb_llist_post(&session->s_tree_list, tree, smb_tree_dealloc); +} + +/* + * Get the next connected tree in the list. A reference is taken on + * the tree, which can be released later with smb_tree_release(). + * + * If the specified tree is NULL the search starts from the beginning of + * the tree list. If a tree is provided the search starts just after + * that tree. + * + * Returns NULL if there are no connected trees in the list. 
+ */ +static smb_tree_t * +smb_session_get_tree( + smb_session_t *session, + smb_tree_t *tree) +{ + smb_llist_t *tree_list; + + SMB_SESSION_VALID(session); + tree_list = &session->s_tree_list; + + smb_llist_enter(tree_list, RW_READER); + + if (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + tree = smb_llist_next(tree_list, tree); + } else { + tree = smb_llist_head(tree_list); + } + + while (tree) { + if (smb_tree_hold(tree)) + break; + + tree = smb_llist_next(tree_list, tree); + } + + smb_llist_exit(tree_list); + return (tree); +} + +/* + * Logoff all users associated with the specified session. + */ +static void +smb_session_logoff(smb_session_t *session) { smb_user_t *user; SMB_SESSION_VALID(session); + smb_session_disconnect_trees(session); + smb_llist_enter(&session->s_user_list, RW_READER); user = smb_llist_head(&session->s_user_list); @@ -975,7 +1247,7 @@ smb_session_disconnect_share(smb_session_t *session, const char *sharename) ASSERT(user->u_session == session); if (smb_user_hold(user)) { - smb_user_disconnect_share(user, sharename); + smb_user_logoff(user); smb_user_release(user); } diff --git a/usr/src/uts/common/fs/smbsrv/smb_trans2_find.c b/usr/src/uts/common/fs/smbsrv/smb_trans2_find.c index 037b2a3b36..d0d60cea5d 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_trans2_find.c +++ b/usr/src/uts/common/fs/smbsrv/smb_trans2_find.c @@ -332,7 +332,7 @@ smb_com_trans2_find_first2(smb_request_t *sr, smb_xa_t *xa) return (SDRC_ERROR); } - od = smb_tree_lookup_odir(sr->tid_tree, odid); + od = smb_tree_lookup_odir(sr, odid); if (od == NULL) return (SDRC_ERROR); @@ -463,7 +463,7 @@ smb_com_trans2_find_next2(smb_request_t *sr, smb_xa_t *xa) if (args.fa_maxdata == 0) return (SDRC_ERROR); - od = smb_tree_lookup_odir(sr->tid_tree, odid); + od = smb_tree_lookup_odir(sr, odid); if (od == NULL) { smbsr_error(sr, NT_STATUS_INVALID_HANDLE, ERRDOS, ERROR_INVALID_HANDLE); @@ -943,7 +943,7 @@ smb_com_find_close2(smb_request_t *sr) if (smbsr_decode_vwv(sr, "w", &odid) 
!= 0) return (SDRC_ERROR); - od = smb_tree_lookup_odir(sr->tid_tree, odid); + od = smb_tree_lookup_odir(sr, odid); if (od == NULL) { smbsr_error(sr, NT_STATUS_INVALID_HANDLE, ERRDOS, ERROR_INVALID_HANDLE); diff --git a/usr/src/uts/common/fs/smbsrv/smb_tree.c b/usr/src/uts/common/fs/smbsrv/smb_tree.c index 13adc2d803..b225c67623 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_tree.c +++ b/usr/src/uts/common/fs/smbsrv/smb_tree.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ /* @@ -40,15 +40,15 @@ * +-------------------+ +-------------------+ +-------------------+ * | SESSION |<----->| SESSION |......| SESSION | * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v - * +-------------------+ +-------------------+ +-------------------+ - * | USER |<----->| USER |......| USER | - * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v + * | | + * | | + * | v + * | +-------------------+ +-------------------+ +-------------------+ + * | | USER |<--->| USER |...| USER | + * | +-------------------+ +-------------------+ +-------------------+ + * | + * | + * v * +-------------------+ +-------------------+ +-------------------+ * | TREE |<----->| TREE |......| TREE | * +-------------------+ +-------------------+ +-------------------+ @@ -175,7 +175,7 @@ static smb_tree_t *smb_tree_connect_core(smb_request_t *); static smb_tree_t *smb_tree_connect_disk(smb_request_t *, const char *); static smb_tree_t *smb_tree_connect_printq(smb_request_t *, const char *); static smb_tree_t *smb_tree_connect_ipc(smb_request_t *, const char *); -static smb_tree_t *smb_tree_alloc(smb_user_t *, const smb_kshare_t *, +static smb_tree_t *smb_tree_alloc(smb_request_t *, const smb_kshare_t *, smb_node_t *, uint32_t, uint32_t); static boolean_t 
smb_tree_is_connected_locked(smb_tree_t *); static boolean_t smb_tree_is_disconnected(smb_tree_t *); @@ -269,6 +269,7 @@ smb_tree_connect_core(smb_request_t *sr) } smb_kshare_release(si); + return (tree); } @@ -361,7 +362,7 @@ smb_tree_release( smb_llist_flush(&tree->t_odir_list); if (smb_tree_is_disconnected(tree) && (tree->t_refcnt == 0)) - smb_user_post_tree(tree->t_user, tree); + smb_session_post_tree(tree->t_session, tree); mutex_exit(&tree->t_mutex); } @@ -428,7 +429,7 @@ smb_tree_enum(smb_tree_t *tree, smb_svcenum_t *svcenum) { smb_ofile_t *of; smb_ofile_t *next; - int rc; + int rc = 0; ASSERT(tree); ASSERT(tree->t_magic == SMB_TREE_MAGIC); @@ -712,8 +713,7 @@ smb_tree_connect_disk(smb_request_t *sr, const char *sharename) if (!smb_shortnames) sr->arg.tcon.optional_support |= SMB_UNIQUE_FILE_NAME; - tree = smb_tree_alloc(user, si, snode, access, - sr->sr_cfg->skc_execflags); + tree = smb_tree_alloc(sr, si, snode, access, sr->sr_cfg->skc_execflags); smb_node_release(snode); @@ -805,8 +805,7 @@ smb_tree_connect_printq(smb_request_t *sr, const char *sharename) sr->sr_tcon.optional_support = SMB_SUPPORT_SEARCH_BITS; - tree = smb_tree_alloc(user, si, snode, access, - sr->sr_cfg->skc_execflags); + tree = smb_tree_alloc(sr, si, snode, access, sr->sr_cfg->skc_execflags); smb_node_release(snode); @@ -846,7 +845,7 @@ smb_tree_connect_ipc(smb_request_t *sr, const char *name) sr->sr_tcon.optional_support = SMB_SUPPORT_SEARCH_BITS; - tree = smb_tree_alloc(user, si, NULL, ACE_ALL_PERMS, 0); + tree = smb_tree_alloc(sr, si, NULL, ACE_ALL_PERMS, 0); if (tree == NULL) { smb_tree_log(sr, name, "access denied"); smbsr_error(sr, NT_STATUS_ACCESS_DENIED, ERRSRV, ERRaccess); @@ -859,41 +858,45 @@ smb_tree_connect_ipc(smb_request_t *sr, const char *name) * Allocate a tree. 
*/ static smb_tree_t * -smb_tree_alloc(smb_user_t *user, const smb_kshare_t *si, smb_node_t *snode, - uint32_t access, uint32_t execflags) +smb_tree_alloc(smb_request_t *sr, const smb_kshare_t *si, + smb_node_t *snode, uint32_t access, uint32_t execflags) { + smb_session_t *session = sr->session; smb_tree_t *tree; uint32_t stype = si->shr_type; uint16_t tid; - if (smb_idpool_alloc(&user->u_tid_pool, &tid)) + if (smb_idpool_alloc(&session->s_tid_pool, &tid)) return (NULL); - tree = kmem_cache_alloc(user->u_server->si_cache_tree, KM_SLEEP); + tree = kmem_cache_alloc(session->s_server->si_cache_tree, KM_SLEEP); bzero(tree, sizeof (smb_tree_t)); - tree->t_user = user; - tree->t_session = user->u_session; - tree->t_server = user->u_server; + tree->t_session = session; + tree->t_server = session->s_server; + + /* grab a ref for tree->t_owner */ + smb_user_hold_internal(sr->uid_user); + tree->t_owner = sr->uid_user; if (STYPE_ISDSK(stype) || STYPE_ISPRN(stype)) { if (smb_tree_getattr(si, snode, tree) != 0) { - smb_idpool_free(&user->u_tid_pool, tid); - kmem_cache_free(user->u_server->si_cache_tree, tree); + smb_idpool_free(&session->s_tid_pool, tid); + kmem_cache_free(session->s_server->si_cache_tree, tree); return (NULL); } } if (smb_idpool_constructor(&tree->t_fid_pool)) { - smb_idpool_free(&user->u_tid_pool, tid); - kmem_cache_free(user->u_server->si_cache_tree, tree); + smb_idpool_free(&session->s_tid_pool, tid); + kmem_cache_free(session->s_server->si_cache_tree, tree); return (NULL); } if (smb_idpool_constructor(&tree->t_odid_pool)) { smb_idpool_destructor(&tree->t_fid_pool); - smb_idpool_free(&user->u_tid_pool, tid); - kmem_cache_free(user->u_server->si_cache_tree, tree); + smb_idpool_free(&session->s_tid_pool, tid); + kmem_cache_free(session->s_server->si_cache_tree, tree); return (NULL); } @@ -929,11 +932,11 @@ smb_tree_alloc(smb_user_t *user, const smb_kshare_t *si, smb_node_t *snode, tree->t_acltype = smb_fsop_acltype(snode); } - 
smb_llist_enter(&user->u_tree_list, RW_WRITER); - smb_llist_insert_head(&user->u_tree_list, tree); - smb_llist_exit(&user->u_tree_list); - atomic_inc_32(&user->u_session->s_tree_cnt); - smb_server_inc_trees(user->u_server); + smb_llist_enter(&session->s_tree_list, RW_WRITER); + smb_llist_insert_head(&session->s_tree_list, tree); + smb_llist_exit(&session->s_tree_list); + atomic_inc_32(&session->s_tree_cnt); + smb_server_inc_trees(session->s_server); return (tree); } @@ -947,19 +950,19 @@ smb_tree_alloc(smb_user_t *user, const smb_kshare_t *si, smb_node_t *snode, void smb_tree_dealloc(void *arg) { - smb_user_t *user; + smb_session_t *session; smb_tree_t *tree = (smb_tree_t *)arg; SMB_TREE_VALID(tree); ASSERT(tree->t_state == SMB_TREE_STATE_DISCONNECTED); ASSERT(tree->t_refcnt == 0); - user = tree->t_user; - smb_llist_enter(&user->u_tree_list, RW_WRITER); - smb_llist_remove(&user->u_tree_list, tree); - smb_idpool_free(&user->u_tid_pool, tree->t_tid); - atomic_dec_32(&tree->t_session->s_tree_cnt); - smb_llist_exit(&user->u_tree_list); + session = tree->t_session; + smb_llist_enter(&session->s_tree_list, RW_WRITER); + smb_llist_remove(&session->s_tree_list, tree); + smb_idpool_free(&session->s_tid_pool, tree->t_tid); + atomic_dec_32(&session->s_tree_cnt); + smb_llist_exit(&session->s_tree_list); mutex_enter(&tree->t_mutex); mutex_exit(&tree->t_mutex); @@ -974,6 +977,10 @@ smb_tree_dealloc(void *arg) smb_llist_destructor(&tree->t_odir_list); smb_idpool_destructor(&tree->t_fid_pool); smb_idpool_destructor(&tree->t_odid_pool); + + SMB_USER_VALID(tree->t_owner); + smb_user_release(tree->t_owner); + kmem_cache_free(tree->t_server->si_cache_tree, tree); } @@ -1234,27 +1241,38 @@ smb_tree_log(smb_request_t *sr, const char *sharename, const char *fmt, ...) * Returns NULL if odir not found or a hold cannot be obtained. 
*/ smb_odir_t * -smb_tree_lookup_odir(smb_tree_t *tree, uint16_t odid) +smb_tree_lookup_odir(smb_request_t *sr, uint16_t odid) { smb_odir_t *od; smb_llist_t *od_list; + smb_tree_t *tree = sr->tid_tree; - ASSERT(tree); ASSERT(tree->t_magic == SMB_TREE_MAGIC); od_list = &tree->t_odir_list; - smb_llist_enter(od_list, RW_READER); + smb_llist_enter(od_list, RW_READER); od = smb_llist_head(od_list); while (od) { - if (od->d_odid == odid) { - if (!smb_odir_hold(od)) - od = NULL; + if (od->d_odid == odid) break; - } od = smb_llist_next(od_list, od); } + if (od == NULL) + goto out; + + /* + * Only allow use of a given Search ID with the same UID that + * was used to create it. MS-CIFS 3.3.5.14 + */ + if (od->d_user != sr->uid_user) { + od = NULL; + goto out; + } + if (!smb_odir_hold(od)) + od = NULL; +out: smb_llist_exit(od_list); return (od); } @@ -1377,15 +1395,16 @@ smb_tree_close_odirs(smb_tree_t *tree, uint16_t pid) } static void -smb_tree_set_execinfo(smb_tree_t *tree, smb_shr_execinfo_t *exec, int exec_type) +smb_tree_set_execinfo(smb_tree_t *tree, smb_shr_execinfo_t *exec, + int exec_type) { exec->e_sharename = tree->t_sharename; - exec->e_winname = tree->t_user->u_name; - exec->e_userdom = tree->t_user->u_domain; + exec->e_winname = tree->t_owner->u_name; + exec->e_userdom = tree->t_owner->u_domain; exec->e_srv_ipaddr = tree->t_session->local_ipaddr; exec->e_cli_ipaddr = tree->t_session->ipaddr; exec->e_cli_netbiosname = tree->t_session->workstation; - exec->e_uid = crgetuid(tree->t_user->u_cred); + exec->e_uid = crgetuid(tree->t_owner->u_cred); exec->e_type = exec_type; } @@ -1438,6 +1457,26 @@ smb_tree_netinfo_encode(smb_tree_t *tree, uint8_t *buf, size_t buflen, return (rc); } +static void +smb_tree_netinfo_username(smb_tree_t *tree, char **namestr, uint32_t *namelen) +{ + smb_user_t *user = tree->t_owner; + + /* + * u_domain_len and u_name_len include the '\0' in their + * lengths, hence the sum of the two lengths gives us room + * for both the '\\' and '\0' 
chars. + */ + ASSERT(namestr); + ASSERT(namelen); + ASSERT(user->u_domain_len > 0); + ASSERT(user->u_name_len > 0); + *namelen = user->u_domain_len + user->u_name_len; + *namestr = kmem_alloc(*namelen, KM_SLEEP); + (void) snprintf(*namestr, *namelen, "%s\\%s", user->u_domain, + user->u_name); +} + /* * Note: ci_numusers should be the number of users connected to * the share rather than the number of references on the tree but @@ -1446,8 +1485,6 @@ smb_tree_netinfo_encode(smb_tree_t *tree, uint8_t *buf, size_t buflen, static void smb_tree_netinfo_init(smb_tree_t *tree, smb_netconnectinfo_t *info) { - smb_user_t *user; - ASSERT(tree); info->ci_id = tree->t_tid; @@ -1459,13 +1496,7 @@ smb_tree_netinfo_init(smb_tree_t *tree, smb_netconnectinfo_t *info) info->ci_sharelen = strlen(tree->t_sharename) + 1; info->ci_share = smb_mem_strdup(tree->t_sharename); - user = tree->t_user; - ASSERT(user); - - info->ci_namelen = user->u_domain_len + user->u_name_len + 2; - info->ci_username = kmem_alloc(info->ci_namelen, KM_SLEEP); - (void) snprintf(info->ci_username, info->ci_namelen, "%s\\%s", - user->u_domain, user->u_name); + smb_tree_netinfo_username(tree, &info->ci_username, &info->ci_namelen); } static void diff --git a/usr/src/uts/common/fs/smbsrv/smb_tree_connect.c b/usr/src/uts/common/fs/smbsrv/smb_tree_connect.c index 1ce9720f5d..19b857e834 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_tree_connect.c +++ b/usr/src/uts/common/fs/smbsrv/smb_tree_connect.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 
*/ #include <smbsrv/smb_kproto.h> @@ -362,8 +363,7 @@ smb_sdrc_t smb_pre_tree_disconnect(smb_request_t *sr) { sr->uid_user = smb_session_lookup_uid(sr->session, sr->smb_uid); - if (sr->uid_user != NULL) - sr->tid_tree = smb_user_lookup_tree(sr->uid_user, sr->smb_tid); + sr->tid_tree = smb_session_lookup_tree(sr->session, sr->smb_tid); DTRACE_SMB_1(op__TreeDisconnect__start, smb_request_t *, sr); return (SDRC_SUCCESS); diff --git a/usr/src/uts/common/fs/smbsrv/smb_user.c b/usr/src/uts/common/fs/smbsrv/smb_user.c index cc3fde7f38..09eaba699c 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_user.c +++ b/usr/src/uts/common/fs/smbsrv/smb_user.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ /* @@ -38,15 +39,15 @@ * +-------------------+ +-------------------+ +-------------------+ * | SESSION |<----->| SESSION |......| SESSION | * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v - * +-------------------+ +-------------------+ +-------------------+ - * | USER |<----->| USER |......| USER | - * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v + * | | + * | | + * | v + * | +-------------------+ +-------------------+ +-------------------+ + * | | USER |<--->| USER |...| USER | + * | +-------------------+ +-------------------+ +-------------------+ + * | + * | + * v * +-------------------+ +-------------------+ +-------------------+ * | TREE |<----->| TREE |......| TREE | * +-------------------+ +-------------------+ +-------------------+ @@ -170,7 +171,6 @@ static boolean_t smb_user_is_logged_in(smb_user_t *); static int smb_user_enum_private(smb_user_t *, smb_svcenum_t *); -static smb_tree_t *smb_user_get_tree(smb_llist_t *, smb_tree_t *); static void smb_user_setcred(smb_user_t *, cred_t *, uint32_t); static void smb_user_nonauth_logon(uint32_t); static void 
smb_user_auth_logoff(uint32_t); @@ -210,20 +210,15 @@ smb_user_login( user->u_audit_sid = audit_sid; if (!smb_idpool_alloc(&session->s_uid_pool, &user->u_uid)) { - if (!smb_idpool_constructor(&user->u_tid_pool)) { - smb_llist_constructor(&user->u_tree_list, - sizeof (smb_tree_t), offsetof(smb_tree_t, t_lnd)); - mutex_init(&user->u_mutex, NULL, MUTEX_DEFAULT, NULL); - smb_user_setcred(user, cr, privileges); - user->u_state = SMB_USER_STATE_LOGGED_IN; - user->u_magic = SMB_USER_MAGIC; - smb_llist_enter(&session->s_user_list, RW_WRITER); - smb_llist_insert_tail(&session->s_user_list, user); - smb_llist_exit(&session->s_user_list); - smb_server_inc_users(session->s_server); - return (user); - } - smb_idpool_free(&session->s_uid_pool, user->u_uid); + mutex_init(&user->u_mutex, NULL, MUTEX_DEFAULT, NULL); + smb_user_setcred(user, cr, privileges); + user->u_state = SMB_USER_STATE_LOGGED_IN; + user->u_magic = SMB_USER_MAGIC; + smb_llist_enter(&session->s_user_list, RW_WRITER); + smb_llist_insert_tail(&session->s_user_list, user); + smb_llist_exit(&session->s_user_list); + smb_server_inc_users(session->s_server); + return (user); } smb_mem_free(user->u_name); smb_mem_free(user->u_domain); @@ -279,10 +274,7 @@ smb_user_logoff( */ user->u_state = SMB_USER_STATE_LOGGING_OFF; mutex_exit(&user->u_mutex); - /* - * All the trees hanging off of this user are disconnected. - */ - smb_user_disconnect_trees(user); + smb_session_disconnect_owned_trees(user->u_session, user); smb_user_auth_logoff(user->u_audit_sid); mutex_enter(&user->u_mutex); user->u_state = SMB_USER_STATE_LOGGED_OFF; @@ -301,13 +293,13 @@ smb_user_logoff( } /* - * Take a reference on a user. + * Take a reference on a user. Do not return a reference unless the user is in + * the logged-in state. 
*/ boolean_t smb_user_hold(smb_user_t *user) { - ASSERT(user); - ASSERT(user->u_magic == SMB_USER_MAGIC); + SMB_USER_VALID(user); mutex_enter(&user->u_mutex); @@ -322,6 +314,19 @@ smb_user_hold(smb_user_t *user) } /* + * Unconditionally take a reference on a user. + */ +void +smb_user_hold_internal(smb_user_t *user) +{ + SMB_USER_VALID(user); + + mutex_enter(&user->u_mutex); + user->u_refcnt++; + mutex_exit(&user->u_mutex); +} + +/* * Release a reference on a user. If the reference count falls to * zero and the user has logged off, post the object for deletion. * Object deletion is deferred to avoid modifying a list while an @@ -337,9 +342,6 @@ smb_user_release( ASSERT(user->u_refcnt); user->u_refcnt--; - /* flush the tree list's delete queue */ - smb_llist_flush(&user->u_tree_list); - switch (user->u_state) { case SMB_USER_STATE_LOGGED_OFF: if (user->u_refcnt == 0) @@ -357,248 +359,6 @@ smb_user_release( mutex_exit(&user->u_mutex); } -void -smb_user_post_tree(smb_user_t *user, smb_tree_t *tree) -{ - SMB_USER_VALID(user); - SMB_TREE_VALID(tree); - ASSERT(tree->t_refcnt == 0); - ASSERT(tree->t_state == SMB_TREE_STATE_DISCONNECTED); - ASSERT(tree->t_user == user); - - smb_llist_post(&user->u_tree_list, tree, smb_tree_dealloc); -} - - -/* - * Find a tree by tree-id. 
- */ -smb_tree_t * -smb_user_lookup_tree( - smb_user_t *user, - uint16_t tid) - -{ - smb_tree_t *tree; - - ASSERT(user); - ASSERT(user->u_magic == SMB_USER_MAGIC); - - smb_llist_enter(&user->u_tree_list, RW_READER); - tree = smb_llist_head(&user->u_tree_list); - - while (tree) { - ASSERT(tree->t_magic == SMB_TREE_MAGIC); - ASSERT(tree->t_user == user); - - if (tree->t_tid == tid) { - if (smb_tree_hold(tree)) { - smb_llist_exit(&user->u_tree_list); - return (tree); - } else { - smb_llist_exit(&user->u_tree_list); - return (NULL); - } - } - - tree = smb_llist_next(&user->u_tree_list, tree); - } - - smb_llist_exit(&user->u_tree_list); - return (NULL); -} - -/* - * Find the first connected tree that matches the specified sharename. - * If the specified tree is NULL the search starts from the beginning of - * the user's tree list. If a tree is provided the search starts just - * after that tree. - */ -smb_tree_t * -smb_user_lookup_share( - smb_user_t *user, - const char *sharename, - smb_tree_t *tree) -{ - ASSERT(user); - ASSERT(user->u_magic == SMB_USER_MAGIC); - ASSERT(sharename); - - smb_llist_enter(&user->u_tree_list, RW_READER); - - if (tree) { - ASSERT(tree->t_magic == SMB_TREE_MAGIC); - ASSERT(tree->t_user == user); - tree = smb_llist_next(&user->u_tree_list, tree); - } else { - tree = smb_llist_head(&user->u_tree_list); - } - - while (tree) { - ASSERT(tree->t_magic == SMB_TREE_MAGIC); - ASSERT(tree->t_user == user); - if (smb_strcasecmp(tree->t_sharename, sharename, 0) == 0) { - if (smb_tree_hold(tree)) { - smb_llist_exit(&user->u_tree_list); - return (tree); - } - } - tree = smb_llist_next(&user->u_tree_list, tree); - } - - smb_llist_exit(&user->u_tree_list); - return (NULL); -} - -/* - * Find the first connected tree that matches the specified volume name. - * If the specified tree is NULL the search starts from the beginning of - * the user's tree list. If a tree is provided the search starts just - * after that tree. 
- */ -smb_tree_t * -smb_user_lookup_volume( - smb_user_t *user, - const char *name, - smb_tree_t *tree) -{ - ASSERT(user); - ASSERT(user->u_magic == SMB_USER_MAGIC); - ASSERT(name); - - smb_llist_enter(&user->u_tree_list, RW_READER); - - if (tree) { - ASSERT(tree->t_magic == SMB_TREE_MAGIC); - ASSERT(tree->t_user == user); - tree = smb_llist_next(&user->u_tree_list, tree); - } else { - tree = smb_llist_head(&user->u_tree_list); - } - - while (tree) { - ASSERT(tree->t_magic == SMB_TREE_MAGIC); - ASSERT(tree->t_user == user); - - if (smb_strcasecmp(tree->t_volume, name, 0) == 0) { - if (smb_tree_hold(tree)) { - smb_llist_exit(&user->u_tree_list); - return (tree); - } - } - - tree = smb_llist_next(&user->u_tree_list, tree); - } - - smb_llist_exit(&user->u_tree_list); - return (NULL); -} - -/* - * Disconnect all trees that match the specified client process-id. - */ -void -smb_user_close_pid( - smb_user_t *user, - uint16_t pid) -{ - smb_tree_t *tree; - - ASSERT(user); - ASSERT(user->u_magic == SMB_USER_MAGIC); - - tree = smb_user_get_tree(&user->u_tree_list, NULL); - while (tree) { - smb_tree_t *next; - ASSERT(tree->t_user == user); - smb_tree_close_pid(tree, pid); - next = smb_user_get_tree(&user->u_tree_list, tree); - smb_tree_release(tree); - tree = next; - } -} - -/* - * Disconnect all trees that this user has connected. - */ -void -smb_user_disconnect_trees( - smb_user_t *user) -{ - smb_tree_t *tree; - - ASSERT(user); - ASSERT(user->u_magic == SMB_USER_MAGIC); - - tree = smb_user_get_tree(&user->u_tree_list, NULL); - while (tree) { - ASSERT(tree->t_user == user); - smb_tree_disconnect(tree, B_TRUE); - smb_tree_release(tree); - tree = smb_user_get_tree(&user->u_tree_list, NULL); - } -} - -/* - * Disconnect all trees that match the specified share name. 
- */ -void -smb_user_disconnect_share( - smb_user_t *user, - const char *sharename) -{ - smb_tree_t *tree; - smb_tree_t *next; - - ASSERT(user); - ASSERT(user->u_magic == SMB_USER_MAGIC); - ASSERT(user->u_refcnt); - - tree = smb_user_lookup_share(user, sharename, NULL); - while (tree) { - ASSERT(tree->t_magic == SMB_TREE_MAGIC); - smb_session_cancel_requests(user->u_session, tree, NULL); - smb_tree_disconnect(tree, B_TRUE); - next = smb_user_lookup_share(user, sharename, tree); - smb_tree_release(tree); - tree = next; - } -} - -/* - * Close a file by its unique id. - */ -int -smb_user_fclose(smb_user_t *user, uint32_t uniqid) -{ - smb_llist_t *tree_list; - smb_tree_t *tree; - int rc = ENOENT; - - ASSERT(user); - ASSERT(user->u_magic == SMB_USER_MAGIC); - - tree_list = &user->u_tree_list; - ASSERT(tree_list); - - smb_llist_enter(tree_list, RW_READER); - tree = smb_llist_head(tree_list); - - while ((tree != NULL) && (rc == ENOENT)) { - ASSERT(tree->t_user == user); - - if (smb_tree_hold(tree)) { - rc = smb_tree_fclose(tree, uniqid); - smb_tree_release(tree); - } - - tree = smb_llist_next(tree_list, tree); - } - - smb_llist_exit(tree_list); - return (rc); -} - /* * Determine whether or not the user is an administrator. * Members of the administrators group have administrative rights. 
@@ -688,9 +448,7 @@ smb_user_namecmp(smb_user_t *user, const char *name) int smb_user_enum(smb_user_t *user, smb_svcenum_t *svcenum) { - smb_tree_t *tree; - smb_tree_t *next; - int rc; + int rc = 0; ASSERT(user); ASSERT(user->u_magic == SMB_USER_MAGIC); @@ -698,21 +456,6 @@ smb_user_enum(smb_user_t *user, smb_svcenum_t *svcenum) if (svcenum->se_type == SMB_SVCENUM_TYPE_USER) return (smb_user_enum_private(user, svcenum)); - tree = smb_user_get_tree(&user->u_tree_list, NULL); - while (tree) { - ASSERT(tree->t_user == user); - - rc = smb_tree_enum(tree, svcenum); - if (rc != 0) { - smb_tree_release(tree); - break; - } - - next = smb_user_get_tree(&user->u_tree_list, tree); - smb_tree_release(tree); - tree = next; - } - return (rc); } @@ -769,8 +512,6 @@ smb_user_delete(void *arg) user->u_magic = (uint32_t)~SMB_USER_MAGIC; mutex_destroy(&user->u_mutex); - smb_llist_destructor(&user->u_tree_list); - smb_idpool_destructor(&user->u_tid_pool); if (user->u_cred) crfree(user->u_cred); if (user->u_privcred) @@ -780,43 +521,6 @@ smb_user_delete(void *arg) kmem_cache_free(user->u_server->si_cache_user, user); } -/* - * Get the next connected tree in the list. A reference is taken on - * the tree, which can be released later with smb_tree_release(). - * - * If the specified tree is NULL the search starts from the beginning of - * the tree list. If a tree is provided the search starts just after - * that tree. - * - * Returns NULL if there are no connected trees in the list. 
- */ -static smb_tree_t * -smb_user_get_tree( - smb_llist_t *tree_list, - smb_tree_t *tree) -{ - ASSERT(tree_list); - - smb_llist_enter(tree_list, RW_READER); - - if (tree) { - ASSERT(tree->t_magic == SMB_TREE_MAGIC); - tree = smb_llist_next(tree_list, tree); - } else { - tree = smb_llist_head(tree_list); - } - - while (tree) { - if (smb_tree_hold(tree)) - break; - - tree = smb_llist_next(tree_list, tree); - } - - smb_llist_exit(tree_list); - return (tree); -} - cred_t * smb_user_getcred(smb_user_t *user) { diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c index ba9c766c65..b4ab4ec3fd 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dataset.c +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c @@ -363,8 +363,19 @@ dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx, boolean_t dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag) { - return (dmu_buf_try_add_ref(ds->ds_dbuf, dp->dp_meta_objset, - ds->ds_object, DMU_BONUS_BLKID, tag)); + dmu_buf_t *dbuf = ds->ds_dbuf; + boolean_t result = B_FALSE; + + if (dbuf != NULL && dmu_buf_try_add_ref(dbuf, dp->dp_meta_objset, + ds->ds_object, DMU_BONUS_BLKID, tag)) { + + if (ds == dmu_buf_get_user(dbuf)) + result = B_TRUE; + else + dmu_buf_rele(dbuf, tag); + } + + return (result); } int diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c index 6a27544201..02844cef07 100644 --- a/usr/src/uts/common/os/exit.c +++ b/usr/src/uts/common/os/exit.c @@ -400,14 +400,36 @@ proc_exit(int why, int what) if (z->zone_boot_err == 0 && zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) { - if (z->zone_restart_init == B_TRUE) { - if (restart_init(what, why) == 0) - return (0); - } - z->zone_init_status = wstat(why, what); - (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, - zone_kcred()); + /* + * If the init process should be restarted, the + * "zone_restart_init" member will be set. 
Some init + * programs in branded zones do not tolerate a restart + * in the traditional manner; setting the + * "zone_reboot_on_init_exit" member will cause the + * entire zone to be rebooted instead. If neither of + * these flags is set the zone will shut down. + */ + if (z->zone_reboot_on_init_exit == B_TRUE && + z->zone_restart_init == B_TRUE) { + /* + * Trigger a zone reboot and continue + * with exit processing. + */ + z->zone_init_status = wstat(why, what); + (void) zone_kadmin(A_REBOOT, 0, NULL, + zone_kcred()); + + } else { + if (z->zone_restart_init == B_TRUE) { + if (restart_init(what, why) == 0) + return (0); + } + + z->zone_init_status = wstat(why, what); + (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, + zone_kcred()); + } } /* @@ -995,10 +1017,9 @@ winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) int waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) { - int found; proc_t *cp, *pp; - int proc_gone; int waitflag = !(options & WNOWAIT); + boolean_t have_brand_helper = B_FALSE; /* * Obsolete flag, defined here only for binary compatibility @@ -1047,10 +1068,37 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) return (ECHILD); } - while (pp->p_child != NULL) { + if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) { + have_brand_helper = B_TRUE; + } + + while (pp->p_child != NULL || have_brand_helper) { + boolean_t brand_wants_wait = B_FALSE; + int proc_gone = 0; + int found = 0; + + /* + * Give the brand a chance to return synthetic results from + * this waitid() call before we do the real thing. + */ + if (have_brand_helper) { + int ret; - proc_gone = 0; + if (BROP(pp)->b_waitid_helper(idtype, id, ip, options, + &brand_wants_wait, &ret) == 0) { + mutex_exit(&pidlock); + return (ret); + } + if (pp->p_child == NULL) { + goto no_real_children; + } + } + + /* + * Look for interesting children in the newstate list. 
+ */ + VERIFY(pp->p_child != NULL); for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) { if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID)) continue; @@ -1107,7 +1155,6 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) * Wow! None of the threads on the p_sibling_ns list were * interesting threads. Check all the kids! */ - found = 0; for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) { if (idtype == P_PID && id != cp->p_pid) continue; @@ -1186,11 +1233,12 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) break; } +no_real_children: /* * If we found no interesting processes at all, * break out and return ECHILD. */ - if (found + proc_gone == 0) + if (!brand_wants_wait && (found + proc_gone == 0)) break; if (options & WNOHANG) { @@ -1209,7 +1257,7 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) * change state while we wait, we don't wait at all. * Get out with ECHILD according to SVID. */ - if (found == proc_gone) + if (!brand_wants_wait && (found == proc_gone)) break; if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { diff --git a/usr/src/uts/common/os/logsubr.c b/usr/src/uts/common/os/logsubr.c index 86e9045887..6a603c8982 100644 --- a/usr/src/uts/common/os/logsubr.c +++ b/usr/src/uts/common/os/logsubr.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2013 Gary Mills * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright (c) 2015, Joyent, Inc. */ #include <sys/types.h> @@ -250,7 +250,7 @@ log_init(void) */ printf("\rSunOS Release %s Version %s %u-bit\n", utsname.release, utsname.version, NBBY * (uint_t)sizeof (void *)); - printf("Copyright (c) 2010-2014, Joyent Inc. All rights reserved.\n"); + printf("Copyright (c) 2010-2015, Joyent Inc. 
All rights reserved.\n"); #ifdef DEBUG printf("DEBUG enabled\n"); #endif diff --git a/usr/src/uts/common/os/sig.c b/usr/src/uts/common/os/sig.c index b117bf3584..ae643c280e 100644 --- a/usr/src/uts/common/os/sig.c +++ b/usr/src/uts/common/os/sig.c @@ -22,7 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2014, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -194,7 +194,7 @@ eat_signal(kthread_t *t, int sig) !(ttoproc(t)->p_proc_flag & P_PR_LOCK)) { ttoproc(t)->p_stopsig = 0; t->t_dtrace_stop = 0; - t->t_schedflag |= TS_XSTART | TS_PSTART; + t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART; setrun_locked(t); } else if (t != curthread && t->t_state == TS_ONPROC) { aston(t); /* make it do issig promptly */ @@ -608,6 +608,21 @@ issig_forreal(void) } /* + * Allow the brand the chance to alter (or suppress) delivery + * of this signal. + */ + if (PROC_IS_BRANDED(p) && BROP(p)->b_issig_stop != NULL) { + /* + * The brand hook will return 0 if it would like + * us to drive on, or -1 if we should restart + * the loop to check other conditions. + */ + if (BROP(p)->b_issig_stop(p, lwp) != 0) { + continue; + } + } + + /* * Honor requested stop before dealing with the * current signal; a debugger may change it. * Do not want to go back to loop here since this is a special @@ -939,6 +954,16 @@ stop(int why, int what) } break; + case PR_BRAND: + /* + * We have been stopped by the brand code for a brand-private + * reason. This is an asynchronous stop affecting only this + * LWP. 
+ */ + VERIFY(PROC_IS_BRANDED(p)); + flags &= ~TS_BSTART; + break; + default: /* /proc stop */ flags &= ~TS_PSTART; /* @@ -1050,7 +1075,7 @@ stop(int why, int what) } } - if (why != PR_JOBCONTROL && why != PR_CHECKPOINT) { + if (why != PR_JOBCONTROL && why != PR_CHECKPOINT && why != PR_BRAND) { /* * Do process-level notification when all lwps are * either stopped on events of interest to /proc @@ -1156,6 +1181,13 @@ stop(int why, int what) if (why == PR_CHECKPOINT) del_one_utstop(); + /* + * Allow the brand to post notification of this stop condition. + */ + if (PROC_IS_BRANDED(p) && BROP(p)->b_stop_notify != NULL) { + BROP(p)->b_stop_notify(p, lwp, why, what); + } + thread_lock(t); ASSERT((t->t_schedflag & TS_ALLSTART) == 0); t->t_schedflag |= flags; @@ -1177,7 +1209,7 @@ stop(int why, int what) (p->p_flag & (SEXITLWPS|SKILLED))) { p->p_stopsig = 0; thread_lock(t); - t->t_schedflag |= TS_XSTART | TS_PSTART; + t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART; setrun_locked(t); thread_unlock_nopreempt(t); } else if (why == PR_JOBCONTROL) { @@ -1795,6 +1827,15 @@ sigcld_repost() sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); mutex_enter(&pidlock); + if (PROC_IS_BRANDED(pp) && BROP(pp)->b_sigcld_repost != NULL) { + /* + * Allow the brand to inject synthetic SIGCLD signals. 
+ */ + if (BROP(pp)->b_sigcld_repost(pp, sqp) == 0) { + mutex_exit(&pidlock); + return; + } + } for (cp = pp->p_child; cp; cp = cp->p_sibling) { if (cp->p_pidflag & CLDPEND) { post_sigcld(cp, sqp); diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index 285aeac032..347a90a022 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -2624,6 +2624,7 @@ zone_init(void) zone0.zone_ntasks = 1; mutex_exit(&p0.p_lock); zone0.zone_restart_init = B_TRUE; + zone0.zone_reboot_on_init_exit = B_FALSE; zone0.zone_init_status = -1; zone0.zone_brand = &native_brand; rctl_prealloc_destroy(gp); @@ -4669,8 +4670,9 @@ parse_rctls(caddr_t ubuf, size_t buflen, nvlist_t **nvlp) error = EINVAL; name = nvpair_name(nvp); - if (strncmp(nvpair_name(nvp), "zone.", sizeof ("zone.") - 1) - != 0 || nvpair_type(nvp) != DATA_TYPE_NVLIST_ARRAY) { + if ((strncmp(name, "zone.", sizeof ("zone.") - 1) != 0 && + strncmp(name, "project.", sizeof ("project.") - 1) != 0) || + nvpair_type(nvp) != DATA_TYPE_NVLIST_ARRAY) { goto out; } if ((hndl = rctl_hndl_lookup(name)) == -1) { @@ -4819,6 +4821,7 @@ zone_create(const char *zone_name, const char *zone_root, zone->zone_ncpus = 0; zone->zone_ncpus_online = 0; zone->zone_restart_init = B_TRUE; + zone->zone_reboot_on_init_exit = B_FALSE; zone->zone_init_status = -1; zone->zone_brand = &native_brand; zone->zone_initname = NULL; @@ -5045,8 +5048,8 @@ zone_create(const char *zone_name, const char *zone_root, /* * The process, task, and project rctls are probably wrong; * we need an interface to get the default values of all rctls, - * and initialize zsched appropriately. I'm not sure that that - * makes much of a difference, though. + * and initialize zsched appropriately. However, we allow zoneadmd + * to pass down both zone and project rctls for the zone's init. 
*/ error = newproc(zsched, (void *)&zarg, syscid, minclsyspri, NULL, 0); if (error != 0) { diff --git a/usr/src/uts/common/smbsrv/smb_kproto.h b/usr/src/uts/common/smbsrv/smb_kproto.h index f2de265176..ad7402fd80 100644 --- a/usr/src/uts/common/smbsrv/smb_kproto.h +++ b/usr/src/uts/common/smbsrv/smb_kproto.h @@ -521,6 +521,15 @@ void smb_session_disconnect_from_share(smb_llist_t *, char *); smb_user_t *smb_session_dup_user(smb_session_t *, char *, char *); smb_user_t *smb_session_lookup_uid(smb_session_t *, uint16_t); void smb_session_post_user(smb_session_t *, smb_user_t *); +void smb_session_post_tree(smb_session_t *, smb_tree_t *); +smb_tree_t *smb_session_lookup_tree(smb_session_t *, uint16_t); +smb_tree_t *smb_session_lookup_share(smb_session_t *, const char *, + smb_tree_t *); +smb_tree_t *smb_session_lookup_volume(smb_session_t *, const char *, + smb_tree_t *); +void smb_session_close_pid(smb_session_t *, uint16_t); +void smb_session_disconnect_owned_trees(smb_session_t *, smb_user_t *); +void smb_session_disconnect_trees(smb_session_t *); void smb_session_disconnect_share(smb_session_t *, const char *); void smb_session_getclient(smb_session_t *, char *, size_t); boolean_t smb_session_isclient(smb_session_t *, const char *); @@ -539,10 +548,10 @@ void smb_request_free(smb_request_t *); /* * ofile functions (file smb_ofile.c) */ -smb_ofile_t *smb_ofile_lookup_by_fid(smb_tree_t *, uint16_t); +smb_ofile_t *smb_ofile_lookup_by_fid(smb_request_t *, uint16_t); smb_ofile_t *smb_ofile_lookup_by_uniqid(smb_tree_t *, uint32_t); boolean_t smb_ofile_disallow_fclose(smb_ofile_t *); -smb_ofile_t *smb_ofile_open(smb_tree_t *, smb_node_t *, uint16_t, +smb_ofile_t *smb_ofile_open(smb_request_t *, smb_node_t *, uint16_t, smb_arg_open_t *, uint16_t, uint32_t, smb_error_t *); void smb_ofile_close(smb_ofile_t *, int32_t); void smb_ofile_delete(void *); @@ -603,18 +612,11 @@ smb_user_t *smb_user_login(smb_session_t *, cred_t *, smb_user_t *smb_user_dup(smb_user_t *); void 
smb_user_logoff(smb_user_t *); void smb_user_delete(void *); -void smb_user_post_tree(smb_user_t *, smb_tree_t *); -smb_tree_t *smb_user_lookup_tree(smb_user_t *, uint16_t); -smb_tree_t *smb_user_lookup_share(smb_user_t *, const char *, smb_tree_t *); -smb_tree_t *smb_user_lookup_volume(smb_user_t *, const char *, smb_tree_t *); boolean_t smb_user_is_admin(smb_user_t *); boolean_t smb_user_namecmp(smb_user_t *, const char *); int smb_user_enum(smb_user_t *, smb_svcenum_t *); -void smb_user_close_pid(smb_user_t *, uint16_t); -void smb_user_disconnect_trees(smb_user_t *user); -void smb_user_disconnect_share(smb_user_t *, const char *); -int smb_user_fclose(smb_user_t *, uint32_t); boolean_t smb_user_hold(smb_user_t *); +void smb_user_hold_internal(smb_user_t *); void smb_user_release(smb_user_t *); cred_t *smb_user_getcred(smb_user_t *); cred_t *smb_user_getprivcred(smb_user_t *); @@ -637,7 +639,7 @@ int smb_tree_enum(smb_tree_t *, smb_svcenum_t *); int smb_tree_fclose(smb_tree_t *, uint32_t); boolean_t smb_tree_hold(smb_tree_t *); void smb_tree_release(smb_tree_t *); -smb_odir_t *smb_tree_lookup_odir(smb_tree_t *, uint16_t); +smb_odir_t *smb_tree_lookup_odir(smb_request_t *, uint16_t); boolean_t smb_tree_is_connected(smb_tree_t *); #define SMB_TREE_GET_TID(tree) ((tree)->t_tid) diff --git a/usr/src/uts/common/smbsrv/smb_ktypes.h b/usr/src/uts/common/smbsrv/smb_ktypes.h index 493e7130a7..2c5d102f62 100644 --- a/usr/src/uts/common/smbsrv/smb_ktypes.h +++ b/usr/src/uts/common/smbsrv/smb_ktypes.h @@ -908,7 +908,9 @@ typedef struct smb_session { smb_slist_t s_req_list; smb_llist_t s_xa_list; smb_llist_t s_user_list; + smb_llist_t s_tree_list; smb_idpool_t s_uid_pool; + smb_idpool_t s_tid_pool; smb_txlst_t s_txlst; volatile uint32_t s_tree_cnt; @@ -975,9 +977,6 @@ typedef struct smb_user { cred_t *u_cred; cred_t *u_privcred; - smb_llist_t u_tree_list; - smb_idpool_t u_tid_pool; - uint32_t u_refcnt; uint32_t u_flags; uint32_t u_privileges; @@ -1028,7 +1027,11 @@ typedef 
struct smb_tree { struct smb_server *t_server; smb_session_t *t_session; - smb_user_t *t_user; + /* + * user whose uid was in the tree connect message + * ("owner" in MS-CIFS parlance, see section 2.2.1.6 definition of FID) + */ + smb_user_t *t_owner; smb_node_t *t_snode; smb_llist_t t_ofile_list; @@ -1259,6 +1262,7 @@ typedef struct smb_odir { list_node_t d_lnd; smb_odir_state_t d_state; smb_session_t *d_session; + smb_user_t *d_user; smb_tree_t *d_tree; smb_node_t *d_dnode; cred_t *d_cred; diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h index 3486ae864d..b3abada863 100644 --- a/usr/src/uts/common/sys/brand.h +++ b/usr/src/uts/common/sys/brand.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #ifndef _SYS_BRAND_H @@ -132,6 +132,11 @@ struct brand_ops { boolean_t (*b_native_exec)(uint8_t, const char **); void (*b_ptrace_exectrap)(proc_t *); uint32_t (*b_map32limit)(proc_t *); + void (*b_stop_notify)(proc_t *, klwp_t *, ushort_t, ushort_t); + int (*b_waitid_helper)(idtype_t, id_t, k_siginfo_t *, int, + boolean_t *, int *); + int (*b_sigcld_repost)(proc_t *, sigqueue_t *); + int (*b_issig_stop)(proc_t *, klwp_t *); }; /* diff --git a/usr/src/uts/common/sys/procfs.h b/usr/src/uts/common/sys/procfs.h index f592fd9dcf..501af712ef 100644 --- a/usr/src/uts/common/sys/procfs.h +++ b/usr/src/uts/common/sys/procfs.h @@ -25,6 +25,7 @@ */ /* * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #ifndef _SYS_PROCFS_H @@ -233,6 +234,7 @@ typedef struct pstatus { #define PR_FAULTED 6 #define PR_SUSPENDED 7 #define PR_CHECKPOINT 8 +#define PR_BRAND 9 /* * lwp ps(1) information file. 
/proc/<pid>/lwp/<lwpid>/lwpsinfo diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h index 9f2e166fea..41ea2331df 100644 --- a/usr/src/uts/common/sys/thread.h +++ b/usr/src/uts/common/sys/thread.h @@ -419,8 +419,9 @@ typedef struct _kthread { #define TS_RESUME 0x1000 /* setrun() by CPR resume process */ #define TS_CREATE 0x2000 /* setrun() by syslwp_create() */ #define TS_RUNQMATCH 0x4000 /* exact run queue balancing by setbackdq() */ +#define TS_BSTART 0x8000 /* setrun() by brand */ #define TS_ALLSTART \ - (TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE) + (TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE|TS_BSTART) #define TS_ANYWAITQ (TS_PROJWAITQ|TS_ZONEWAITQ) /* @@ -448,6 +449,10 @@ typedef struct _kthread { #define ISTOPPED(t) ((t)->t_state == TS_STOPPED && \ !((t)->t_schedflag & TS_PSTART)) +/* True if thread is stopped for a brand-specific reason */ +#define BSTOPPED(t) ((t)->t_state == TS_STOPPED && \ + !((t)->t_schedflag & TS_BSTART)) + /* True if thread is asleep and wakeable */ #define ISWAKEABLE(t) (((t)->t_state == TS_SLEEP && \ ((t)->t_flag & T_WAKEABLE))) diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 7ab9377e16..a5d1610842 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -594,6 +594,7 @@ typedef struct zone { tsol_mlp_list_t zone_mlps; /* MLPs on zone-private addresses */ boolean_t zone_restart_init; /* Restart init if it dies? */ + boolean_t zone_reboot_on_init_exit; /* Reboot if init dies? */ struct brand *zone_brand; /* zone's brand */ void *zone_brand_data; /* store brand specific data */ id_t zone_defaultcid; /* dflt scheduling class id */ |
