Diffstat (limited to 'usr/src/uts/common')
-rw-r--r--  usr/src/uts/common/brand/lx/os/lx_brand.c | 508
-rw-r--r--  usr/src/uts/common/brand/lx/os/lx_misc.c | 99
-rw-r--r--  usr/src/uts/common/brand/lx/os/lx_pid.c | 24
-rw-r--r--  usr/src/uts/common/brand/lx/os/lx_ptrace.c | 2270
-rw-r--r--  usr/src/uts/common/brand/lx/procfs/lx_proc.h | 8
-rw-r--r--  usr/src/uts/common/brand/lx/procfs/lx_prsubr.c | 2
-rw-r--r--  usr/src/uts/common/brand/lx/procfs/lx_prvnops.c | 512
-rw-r--r--  usr/src/uts/common/brand/lx/sys/lx_brand.h | 158
-rw-r--r--  usr/src/uts/common/brand/lx/sys/lx_misc.h | 14
-rw-r--r--  usr/src/uts/common/brand/lx/syscall/lx_clone.c | 19
-rw-r--r--  usr/src/uts/common/disp/thread.c | 2
-rw-r--r--  usr/src/uts/common/fs/lookup.c | 21
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_srv.c | 15
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c | 6
-rw-r--r--  usr/src/uts/common/fs/proc/prcontrol.c | 4
-rw-r--r--  usr/src/uts/common/fs/proc/prsubr.c | 3
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_common_open.c | 4
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_delete.c | 2
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_dispatch.c | 22
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_find.c | 8
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_fsops.c | 2
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_nt_create_andx.c | 3
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_nt_transact_create.c | 3
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_odir.c | 31
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_ofile.c | 108
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_opipe.c | 4
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_process_exit.c | 4
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c | 2
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_server.c | 103
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_session.c | 310
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_trans2_find.c | 6
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_tree.c | 157
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_tree_connect.c | 4
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_user.c | 370
-rw-r--r--  usr/src/uts/common/fs/zfs/dsl_dataset.c | 15
-rw-r--r--  usr/src/uts/common/os/exit.c | 76
-rw-r--r--  usr/src/uts/common/os/logsubr.c | 4
-rw-r--r--  usr/src/uts/common/os/sig.c | 49
-rw-r--r--  usr/src/uts/common/os/zone.c | 11
-rw-r--r--  usr/src/uts/common/smbsrv/smb_kproto.h | 24
-rw-r--r--  usr/src/uts/common/smbsrv/smb_ktypes.h | 12
-rw-r--r--  usr/src/uts/common/sys/brand.h | 7
-rw-r--r--  usr/src/uts/common/sys/procfs.h | 2
-rw-r--r--  usr/src/uts/common/sys/thread.h | 7
-rw-r--r--  usr/src/uts/common/sys/zone.h | 1
45 files changed, 3941 insertions, 1075 deletions
diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c
index b964aab1d3..4507c0303c 100644
--- a/usr/src/uts/common/brand/lx/os/lx_brand.c
+++ b/usr/src/uts/common/brand/lx/os/lx_brand.c
@@ -62,6 +62,7 @@
#include <sys/sdt.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
+#include <sys/core.h>
#include <lx_signum.h>
int lx_debug = 0;
@@ -77,6 +78,10 @@ void lx_set_kern_version(zone_t *, char *);
void lx_copy_procdata(proc_t *, proc_t *);
extern int getsetcontext(int, void *);
+extern int waitsys(idtype_t, id_t, siginfo_t *, int);
+#if defined(_SYSCALL32_IMPL)
+extern int waitsys32(idtype_t, id_t, siginfo_t *, int);
+#endif
extern void lx_proc_exit(proc_t *, klwp_t *);
static void lx_psig_to_proc(proc_t *, kthread_t *, int);
@@ -106,35 +111,38 @@ static int lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
caddr_t exec_file, struct cred *cred, int brand_action);
static boolean_t lx_native_exec(uint8_t, const char **);
-static void lx_ptrace_exectrap(proc_t *);
static uint32_t lx_map32limit(proc_t *);
/* lx brand */
struct brand_ops lx_brops = {
- lx_init_brand_data,
- lx_free_brand_data,
- lx_brandsys,
- lx_setbrand,
- lx_getattr,
- lx_setattr,
- lx_copy_procdata,
- lx_proc_exit,
- lx_exec,
- lx_setrval,
- lx_initlwp,
- lx_forklwp,
- lx_freelwp,
- lx_exitlwp,
- lx_elfexec,
- NULL,
- NULL,
- lx_psig_to_proc,
- NSIG,
- lx_exit_with_sig,
- lx_wait_filter,
- lx_native_exec,
- lx_ptrace_exectrap,
- lx_map32limit
+ lx_init_brand_data, /* b_init_brand_data */
+ lx_free_brand_data, /* b_free_brand_data */
+ lx_brandsys, /* b_brandsys */
+ lx_setbrand, /* b_setbrand */
+ lx_getattr, /* b_getattr */
+ lx_setattr, /* b_setattr */
+ lx_copy_procdata, /* b_copy_procdata */
+ lx_proc_exit, /* b_proc_exit */
+ lx_exec, /* b_exec */
+ lx_setrval, /* b_lwp_setrval */
+ lx_initlwp, /* b_initlwp */
+ lx_forklwp, /* b_forklwp */
+ lx_freelwp, /* b_freelwp */
+ lx_exitlwp, /* b_lwpexit */
+ lx_elfexec, /* b_elfexec */
+ NULL, /* b_sigset_native_to_brand */
+ NULL, /* b_sigset_brand_to_native */
+ lx_psig_to_proc, /* b_psig_to_proc */
+ NSIG, /* b_nsig */
+ lx_exit_with_sig, /* b_exit_with_sig */
+ lx_wait_filter, /* b_wait_filter */
+ lx_native_exec, /* b_native_exec */
+ NULL, /* b_ptrace_exectrap */
+ lx_map32limit, /* b_map32limit */
+ lx_stop_notify, /* b_stop_notify */
+ lx_waitid_helper, /* b_waitid_helper */
+ lx_sigcld_repost, /* b_sigcld_repost */
+ lx_issig_stop /* b_issig_stop */
};
struct brand_mach_ops lx_mops = {
@@ -166,33 +174,39 @@ static struct modlinkage modlinkage = {
void
lx_proc_exit(proc_t *p, klwp_t *lwp)
{
- zone_t *z = p->p_zone;
int sig = ptolxproc(p)->l_signal;
- ASSERT(p->p_brand != NULL);
- ASSERT(p->p_brand_data != NULL);
-
- /*
- * If init is dying and we aren't explicitly shutting down the zone
- * or the system, then Solaris is about to restart init. The Linux
- * init is not designed to handle a restart, which it interprets as
- * a reboot. To give it a sane environment in which to run, we
- * reboot the zone.
- */
- if (p->p_pid == z->zone_proc_initpid) {
- if (z->zone_boot_err == 0 &&
- z->zone_restart_init &&
- zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
- zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN)
- (void) zone_kadmin(A_REBOOT, 0, NULL, CRED());
- }
+ VERIFY(p->p_brand == &lx_brand);
+ VERIFY(p->p_brand_data != NULL);
/*
* We might get here if fork failed (e.g. ENOMEM) so we don't always
* have an lwp (see brand_clearbrand).
*/
- if (lwp != NULL)
+ if (lwp != NULL) {
+ boolean_t reenter_mutex = B_FALSE;
+
+ /*
+ * This brand entry point is called variously with and without
+ * the process p_lock held. It would be possible to refactor
+ * the brand infrastructure so that proc_exit() explicitly
+ * calls this hook (b_lwpexit/lx_exitlwp) for the last LWP in a
+ * process prior to detaching the brand with
+ * brand_clearbrand(). Absent such refactoring, we
+ * conditionally exit the mutex for the duration of the call.
+ *
+ * The atomic replacement of both "p_brand" and "p_brand_data"
+ * is not affected by dropping and reacquiring the mutex here.
+ */
+ if (mutex_owned(&p->p_lock) != 0) {
+ mutex_exit(&p->p_lock);
+ reenter_mutex = B_TRUE;
+ }
lx_exitlwp(lwp);
+ if (reenter_mutex) {
+ mutex_enter(&p->p_lock);
+ }
+ }
/*
* The call path here is:
@@ -260,310 +274,6 @@ lx_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
return (-EINVAL);
}
-/*
- * Enable/disable ptrace system call tracing for the given LWP. Enabling is
- * done by both setting the flag in that LWP's brand data (in the kernel) and
- * setting the process-wide trace flag (in the brand library of the traced
- * process).
- */
-static int
-lx_ptrace_syscall_set(pid_t pid, id_t lwpid, int set)
-{
- proc_t *p;
- kthread_t *t;
- klwp_t *lwp;
- lx_proc_data_t *lpdp;
- lx_lwp_data_t *lldp;
- uintptr_t addr;
- int ret, flag = 1;
-
- if ((p = sprlock(pid)) == NULL)
- return (ESRCH);
-
- if (priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) {
- sprunlock(p);
- return (EPERM);
- }
-
- if ((t = idtot(p, lwpid)) == NULL || (lwp = ttolwp(t)) == NULL) {
- sprunlock(p);
- return (ESRCH);
- }
-
- if ((lpdp = p->p_brand_data) == NULL ||
- (lldp = lwp->lwp_brand) == NULL) {
- sprunlock(p);
- return (ESRCH);
- }
-
- if (set) {
- /*
- * Enable the ptrace flag for this LWP and this process. Note
- * that we will turn off the LWP's ptrace flag, but we don't
- * turn off the process's ptrace flag.
- */
- lldp->br_ptrace = 1;
- lpdp->l_ptrace = 1;
-
- addr = lpdp->l_traceflag;
-
- mutex_exit(&p->p_lock);
-
- /*
- * This can fail only in some rare corner cases where the
- * process is exiting or we're completely out of memory. In
- * these cases, it's sufficient to return an error to the ptrace
- * consumer and leave the process-wide flag set.
- */
- ret = uwrite(p, &flag, sizeof (flag), addr);
-
- mutex_enter(&p->p_lock);
-
- /*
- * If we couldn't set the trace flag, unset the LWP's ptrace
- * flag as there ptrace consumer won't expect this LWP to stop.
- */
- if (ret != 0)
- lldp->br_ptrace = 0;
- } else {
- lldp->br_ptrace = 0;
- ret = 0;
- }
-
- sprunlock(p);
-
- if (ret != 0)
- ret = EIO;
-
- return (ret);
-}
-
-static void
-lx_ptrace_fire(void)
-{
- kthread_t *t = curthread;
- klwp_t *lwp = ttolwp(t);
- lx_lwp_data_t *lldp = lwp->lwp_brand;
-
- /*
- * The ptrace flag only applies until the next event is encountered
- * for the given LWP. If it's set, turn off the flag and poke the
- * controlling process by raising a signal.
- */
- if (lldp->br_ptrace) {
- lldp->br_ptrace = 0;
- tsignal(t, SIGTRAP);
- }
-}
-
-/*
- * Supports Linux PTRACE_SETOPTIONS handling which is similar to PTRACE_TRACEME
- * but return an event in the second byte of si_status.
- */
-static int
-lx_ptrace_ext_opts(int cmd, pid_t pid, uintptr_t val, int64_t *rval)
-{
- proc_t *p;
- lx_proc_data_t *lpdp;
- uint_t ret;
-
- if ((p = sprlock(pid)) == NULL)
- return (ESRCH);
-
- /*
- * Note that priv_proc_cred_perm can disallow access to ourself if
- * the proc's SNOCD p_flag is set, so we skip that check for ourself.
- */
- if (curproc != p &&
- priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) {
- sprunlock(p);
- return (EPERM);
- }
-
- if ((lpdp = p->p_brand_data) == NULL) {
- sprunlock(p);
- return (ESRCH);
- }
-
- switch (cmd) {
- case B_PTRACE_EXT_OPTS_SET:
- lpdp->l_ptrace_opts = (uint_t)val;
- break;
-
- case B_PTRACE_EXT_OPTS_GET:
- ret = lpdp->l_ptrace_opts;
- if (lpdp->l_ptrace_is_traced)
- ret |= EMUL_PTRACE_IS_TRACED;
- break;
-
- case B_PTRACE_EXT_OPTS_EVT:
- ret = lpdp->l_ptrace_event;
- lpdp->l_ptrace_event = 0;
- break;
-
- case B_PTRACE_DETACH:
- lpdp->l_ptrace_is_traced = 0;
- break;
-
- default:
- sprunlock(p);
- return (EINVAL);
- }
-
- sprunlock(p);
-
- if (cmd == B_PTRACE_EXT_OPTS_GET || cmd == B_PTRACE_EXT_OPTS_EVT) {
- if (copyout(&ret, (void *)val, sizeof (uint_t)) != 0)
- return (EFAULT);
- }
-
- *rval = 0;
- return (0);
-}
-
-/*
- * Used to support Linux PTRACE_SETOPTIONS handling and similar to
- * PTRACE_TRACEME. We signal ourselves to stop on return from this syscall and
- * setup the event reason so the emulation can pull this out when someone
- * 'waits' on this process.
- */
-static void
-lx_ptrace_stop_for_option(int option, ulong_t msg)
-{
- proc_t *p = ttoproc(curthread);
- sigqueue_t *sqp;
- lx_proc_data_t *lpdp;
- boolean_t child = B_FALSE;
-
- if ((lpdp = p->p_brand_data) == NULL) {
- /* this should never happen but just to be safe */
- return;
- }
-
- if (option & EMUL_PTRACE_O_CHILD) {
- child = B_TRUE;
- option &= ~EMUL_PTRACE_O_CHILD;
- }
-
- lpdp->l_ptrace_is_traced = 1;
-
- /* Track the event as the reason for stopping */
- switch (option) {
- case LX_PTRACE_O_TRACEFORK:
- if (!child) {
- lpdp->l_ptrace_event = LX_PTRACE_EVENT_FORK;
- lpdp->l_ptrace_eventmsg = msg;
- }
- break;
- case LX_PTRACE_O_TRACEVFORK:
- if (!child) {
- lpdp->l_ptrace_event = LX_PTRACE_EVENT_VFORK;
- lpdp->l_ptrace_eventmsg = msg;
- }
- break;
- case LX_PTRACE_O_TRACECLONE:
- if (!child) {
- lpdp->l_ptrace_event = LX_PTRACE_EVENT_CLONE;
- lpdp->l_ptrace_eventmsg = msg;
- }
- break;
- case LX_PTRACE_O_TRACEEXEC:
- lpdp->l_ptrace_event = LX_PTRACE_EVENT_EXEC;
- break;
- case LX_PTRACE_O_TRACEVFORKDONE:
- lpdp->l_ptrace_event = LX_PTRACE_EVENT_VFORK_DONE;
- lpdp->l_ptrace_eventmsg = msg;
- break;
- case LX_PTRACE_O_TRACEEXIT:
- lpdp->l_ptrace_event = LX_PTRACE_EVENT_EXIT;
- lpdp->l_ptrace_eventmsg = msg;
- break;
- case LX_PTRACE_O_TRACESECCOMP:
- lpdp->l_ptrace_event = LX_PTRACE_EVENT_SECCOMP;
- break;
- }
-
- /*
- * Post the required signal to ourselves so that we stop.
- *
- * Although Linux will send a SIGSTOP to a child process which is
- * stopped due to PTRACE_O_TRACEFORK, etc., we do not send that signal
- * since that leads us down the code path in the kernel which calls
- * stop(PR_JOBCONTROL, SIGSTOP), which in turn means that the TS_XSTART
- * flag gets turned off on the thread and this makes it complex to
- * actually get this process going when the userland application wants
- * to detach. Since consumers don't seem to depend on the specific
- * signal, we'll just stop both the parent and child the same way. We
- * do keep track of both the parent and child via the
- * EMUL_PTRACE_O_CHILD bit, in case we need to revisit this later.
- */
- psignal(p, SIGTRAP);
-
- /*
- * Since we're stopping, we need to post the SIGCHLD to the parent. The
- * code in sigcld expects p_wdata to be set to SIGTRAP before it can
- * send the signal, so do that here. We also need p_wcode to be set as
- * if we are ptracing, even though we're not really (see the code in
- * stop() when procstop is set and p->p_proc_flag has the P_PR_PTRACE
- * bit set). This is needed so that when the application calls waitid,
- * it will properly retrieve the process.
- */
- sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
- mutex_enter(&pidlock);
- p->p_wdata = SIGTRAP;
- p->p_wcode = CLD_TRAPPED;
- sigcld(p, sqp);
- mutex_exit(&pidlock);
-}
-
-static int
-lx_ptrace_geteventmsg(pid_t pid, ulong_t *msgp)
-{
- proc_t *p;
- lx_proc_data_t *lpdp;
- ulong_t msg;
-
- if ((p = sprlock(pid)) == NULL)
- return (ESRCH);
-
- if (curproc != p &&
- priv_proc_cred_perm(curproc->p_cred, p, NULL, VREAD) != 0) {
- sprunlock(p);
- return (EPERM);
- }
-
- if ((lpdp = p->p_brand_data) == NULL) {
- sprunlock(p);
- return (ESRCH);
- }
-
- msg = lpdp->l_ptrace_eventmsg;
- lpdp->l_ptrace_eventmsg = 0;
-
- sprunlock(p);
-
- if (copyout(&msg, (void *)msgp, sizeof (ulong_t)) != 0)
- return (EFAULT);
-
- return (0);
-}
-
-/*
- * Brand entry to allow us to optionally generate the ptrace SIGTRAP on exec().
- * This will only be called if ptrace is enabled -- and we only generate the
- * SIGTRAP if LX_PTRACE_O_TRACEEXEC hasn't been set.
- */
-void
-lx_ptrace_exectrap(proc_t *p)
-{
- lx_proc_data_t *lpdp;
-
- if ((lpdp = p->p_brand_data) == NULL ||
- !(lpdp->l_ptrace_opts & LX_PTRACE_O_TRACEEXEC)) {
- psignal(p, SIGTRAP);
- }
-}
-
uint32_t
lx_map32limit(proc_t *p)
{
@@ -718,6 +428,12 @@ lx_init_brand_data(zone_t *zone)
(void) strlcpy(data->lxzd_kernel_version, "2.4.21", LX_VERS_MAX);
data->lxzd_max_syscall = LX_NSYSCALLS;
zone->zone_brand_data = data;
+
+ /*
+ * In Linux, if the init(1) process terminates the system panics.
+ * The zone must reboot to simulate this behaviour.
+ */
+ zone->zone_reboot_on_init_exit = B_TRUE;
}
void
@@ -752,6 +468,8 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
struct termios *termios;
uint_t termios_len;
int error;
+ int code;
+ int sig;
lx_brand_registration_t reg;
lx_lwp_data_t *lwpd;
@@ -832,6 +550,16 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
lwpd->br_scms = 1;
#endif
+ if (pd->l_traceflag != NULL && pd->l_ptrace != 0) {
+ /*
+ * If ptrace(2) is active on this process, it is likely
+ * that we just finished an emulated execve(2) in a
+ * traced child. The usermode traceflag will have been
+ * clobbered by the exec, so we set it again here:
+ */
+ (void) suword32((void *)pd->l_traceflag, 1);
+ }
+
*rval = 0;
return (0);
case B_TTYMODES:
@@ -931,11 +659,6 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
return (0);
}
- case B_PTRACE_SYSCALL:
- *rval = lx_ptrace_syscall_set((pid_t)arg1, (id_t)arg2,
- (int)arg3);
- return (0);
-
case B_SYSENTRY:
if (lx_systrace_enabled) {
ASSERT(lx_systrace_entry_ptr != NULL);
@@ -963,7 +686,7 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
#endif
}
- lx_ptrace_fire();
+ (void) lx_ptrace_stop(LX_PR_SYSENTRY);
pd = p->p_brand_data;
@@ -984,7 +707,7 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
(*lx_systrace_return_ptr)(arg1, arg2, arg2, 0, 0, 0, 0);
}
- lx_ptrace_fire();
+ (void) lx_ptrace_stop(LX_PR_SYSEXIT);
pd = p->p_brand_data;
@@ -1010,20 +733,55 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
*/
return (lx_sched_affinity(cmd, arg1, arg2, arg3, rval));
- case B_PTRACE_EXT_OPTS:
+ case B_PTRACE_STOP_FOR_OPT:
+ return (lx_ptrace_stop_for_option((int)arg1, arg2 == 0 ?
+ B_FALSE : B_TRUE, (ulong_t)arg3));
+
+ case B_PTRACE_CLONE_BEGIN:
+ return (lx_ptrace_set_clone_inherit((int)arg1, arg2 == 0 ?
+ B_FALSE : B_TRUE));
+
+ case B_PTRACE_KERNEL:
+ return (lx_ptrace_kernel((int)arg1, (pid_t)arg2, arg3, arg4));
+
+ case B_HELPER_WAITID: {
+ idtype_t idtype = (idtype_t)arg1;
+ id_t id = (id_t)arg2;
+ siginfo_t *infop = (siginfo_t *)arg3;
+ int options = (int)arg4;
+
+ lwpd = ttolxlwp(curthread);
+
/*
- * Set or get the ptrace extended options or get the event
- * reason for the stop.
+ * Our brand-specific waitid helper only understands a subset of
+ * the possible idtypes. Ensure we keep to that subset here:
*/
- return (lx_ptrace_ext_opts((int)arg1, (pid_t)arg2, arg3, rval));
+ if (idtype != P_ALL && idtype != P_PID && idtype != P_PGID) {
+ return (EINVAL);
+ }
- case B_PTRACE_STOP_FOR_OPT:
- lx_ptrace_stop_for_option((int)arg1, (ulong_t)arg2);
- return (0);
+ /*
+ * Enable the return of emulated ptrace(2) stop conditions
+ * through lx_waitid_helper, and stash the Linux-specific
+ * extra waitid() flags.
+ */
+ lwpd->br_waitid_emulate = B_TRUE;
+ lwpd->br_waitid_flags = (int)arg5;
+
+#if defined(_SYSCALL32_IMPL)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ return (waitsys32(idtype, id, infop, options));
+ } else
+#endif
+ {
+ return (waitsys(idtype, id, infop, options));
+ }
+
+ lwpd->br_waitid_emulate = B_FALSE;
+ lwpd->br_waitid_flags = 0;
- case B_PTRACE_GETEVENTMSG:
- lx_ptrace_geteventmsg((pid_t)arg1, (ulong_t *)arg2);
return (0);
+ }
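
For reference, the emulation library reaches the B_HELPER_WAITID case above through the brand system call. The following is a minimal userland sketch only, assuming the conventional syscall(SYS_brand, ...) entry point; the wrapper name is hypothetical and is not part of this change:

    /* Hypothetical wrapper: have the in-kernel helper perform waitid(). */
    static int
    lx_call_waitid_helper(idtype_t idtype, id_t id, siginfo_t *infop,
        int options, int lx_flags)
    {
        /* Arguments mirror arg1..arg5 as consumed by the case above. */
        return (syscall(SYS_brand, B_HELPER_WAITID, idtype, id, infop,
            options, lx_flags));
    }
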
case B_UNSUPPORTED:
{
@@ -1186,7 +944,19 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
return (0);
case B_EXIT_AS_SIG:
- exit(CLD_KILLED, (int)arg1);
+ code = CLD_KILLED;
+ sig = (int)arg1;
+ proc_is_exiting(p);
+ if (exitlwps(1) != 0) {
+ mutex_enter(&p->p_lock);
+ lwp_exit();
+ }
+ ttolwp(curthread)->lwp_cursig = sig;
+ if (sig == SIGSEGV) {
+ if (core(sig, 0) == 0)
+ code = CLD_DUMPED;
+ }
+ exit(code, sig);
/* NOTREACHED */
break;
@@ -1254,6 +1024,7 @@ lx_copy_procdata(proc_t *child, proc_t *parent)
ppd = parent->p_brand_data;
ASSERT(ppd != NULL);
+ ASSERT(parent->p_brand == &lx_brand);
cpd = kmem_alloc(sizeof (lx_proc_data_t), KM_SLEEP);
*cpd = *ppd;
@@ -1322,13 +1093,14 @@ lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
struct execenv origenv;
stack_t orig_sigaltstack;
struct user *up = PTOU(ttoproc(curthread));
- lx_elf_data_t *edp =
- &((lx_proc_data_t *)ttoproc(curthread)->p_brand_data)->l_elf_data;
+ lx_elf_data_t *edp;
char *lib_path = NULL;
ASSERT(ttoproc(curthread)->p_brand == &lx_brand);
ASSERT(ttoproc(curthread)->p_brand_data != NULL);
+ edp = &ttolxproc(curthread)->l_elf_data;
+
if (args->to_model == DATAMODEL_NATIVE) {
lib_path = LX_LIB_PATH;
}
@@ -1685,6 +1457,7 @@ _init(void)
/* for lx_futex() */
lx_futex_init();
+ lx_ptrace_init();
err = mod_install(&modlinkage);
if (err != 0) {
@@ -1724,6 +1497,7 @@ _fini(void)
if (brand_zone_count(&lx_brand))
return (EBUSY);
+ lx_ptrace_fini();
lx_pid_fini();
lx_ioctl_fini();
diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c
index c550ecf9af..abb0ab6e63 100644
--- a/usr/src/uts/common/brand/lx/os/lx_misc.c
+++ b/usr/src/uts/common/brand/lx/os/lx_misc.c
@@ -80,7 +80,7 @@ lx_exec()
klwp_t *lwp = ttolwp(curthread);
struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
proc_t *p = ttoproc(curthread);
- lx_proc_data_t *pd = p->p_brand_data;
+ lx_proc_data_t *pd = ptolxproc(p);
int err;
/*
@@ -113,6 +113,13 @@ lx_exec()
lx_pid_reassign(curthread);
}
+ /*
+ * Inform ptrace(2) that we are processing an execve(2) call so that if
+ * we are traced we can post either the PTRACE_EVENT_EXEC event or the
+ * legacy SIGTRAP.
+ */
+ (void) lx_ptrace_stop_for_option(LX_PTRACE_O_TRACEEXEC, B_FALSE, 0);
+
/* clear the fsbase values until the app. can reinitialize them */
lwpd->br_lx_fsbase = NULL;
lwpd->br_ntv_fsbase = NULL;
@@ -137,15 +144,21 @@ void
lx_exitlwp(klwp_t *lwp)
{
struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
- proc_t *p;
+ proc_t *p = lwptoproc(lwp);
kthread_t *t;
sigqueue_t *sqp = NULL;
pid_t ppid;
id_t ptid;
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
if (lwpd == NULL)
return; /* second time thru' */
+ mutex_enter(&p->p_lock);
+ lx_ptrace_exit(p, lwp);
+ mutex_exit(&p->p_lock);
+
if (lwpd->br_clear_ctidp != NULL) {
(void) suword32(lwpd->br_clear_ctidp, 0);
(void) lx_futex((uintptr_t)lwpd->br_clear_ctidp, FUTEX_WAKE, 1,
@@ -226,9 +239,17 @@ lx_freelwp(klwp_t *lwp)
if (lwpd != NULL) {
(void) removectx(lwptot(lwp), lwp, lx_save, lx_restore,
NULL, NULL, lx_save, NULL);
- if (lwpd->br_pid != 0)
+ if (lwpd->br_pid != 0) {
lx_pid_rele(lwptoproc(lwp)->p_pid,
lwptot(lwp)->t_tid);
+ }
+
+ /*
+ * Ensure that lx_ptrace_exit() has been called to detach
+ * ptrace(2) tracers and tracees.
+ */
+ VERIFY(lwpd->br_ptrace_tracer == NULL);
+ VERIFY(lwpd->br_ptrace_accord == NULL);
lwp->lwp_brand = NULL;
kmem_free(lwpd, sizeof (struct lx_lwp_data));
@@ -238,8 +259,8 @@ lx_freelwp(klwp_t *lwp)
int
lx_initlwp(klwp_t *lwp)
{
- struct lx_lwp_data *lwpd;
- struct lx_lwp_data *plwpd;
+ lx_lwp_data_t *lwpd;
+ lx_lwp_data_t *plwpd = ttolxlwp(curthread);
kthread_t *tp = lwptot(lwp);
lwpd = kmem_zalloc(sizeof (struct lx_lwp_data), KM_SLEEP);
@@ -265,8 +286,7 @@ lx_initlwp(klwp_t *lwp)
if (tp->t_next == tp) {
lwpd->br_ppid = tp->t_procp->p_ppid;
lwpd->br_ptid = -1;
- } else if (ttolxlwp(curthread) != NULL) {
- plwpd = ttolxlwp(curthread);
+ } else if (plwpd != NULL) {
bcopy(plwpd->br_tls, lwpd->br_tls, sizeof (lwpd->br_tls));
lwpd->br_ppid = plwpd->br_pid;
lwpd->br_ptid = curthread->t_tid;
@@ -292,6 +312,14 @@ lx_initlwp(klwp_t *lwp)
installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL,
lx_save, NULL);
+ /*
+ * If the parent LWP has a ptrace(2) tracer, the new LWP may
+ * need to inherit that same tracer.
+ */
+ if (plwpd != NULL) {
+ lx_ptrace_inherit_tracer(plwpd, lwpd);
+ }
+
return (0);
}
@@ -524,10 +552,7 @@ lx_exit_with_sig(proc_t *cp, sigqueue_t *sqp, void *brand_data)
* SIGCHLD X -
*
* This is an XOR of __WCLONE being set, and SIGCHLD being the signal sent on
- * process exit. Since (flags & __WCLONE) is not guaranteed to have the
- * least-significant bit set when the flags is enabled, !! is used to place
- * that bit into the least significant bit. Then, the bitwise XOR can be
- * used, because there is no logical XOR in the C language.
+ * process exit.
*
* More information on wait in lx brands can be found at
* usr/src/lib/brand/lx/lx_brand/common/wait.c.
@@ -535,29 +560,45 @@ lx_exit_with_sig(proc_t *cp, sigqueue_t *sqp, void *brand_data)
boolean_t
lx_wait_filter(proc_t *pp, proc_t *cp)
{
- int flags;
+ lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+ int flags = lwpd->br_waitid_flags;
boolean_t ret;
- if (LX_ARGS(waitid) != NULL) {
- flags = LX_ARGS(waitid)->waitid_flags;
- mutex_enter(&cp->p_lock);
- if (flags & LX_WALL) {
- ret = B_TRUE;
- } else if (cp->p_stat == SZOMB ||
- cp->p_brand == &native_brand) {
- ret = (((!!(flags & LX_WCLONE)) ^
- (stol_signo[SIGCHLD] == cp->p_exit_data))
- ? B_TRUE : B_FALSE);
+ if (!lwpd->br_waitid_emulate) {
+ return (B_TRUE);
+ }
+
+ mutex_enter(&cp->p_lock);
+ if (flags & LX_WALL) {
+ ret = B_TRUE;
+
+ } else {
+ int exitsig;
+ boolean_t is_clone, _wclone;
+
+ /*
+ * Determine the exit signal for this process:
+ */
+ if (cp->p_stat == SZOMB || cp->p_brand == &native_brand) {
+ exitsig = cp->p_exit_data;
} else {
- ret = (((!!(flags & LX_WCLONE)) ^
- (stol_signo[SIGCHLD] == ptolxproc(cp)->l_signal))
- ? B_TRUE : B_FALSE);
+ exitsig = ptolxproc(cp)->l_signal;
}
- mutex_exit(&cp->p_lock);
- return (ret);
- } else {
- return (B_TRUE);
+
+ /*
+ * To enable the bitwise XOR to stand in for the absent C
+ * logical XOR, we use the logical NOT operator twice to
+ * ensure the least significant bit is populated with the
+ * __WCLONE flag status.
+ */
+ _wclone = !!(flags & LX_WCLONE);
+ is_clone = (stol_signo[SIGCHLD] == exitsig);
+
+ ret = (_wclone ^ is_clone) ? B_TRUE : B_FALSE;
}
+ mutex_exit(&cp->p_lock);
+
+ return (ret);
}
void
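
Worked through the four possible cases (with LX_WALL clear), the XOR in lx_wait_filter() above yields the intended Linux waitpid(2) child selection:

    __WCLONE set    child exit signal == SIGCHLD    child matched?
    no              yes                             yes
    no              no                              no
    yes             yes                             no
    yes             no                              yes
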
diff --git a/usr/src/uts/common/brand/lx/os/lx_pid.c b/usr/src/uts/common/brand/lx/os/lx_pid.c
index aa8c751bc2..8552754c43 100644
--- a/usr/src/uts/common/brand/lx/os/lx_pid.c
+++ b/usr/src/uts/common/brand/lx/os/lx_pid.c
@@ -22,7 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -222,6 +222,28 @@ lx_lpid_to_spair(pid_t l_pid, pid_t *s_pid, id_t *s_tid)
{
struct lx_pid *hp;
+ if (l_pid == 1) {
+ pid_t initpid;
+
+ /*
+ * We are trying to look up the Linux init process for the
+ * current zone, which we pretend has pid 1.
+ */
+ if ((initpid = curzone->zone_proc_initpid) == -1) {
+ /*
+ * We could not find the init process for this zone.
+ */
+ return (-1);
+ }
+
+ if (s_pid != NULL)
+ *s_pid = initpid;
+ if (s_tid != NULL)
+ *s_tid = 1;
+
+ return (0);
+ }
+
mutex_enter(&hash_lock);
for (hp = ltos_pid_hash[LTOS_HASH(l_pid)]; hp; hp = hp->ltos_next) {
if (l_pid == hp->l_pid) {
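
With the pid 1 special case above, a lookup of the emulated init process now resolves to the zone's real init pid. A minimal sketch of such a lookup (hypothetical caller, not part of this change):

    pid_t s_pid;
    id_t s_tid;

    if (lx_lpid_to_spair(1, &s_pid, &s_tid) == 0) {
        /* s_pid is curzone->zone_proc_initpid; s_tid is 1. */
    }
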
diff --git a/usr/src/uts/common/brand/lx/os/lx_ptrace.c b/usr/src/uts/common/brand/lx/os/lx_ptrace.c
new file mode 100644
index 0000000000..6e4b74531d
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_ptrace.c
@@ -0,0 +1,2270 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Emulation of the Linux ptrace(2) interface.
+ *
+ * OVERVIEW
+ *
+ * The Linux process model is somewhat different from the illumos native
+ * model. One critical difference is that each Linux thread has a unique
+ * identifier in the pid namespace. The lx brand assigns a pid to each LWP
+ * within the emulated process, giving the pid of the process itself to the
+ * first LWP.
+ *
+ * The Linux ptrace(2) interface allows for any LWP in a branded process to
+ * exert control over any other LWP within the same zone. Control is exerted
+ * by the use of the ptrace(2) system call itself, which accepts a number of
+ * request codes. Feedback on traced events is primarily received by the
+ * tracer through SIGCLD and the emulated waitpid(2) and waitid(2) system
+ * calls. Many of the possible ptrace(2) requests will only succeed if the
+ * target LWP is in a "ptrace-stop" condition.
+ *
+ * HISTORY
+ *
+ * The brand support for ptrace(2) was originally built on top of the rich
+ * support for debugging and tracing provided through the illumos /proc
+ * interfaces, mounted at /native/proc within the zone. The native legacy
+ * ptrace(3C) functionality was used as a starting point, but was generally
+ * insufficient for complete and precise emulation. The extant legacy
+ * interface, and indeed our native SIGCLD and waitid(2) facilities, are
+ * focused on _process_ level concerns -- the Linux interface has been
+ * extended to be aware of LWPs as well.
+ *
+ * In order to allow us to focus on providing more complete and accurate
+ * emulation without extensive and undesirable changes to the native
+ * facilities, this second generation ptrace(2) emulation is mostly separate
+ * from any other tracing or debugging framework in the system.
+ *
+ * ATTACHING TRACERS TO TRACEES
+ *
+ * There are several ways that a child LWP may become traced by a tracer.
+ * To determine which attach method caused a tracee to become attached, one
+ * may inspect the "br_ptrace_attach" member of the LWP-specific brand data
+ * with the debugger.
+ *
+ * The first attach methods to consider are the attaching ptrace(2) requests:
+ *
+ * PTRACE_TRACEME
+ *
+ * If an LWP makes a PTRACE_TRACEME call, it will be attached as a tracee
+ * to its parent LWP (br_ppid). Using PTRACE_TRACEME does _not_ cause the
+ * tracee to be held in a stop condition. It is common practice for
+ * consumers to raise(SIGSTOP) immediately afterward.
+ *
+ * PTRACE_ATTACH
+ *
+ * An LWP may attempt to trace any other LWP in this, or another, process.
+ * We currently allow any attach where the process containing the tracer
+ * LWP has permission to write to /proc for the process containing the
+ * intended tracee. This action also sends a SIGSTOP to the newly attached
+ * tracee.
+ *
+ * The second class of attach methods are the clone(2)/fork(2) inheritance
+ * options that may be set on a tracee with PTRACE_SETOPTIONS:
+ *
+ * PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK and PTRACE_O_TRACECLONE
+ *
+ * If these options have been set on a tracee, then a fork(2), vfork(2) or
+ * clone(2) respectively will cause the newly created LWP to be traced by
+ * the same tracer. The same set of ptrace(2) options will also be set on
+ * the new child.
+ *
+ * The third class of attach method is the PTRACE_CLONE flag to clone(2).
+ * This flag induces the same inheritance as PTRACE_O_TRACECLONE, but is
+ * passed by the tracee as an argument to clone(2).
+ *
+ * DETACHING TRACEES
+ *
+ * Tracees can be detached by the tracer with the PTRACE_DETACH request.
+ * This request is only valid when the tracee is in a ptrace(2) stop
+ * condition, and is itself a restarting action.
+ *
+ * If the tracer exits without detaching all of its tracees, then all of the
+ * tracees are automatically detached and restarted. If a tracee was in
+ * "signal-delivery-stop" at the time the tracer exited, the signal will be
+ * released to the child unless it is a SIGSTOP. We drop this instance of
+ * SIGSTOP in order to prevent the child from becoming stopped by job
+ * control.
+ *
+ * ACCORD ALLOCATION AND MANAGEMENT
+ *
+ * The "lx_ptrace_accord_t" object tracks the agreement between a tracer LWP
+ * and zero or more tracee LWPs. It is explicitly illegal for a tracee to
+ * trace its tracer, and we block this in PTRACE_ATTACH/PTRACE_TRACEME.
+ *
+ * An LWP starts out without an accord. If a child of that LWP calls
+ * ptrace(2) with the PTRACE_TRACEME subcommand, or if the LWP itself uses
+ * PTRACE_ATTACH, an accord will be allocated and stored on that LWP. The
+ * accord structure is not released from that LWP until it arrives in
+ * lx_exitlwp(), as called by lwp_exit(). A new accord will not be
+ * allocated, even if one does not exist, once an LWP arrives in lx_exitlwp()
+ * and sets the LX_PTRACE_EXITING flag. An LWP will have at most one accord
+ * structure throughout its entire lifecycle; once it has one, it has the
+ * same one until death.
+ *
+ * The accord is reference counted (lxpa_refcnt), starting at a count of one
+ * at creation to represent the link from the tracer LWP to its accord. The
+ * accord is not freed until the reference count falls to zero.
+ *
+ * To make mutual exclusion between a detaching tracer and various notifying
+ * tracees simpler, the tracer will hold "pidlock" while it clears the
+ * accord members that point back to the tracer LWP and CV.
+ *
+ * SIGNALS AND JOB CONTROL
+ *
+ * Various actions, either directly ptrace(2) related or commonly associated
+ * with tracing, cause process- or thread-directed SIGSTOP signals to be sent
+ * to tracees. These signals, and indeed any signal other than SIGKILL, can
+ * be suppressed by the tracer when using a restarting request (including
+ * PTRACE_DETACH) on a child. The signal may also be substituted for a
+ * different signal.
+ *
+ * If a SIGSTOP (or other stopping signal) is not suppressed by the tracer,
+ * it will induce the regular illumos native job control stop of the entire
+ * traced process. This is at least passingly similar to the Linux "group
+ * stop" ptrace(2) condition.
+ *
+ * SYSTEM CALL TRACING
+ *
+ * The ptrace(2) interface enables the tracer to hold the tracee on entry and
+ * exit from system calls. When a stopped tracee is restarted through the
+ * PTRACE_SYSCALL request, the LX_PTRACE_SYSCALL flag is set until the next
+ * system call boundary. Whether this is a "syscall-entry-stop" or
+ * "syscall-exit-stop", the tracee is held and the tracer is notified via
+ * SIGCLD/waitpid(2) in the usual way. The LX_PTRACE_SYSCALL flag is
+ * cleared after each stop; for ongoing system call tracing the tracee must
+ * be continuously restarted with PTRACE_SYSCALL.
+ *
+ * EVENT STOPS
+ *
+ * Various events (particularly FORK, VFORK, CLONE, EXEC and EXIT) are
+ * enabled by the tracer through PTRACE_SETOPTIONS. Once enabled, the tracee
+ * will be stopped at the nominated points of interest and the tracer
+ * notified. The tracer may request additional information about the event,
+ * such as the pid of new LWPs and processes, via PTRACE_GETEVENTMSG.
+ *
+ * LOCK ORDERING RULES
+ *
+ * It is not safe, in general, to hold p_lock for two different processes at
+ * the same time. This constraint is the primary reason for the existence
+ * (and complexity) of the ptrace(2) accord mechanism.
+ *
+ * In order to facilitate looking up accords by the "pid" of a tracer LWP,
+ * p_lock for the tracer process may be held while entering the accord mutex
+ * (lxpa_lock). This mutex protects the accord flags and reference count.
+ * The reference count is manipulated through lx_ptrace_accord_hold() and
+ * lx_ptrace_accord_rele().
+ *
+ * DO NOT interact with the accord mutex (lxpa_lock) directly. The
+ * lx_ptrace_accord_enter() and lx_ptrace_accord_exit() functions do various
+ * book-keeping and lock ordering enforcement and MUST be used.
+ *
+ * It is NOT legal to take ANY p_lock while holding the accord mutex
+ * (lxpa_lock). If the lxpa_tracees_lock is to be held concurrently with
+ * lxpa_lock, lxpa_lock MUST be taken first and dropped before taking p_lock
+ * of any processes from the tracee list.
+ *
+ * It is NOT legal to take a tracee p_lock and then attempt to enter the
+ * accord mutex (or tracee list mutex) of its tracer. When running as the
+ * tracee LWP, the tracee's hold will prevent the accord from being freed.
+ * Use of the LX_PTRACE_STOPPING or LX_PTRACE_CLONING flag in the
+ * LWP-specific brand data prevents an exiting tracer from altering the
+ * tracee until the tracee has come to an orderly stop, without requiring the
+ * tracee to hold its own p_lock the entire time it is stopping.
+ *
+ * It is not safe, in general, to enter "pidlock" while holding the p_lock of
+ * any process. It is similarly illegal to hold any accord locks (lxpa_lock
+ * or lxpa_tracees_lock) while attempting to enter "pidlock". As "pidlock" is a
+ * global mutex, it should be held for the shortest possible time.
+ */
+
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/ksynch.h>
+#include <sys/sysmacros.h>
+#include <sys/procfs.h>
+#include <sys/cmn_err.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/wait.h>
+#include <sys/prsystm.h>
+#include <sys/note.h>
+
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_impl.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_pid.h>
+#include <lx_syscall.h>
+#include <lx_signum.h>
+
+
+typedef enum lx_ptrace_cont_flags_t {
+ LX_PTC_NONE = 0x00,
+ LX_PTC_SYSCALL = 0x01,
+ LX_PTC_SINGLESTEP = 0x02
+} lx_ptrace_cont_flags_t;
+
+/*
+ * Macros for checking the state of an LWP via "br_ptrace_flags":
+ */
+#define LX_PTRACE_BUSY \
+ (LX_PTRACE_EXITING | LX_PTRACE_STOPPING | LX_PTRACE_CLONING)
+
+#define VISIBLE(a) (((a)->br_ptrace_flags & LX_PTRACE_EXITING) == 0)
+#define TRACEE_BUSY(a) (((a)->br_ptrace_flags & LX_PTRACE_BUSY) != 0)
+
+#define ACCORD_HELD(a) MUTEX_HELD(&(a)->lxpa_lock)
+
+static kcondvar_t lx_ptrace_busy_cv;
+static kmem_cache_t *lx_ptrace_accord_cache;
+
+/*
+ * Enter the accord mutex.
+ */
+static void
+lx_ptrace_accord_enter(lx_ptrace_accord_t *accord)
+{
+ VERIFY(MUTEX_NOT_HELD(&accord->lxpa_tracees_lock));
+
+ mutex_enter(&accord->lxpa_lock);
+}
+
+/*
+ * Exit the accord mutex. If the reference count has dropped to zero,
+ * free the accord.
+ */
+static void
+lx_ptrace_accord_exit(lx_ptrace_accord_t *accord)
+{
+ VERIFY(ACCORD_HELD(accord));
+
+ if (accord->lxpa_refcnt > 0) {
+ mutex_exit(&accord->lxpa_lock);
+ return;
+ }
+
+ /*
+ * When the reference count drops to zero we must free the accord.
+ */
+ VERIFY(accord->lxpa_tracer == NULL);
+ VERIFY(MUTEX_NOT_HELD(&accord->lxpa_tracees_lock));
+ VERIFY(list_is_empty(&accord->lxpa_tracees));
+ VERIFY(accord->lxpa_flags & LX_ACC_TOMBSTONE);
+
+ mutex_destroy(&accord->lxpa_lock);
+ mutex_destroy(&accord->lxpa_tracees_lock);
+
+ kmem_cache_free(lx_ptrace_accord_cache, accord);
+}
+
+/*
+ * Drop our reference to this accord. If this drops the reference count
+ * to zero, the next lx_ptrace_accord_exit() will free the accord.
+ */
+static void
+lx_ptrace_accord_rele(lx_ptrace_accord_t *accord)
+{
+ VERIFY(ACCORD_HELD(accord));
+
+ VERIFY(accord->lxpa_refcnt > 0);
+ accord->lxpa_refcnt--;
+}
+
+/*
+ * Place an additional hold on an accord.
+ */
+static void
+lx_ptrace_accord_hold(lx_ptrace_accord_t *accord)
+{
+ VERIFY(ACCORD_HELD(accord));
+
+ accord->lxpa_refcnt++;
+}
+
+/*
+ * Fetch the accord for this LWP. If one has not yet been created, and the
+ * process is not exiting, allocate it now. Must be called with p_lock held
+ * for the process containing the target LWP.
+ *
+ * If successful, we return holding the accord lock (lxpa_lock).
+ */
+static int
+lx_ptrace_accord_get_locked(klwp_t *lwp, lx_ptrace_accord_t **accordp,
+ boolean_t allocate_one)
+{
+ lx_ptrace_accord_t *lxpa;
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ proc_t *p = lwptoproc(lwp);
+
+ VERIFY(MUTEX_HELD(&p->p_lock));
+
+ /*
+ * If this LWP does not have an accord, we wish to allocate
+ * and install one.
+ */
+ if ((lxpa = lwpd->br_ptrace_accord) == NULL) {
+ if (!allocate_one || !VISIBLE(lwpd)) {
+ /*
+ * Either we do not wish to allocate an accord, or this
+ * LWP has already begun exiting from a ptrace
+ * perspective.
+ */
+ *accordp = NULL;
+ return (ESRCH);
+ }
+
+ lxpa = kmem_cache_alloc(lx_ptrace_accord_cache, KM_SLEEP);
+ bzero(lxpa, sizeof (*lxpa));
+
+ /*
+ * The initial reference count is 1 because we are referencing
+ * it in from the soon-to-be tracer LWP.
+ */
+ lxpa->lxpa_refcnt = 1;
+ mutex_init(&lxpa->lxpa_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&lxpa->lxpa_tracees_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&lxpa->lxpa_tracees, sizeof (lx_lwp_data_t),
+ offsetof(lx_lwp_data_t, br_ptrace_linkage));
+ lxpa->lxpa_cvp = &p->p_cv;
+
+ lxpa->lxpa_tracer = lwpd;
+ lwpd->br_ptrace_accord = lxpa;
+ }
+
+ /*
+ * Lock the accord before returning it to the caller.
+ */
+ lx_ptrace_accord_enter(lxpa);
+
+ /*
+ * There should be at least one active reference to this accord,
+ * otherwise it should have been freed.
+ */
+ VERIFY(lxpa->lxpa_refcnt > 0);
+
+ *accordp = lxpa;
+ return (0);
+}
+
+/*
+ * Accords belong to the tracer LWP. Get the accord for this tracer or return
+ * an error if it was not possible. To prevent deadlocks, the caller MUST NOT
+ * hold p_lock on its own or any other process.
+ *
+ * If successful, we return holding the accord lock (lxpa_lock).
+ */
+static int
+lx_ptrace_accord_get_by_pid(pid_t lxpid, lx_ptrace_accord_t **accordp)
+{
+ int ret = ESRCH;
+ pid_t apid;
+ id_t atid;
+ proc_t *aproc;
+ kthread_t *athr;
+ klwp_t *alwp;
+ lx_lwp_data_t *alwpd;
+
+ VERIFY(MUTEX_NOT_HELD(&curproc->p_lock));
+
+ /*
+ * Locate the process containing the tracer LWP based on its Linux pid
+ * and lock it.
+ */
+ if (lx_lpid_to_spair(lxpid, &apid, &atid) != 0 ||
+ (aproc = sprlock(apid)) == NULL) {
+ return (ESRCH);
+ }
+
+ /*
+ * Locate the tracer LWP itself and ensure that it is visible to
+ * ptrace(2).
+ */
+ if ((athr = idtot(aproc, atid)) == NULL ||
+ (alwp = ttolwp(athr)) == NULL ||
+ (alwpd = lwptolxlwp(alwp)) == NULL ||
+ !VISIBLE(alwpd)) {
+ sprunlock(aproc);
+ return (ESRCH);
+ }
+
+ /*
+ * We should not fetch our own accord this way.
+ */
+ if (athr == curthread) {
+ sprunlock(aproc);
+ return (EPERM);
+ }
+
+ /*
+ * Fetch (or allocate) the accord owned by this tracer LWP:
+ */
+ ret = lx_ptrace_accord_get_locked(alwp, accordp, B_TRUE);
+
+ /*
+ * Unlock the process and return.
+ */
+ sprunlock(aproc);
+ return (ret);
+}
+
+/*
+ * Get (or allocate) the ptrace(2) accord for the current LWP, acting as a
+ * tracer. The caller MUST NOT currently hold p_lock on the process containing
+ * this LWP.
+ *
+ * If successful, we return holding the accord lock (lxpa_lock).
+ */
+static int
+lx_ptrace_accord_get(lx_ptrace_accord_t **accordp, boolean_t allocate_one)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ proc_t *p = lwptoproc(lwp);
+ int ret;
+
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
+ /*
+ * Lock the tracer (this LWP).
+ */
+ mutex_enter(&p->p_lock);
+
+ /*
+ * Fetch (or allocate) the accord for this LWP:
+ */
+ ret = lx_ptrace_accord_get_locked(lwp, accordp, allocate_one);
+
+ mutex_exit(&p->p_lock);
+
+ return (ret);
+}
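
As a worked illustration of the LOCK ORDERING RULES above and of the accessors just defined, a routine in this file that walks its own tracee list could be structured roughly as follows. This is a sketch only; the function name is hypothetical, and no tracee p_lock is taken while either accord lock is held:

    static void
    lx_ptrace_walk_tracees_sketch(void)
    {
        lx_ptrace_accord_t *accord;
        lx_lwp_data_t *remote;

        /* On success, lx_ptrace_accord_get() returns holding lxpa_lock. */
        if (lx_ptrace_accord_get(&accord, B_FALSE) != 0)
            return;

        /* Keep the accord alive, then drop lxpa_lock before other locks. */
        lx_ptrace_accord_hold(accord);
        lx_ptrace_accord_exit(accord);

        /* The tracee list lock is taken with no other locks held. */
        mutex_enter(&accord->lxpa_tracees_lock);
        for (remote = list_head(&accord->lxpa_tracees); remote != NULL;
            remote = list_next(&accord->lxpa_tracees, remote)) {
            /* Note tracees of interest; take no p_lock in this loop. */
        }
        mutex_exit(&accord->lxpa_tracees_lock);

        /* Release the speculative hold. */
        lx_ptrace_accord_enter(accord);
        lx_ptrace_accord_rele(accord);
        lx_ptrace_accord_exit(accord);
    }
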
+
+/*
+ * Restart an LWP if it is in "ptrace-stop". This function may induce sleep,
+ * so the caller MUST NOT hold any mutexes other than p_lock for the process
+ * containing the LWP.
+ */
+static void
+lx_ptrace_restart_lwp(klwp_t *lwp)
+{
+ kthread_t *rt = lwptot(lwp);
+ proc_t *rproc = lwptoproc(lwp);
+ lx_lwp_data_t *rlwpd = lwptolxlwp(lwp);
+
+ VERIFY(rt != curthread);
+ VERIFY(MUTEX_HELD(&rproc->p_lock));
+
+ /*
+ * Exclude potential meddling from procfs.
+ */
+ prbarrier(rproc);
+
+ /*
+ * Check that the LWP is still in "ptrace-stop" and, if so, restart it.
+ */
+ thread_lock(rt);
+ if (BSTOPPED(rt) && rt->t_whystop == PR_BRAND) {
+ rt->t_schedflag |= TS_BSTART;
+ setrun_locked(rt);
+
+ /*
+ * Clear stop reason.
+ */
+ rlwpd->br_ptrace_whystop = 0;
+ rlwpd->br_ptrace_whatstop = 0;
+ rlwpd->br_ptrace_flags &= ~LX_PTRACE_CLDPEND;
+ }
+ thread_unlock(rt);
+}
+
+static void
+lx_winfo(lx_lwp_data_t *remote, k_siginfo_t *ip, boolean_t waitflag,
+ pid_t *event_ppid, pid_t *event_pid)
+{
+ int signo;
+
+ /*
+ * Populate our k_siginfo_t with data about this "ptrace-stop"
+ * condition:
+ */
+ bzero(ip, sizeof (*ip));
+ ip->si_signo = SIGCLD;
+ ip->si_pid = remote->br_pid;
+ ip->si_code = CLD_TRAPPED;
+
+ switch (remote->br_ptrace_whatstop) {
+ case LX_PR_SYSENTRY:
+ case LX_PR_SYSEXIT:
+ ip->si_status = SIGTRAP;
+ if (remote->br_ptrace_options & LX_PTRACE_O_TRACESYSGOOD) {
+ ip->si_status |= 0x80;
+ }
+ break;
+
+ case LX_PR_SIGNALLED:
+ signo = remote->br_ptrace_stopsig;
+ if (signo < 1 || signo >= LX_NSIG) {
+ /*
+ * If this signal number is not valid, pretend it
+ * was a SIGTRAP.
+ */
+ ip->si_status = SIGTRAP;
+ } else {
+ ip->si_status = ltos_signo[signo];
+ }
+ break;
+
+ case LX_PR_EVENT:
+ ip->si_status = SIGTRAP | remote->br_ptrace_event;
+ /*
+ * Record the Linux pid of both this LWP and the create
+ * event we are dispatching. We will use this information
+ * to unblock any subsequent ptrace(2) events that depend
+ * on this one.
+ */
+ if (event_ppid != NULL)
+ *event_ppid = remote->br_pid;
+ if (event_pid != NULL)
+ *event_pid = (pid_t)remote->br_ptrace_eventmsg;
+ break;
+
+ default:
+ cmn_err(CE_PANIC, "unexpected stop subreason: %d",
+ remote->br_ptrace_whatstop);
+ }
+
+ /*
+ * If WNOWAIT was specified, do not mark the event as posted
+ * so that it may be re-fetched on another call to waitid().
+ */
+ if (waitflag) {
+ remote->br_ptrace_whystop = 0;
+ remote->br_ptrace_whatstop = 0;
+ remote->br_ptrace_flags &= ~LX_PTRACE_CLDPEND;
+ }
+}
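
The si_status values assembled above correspond to the wait status a Linux tracer ultimately decodes. The sketch below shows only the standard Linux-side convention (the 0x80 syscall-stop bit and the ptrace event in the high bits) and is not code from this change:

    int status;

    if (waitpid(pid, &status, 0) > 0 && WIFSTOPPED(status)) {
        if (WSTOPSIG(status) == (SIGTRAP | 0x80)) {
            /* syscall-entry-stop or syscall-exit-stop (TRACESYSGOOD) */
        } else if (WSTOPSIG(status) == SIGTRAP && (status >> 16) != 0) {
            /* event stop; (status >> 16) is e.g. PTRACE_EVENT_FORK */
        } else {
            /* signal-delivery-stop for WSTOPSIG(status) */
        }
    }
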
+
+/*
+ * Receive notification from stop() of a PR_BRAND stop.
+ */
+void
+lx_stop_notify(proc_t *p, klwp_t *lwp, ushort_t why, ushort_t what)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ lx_ptrace_accord_t *accord;
+ klwp_t *plwp = NULL;
+ proc_t *pp = NULL;
+ lx_lwp_data_t *parent;
+ boolean_t cldpend = B_TRUE;
+ boolean_t cldpost = B_FALSE;
+ sigqueue_t *sqp = NULL;
+
+ /*
+ * We currently only care about LX-specific stop reasons.
+ */
+ if (why != PR_BRAND)
+ return;
+
+ switch (what) {
+ case LX_PR_SYSENTRY:
+ case LX_PR_SYSEXIT:
+ case LX_PR_SIGNALLED:
+ case LX_PR_EVENT:
+ break;
+ default:
+ cmn_err(CE_PANIC, "unexpected subreason for PR_BRAND"
+ " stop: %d", (int)what);
+ }
+
+ /*
+ * We should be holding the lock on our containing process. The
+ * STOPPING flag should have been set by lx_ptrace_stop() for all
+ * PR_BRAND stops.
+ */
+ VERIFY(MUTEX_HELD(&p->p_lock));
+ VERIFY(lwpd->br_ptrace_flags & LX_PTRACE_STOPPING);
+ VERIFY((accord = lwpd->br_ptrace_tracer) != NULL);
+
+ /*
+ * We must drop our process lock to take "pidlock". The
+ * LX_PTRACE_STOPPING flag protects us from an exiting tracer.
+ */
+ mutex_exit(&p->p_lock);
+
+ /*
+ * Allocate before we enter any mutexes.
+ */
+ sqp = kmem_zalloc(sizeof (*sqp), KM_SLEEP);
+
+ /*
+ * We take pidlock now, which excludes all callers of waitid() and
+ * prevents a detaching tracer from clearing critical accord members.
+ */
+ mutex_enter(&pidlock);
+ mutex_enter(&p->p_lock);
+
+ /*
+ * Get the ptrace(2) "parent" process, to which we may send
+ * a SIGCLD signal later.
+ */
+ if ((parent = accord->lxpa_tracer) != NULL &&
+ (plwp = parent->br_lwp) != NULL) {
+ pp = lwptoproc(plwp);
+ }
+
+ /*
+ * Our tracer should not have been modified in our absence; the
+ * LX_PTRACE_STOPPING flag prevents it.
+ */
+ VERIFY(lwpd->br_ptrace_tracer == accord);
+
+ /*
+ * Stash data for this stop condition in the LWP data while we hold
+ * both pidlock and our p_lock.
+ */
+ lwpd->br_ptrace_whystop = why;
+ lwpd->br_ptrace_whatstop = what;
+
+ /*
+ * If this event does not depend on an event from the parent LWP,
+ * populate the siginfo_t for the event pending on this tracee LWP.
+ */
+ if (!(lwpd->br_ptrace_flags & LX_PTRACE_PARENT_WAIT) && pp != NULL) {
+ cldpost = B_TRUE;
+ lx_winfo(lwpd, &sqp->sq_info, B_FALSE, NULL, NULL);
+ }
+
+ /*
+ * Drop our p_lock so that we may lock the tracer.
+ */
+ mutex_exit(&p->p_lock);
+ if (cldpost && pp != NULL) {
+ /*
+ * Post the SIGCLD to the tracer.
+ */
+ mutex_enter(&pp->p_lock);
+ if (!sigismember(&pp->p_sig, SIGCLD)) {
+ sigaddqa(pp, plwp->lwp_thread, sqp);
+ cldpend = B_FALSE;
+ sqp = NULL;
+ }
+ mutex_exit(&pp->p_lock);
+ }
+
+ /*
+ * We re-take our process lock now. The lock will be held until
+ * the thread is actually marked stopped, so we will not race with
+ * lx_ptrace_lock_if_stopped() or lx_waitid_helper().
+ */
+ mutex_enter(&p->p_lock);
+
+ /*
+ * We clear the STOPPING flag; stop() continues to hold our p_lock
+ * until our thread stop state is visible.
+ */
+ lwpd->br_ptrace_flags &= ~LX_PTRACE_STOPPING;
+ lwpd->br_ptrace_flags |= LX_PTRACE_STOPPED;
+ if (cldpend) {
+ /*
+ * We sent the SIGCLD for this new wait condition already.
+ */
+ lwpd->br_ptrace_flags |= LX_PTRACE_CLDPEND;
+ }
+
+ /*
+ * If lx_ptrace_exit_tracer() is trying to detach our tracer, it will
+ * be sleeping on this CV until LX_PTRACE_STOPPING is clear. Wake it
+ * now.
+ */
+ cv_broadcast(&lx_ptrace_busy_cv);
+
+ /*
+ * While still holding pidlock, we attempt to wake our tracer from a
+ * potential waitid() slumber.
+ */
+ if (accord->lxpa_cvp != NULL) {
+ cv_broadcast(accord->lxpa_cvp);
+ }
+
+ /*
+ * We release pidlock and return as we were called: with our p_lock
+ * held.
+ */
+ mutex_exit(&pidlock);
+
+ if (sqp != NULL) {
+ kmem_free(sqp, sizeof (*sqp));
+ }
+}
+
+/*
+ * For any restarting action (e.g. PTRACE_CONT, PTRACE_SYSCALL or
+ * PTRACE_DETACH) to be allowed, the tracee LWP must be in "ptrace-stop". This
+ * check must ONLY be run on tracees of the current LWP. If the check is
+ * successful, we return with the tracee p_lock held.
+ */
+static int
+lx_ptrace_lock_if_stopped(lx_ptrace_accord_t *accord, lx_lwp_data_t *remote)
+{
+ klwp_t *rlwp = remote->br_lwp;
+ proc_t *rproc = lwptoproc(rlwp);
+ kthread_t *rt = lwptot(rlwp);
+
+ /*
+ * We must never check that we, ourselves, are stopped. We must also
+ * have the accord tracee list locked while we lock our tracees.
+ */
+ VERIFY(curthread != rt);
+ VERIFY(MUTEX_HELD(&accord->lxpa_tracees_lock));
+ VERIFY(accord->lxpa_tracer == ttolxlwp(curthread));
+
+ /*
+ * Lock the process containing the tracee LWP.
+ */
+ mutex_enter(&rproc->p_lock);
+ if (!VISIBLE(remote)) {
+ /*
+ * The tracee LWP is currently detaching itself as it exits.
+ * It is no longer visible to ptrace(2).
+ */
+ mutex_exit(&rproc->p_lock);
+ return (ESRCH);
+ }
+
+ /*
+ * We must only check whether tracees of the current LWP are stopped.
+ * We check this condition after confirming visibility as an exiting
+ * tracee may no longer be completely consistent.
+ */
+ VERIFY(remote->br_ptrace_tracer == accord);
+
+ if (!(remote->br_ptrace_flags & LX_PTRACE_STOPPED)) {
+ /*
+ * The tracee is not in "ptrace-stop", so we release the
+ * process.
+ */
+ mutex_exit(&rproc->p_lock);
+ return (ESRCH);
+ }
+
+ /*
+ * The tracee is stopped. We return holding its process lock so that
+ * the caller may manipulate it.
+ */
+ return (0);
+}
+
+static int
+lx_ptrace_setoptions(lx_lwp_data_t *remote, uintptr_t options)
+{
+ /*
+ * Check for valid options.
+ */
+ if ((options & ~LX_PTRACE_O_ALL) != 0) {
+ return (EINVAL);
+ }
+
+ /*
+ * Set ptrace options on the target LWP.
+ */
+ remote->br_ptrace_options = (lx_ptrace_options_t)options;
+
+ return (0);
+}
+
+static int
+lx_ptrace_geteventmsg(lx_lwp_data_t *remote, void *umsgp)
+{
+ int error;
+
+#if defined(_SYSCALL32_IMPL)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ uint32_t tmp = remote->br_ptrace_eventmsg;
+
+ error = copyout(&tmp, umsgp, sizeof (uint32_t));
+ } else
+#endif
+ {
+ error = copyout(&remote->br_ptrace_eventmsg, umsgp,
+ sizeof (ulong_t));
+ }
+
+ return (error);
+}
+
+/*
+ * Implements the PTRACE_CONT subcommand of the Linux ptrace(2) interface.
+ */
+static int
+lx_ptrace_cont(lx_lwp_data_t *remote, lx_ptrace_cont_flags_t flags, int signo)
+{
+ klwp_t *lwp = remote->br_lwp;
+
+ if (flags & LX_PTC_SINGLESTEP) {
+ /*
+ * We do not currently support single-stepping.
+ */
+ lx_unsupported("PTRACE_SINGLESTEP not currently implemented");
+ return (EINVAL);
+ }
+
+ /*
+ * The tracer may choose to suppress the delivery of a signal, or
+ * select an alternative signal for delivery. If this is an
+ * appropriate ptrace(2) "signal-delivery-stop", br_ptrace_stopsig
+ * will be used as the new signal number.
+ *
+ * As with so many other aspects of the Linux ptrace(2) interface, this
+ * may fail silently if the state machine is not aligned correctly.
+ */
+ remote->br_ptrace_stopsig = signo;
+
+ /*
+ * Handle the syscall-stop flag if this is a PTRACE_SYSCALL restart:
+ */
+ if (flags & LX_PTC_SYSCALL) {
+ remote->br_ptrace_flags |= LX_PTRACE_SYSCALL;
+ } else {
+ remote->br_ptrace_flags &= ~LX_PTRACE_SYSCALL;
+ }
+
+ lx_ptrace_restart_lwp(lwp);
+
+ return (0);
+}
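
Because LX_PTRACE_SYSCALL is cleared at every stop, ongoing system call tracing requires the tracer to re-issue PTRACE_SYSCALL after each stop. The customary Linux-side loop (standard ptrace(2) usage, not part of this change) is roughly:

    for (;;) {
        if (ptrace(PTRACE_SYSCALL, pid, NULL, 0) != 0)
            break;
        if (waitpid(pid, &status, 0) < 0 || WIFEXITED(status))
            break;
        /* With PTRACE_O_TRACESYSGOOD, WSTOPSIG(status) is SIGTRAP|0x80. */
    }
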
+
+/*
+ * Implements the PTRACE_DETACH subcommand of the Linux ptrace(2) interface.
+ *
+ * The LWP identified by the Linux pid "lx_pid" will, if it is a tracee of the
+ * current LWP, be detached and set runnable. If the specified LWP is not
+ * currently in the "ptrace-stop" state, the routine will return ESRCH as if
+ * the LWP did not exist at all.
+ *
+ * The caller must not hold p_lock on any process.
+ */
+static int
+lx_ptrace_detach(lx_ptrace_accord_t *accord, lx_lwp_data_t *remote, int signo,
+ boolean_t *release_hold)
+{
+ klwp_t *rlwp;
+
+ rlwp = remote->br_lwp;
+
+ /*
+ * The tracee LWP was in "ptrace-stop" and we now hold its p_lock.
+ * Detach the LWP from the accord and set it running.
+ */
+ VERIFY(!TRACEE_BUSY(remote));
+ remote->br_ptrace_flags &= ~(LX_PTRACE_SYSCALL | LX_PTRACE_INHERIT);
+ VERIFY(list_link_active(&remote->br_ptrace_linkage));
+ list_remove(&accord->lxpa_tracees, remote);
+
+ remote->br_ptrace_attach = LX_PTA_NONE;
+ remote->br_ptrace_tracer = NULL;
+ remote->br_ptrace_flags = 0;
+ *release_hold = B_TRUE;
+
+ /*
+ * The tracer may, as described in lx_ptrace_cont(), choose to suppress
+ * or modify the delivered signal.
+ */
+ remote->br_ptrace_stopsig = signo;
+
+ lx_ptrace_restart_lwp(rlwp);
+
+ return (0);
+}
+
+/*
+ * This routine implements the PTRACE_ATTACH operation of the Linux ptrace(2)
+ * interface.
+ *
+ * This LWP is requesting to be attached as a tracer to another LWP -- the
+ * tracee. If a ptrace accord to track the list of tracees has not yet been
+ * allocated, one will be allocated and attached to this LWP now.
+ *
+ * The "br_ptrace_tracer" on the tracee LWP is set to this accord, and the
+ * tracee LWP is then added to the "lxpa_tracees" list in the accord. We drop
+ * locks between these two phases; the only consumer of trace events from this
+ * accord is this LWP, which obviously cannot be running waitpid(2) at the same
+ * time as this call to ptrace(2).
+ */
+static int
+lx_ptrace_attach(pid_t lx_pid)
+{
+ int error = ESRCH;
+ int32_t one = 1;
+ /*
+ * Our (Tracer) LWP:
+ */
+ lx_ptrace_accord_t *accord;
+ lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+ /*
+ * Remote (Tracee) LWP:
+ */
+ pid_t rpid;
+ id_t rtid;
+ proc_t *rproc;
+ kthread_t *rthr;
+ klwp_t *rlwp;
+ lx_lwp_data_t *rlwpd;
+
+ if (lwpd->br_pid == lx_pid) {
+ /*
+ * We cannot trace ourselves.
+ */
+ return (EPERM);
+ }
+
+ /*
+ * Ensure that we have an accord and obtain a lock on it. This
+ * routine should not fail because the LWP cannot make ptrace(2) system
+ * calls after it has begun exiting.
+ */
+ VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_EXITING);
+ VERIFY(lx_ptrace_accord_get(&accord, B_TRUE) == 0);
+
+ /*
+ * Place speculative hold in case the attach is successful.
+ */
+ lx_ptrace_accord_hold(accord);
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * Locate the process containing the tracee LWP based on its Linux pid
+ * and lock it.
+ */
+ if (lx_lpid_to_spair(lx_pid, &rpid, &rtid) != 0 ||
+ (rproc = sprlock(rpid)) == NULL) {
+ /*
+ * We could not find the target process.
+ */
+ goto errout;
+ }
+
+ /*
+ * Locate the tracee LWP.
+ */
+ if ((rthr = idtot(rproc, rtid)) == NULL ||
+ (rlwp = ttolwp(rthr)) == NULL ||
+ (rlwpd = lwptolxlwp(rlwp)) == NULL ||
+ !VISIBLE(rlwpd)) {
+ /*
+ * The LWP could not be found, was not branded, or is not
+ * visible to ptrace(2) at this time.
+ */
+ goto unlock_errout;
+ }
+
+ /*
+ * We now hold the lock on the tracee. Attempt to install ourselves
+ * as the tracer.
+ */
+ if (curproc != rproc && priv_proc_cred_perm(curproc->p_cred, rproc,
+ NULL, VWRITE) != 0) {
+ /*
+ * This process does not have permission to trace the remote
+ * process.
+ */
+ error = EPERM;
+ } else if (rlwpd->br_ptrace_tracer != NULL) {
+ /*
+ * This LWP is already being traced.
+ */
+ VERIFY(list_link_active(&rlwpd->br_ptrace_linkage));
+ VERIFY(rlwpd->br_ptrace_attach != LX_PTA_NONE);
+ error = EPERM;
+ } else {
+ lx_proc_data_t *rprocd;
+
+ /*
+ * Bond the tracee to the accord.
+ */
+ VERIFY0(rlwpd->br_ptrace_flags & LX_PTRACE_EXITING);
+ VERIFY(rlwpd->br_ptrace_attach == LX_PTA_NONE);
+ rlwpd->br_ptrace_attach = LX_PTA_ATTACH;
+ rlwpd->br_ptrace_tracer = accord;
+
+ /*
+ * We had no tracer, and are thus not in the tracees list.
+ * It is safe to take the tracee list lock while we insert
+ * ourselves.
+ */
+ mutex_enter(&accord->lxpa_tracees_lock);
+ VERIFY(!list_link_active(&rlwpd->br_ptrace_linkage));
+ list_insert_tail(&accord->lxpa_tracees, rlwpd);
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * Send a thread-directed SIGSTOP.
+ */
+ sigtoproc(rproc, rthr, SIGSTOP);
+
+ /*
+ * Set the in-kernel process-wide ptrace(2) enable flag.
+ * Attempt also to write the usermode trace flag so that the
+ * process knows to enter the kernel for potential ptrace(2)
+ * syscall-stops.
+ */
+ rprocd = ttolxproc(rthr);
+ rprocd->l_ptrace = 1;
+ mutex_exit(&rproc->p_lock);
+ (void) uwrite(rproc, &one, sizeof (one), rprocd->l_traceflag);
+ mutex_enter(&rproc->p_lock);
+
+ error = 0;
+ }
+
+unlock_errout:
+ /*
+ * Unlock the process containing the tracee LWP and the accord.
+ */
+ sprunlock(rproc);
+
+errout:
+ if (error != 0) {
+ /*
+ * The attach was not successful. Remove our speculative
+ * hold.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+ }
+
+ return (error);
+}
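
The attach path above is exercised by the ordinary Linux tracer sequence of attaching, waiting for the initial SIGSTOP, setting options and eventually detaching. A minimal sketch of that sequence (standard Linux ptrace(2) usage, not code from this change):

    if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == 0) {
        int status;

        (void) waitpid(pid, &status, 0);    /* initial SIGSTOP */
        (void) ptrace(PTRACE_SETOPTIONS, pid, NULL,
            PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACECLONE);
        /* ... observe the tracee ... */
        (void) ptrace(PTRACE_DETACH, pid, NULL, NULL);
    }
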
+
+int
+lx_ptrace_set_clone_inherit(int option, boolean_t inherit_flag)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ proc_t *p = lwptoproc(lwp);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+
+ mutex_enter(&p->p_lock);
+
+ switch (option) {
+ case LX_PTRACE_O_TRACEFORK:
+ case LX_PTRACE_O_TRACEVFORK:
+ case LX_PTRACE_O_TRACECLONE:
+ lwpd->br_ptrace_clone_option = option;
+ break;
+
+ default:
+ return (EINVAL);
+ }
+
+ if (inherit_flag) {
+ lwpd->br_ptrace_flags |= LX_PTRACE_INHERIT;
+ } else {
+ lwpd->br_ptrace_flags &= ~LX_PTRACE_INHERIT;
+ }
+
+ mutex_exit(&p->p_lock);
+ return (0);
+}
+
+/*
+ * If the parent LWP is being traced, we want to attach ourselves to the
+ * same accord.
+ */
+void
+lx_ptrace_inherit_tracer(lx_lwp_data_t *src, lx_lwp_data_t *dst)
+{
+ proc_t *srcp = lwptoproc(src->br_lwp);
+ proc_t *dstp = lwptoproc(dst->br_lwp);
+ lx_ptrace_accord_t *accord;
+ boolean_t unlock = B_FALSE;
+
+ if (srcp == dstp) {
+ /*
+ * This is syslwp_create(), so the process p_lock is already
+ * held.
+ */
+ VERIFY(MUTEX_HELD(&srcp->p_lock));
+ } else {
+ unlock = B_TRUE;
+ mutex_enter(&srcp->p_lock);
+ }
+
+ if ((accord = src->br_ptrace_tracer) == NULL) {
+ /*
+ * The source LWP does not have a tracer to inherit.
+ */
+ goto out;
+ }
+
+ /*
+ * There are two conditions to check when determining if the new
+ * child should inherit the same tracer (and tracing options) as its
+ * parent. Either condition is sufficient to trigger inheritance.
+ */
+ dst->br_ptrace_attach = LX_PTA_NONE;
+ if ((src->br_ptrace_options & src->br_ptrace_clone_option) != 0) {
+ /*
+ * Condition 1:
+ * The clone(2), fork(2) and vfork(2) emulated system calls
+ * populate "br_ptrace_clone_option" with the specific
+ * ptrace(2) SETOPTIONS option that applies to this
+ * operation. If the relevant option has been enabled by the
+ * tracer then we inherit.
+ */
+ dst->br_ptrace_attach |= LX_PTA_INHERIT_OPTIONS;
+
+ } else if ((src->br_ptrace_flags & LX_PTRACE_INHERIT) != 0) {
+ /*
+ * Condition 2:
+ * If the caller opted in to inheritance with the
+ * PTRACE_CLONE flag to clone(2), the LX_PTRACE_INHERIT flag
+ * will be set and we inherit.
+ */
+ dst->br_ptrace_attach |= LX_PTA_INHERIT_CLONE;
+ }
+
+ /*
+ * These values only apply for the duration of a single clone(2), et
+ * al, system call.
+ */
+ src->br_ptrace_flags &= ~LX_PTRACE_INHERIT;
+ src->br_ptrace_clone_option = 0;
+
+ if (dst->br_ptrace_attach == LX_PTA_NONE) {
+ /*
+ * No condition triggered inheritance.
+ */
+ goto out;
+ }
+
+ /*
+ * Set the LX_PTRACE_CLONING flag to prevent us from being detached
+ * while our p_lock is dropped.
+ */
+ src->br_ptrace_flags |= LX_PTRACE_CLONING;
+ mutex_exit(&srcp->p_lock);
+
+ /*
+ * Hold the accord for the new LWP.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_hold(accord);
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * Install the tracer and copy the current PTRACE_SETOPTIONS options.
+ */
+ dst->br_ptrace_tracer = accord;
+ dst->br_ptrace_options = src->br_ptrace_options;
+
+ /*
+ * This flag prevents waitid() from seeing events for the new child
+ * until the parent is able to post the relevant ptrace event to
+ * the tracer.
+ */
+ dst->br_ptrace_flags |= LX_PTRACE_PARENT_WAIT;
+
+ mutex_enter(&accord->lxpa_tracees_lock);
+ VERIFY(list_link_active(&src->br_ptrace_linkage));
+ VERIFY(!list_link_active(&dst->br_ptrace_linkage));
+ list_insert_tail(&accord->lxpa_tracees, dst);
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * Relock our process and clear our busy flag.
+ */
+ mutex_enter(&srcp->p_lock);
+ src->br_ptrace_flags &= ~LX_PTRACE_CLONING;
+
+ /*
+ * If lx_ptrace_exit_tracer() is trying to detach our tracer, it will
+ * be sleeping on this CV until LX_PTRACE_CLONING is clear. Wake it
+ * now.
+ */
+ cv_broadcast(&lx_ptrace_busy_cv);
+
+out:
+ if (unlock) {
+ mutex_exit(&srcp->p_lock);
+ }
+}
+
+static int
+lx_ptrace_traceme(void)
+{
+ int error;
+ boolean_t did_attach = B_FALSE;
+ /*
+ * Our (Tracee) LWP:
+ */
+ klwp_t *lwp = ttolwp(curthread);
+ proc_t *p = lwptoproc(lwp);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ /*
+ * Remote (Tracer) LWP:
+ */
+ lx_ptrace_accord_t *accord;
+
+ /*
+ * We are intending to be the tracee. Fetch (or allocate) the accord
+ * for our parent LWP.
+ */
+ if ((error = lx_ptrace_accord_get_by_pid(lx_lwp_ppid(lwp, NULL,
+ NULL), &accord)) != 0) {
+ /*
+ * Could not determine the Linux pid of the parent LWP, or
+ * could not get the accord for that LWP.
+ */
+ return (error);
+ }
+
+ /*
+ * We now hold the accord lock.
+ */
+ if (accord->lxpa_flags & LX_ACC_TOMBSTONE) {
+ /*
+ * The accord is marked for death; give up now.
+ */
+ lx_ptrace_accord_exit(accord);
+ return (ESRCH);
+ }
+
+ /*
+ * Bump the reference count so that the accord is not freed. We need
+ * to drop the accord lock before we take our own p_lock.
+ */
+ lx_ptrace_accord_hold(accord);
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * We now lock _our_ process and determine if we can install our parent
+ * as our tracer.
+ */
+ mutex_enter(&p->p_lock);
+ if (lwpd->br_ptrace_tracer != NULL) {
+ /*
+ * This LWP is already being traced.
+ */
+ VERIFY(lwpd->br_ptrace_attach != LX_PTA_NONE);
+ error = EPERM;
+ } else {
+ /*
+ * Bond ourselves to the accord. We already bumped the accord
+ * reference count.
+ */
+ VERIFY(lwpd->br_ptrace_attach == LX_PTA_NONE);
+ lwpd->br_ptrace_attach = LX_PTA_TRACEME;
+ lwpd->br_ptrace_tracer = accord;
+ did_attach = B_TRUE;
+ error = 0;
+ }
+ mutex_exit(&p->p_lock);
+
+ /*
+ * Lock the accord tracee list and add this LWP. Once we are in the
+ * tracee list, it is the responsibility of the tracer to detach us.
+ */
+ if (error == 0) {
+ lx_ptrace_accord_enter(accord);
+ mutex_enter(&accord->lxpa_tracees_lock);
+
+ if (!(accord->lxpa_flags & LX_ACC_TOMBSTONE)) {
+ lx_proc_data_t *procd = ttolxproc(curthread);
+
+ /*
+ * Put ourselves in the tracee list for this accord.
+ */
+ VERIFY(!list_link_active(&lwpd->br_ptrace_linkage));
+ list_insert_tail(&accord->lxpa_tracees, lwpd);
+ mutex_exit(&accord->lxpa_tracees_lock);
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * Set the in-kernel process-wide ptrace(2) enable
+ * flag. Attempt also to write the usermode trace flag
+ * so that the process knows to enter the kernel for
+ * potential ptrace(2) syscall-stops.
+ */
+ procd->l_ptrace = 1;
+ (void) suword32((void *)procd->l_traceflag, 1);
+
+ return (0);
+ }
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * The accord has been marked for death. We must
+ * untrace ourselves.
+ */
+ error = ESRCH;
+ lx_ptrace_accord_exit(accord);
+ }
+
+ /*
+ * Our optimism was unjustified: We were unable to attach. We need to
+ * lock the process containing this LWP again in order to remove the
+ * tracer.
+ */
+ VERIFY(error != 0);
+ mutex_enter(&p->p_lock);
+ if (did_attach) {
+ /*
+ * Verify that things were as we left them:
+ */
+ VERIFY(!list_link_active(&lwpd->br_ptrace_linkage));
+ VERIFY(lwpd->br_ptrace_tracer == accord);
+
+ lwpd->br_ptrace_attach = LX_PTA_NONE;
+ lwpd->br_ptrace_tracer = NULL;
+ }
+ mutex_exit(&p->p_lock);
+
+ /*
+ * Remove our speculative hold on the accord, possibly causing it to be
+ * freed in the process.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+
+ return (error);
+}
+
+static boolean_t
+lx_ptrace_stop_common(proc_t *p, lx_lwp_data_t *lwpd, ushort_t what)
+{
+ VERIFY(MUTEX_HELD(&p->p_lock));
+
+ /*
+ * Mark this LWP as stopping and call stop() to enter "ptrace-stop".
+ */
+ VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_STOPPING);
+ lwpd->br_ptrace_flags |= LX_PTRACE_STOPPING;
+ stop(PR_BRAND, what);
+
+ /*
+ * We are back from "ptrace-stop" with our process lock held.
+ */
+ lwpd->br_ptrace_flags &= ~(LX_PTRACE_STOPPING | LX_PTRACE_STOPPED |
+ LX_PTRACE_CLDPEND);
+ cv_broadcast(&lx_ptrace_busy_cv);
+ mutex_exit(&p->p_lock);
+
+ return (B_TRUE);
+}
+
+int
+lx_ptrace_stop_for_option(int option, boolean_t child, ulong_t msg)
+{
+ kthread_t *t = curthread;
+ klwp_t *lwp = ttolwp(t);
+ proc_t *p = lwptoproc(lwp);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+
+ mutex_enter(&p->p_lock);
+ if (lwpd->br_ptrace_tracer == NULL) {
+ mutex_exit(&p->p_lock);
+ return (ESRCH);
+ }
+
+ if (!child) {
+ /*
+ * Only the first event posted by a new process is to be held
+ * until the matching parent event is dispatched, and only if
+ * it is a "child" event. This is not a child event, so we
+ * clear the wait flag.
+ */
+ lwpd->br_ptrace_flags &= ~LX_PTRACE_PARENT_WAIT;
+ }
+
+ if (!(lwpd->br_ptrace_options & option)) {
+ if (option == LX_PTRACE_O_TRACEEXEC) {
+ /*
+ * Without PTRACE_O_TRACEEXEC, the Linux kernel will
+ * send SIGTRAP to the process.
+ */
+ sigtoproc(p, t, SIGTRAP);
+ mutex_exit(&p->p_lock);
+ return (0);
+ }
+
+ /*
+ * The flag for this trace event is not enabled, so we will not
+ * stop.
+ */
+ mutex_exit(&p->p_lock);
+ return (ESRCH);
+ }
+
+ if (child) {
+ switch (option) {
+ case LX_PTRACE_O_TRACECLONE:
+ case LX_PTRACE_O_TRACEFORK:
+ case LX_PTRACE_O_TRACEVFORK:
+ /*
+ * Send the child LWP a directed SIGSTOP.
+ */
+ sigtoproc(p, t, SIGSTOP);
+ mutex_exit(&p->p_lock);
+ return (0);
+ default:
+ goto nostop;
+ }
+ }
+
+ lwpd->br_ptrace_eventmsg = msg;
+
+ switch (option) {
+ case LX_PTRACE_O_TRACECLONE:
+ lwpd->br_ptrace_event = LX_PTRACE_EVENT_CLONE;
+ break;
+ case LX_PTRACE_O_TRACEEXEC:
+ lwpd->br_ptrace_event = LX_PTRACE_EVENT_EXEC;
+ lwpd->br_ptrace_eventmsg = 0;
+ break;
+ case LX_PTRACE_O_TRACEEXIT:
+ lwpd->br_ptrace_event = LX_PTRACE_EVENT_EXIT;
+ break;
+ case LX_PTRACE_O_TRACEFORK:
+ lwpd->br_ptrace_event = LX_PTRACE_EVENT_FORK;
+ break;
+ case LX_PTRACE_O_TRACEVFORK:
+ lwpd->br_ptrace_event = LX_PTRACE_EVENT_VFORK;
+ break;
+ case LX_PTRACE_O_TRACEVFORKDONE:
+ lwpd->br_ptrace_event = LX_PTRACE_EVENT_VFORK_DONE;
+ lwpd->br_ptrace_eventmsg = 0;
+ break;
+ default:
+ goto nostop;
+ }
+
+ /*
+ * p_lock for the process containing the tracee will be dropped by
+ * lx_ptrace_stop_common().
+ */
+ return (lx_ptrace_stop_common(p, lwpd, LX_PR_EVENT) ? 0 : ESRCH);
+
+nostop:
+ lwpd->br_ptrace_event = 0;
+ lwpd->br_ptrace_eventmsg = 0;
+ mutex_exit(&p->p_lock);
+ return (ESRCH);
+}
+
+boolean_t
+lx_ptrace_stop(ushort_t what)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ proc_t *p = lwptoproc(lwp);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+
+ VERIFY(what == LX_PR_SYSENTRY || what == LX_PR_SYSEXIT ||
+ what == LX_PR_SIGNALLED);
+
+ /*
+ * If we do not have an accord, bail out early.
+ */
+ if (lwpd->br_ptrace_tracer == NULL)
+ return (B_FALSE);
+
+ /*
+ * Lock this process and re-check the condition.
+ */
+ mutex_enter(&p->p_lock);
+ if (lwpd->br_ptrace_tracer == NULL) {
+ VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_SYSCALL);
+ mutex_exit(&p->p_lock);
+ return (B_FALSE);
+ }
+
+ if (what == LX_PR_SYSENTRY || what == LX_PR_SYSEXIT) {
+ /*
+ * This is a syscall-entry-stop or syscall-exit-stop point.
+ */
+ if (!(lwpd->br_ptrace_flags & LX_PTRACE_SYSCALL)) {
+ /*
+ * A system call stop has not been requested.
+ */
+ mutex_exit(&p->p_lock);
+ return (B_FALSE);
+ }
+
+ /*
+ * The PTRACE_SYSCALL restart command applies only to the next
+ * system call entry or exit. The tracer must restart us with
+ * PTRACE_SYSCALL while we are in ptrace-stop for us to fire
+ * again at the next system call boundary.
+ */
+ lwpd->br_ptrace_flags &= ~LX_PTRACE_SYSCALL;
+ }
+
+ /*
+ * p_lock for the process containing the tracee will be dropped by
+ * lx_ptrace_stop_common().
+ */
+ return (lx_ptrace_stop_common(p, lwpd, what));
+}
+
+int
+lx_issig_stop(proc_t *p, klwp_t *lwp)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ int lx_sig;
+
+ VERIFY(MUTEX_HELD(&p->p_lock));
+
+ /*
+ * If we do not have an accord, bail out now. Additionally, if there
+ * is no valid signal then we have no reason to stop.
+ */
+ if (lwpd->br_ptrace_tracer == NULL || lwp->lwp_cursig == SIGKILL ||
+ (lwp->lwp_cursig == 0 || lwp->lwp_cursig > NSIG) ||
+ (lx_sig = stol_signo[lwp->lwp_cursig]) < 1) {
+ return (0);
+ }
+
+ /*
+ * We stash the signal on the LWP where our waitid_helper will find it
+ * and enter the ptrace "signal-delivery-stop" condition.
+ */
+ lwpd->br_ptrace_stopsig = lx_sig;
+ (void) lx_ptrace_stop_common(p, lwpd, LX_PR_SIGNALLED);
+ mutex_enter(&p->p_lock);
+
+ /*
+ * When we return, the signal may have been altered or suppressed.
+ */
+ if (lwpd->br_ptrace_stopsig != lx_sig) {
+ int native_sig;
+ lx_sig = lwpd->br_ptrace_stopsig;
+
+ if (lx_sig >= LX_NSIG) {
+ lx_sig = 0;
+ }
+
+ /*
+ * Translate the Linux signal number back to an illumos
+ * native signal.
+ */
+ if (lx_sig >= LX_NSIG || lx_sig < 0 || (native_sig =
+ ltos_signo[lx_sig]) < 1) {
+ /*
+ * The signal is not deliverable.
+ */
+ lwp->lwp_cursig = 0;
+ lwp->lwp_extsig = 0;
+ if (lwp->lwp_curinfo) {
+ siginfofree(lwp->lwp_curinfo);
+ lwp->lwp_curinfo = NULL;
+ }
+ } else {
+ /*
+ * Alter the currently dispatching signal.
+ */
+ if (native_sig == SIGKILL) {
+ /*
+ * We mark ourselves the victim and request
+ * a restart of signal processing.
+ */
+ p->p_flag |= SKILLED;
+ p->p_flag &= ~SEXTKILLED;
+ return (-1);
+ }
+ lwp->lwp_cursig = native_sig;
+ lwp->lwp_extsig = 0;
+ if (lwp->lwp_curinfo != NULL) {
+ lwp->lwp_curinfo->sq_info.si_signo = native_sig;
+ }
+ }
+ }
+
+ lwpd->br_ptrace_stopsig = 0;
+ return (0);
+}
+
+static void
+lx_ptrace_exit_tracer(proc_t *p, lx_lwp_data_t *lwpd,
+ lx_ptrace_accord_t *accord)
+{
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
+ lx_ptrace_accord_enter(accord);
+ /*
+ * Mark this accord for death. This means no new tracees can be
+ * attached to this accord.
+ */
+ VERIFY0(accord->lxpa_flags & LX_ACC_TOMBSTONE);
+ accord->lxpa_flags |= LX_ACC_TOMBSTONE;
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * Walk the list of tracees, detaching them and setting them runnable
+ * if they are stopped.
+ */
+ for (;;) {
+ klwp_t *rlwp;
+ proc_t *rproc;
+ lx_lwp_data_t *remote;
+ kmutex_t *rmp;
+
+ mutex_enter(&accord->lxpa_tracees_lock);
+ if (list_is_empty(&accord->lxpa_tracees)) {
+ mutex_exit(&accord->lxpa_tracees_lock);
+ break;
+ }
+
+ /*
+ * Fetch the first tracee LWP in the list and lock the process
+ * which contains it.
+ */
+ remote = list_head(&accord->lxpa_tracees);
+ rlwp = remote->br_lwp;
+ rproc = lwptoproc(rlwp);
+ /*
+ * The p_lock mutex persists beyond the life of the process
+ * itself. We save its address here to avoid dereferencing the
+ * proc_t after waking from sleep.
+ */
+ rmp = &rproc->p_lock;
+ mutex_enter(rmp);
+
+ if (TRACEE_BUSY(remote)) {
+ /*
+ * This LWP is currently detaching itself on exit, or
+ * mid-way through stop(). We must wait for this
+ * action to be completed. While we wait on the CV, we
+ * must drop the accord tracee list lock.
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+ cv_wait(&lx_ptrace_busy_cv, rmp);
+
+ /*
+ * While we were waiting, some state may have changed.
+ * Restart the walk to be sure we don't miss anything.
+ */
+ mutex_exit(rmp);
+ continue;
+ }
+
+ /*
+ * We now hold p_lock on the process. Remove the tracee from
+ * the list.
+ */
+ VERIFY(list_link_active(&remote->br_ptrace_linkage));
+ list_remove(&accord->lxpa_tracees, remote);
+
+ /*
+ * Unlink the accord and clear our trace flags.
+ */
+ remote->br_ptrace_attach = LX_PTA_NONE;
+ remote->br_ptrace_tracer = NULL;
+ remote->br_ptrace_flags = 0;
+
+ /*
+ * Let go of the list lock before we restart the LWP. We must
+ * not hold any locks other than the process p_lock when
+ * we call lx_ptrace_restart_lwp() as it will thread_lock
+ * the tracee.
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * Ensure that the LWP is not stopped on our account.
+ */
+ lx_ptrace_restart_lwp(rlwp);
+
+ /*
+ * Unlock the former tracee.
+ */
+ mutex_exit(rmp);
+
+ /*
+ * Drop the hold this tracee had on the accord.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+ }
+
+ mutex_enter(&p->p_lock);
+ lwpd->br_ptrace_accord = NULL;
+ mutex_exit(&p->p_lock);
+
+ /*
+ * Clean up and release our hold on the accord. If we completely
+ * detached all tracee LWPs, this will free the accord. Otherwise, it
+ * will be freed when they complete their cleanup.
+ *
+ * We hold "pidlock" while clearing these members for easy exclusion of
+ * waitid(), etc.
+ */
+ mutex_enter(&pidlock);
+ lx_ptrace_accord_enter(accord);
+ accord->lxpa_cvp = NULL;
+ accord->lxpa_tracer = NULL;
+ mutex_exit(&pidlock);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+}
+
+static void
+lx_ptrace_exit_tracee(proc_t *p, lx_lwp_data_t *lwpd,
+ lx_ptrace_accord_t *accord)
+{
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
+ /*
+ * We are the tracee LWP. Lock the accord tracee list and then our
+ * containing process.
+ */
+ mutex_enter(&accord->lxpa_tracees_lock);
+ mutex_enter(&p->p_lock);
+
+ /*
+ * Remove our reference to the accord. We will release our hold
+ * later.
+ */
+ VERIFY(lwpd->br_ptrace_tracer == accord);
+ lwpd->br_ptrace_attach = LX_PTA_NONE;
+ lwpd->br_ptrace_tracer = NULL;
+
+ /*
+ * Remove this LWP from the accord tracee list:
+ */
+ VERIFY(list_link_active(&lwpd->br_ptrace_linkage));
+ list_remove(&accord->lxpa_tracees, lwpd);
+
+ /*
+ * Wake up any tracers waiting for us to detach from the accord.
+ */
+ cv_broadcast(&lx_ptrace_busy_cv);
+ mutex_exit(&p->p_lock);
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * Grab "pidlock" and wake the tracer if it is blocked in waitid().
+ */
+ mutex_enter(&pidlock);
+ if (accord->lxpa_cvp != NULL) {
+ cv_broadcast(accord->lxpa_cvp);
+ }
+ mutex_exit(&pidlock);
+
+ /*
+ * Release our hold on the accord.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+}
+
+/*
+ * This routine is called from lx_exitlwp() when an LWP is ready to exit. If
+ * this LWP is being traced, it will be detached from the tracer's accord. The
+ * routine will also detach any LWPs being traced by this LWP.
+ */
+void
+lx_ptrace_exit(proc_t *p, klwp_t *lwp)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ lx_ptrace_accord_t *accord;
+
+ VERIFY(MUTEX_HELD(&p->p_lock));
+
+ /*
+ * Mark our LWP as exiting from a ptrace perspective. This will
+ * prevent a new accord from being allocated if one does not exist
+ * already, and will make us invisible to PTRACE_ATTACH/PTRACE_TRACEME.
+ */
+ VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_EXITING);
+ lwpd->br_ptrace_flags |= LX_PTRACE_EXITING;
+
+ if ((accord = lwpd->br_ptrace_tracer) != NULL) {
+ /*
+ * We are traced by another LWP and must detach ourselves.
+ */
+ mutex_exit(&p->p_lock);
+ lx_ptrace_exit_tracee(p, lwpd, accord);
+ mutex_enter(&p->p_lock);
+ }
+
+ if ((accord = lwpd->br_ptrace_accord) != NULL) {
+ /*
+ * We have been tracing other LWPs, and must detach from
+ * them and clean up our accord.
+ */
+ mutex_exit(&p->p_lock);
+ lx_ptrace_exit_tracer(p, lwpd, accord);
+ mutex_enter(&p->p_lock);
+ }
+}
+
+/*
+ * Called when a SIGCLD signal is dispatched so that we may enqueue another.
+ * Return 0 if we enqueued a signal, or -1 if not.
+ */
+int
+lx_sigcld_repost(proc_t *pp, sigqueue_t *sqp)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ lx_ptrace_accord_t *accord;
+ lx_lwp_data_t *remote;
+ klwp_t *rlwp;
+ proc_t *rproc;
+ boolean_t found = B_FALSE;
+
+ VERIFY(MUTEX_HELD(&pidlock));
+ VERIFY(MUTEX_NOT_HELD(&pp->p_lock));
+ VERIFY(lwptoproc(lwp) == pp);
+
+ mutex_enter(&pp->p_lock);
+ if ((accord = lwpd->br_ptrace_accord) == NULL) {
+ /*
+ * This LWP is not a tracer LWP, so there will be no
+ * SIGCLD.
+ */
+ mutex_exit(&pp->p_lock);
+ return (-1);
+ }
+ mutex_exit(&pp->p_lock);
+
+ mutex_enter(&accord->lxpa_tracees_lock);
+ for (remote = list_head(&accord->lxpa_tracees); remote != NULL;
+ remote = list_next(&accord->lxpa_tracees, remote)) {
+ rlwp = remote->br_lwp;
+ rproc = lwptoproc(rlwp);
+
+ /*
+ * Check if this LWP is in "ptrace-stop". If in the correct
+ * stop condition, lock the process containing the tracee LWP.
+ */
+ if (lx_ptrace_lock_if_stopped(accord, remote) != 0) {
+ continue;
+ }
+
+ if (remote->br_ptrace_flags & LX_PTRACE_PARENT_WAIT) {
+ /*
+ * This event depends on waitid() clearing out the
+ * event of another LWP. Skip it for now.
+ */
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ if (!(remote->br_ptrace_flags & LX_PTRACE_CLDPEND)) {
+ /*
+ * No SIGCLD is required for this LWP.
+ */
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ if (remote->br_ptrace_whystop == 0 ||
+ remote->br_ptrace_whatstop == 0) {
+ /*
+ * No (new) stop reason to post for this LWP.
+ */
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ /*
+ * We found a process of interest. Leave the process
+ * containing the tracee LWP locked and break out of the loop.
+ */
+ found = B_TRUE;
+ break;
+ }
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ if (!found) {
+ return (-1);
+ }
+
+ /*
+ * Generate siginfo for this tracee LWP.
+ */
+ lx_winfo(remote, &sqp->sq_info, B_FALSE, NULL, NULL);
+ remote->br_ptrace_flags &= ~LX_PTRACE_CLDPEND;
+ mutex_exit(&rproc->p_lock);
+
+ mutex_enter(&pp->p_lock);
+ if (sigismember(&pp->p_sig, SIGCLD)) {
+ mutex_exit(&pp->p_lock);
+
+ mutex_enter(&rproc->p_lock);
+ remote->br_ptrace_flags |= LX_PTRACE_CLDPEND;
+ mutex_exit(&rproc->p_lock);
+
+ return (-1);
+ }
+ sigaddqa(pp, curthread, sqp);
+ mutex_exit(&pp->p_lock);
+
+ return (0);
+}
+
+/*
+ * Consume the next available ptrace(2) event queued against the accord for
+ * this LWP. The event will be emitted as if through waitid(), and converted
+ * by lx_waitpid() and friends before the return to usermode.
+ */
+int
+lx_waitid_helper(idtype_t idtype, id_t id, k_siginfo_t *ip, int options,
+ boolean_t *brand_wants_wait, int *rval)
+{
+ lx_ptrace_accord_t *accord;
+ klwp_t *lwp = ttolwp(curthread);
+ proc_t *p = lwptoproc(lwp);
+ lx_lwp_data_t *local = lwptolxlwp(lwp);
+ lx_lwp_data_t *remote;
+ boolean_t found = B_FALSE;
+ klwp_t *rlwp = NULL;
+ proc_t *rproc = NULL;
+ pid_t event_pid = 0, event_ppid = 0;
+ boolean_t waitflag = !(options & WNOWAIT);
+
+ VERIFY(MUTEX_HELD(&pidlock));
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
+ /*
+ * By default, we do not expect waitid() to block on our account.
+ */
+ *brand_wants_wait = B_FALSE;
+
+ if (!local->br_waitid_emulate) {
+ /*
+ * This waitid() call is not expecting emulated results.
+ */
+ return (-1);
+ }
+
+ switch (idtype) {
+ case P_ALL:
+ case P_PID:
+ case P_PGID:
+ break;
+ default:
+ /*
+ * This idtype has no power here.
+ */
+ return (-1);
+ }
+
+ if (lx_ptrace_accord_get(&accord, B_FALSE) != 0) {
+ /*
+ * This LWP does not have an accord; it cannot be tracing.
+ */
+ return (-1);
+ }
+
+ /*
+ * We do not need an additional hold on the accord, as it belongs to
+ * the running tracer LWP.
+ */
+ lx_ptrace_accord_exit(accord);
+
+ mutex_enter(&accord->lxpa_tracees_lock);
+ if (list_is_empty(&accord->lxpa_tracees)) {
+ /*
+ * Though it has an accord, there are currently no tracees in
+ * the list for this LWP.
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+ return (-1);
+ }
+
+ /*
+ * Walk the list of tracees and determine if any of them have events to
+ * report.
+ */
+ for (remote = list_head(&accord->lxpa_tracees); remote != NULL;
+ remote = list_next(&accord->lxpa_tracees, remote)) {
+ rlwp = remote->br_lwp;
+ rproc = lwptoproc(rlwp);
+
+ /*
+ * If the __WALL option was passed, we unconditionally consider
+ * every possible child.
+ */
+ if (!(local->br_waitid_flags & LX_WALL)) {
+ /*
+ * Otherwise, we check to see if this LWP matches an
+ * id we are waiting for.
+ */
+ switch (idtype) {
+ case P_ALL:
+ break;
+ case P_PID:
+ if (remote->br_pid != id)
+ continue;
+ break;
+ case P_PGID:
+ if (rproc->p_pgrp != id)
+ continue;
+ break;
+ default:
+ cmn_err(CE_PANIC, "unexpected idtype: %d",
+ idtype);
+ }
+ }
+
+ /*
+ * Check if this LWP is in "ptrace-stop". If in the correct
+ * stop condition, lock the process containing the tracee LWP.
+ */
+ if (lx_ptrace_lock_if_stopped(accord, remote) != 0) {
+ continue;
+ }
+
+ if (remote->br_ptrace_flags & LX_PTRACE_PARENT_WAIT) {
+ /*
+ * This event depends on waitid() clearing out the
+ * event of another LWP. Skip it for now.
+ */
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ if (remote->br_ptrace_whystop == 0 ||
+ remote->br_ptrace_whatstop == 0) {
+ /*
+ * No (new) stop reason to post for this LWP.
+ */
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ /*
+ * We found a process of interest. Leave the process
+ * containing the tracee LWP locked and break out of the loop.
+ */
+ found = B_TRUE;
+ break;
+ }
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ if (!found) {
+ /*
+ * There were no events of interest, but we have tracees.
+ * Signal to waitid() that it should block if the provided
+ * flags allow for it.
+ */
+ *brand_wants_wait = B_TRUE;
+ return (-1);
+ }
+
+ /*
+ * Populate the signal information.
+ */
+ lx_winfo(remote, ip, waitflag, &event_ppid, &event_pid);
+
+ /*
+ * Unlock the tracee.
+ */
+ mutex_exit(&rproc->p_lock);
+
+ if (event_pid != 0 && event_ppid != 0) {
+ /*
+ * We need to do another pass around the tracee list and
+ * unblock any events that have a "happens after" relationship
+ * with this event.
+ */
+ mutex_enter(&accord->lxpa_tracees_lock);
+ for (remote = list_head(&accord->lxpa_tracees); remote != NULL;
+ remote = list_next(&accord->lxpa_tracees, remote)) {
+ rlwp = remote->br_lwp;
+ rproc = lwptoproc(rlwp);
+
+ mutex_enter(&rproc->p_lock);
+
+ if (remote->br_pid != event_pid ||
+ remote->br_ppid != event_ppid) {
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ remote->br_ptrace_flags &= ~LX_PTRACE_PARENT_WAIT;
+
+ mutex_exit(&rproc->p_lock);
+ }
+ mutex_exit(&accord->lxpa_tracees_lock);
+ }
+
+ /*
+ * If we are consuming this wait state, we remove the SIGCLD from
+ * the queue and post another.
+ */
+ if (waitflag) {
+ mutex_exit(&pidlock);
+ sigcld_delete(ip);
+ sigcld_repost();
+ mutex_enter(&pidlock);
+ }
+
+ *rval = 0;
+ return (0);
+}
+
+/*
+ * Some PTRACE_* requests are handled in-kernel by this function. It is called
+ * through brandsys() via the B_PTRACE_KERNEL subcommand.
+ */
+int
+lx_ptrace_kernel(int ptrace_op, pid_t lxpid, uintptr_t addr, uintptr_t data)
+{
+ lx_lwp_data_t *local = ttolxlwp(curthread);
+ lx_ptrace_accord_t *accord;
+ lx_lwp_data_t *remote;
+ klwp_t *rlwp;
+ proc_t *rproc;
+ int error;
+ boolean_t found = B_FALSE;
+ boolean_t release_hold = B_FALSE;
+
+ _NOTE(ARGUNUSED(addr));
+
+ /*
+ * These actions do not require the target LWP to be traced or stopped.
+ */
+ switch (ptrace_op) {
+ case LX_PTRACE_TRACEME:
+ return (lx_ptrace_traceme());
+
+ case LX_PTRACE_ATTACH:
+ return (lx_ptrace_attach(lxpid));
+ }
+
+ /*
+ * Ensure that we have an accord and obtain a lock on it. This routine
+ * should not fail because the LWP cannot make ptrace(2) system calls
+ * after it has begun exiting.
+ */
+ VERIFY0(local->br_ptrace_flags & LX_PTRACE_EXITING);
+ VERIFY(lx_ptrace_accord_get(&accord, B_TRUE) == 0);
+
+ /*
+ * The accord belongs to this (the tracer) LWP, and we have a hold on
+ * it. We drop the lock so that we can take other locks.
+ */
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * Does the tracee list contain the pid in question?
+ */
+ mutex_enter(&accord->lxpa_tracees_lock);
+ for (remote = list_head(&accord->lxpa_tracees); remote != NULL;
+ remote = list_next(&accord->lxpa_tracees, remote)) {
+ if (remote->br_pid == lxpid) {
+ found = B_TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ /*
+ * The requested pid does not appear in the tracee list.
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+ return (ESRCH);
+ }
+
+ /*
+ * Attempt to lock the target LWP.
+ */
+ if ((error = lx_ptrace_lock_if_stopped(accord, remote)) != 0) {
+ /*
+ * The LWP was not in "ptrace-stop".
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+ return (error);
+ }
+
+ /*
+ * The target LWP is in "ptrace-stop". We have the containing process
+ * locked.
+ */
+ rlwp = remote->br_lwp;
+ rproc = lwptoproc(rlwp);
+
+ /*
+ * Process the ptrace(2) request:
+ */
+ switch (ptrace_op) {
+ case LX_PTRACE_DETACH:
+ error = lx_ptrace_detach(accord, remote, (int)data,
+ &release_hold);
+ break;
+
+ case LX_PTRACE_CONT:
+ error = lx_ptrace_cont(remote, LX_PTC_NONE, (int)data);
+ break;
+
+ case LX_PTRACE_SYSCALL:
+ error = lx_ptrace_cont(remote, LX_PTC_SYSCALL, (int)data);
+ break;
+
+ case LX_PTRACE_SINGLESTEP:
+ error = lx_ptrace_cont(remote, LX_PTC_SINGLESTEP, (int)data);
+ break;
+
+ case LX_PTRACE_SETOPTIONS:
+ error = lx_ptrace_setoptions(remote, data);
+ break;
+
+ case LX_PTRACE_GETEVENTMSG:
+ error = lx_ptrace_geteventmsg(remote, (void *)data);
+ break;
+
+ default:
+ error = EINVAL;
+ }
+
+ /*
+ * Drop the lock on both the tracee process and the tracee list.
+ */
+ mutex_exit(&rproc->p_lock);
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ if (release_hold) {
+ /*
+ * Release a hold from the accord.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+ }
+
+ return (error);
+}
+
+void
+lx_ptrace_init(void)
+{
+ cv_init(&lx_ptrace_busy_cv, NULL, CV_DEFAULT, NULL);
+
+ lx_ptrace_accord_cache = kmem_cache_create("lx_ptrace_accord",
+ sizeof (lx_ptrace_accord_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+}
+
+void
+lx_ptrace_fini(void)
+{
+ cv_destroy(&lx_ptrace_busy_cv);
+
+ kmem_cache_destroy(lx_ptrace_accord_cache);
+}
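
The attach, traceme, stop and waitid paths above implement the kernel half of the Linux tracer/tracee handshake. For orientation, the usermode sequence being emulated looks roughly like the following Linux-side sketch (illustrative only, not part of this change); it exercises PTRACE_TRACEME, PTRACE_SETOPTIONS and an exec event-stop, the paths handled here by lx_ptrace_traceme(), lx_ptrace_setoptions() and lx_ptrace_stop_for_option():

#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int status;
	pid_t child = fork();

	if (child == 0) {
		/* Tracee: the usermode equivalent of lx_ptrace_traceme(). */
		(void) ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		(void) raise(SIGSTOP);
		(void) execlp("true", "true", (char *)NULL);
		_exit(127);
	}

	/* Wait for the initial SIGSTOP (signal-delivery-stop). */
	(void) waitpid(child, &status, 0);

	/* Request an exec event-stop; maps to LX_PTRACE_O_TRACEEXEC. */
	(void) ptrace(PTRACE_SETOPTIONS, child, NULL,
	    (void *)(uintptr_t)PTRACE_O_TRACEEXEC);

	/* Restart the tracee; it should stop again at the exec event. */
	(void) ptrace(PTRACE_CONT, child, NULL, NULL);
	(void) waitpid(child, &status, 0);

	if (WIFSTOPPED(status) &&
	    (status >> 8) == (SIGTRAP | (PTRACE_EVENT_EXEC << 8)))
		(void) printf("exec event-stop observed\n");

	(void) ptrace(PTRACE_CONT, child, NULL, NULL);
	(void) waitpid(child, &status, 0);
	return (0);
}

The (status >> 8) comparison is why the LX_PTRACE_EVENT_* values in lx_brand.h are the Linux PTRACE_EVENT_* numbers shifted left by eight bits.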
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_proc.h b/usr/src/uts/common/brand/lx/procfs/lx_proc.h
index 184a5211db..a5c2391c95 100644
--- a/usr/src/uts/common/brand/lx/procfs/lx_proc.h
+++ b/usr/src/uts/common/brand/lx/procfs/lx_proc.h
@@ -138,6 +138,7 @@ typedef enum lxpr_nodetype {
LXPR_NET_IGMP, /* /proc/net/igmp */
LXPR_NET_IP_MR_CACHE, /* /proc/net/ip_mr_cache */
LXPR_NET_IP_MR_VIF, /* /proc/net/ip_mr_vif */
+ LXPR_NET_IPV6_ROUTE, /* /proc/net/ipv6_route */
LXPR_NET_MCFILTER, /* /proc/net/mcfilter */
LXPR_NET_NETSTAT, /* /proc/net/netstat */
LXPR_NET_RAW, /* /proc/net/raw */
@@ -250,4 +251,11 @@ void lxpr_unlock(proc_t *);
}
#endif
+#ifndef islower
+#define islower(x) (((unsigned)(x) >= 'a') && ((unsigned)(x) <= 'z'))
+#endif
+#ifndef toupper
+#define toupper(x) (islower(x) ? (x) - 'a' + 'A' : (x))
+#endif
+
#endif /* _LXPROC_H */
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c
index a15d852793..3d96a1ceb2 100644
--- a/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c
@@ -482,8 +482,8 @@ lxpr_getnode(vnode_t *dp, lxpr_nodetype_t type, proc_t *p, int fd)
case LXPR_PID_FD_FD:
ASSERT(p != NULL);
/* lxpr_realvp is set after we return */
- vp->v_type = VLNK;
lxpnp->lxpr_mode = 0700; /* read-write-exe owner only */
+ vp->v_type = VLNK;
break;
case LXPR_PID_FDDIR:
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
index df2a4d7fb5..758a9192d7 100644
--- a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
@@ -79,6 +79,8 @@
#include <inet/tcp.h>
#include <inet/udp_impl.h>
#include <inet/ipclassifier.h>
+#include <sys/socketvar.h>
+#include <fs/sockfs/socktpi.h>
/* Dependent on procfs */
extern kthread_t *prchoose(proc_t *);
@@ -108,6 +110,7 @@ static int lxpr_lookup(vnode_t *, char *, vnode_t **,
static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
caller_context_t *, int);
static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
+static int lxpr_readlink_pid_fd(lxpr_node_t *lxpnp, char *bp, size_t len);
static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
static int lxpr_sync(void);
@@ -163,6 +166,7 @@ static void lxpr_read_net_if_inet6(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ipv6_route(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
@@ -320,6 +324,7 @@ static lxpr_dirent_t netdir[] = {
{ LXPR_NET_IGMP, "igmp" },
{ LXPR_NET_IP_MR_CACHE, "ip_mr_cache" },
{ LXPR_NET_IP_MR_VIF, "ip_mr_vif" },
+ { LXPR_NET_IPV6_ROUTE, "ipv6_route" },
{ LXPR_NET_MCFILTER, "mcfilter" },
{ LXPR_NET_NETSTAT, "netstat" },
{ LXPR_NET_RAW, "raw" },
@@ -502,6 +507,7 @@ static void (*lxpr_read_function[LXPR_NFILES])() = {
lxpr_read_net_igmp, /* /proc/net/igmp */
lxpr_read_net_ip_mr_cache, /* /proc/net/ip_mr_cache */
lxpr_read_net_ip_mr_vif, /* /proc/net/ip_mr_vif */
+ lxpr_read_net_ipv6_route, /* /proc/net/ipv6_route */
lxpr_read_net_mcfilter, /* /proc/net/mcfilter */
lxpr_read_net_netstat, /* /proc/net/netstat */
lxpr_read_net_raw, /* /proc/net/raw */
@@ -579,6 +585,7 @@ static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
lxpr_lookup_not_a_dir, /* /proc/net/igmp */
lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */
lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */
+ lxpr_lookup_not_a_dir, /* /proc/net/ipv6_route */
lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */
lxpr_lookup_not_a_dir, /* /proc/net/netstat */
lxpr_lookup_not_a_dir, /* /proc/net/raw */
@@ -656,6 +663,7 @@ static int (*lxpr_readdir_function[LXPR_NFILES])() = {
lxpr_readdir_not_a_dir, /* /proc/net/igmp */
lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */
lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */
+ lxpr_readdir_not_a_dir, /* /proc/net/ipv6_route */
lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */
lxpr_readdir_not_a_dir, /* /proc/net/netstat */
lxpr_readdir_not_a_dir, /* /proc/net/raw */
@@ -976,7 +984,7 @@ lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
int maj = 0;
int min = 0;
- u_longlong_t inode = 0;
+ ino_t inode = 0;
*buf = '\0';
if (pbuf->vp != NULL) {
@@ -993,12 +1001,12 @@ lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
if (p->p_model == DATAMODEL_LP64) {
lxpr_uiobuf_printf(uiobuf,
- "%016llx-%16llx %s %016llx %02d:%03d %lld%s%s\n",
+ "%08llx-%08llx %s %08llx %02x:%02x %llu%s%s\n",
pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
maj, min, inode, *buf != '\0' ? " " : "", buf);
} else {
lxpr_uiobuf_printf(uiobuf,
- "%08x-%08x %s %08x %02d:%03d %lld%s%s\n",
+ "%08x-%08x %s %08x %02x:%02x %llu%s%s\n",
(uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr,
pbuf->prot, (uint32_t)pbuf->offset, maj, min,
inode, *buf != '\0' ? " " : "", buf);
@@ -1768,9 +1776,9 @@ lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
}
static void
-lxpr_inet6_out(in6_addr_t addr, char buf[33])
+lxpr_inet6_out(const in6_addr_t *addr, char buf[33])
{
- uint8_t *ip = addr.s6_addr;
+ const uint8_t *ip = addr->s6_addr;
char digits[] = "0123456789abcdef";
int i;
for (i = 0; i < 16; i++) {
@@ -1811,7 +1819,7 @@ lxpr_read_net_if_inet6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
ipif_get_name(ipif, ifname, sizeof (ifname));
lx_ifname_convert(ifname, LX_IFNAME_FROMNATIVE);
- lxpr_inet6_out(ipif->ipif_v6lcl_addr, ip6out);
+ lxpr_inet6_out(&ipif->ipif_v6lcl_addr, ip6out);
/* Scope output is shifted on Linux */
scope = scope << 4;
@@ -1841,6 +1849,66 @@ lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
+static void
+lxpr_format_route_ipv6(ire_t *ire, lxpr_uiobuf_t *uiobuf)
+{
+ uint32_t flags;
+ char name[IFNAMSIZ];
+ char ipv6addr[33];
+
+ lxpr_inet6_out(&ire->ire_addr_v6, ipv6addr);
+ lxpr_uiobuf_printf(uiobuf, "%s %02x ", ipv6addr,
+ ip_mask_to_plen_v6(&ire->ire_mask_v6));
+
+ /* punt on this for now */
+ lxpr_uiobuf_printf(uiobuf, "%s %02x ",
+ "00000000000000000000000000000000", 0);
+
+ lxpr_inet6_out(&ire->ire_gateway_addr_v6, ipv6addr);
+ lxpr_uiobuf_printf(uiobuf, "%s", ipv6addr);
+
+ flags = ire->ire_flags &
+ (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
+ /* Linux's RTF_LOCAL equivalent */
+ if (ire->ire_metrics.iulp_local)
+ flags |= 0x80000000;
+
+ if (ire->ire_ill != NULL) {
+ ill_get_name(ire->ire_ill, name, sizeof (name));
+ lx_ifname_convert(name, LX_IFNAME_FROMNATIVE);
+ } else {
+ name[0] = '\0';
+ }
+
+ lxpr_uiobuf_printf(uiobuf, " %08x %08x %08x %08x %8s\n",
+ 0, /* metric */
+ ire->ire_refcnt,
+ 0,
+ flags,
+ name);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_ipv6_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ netstack_t *ns;
+ ip_stack_t *ipst;
+
+ ns = netstack_get_current();
+ if (ns == NULL)
+ return;
+ ipst = ns->netstack_ip;
+
+ /*
+ * LX branded zones are expected to have an exclusive IP stack, hence
+ * the use of ALL_ZONES as the zoneid filter.
+ */
+ ire_walk_v6(&lxpr_format_route_ipv6, uiobuf, ALL_ZONES, ipst);
+
+ netstack_rele(ns);
+}
+
/* ARGSUSED */
static void
lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
@@ -1859,10 +1927,97 @@ lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
+#define LXPR_SKIP_ROUTE(type) \
+ (((IRE_IF_CLONE | IRE_BROADCAST | IRE_MULTICAST | \
+ IRE_NOROUTE | IRE_LOOPBACK | IRE_LOCAL) & type) != 0)
+
+static void
+lxpr_format_route_ipv4(ire_t *ire, lxpr_uiobuf_t *uiobuf)
+{
+ uint32_t flags;
+ char name[IFNAMSIZ];
+ ill_t *ill;
+ ire_t *nire;
+ ipif_t *ipif;
+ ipaddr_t gateway;
+
+ if (LXPR_SKIP_ROUTE(ire->ire_type) || ire->ire_testhidden != 0)
+ return;
+
+ /* These route flags have direct Linux equivalents */
+ flags = ire->ire_flags &
+ (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
+
+ /*
+ * Search for a suitable IRE for naming purposes.
+ * On Linux, the default route is typically associated with the
+ * interface used to access the gateway. The default IRE on Illumos
+ * typically lacks an ill reference but its parent might have one.
+ */
+ nire = ire;
+ do {
+ ill = nire->ire_ill;
+ nire = nire->ire_dep_parent;
+ } while (ill == NULL && nire != NULL);
+ if (ill != NULL) {
+ ill_get_name(ill, name, sizeof (name));
+ lx_ifname_convert(name, LX_IFNAME_FROMNATIVE);
+ } else {
+ name[0] = '*';
+ name[1] = '\0';
+ }
+
+ /*
+ * Linux suppresses the gateway address for directly connected
+ * interface networks. To emulate this behavior, we walk all addresses
+ * of a given route interface. If one matches the gateway, it is
+ * displayed as NULL.
+ */
+ gateway = ire->ire_gateway_addr;
+ if ((ill = ire->ire_ill) != NULL) {
+ for (ipif = ill->ill_ipif; ipif != NULL;
+ ipif = ipif->ipif_next) {
+ if (ipif->ipif_lcl_addr == gateway) {
+ gateway = 0;
+ break;
+ }
+ }
+ }
+
+ lxpr_uiobuf_printf(uiobuf, "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
+ "%d\t%08X\t%d\t%u\t%u\n",
+ name,
+ ire->ire_addr,
+ gateway,
+ flags, 0, 0,
+ 0, /* priority */
+ ire->ire_mask,
+ 0, 0, /* mss, window */
+ ire->ire_metrics.iulp_rtt);
+}
+
/* ARGSUSED */
static void
lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
+ netstack_t *ns;
+ ip_stack_t *ipst;
+
+ lxpr_uiobuf_printf(uiobuf, "Iface\tDestination\tGateway \tFlags\t"
+ "RefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
+
+ ns = netstack_get_current();
+ if (ns == NULL)
+ return;
+ ipst = ns->netstack_ip;
+
+ /*
+ * LX branded zones are expected to have an exclusive IP stack, hence
+ * the use of ALL_ZONES as the zoneid filter.
+ */
+ ire_walk_v4(&lxpr_format_route_ipv4, uiobuf, ALL_ZONES, ipst);
+
+ netstack_rele(ns);
}
/* ARGSUSED */
@@ -1883,10 +2038,146 @@ lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
+typedef struct lxpr_snmp_table {
+ const char *lst_proto;
+ const char *lst_fields[];
+} lxpr_snmp_table_t;
+
+static lxpr_snmp_table_t lxpr_snmp_ip = { "ip",
+ {
+ "forwarding", "defaultTTL", "inReceives", "inHdrErrors",
+ "inAddrErrors", "forwDatagrams", "inUnknownProtos", "inDiscards",
+ "inDelivers", "outRequests", "outDiscards", "outNoRoutes",
+ "reasmTimeout", "reasmReqds", "reasmOKs", "reasmFails", "fragOKs",
+ "fragFails", "fragCreates",
+ NULL
+ }
+};
+static lxpr_snmp_table_t lxpr_snmp_icmp = { "icmp",
+ {
+ "inMsgs", "inErrors", "inCsumErrors", "inDestUnreachs", "inTimeExcds",
+ "inParmProbs", "inSrcQuenchs", "inRedirects", "inEchos", "inEchoReps",
+ "inTimestamps", "inTimestampReps", "inAddrMasks", "inAddrMaskReps",
+ "outMsgs", "outErrors", "outDestUnreachs", "outTimeExcds",
+ "outParmProbs", "outSrcQuenchs", "outRedirects", "outEchos",
+ "outEchoReps", "outTimestamps", "outTimestampReps", "outAddrMasks",
+ "outAddrMaskReps",
+ NULL
+ }
+};
+static lxpr_snmp_table_t lxpr_snmp_tcp = { "tcp",
+ {
+ "rtoAlgorithm", "rtoMin", "rtoMax", "maxConn", "activeOpens",
+ "passiveOpens", "attemptFails", "estabResets", "currEstab", "inSegs",
+ "outSegs", "retransSegs", "inErrs", "outRsts", "inCsumErrors",
+ NULL
+ }
+};
+static lxpr_snmp_table_t lxpr_snmp_udp = { "udp",
+ {
+ "inDatagrams", "noPorts", "inErrors", "outDatagrams", "rcvbufErrors",
+ "sndbufErrors", "inCsumErrors",
+ NULL
+ }
+};
+
+static lxpr_snmp_table_t *lxpr_net_snmptab[] = {
+ &lxpr_snmp_ip,
+ &lxpr_snmp_icmp,
+ &lxpr_snmp_tcp,
+ &lxpr_snmp_udp,
+ NULL
+};
+
+static void
+lxpr_kstat_print_tab(lxpr_uiobuf_t *uiobuf, lxpr_snmp_table_t *table,
+ kstat_t *kn)
+{
+ kstat_named_t *klist;
+ char upname[KSTAT_STRLEN], upfield[KSTAT_STRLEN];
+ int i, j, num;
+ size_t size;
+
+ klist = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
+ if (klist == NULL)
+ return;
+
+ /* Print the header line, fields capitalized */
+ (void) strncpy(upname, table->lst_proto, KSTAT_STRLEN);
+ upname[0] = toupper(upname[0]);
+ lxpr_uiobuf_printf(uiobuf, "%s:", upname);
+ for (i = 0; table->lst_fields[i] != NULL; i++) {
+ (void) strncpy(upfield, table->lst_fields[i], KSTAT_STRLEN);
+ upfield[0] = toupper(upfield[0]);
+ lxpr_uiobuf_printf(uiobuf, " %s", upfield);
+ }
+ lxpr_uiobuf_printf(uiobuf, "\n%s:", upname);
+
+ /* Then loop back through to print the value line. */
+ for (i = 0; table->lst_fields[i] != NULL; i++) {
+ kstat_named_t *kpoint = NULL;
+ for (j = 0; j < num; j++) {
+ if (strncmp(klist[j].name, table->lst_fields[i],
+ KSTAT_STRLEN) == 0) {
+ kpoint = &klist[j];
+ break;
+ }
+ }
+ if (kpoint == NULL) {
+ /* Output 0 for unknown fields */
+ lxpr_uiobuf_printf(uiobuf, " 0");
+ } else {
+ switch (kpoint->data_type) {
+ case KSTAT_DATA_INT32:
+ lxpr_uiobuf_printf(uiobuf, " %d",
+ kpoint->value.i32);
+ break;
+ case KSTAT_DATA_UINT32:
+ lxpr_uiobuf_printf(uiobuf, " %u",
+ kpoint->value.ui32);
+ break;
+ case KSTAT_DATA_INT64:
+ lxpr_uiobuf_printf(uiobuf, " %ld",
+ kpoint->value.l);
+ break;
+ case KSTAT_DATA_UINT64:
+ lxpr_uiobuf_printf(uiobuf, " %lu",
+ kpoint->value.ul);
+ break;
+ }
+ }
+ }
+ lxpr_uiobuf_printf(uiobuf, "\n");
+ kmem_free(klist, size);
+}
+
/* ARGSUSED */
static void
lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
+ kstat_t *ksr;
+ kstat_t ks0;
+ lxpr_snmp_table_t **table = lxpr_net_snmptab;
+ int i, t, nidx;
+ size_t sidx;
+
+ ks0.ks_kid = 0;
+ ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
+ if (ksr == NULL)
+ return;
+
+ for (t = 0; table[t] != NULL; t++) {
+ for (i = 0; i < nidx; i++) {
+ if (strncmp(ksr[i].ks_class, "mib2", KSTAT_STRLEN) != 0)
+ continue;
+ if (strncmp(ksr[i].ks_name, table[t]->lst_proto,
+ KSTAT_STRLEN) == 0) {
+ lxpr_kstat_print_tab(uiobuf, table[t], &ksr[i]);
+ break;
+ }
+ }
+ }
+ kmem_free(ksr, sidx);
}
/* ARGSUSED */
@@ -1963,13 +2254,13 @@ lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
* - tx_queue
* - rx_queue
* - uid
+ * - inode
*
* Omitted/invalid fields
* - tr
* - tm->when
* - retrnsmt
* - timeout
- * - inode
*/
ns = netstack_get_current();
@@ -1983,6 +2274,9 @@ lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
while ((connp =
ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) {
tcp_t *tcp;
+ vattr_t attr;
+ sonode_t *so = (sonode_t *)connp->conn_upper_handle;
+ vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
if (connp->conn_ipversion != ipver)
continue;
tcp = connp->conn_tcp;
@@ -2010,9 +2304,15 @@ lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
connp->conn_faddr_v6.s6_addr32[3],
ntohs(connp->conn_fport));
}
+
+ /* fetch the simulated inode for the socket */
+ if (vp == NULL ||
+ VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
+ attr.va_nodeid = 0;
+
lxpr_uiobuf_printf(uiobuf,
"%02X %08X:%08X %02X:%08X %08X "
- "%5u %8d %u %d %p %u %u %u %u %d\n",
+ "%5u %8d %lu %d %p %u %u %u %u %d\n",
lxpr_convert_tcp_state(tcp->tcp_state),
tcp->tcp_rcv_cnt, tcp->tcp_unsent, /* rx/tx queue */
0, 0, /* tr, when */
@@ -2020,7 +2320,7 @@ lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
connp->conn_cred->cr_uid,
0, /* timeout */
/* inode + more */
- 0, 0, NULL, 0, 0, 0, 0, 0);
+ (ino_t)attr.va_nodeid, 0, NULL, 0, 0, 0, 0, 0);
}
}
netstack_rele(ns);
@@ -2093,6 +2393,9 @@ lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
ipcl_get_next_conn(connfp, connp, IPCL_UDPCONN)) != NULL) {
udp_t *udp;
int state = 0;
+ vattr_t attr;
+ sonode_t *so = (sonode_t *)connp->conn_upper_handle;
+ vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
if (connp->conn_ipversion != ipver)
continue;
udp = connp->conn_udp;
@@ -2120,6 +2423,7 @@ lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
connp->conn_faddr_v6.s6_addr32[3],
ntohs(connp->conn_fport));
}
+
switch (udp->udp_state) {
case TS_UNBND:
case TS_IDLE:
@@ -2129,9 +2433,15 @@ lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
state = 1;
break;
}
+
+ /* fetch the simulated inode for the socket */
+ if (vp == NULL ||
+ VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
+ attr.va_nodeid = 0;
+
lxpr_uiobuf_printf(uiobuf,
"%02X %08X:%08X %02X:%08X %08X "
- "%5u %8d %u %d %p %d\n",
+ "%5u %8d %lu %d %p %d\n",
state,
0, 0, /* rx/tx queue */
0, 0, /* tr, when */
@@ -2139,7 +2449,7 @@ lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
connp->conn_cred->cr_uid,
0, /* timeout */
/* inode, ref, pointer, drops */
- 0, 0, NULL, 0);
+ (ino_t)attr.va_nodeid, 0, NULL, 0);
}
}
netstack_rele(ns);
@@ -2163,6 +2473,95 @@ lxpr_read_net_udp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
static void
lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
+ sonode_t *so;
+ zoneid_t zoneid = getzoneid();
+
+ lxpr_uiobuf_printf(uiobuf, "Num RefCount Protocol Flags Type "
+ "St Inode Path\n");
+
+ mutex_enter(&socklist.sl_lock);
+ for (so = socklist.sl_list; so != NULL;
+ so = _SOTOTPI(so)->sti_next_so) {
+ vnode_t *vp = so->so_vnode;
+ vattr_t attr;
+ sotpi_info_t *sti;
+ const char *name = NULL;
+ int status = 0;
+ int type = 0;
+ int flags = 0;
+
+ /* Only process active sonodes in this zone */
+ if (so->so_count == 0 || so->so_zoneid != zoneid)
+ continue;
+
+ /*
+ * Grab the inode, if possible.
+ * This must be done before entering so_lock.
+ */
+ if (vp == NULL ||
+ VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
+ attr.va_nodeid = 0;
+
+ mutex_enter(&so->so_lock);
+ sti = _SOTOTPI(so);
+
+ if (sti->sti_laddr_sa != NULL)
+ name = sti->sti_laddr_sa->sa_data;
+ else if (sti->sti_faddr_sa != NULL)
+ name = sti->sti_faddr_sa->sa_data;
+
+ /*
+ * Derived from enum values in Linux kernel source:
+ * include/uapi/linux/net.h
+ */
+ if ((so->so_state & SS_ISDISCONNECTING) != 0) {
+ status = 4;
+ } else if ((so->so_state & SS_ISCONNECTING) != 0) {
+ status = 2;
+ } else if ((so->so_state & SS_ISCONNECTED) != 0) {
+ status = 3;
+ } else {
+ status = 1;
+ /* Add ACC flag for stream-type server sockets */
+ if (so->so_type != SOCK_DGRAM &&
+ sti->sti_laddr_sa != NULL)
+ flags |= 0x10000;
+ }
+
+ /* Convert to Linux type */
+ switch (so->so_type) {
+ case SOCK_DGRAM:
+ type = 2;
+ break;
+ case SOCK_SEQPACKET:
+ type = 5;
+ break;
+ default:
+ type = 1;
+ }
+
+ lxpr_uiobuf_printf(uiobuf, "%p: %08X %08X %08X %04X %02X %5llu",
+ so,
+ so->so_count,
+ 0, /* proto, always 0 */
+ flags,
+ type,
+ status,
+ (ino_t)attr.va_nodeid);
+
+ /*
+ * Due to shortcomings in the abstract socket emulation, abstract
+ * sockets cannot be properly represented here (as @<path>).
+ *
+ * This will remain the case until they are better implemented.
+ */
+ if (name != NULL)
+ lxpr_uiobuf_printf(uiobuf, " %s\n", name);
+ else
+ lxpr_uiobuf_printf(uiobuf, "\n");
+ mutex_exit(&so->so_lock);
+ }
+ mutex_exit(&socklist.sl_lock);
}
/*
@@ -3170,6 +3569,13 @@ lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
vap->va_uid = crgetruid(curproc->p_cred);
vap->va_gid = crgetrgid(curproc->p_cred);
break;
+ case LXPR_PID_FD_FD:
+ /*
+ * Restore VLNK type for lstat-type activity.
+ * See lxpr_readlink for more details.
+ */
+ if ((flags & FOLLOW) == 0)
+ vap->va_type = VLNK;
default:
break;
}
@@ -3451,17 +3857,15 @@ lxpr_lookup_fddir(vnode_t *dp, char *comp)
*/
lxpnp->lxpr_realvp = vp;
VN_HOLD(lxpnp->lxpr_realvp);
- if (lxpnp->lxpr_realvp->v_type == VFIFO) {
- /*
- * lxpr_getnode initially sets the type to be VLNK for
- * the LXPR_PID_FD_FD option, but that breaks fifo
- * file descriptors (which are unlinked named pipes).
- * We set this as a regular file so that open.2 comes
- * into lxpr_open so we can do more work.
- */
- dp = LXPTOV(lxpnp);
- dp->v_type = VREG;
- }
+ /*
+ * For certain entries (sockets, pipes, etc), Linux expects a
+ * bogus-named symlink. If that's the case, report the type as
+ * VNON to bypass link-following elsewhere in the vfs system.
+ *
+ * See lxpr_readlink for more details.
+ */
+ if (lxpr_readlink_pid_fd(lxpnp, NULL, 0) == 0)
+ LXPTOV(lxpnp)->v_type = VNON;
}
mutex_enter(&p->p_lock);
@@ -4053,16 +4457,41 @@ lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
pid_t pid;
int error = 0;
- /* must be a symbolic link file */
- if (vp->v_type != VLNK)
+ /*
+ * Linux does something very "clever" for /proc/<pid>/fd/<num> entries.
+ * Open FDs are represented as symlinks, the link contents
+ * corresponding to the open resource. For plain files or devices,
+ * this isn't absurd since one can dereference the symlink to query
+ * the underlying resource. For sockets or pipes, it becomes ugly in a
+ * hurry. To maintain this human-readable output, those FD symlinks
+ * point to bogus targets such as "socket:[<inodenum>]". This requires
+ * circumventing vfs since the stat/lstat behavior on those FD entries
+ * will be unusual. (A stat must retrieve information about the open
+ * socket or pipe. It cannot fail because the link contents point to
+ * an absent file.)
+ *
+ * To accomplish this, lxpr_getnode returns a vnode typed VNON for FD
+ * entries. This bypasses code paths which would normally
+ * short-circuit on symlinks and allows us to emulate the vfs behavior
+ * expected by /proc consumers.
+ */
+ if (vp->v_type != VLNK && lxpnp->lxpr_type != LXPR_PID_FD_FD)
return (EINVAL);
/* Try to produce a symlink name for anything that has a realvp */
if (rvp != NULL) {
if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
return (error);
- if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0)
- return (error);
+ if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) {
+ /*
+ * Special handling possible for /proc/<pid>/fd/<num>
+ * Generate <type>:[<inode>] links, if allowed.
+ */
+ if (lxpnp->lxpr_type != LXPR_PID_FD_FD ||
+ lxpr_readlink_pid_fd(lxpnp, bp, buflen) != 0) {
+ return (error);
+ }
+ }
} else {
switch (lxpnp->lxpr_type) {
case LXPR_SELF:
@@ -4104,6 +4533,37 @@ lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
}
/*
+ * Attempt to create Linux-proc-style fake symlink contents for supported
+ * /proc/<pid>/fd/<#> entries.
+ */
+static int
+lxpr_readlink_pid_fd(lxpr_node_t *lxpnp, char *bp, size_t len)
+{
+ const char *format;
+ vnode_t *rvp = lxpnp->lxpr_realvp;
+ vattr_t attr;
+
+ switch (rvp->v_type) {
+ case VSOCK:
+ format = "socket:[%lu]";
+ break;
+ case VFIFO:
+ format = "pipe:[%lu]";
+ break;
+ default:
+ return (-1);
+ }
+
+ /* Fetch the inode of the underlying vnode */
+ if (VOP_GETATTR(rvp, &attr, 0, CRED(), NULL) != 0)
+ return (-1);
+
+ if (bp != NULL)
+ (void) snprintf(bp, len, format, (ino_t)attr.va_nodeid);
+ return (0);
+}
+
+/*
* lxpr_inactive(): Vnode operation for VOP_INACTIVE()
* Vnode is no longer referenced, deallocate the file
* and all its resources.
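
The readlink emulation above exists because Linux tools routinely readlink() their own /proc/<pid>/fd entries and expect the "socket:[<inode>]" and "pipe:[<inode>]" forms for descriptors with no real path. A small consumer-side sketch (illustrative only, not part of this change) shows the behaviour lxpr_readlink_pid_fd() is reproducing:

#include <stdio.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

int
main(void)
{
	char path[64], target[128];
	ssize_t n;
	int fd = socket(AF_UNIX, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("socket");
		return (1);
	}

	(void) snprintf(path, sizeof (path), "/proc/self/fd/%d", fd);
	if ((n = readlink(path, target, sizeof (target) - 1)) < 0) {
		perror("readlink");
		return (1);
	}
	target[n] = '\0';

	/* Expected output is of the form: /proc/self/fd/3 -> socket:[12345] */
	(void) printf("%s -> %s\n", path, target);
	return (0);
}

The inode in brackets is the va_nodeid fetched via VOP_GETATTR(), the same value now reported in the inode columns of /proc/net/tcp, /proc/net/udp and /proc/net/unix, so utilities such as lsof can correlate open descriptors with socket entries.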
diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h
index 942a6e3b44..e7f5ee9867 100644
--- a/usr/src/uts/common/brand/lx/sys/lx_brand.h
+++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h
@@ -80,10 +80,10 @@ extern "C" {
#define B_LPID_TO_SPAIR 128
#define B_SYSENTRY 129
#define B_SYSRETURN 130
-#define B_PTRACE_SYSCALL 131
+#define B_PTRACE_KERNEL 131
#define B_SET_AFFINITY_MASK 132
#define B_GET_AFFINITY_MASK 133
-#define B_PTRACE_EXT_OPTS 134
+#define B_PTRACE_CLONE_BEGIN 134
#define B_PTRACE_STOP_FOR_OPT 135
#define B_UNSUPPORTED 136
#define B_STORE_ARGS 137
@@ -91,37 +91,31 @@ extern "C" {
#define B_SIGNAL_RETURN 139
#define B_UNWIND_NTV_SYSC_FLAG 140
#define B_EXIT_AS_SIG 141
-#define B_PTRACE_GETEVENTMSG 142
+#define B_HELPER_WAITID 142
#define B_IKE_SYSCALL 192
-/* B_PTRACE_EXT_OPTS subcommands */
-#define B_PTRACE_EXT_OPTS_SET 1
-#define B_PTRACE_EXT_OPTS_GET 2
-#define B_PTRACE_EXT_OPTS_EVT 3
-#define B_PTRACE_DETACH 4
-
+#ifndef _ASM
/*
* Support for Linux PTRACE_SETOPTIONS handling.
*/
-#define LX_PTRACE_O_TRACESYSGOOD 0x0001
-#define LX_PTRACE_O_TRACEFORK 0x0002
-#define LX_PTRACE_O_TRACEVFORK 0x0004
-#define LX_PTRACE_O_TRACECLONE 0x0008
-#define LX_PTRACE_O_TRACEEXEC 0x0010
-#define LX_PTRACE_O_TRACEVFORKDONE 0x0020
-#define LX_PTRACE_O_TRACEEXIT 0x0040
-#define LX_PTRACE_O_TRACESECCOMP 0x0080
-/*
- * lx emulation-specific flag to indicate this is a child process being stopped
- * due to one of the PTRACE_SETOPTIONS above.
- */
-#define EMUL_PTRACE_O_CHILD 0x8000
-/*
- * lx emulation-specific flag to determine via B_PTRACE_EXT_OPTS_GET if a
- * process is being traced because of one of the PTRACE_SETOPTIONS above.
- */
-#define EMUL_PTRACE_IS_TRACED 0x8000
+typedef enum lx_ptrace_options {
+ LX_PTRACE_O_TRACESYSGOOD = 0x0001,
+ LX_PTRACE_O_TRACEFORK = 0x0002,
+ LX_PTRACE_O_TRACEVFORK = 0x0004,
+ LX_PTRACE_O_TRACECLONE = 0x0008,
+ LX_PTRACE_O_TRACEEXEC = 0x0010,
+ LX_PTRACE_O_TRACEVFORKDONE = 0x0020,
+ LX_PTRACE_O_TRACEEXIT = 0x0040,
+ LX_PTRACE_O_TRACESECCOMP = 0x0080
+} lx_ptrace_options_t;
+
+#define LX_PTRACE_O_ALL \
+ (LX_PTRACE_O_TRACESYSGOOD | LX_PTRACE_O_TRACEFORK | \
+ LX_PTRACE_O_TRACEVFORK | LX_PTRACE_O_TRACECLONE | \
+ LX_PTRACE_O_TRACEEXEC | LX_PTRACE_O_TRACEVFORKDONE | \
+ LX_PTRACE_O_TRACEEXIT | LX_PTRACE_O_TRACESECCOMP)
+#endif /* !_ASM */
/* siginfo si_status for traced events */
#define LX_PTRACE_EVENT_FORK 0x100
@@ -132,6 +126,17 @@ extern "C" {
#define LX_PTRACE_EVENT_EXIT 0x600
#define LX_PTRACE_EVENT_SECCOMP 0x700
+/*
+ * Brand-private values for the "pr_what" member of lwpstatus, for use with the
+ * PR_BRAND stop reason. These reasons are validated in lx_stop_notify();
+ * update it if you add new reasons here.
+ */
+#define LX_PR_SYSENTRY 1
+#define LX_PR_SYSEXIT 2
+#define LX_PR_SIGNALLED 3
+#define LX_PR_EVENT 4
+
+
#define LX_VERSION_1 1
#define LX_VERSION LX_VERSION_1
@@ -154,6 +159,8 @@ extern "C" {
#ifndef _ASM
+extern struct brand lx_brand;
+
typedef struct lx_brand_registration {
uint_t lxbr_version; /* version number */
void *lxbr_handler; /* base address of handler */
@@ -255,10 +262,6 @@ typedef struct lx_proc_data {
uintptr_t l_traceflag; /* address of 32-bit tracing flag */
pid_t l_ppid; /* pid of originating parent proc */
uint64_t l_ptrace; /* process being observed with ptrace */
- uint_t l_ptrace_opts; /* process's extended ptrace options */
- uint_t l_ptrace_event; /* extended ptrace option trap event */
- uint_t l_ptrace_is_traced; /* set if traced due to ptrace setoptions */
- ulong_t l_ptrace_eventmsg; /* extended ptrace event msg */
lx_elf_data_t l_elf_data; /* ELF data for linux executable */
int l_signal; /* signal to deliver to parent when this */
/* thread group dies */
@@ -280,10 +283,70 @@ typedef ulong_t lx_affmask_t[LX_AFF_ULONGS];
#ifdef _KERNEL
+typedef struct lx_lwp_data lx_lwp_data_t;
+
+/*
+ * Flag values for "lxpa_flags" on a ptrace(2) accord.
+ */
+typedef enum lx_accord_flags {
+ LX_ACC_TOMBSTONE = 0x01
+} lx_accord_flags_t;
+
+/*
+ * Flags values for "br_ptrace_flags" in the LWP-specific data.
+ */
+typedef enum lx_ptrace_state {
+ LX_PTRACE_SYSCALL = 0x01,
+ LX_PTRACE_EXITING = 0x02,
+ LX_PTRACE_STOPPING = 0x04,
+ LX_PTRACE_INHERIT = 0x08,
+ LX_PTRACE_STOPPED = 0x10,
+ LX_PTRACE_PARENT_WAIT = 0x20,
+ LX_PTRACE_CLDPEND = 0x40,
+ LX_PTRACE_CLONING = 0x80
+} lx_ptrace_state_t;
+
+/*
+ * A ptrace(2) accord represents the relationship between a tracer LWP and the
+ * set of LWPs that it is tracing: the tracees. This data structure belongs
+ * primarily to the tracer, but is reference counted so that it may be freed by
+ * whoever references it last.
+ */
+typedef struct lx_ptrace_accord {
+ kmutex_t lxpa_lock;
+ uint_t lxpa_refcnt;
+ lx_accord_flags_t lxpa_flags;
+
+ /*
+ * The tracer must hold "pidlock" while clearing these fields for
+ * exclusion of waitid(), etc.
+ */
+ lx_lwp_data_t *lxpa_tracer;
+ kcondvar_t *lxpa_cvp;
+
+ /*
+ * The "lxpa_tracees_lock" mutex protects the tracee list.
+ */
+ kmutex_t lxpa_tracees_lock;
+ list_t lxpa_tracees;
+} lx_ptrace_accord_t;
+
+/*
+ * These values are stored in the per-LWP data for a tracee when it is attached
+ * to a tracer. They record the method that was used to attach.
+ */
+typedef enum lx_ptrace_attach {
+ LX_PTA_NONE = 0x00, /* not attached */
+ LX_PTA_ATTACH = 0x01, /* due to tracer using PTRACE_ATTACH */
+ LX_PTA_TRACEME = 0x02, /* due to child using PTRACE_TRACEME */
+ LX_PTA_INHERIT_CLONE = 0x04, /* due to the clone(2) CLONE_PTRACE flag */
+ LX_PTA_INHERIT_OPTIONS = 0x08 /* due to PTRACE_SETOPTIONS options */
+} lx_ptrace_attach_t;
+
/*
* lx-specific data in the klwp_t
*/
-typedef struct lx_lwp_data {
+struct lx_lwp_data {
uint_t br_ntv_syscall; /* 1 = syscall from native libc */
uint_t br_lwp_flags; /* misc. flags */
klwp_t *br_lwp; /* back pointer to container lwp */
@@ -317,8 +380,26 @@ typedef struct lx_lwp_data {
void *br_scall_args;
int br_args_size; /* size in bytes of br_scall_args */
- uint_t br_ptrace; /* ptrace is active for this LWP */
-} lx_lwp_data_t;
+ boolean_t br_waitid_emulate;
+ int br_waitid_flags;
+
+ lx_ptrace_state_t br_ptrace_flags; /* ptrace state for this LWP */
+ lx_ptrace_options_t br_ptrace_options; /* PTRACE_SETOPTIONS options */
+ lx_ptrace_options_t br_ptrace_clone_option; /* current clone(2) type */
+
+ lx_ptrace_attach_t br_ptrace_attach; /* how did we get attached */
+ lx_ptrace_accord_t *br_ptrace_accord; /* accord for this tracer LWP */
+ lx_ptrace_accord_t *br_ptrace_tracer; /* accord tracing this LWP */
+ list_node_t br_ptrace_linkage; /* linkage for lxpa_tracees list */
+
+ ushort_t br_ptrace_whystop; /* stop reason, 0 for no stop */
+ ushort_t br_ptrace_whatstop; /* stop sub-reason */
+
+ int32_t br_ptrace_stopsig; /* stop signal, 0 for no signal */
+
+ uint_t br_ptrace_event;
+ ulong_t br_ptrace_eventmsg;
+};
/*
* Upper limit on br_args_size, low because this value can persist until
@@ -336,8 +417,13 @@ typedef struct lx_zone_data {
#define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t))
#define lwptolxlwp(l) ((struct lx_lwp_data *)lwptolwpbrand(l))
-#define ttolxproc(t) ((struct lx_proc_data *)(t)->t_procp->p_brand_data)
-#define ptolxproc(p) ((struct lx_proc_data *)(p)->p_brand_data)
+#define ttolxproc(t) \
+ (((t)->t_procp->p_brand == &lx_brand) ? \
+ (struct lx_proc_data *)(t)->t_procp->p_brand_data : NULL)
+#define ptolxproc(p) \
+ (((p)->p_brand == &lx_brand) ? \
+ (struct lx_proc_data *)(p)->p_brand_data : NULL)
+
/* Macro for converting to system call arguments. */
#define LX_ARGS(scall) ((struct lx_##scall##_args *)\
(ttolxlwp(curthread)->br_scall_args))
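
The reworked ttolxproc()/ptolxproc() macros now yield NULL when the process is not lx-branded, so callers are expected to check the result. The following stand-alone C sketch mirrors that guard with hypothetical struct and macro names; it is an illustration of the pattern, not the kernel definitions.

#include <stddef.h>
#include <stdio.h>

/*
 * Illustration only: an accessor that yields brand-private data when the
 * process carries the expected brand, and NULL otherwise.  The types and
 * names here are hypothetical, not the kernel's.
 */
struct brand { const char *b_name; };
struct proc {
    const struct brand *p_brand;
    void *p_brand_data;
};

const struct brand lx_brand_example = { "lx" };

#define PTOLXPROC(p) \
    (((p)->p_brand == &lx_brand_example) ? (p)->p_brand_data : NULL)

int
main(void)
{
    int lx_data = 42;
    struct proc branded = { &lx_brand_example, &lx_data };
    struct proc native = { NULL, NULL };

    /* Callers must handle the NULL case for non-lx processes. */
    printf("branded: %p\n", PTOLXPROC(&branded));
    printf("native:  %p\n", PTOLXPROC(&native));
    return (0);
}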
diff --git a/usr/src/uts/common/brand/lx/sys/lx_misc.h b/usr/src/uts/common/brand/lx/sys/lx_misc.h
index 56b5bb4047..7b77789c56 100644
--- a/usr/src/uts/common/brand/lx/sys/lx_misc.h
+++ b/usr/src/uts/common/brand/lx/sys/lx_misc.h
@@ -46,6 +46,20 @@ extern boolean_t lx_wait_filter(proc_t *, proc_t *);
extern void lx_ifname_convert(char *, int);
+extern boolean_t lx_ptrace_stop(ushort_t);
+extern void lx_stop_notify(proc_t *, klwp_t *, ushort_t, ushort_t);
+extern void lx_ptrace_init(void);
+extern void lx_ptrace_fini(void);
+extern int lx_ptrace_kernel(int, pid_t, uintptr_t, uintptr_t);
+extern int lx_waitid_helper(idtype_t, id_t, k_siginfo_t *, int, boolean_t *,
+ int *);
+extern void lx_ptrace_exit(proc_t *, klwp_t *);
+extern void lx_ptrace_inherit_tracer(lx_lwp_data_t *, lx_lwp_data_t *);
+extern int lx_ptrace_stop_for_option(int, boolean_t, ulong_t);
+extern int lx_ptrace_set_clone_inherit(int, boolean_t);
+extern int lx_sigcld_repost(proc_t *, sigqueue_t *);
+extern int lx_issig_stop(proc_t *, klwp_t *);
+
#endif
#ifdef __cplusplus
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_clone.c b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
index 949db3a73b..d73c5f100b 100644
--- a/usr/src/uts/common/brand/lx/syscall/lx_clone.c
+++ b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
@@ -21,7 +21,7 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2014 Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#include <sys/types.h>
@@ -32,25 +32,10 @@
#include <sys/lx_ldt.h>
#include <sys/lx_misc.h>
#include <lx_signum.h>
+#include <lx_syscall.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
-#define LX_CSIGNAL 0x000000ff
-#define LX_CLONE_VM 0x00000100
-#define LX_CLONE_FS 0x00000200
-#define LX_CLONE_FILES 0x00000400
-#define LX_CLONE_SIGHAND 0x00000800
-#define LX_CLONE_PID 0x00001000
-#define LX_CLONE_PTRACE 0x00002000
-#define LX_CLONE_PARENT 0x00008000
-#define LX_CLONE_THREAD 0x00010000
-#define LX_CLONE_SYSVSEM 0x00040000
-#define LX_CLONE_SETTLS 0x00080000
-#define LX_CLONE_PARENT_SETTID 0x00100000
-#define LX_CLONE_CHILD_CLEARTID 0x00200000
-#define LX_CLONE_DETACH 0x00400000
-#define LX_CLONE_CHILD_SETTID 0x01000000
-
/*
* Our lwp has already been created at this point, so this routine is
* responsible for setting up all the state needed to track this as a
diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c
index cfc4c99f64..ae6c5eef16 100644
--- a/usr/src/uts/common/disp/thread.c
+++ b/usr/src/uts/common/disp/thread.c
@@ -87,7 +87,7 @@ struct kmem_cache *turnstile_cache; /* cache of free turnstiles */
* allthreads is only for use by kmem_readers. All kernel loops can use
* the current thread as a start/end point.
*/
-static kthread_t *allthreads = &t0; /* circular list of all threads */
+kthread_t *allthreads = &t0; /* circular list of all threads */
static kcondvar_t reaper_cv; /* synchronization var */
kthread_t *thread_deathrow; /* circular list of reapable threads */
diff --git a/usr/src/uts/common/fs/lookup.c b/usr/src/uts/common/fs/lookup.c
index 6819509d00..55ffb94805 100644
--- a/usr/src/uts/common/fs/lookup.c
+++ b/usr/src/uts/common/fs/lookup.c
@@ -20,6 +20,7 @@
*/
/*
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
*/
@@ -217,7 +218,6 @@ lookuppnvp(
cred_t *cr) /* user's credential */
{
vnode_t *cvp; /* current component vp */
- vnode_t *tvp; /* addressable temp ptr */
char component[MAXNAMELEN]; /* buffer for component (incl null) */
int error;
int nlink;
@@ -373,7 +373,7 @@ checkforroot:
/*
* Perform a lookup in the current directory.
*/
- error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags,
+ error = VOP_LOOKUP(vp, component, &cvp, pnp, lookup_flags,
rootvp, cr, NULL, NULL, pp);
/*
@@ -391,10 +391,9 @@ checkforroot:
* directory inside NFS FS.
*/
if ((error == EACCES) && retry_with_kcred)
- error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags,
+ error = VOP_LOOKUP(vp, component, &cvp, pnp, lookup_flags,
rootvp, zone_kcred(), NULL, NULL, pp);
- cvp = tvp;
if (error) {
cvp = NULL;
/*
@@ -440,20 +439,8 @@ checkforroot:
* be atomic!)
*/
if (vn_mountedvfs(cvp) != NULL) {
- tvp = cvp;
- if ((error = traverse(&tvp)) != 0) {
- /*
- * It is required to assign cvp here, because
- * traverse() will return a held vnode which
- * may different than the vnode that was passed
- * in (even in the error case). If traverse()
- * changes the vnode it releases the original,
- * and holds the new one.
- */
- cvp = tvp;
+ if ((error = traverse(&cvp)) != 0)
goto bad;
- }
- cvp = tvp;
}
/*
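
This hunk leans on the traverse(&cvp) calling convention: the callee may swap the caller's held vnode for a new one in place, which is why the tvp temporary is no longer needed. Below is a minimal user-space sketch of that pointer-to-pointer handle hand-off, with invented names (handle_t, cross_mount) standing in for vnode_t and traverse().

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Sketch of the traverse(&vp) convention: the callee may replace the
 * caller's handle, releasing the old object and handing back a new one,
 * so the caller never needs a separate temporary pointer.
 */
typedef struct handle {
    char h_name[32];
} handle_t;

int
cross_mount(handle_t **hp)
{
    handle_t *covered = *hp;
    handle_t *root = malloc(sizeof (*root));

    if (root == NULL)
        return (-1);        /* *hp still points at a valid handle */
    (void) snprintf(root->h_name, sizeof (root->h_name),
        "root-of-%s", covered->h_name);
    free(covered);          /* "release" the old handle */
    *hp = root;             /* caller's pointer now names the new one */
    return (0);
}

int
main(void)
{
    handle_t *h = malloc(sizeof (*h));

    if (h == NULL)
        return (1);
    (void) strcpy(h->h_name, "mntpt");
    if (cross_mount(&h) == 0)
        printf("%s\n", h->h_name);
    free(h);
    return (0);
}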
diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv.c b/usr/src/uts/common/fs/nfs/nfs4_srv.c
index 127d9e3f29..fe1a10b966 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_srv.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_srv.c
@@ -18,10 +18,11 @@
*
* CDDL HEADER END
*/
+
/*
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -869,7 +870,7 @@ static nfsstat4
do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
{
int error, different_export = 0;
- vnode_t *dvp, *vp, *tvp;
+ vnode_t *dvp, *vp;
struct exportinfo *exi = NULL;
fid_t fid;
uint_t count, i;
@@ -950,14 +951,12 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
* If it's a mountpoint, then traverse it.
*/
if (vn_ismntpt(vp)) {
- tvp = vp;
- if ((error = traverse(&tvp)) != 0) {
+ if ((error = traverse(&vp)) != 0) {
VN_RELE(vp);
return (puterrno4(error));
}
/* remember that we had to traverse mountpoint */
did_traverse = TRUE;
- vp = tvp;
different_export = 1;
} else if (vp->v_vfsp != dvp->v_vfsp) {
/*
@@ -2610,7 +2609,7 @@ do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
{
int error;
int different_export = 0;
- vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
+ vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
struct exportinfo *exi = NULL, *pre_exi = NULL;
nfsstat4 stat;
fid_t fid;
@@ -2708,13 +2707,11 @@ do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
* need pre_tvp below if checkexport4 fails
*/
VN_HOLD(pre_tvp);
- tvp = vp;
- if ((error = traverse(&tvp)) != 0) {
+ if ((error = traverse(&vp)) != 0) {
VN_RELE(vp);
VN_RELE(pre_tvp);
return (puterrno4(error));
}
- vp = tvp;
different_export = 1;
} else if (vp->v_vfsp != cs->vp->v_vfsp) {
/*
diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c b/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c
index 3069a98835..276d3b4f19 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c
@@ -18,6 +18,11 @@
*
* CDDL HEADER END
*/
+
+/*
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ */
+
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -149,6 +154,7 @@ nfs4_readdir_getvp(vnode_t *dvp, char *d_name, vnode_t **vpp,
VN_HOLD(pre_tvp);
if ((error = traverse(&vp)) != 0) {
+ VN_RELE(vp);
VN_RELE(pre_tvp);
return (error);
}
diff --git a/usr/src/uts/common/fs/proc/prcontrol.c b/usr/src/uts/common/fs/proc/prcontrol.c
index a5679a8afb..7e99d23b97 100644
--- a/usr/src/uts/common/fs/proc/prcontrol.c
+++ b/usr/src/uts/common/fs/proc/prcontrol.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -1481,7 +1481,7 @@ pr_setsig(prnode_t *pnp, siginfo_t *sip)
} else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
/* If SIGKILL, set stopped lwp running */
p->p_stopsig = 0;
- t->t_schedflag |= TS_XSTART | TS_PSTART;
+ t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART;
t->t_dtrace_stop = 0;
setrun_locked(t);
}
diff --git a/usr/src/uts/common/fs/proc/prsubr.c b/usr/src/uts/common/fs/proc/prsubr.c
index 7801fd0ac8..284bf8cb88 100644
--- a/usr/src/uts/common/fs/proc/prsubr.c
+++ b/usr/src/uts/common/fs/proc/prsubr.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -201,6 +201,7 @@ prchoose(proc_t *p)
case PR_SYSEXIT:
case PR_SIGNALLED:
case PR_FAULTED:
+ case PR_BRAND:
/*
* Make an lwp calling exit() be the
* last lwp seen in the process.
diff --git a/usr/src/uts/common/fs/smbsrv/smb_common_open.c b/usr/src/uts/common/fs/smbsrv/smb_common_open.c
index 3fa43d43cb..5eaa5865c6 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_common_open.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_common_open.c
@@ -820,8 +820,8 @@ smb_open_subr(smb_request_t *sr)
status = NT_STATUS_SUCCESS;
- of = smb_ofile_open(sr->tid_tree, node, sr->smb_pid, op, SMB_FTYPE_DISK,
- uniq_fid, &err);
+ of = smb_ofile_open(sr, node, sr->smb_pid, op, SMB_FTYPE_DISK, uniq_fid,
+ &err);
if (of == NULL) {
smbsr_error(sr, err.status, err.errcls, err.errcode);
status = err.status;
diff --git a/usr/src/uts/common/fs/smbsrv/smb_delete.c b/usr/src/uts/common/fs/smbsrv/smb_delete.c
index 4930f741ef..14eff73896 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_delete.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_delete.c
@@ -297,7 +297,7 @@ smb_delete_multiple_files(smb_request_t *sr, smb_error_t *err)
if (odid == 0)
return (-1);
- if ((od = smb_tree_lookup_odir(sr->tid_tree, odid)) == NULL)
+ if ((od = smb_tree_lookup_odir(sr, odid)) == NULL)
return (-1);
for (;;) {
diff --git a/usr/src/uts/common/fs/smbsrv/smb_dispatch.c b/usr/src/uts/common/fs/smbsrv/smb_dispatch.c
index 1afcf18b28..9b1fed6f9a 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_dispatch.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_dispatch.c
@@ -20,8 +20,8 @@
*/
/*
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -694,16 +694,13 @@ andx_more:
}
sr->user_cr = smb_user_getcred(sr->uid_user);
-
- if (!(sdd->sdt_flags & SDDF_SUPPRESS_TID) &&
- (sr->tid_tree == NULL)) {
- sr->tid_tree = smb_user_lookup_tree(
- sr->uid_user, sr->smb_tid);
- if (sr->tid_tree == NULL) {
- smbsr_error(sr, 0, ERRSRV, ERRinvnid);
- smbsr_cleanup(sr);
- goto report_error;
- }
+ }
+ if (!(sdd->sdt_flags & SDDF_SUPPRESS_TID) && (sr->tid_tree == NULL)) {
+ sr->tid_tree = smb_session_lookup_tree(session, sr->smb_tid);
+ if (sr->tid_tree == NULL) {
+ smbsr_error(sr, 0, ERRSRV, ERRinvnid);
+ smbsr_cleanup(sr);
+ goto report_error;
}
}
@@ -1116,8 +1113,7 @@ void
smbsr_lookup_file(smb_request_t *sr)
{
if (sr->fid_ofile == NULL)
- sr->fid_ofile = smb_ofile_lookup_by_fid(sr->tid_tree,
- sr->smb_fid);
+ sr->fid_ofile = smb_ofile_lookup_by_fid(sr, sr->smb_fid);
}
static int
diff --git a/usr/src/uts/common/fs/smbsrv/smb_find.c b/usr/src/uts/common/fs/smbsrv/smb_find.c
index 1dae4e8cb5..eecbeff4df 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_find.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_find.c
@@ -306,7 +306,7 @@ smb_com_search(smb_request_t *sr)
}
}
- od = smb_tree_lookup_odir(sr->tid_tree, odid);
+ od = smb_tree_lookup_odir(sr, odid);
if (od == NULL) {
smbsr_error(sr, NT_STATUS_INVALID_HANDLE,
ERRDOS, ERROR_INVALID_HANDLE);
@@ -452,7 +452,7 @@ smb_com_find(smb_request_t *sr)
}
}
- od = smb_tree_lookup_odir(sr->tid_tree, odid);
+ od = smb_tree_lookup_odir(sr, odid);
if (od == NULL) {
smbsr_error(sr, NT_STATUS_INVALID_HANDLE,
ERRDOS, ERROR_INVALID_HANDLE);
@@ -575,7 +575,7 @@ smb_com_find_close(smb_request_t *sr)
return (SDRC_ERROR);
}
- od = smb_tree_lookup_odir(sr->tid_tree, odid);
+ od = smb_tree_lookup_odir(sr, odid);
if (od == NULL) {
smbsr_error(sr, NT_STATUS_INVALID_HANDLE,
ERRDOS, ERROR_INVALID_HANDLE);
@@ -649,7 +649,7 @@ smb_com_find_unique(struct smb_request *sr)
odid = smb_odir_open(sr, pn->pn_path, sattr, 0);
if (odid == 0)
return (SDRC_ERROR);
- od = smb_tree_lookup_odir(sr->tid_tree, odid);
+ od = smb_tree_lookup_odir(sr, odid);
if (od == NULL)
return (SDRC_ERROR);
diff --git a/usr/src/uts/common/fs/smbsrv/smb_fsops.c b/usr/src/uts/common/fs/smbsrv/smb_fsops.c
index 2f4545e966..c64313fdbf 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_fsops.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_fsops.c
@@ -805,7 +805,7 @@ smb_fsop_remove_streams(smb_request_t *sr, cred_t *cr, smb_node_t *fnode)
return (-1);
}
- if ((od = smb_tree_lookup_odir(sr->tid_tree, odid)) == NULL) {
+ if ((od = smb_tree_lookup_odir(sr, odid)) == NULL) {
smbsr_errno(sr, ENOENT);
return (-1);
}
diff --git a/usr/src/uts/common/fs/smbsrv/smb_nt_create_andx.c b/usr/src/uts/common/fs/smbsrv/smb_nt_create_andx.c
index c77c175fc1..037c1373b5 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_nt_create_andx.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_nt_create_andx.c
@@ -264,8 +264,7 @@ smb_com_nt_create_andx(struct smb_request *sr)
if (op->rootdirfid == 0) {
op->fqi.fq_dnode = sr->tid_tree->t_snode;
} else {
- op->dir = smb_ofile_lookup_by_fid(sr->tid_tree,
- (uint16_t)op->rootdirfid);
+ op->dir = smb_ofile_lookup_by_fid(sr, (uint16_t)op->rootdirfid);
if (op->dir == NULL) {
smbsr_error(sr, NT_STATUS_INVALID_HANDLE,
ERRDOS, ERRbadfid);
diff --git a/usr/src/uts/common/fs/smbsrv/smb_nt_transact_create.c b/usr/src/uts/common/fs/smbsrv/smb_nt_transact_create.c
index fcc12f2fc8..dcfa469617 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_nt_transact_create.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_nt_transact_create.c
@@ -173,8 +173,7 @@ smb_nt_transact_create(smb_request_t *sr, smb_xa_t *xa)
if (op->rootdirfid == 0) {
op->fqi.fq_dnode = sr->tid_tree->t_snode;
} else {
- op->dir = smb_ofile_lookup_by_fid(sr->tid_tree,
- (uint16_t)op->rootdirfid);
+ op->dir = smb_ofile_lookup_by_fid(sr, (uint16_t)op->rootdirfid);
if (op->dir == NULL) {
smbsr_error(sr, NT_STATUS_INVALID_HANDLE,
ERRDOS, ERRbadfid);
diff --git a/usr/src/uts/common/fs/smbsrv/smb_odir.c b/usr/src/uts/common/fs/smbsrv/smb_odir.c
index b8435d191a..16fffa6692 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_odir.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_odir.c
@@ -39,15 +39,15 @@
* +-------------------+ +-------------------+ +-------------------+
* | SESSION |<----->| SESSION |......| SESSION |
* +-------------------+ +-------------------+ +-------------------+
- * |
- * |
- * v
- * +-------------------+ +-------------------+ +-------------------+
- * | USER |<----->| USER |......| USER |
- * +-------------------+ +-------------------+ +-------------------+
- * |
- * |
- * v
+ * | |
+ * | |
+ * | v
+ * | +-------------------+ +-------------------+ +-------------------+
+ * | | USER |<--->| USER |...| USER |
+ * | +-------------------+ +-------------------+ +-------------------+
+ * |
+ * |
+ * v
* +-------------------+ +-------------------+ +-------------------+
* | TREE |<----->| TREE |......| TREE |
* +-------------------+ +-------------------+ +-------------------+
@@ -153,7 +153,7 @@
* and add it into the tree's list of odirs.
* Return an identifier (odid) uniquely identifying the created odir.
*
- * smb_odir_t *odir = smb_tree_lookup_odir(odid)
+ * smb_odir_t *odir = smb_tree_lookup_odir(..., odid)
* Find the odir corresponding to the specified odid in the tree's
* list of odirs. Place a hold on the odir.
*
@@ -312,9 +312,9 @@ smb_odir_open(smb_request_t *sr, char *path, uint16_t sattr, uint32_t flags)
}
if (flags & SMB_ODIR_OPENF_BACKUP_INTENT)
- cr = smb_user_getprivcred(tree->t_user);
+ cr = smb_user_getprivcred(sr->uid_user);
else
- cr = tree->t_user->u_cred;
+ cr = sr->uid_user->u_cred;
odid = smb_odir_create(sr, dnode, pattern, sattr, cr);
smb_node_release(dnode);
@@ -888,6 +888,12 @@ smb_odir_create(smb_request_t *sr, smb_node_t *dnode,
od->d_opened_by_pid = sr->smb_pid;
od->d_session = tree->t_session;
od->d_cred = cr;
+ /*
+ * grab a ref for od->d_user
+ * released in smb_odir_delete()
+ */
+ smb_user_hold_internal(sr->uid_user);
+ od->d_user = sr->uid_user;
od->d_tree = tree;
od->d_dnode = dnode;
smb_node_ref(dnode);
@@ -947,6 +953,7 @@ smb_odir_delete(void *arg)
od->d_magic = 0;
smb_node_release(od->d_dnode);
+ smb_user_release(od->d_user);
mutex_destroy(&od->d_mutex);
kmem_cache_free(od->d_tree->t_server->si_cache_odir, od);
}
diff --git a/usr/src/uts/common/fs/smbsrv/smb_ofile.c b/usr/src/uts/common/fs/smbsrv/smb_ofile.c
index 8987da2950..ee45f13c8b 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_ofile.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_ofile.c
@@ -39,15 +39,15 @@
* +-------------------+ +-------------------+ +-------------------+
* | SESSION |<----->| SESSION |......| SESSION |
* +-------------------+ +-------------------+ +-------------------+
- * |
- * |
- * v
- * +-------------------+ +-------------------+ +-------------------+
- * | USER |<----->| USER |......| USER |
- * +-------------------+ +-------------------+ +-------------------+
- * |
- * |
- * v
+ * | |
+ * | |
+ * | v
+ * | +-------------------+ +-------------------+ +-------------------+
+ * | | USER |<--->| USER |...| USER |
+ * | +-------------------+ +-------------------+ +-------------------+
+ * |
+ * |
+ * v
* +-------------------+ +-------------------+ +-------------------+
* | TREE |<----->| TREE |......| TREE |
* +-------------------+ +-------------------+ +-------------------+
@@ -175,7 +175,7 @@ static void smb_ofile_netinfo_fini(smb_netfileinfo_t *);
*/
smb_ofile_t *
smb_ofile_open(
- smb_tree_t *tree,
+ smb_request_t *sr,
smb_node_t *node,
uint16_t pid,
struct open_param *op,
@@ -183,10 +183,13 @@ smb_ofile_open(
uint32_t uniqid,
smb_error_t *err)
{
+ smb_tree_t *tree = sr->tid_tree;
smb_ofile_t *of;
uint16_t fid;
smb_attr_t attr;
int rc;
+ enum errstates { EMPTY, FIDALLOC, CRHELD, MUTEXINIT };
+ enum errstates state = EMPTY;
if (smb_idpool_alloc(&tree->t_fid_pool, &fid)) {
err->status = NT_STATUS_TOO_MANY_OPENED_FILES;
@@ -194,6 +197,7 @@ smb_ofile_open(
err->errcode = ERROR_TOO_MANY_OPEN_FILES;
return (NULL);
}
+ state = FIDALLOC;
of = kmem_cache_alloc(tree->t_server->si_cache_ofile, KM_SLEEP);
bzero(of, sizeof (smb_ofile_t));
@@ -206,16 +210,23 @@ smb_ofile_open(
of->f_share_access = op->share_access;
of->f_create_options = op->create_options;
of->f_cr = (op->create_options & FILE_OPEN_FOR_BACKUP_INTENT) ?
- smb_user_getprivcred(tree->t_user) : tree->t_user->u_cred;
+ smb_user_getprivcred(sr->uid_user) : sr->uid_user->u_cred;
crhold(of->f_cr);
+ state = CRHELD;
of->f_ftype = ftype;
of->f_server = tree->t_server;
- of->f_session = tree->t_user->u_session;
- of->f_user = tree->t_user;
+ of->f_session = tree->t_session;
+ /*
+ * grab a ref for of->f_user
+ * released in smb_ofile_delete()
+ */
+ smb_user_hold_internal(sr->uid_user);
+ of->f_user = sr->uid_user;
of->f_tree = tree;
of->f_node = node;
mutex_init(&of->f_mutex, NULL, MUTEX_DEFAULT, NULL);
+ state = MUTEXINIT;
of->f_state = SMB_OFILE_STATE_OPEN;
if (ftype == SMB_FTYPE_MESG_PIPE) {
@@ -232,15 +243,10 @@ smb_ofile_open(
attr.sa_mask = SMB_AT_UID | SMB_AT_DOSATTR;
rc = smb_node_getattr(NULL, node, of->f_cr, NULL, &attr);
if (rc != 0) {
- of->f_magic = 0;
- mutex_destroy(&of->f_mutex);
- crfree(of->f_cr);
- smb_idpool_free(&tree->t_fid_pool, of->f_fid);
- kmem_cache_free(tree->t_server->si_cache_ofile, of);
err->status = NT_STATUS_INTERNAL_ERROR;
err->errcls = ERRDOS;
err->errcode = ERROR_INTERNAL_ERROR;
- return (NULL);
+ goto errout;
}
if (crgetuid(of->f_cr) == attr.sa_vattr.va_uid) {
/*
@@ -254,16 +260,10 @@ smb_ofile_open(
of->f_mode =
smb_fsop_amask_to_omode(of->f_granted_access);
if (smb_fsop_open(node, of->f_mode, of->f_cr) != 0) {
- of->f_magic = 0;
- mutex_destroy(&of->f_mutex);
- crfree(of->f_cr);
- smb_idpool_free(&tree->t_fid_pool, of->f_fid);
- kmem_cache_free(tree->t_server->si_cache_ofile,
- of);
err->status = NT_STATUS_ACCESS_DENIED;
err->errcls = ERRDOS;
err->errcode = ERROR_ACCESS_DENIED;
- return (NULL);
+ goto errout;
}
}
@@ -290,6 +290,25 @@ smb_ofile_open(
atomic_inc_32(&tree->t_open_files);
atomic_inc_32(&of->f_session->s_file_cnt);
return (of);
+
+errout:
+ switch (state) {
+ case MUTEXINIT:
+ mutex_destroy(&of->f_mutex);
+ smb_user_release(of->f_user);
+ /*FALLTHROUGH*/
+ case CRHELD:
+ crfree(of->f_cr);
+ of->f_magic = 0;
+ kmem_cache_free(tree->t_server->si_cache_ofile, of);
+ /*FALLTHROUGH*/
+ case FIDALLOC:
+ smb_idpool_free(&tree->t_fid_pool, fid);
+ /*FALLTHROUGH*/
+ case EMPTY:
+ break;
+ }
+ return (NULL);
}
/*
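
The errout block above uses a small state enum to unwind only the resources acquired before the failure, falling through the cases in reverse order of acquisition. The sketch below shows the same idiom in stand-alone C with made-up resources (a buffer and a FILE handle); it is illustrative, not the smbsrv code.

#include <stdio.h>
#include <stdlib.h>

/*
 * Sketch of the "enum errstates" cleanup idiom: record how far setup
 * progressed, and on failure fall through the cases in reverse order of
 * acquisition.  The resources here are invented for illustration.
 */
int
setup_object(void)
{
    enum errstates { EMPTY, BUFALLOC, FILEOPEN } state = EMPTY;
    char *buf = NULL;
    FILE *fp = NULL;

    if ((buf = malloc(64)) == NULL)
        goto errout;
    state = BUFALLOC;

    if ((fp = fopen("/nonexistent/path", "r")) == NULL)
        goto errout;
    state = FILEOPEN;

    /* ... further setup would go here ... */
    (void) fclose(fp);
    free(buf);
    return (0);

errout:
    switch (state) {
    case FILEOPEN:
        (void) fclose(fp);
        /*FALLTHROUGH*/
    case BUFALLOC:
        free(buf);
        /*FALLTHROUGH*/
    case EMPTY:
        break;
    }
    return (-1);
}

int
main(void)
{
    return (setup_object() == 0 ? 0 : 1);
}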
@@ -601,9 +620,10 @@ smb_ofile_request_complete(smb_ofile_t *of)
*/
smb_ofile_t *
smb_ofile_lookup_by_fid(
- smb_tree_t *tree,
+ smb_request_t *sr,
uint16_t fid)
{
+ smb_tree_t *tree = sr->tid_tree;
smb_llist_t *of_list;
smb_ofile_t *of;
@@ -616,19 +636,32 @@ smb_ofile_lookup_by_fid(
while (of) {
ASSERT(of->f_magic == SMB_OFILE_MAGIC);
ASSERT(of->f_tree == tree);
- if (of->f_fid == fid) {
- mutex_enter(&of->f_mutex);
- if (of->f_state != SMB_OFILE_STATE_OPEN) {
- mutex_exit(&of->f_mutex);
- smb_llist_exit(of_list);
- return (NULL);
- }
- of->f_refcnt++;
- mutex_exit(&of->f_mutex);
+ if (of->f_fid == fid)
break;
- }
of = smb_llist_next(of_list, of);
}
+ if (of == NULL)
+ goto out;
+
+ /*
+ * Only allow use of a given FID with the same UID that
+ * was used to open it. MS-CIFS 3.3.5.14
+ */
+ if (of->f_user != sr->uid_user) {
+ of = NULL;
+ goto out;
+ }
+
+ mutex_enter(&of->f_mutex);
+ if (of->f_state != SMB_OFILE_STATE_OPEN) {
+ mutex_exit(&of->f_mutex);
+ of = NULL;
+ goto out;
+ }
+ of->f_refcnt++;
+ mutex_exit(&of->f_mutex);
+
+out:
smb_llist_exit(of_list);
return (of);
}
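
The revised lookup finds the ofile by FID first and then refuses it when the requesting user is not the one that opened it, taking the reference only after both checks pass. Here is a simplified, single-threaded C sketch of that search-then-verify-owner shape; the structures and the lookup_by_fid() name are invented for illustration.

#include <stddef.h>
#include <stdio.h>

/* Simplified, lock-free sketch of "find by id, then verify the owner". */
struct user { int u_id; };

struct ofile {
    unsigned short f_fid;
    const struct user *f_user;  /* owner that opened this file */
    int f_open;                 /* nonzero while open */
    int f_refcnt;
    struct ofile *f_next;
};

struct ofile *
lookup_by_fid(struct ofile *list, unsigned short fid, const struct user *caller)
{
    struct ofile *of;

    for (of = list; of != NULL; of = of->f_next)
        if (of->f_fid == fid)
            break;
    if (of == NULL)
        return (NULL);
    if (of->f_user != caller)   /* only the opener may use the FID */
        return (NULL);
    if (!of->f_open)
        return (NULL);
    of->f_refcnt++;             /* caller must drop this later */
    return (of);
}

int
main(void)
{
    struct user alice = { 1 }, bob = { 2 };
    struct ofile f = { 7, &alice, 1, 0, NULL };

    printf("alice: %p\n", (void *)lookup_by_fid(&f, 7, &alice));
    printf("bob:   %p\n", (void *)lookup_by_fid(&f, 7, &bob));
    return (0);
}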
@@ -921,6 +954,7 @@ smb_ofile_delete(void *arg)
of->f_magic = (uint32_t)~SMB_OFILE_MAGIC;
mutex_destroy(&of->f_mutex);
crfree(of->f_cr);
+ smb_user_release(of->f_user);
kmem_cache_free(of->f_tree->t_server->si_cache_ofile, of);
}
diff --git a/usr/src/uts/common/fs/smbsrv/smb_opipe.c b/usr/src/uts/common/fs/smbsrv/smb_opipe.c
index bb178f3952..90cb25aaa0 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_opipe.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_opipe.c
@@ -130,8 +130,8 @@ smb_opipe_open(smb_request_t *sr)
op->create_options = 0;
- of = smb_ofile_open(sr->tid_tree, NULL, sr->smb_pid, op,
- SMB_FTYPE_MESG_PIPE, SMB_UNIQ_FID(), &err);
+ of = smb_ofile_open(sr, NULL, sr->smb_pid, op, SMB_FTYPE_MESG_PIPE,
+ SMB_UNIQ_FID(), &err);
if (of == NULL)
return (err.status);
diff --git a/usr/src/uts/common/fs/smbsrv/smb_process_exit.c b/usr/src/uts/common/fs/smbsrv/smb_process_exit.c
index b8c835cd57..2839ca2807 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_process_exit.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_process_exit.c
@@ -85,11 +85,11 @@ smb_com_process_exit(smb_request_t *sr)
* to be the only thing that sends this request these days and
* it doesn't provide a TID.
*/
- sr->tid_tree = smb_user_lookup_tree(sr->uid_user, sr->smb_tid);
+ sr->tid_tree = smb_session_lookup_tree(sr->session, sr->smb_tid);
if (sr->tid_tree != NULL)
smb_tree_close_pid(sr->tid_tree, sr->smb_pid);
else
- smb_user_close_pid(sr->uid_user, sr->smb_pid);
+ smb_session_close_pid(sr->session, sr->smb_pid);
rc = smbsr_encode_empty_result(sr);
return ((rc == 0) ? SDRC_SUCCESS : SDRC_ERROR);
diff --git a/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c b/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c
index bef69e7f61..70ac2e7b24 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c
@@ -662,7 +662,7 @@ smb_encode_stream_info(smb_request_t *sr, smb_xa_t *xa, smb_queryinfo_t *qinfo)
odid = smb_odir_openat(sr, fnode);
if (odid != 0)
- od = smb_tree_lookup_odir(sr->tid_tree, odid);
+ od = smb_tree_lookup_odir(sr, odid);
if (od != NULL)
rc = smb_odir_read_streaminfo(sr, od, sinfo, &eos);
diff --git a/usr/src/uts/common/fs/smbsrv/smb_server.c b/usr/src/uts/common/fs/smbsrv/smb_server.c
index 3654744569..8687d42b18 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_server.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_server.c
@@ -240,7 +240,8 @@ static void smb_event_cancel(smb_server_t *, uint32_t);
static uint32_t smb_event_alloc_txid(void);
static void smb_server_disconnect_share(smb_llist_t *, const char *);
-static void smb_server_enum_private(smb_llist_t *, smb_svcenum_t *);
+static void smb_server_enum_users(smb_llist_t *, smb_svcenum_t *);
+static void smb_server_enum_trees(smb_llist_t *, smb_svcenum_t *);
static int smb_server_session_disconnect(smb_llist_t *, const char *,
const char *);
static int smb_server_fclose(smb_llist_t *, uint32_t);
@@ -833,15 +834,6 @@ smb_server_enum(smb_ioc_svcenum_t *ioc)
smb_server_t *sv;
int rc;
- switch (svcenum->se_type) {
- case SMB_SVCENUM_TYPE_USER:
- case SMB_SVCENUM_TYPE_TREE:
- case SMB_SVCENUM_TYPE_FILE:
- break;
- default:
- return (EINVAL);
- }
-
if ((rc = smb_server_lookup(&sv)) != 0)
return (rc);
@@ -849,11 +841,26 @@ smb_server_enum(smb_ioc_svcenum_t *ioc)
svcenum->se_bused = 0;
svcenum->se_nitems = 0;
- smb_server_enum_private(&sv->sv_nbt_daemon.ld_session_list, svcenum);
- smb_server_enum_private(&sv->sv_tcp_daemon.ld_session_list, svcenum);
+ switch (svcenum->se_type) {
+ case SMB_SVCENUM_TYPE_USER:
+ smb_server_enum_users(&sv->sv_nbt_daemon.ld_session_list,
+ svcenum);
+ smb_server_enum_users(&sv->sv_tcp_daemon.ld_session_list,
+ svcenum);
+ break;
+ case SMB_SVCENUM_TYPE_TREE:
+ case SMB_SVCENUM_TYPE_FILE:
+ smb_server_enum_trees(&sv->sv_nbt_daemon.ld_session_list,
+ svcenum);
+ smb_server_enum_trees(&sv->sv_tcp_daemon.ld_session_list,
+ svcenum);
+ break;
+ default:
+ rc = EINVAL;
+ }
smb_server_release(sv);
- return (0);
+ return (rc);
}
/*
@@ -1694,7 +1701,7 @@ smb_server_release(smb_server_t *sv)
* Enumerate the users associated with a session list.
*/
static void
-smb_server_enum_private(smb_llist_t *ll, smb_svcenum_t *svcenum)
+smb_server_enum_users(smb_llist_t *ll, smb_svcenum_t *svcenum)
{
smb_session_t *sn;
smb_llist_t *ulist;
@@ -1714,6 +1721,8 @@ smb_server_enum_private(smb_llist_t *ll, smb_svcenum_t *svcenum)
if (smb_user_hold(user)) {
rc = smb_user_enum(user, svcenum);
smb_user_release(user);
+ if (rc != 0)
+ break;
}
user = smb_llist_next(ulist, user);
@@ -1731,6 +1740,48 @@ smb_server_enum_private(smb_llist_t *ll, smb_svcenum_t *svcenum)
}
/*
+ * Enumerate the trees/files associated with a session list.
+ */
+static void
+smb_server_enum_trees(smb_llist_t *ll, smb_svcenum_t *svcenum)
+{
+ smb_session_t *sn;
+ smb_llist_t *tlist;
+ smb_tree_t *tree;
+ int rc = 0;
+
+ smb_llist_enter(ll, RW_READER);
+ sn = smb_llist_head(ll);
+
+ while (sn != NULL) {
+ SMB_SESSION_VALID(sn);
+ tlist = &sn->s_tree_list;
+ smb_llist_enter(tlist, RW_READER);
+ tree = smb_llist_head(tlist);
+
+ while (tree != NULL) {
+ if (smb_tree_hold(tree)) {
+ rc = smb_tree_enum(tree, svcenum);
+ smb_tree_release(tree);
+ if (rc != 0)
+ break;
+ }
+
+ tree = smb_llist_next(tlist, tree);
+ }
+
+ smb_llist_exit(tlist);
+
+ if (rc != 0)
+ break;
+
+ sn = smb_llist_next(ll, sn);
+ }
+
+ smb_llist_exit(ll);
+}
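
smb_server_enum_trees() stops walking both the inner tree list and the outer session list as soon as the per-item enumerator returns non-zero (typically because the caller's buffer is full). A small stand-alone C sketch of that nested walk with an early out follows; the sink/emit names are illustrative only.

#include <stdio.h>

/*
 * Sketch of a nested enumeration that stops as soon as the per-item
 * callback reports "no more room".  Data and names are illustrative.
 */
struct sink {
    int remaining;              /* slots left in the caller's buffer */
};

int
emit(struct sink *s, int value)
{
    if (s->remaining == 0)
        return (-1);            /* buffer full: stop enumerating */
    s->remaining--;
    printf("%d\n", value);
    return (0);
}

void
enum_all(int groups[][3], int ngroups, struct sink *s)
{
    int g, i, rc = 0;

    for (g = 0; g < ngroups; g++) {
        for (i = 0; i < 3; i++) {
            rc = emit(s, groups[g][i]);
            if (rc != 0)
                break;          /* stop the inner walk */
        }
        if (rc != 0)
            break;              /* propagate the stop outward */
    }
}

int
main(void)
{
    int groups[2][3] = { { 1, 2, 3 }, { 4, 5, 6 } };
    struct sink s = { 4 };

    enum_all(groups, 2, &s);    /* prints 1 2 3 4, then stops */
    return (0);
}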
+
+/*
* Disconnect sessions associated with the specified client and username.
* Empty strings are treated as wildcards.
*/
@@ -1796,8 +1847,8 @@ static int
smb_server_fclose(smb_llist_t *ll, uint32_t uniqid)
{
smb_session_t *sn;
- smb_llist_t *ulist;
- smb_user_t *user;
+ smb_llist_t *tlist;
+ smb_tree_t *tree;
int rc = ENOENT;
smb_llist_enter(ll, RW_READER);
@@ -1805,20 +1856,20 @@ smb_server_fclose(smb_llist_t *ll, uint32_t uniqid)
while ((sn != NULL) && (rc == ENOENT)) {
SMB_SESSION_VALID(sn);
- ulist = &sn->s_user_list;
- smb_llist_enter(ulist, RW_READER);
- user = smb_llist_head(ulist);
-
- while ((user != NULL) && (rc == ENOENT)) {
- if (smb_user_hold(user)) {
- rc = smb_user_fclose(user, uniqid);
- smb_user_release(user);
+ tlist = &sn->s_tree_list;
+ smb_llist_enter(tlist, RW_READER);
+ tree = smb_llist_head(tlist);
+
+ while ((tree != NULL) && (rc == ENOENT)) {
+ if (smb_tree_hold(tree)) {
+ rc = smb_tree_fclose(tree, uniqid);
+ smb_tree_release(tree);
}
- user = smb_llist_next(ulist, user);
+ tree = smb_llist_next(tlist, tree);
}
- smb_llist_exit(ulist);
+ smb_llist_exit(tlist);
sn = smb_llist_next(ll, sn);
}
diff --git a/usr/src/uts/common/fs/smbsrv/smb_session.c b/usr/src/uts/common/fs/smbsrv/smb_session.c
index 0fdac10ca6..b8284b372f 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_session.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_session.c
@@ -43,6 +43,7 @@ static int smb_session_message(smb_session_t *);
static int smb_session_xprt_puthdr(smb_session_t *, smb_xprt_t *,
uint8_t *, size_t);
static smb_user_t *smb_session_lookup_user(smb_session_t *, char *, char *);
+static smb_tree_t *smb_session_get_tree(smb_session_t *, smb_tree_t *);
static void smb_session_logoff(smb_session_t *);
static void smb_request_init_command_mbuf(smb_request_t *sr);
void dump_smb_inaddr(smb_inaddr_t *ipaddr);
@@ -624,6 +625,11 @@ smb_session_create(ksocket_t new_so, uint16_t port, smb_server_t *sv,
kmem_cache_free(sv->si_cache_session, session);
return (NULL);
}
+ if (smb_idpool_constructor(&session->s_tid_pool)) {
+ smb_idpool_destructor(&session->s_uid_pool);
+ kmem_cache_free(sv->si_cache_session, session);
+ return (NULL);
+ }
now = ddi_get_lbolt64();
@@ -642,6 +648,9 @@ smb_session_create(ksocket_t new_so, uint16_t port, smb_server_t *sv,
smb_llist_constructor(&session->s_user_list, sizeof (smb_user_t),
offsetof(smb_user_t, u_lnd));
+ smb_llist_constructor(&session->s_tree_list, sizeof (smb_tree_t),
+ offsetof(smb_tree_t, t_lnd));
+
smb_llist_constructor(&session->s_xa_list, sizeof (smb_xa_t),
offsetof(smb_xa_t, xa_lnd));
@@ -719,6 +728,7 @@ smb_session_delete(smb_session_t *session)
list_destroy(&session->s_oplock_brkreqs);
smb_slist_destructor(&session->s_req_list);
+ smb_llist_destructor(&session->s_tree_list);
smb_llist_destructor(&session->s_user_list);
smb_llist_destructor(&session->s_xa_list);
@@ -726,6 +736,7 @@ smb_session_delete(smb_session_t *session)
ASSERT(session->s_file_cnt == 0);
ASSERT(session->s_dir_cnt == 0);
+ smb_idpool_destructor(&session->s_tid_pool);
smb_idpool_destructor(&session->s_uid_pool);
if (session->sock != NULL) {
if (session->s_local_port == IPPORT_NETBIOS_SSN)
@@ -928,45 +939,306 @@ smb_session_post_user(smb_session_t *session, smb_user_t *user)
}
/*
- * Logoff all users associated with the specified session.
+ * Find a tree by tree-id.
*/
-static void
-smb_session_logoff(smb_session_t *session)
+smb_tree_t *
+smb_session_lookup_tree(
+ smb_session_t *session,
+ uint16_t tid)
+
{
- smb_user_t *user;
+ smb_tree_t *tree;
SMB_SESSION_VALID(session);
- smb_llist_enter(&session->s_user_list, RW_READER);
+ smb_llist_enter(&session->s_tree_list, RW_READER);
+ tree = smb_llist_head(&session->s_tree_list);
- user = smb_llist_head(&session->s_user_list);
- while (user) {
- SMB_USER_VALID(user);
- ASSERT(user->u_session == session);
+ while (tree) {
+ ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
+ ASSERT(tree->t_session == session);
- if (smb_user_hold(user)) {
- smb_user_logoff(user);
- smb_user_release(user);
+ if (tree->t_tid == tid) {
+ if (smb_tree_hold(tree)) {
+ smb_llist_exit(&session->s_tree_list);
+ return (tree);
+ } else {
+ smb_llist_exit(&session->s_tree_list);
+ return (NULL);
+ }
}
- user = smb_llist_next(&session->s_user_list, user);
+ tree = smb_llist_next(&session->s_tree_list, tree);
}
- smb_llist_exit(&session->s_user_list);
+ smb_llist_exit(&session->s_tree_list);
+ return (NULL);
+}
+
+/*
+ * Find the first connected tree that matches the specified sharename.
+ * If the specified tree is NULL the search starts from the beginning of
+ * the user's tree list. If a tree is provided the search starts just
+ * after that tree.
+ */
+smb_tree_t *
+smb_session_lookup_share(
+ smb_session_t *session,
+ const char *sharename,
+ smb_tree_t *tree)
+{
+ SMB_SESSION_VALID(session);
+ ASSERT(sharename);
+
+ smb_llist_enter(&session->s_tree_list, RW_READER);
+
+ if (tree) {
+ ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
+ ASSERT(tree->t_session == session);
+ tree = smb_llist_next(&session->s_tree_list, tree);
+ } else {
+ tree = smb_llist_head(&session->s_tree_list);
+ }
+
+ while (tree) {
+ ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
+ ASSERT(tree->t_session == session);
+ if (smb_strcasecmp(tree->t_sharename, sharename, 0) == 0) {
+ if (smb_tree_hold(tree)) {
+ smb_llist_exit(&session->s_tree_list);
+ return (tree);
+ }
+ }
+ tree = smb_llist_next(&session->s_tree_list, tree);
+ }
+
+ smb_llist_exit(&session->s_tree_list);
+ return (NULL);
+}
+
+/*
+ * Find the first connected tree that matches the specified volume name.
+ * If the specified tree is NULL the search starts from the beginning of
+ * the session's tree list. If a tree is provided the search starts just
+ * after that tree.
+ */
+smb_tree_t *
+smb_session_lookup_volume(
+ smb_session_t *session,
+ const char *name,
+ smb_tree_t *tree)
+{
+ SMB_SESSION_VALID(session);
+ ASSERT(name);
+
+ smb_llist_enter(&session->s_tree_list, RW_READER);
+
+ if (tree) {
+ ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
+ ASSERT(tree->t_session == session);
+ tree = smb_llist_next(&session->s_tree_list, tree);
+ } else {
+ tree = smb_llist_head(&session->s_tree_list);
+ }
+
+ while (tree) {
+ ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
+ ASSERT(tree->t_session == session);
+
+ if (smb_strcasecmp(tree->t_volume, name, 0) == 0) {
+ if (smb_tree_hold(tree)) {
+ smb_llist_exit(&session->s_tree_list);
+ return (tree);
+ }
+ }
+
+ tree = smb_llist_next(&session->s_tree_list, tree);
+ }
+
+ smb_llist_exit(&session->s_tree_list);
+ return (NULL);
+}
+
+/*
+ * Disconnect all trees that match the specified client process-id.
+ */
+void
+smb_session_close_pid(
+ smb_session_t *session,
+ uint16_t pid)
+{
+ smb_tree_t *tree;
+
+ SMB_SESSION_VALID(session);
+
+ tree = smb_session_get_tree(session, NULL);
+ while (tree) {
+ smb_tree_t *next;
+ ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
+ ASSERT(tree->t_session == session);
+ smb_tree_close_pid(tree, pid);
+ next = smb_session_get_tree(session, tree);
+ smb_tree_release(tree);
+ tree = next;
+ }
+}
+
+static void
+smb_session_tree_dtor(void *t)
+{
+ smb_tree_t *tree = (smb_tree_t *)t;
+
+ smb_tree_disconnect(tree, B_TRUE);
+ /* release the ref acquired during the traversal loop */
+ smb_tree_release(tree);
}
+
/*
- * Disconnect any trees associated with the specified share.
- * Iterate through the users on this session and tell each user
- * to disconnect from the share.
+ * Disconnect all trees that this user has connected.
*/
void
-smb_session_disconnect_share(smb_session_t *session, const char *sharename)
+smb_session_disconnect_owned_trees(
+ smb_session_t *session,
+ smb_user_t *owner)
+{
+ smb_tree_t *tree;
+ smb_llist_t *tree_list = &session->s_tree_list;
+
+ SMB_SESSION_VALID(session);
+ SMB_USER_VALID(owner);
+
+ smb_llist_enter(tree_list, RW_READER);
+
+ tree = smb_llist_head(tree_list);
+ while (tree) {
+ if ((tree->t_owner == owner) &&
+ smb_tree_hold(tree)) {
+ /*
+ * smb_tree_hold() succeeded, hence we are in state
+ * SMB_TREE_STATE_CONNECTED; schedule this tree
+ * for asynchronous disconnect, which will fire
+ * after we drop the llist traversal lock.
+ */
+ smb_llist_post(tree_list, tree, smb_session_tree_dtor);
+ }
+ tree = smb_llist_next(tree_list, tree);
+ }
+
+ /* drop the lock and flush the dtor queue */
+ smb_llist_exit(tree_list);
+}
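
Because the walk above still holds the tree list lock, the disconnect work is posted with a destructor callback and only runs after smb_llist_exit() drops that lock. The following single-threaded C sketch shows the shape of that deferral, queue first and flush later, using invented names; it does not model the locking itself.

#include <stdio.h>
#include <stdlib.h>

/*
 * Sketch of deferring per-item destructors until after a list walk.
 * The "lock" is only implied; the point is the ordering, not threads.
 */
typedef void (*dtor_t)(void *);

struct pending {
    void *item;
    dtor_t dtor;
    struct pending *next;
};

void
post(struct pending **q, void *item, dtor_t dtor)
{
    struct pending *p = malloc(sizeof (*p));

    if (p == NULL)
        return;                 /* sketch: ignore allocation failure */
    p->item = item;
    p->dtor = dtor;
    p->next = *q;
    *q = p;
}

void
flush(struct pending **q)
{
    while (*q != NULL) {
        struct pending *p = *q;

        *q = p->next;
        p->dtor(p->item);       /* runs only after the walk is over */
        free(p);
    }
}

void
tree_dtor(void *arg)
{
    /* A real destructor would disconnect and free the tree. */
    printf("destroying %s\n", (char *)arg);
}

int
main(void)
{
    struct pending *q = NULL;
    char *names[] = { "tree-1", "tree-2" };
    int i;

    /* "walk the list": decide what to destroy, but do not destroy yet */
    for (i = 0; i < 2; i++)
        post(&q, names[i], tree_dtor);

    /* "drop the lock": now it is safe to run the destructors */
    flush(&q);
    return (0);
}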
+
+/*
+ * Disconnect all trees connected on the specified session.
+ */
+void
+smb_session_disconnect_trees(
+ smb_session_t *session)
+{
+ smb_tree_t *tree;
+
+ SMB_SESSION_VALID(session);
+
+ tree = smb_session_get_tree(session, NULL);
+ while (tree) {
+ ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
+ ASSERT(tree->t_session == session);
+ smb_tree_disconnect(tree, B_TRUE);
+ smb_tree_release(tree);
+ tree = smb_session_get_tree(session, NULL);
+ }
+}
+
+/*
+ * Disconnect all trees that match the specified share name.
+ */
+void
+smb_session_disconnect_share(
+ smb_session_t *session,
+ const char *sharename)
+{
+ smb_tree_t *tree;
+ smb_tree_t *next;
+
+ SMB_SESSION_VALID(session);
+
+ tree = smb_session_lookup_share(session, sharename, NULL);
+ while (tree) {
+ ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
+ ASSERT(tree->t_session == session);
+ smb_session_cancel_requests(session, tree, NULL);
+ smb_tree_disconnect(tree, B_TRUE);
+ next = smb_session_lookup_share(session, sharename, tree);
+ smb_tree_release(tree);
+ tree = next;
+ }
+}
+
+void
+smb_session_post_tree(smb_session_t *session, smb_tree_t *tree)
+{
+ SMB_SESSION_VALID(session);
+ SMB_TREE_VALID(tree);
+ ASSERT0(tree->t_refcnt);
+ ASSERT(tree->t_state == SMB_TREE_STATE_DISCONNECTED);
+ ASSERT(tree->t_session == session);
+
+ smb_llist_post(&session->s_tree_list, tree, smb_tree_dealloc);
+}
+
+/*
+ * Get the next connected tree in the list. A reference is taken on
+ * the tree, which can be released later with smb_tree_release().
+ *
+ * If the specified tree is NULL the search starts from the beginning of
+ * the tree list. If a tree is provided the search starts just after
+ * that tree.
+ *
+ * Returns NULL if there are no connected trees in the list.
+ */
+static smb_tree_t *
+smb_session_get_tree(
+ smb_session_t *session,
+ smb_tree_t *tree)
+{
+ smb_llist_t *tree_list;
+
+ SMB_SESSION_VALID(session);
+ tree_list = &session->s_tree_list;
+
+ smb_llist_enter(tree_list, RW_READER);
+
+ if (tree) {
+ ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
+ tree = smb_llist_next(tree_list, tree);
+ } else {
+ tree = smb_llist_head(tree_list);
+ }
+
+ while (tree) {
+ if (smb_tree_hold(tree))
+ break;
+
+ tree = smb_llist_next(tree_list, tree);
+ }
+
+ smb_llist_exit(tree_list);
+ return (tree);
+}
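
smb_session_get_tree() is a resumable cursor: pass NULL to start at the head, or the previously returned tree to continue after it, and whatever comes back is already held. A simplified single-threaded sketch of that cursor, with invented item/get_next names and a plain reference count in place of smb_tree_hold():

#include <stddef.h>
#include <stdio.h>

/* Simplified "get next item, with a hold" cursor over a linked list. */
struct item {
    int i_id;
    int i_connected;
    int i_refcnt;
    struct item *i_next;
};

struct item *
get_next(struct item *head, struct item *prev)
{
    struct item *it = (prev == NULL) ? head : prev->i_next;

    /* Skip anything that can no longer be held. */
    while (it != NULL && !it->i_connected)
        it = it->i_next;
    if (it != NULL)
        it->i_refcnt++;         /* caller releases when done */
    return (it);
}

int
main(void)
{
    struct item c = { 3, 1, 0, NULL };
    struct item b = { 2, 0, 0, &c };    /* disconnected: skipped */
    struct item a = { 1, 1, 0, &b };
    struct item *it;

    for (it = get_next(&a, NULL); it != NULL; it = get_next(&a, it)) {
        printf("%d\n", it->i_id);       /* prints 1 then 3 */
        it->i_refcnt--;                 /* "release" */
    }
    return (0);
}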
+
+/*
+ * Logoff all users associated with the specified session.
+ */
+static void
+smb_session_logoff(smb_session_t *session)
{
smb_user_t *user;
SMB_SESSION_VALID(session);
+ smb_session_disconnect_trees(session);
+
smb_llist_enter(&session->s_user_list, RW_READER);
user = smb_llist_head(&session->s_user_list);
@@ -975,7 +1247,7 @@ smb_session_disconnect_share(smb_session_t *session, const char *sharename)
ASSERT(user->u_session == session);
if (smb_user_hold(user)) {
- smb_user_disconnect_share(user, sharename);
+ smb_user_logoff(user);
smb_user_release(user);
}
diff --git a/usr/src/uts/common/fs/smbsrv/smb_trans2_find.c b/usr/src/uts/common/fs/smbsrv/smb_trans2_find.c
index 037b2a3b36..d0d60cea5d 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_trans2_find.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_trans2_find.c
@@ -332,7 +332,7 @@ smb_com_trans2_find_first2(smb_request_t *sr, smb_xa_t *xa)
return (SDRC_ERROR);
}
- od = smb_tree_lookup_odir(sr->tid_tree, odid);
+ od = smb_tree_lookup_odir(sr, odid);
if (od == NULL)
return (SDRC_ERROR);
@@ -463,7 +463,7 @@ smb_com_trans2_find_next2(smb_request_t *sr, smb_xa_t *xa)
if (args.fa_maxdata == 0)
return (SDRC_ERROR);
- od = smb_tree_lookup_odir(sr->tid_tree, odid);
+ od = smb_tree_lookup_odir(sr, odid);
if (od == NULL) {
smbsr_error(sr, NT_STATUS_INVALID_HANDLE,
ERRDOS, ERROR_INVALID_HANDLE);
@@ -943,7 +943,7 @@ smb_com_find_close2(smb_request_t *sr)
if (smbsr_decode_vwv(sr, "w", &odid) != 0)
return (SDRC_ERROR);
- od = smb_tree_lookup_odir(sr->tid_tree, odid);
+ od = smb_tree_lookup_odir(sr, odid);
if (od == NULL) {
smbsr_error(sr, NT_STATUS_INVALID_HANDLE,
ERRDOS, ERROR_INVALID_HANDLE);
diff --git a/usr/src/uts/common/fs/smbsrv/smb_tree.c b/usr/src/uts/common/fs/smbsrv/smb_tree.c
index 13adc2d803..b225c67623 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_tree.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_tree.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -40,15 +40,15 @@
* +-------------------+ +-------------------+ +-------------------+
* | SESSION |<----->| SESSION |......| SESSION |
* +-------------------+ +-------------------+ +-------------------+
- * |
- * |
- * v
- * +-------------------+ +-------------------+ +-------------------+
- * | USER |<----->| USER |......| USER |
- * +-------------------+ +-------------------+ +-------------------+
- * |
- * |
- * v
+ * | |
+ * | |
+ * | v
+ * | +-------------------+ +-------------------+ +-------------------+
+ * | | USER |<--->| USER |...| USER |
+ * | +-------------------+ +-------------------+ +-------------------+
+ * |
+ * |
+ * v
* +-------------------+ +-------------------+ +-------------------+
* | TREE |<----->| TREE |......| TREE |
* +-------------------+ +-------------------+ +-------------------+
@@ -175,7 +175,7 @@ static smb_tree_t *smb_tree_connect_core(smb_request_t *);
static smb_tree_t *smb_tree_connect_disk(smb_request_t *, const char *);
static smb_tree_t *smb_tree_connect_printq(smb_request_t *, const char *);
static smb_tree_t *smb_tree_connect_ipc(smb_request_t *, const char *);
-static smb_tree_t *smb_tree_alloc(smb_user_t *, const smb_kshare_t *,
+static smb_tree_t *smb_tree_alloc(smb_request_t *, const smb_kshare_t *,
smb_node_t *, uint32_t, uint32_t);
static boolean_t smb_tree_is_connected_locked(smb_tree_t *);
static boolean_t smb_tree_is_disconnected(smb_tree_t *);
@@ -269,6 +269,7 @@ smb_tree_connect_core(smb_request_t *sr)
}
smb_kshare_release(si);
+
return (tree);
}
@@ -361,7 +362,7 @@ smb_tree_release(
smb_llist_flush(&tree->t_odir_list);
if (smb_tree_is_disconnected(tree) && (tree->t_refcnt == 0))
- smb_user_post_tree(tree->t_user, tree);
+ smb_session_post_tree(tree->t_session, tree);
mutex_exit(&tree->t_mutex);
}
@@ -428,7 +429,7 @@ smb_tree_enum(smb_tree_t *tree, smb_svcenum_t *svcenum)
{
smb_ofile_t *of;
smb_ofile_t *next;
- int rc;
+ int rc = 0;
ASSERT(tree);
ASSERT(tree->t_magic == SMB_TREE_MAGIC);
@@ -712,8 +713,7 @@ smb_tree_connect_disk(smb_request_t *sr, const char *sharename)
if (!smb_shortnames)
sr->arg.tcon.optional_support |= SMB_UNIQUE_FILE_NAME;
- tree = smb_tree_alloc(user, si, snode, access,
- sr->sr_cfg->skc_execflags);
+ tree = smb_tree_alloc(sr, si, snode, access, sr->sr_cfg->skc_execflags);
smb_node_release(snode);
@@ -805,8 +805,7 @@ smb_tree_connect_printq(smb_request_t *sr, const char *sharename)
sr->sr_tcon.optional_support = SMB_SUPPORT_SEARCH_BITS;
- tree = smb_tree_alloc(user, si, snode, access,
- sr->sr_cfg->skc_execflags);
+ tree = smb_tree_alloc(sr, si, snode, access, sr->sr_cfg->skc_execflags);
smb_node_release(snode);
@@ -846,7 +845,7 @@ smb_tree_connect_ipc(smb_request_t *sr, const char *name)
sr->sr_tcon.optional_support = SMB_SUPPORT_SEARCH_BITS;
- tree = smb_tree_alloc(user, si, NULL, ACE_ALL_PERMS, 0);
+ tree = smb_tree_alloc(sr, si, NULL, ACE_ALL_PERMS, 0);
if (tree == NULL) {
smb_tree_log(sr, name, "access denied");
smbsr_error(sr, NT_STATUS_ACCESS_DENIED, ERRSRV, ERRaccess);
@@ -859,41 +858,45 @@ smb_tree_connect_ipc(smb_request_t *sr, const char *name)
* Allocate a tree.
*/
static smb_tree_t *
-smb_tree_alloc(smb_user_t *user, const smb_kshare_t *si, smb_node_t *snode,
- uint32_t access, uint32_t execflags)
+smb_tree_alloc(smb_request_t *sr, const smb_kshare_t *si,
+ smb_node_t *snode, uint32_t access, uint32_t execflags)
{
+ smb_session_t *session = sr->session;
smb_tree_t *tree;
uint32_t stype = si->shr_type;
uint16_t tid;
- if (smb_idpool_alloc(&user->u_tid_pool, &tid))
+ if (smb_idpool_alloc(&session->s_tid_pool, &tid))
return (NULL);
- tree = kmem_cache_alloc(user->u_server->si_cache_tree, KM_SLEEP);
+ tree = kmem_cache_alloc(session->s_server->si_cache_tree, KM_SLEEP);
bzero(tree, sizeof (smb_tree_t));
- tree->t_user = user;
- tree->t_session = user->u_session;
- tree->t_server = user->u_server;
+ tree->t_session = session;
+ tree->t_server = session->s_server;
+
+ /* grab a ref for tree->t_owner */
+ smb_user_hold_internal(sr->uid_user);
+ tree->t_owner = sr->uid_user;
if (STYPE_ISDSK(stype) || STYPE_ISPRN(stype)) {
if (smb_tree_getattr(si, snode, tree) != 0) {
- smb_idpool_free(&user->u_tid_pool, tid);
- kmem_cache_free(user->u_server->si_cache_tree, tree);
+ smb_idpool_free(&session->s_tid_pool, tid);
+ kmem_cache_free(session->s_server->si_cache_tree, tree);
return (NULL);
}
}
if (smb_idpool_constructor(&tree->t_fid_pool)) {
- smb_idpool_free(&user->u_tid_pool, tid);
- kmem_cache_free(user->u_server->si_cache_tree, tree);
+ smb_idpool_free(&session->s_tid_pool, tid);
+ kmem_cache_free(session->s_server->si_cache_tree, tree);
return (NULL);
}
if (smb_idpool_constructor(&tree->t_odid_pool)) {
smb_idpool_destructor(&tree->t_fid_pool);
- smb_idpool_free(&user->u_tid_pool, tid);
- kmem_cache_free(user->u_server->si_cache_tree, tree);
+ smb_idpool_free(&session->s_tid_pool, tid);
+ kmem_cache_free(session->s_server->si_cache_tree, tree);
return (NULL);
}
@@ -929,11 +932,11 @@ smb_tree_alloc(smb_user_t *user, const smb_kshare_t *si, smb_node_t *snode,
tree->t_acltype = smb_fsop_acltype(snode);
}
- smb_llist_enter(&user->u_tree_list, RW_WRITER);
- smb_llist_insert_head(&user->u_tree_list, tree);
- smb_llist_exit(&user->u_tree_list);
- atomic_inc_32(&user->u_session->s_tree_cnt);
- smb_server_inc_trees(user->u_server);
+ smb_llist_enter(&session->s_tree_list, RW_WRITER);
+ smb_llist_insert_head(&session->s_tree_list, tree);
+ smb_llist_exit(&session->s_tree_list);
+ atomic_inc_32(&session->s_tree_cnt);
+ smb_server_inc_trees(session->s_server);
return (tree);
}
@@ -947,19 +950,19 @@ smb_tree_alloc(smb_user_t *user, const smb_kshare_t *si, smb_node_t *snode,
void
smb_tree_dealloc(void *arg)
{
- smb_user_t *user;
+ smb_session_t *session;
smb_tree_t *tree = (smb_tree_t *)arg;
SMB_TREE_VALID(tree);
ASSERT(tree->t_state == SMB_TREE_STATE_DISCONNECTED);
ASSERT(tree->t_refcnt == 0);
- user = tree->t_user;
- smb_llist_enter(&user->u_tree_list, RW_WRITER);
- smb_llist_remove(&user->u_tree_list, tree);
- smb_idpool_free(&user->u_tid_pool, tree->t_tid);
- atomic_dec_32(&tree->t_session->s_tree_cnt);
- smb_llist_exit(&user->u_tree_list);
+ session = tree->t_session;
+ smb_llist_enter(&session->s_tree_list, RW_WRITER);
+ smb_llist_remove(&session->s_tree_list, tree);
+ smb_idpool_free(&session->s_tid_pool, tree->t_tid);
+ atomic_dec_32(&session->s_tree_cnt);
+ smb_llist_exit(&session->s_tree_list);
mutex_enter(&tree->t_mutex);
mutex_exit(&tree->t_mutex);
@@ -974,6 +977,10 @@ smb_tree_dealloc(void *arg)
smb_llist_destructor(&tree->t_odir_list);
smb_idpool_destructor(&tree->t_fid_pool);
smb_idpool_destructor(&tree->t_odid_pool);
+
+ SMB_USER_VALID(tree->t_owner);
+ smb_user_release(tree->t_owner);
+
kmem_cache_free(tree->t_server->si_cache_tree, tree);
}
@@ -1234,27 +1241,38 @@ smb_tree_log(smb_request_t *sr, const char *sharename, const char *fmt, ...)
* Returns NULL if odir not found or a hold cannot be obtained.
*/
smb_odir_t *
-smb_tree_lookup_odir(smb_tree_t *tree, uint16_t odid)
+smb_tree_lookup_odir(smb_request_t *sr, uint16_t odid)
{
smb_odir_t *od;
smb_llist_t *od_list;
+ smb_tree_t *tree = sr->tid_tree;
- ASSERT(tree);
ASSERT(tree->t_magic == SMB_TREE_MAGIC);
od_list = &tree->t_odir_list;
- smb_llist_enter(od_list, RW_READER);
+ smb_llist_enter(od_list, RW_READER);
od = smb_llist_head(od_list);
while (od) {
- if (od->d_odid == odid) {
- if (!smb_odir_hold(od))
- od = NULL;
+ if (od->d_odid == odid)
break;
- }
od = smb_llist_next(od_list, od);
}
+ if (od == NULL)
+ goto out;
+
+ /*
+ * Only allow use of a given Search ID with the same UID that
+ * was used to create it. MS-CIFS 3.3.5.14
+ */
+ if (od->d_user != sr->uid_user) {
+ od = NULL;
+ goto out;
+ }
+ if (!smb_odir_hold(od))
+ od = NULL;
+out:
smb_llist_exit(od_list);
return (od);
}
@@ -1377,15 +1395,16 @@ smb_tree_close_odirs(smb_tree_t *tree, uint16_t pid)
}
static void
-smb_tree_set_execinfo(smb_tree_t *tree, smb_shr_execinfo_t *exec, int exec_type)
+smb_tree_set_execinfo(smb_tree_t *tree, smb_shr_execinfo_t *exec,
+ int exec_type)
{
exec->e_sharename = tree->t_sharename;
- exec->e_winname = tree->t_user->u_name;
- exec->e_userdom = tree->t_user->u_domain;
+ exec->e_winname = tree->t_owner->u_name;
+ exec->e_userdom = tree->t_owner->u_domain;
exec->e_srv_ipaddr = tree->t_session->local_ipaddr;
exec->e_cli_ipaddr = tree->t_session->ipaddr;
exec->e_cli_netbiosname = tree->t_session->workstation;
- exec->e_uid = crgetuid(tree->t_user->u_cred);
+ exec->e_uid = crgetuid(tree->t_owner->u_cred);
exec->e_type = exec_type;
}
@@ -1438,6 +1457,26 @@ smb_tree_netinfo_encode(smb_tree_t *tree, uint8_t *buf, size_t buflen,
return (rc);
}
+static void
+smb_tree_netinfo_username(smb_tree_t *tree, char **namestr, uint32_t *namelen)
+{
+ smb_user_t *user = tree->t_owner;
+
+ /*
+ * u_domain_len and u_name_len include the '\0' in their
+ * lengths, hence the sum of the two lengths gives us room
+ * for both the '\\' and '\0' chars.
+ */
+ ASSERT(namestr);
+ ASSERT(namelen);
+ ASSERT(user->u_domain_len > 0);
+ ASSERT(user->u_name_len > 0);
+ *namelen = user->u_domain_len + user->u_name_len;
+ *namestr = kmem_alloc(*namelen, KM_SLEEP);
+ (void) snprintf(*namestr, *namelen, "%s\\%s", user->u_domain,
+ user->u_name);
+}
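
The length arithmetic in the comment above works because both stored lengths already count their terminating NUL, so their sum covers the backslash and the single NUL of the combined string. A quick stand-alone check of that arithmetic in C, using example values rather than real session data:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
    const char *domain = "WORKGROUP";       /* example values only */
    const char *name = "alice";
    /* Each length includes its own '\0', as the kernel fields do. */
    size_t domain_len = strlen(domain) + 1; /* 10 */
    size_t name_len = strlen(name) + 1;     /* 6 */
    size_t namelen = domain_len + name_len; /* 16 */
    char *namestr = malloc(namelen);

    if (namestr == NULL)
        return (1);
    /* "WORKGROUP\alice" is 15 chars + '\0' = 16 bytes: an exact fit. */
    (void) snprintf(namestr, namelen, "%s\\%s", domain, name);
    printf("%s (%zu bytes)\n", namestr, namelen);
    free(namestr);
    return (0);
}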
+
/*
* Note: ci_numusers should be the number of users connected to
* the share rather than the number of references on the tree but
@@ -1446,8 +1485,6 @@ smb_tree_netinfo_encode(smb_tree_t *tree, uint8_t *buf, size_t buflen,
static void
smb_tree_netinfo_init(smb_tree_t *tree, smb_netconnectinfo_t *info)
{
- smb_user_t *user;
-
ASSERT(tree);
info->ci_id = tree->t_tid;
@@ -1459,13 +1496,7 @@ smb_tree_netinfo_init(smb_tree_t *tree, smb_netconnectinfo_t *info)
info->ci_sharelen = strlen(tree->t_sharename) + 1;
info->ci_share = smb_mem_strdup(tree->t_sharename);
- user = tree->t_user;
- ASSERT(user);
-
- info->ci_namelen = user->u_domain_len + user->u_name_len + 2;
- info->ci_username = kmem_alloc(info->ci_namelen, KM_SLEEP);
- (void) snprintf(info->ci_username, info->ci_namelen, "%s\\%s",
- user->u_domain, user->u_name);
+ smb_tree_netinfo_username(tree, &info->ci_username, &info->ci_namelen);
}
static void
diff --git a/usr/src/uts/common/fs/smbsrv/smb_tree_connect.c b/usr/src/uts/common/fs/smbsrv/smb_tree_connect.c
index 1ce9720f5d..19b857e834 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_tree_connect.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_tree_connect.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
*/
#include <smbsrv/smb_kproto.h>
@@ -362,8 +363,7 @@ smb_sdrc_t
smb_pre_tree_disconnect(smb_request_t *sr)
{
sr->uid_user = smb_session_lookup_uid(sr->session, sr->smb_uid);
- if (sr->uid_user != NULL)
- sr->tid_tree = smb_user_lookup_tree(sr->uid_user, sr->smb_tid);
+ sr->tid_tree = smb_session_lookup_tree(sr->session, sr->smb_tid);
DTRACE_SMB_1(op__TreeDisconnect__start, smb_request_t *, sr);
return (SDRC_SUCCESS);
diff --git a/usr/src/uts/common/fs/smbsrv/smb_user.c b/usr/src/uts/common/fs/smbsrv/smb_user.c
index cc3fde7f38..09eaba699c 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_user.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_user.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -38,15 +39,15 @@
* +-------------------+ +-------------------+ +-------------------+
* | SESSION |<----->| SESSION |......| SESSION |
* +-------------------+ +-------------------+ +-------------------+
- * |
- * |
- * v
- * +-------------------+ +-------------------+ +-------------------+
- * | USER |<----->| USER |......| USER |
- * +-------------------+ +-------------------+ +-------------------+
- * |
- * |
- * v
+ * | |
+ * | |
+ * | v
+ * | +-------------------+ +-------------------+ +-------------------+
+ * | | USER |<--->| USER |...| USER |
+ * | +-------------------+ +-------------------+ +-------------------+
+ * |
+ * |
+ * v
* +-------------------+ +-------------------+ +-------------------+
* | TREE |<----->| TREE |......| TREE |
* +-------------------+ +-------------------+ +-------------------+
@@ -170,7 +171,6 @@
static boolean_t smb_user_is_logged_in(smb_user_t *);
static int smb_user_enum_private(smb_user_t *, smb_svcenum_t *);
-static smb_tree_t *smb_user_get_tree(smb_llist_t *, smb_tree_t *);
static void smb_user_setcred(smb_user_t *, cred_t *, uint32_t);
static void smb_user_nonauth_logon(uint32_t);
static void smb_user_auth_logoff(uint32_t);
@@ -210,20 +210,15 @@ smb_user_login(
user->u_audit_sid = audit_sid;
if (!smb_idpool_alloc(&session->s_uid_pool, &user->u_uid)) {
- if (!smb_idpool_constructor(&user->u_tid_pool)) {
- smb_llist_constructor(&user->u_tree_list,
- sizeof (smb_tree_t), offsetof(smb_tree_t, t_lnd));
- mutex_init(&user->u_mutex, NULL, MUTEX_DEFAULT, NULL);
- smb_user_setcred(user, cr, privileges);
- user->u_state = SMB_USER_STATE_LOGGED_IN;
- user->u_magic = SMB_USER_MAGIC;
- smb_llist_enter(&session->s_user_list, RW_WRITER);
- smb_llist_insert_tail(&session->s_user_list, user);
- smb_llist_exit(&session->s_user_list);
- smb_server_inc_users(session->s_server);
- return (user);
- }
- smb_idpool_free(&session->s_uid_pool, user->u_uid);
+ mutex_init(&user->u_mutex, NULL, MUTEX_DEFAULT, NULL);
+ smb_user_setcred(user, cr, privileges);
+ user->u_state = SMB_USER_STATE_LOGGED_IN;
+ user->u_magic = SMB_USER_MAGIC;
+ smb_llist_enter(&session->s_user_list, RW_WRITER);
+ smb_llist_insert_tail(&session->s_user_list, user);
+ smb_llist_exit(&session->s_user_list);
+ smb_server_inc_users(session->s_server);
+ return (user);
}
smb_mem_free(user->u_name);
smb_mem_free(user->u_domain);
@@ -279,10 +274,7 @@ smb_user_logoff(
*/
user->u_state = SMB_USER_STATE_LOGGING_OFF;
mutex_exit(&user->u_mutex);
- /*
- * All the trees hanging off of this user are disconnected.
- */
- smb_user_disconnect_trees(user);
+ smb_session_disconnect_owned_trees(user->u_session, user);
smb_user_auth_logoff(user->u_audit_sid);
mutex_enter(&user->u_mutex);
user->u_state = SMB_USER_STATE_LOGGED_OFF;
@@ -301,13 +293,13 @@ smb_user_logoff(
}
/*
- * Take a reference on a user.
+ * Take a reference on a user. Do not return a reference unless the user is in
+ * the logged-in state.
*/
boolean_t
smb_user_hold(smb_user_t *user)
{
- ASSERT(user);
- ASSERT(user->u_magic == SMB_USER_MAGIC);
+ SMB_USER_VALID(user);
mutex_enter(&user->u_mutex);
@@ -322,6 +314,19 @@ smb_user_hold(smb_user_t *user)
}
/*
+ * Unconditionally take a reference on a user.
+ */
+void
+smb_user_hold_internal(smb_user_t *user)
+{
+ SMB_USER_VALID(user);
+
+ mutex_enter(&user->u_mutex);
+ user->u_refcnt++;
+ mutex_exit(&user->u_mutex);
+}
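
smb_user_hold() refuses to hand out a reference once logoff has started, while smb_user_hold_internal() takes one unconditionally for callers that already know the user must stay alive (for example when wiring the user into a new tree or ofile). A minimal user-space sketch of that conditional/unconditional pair, with invented names and a pthread mutex in place of the kernel mutex:

#include <pthread.h>
#include <stdio.h>

/* Simplified conditional vs. unconditional hold on a refcounted object. */
typedef struct user {
    pthread_mutex_t u_mutex;
    int u_logged_in;
    unsigned int u_refcnt;
} user_t;

int
user_hold(user_t *u)
{
    int ok;

    (void) pthread_mutex_lock(&u->u_mutex);
    ok = u->u_logged_in;        /* refuse once logoff has begun */
    if (ok)
        u->u_refcnt++;
    (void) pthread_mutex_unlock(&u->u_mutex);
    return (ok);
}

void
user_hold_internal(user_t *u)
{
    /* Caller guarantees the object is valid; always take the ref. */
    (void) pthread_mutex_lock(&u->u_mutex);
    u->u_refcnt++;
    (void) pthread_mutex_unlock(&u->u_mutex);
}

int
main(void)
{
    user_t u;

    (void) pthread_mutex_init(&u.u_mutex, NULL);
    u.u_logged_in = 1;
    u.u_refcnt = 0;

    user_hold_internal(&u);     /* always succeeds */
    u.u_logged_in = 0;          /* logoff begins */
    printf("conditional hold: %d, refcnt: %u\n", user_hold(&u), u.u_refcnt);
    return (0);
}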
+
+/*
* Release a reference on a user. If the reference count falls to
* zero and the user has logged off, post the object for deletion.
* Object deletion is deferred to avoid modifying a list while an
@@ -337,9 +342,6 @@ smb_user_release(
ASSERT(user->u_refcnt);
user->u_refcnt--;
- /* flush the tree list's delete queue */
- smb_llist_flush(&user->u_tree_list);
-
switch (user->u_state) {
case SMB_USER_STATE_LOGGED_OFF:
if (user->u_refcnt == 0)
@@ -357,248 +359,6 @@ smb_user_release(
mutex_exit(&user->u_mutex);
}
-void
-smb_user_post_tree(smb_user_t *user, smb_tree_t *tree)
-{
- SMB_USER_VALID(user);
- SMB_TREE_VALID(tree);
- ASSERT(tree->t_refcnt == 0);
- ASSERT(tree->t_state == SMB_TREE_STATE_DISCONNECTED);
- ASSERT(tree->t_user == user);
-
- smb_llist_post(&user->u_tree_list, tree, smb_tree_dealloc);
-}
-
-
-/*
- * Find a tree by tree-id.
- */
-smb_tree_t *
-smb_user_lookup_tree(
- smb_user_t *user,
- uint16_t tid)
-
-{
- smb_tree_t *tree;
-
- ASSERT(user);
- ASSERT(user->u_magic == SMB_USER_MAGIC);
-
- smb_llist_enter(&user->u_tree_list, RW_READER);
- tree = smb_llist_head(&user->u_tree_list);
-
- while (tree) {
- ASSERT(tree->t_magic == SMB_TREE_MAGIC);
- ASSERT(tree->t_user == user);
-
- if (tree->t_tid == tid) {
- if (smb_tree_hold(tree)) {
- smb_llist_exit(&user->u_tree_list);
- return (tree);
- } else {
- smb_llist_exit(&user->u_tree_list);
- return (NULL);
- }
- }
-
- tree = smb_llist_next(&user->u_tree_list, tree);
- }
-
- smb_llist_exit(&user->u_tree_list);
- return (NULL);
-}
-
-/*
- * Find the first connected tree that matches the specified sharename.
- * If the specified tree is NULL the search starts from the beginning of
- * the user's tree list. If a tree is provided the search starts just
- * after that tree.
- */
-smb_tree_t *
-smb_user_lookup_share(
- smb_user_t *user,
- const char *sharename,
- smb_tree_t *tree)
-{
- ASSERT(user);
- ASSERT(user->u_magic == SMB_USER_MAGIC);
- ASSERT(sharename);
-
- smb_llist_enter(&user->u_tree_list, RW_READER);
-
- if (tree) {
- ASSERT(tree->t_magic == SMB_TREE_MAGIC);
- ASSERT(tree->t_user == user);
- tree = smb_llist_next(&user->u_tree_list, tree);
- } else {
- tree = smb_llist_head(&user->u_tree_list);
- }
-
- while (tree) {
- ASSERT(tree->t_magic == SMB_TREE_MAGIC);
- ASSERT(tree->t_user == user);
- if (smb_strcasecmp(tree->t_sharename, sharename, 0) == 0) {
- if (smb_tree_hold(tree)) {
- smb_llist_exit(&user->u_tree_list);
- return (tree);
- }
- }
- tree = smb_llist_next(&user->u_tree_list, tree);
- }
-
- smb_llist_exit(&user->u_tree_list);
- return (NULL);
-}
-
-/*
- * Find the first connected tree that matches the specified volume name.
- * If the specified tree is NULL the search starts from the beginning of
- * the user's tree list. If a tree is provided the search starts just
- * after that tree.
- */
-smb_tree_t *
-smb_user_lookup_volume(
- smb_user_t *user,
- const char *name,
- smb_tree_t *tree)
-{
- ASSERT(user);
- ASSERT(user->u_magic == SMB_USER_MAGIC);
- ASSERT(name);
-
- smb_llist_enter(&user->u_tree_list, RW_READER);
-
- if (tree) {
- ASSERT(tree->t_magic == SMB_TREE_MAGIC);
- ASSERT(tree->t_user == user);
- tree = smb_llist_next(&user->u_tree_list, tree);
- } else {
- tree = smb_llist_head(&user->u_tree_list);
- }
-
- while (tree) {
- ASSERT(tree->t_magic == SMB_TREE_MAGIC);
- ASSERT(tree->t_user == user);
-
- if (smb_strcasecmp(tree->t_volume, name, 0) == 0) {
- if (smb_tree_hold(tree)) {
- smb_llist_exit(&user->u_tree_list);
- return (tree);
- }
- }
-
- tree = smb_llist_next(&user->u_tree_list, tree);
- }
-
- smb_llist_exit(&user->u_tree_list);
- return (NULL);
-}
-
-/*
- * Disconnect all trees that match the specified client process-id.
- */
-void
-smb_user_close_pid(
- smb_user_t *user,
- uint16_t pid)
-{
- smb_tree_t *tree;
-
- ASSERT(user);
- ASSERT(user->u_magic == SMB_USER_MAGIC);
-
- tree = smb_user_get_tree(&user->u_tree_list, NULL);
- while (tree) {
- smb_tree_t *next;
- ASSERT(tree->t_user == user);
- smb_tree_close_pid(tree, pid);
- next = smb_user_get_tree(&user->u_tree_list, tree);
- smb_tree_release(tree);
- tree = next;
- }
-}
-
-/*
- * Disconnect all trees that this user has connected.
- */
-void
-smb_user_disconnect_trees(
- smb_user_t *user)
-{
- smb_tree_t *tree;
-
- ASSERT(user);
- ASSERT(user->u_magic == SMB_USER_MAGIC);
-
- tree = smb_user_get_tree(&user->u_tree_list, NULL);
- while (tree) {
- ASSERT(tree->t_user == user);
- smb_tree_disconnect(tree, B_TRUE);
- smb_tree_release(tree);
- tree = smb_user_get_tree(&user->u_tree_list, NULL);
- }
-}
-
-/*
- * Disconnect all trees that match the specified share name.
- */
-void
-smb_user_disconnect_share(
- smb_user_t *user,
- const char *sharename)
-{
- smb_tree_t *tree;
- smb_tree_t *next;
-
- ASSERT(user);
- ASSERT(user->u_magic == SMB_USER_MAGIC);
- ASSERT(user->u_refcnt);
-
- tree = smb_user_lookup_share(user, sharename, NULL);
- while (tree) {
- ASSERT(tree->t_magic == SMB_TREE_MAGIC);
- smb_session_cancel_requests(user->u_session, tree, NULL);
- smb_tree_disconnect(tree, B_TRUE);
- next = smb_user_lookup_share(user, sharename, tree);
- smb_tree_release(tree);
- tree = next;
- }
-}
-
-/*
- * Close a file by its unique id.
- */
-int
-smb_user_fclose(smb_user_t *user, uint32_t uniqid)
-{
- smb_llist_t *tree_list;
- smb_tree_t *tree;
- int rc = ENOENT;
-
- ASSERT(user);
- ASSERT(user->u_magic == SMB_USER_MAGIC);
-
- tree_list = &user->u_tree_list;
- ASSERT(tree_list);
-
- smb_llist_enter(tree_list, RW_READER);
- tree = smb_llist_head(tree_list);
-
- while ((tree != NULL) && (rc == ENOENT)) {
- ASSERT(tree->t_user == user);
-
- if (smb_tree_hold(tree)) {
- rc = smb_tree_fclose(tree, uniqid);
- smb_tree_release(tree);
- }
-
- tree = smb_llist_next(tree_list, tree);
- }
-
- smb_llist_exit(tree_list);
- return (rc);
-}
-
/*
* Determine whether or not the user is an administrator.
* Members of the administrators group have administrative rights.
@@ -688,9 +448,7 @@ smb_user_namecmp(smb_user_t *user, const char *name)
int
smb_user_enum(smb_user_t *user, smb_svcenum_t *svcenum)
{
- smb_tree_t *tree;
- smb_tree_t *next;
- int rc;
+ int rc = 0;
ASSERT(user);
ASSERT(user->u_magic == SMB_USER_MAGIC);
@@ -698,21 +456,6 @@ smb_user_enum(smb_user_t *user, smb_svcenum_t *svcenum)
if (svcenum->se_type == SMB_SVCENUM_TYPE_USER)
return (smb_user_enum_private(user, svcenum));
- tree = smb_user_get_tree(&user->u_tree_list, NULL);
- while (tree) {
- ASSERT(tree->t_user == user);
-
- rc = smb_tree_enum(tree, svcenum);
- if (rc != 0) {
- smb_tree_release(tree);
- break;
- }
-
- next = smb_user_get_tree(&user->u_tree_list, tree);
- smb_tree_release(tree);
- tree = next;
- }
-
return (rc);
}
@@ -769,8 +512,6 @@ smb_user_delete(void *arg)
user->u_magic = (uint32_t)~SMB_USER_MAGIC;
mutex_destroy(&user->u_mutex);
- smb_llist_destructor(&user->u_tree_list);
- smb_idpool_destructor(&user->u_tid_pool);
if (user->u_cred)
crfree(user->u_cred);
if (user->u_privcred)
@@ -780,43 +521,6 @@ smb_user_delete(void *arg)
kmem_cache_free(user->u_server->si_cache_user, user);
}
-/*
- * Get the next connected tree in the list. A reference is taken on
- * the tree, which can be released later with smb_tree_release().
- *
- * If the specified tree is NULL the search starts from the beginning of
- * the tree list. If a tree is provided the search starts just after
- * that tree.
- *
- * Returns NULL if there are no connected trees in the list.
- */
-static smb_tree_t *
-smb_user_get_tree(
- smb_llist_t *tree_list,
- smb_tree_t *tree)
-{
- ASSERT(tree_list);
-
- smb_llist_enter(tree_list, RW_READER);
-
- if (tree) {
- ASSERT(tree->t_magic == SMB_TREE_MAGIC);
- tree = smb_llist_next(tree_list, tree);
- } else {
- tree = smb_llist_head(tree_list);
- }
-
- while (tree) {
- if (smb_tree_hold(tree))
- break;
-
- tree = smb_llist_next(tree_list, tree);
- }
-
- smb_llist_exit(tree_list);
- return (tree);
-}
-
cred_t *
smb_user_getcred(smb_user_t *user)
{
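With smb_user_lookup_tree() and the other per-user tree routines removed, a TID now resolves against the owning session via smb_session_lookup_tree(), declared in the smb_kproto.h hunk further down. A minimal caller sketch, assuming the session-level lookup returns a held tree just as the removed user-level lookup did; the function name here is illustrative only:

static void
example_use_tree(smb_session_t *sn, uint16_t tid)
{
	smb_tree_t *tree;

	if ((tree = smb_session_lookup_tree(sn, tid)) == NULL)
		return;			/* no connected tree with this TID */

	/* ... operate on the held tree ... */

	smb_tree_release(tree);
}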
diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c
index ba9c766c65..b4ab4ec3fd 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c
@@ -363,8 +363,19 @@ dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx,
boolean_t
dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag)
{
- return (dmu_buf_try_add_ref(ds->ds_dbuf, dp->dp_meta_objset,
- ds->ds_object, DMU_BONUS_BLKID, tag));
+ dmu_buf_t *dbuf = ds->ds_dbuf;
+ boolean_t result = B_FALSE;
+
+ if (dbuf != NULL && dmu_buf_try_add_ref(dbuf, dp->dp_meta_objset,
+ ds->ds_object, DMU_BONUS_BLKID, tag)) {
+
+ if (ds == dmu_buf_get_user(dbuf))
+ result = B_TRUE;
+ else
+ dmu_buf_rele(dbuf, tag);
+ }
+
+ return (result);
}
int
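The hardened dsl_dataset_try_add_ref() above only returns B_TRUE once it has both taken a hold on the bonus dbuf and confirmed that the dbuf still belongs to this dataset; otherwise the transient hold is dropped again. A minimal caller sketch, assuming the usual pairing with dsl_dataset_rele() and an illustrative function name:

static void
example_try_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag)
{
	if (!dsl_dataset_try_add_ref(dp, ds, tag))
		return;		/* dataset is being evicted; do not touch it */

	/* ... the dataset can be inspected safely here ... */

	dsl_dataset_rele(ds, tag);	/* drop the hold taken above */
}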
diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c
index 6a27544201..02844cef07 100644
--- a/usr/src/uts/common/os/exit.c
+++ b/usr/src/uts/common/os/exit.c
@@ -400,14 +400,36 @@ proc_exit(int why, int what)
if (z->zone_boot_err == 0 &&
zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
- if (z->zone_restart_init == B_TRUE) {
- if (restart_init(what, why) == 0)
- return (0);
- }
- z->zone_init_status = wstat(why, what);
- (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
- zone_kcred());
+ /*
+ * If the init process should be restarted, the
+ * "zone_restart_init" member will be set. Some init
+ * programs in branded zones do not tolerate a restart
+ * in the traditional manner; setting the
+ * "zone_reboot_on_init_exit" member will cause the
+ * entire zone to be rebooted instead. If neither of
+ * these flags is set the zone will shut down.
+ */
+ if (z->zone_reboot_on_init_exit == B_TRUE &&
+ z->zone_restart_init == B_TRUE) {
+ /*
+ * Trigger a zone reboot and continue
+ * with exit processing.
+ */
+ z->zone_init_status = wstat(why, what);
+ (void) zone_kadmin(A_REBOOT, 0, NULL,
+ zone_kcred());
+
+ } else {
+ if (z->zone_restart_init == B_TRUE) {
+ if (restart_init(what, why) == 0)
+ return (0);
+ }
+
+ z->zone_init_status = wstat(why, what);
+ (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
+ zone_kcred());
+ }
}
/*
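The block comment above describes three possible outcomes when a zone's init process exits. A compact model of that decision, for illustration only (the real code also records zone_init_status, and a failed restart_init() falls through to the shutdown case):

typedef enum { INIT_REBOOT_ZONE, INIT_RESTART, INIT_HALT_ZONE } init_action_t;

static init_action_t
init_exit_action(int reboot_on_init_exit, int restart_init)
{
	if (reboot_on_init_exit && restart_init)
		return (INIT_REBOOT_ZONE);	/* zone_kadmin(A_REBOOT, ...) */
	if (restart_init)
		return (INIT_RESTART);		/* restart_init() */
	return (INIT_HALT_ZONE);	/* zone_kadmin(A_SHUTDOWN, AD_HALT, ...) */
}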
@@ -995,10 +1017,9 @@ winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
int
waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
{
- int found;
proc_t *cp, *pp;
- int proc_gone;
int waitflag = !(options & WNOWAIT);
+ boolean_t have_brand_helper = B_FALSE;
/*
* Obsolete flag, defined here only for binary compatibility
@@ -1047,10 +1068,37 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
return (ECHILD);
}
- while (pp->p_child != NULL) {
+ if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
+ have_brand_helper = B_TRUE;
+ }
+
+ while (pp->p_child != NULL || have_brand_helper) {
+ boolean_t brand_wants_wait = B_FALSE;
+ int proc_gone = 0;
+ int found = 0;
+
+ /*
+ * Give the brand a chance to return synthetic results from
+ * this waitid() call before we do the real thing.
+ */
+ if (have_brand_helper) {
+ int ret;
- proc_gone = 0;
+ if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
+ &brand_wants_wait, &ret) == 0) {
+ mutex_exit(&pidlock);
+ return (ret);
+ }
+ if (pp->p_child == NULL) {
+ goto no_real_children;
+ }
+ }
+
+ /*
+ * Look for interesting children in the newstate list.
+ */
+ VERIFY(pp->p_child != NULL);
for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
continue;
@@ -1107,7 +1155,6 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
* Wow! None of the threads on the p_sibling_ns list were
* interesting threads. Check all the kids!
*/
- found = 0;
for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
if (idtype == P_PID && id != cp->p_pid)
continue;
@@ -1186,11 +1233,12 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
break;
}
+no_real_children:
/*
* If we found no interesting processes at all,
* break out and return ECHILD.
*/
- if (found + proc_gone == 0)
+ if (!brand_wants_wait && (found + proc_gone == 0))
break;
if (options & WNOHANG) {
@@ -1209,7 +1257,7 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
* change state while we wait, we don't wait at all.
* Get out with ECHILD according to SVID.
*/
- if (found == proc_gone)
+ if (!brand_wants_wait && (found == proc_gone))
break;
if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
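waitid() now consults the brand's b_waitid_helper hook before scanning real children. A hypothetical brand-side skeleton, following the contract visible above: a return of 0 means the helper filled in the siginfo and return value and waitid() returns immediately; any other return falls through to the normal child scan, with *brand_wants_waitp telling waitid() to keep blocking rather than return ECHILD. The function name and body are illustrative:

static int
examplebrand_waitid_helper(idtype_t idtype, id_t id, k_siginfo_t *ip,
    int options, boolean_t *brand_wants_waitp, int *rvalp)
{
	/* No synthetic child event to report in this sketch. */
	*brand_wants_waitp = B_FALSE;
	return (-1);
}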
diff --git a/usr/src/uts/common/os/logsubr.c b/usr/src/uts/common/os/logsubr.c
index 86e9045887..6a603c8982 100644
--- a/usr/src/uts/common/os/logsubr.c
+++ b/usr/src/uts/common/os/logsubr.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2013 Gary Mills
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -250,7 +250,7 @@ log_init(void)
*/
printf("\rSunOS Release %s Version %s %u-bit\n",
utsname.release, utsname.version, NBBY * (uint_t)sizeof (void *));
- printf("Copyright (c) 2010-2014, Joyent Inc. All rights reserved.\n");
+ printf("Copyright (c) 2010-2015, Joyent Inc. All rights reserved.\n");
#ifdef DEBUG
printf("DEBUG enabled\n");
#endif
diff --git a/usr/src/uts/common/os/sig.c b/usr/src/uts/common/os/sig.c
index b117bf3584..ae643c280e 100644
--- a/usr/src/uts/common/os/sig.c
+++ b/usr/src/uts/common/os/sig.c
@@ -22,7 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -194,7 +194,7 @@ eat_signal(kthread_t *t, int sig)
!(ttoproc(t)->p_proc_flag & P_PR_LOCK)) {
ttoproc(t)->p_stopsig = 0;
t->t_dtrace_stop = 0;
- t->t_schedflag |= TS_XSTART | TS_PSTART;
+ t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART;
setrun_locked(t);
} else if (t != curthread && t->t_state == TS_ONPROC) {
aston(t); /* make it do issig promptly */
@@ -608,6 +608,21 @@ issig_forreal(void)
}
/*
+ * Allow the brand the chance to alter (or suppress) delivery
+ * of this signal.
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_issig_stop != NULL) {
+ /*
+ * The brand hook will return 0 if it would like
+ * us to drive on, or -1 if we should restart
+ * the loop to check other conditions.
+ */
+ if (BROP(p)->b_issig_stop(p, lwp) != 0) {
+ continue;
+ }
+ }
+
+ /*
* Honor requested stop before dealing with the
* current signal; a debugger may change it.
* Do not want to go back to loop here since this is a special
@@ -939,6 +954,16 @@ stop(int why, int what)
}
break;
+ case PR_BRAND:
+ /*
+ * We have been stopped by the brand code for a brand-private
+ * reason. This is an asynchronous stop affecting only this
+ * LWP.
+ */
+ VERIFY(PROC_IS_BRANDED(p));
+ flags &= ~TS_BSTART;
+ break;
+
default: /* /proc stop */
flags &= ~TS_PSTART;
/*
@@ -1050,7 +1075,7 @@ stop(int why, int what)
}
}
- if (why != PR_JOBCONTROL && why != PR_CHECKPOINT) {
+ if (why != PR_JOBCONTROL && why != PR_CHECKPOINT && why != PR_BRAND) {
/*
* Do process-level notification when all lwps are
* either stopped on events of interest to /proc
@@ -1156,6 +1181,13 @@ stop(int why, int what)
if (why == PR_CHECKPOINT)
del_one_utstop();
+ /*
+ * Allow the brand to post notification of this stop condition.
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_stop_notify != NULL) {
+ BROP(p)->b_stop_notify(p, lwp, why, what);
+ }
+
thread_lock(t);
ASSERT((t->t_schedflag & TS_ALLSTART) == 0);
t->t_schedflag |= flags;
@@ -1177,7 +1209,7 @@ stop(int why, int what)
(p->p_flag & (SEXITLWPS|SKILLED))) {
p->p_stopsig = 0;
thread_lock(t);
- t->t_schedflag |= TS_XSTART | TS_PSTART;
+ t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART;
setrun_locked(t);
thread_unlock_nopreempt(t);
} else if (why == PR_JOBCONTROL) {
@@ -1795,6 +1827,15 @@ sigcld_repost()
sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
mutex_enter(&pidlock);
+ if (PROC_IS_BRANDED(pp) && BROP(pp)->b_sigcld_repost != NULL) {
+ /*
+ * Allow the brand to inject synthetic SIGCLD signals.
+ */
+ if (BROP(pp)->b_sigcld_repost(pp, sqp) == 0) {
+ mutex_exit(&pidlock);
+ return;
+ }
+ }
for (cp = pp->p_child; cp; cp = cp->p_sibling) {
if (cp->p_pidflag & CLDPEND) {
post_sigcld(cp, sqp);
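sigcld_repost() now gives the brand the first opportunity to post a synthetic SIGCLD. A hypothetical skeleton of that hook, following the calling convention above: returning 0 means the brand consumed sqp and posted its own signal, so the normal per-child CLDPEND scan is skipped; any other value falls through. The name and body are illustrative:

static int
examplebrand_sigcld_repost(proc_t *pp, sigqueue_t *sqp)
{
	/* No synthetic child state to repost in this sketch. */
	return (-1);
}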
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index 285aeac032..347a90a022 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -2624,6 +2624,7 @@ zone_init(void)
zone0.zone_ntasks = 1;
mutex_exit(&p0.p_lock);
zone0.zone_restart_init = B_TRUE;
+ zone0.zone_reboot_on_init_exit = B_FALSE;
zone0.zone_init_status = -1;
zone0.zone_brand = &native_brand;
rctl_prealloc_destroy(gp);
@@ -4669,8 +4670,9 @@ parse_rctls(caddr_t ubuf, size_t buflen, nvlist_t **nvlp)
error = EINVAL;
name = nvpair_name(nvp);
- if (strncmp(nvpair_name(nvp), "zone.", sizeof ("zone.") - 1)
- != 0 || nvpair_type(nvp) != DATA_TYPE_NVLIST_ARRAY) {
+ if ((strncmp(name, "zone.", sizeof ("zone.") - 1) != 0 &&
+ strncmp(name, "project.", sizeof ("project.") - 1) != 0) ||
+ nvpair_type(nvp) != DATA_TYPE_NVLIST_ARRAY) {
goto out;
}
if ((hndl = rctl_hndl_lookup(name)) == -1) {
@@ -4819,6 +4821,7 @@ zone_create(const char *zone_name, const char *zone_root,
zone->zone_ncpus = 0;
zone->zone_ncpus_online = 0;
zone->zone_restart_init = B_TRUE;
+ zone->zone_reboot_on_init_exit = B_FALSE;
zone->zone_init_status = -1;
zone->zone_brand = &native_brand;
zone->zone_initname = NULL;
@@ -5045,8 +5048,8 @@ zone_create(const char *zone_name, const char *zone_root,
/*
* The process, task, and project rctls are probably wrong;
* we need an interface to get the default values of all rctls,
- * and initialize zsched appropriately. I'm not sure that that
- * makes much of a difference, though.
+ * and initialize zsched appropriately. However, we allow zoneadmd
+ * to pass down both zone and project rctls for the zone's init.
*/
error = newproc(zsched, (void *)&zarg, syscid, minclsyspri, NULL, 0);
if (error != 0) {
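The parse_rctls() change above widens the accepted resource-control names from zone.* only to zone.* and project.*. The check reduces to a simple prefix test, shown here as a standalone predicate for illustration (the name is hypothetical):

static int
example_rctl_name_ok(const char *name)
{
	return (strncmp(name, "zone.", sizeof ("zone.") - 1) == 0 ||
	    strncmp(name, "project.", sizeof ("project.") - 1) == 0);
}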
diff --git a/usr/src/uts/common/smbsrv/smb_kproto.h b/usr/src/uts/common/smbsrv/smb_kproto.h
index f2de265176..ad7402fd80 100644
--- a/usr/src/uts/common/smbsrv/smb_kproto.h
+++ b/usr/src/uts/common/smbsrv/smb_kproto.h
@@ -521,6 +521,15 @@ void smb_session_disconnect_from_share(smb_llist_t *, char *);
smb_user_t *smb_session_dup_user(smb_session_t *, char *, char *);
smb_user_t *smb_session_lookup_uid(smb_session_t *, uint16_t);
void smb_session_post_user(smb_session_t *, smb_user_t *);
+void smb_session_post_tree(smb_session_t *, smb_tree_t *);
+smb_tree_t *smb_session_lookup_tree(smb_session_t *, uint16_t);
+smb_tree_t *smb_session_lookup_share(smb_session_t *, const char *,
+ smb_tree_t *);
+smb_tree_t *smb_session_lookup_volume(smb_session_t *, const char *,
+ smb_tree_t *);
+void smb_session_close_pid(smb_session_t *, uint16_t);
+void smb_session_disconnect_owned_trees(smb_session_t *, smb_user_t *);
+void smb_session_disconnect_trees(smb_session_t *);
void smb_session_disconnect_share(smb_session_t *, const char *);
void smb_session_getclient(smb_session_t *, char *, size_t);
boolean_t smb_session_isclient(smb_session_t *, const char *);
@@ -539,10 +548,10 @@ void smb_request_free(smb_request_t *);
/*
* ofile functions (file smb_ofile.c)
*/
-smb_ofile_t *smb_ofile_lookup_by_fid(smb_tree_t *, uint16_t);
+smb_ofile_t *smb_ofile_lookup_by_fid(smb_request_t *, uint16_t);
smb_ofile_t *smb_ofile_lookup_by_uniqid(smb_tree_t *, uint32_t);
boolean_t smb_ofile_disallow_fclose(smb_ofile_t *);
-smb_ofile_t *smb_ofile_open(smb_tree_t *, smb_node_t *, uint16_t,
+smb_ofile_t *smb_ofile_open(smb_request_t *, smb_node_t *, uint16_t,
smb_arg_open_t *, uint16_t, uint32_t, smb_error_t *);
void smb_ofile_close(smb_ofile_t *, int32_t);
void smb_ofile_delete(void *);
@@ -603,18 +612,11 @@ smb_user_t *smb_user_login(smb_session_t *, cred_t *,
smb_user_t *smb_user_dup(smb_user_t *);
void smb_user_logoff(smb_user_t *);
void smb_user_delete(void *);
-void smb_user_post_tree(smb_user_t *, smb_tree_t *);
-smb_tree_t *smb_user_lookup_tree(smb_user_t *, uint16_t);
-smb_tree_t *smb_user_lookup_share(smb_user_t *, const char *, smb_tree_t *);
-smb_tree_t *smb_user_lookup_volume(smb_user_t *, const char *, smb_tree_t *);
boolean_t smb_user_is_admin(smb_user_t *);
boolean_t smb_user_namecmp(smb_user_t *, const char *);
int smb_user_enum(smb_user_t *, smb_svcenum_t *);
-void smb_user_close_pid(smb_user_t *, uint16_t);
-void smb_user_disconnect_trees(smb_user_t *user);
-void smb_user_disconnect_share(smb_user_t *, const char *);
-int smb_user_fclose(smb_user_t *, uint32_t);
boolean_t smb_user_hold(smb_user_t *);
+void smb_user_hold_internal(smb_user_t *);
void smb_user_release(smb_user_t *);
cred_t *smb_user_getcred(smb_user_t *);
cred_t *smb_user_getprivcred(smb_user_t *);
@@ -637,7 +639,7 @@ int smb_tree_enum(smb_tree_t *, smb_svcenum_t *);
int smb_tree_fclose(smb_tree_t *, uint32_t);
boolean_t smb_tree_hold(smb_tree_t *);
void smb_tree_release(smb_tree_t *);
-smb_odir_t *smb_tree_lookup_odir(smb_tree_t *, uint16_t);
+smb_odir_t *smb_tree_lookup_odir(smb_request_t *, uint16_t);
boolean_t smb_tree_is_connected(smb_tree_t *);
#define SMB_TREE_GET_TID(tree) ((tree)->t_tid)
diff --git a/usr/src/uts/common/smbsrv/smb_ktypes.h b/usr/src/uts/common/smbsrv/smb_ktypes.h
index 493e7130a7..2c5d102f62 100644
--- a/usr/src/uts/common/smbsrv/smb_ktypes.h
+++ b/usr/src/uts/common/smbsrv/smb_ktypes.h
@@ -908,7 +908,9 @@ typedef struct smb_session {
smb_slist_t s_req_list;
smb_llist_t s_xa_list;
smb_llist_t s_user_list;
+ smb_llist_t s_tree_list;
smb_idpool_t s_uid_pool;
+ smb_idpool_t s_tid_pool;
smb_txlst_t s_txlst;
volatile uint32_t s_tree_cnt;
@@ -975,9 +977,6 @@ typedef struct smb_user {
cred_t *u_cred;
cred_t *u_privcred;
- smb_llist_t u_tree_list;
- smb_idpool_t u_tid_pool;
-
uint32_t u_refcnt;
uint32_t u_flags;
uint32_t u_privileges;
@@ -1028,7 +1027,11 @@ typedef struct smb_tree {
struct smb_server *t_server;
smb_session_t *t_session;
- smb_user_t *t_user;
+ /*
+ * user whose uid was in the tree connect message
+ * ("owner" in MS-CIFS parlance, see section 2.2.1.6 definition of FID)
+ */
+ smb_user_t *t_owner;
smb_node_t *t_snode;
smb_llist_t t_ofile_list;
@@ -1259,6 +1262,7 @@ typedef struct smb_odir {
list_node_t d_lnd;
smb_odir_state_t d_state;
smb_session_t *d_session;
+ smb_user_t *d_user;
smb_tree_t *d_tree;
smb_node_t *d_dnode;
cred_t *d_cred;
diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h
index 3486ae864d..b3abada863 100644
--- a/usr/src/uts/common/sys/brand.h
+++ b/usr/src/uts/common/sys/brand.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _SYS_BRAND_H
@@ -132,6 +132,11 @@ struct brand_ops {
boolean_t (*b_native_exec)(uint8_t, const char **);
void (*b_ptrace_exectrap)(proc_t *);
uint32_t (*b_map32limit)(proc_t *);
+ void (*b_stop_notify)(proc_t *, klwp_t *, ushort_t, ushort_t);
+ int (*b_waitid_helper)(idtype_t, id_t, k_siginfo_t *, int,
+ boolean_t *, int *);
+ int (*b_sigcld_repost)(proc_t *, sigqueue_t *);
+ int (*b_issig_stop)(proc_t *, klwp_t *);
};
/*
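The four new brand_ops members above are all optional; every caller added in this change (issig_forreal(), stop(), sigcld_repost() and waitid()) tests the member for NULL before invoking it. A hypothetical brand would therefore supply prototypes along these lines, with illustrative names:

static void examplebrand_stop_notify(proc_t *, klwp_t *, ushort_t, ushort_t);
static int examplebrand_waitid_helper(idtype_t, id_t, k_siginfo_t *, int,
    boolean_t *, int *);
static int examplebrand_sigcld_repost(proc_t *, sigqueue_t *);
static int examplebrand_issig_stop(proc_t *, klwp_t *);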
diff --git a/usr/src/uts/common/sys/procfs.h b/usr/src/uts/common/sys/procfs.h
index f592fd9dcf..501af712ef 100644
--- a/usr/src/uts/common/sys/procfs.h
+++ b/usr/src/uts/common/sys/procfs.h
@@ -25,6 +25,7 @@
*/
/*
* Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _SYS_PROCFS_H
@@ -233,6 +234,7 @@ typedef struct pstatus {
#define PR_FAULTED 6
#define PR_SUSPENDED 7
#define PR_CHECKPOINT 8
+#define PR_BRAND 9
/*
* lwp ps(1) information file. /proc/<pid>/lwp/<lwpid>/lwpsinfo
diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h
index 9f2e166fea..41ea2331df 100644
--- a/usr/src/uts/common/sys/thread.h
+++ b/usr/src/uts/common/sys/thread.h
@@ -419,8 +419,9 @@ typedef struct _kthread {
#define TS_RESUME 0x1000 /* setrun() by CPR resume process */
#define TS_CREATE 0x2000 /* setrun() by syslwp_create() */
#define TS_RUNQMATCH 0x4000 /* exact run queue balancing by setbackdq() */
+#define TS_BSTART 0x8000 /* setrun() by brand */
#define TS_ALLSTART \
- (TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE)
+ (TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE|TS_BSTART)
#define TS_ANYWAITQ (TS_PROJWAITQ|TS_ZONEWAITQ)
/*
@@ -448,6 +449,10 @@ typedef struct _kthread {
#define ISTOPPED(t) ((t)->t_state == TS_STOPPED && \
!((t)->t_schedflag & TS_PSTART))
+/* True if thread is stopped for a brand-specific reason */
+#define BSTOPPED(t) ((t)->t_state == TS_STOPPED && \
+ !((t)->t_schedflag & TS_BSTART))
+
/* True if thread is asleep and wakeable */
#define ISWAKEABLE(t) (((t)->t_state == TS_SLEEP && \
((t)->t_flag & T_WAKEABLE)))
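TS_BSTART and BSTOPPED() mirror the existing TS_PSTART/ISTOPPED() pair: an LWP stopped with why == PR_BRAND stays stopped until brand code sets TS_BSTART and makes it runnable again. A sketch of such a restart, assuming the thread_lock()/setrun_locked() protocol used elsewhere in this change; the function name is illustrative:

static void
examplebrand_restart_lwp(kthread_t *t)
{
	thread_lock(t);
	if (BSTOPPED(t)) {
		t->t_schedflag |= TS_BSTART;	/* brand start condition met */
		setrun_locked(t);
	}
	thread_unlock(t);
}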
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 7ab9377e16..a5d1610842 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -594,6 +594,7 @@ typedef struct zone {
tsol_mlp_list_t zone_mlps; /* MLPs on zone-private addresses */
boolean_t zone_restart_init; /* Restart init if it dies? */
+ boolean_t zone_reboot_on_init_exit; /* Reboot if init dies? */
struct brand *zone_brand; /* zone's brand */
void *zone_brand_data; /* store brand specific data */
id_t zone_defaultcid; /* dflt scheduling class id */