author    | Robert Mustacchi <rm@joyent.com> | 2015-02-13 13:58:47 +0000
committer | Robert Mustacchi <rm@joyent.com> | 2015-02-13 13:58:47 +0000
commit    | 39fb3e855a4aecad22321e329e58359504b39555
tree      | fc78f004574ceb35febc58259402a28df6e94b82 /usr/src/uts/common
parent    | 8062190bde2b194ecd0aa6e2063cf3dddcc5d741
parent    | eb20fbe2fac1596990392cf5a8ea5030948e4768
[illumos-joyent merge] dev-overlay
commit 3b13a1ef7511135ec0c75b5f94de8075454efd79
5322 tree connect from Windows 7 fails
commit 200c7a6f5f903a9dcd83c319bddeee9b627406ac
OS-3820 lxbrand ptrace(2): the next generation
OS-3685 lxbrand PTRACE_O_TRACEFORK race condition
OS-3834 lxbrand 64-bit strace(1) reports 64-bit process as using x32 ABI
OS-3794 lxbrand panic on init signal death
commit f1630c2becf4af570cefc47794212e8110eb79e3
OS-3796 lxbrand remove netstat overlay script
commit 653f0ca3cc17876745f96a5e25fb60faa72b33f3
OS-3798 lxbrand populate /proc/net/snmp
commit d72cf7bfa828bceb8c81f81282e9b81712d032e2
OS-3797 lxbrand populate /proc/net/unix
commit d0fcb88af333aa48dd2b958f3681f1b597b924cc
4545 _t_create(): Use after free in error code paths
commit c62da27859e36f9fdd8cee3c6df3ad567543dcf9
4539 _t_checkfd() should not call find_tilink() if force_sync is set
commit a2ca8683ba75b01f3468a17061812db2731decb6
OS-3826 lxbrand ipv4 networking broken after OS-3800
commit 6529d7987e7f46c8a923ae661b04c896f3815d91
OS-3800 lx_thunk core dumps on centos 6.6
commit f4cf1a6a363de08977c8db91c119df49e9f6c296
5186 The door_call(3c) man page contains too much
commit 23c88c5ab36a96068ae184ebd20bf625426c3773
5594 nodename(4): Duplicate paragraph in the man page
commit 5c644dd8f2c88f1e78ae4866f22c749a3d1b0157
5585 Typo in gethostname(3c): current processor
commit 7825e891f35613dcf9e7e6d6848401511f3f96eb
4717 Missing period (.) in man pages
commit 270be59d332e9c2003ef54b622a67d6f0e3ef7fc
5263 Missing space in getrbuf(9f) man page
commit faf5add516ff7b15d67af766e32716c04c75716d
5528 devid_str_free() should be used for devid_get_minor_name() too
commit 4812581794004eff0af2b765b832403b30bf64ab
4996 rtld _init race leads to incorrect symbol values
commit 5ae8d2a82dbf2dc1b22ae6755ecefed000d7532e
OS-3822 OS-3780 creates a life of fd crime in libproc
commit 1f2ca518aeecee8616fccc0c46a339773faea7d5
4863 illumos-gate can't be built with fresh perl versions
commit 386e9c9ebfe4116f62e7a0950acd30564fc60125
5566 elfexec is overzealous with vpages
5572 elfexec and mapelfexec can disagree on aux vectors
commit 8a986bad744c8a479dfacfcdc16bcad15bbb1ec6
5101 privileges(5) manual page missing some privileges
commit 9ef283481583d677cd2cf5449ef49b90eacc97d4
5261 libm should stop using synonyms.h (fix studio build)
commit ed1591688000a5d179c4ba27793cae55590c55d2
5590 improper use of NULL in tools/protocmp
commit ad0b1ea5d69a45fe23c434277599e315f29a5fca
5589 improper use of NULL in tools/ctf
commit 8bf1e4f3b335466afe9b85d761b3822ec8c1a371
OS-3816 sync up mdb_v8 with upstream
commit 97a9db610324e7db4393415018e0e737485a94cd
4393 /etc/rpc: 100133 and 100169 should be added
commit dc1de1110df1be3c207fa275c52056314a438b95
OS-3806 /proc/self/fd/2 is no longer working in LX
commit ebb8db03bc1050fa9dd3b184c99634f4c2eae56c
OS-3810 tar doesn't properly wait for its children
OS-3813 Clean up unused variables and parenthesis warnings in tar
commit 4190e41f9d08bc0e41bed63c0b3641af9cfa1a1d
OS-3811 LTP signal_test_05 now fails
commit 86851d81a7ab61819497cbc95630c7fc812d00c9
OS-3808 lx_boot_zone_redhat relies on /tmp being cleared on reboot
commit d41c05b714ac5cd589b3edd49c55f21d1d8f2589
OS-3802 LTP kill11 test case fails after vsyscall signal changes
OS-3803 LTP kill12 test failing
OS-3804 LTP signal03 failing
OS-3805 LTP waitpid05 failing
commit e3e63864a2ed092a7da41db4ea4998f461524a18
OS-3807 lx brand: asynchronous I/O operations can hang
commit 91600d919baafe4e4d8bdee4168878036351c556
5578 file(1) should validate Elf_Shdr->sh_name
commit 30e6ec63ea67bd88d75811ab11b9c115ff026ab3
OS-3795 lxbrand initial centos 6.6 support
commit 9d47dec0481d8cd53b2c1053c96bfa3f78357d6a
5592 NULL pointer dereference in dsl_prop_notify_all_cb()
commit 71ceaec61a50dff6050c6905ac8352dd58c89311
OS-3801 update boot copyright date
commit 823c8a3d4ff8d31f222cb81ed5b0685e318215e1
OS-3793 lxbrand /proc/pid/maps formatted incorrectly
commit da4b59e7b4853d1b5018cd3e37eb592574a673b4
OS-3768 snoop could fail more cleanly on large files
commit 643588d2256f94df9beb942b812ffaa83665c09a
OS-3799 lxbrand panic when accessing /proc/net/tcp from GZ
commit 9a3dc1f68894cc036075fdabc3764446d5d5fa52
OS-3485 lxbrand populate /proc for netstat
commit 12e2b6203a3d75549383615f039c435ab4418037
OS-3790 lxbrand vsyscall segfault when SIGSEGV handler set to SIG_DFL
commit d5fef2f4802f515505a545dfee6c81b5fd377a96
OS-3792 platform uses obsolete nfs mount command
commit bd181d5a0c2b96669dcb4aa44619e0b7dbbedab9
1100 cpustat usage message is incorrect
commit 2515f5d4dbff605ba645d47a6851d8d0bac5b994
5527 ::spa_space fails with mdb: couldn't find member dd_phys of type struct zfs`dsl_dir'
commit 6e062f4a9c9a27ea6e2e980c1b5f4c41e33aba45
5563 Some traverse() callers do strange things
commit bb633f5b0e92fa59f65274f8b5637a7107ca29ec
OS-3782 lx brand: vsyscall can induce fatal SIGSEGV
commit 4d01dc17bafd21a83dfb4383d30137cf0ab74ed1
OS-3777 zlogin -I needs to work with docker run when in logging mode
commit de267ec7980943d6c76defc73d2a3d8356d3acb2
OS-3776 project rctls should be in sync with zone rctls
commit b9acd3d9851f7716ce41f37dcd04dd6067a21146
OS-3780 libproc could know about .gnu_debuglink for remote symbol tables
commit 10648e3fb261910e63f8354af96444b02d016f44
OS-3773 lxbrand ltp shmctl ipcinfo can fail on a machine with >4GB of memory
OS-3779 lxbrand shmctl_ipcinfo struct incorrect for 64-bit
commit 181d66828bce1fbd366a3b3a9224593577390463
OS-3778 lxbrand panic when ptracing native process
commit 63098359d8842cf81b6fb1b81567e12c671db06f
5511 stat.h(3head) manpage #define typo
commit 7eb15eeb0b1a3f960946b7563765e128425fc13b
5568 'allthreads' needs to be global
commit 32b5e9f0cda85eef94eb578dd053e155df43fed3
5554 kmdb can't trace stacks that begin within itself
Diffstat (limited to 'usr/src/uts/common')
45 files changed, 3941 insertions, 1075 deletions
diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c index b964aab1d3..4507c0303c 100644 --- a/usr/src/uts/common/brand/lx/os/lx_brand.c +++ b/usr/src/uts/common/brand/lx/os/lx_brand.c @@ -62,6 +62,7 @@ #include <sys/sdt.h> #include <sys/x86_archext.h> #include <sys/controlregs.h> +#include <sys/core.h> #include <lx_signum.h> int lx_debug = 0; @@ -77,6 +78,10 @@ void lx_set_kern_version(zone_t *, char *); void lx_copy_procdata(proc_t *, proc_t *); extern int getsetcontext(int, void *); +extern int waitsys(idtype_t, id_t, siginfo_t *, int); +#if defined(_SYSCALL32_IMPL) +extern int waitsys32(idtype_t, id_t, siginfo_t *, int); +#endif extern void lx_proc_exit(proc_t *, klwp_t *); static void lx_psig_to_proc(proc_t *, kthread_t *, int); @@ -106,35 +111,38 @@ static int lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args, caddr_t exec_file, struct cred *cred, int brand_action); static boolean_t lx_native_exec(uint8_t, const char **); -static void lx_ptrace_exectrap(proc_t *); static uint32_t lx_map32limit(proc_t *); /* lx brand */ struct brand_ops lx_brops = { - lx_init_brand_data, - lx_free_brand_data, - lx_brandsys, - lx_setbrand, - lx_getattr, - lx_setattr, - lx_copy_procdata, - lx_proc_exit, - lx_exec, - lx_setrval, - lx_initlwp, - lx_forklwp, - lx_freelwp, - lx_exitlwp, - lx_elfexec, - NULL, - NULL, - lx_psig_to_proc, - NSIG, - lx_exit_with_sig, - lx_wait_filter, - lx_native_exec, - lx_ptrace_exectrap, - lx_map32limit + lx_init_brand_data, /* b_init_brand_data */ + lx_free_brand_data, /* b_free_brand_data */ + lx_brandsys, /* b_brandsys */ + lx_setbrand, /* b_setbrand */ + lx_getattr, /* b_getattr */ + lx_setattr, /* b_setattr */ + lx_copy_procdata, /* b_copy_procdata */ + lx_proc_exit, /* b_proc_exit */ + lx_exec, /* b_exec */ + lx_setrval, /* b_lwp_setrval */ + lx_initlwp, /* b_initlwp */ + lx_forklwp, /* b_forklwp */ + lx_freelwp, /* b_freelwp */ + lx_exitlwp, /* b_lwpexit */ + lx_elfexec, /* b_elfexec */ + NULL, /* b_sigset_native_to_brand */ + NULL, /* b_sigset_brand_to_native */ + lx_psig_to_proc, /* b_psig_to_proc */ + NSIG, /* b_nsig */ + lx_exit_with_sig, /* b_exit_with_sig */ + lx_wait_filter, /* b_wait_filter */ + lx_native_exec, /* b_native_exec */ + NULL, /* b_ptrace_exectrap */ + lx_map32limit, /* b_map32limit */ + lx_stop_notify, /* b_stop_notify */ + lx_waitid_helper, /* b_waitid_helper */ + lx_sigcld_repost, /* b_sigcld_repost */ + lx_issig_stop /* b_issig_stop */ }; struct brand_mach_ops lx_mops = { @@ -166,33 +174,39 @@ static struct modlinkage modlinkage = { void lx_proc_exit(proc_t *p, klwp_t *lwp) { - zone_t *z = p->p_zone; int sig = ptolxproc(p)->l_signal; - ASSERT(p->p_brand != NULL); - ASSERT(p->p_brand_data != NULL); - - /* - * If init is dying and we aren't explicitly shutting down the zone - * or the system, then Solaris is about to restart init. The Linux - * init is not designed to handle a restart, which it interprets as - * a reboot. To give it a sane environment in which to run, we - * reboot the zone. - */ - if (p->p_pid == z->zone_proc_initpid) { - if (z->zone_boot_err == 0 && - z->zone_restart_init && - zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && - zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) - (void) zone_kadmin(A_REBOOT, 0, NULL, CRED()); - } + VERIFY(p->p_brand == &lx_brand); + VERIFY(p->p_brand_data != NULL); /* * We might get here if fork failed (e.g. ENOMEM) so we don't always * have an lwp (see brand_clearbrand). 
*/ - if (lwp != NULL) + if (lwp != NULL) { + boolean_t reenter_mutex = B_FALSE; + + /* + * This brand entry point is called variously with and without + * the process p_lock held. It would be possible to refactor + * the brand infrastructure so that proc_exit() explicitly + * calls this hook (b_lwpexit/lx_exitlwp) for the last LWP in a + * process prior to detaching the brand with + * brand_clearbrand(). Absent such refactoring, we + * conditionally exit the mutex for the duration of the call. + * + * The atomic replacement of both "p_brand" and "p_brand_data" + * is not affected by dropping and reacquiring the mutex here. + */ + if (mutex_owned(&p->p_lock) != 0) { + mutex_exit(&p->p_lock); + reenter_mutex = B_TRUE; + } lx_exitlwp(lwp); + if (reenter_mutex) { + mutex_enter(&p->p_lock); + } + } /* * The call path here is: @@ -260,310 +274,6 @@ lx_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize) return (-EINVAL); } -/* - * Enable/disable ptrace system call tracing for the given LWP. Enabling is - * done by both setting the flag in that LWP's brand data (in the kernel) and - * setting the process-wide trace flag (in the brand library of the traced - * process). - */ -static int -lx_ptrace_syscall_set(pid_t pid, id_t lwpid, int set) -{ - proc_t *p; - kthread_t *t; - klwp_t *lwp; - lx_proc_data_t *lpdp; - lx_lwp_data_t *lldp; - uintptr_t addr; - int ret, flag = 1; - - if ((p = sprlock(pid)) == NULL) - return (ESRCH); - - if (priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) { - sprunlock(p); - return (EPERM); - } - - if ((t = idtot(p, lwpid)) == NULL || (lwp = ttolwp(t)) == NULL) { - sprunlock(p); - return (ESRCH); - } - - if ((lpdp = p->p_brand_data) == NULL || - (lldp = lwp->lwp_brand) == NULL) { - sprunlock(p); - return (ESRCH); - } - - if (set) { - /* - * Enable the ptrace flag for this LWP and this process. Note - * that we will turn off the LWP's ptrace flag, but we don't - * turn off the process's ptrace flag. - */ - lldp->br_ptrace = 1; - lpdp->l_ptrace = 1; - - addr = lpdp->l_traceflag; - - mutex_exit(&p->p_lock); - - /* - * This can fail only in some rare corner cases where the - * process is exiting or we're completely out of memory. In - * these cases, it's sufficient to return an error to the ptrace - * consumer and leave the process-wide flag set. - */ - ret = uwrite(p, &flag, sizeof (flag), addr); - - mutex_enter(&p->p_lock); - - /* - * If we couldn't set the trace flag, unset the LWP's ptrace - * flag as there ptrace consumer won't expect this LWP to stop. - */ - if (ret != 0) - lldp->br_ptrace = 0; - } else { - lldp->br_ptrace = 0; - ret = 0; - } - - sprunlock(p); - - if (ret != 0) - ret = EIO; - - return (ret); -} - -static void -lx_ptrace_fire(void) -{ - kthread_t *t = curthread; - klwp_t *lwp = ttolwp(t); - lx_lwp_data_t *lldp = lwp->lwp_brand; - - /* - * The ptrace flag only applies until the next event is encountered - * for the given LWP. If it's set, turn off the flag and poke the - * controlling process by raising a signal. - */ - if (lldp->br_ptrace) { - lldp->br_ptrace = 0; - tsignal(t, SIGTRAP); - } -} - -/* - * Supports Linux PTRACE_SETOPTIONS handling which is similar to PTRACE_TRACEME - * but return an event in the second byte of si_status. 
- */ -static int -lx_ptrace_ext_opts(int cmd, pid_t pid, uintptr_t val, int64_t *rval) -{ - proc_t *p; - lx_proc_data_t *lpdp; - uint_t ret; - - if ((p = sprlock(pid)) == NULL) - return (ESRCH); - - /* - * Note that priv_proc_cred_perm can disallow access to ourself if - * the proc's SNOCD p_flag is set, so we skip that check for ourself. - */ - if (curproc != p && - priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) { - sprunlock(p); - return (EPERM); - } - - if ((lpdp = p->p_brand_data) == NULL) { - sprunlock(p); - return (ESRCH); - } - - switch (cmd) { - case B_PTRACE_EXT_OPTS_SET: - lpdp->l_ptrace_opts = (uint_t)val; - break; - - case B_PTRACE_EXT_OPTS_GET: - ret = lpdp->l_ptrace_opts; - if (lpdp->l_ptrace_is_traced) - ret |= EMUL_PTRACE_IS_TRACED; - break; - - case B_PTRACE_EXT_OPTS_EVT: - ret = lpdp->l_ptrace_event; - lpdp->l_ptrace_event = 0; - break; - - case B_PTRACE_DETACH: - lpdp->l_ptrace_is_traced = 0; - break; - - default: - sprunlock(p); - return (EINVAL); - } - - sprunlock(p); - - if (cmd == B_PTRACE_EXT_OPTS_GET || cmd == B_PTRACE_EXT_OPTS_EVT) { - if (copyout(&ret, (void *)val, sizeof (uint_t)) != 0) - return (EFAULT); - } - - *rval = 0; - return (0); -} - -/* - * Used to support Linux PTRACE_SETOPTIONS handling and similar to - * PTRACE_TRACEME. We signal ourselves to stop on return from this syscall and - * setup the event reason so the emulation can pull this out when someone - * 'waits' on this process. - */ -static void -lx_ptrace_stop_for_option(int option, ulong_t msg) -{ - proc_t *p = ttoproc(curthread); - sigqueue_t *sqp; - lx_proc_data_t *lpdp; - boolean_t child = B_FALSE; - - if ((lpdp = p->p_brand_data) == NULL) { - /* this should never happen but just to be safe */ - return; - } - - if (option & EMUL_PTRACE_O_CHILD) { - child = B_TRUE; - option &= ~EMUL_PTRACE_O_CHILD; - } - - lpdp->l_ptrace_is_traced = 1; - - /* Track the event as the reason for stopping */ - switch (option) { - case LX_PTRACE_O_TRACEFORK: - if (!child) { - lpdp->l_ptrace_event = LX_PTRACE_EVENT_FORK; - lpdp->l_ptrace_eventmsg = msg; - } - break; - case LX_PTRACE_O_TRACEVFORK: - if (!child) { - lpdp->l_ptrace_event = LX_PTRACE_EVENT_VFORK; - lpdp->l_ptrace_eventmsg = msg; - } - break; - case LX_PTRACE_O_TRACECLONE: - if (!child) { - lpdp->l_ptrace_event = LX_PTRACE_EVENT_CLONE; - lpdp->l_ptrace_eventmsg = msg; - } - break; - case LX_PTRACE_O_TRACEEXEC: - lpdp->l_ptrace_event = LX_PTRACE_EVENT_EXEC; - break; - case LX_PTRACE_O_TRACEVFORKDONE: - lpdp->l_ptrace_event = LX_PTRACE_EVENT_VFORK_DONE; - lpdp->l_ptrace_eventmsg = msg; - break; - case LX_PTRACE_O_TRACEEXIT: - lpdp->l_ptrace_event = LX_PTRACE_EVENT_EXIT; - lpdp->l_ptrace_eventmsg = msg; - break; - case LX_PTRACE_O_TRACESECCOMP: - lpdp->l_ptrace_event = LX_PTRACE_EVENT_SECCOMP; - break; - } - - /* - * Post the required signal to ourselves so that we stop. - * - * Although Linux will send a SIGSTOP to a child process which is - * stopped due to PTRACE_O_TRACEFORK, etc., we do not send that signal - * since that leads us down the code path in the kernel which calls - * stop(PR_JOBCONTROL, SIGSTOP), which in turn means that the TS_XSTART - * flag gets turned off on the thread and this makes it complex to - * actually get this process going when the userland application wants - * to detach. Since consumers don't seem to depend on the specific - * signal, we'll just stop both the parent and child the same way. 
We - * do keep track of both the parent and child via the - * EMUL_PTRACE_O_CHILD bit, in case we need to revisit this later. - */ - psignal(p, SIGTRAP); - - /* - * Since we're stopping, we need to post the SIGCHLD to the parent. The - * code in sigcld expects p_wdata to be set to SIGTRAP before it can - * send the signal, so do that here. We also need p_wcode to be set as - * if we are ptracing, even though we're not really (see the code in - * stop() when procstop is set and p->p_proc_flag has the P_PR_PTRACE - * bit set). This is needed so that when the application calls waitid, - * it will properly retrieve the process. - */ - sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); - mutex_enter(&pidlock); - p->p_wdata = SIGTRAP; - p->p_wcode = CLD_TRAPPED; - sigcld(p, sqp); - mutex_exit(&pidlock); -} - -static int -lx_ptrace_geteventmsg(pid_t pid, ulong_t *msgp) -{ - proc_t *p; - lx_proc_data_t *lpdp; - ulong_t msg; - - if ((p = sprlock(pid)) == NULL) - return (ESRCH); - - if (curproc != p && - priv_proc_cred_perm(curproc->p_cred, p, NULL, VREAD) != 0) { - sprunlock(p); - return (EPERM); - } - - if ((lpdp = p->p_brand_data) == NULL) { - sprunlock(p); - return (ESRCH); - } - - msg = lpdp->l_ptrace_eventmsg; - lpdp->l_ptrace_eventmsg = 0; - - sprunlock(p); - - if (copyout(&msg, (void *)msgp, sizeof (ulong_t)) != 0) - return (EFAULT); - - return (0); -} - -/* - * Brand entry to allow us to optionally generate the ptrace SIGTRAP on exec(). - * This will only be called if ptrace is enabled -- and we only generate the - * SIGTRAP if LX_PTRACE_O_TRACEEXEC hasn't been set. - */ -void -lx_ptrace_exectrap(proc_t *p) -{ - lx_proc_data_t *lpdp; - - if ((lpdp = p->p_brand_data) == NULL || - !(lpdp->l_ptrace_opts & LX_PTRACE_O_TRACEEXEC)) { - psignal(p, SIGTRAP); - } -} - uint32_t lx_map32limit(proc_t *p) { @@ -718,6 +428,12 @@ lx_init_brand_data(zone_t *zone) (void) strlcpy(data->lxzd_kernel_version, "2.4.21", LX_VERS_MAX); data->lxzd_max_syscall = LX_NSYSCALLS; zone->zone_brand_data = data; + + /* + * In Linux, if the init(1) process terminates the system panics. + * The zone must reboot to simulate this behaviour. + */ + zone->zone_reboot_on_init_exit = B_TRUE; } void @@ -752,6 +468,8 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, struct termios *termios; uint_t termios_len; int error; + int code; + int sig; lx_brand_registration_t reg; lx_lwp_data_t *lwpd; @@ -832,6 +550,16 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, lwpd->br_scms = 1; #endif + if (pd->l_traceflag != NULL && pd->l_ptrace != 0) { + /* + * If ptrace(2) is active on this process, it is likely + * that we just finished an emulated execve(2) in a + * traced child. 
The usermode traceflag will have been + * clobbered by the exec, so we set it again here: + */ + (void) suword32((void *)pd->l_traceflag, 1); + } + *rval = 0; return (0); case B_TTYMODES: @@ -931,11 +659,6 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, return (0); } - case B_PTRACE_SYSCALL: - *rval = lx_ptrace_syscall_set((pid_t)arg1, (id_t)arg2, - (int)arg3); - return (0); - case B_SYSENTRY: if (lx_systrace_enabled) { ASSERT(lx_systrace_entry_ptr != NULL); @@ -963,7 +686,7 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, #endif } - lx_ptrace_fire(); + (void) lx_ptrace_stop(LX_PR_SYSENTRY); pd = p->p_brand_data; @@ -984,7 +707,7 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, (*lx_systrace_return_ptr)(arg1, arg2, arg2, 0, 0, 0, 0); } - lx_ptrace_fire(); + (void) lx_ptrace_stop(LX_PR_SYSEXIT); pd = p->p_brand_data; @@ -1010,20 +733,55 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, */ return (lx_sched_affinity(cmd, arg1, arg2, arg3, rval)); - case B_PTRACE_EXT_OPTS: + case B_PTRACE_STOP_FOR_OPT: + return (lx_ptrace_stop_for_option((int)arg1, arg2 == 0 ? + B_FALSE : B_TRUE, (ulong_t)arg3)); + + case B_PTRACE_CLONE_BEGIN: + return (lx_ptrace_set_clone_inherit((int)arg1, arg2 == 0 ? + B_FALSE : B_TRUE)); + + case B_PTRACE_KERNEL: + return (lx_ptrace_kernel((int)arg1, (pid_t)arg2, arg3, arg4)); + + case B_HELPER_WAITID: { + idtype_t idtype = (idtype_t)arg1; + id_t id = (id_t)arg2; + siginfo_t *infop = (siginfo_t *)arg3; + int options = (int)arg4; + + lwpd = ttolxlwp(curthread); + /* - * Set or get the ptrace extended options or get the event - * reason for the stop. + * Our brand-specific waitid helper only understands a subset of + * the possible idtypes. Ensure we keep to that subset here: */ - return (lx_ptrace_ext_opts((int)arg1, (pid_t)arg2, arg3, rval)); + if (idtype != P_ALL && idtype != P_PID && idtype != P_PGID) { + return (EINVAL); + } - case B_PTRACE_STOP_FOR_OPT: - lx_ptrace_stop_for_option((int)arg1, (ulong_t)arg2); - return (0); + /* + * Enable the return of emulated ptrace(2) stop conditions + * through lx_waitid_helper, and stash the Linux-specific + * extra waitid() flags. 
+ */ + lwpd->br_waitid_emulate = B_TRUE; + lwpd->br_waitid_flags = (int)arg5; + +#if defined(_SYSCALL32_IMPL) + if (get_udatamodel() != DATAMODEL_NATIVE) { + return (waitsys32(idtype, id, infop, options)); + } else +#endif + { + return (waitsys(idtype, id, infop, options)); + } + + lwpd->br_waitid_emulate = B_FALSE; + lwpd->br_waitid_flags = 0; - case B_PTRACE_GETEVENTMSG: - lx_ptrace_geteventmsg((pid_t)arg1, (ulong_t *)arg2); return (0); + } case B_UNSUPPORTED: { @@ -1186,7 +944,19 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, return (0); case B_EXIT_AS_SIG: - exit(CLD_KILLED, (int)arg1); + code = CLD_KILLED; + sig = (int)arg1; + proc_is_exiting(p); + if (exitlwps(1) != 0) { + mutex_enter(&p->p_lock); + lwp_exit(); + } + ttolwp(curthread)->lwp_cursig = sig; + if (sig == SIGSEGV) { + if (core(sig, 0) == 0) + code = CLD_DUMPED; + } + exit(code, sig); /* NOTREACHED */ break; @@ -1254,6 +1024,7 @@ lx_copy_procdata(proc_t *child, proc_t *parent) ppd = parent->p_brand_data; ASSERT(ppd != NULL); + ASSERT(parent->p_brand == &lx_brand); cpd = kmem_alloc(sizeof (lx_proc_data_t), KM_SLEEP); *cpd = *ppd; @@ -1322,13 +1093,14 @@ lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args, struct execenv origenv; stack_t orig_sigaltstack; struct user *up = PTOU(ttoproc(curthread)); - lx_elf_data_t *edp = - &((lx_proc_data_t *)ttoproc(curthread)->p_brand_data)->l_elf_data; + lx_elf_data_t *edp; char *lib_path = NULL; ASSERT(ttoproc(curthread)->p_brand == &lx_brand); ASSERT(ttoproc(curthread)->p_brand_data != NULL); + edp = &ttolxproc(curthread)->l_elf_data; + if (args->to_model == DATAMODEL_NATIVE) { lib_path = LX_LIB_PATH; } @@ -1685,6 +1457,7 @@ _init(void) /* for lx_futex() */ lx_futex_init(); + lx_ptrace_init(); err = mod_install(&modlinkage); if (err != 0) { @@ -1724,6 +1497,7 @@ _fini(void) if (brand_zone_count(&lx_brand)) return (EBUSY); + lx_ptrace_fini(); lx_pid_fini(); lx_ioctl_fini(); diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c index c550ecf9af..abb0ab6e63 100644 --- a/usr/src/uts/common/brand/lx/os/lx_misc.c +++ b/usr/src/uts/common/brand/lx/os/lx_misc.c @@ -80,7 +80,7 @@ lx_exec() klwp_t *lwp = ttolwp(curthread); struct lx_lwp_data *lwpd = lwptolxlwp(lwp); proc_t *p = ttoproc(curthread); - lx_proc_data_t *pd = p->p_brand_data; + lx_proc_data_t *pd = ptolxproc(p); int err; /* @@ -113,6 +113,13 @@ lx_exec() lx_pid_reassign(curthread); } + /* + * Inform ptrace(2) that we are processing an execve(2) call so that if + * we are traced we can post either the PTRACE_EVENT_EXEC event or the + * legacy SIGTRAP. + */ + (void) lx_ptrace_stop_for_option(LX_PTRACE_O_TRACEEXEC, B_FALSE, 0); + /* clear the fsbase values until the app. 
can reinitialize them */ lwpd->br_lx_fsbase = NULL; lwpd->br_ntv_fsbase = NULL; @@ -137,15 +144,21 @@ void lx_exitlwp(klwp_t *lwp) { struct lx_lwp_data *lwpd = lwptolxlwp(lwp); - proc_t *p; + proc_t *p = lwptoproc(lwp); kthread_t *t; sigqueue_t *sqp = NULL; pid_t ppid; id_t ptid; + VERIFY(MUTEX_NOT_HELD(&p->p_lock)); + if (lwpd == NULL) return; /* second time thru' */ + mutex_enter(&p->p_lock); + lx_ptrace_exit(p, lwp); + mutex_exit(&p->p_lock); + if (lwpd->br_clear_ctidp != NULL) { (void) suword32(lwpd->br_clear_ctidp, 0); (void) lx_futex((uintptr_t)lwpd->br_clear_ctidp, FUTEX_WAKE, 1, @@ -226,9 +239,17 @@ lx_freelwp(klwp_t *lwp) if (lwpd != NULL) { (void) removectx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, lx_save, NULL); - if (lwpd->br_pid != 0) + if (lwpd->br_pid != 0) { lx_pid_rele(lwptoproc(lwp)->p_pid, lwptot(lwp)->t_tid); + } + + /* + * Ensure that lx_ptrace_exit() has been called to detach + * ptrace(2) tracers and tracees. + */ + VERIFY(lwpd->br_ptrace_tracer == NULL); + VERIFY(lwpd->br_ptrace_accord == NULL); lwp->lwp_brand = NULL; kmem_free(lwpd, sizeof (struct lx_lwp_data)); @@ -238,8 +259,8 @@ lx_freelwp(klwp_t *lwp) int lx_initlwp(klwp_t *lwp) { - struct lx_lwp_data *lwpd; - struct lx_lwp_data *plwpd; + lx_lwp_data_t *lwpd; + lx_lwp_data_t *plwpd = ttolxlwp(curthread); kthread_t *tp = lwptot(lwp); lwpd = kmem_zalloc(sizeof (struct lx_lwp_data), KM_SLEEP); @@ -265,8 +286,7 @@ lx_initlwp(klwp_t *lwp) if (tp->t_next == tp) { lwpd->br_ppid = tp->t_procp->p_ppid; lwpd->br_ptid = -1; - } else if (ttolxlwp(curthread) != NULL) { - plwpd = ttolxlwp(curthread); + } else if (plwpd != NULL) { bcopy(plwpd->br_tls, lwpd->br_tls, sizeof (lwpd->br_tls)); lwpd->br_ppid = plwpd->br_pid; lwpd->br_ptid = curthread->t_tid; @@ -292,6 +312,14 @@ lx_initlwp(klwp_t *lwp) installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, lx_save, NULL); + /* + * If the parent LWP has a ptrace(2) tracer, the new LWP may + * need to inherit that same tracer. + */ + if (plwpd != NULL) { + lx_ptrace_inherit_tracer(plwpd, lwpd); + } + return (0); } @@ -524,10 +552,7 @@ lx_exit_with_sig(proc_t *cp, sigqueue_t *sqp, void *brand_data) * SIGCHLD X - * * This is an XOR of __WCLONE being set, and SIGCHLD being the signal sent on - * process exit. Since (flags & __WCLONE) is not guaranteed to have the - * least-significant bit set when the flags is enabled, !! is used to place - * that bit into the least significant bit. Then, the bitwise XOR can be - * used, because there is no logical XOR in the C language. + * process exit. * * More information on wait in lx brands can be found at * usr/src/lib/brand/lx/lx_brand/common/wait.c. @@ -535,29 +560,45 @@ lx_exit_with_sig(proc_t *cp, sigqueue_t *sqp, void *brand_data) boolean_t lx_wait_filter(proc_t *pp, proc_t *cp) { - int flags; + lx_lwp_data_t *lwpd = ttolxlwp(curthread); + int flags = lwpd->br_waitid_flags; boolean_t ret; - if (LX_ARGS(waitid) != NULL) { - flags = LX_ARGS(waitid)->waitid_flags; - mutex_enter(&cp->p_lock); - if (flags & LX_WALL) { - ret = B_TRUE; - } else if (cp->p_stat == SZOMB || - cp->p_brand == &native_brand) { - ret = (((!!(flags & LX_WCLONE)) ^ - (stol_signo[SIGCHLD] == cp->p_exit_data)) - ? 
B_TRUE : B_FALSE); + if (!lwpd->br_waitid_emulate) { + return (B_TRUE); + } + + mutex_enter(&cp->p_lock); + if (flags & LX_WALL) { + ret = B_TRUE; + + } else { + int exitsig; + boolean_t is_clone, _wclone; + + /* + * Determine the exit signal for this process: + */ + if (cp->p_stat == SZOMB || cp->p_brand == &native_brand) { + exitsig = cp->p_exit_data; } else { - ret = (((!!(flags & LX_WCLONE)) ^ - (stol_signo[SIGCHLD] == ptolxproc(cp)->l_signal)) - ? B_TRUE : B_FALSE); + exitsig = ptolxproc(cp)->l_signal; } - mutex_exit(&cp->p_lock); - return (ret); - } else { - return (B_TRUE); + + /* + * To enable the bitwise XOR to stand in for the absent C + * logical XOR, we use the logical NOT operator twice to + * ensure the least significant bit is populated with the + * __WCLONE flag status. + */ + _wclone = !!(flags & LX_WCLONE); + is_clone = (stol_signo[SIGCHLD] == exitsig); + + ret = (_wclone ^ is_clone) ? B_TRUE : B_FALSE; } + mutex_exit(&cp->p_lock); + + return (ret); } void diff --git a/usr/src/uts/common/brand/lx/os/lx_pid.c b/usr/src/uts/common/brand/lx/os/lx_pid.c index aa8c751bc2..8552754c43 100644 --- a/usr/src/uts/common/brand/lx/os/lx_pid.c +++ b/usr/src/uts/common/brand/lx/os/lx_pid.c @@ -22,7 +22,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #include <sys/types.h> @@ -222,6 +222,28 @@ lx_lpid_to_spair(pid_t l_pid, pid_t *s_pid, id_t *s_tid) { struct lx_pid *hp; + if (l_pid == 1) { + pid_t initpid; + + /* + * We are trying to look up the Linux init process for the + * current zone, which we pretend has pid 1. + */ + if ((initpid = curzone->zone_proc_initpid) == -1) { + /* + * We could not find the init process for this zone. + */ + return (-1); + } + + if (s_pid != NULL) + *s_pid = initpid; + if (s_tid != NULL) + *s_tid = 1; + + return (0); + } + mutex_enter(&hash_lock); for (hp = ltos_pid_hash[LTOS_HASH(l_pid)]; hp; hp = hp->ltos_next) { if (l_pid == hp->l_pid) { diff --git a/usr/src/uts/common/brand/lx/os/lx_ptrace.c b/usr/src/uts/common/brand/lx/os/lx_ptrace.c new file mode 100644 index 0000000000..6e4b74531d --- /dev/null +++ b/usr/src/uts/common/brand/lx/os/lx_ptrace.c @@ -0,0 +1,2270 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * Emulation of the Linux ptrace(2) interface. + * + * OVERVIEW + * + * The Linux process model is somewhat different from the illumos native + * model. One critical difference is that each Linux thread has a unique + * identifier in the pid namespace. The lx brand assigns a pid to each LWP + * within the emulated process, giving the pid of the process itself to the + * first LWP. + * + * The Linux ptrace(2) interface allows for any LWP in a branded process to + * exert control over any other LWP within the same zone. Control is exerted + * by the use of the ptrace(2) system call itself, which accepts a number of + * request codes. Feedback on traced events is primarily received by the + * tracer through SIGCLD and the emulated waitpid(2) and waitid(2) system + * calls. 
Many of the possible ptrace(2) requests will only succeed if the + * target LWP is in a "ptrace-stop" condition. + * + * HISTORY + * + * The brand support for ptrace(2) was originally built on top of the rich + * support for debugging and tracing provided through the illumos /proc + * interfaces, mounted at /native/proc within the zone. The native legacy + * ptrace(3C) functionality was used as a starting point, but was generally + * insufficient for complete and precise emulation. The extant legacy + * interface, and indeed our native SIGCLD and waitid(2) facilities, are + * focused on _process_ level concerns -- the Linux interface has been + * extended to be aware of LWPs as well. + * + * In order to allow us to focus on providing more complete and accurate + * emulation without extensive and undesirable changes to the native + * facilities, this second generation ptrace(2) emulation is mostly separate + * from any other tracing or debugging framework in the system. + * + * ATTACHING TRACERS TO TRACEES + * + * There are several ways that a child LWP may becomed traced by a tracer. + * To determine which attach method caused a tracee to become attached, one + * may inspect the "br_ptrace_attach" member of the LWP-specific brand data + * with the debugger. + * + * The first attach methods to consider are the attaching ptrace(2) requests: + * + * PTRACE_TRACEME + * + * If an LWP makes a PTRACE_TRACEME call, it will be attached as a tracee + * to its parent LWP (br_ppid). Using PTRACE_TRACEME does _not_ cause the + * tracee to be held in a stop condition. It is common practice for + * consumers to raise(SIGSTOP) immediately afterward. + * + * PTRACE_ATTACH + * + * An LWP may attempt to trace any other LWP in this, or another, process. + * We currently allow any attach where the process containing the tracer + * LWP has permission to write to /proc for the process containing the + * intended tracer. This action also sends a SIGSTOP to the newly attached + * tracee. + * + * The second class of attach methods are the clone(2)/fork(2) inheritance + * options that may be set on a tracee with PTRACE_SETOPTIONS: + * + * PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK and PTRACE_O_TRACECLONE + * + * If these options have been set on a tracee, then a fork(2), vfork(2) or + * clone(2) respectively will cause the newly created LWP to be traced by + * the same tracer. The same set of ptrace(2) options will also be set on + * the new child. + * + * The third class of attach method is the PTRACE_CLONE flag to clone(2). + * This flag induces the same inheritance as PTRACE_O_TRACECLONE, but is + * passed by the tracee as an argument to clone(2). + * + * DETACHING TRACEES + * + * Tracees can be detached by the tracer with the PTRACE_DETACH request. + * This request is only valid when the tracee is in a ptrace(2) stop + * condition, and is itself a restarting action. + * + * If the tracer exits without detaching all of its tracees, then all of the + * tracees are automatically detached and restarted. If a tracee was in + * "signal-delivery-stop" at the time the tracer exited, the signal will be + * released to the child unless it is a SIGSTOP. We drop this instance of + * SIGSTOP in order to prevent the child from becoming stopped by job + * control. + * + * ACCORD ALLOCATION AND MANAGEMENT + * + * The "lx_ptrace_accord_t" object tracks the agreement between a tracer LWP + * and zero or more tracee LWPs. It is explicitly illegal for a tracee to + * trace its tracer, and we block this in PTRACE_ATTACH/PTRACE_TRACEME. 
+ * + * An LWP starts out without an accord. If a child of that LWP calls + * ptrace(2) with the PTRACE_TRACEME subcommand, or if the LWP itself uses + * PTRACE_ATTACH, an accord will be allocated and stored on that LWP. The + * accord structure is not released from that LWP until it arrives in + * lx_exitlwp(), as called by lwp_exit(). A new accord will not be + * allocated, even if one does not exist, once an LWP arrives in lx_exitlwp() + * and sets the LX_PTRACE_EXITING flag. An LWP will have at most one accord + * structure throughout its entire lifecycle; once it has one, it has the + * same one until death. + * + * The accord is reference counted (lxpa_refcnt), starting at a count of one + * at creation to represent the link from the tracer LWP to its accord. The + * accord is not freed until the reference count falls to zero. + * + * To make mutual exclusion between a detaching tracer and various notifying + * tracees simpler, the tracer will hold "pidlock" while it clears the + * accord members that point back to the tracer LWP and CV. + * + * SIGNALS AND JOB CONTROL + * + * Various actions, either directly ptrace(2) related or commonly associated + * with tracing, cause process- or thread-directed SIGSTOP signals to be sent + * to tracees. These signals, and indeed any signal other than SIGKILL, can + * be suppressed by the tracer when using a restarting request (including + * PTRACE_DETACH) on a child. The signal may also be substituted for a + * different signal. + * + * If a SIGSTOP (or other stopping signal) is not suppressed by the tracer, + * it will induce the regular illumos native job control stop of the entire + * traced process. This is at least passingly similar to the Linux "group + * stop" ptrace(2) condition. + * + * SYSTEM CALL TRACING + * + * The ptrace(2) interface enables the tracer to hold the tracee on entry and + * exit from system calls. When a stopped tracee is restarted through the + * PTRACE_SYSCALL request, the LX_PTRACE_SYSCALL flag is set until the next + * system call boundary. Whether this is a "syscall-entry-stop" or + * "syscall-exit-stop", the tracee is held and the tracer is notified via + * SIGCLD/waitpid(2) in the usual way. The flag LX_PTRACE_SYSCALL flag is + * cleared after each stop; for ongoing system call tracing the tracee must + * be continuously restarted with PTRACE_SYSCALL. + * + * EVENT STOPS + * + * Various events (particularly FORK, VFORK, CLONE, EXEC and EXIT) are + * enabled by the tracer through PTRACE_SETOPTIONS. Once enabled, the tracee + * will be stopped at the nominated points of interest and the tracer + * notified. The tracer may request additional information about the event, + * such as the pid of new LWPs and processes, via PTRACE_GETEVENTMSG. + * + * LOCK ORDERING RULES + * + * It is not safe, in general, to hold p_lock for two different processes at + * the same time. This constraint is the primary reason for the existence + * (and complexity) of the ptrace(2) accord mechanism. + * + * In order to facilitate looking up accords by the "pid" of a tracer LWP, + * p_lock for the tracer process may be held while entering the accord mutex + * (lxpa_lock). This mutex protects the accord flags and reference count. + * The reference count is manipulated through lx_ptrace_accord_hold() and + * lx_ptrace_accord_rele(). + * + * DO NOT interact with the accord mutex (lxpa_lock) directly. 
The + * lx_ptrace_accord_enter() and lx_ptrace_accord_exit() functions do various + * book-keeping and lock ordering enforcement and MUST be used. + * + * It is NOT legal to take ANY p_lock while holding the accord mutex + * (lxpa_lock). If the lxpa_tracees_lock is to be held concurrently with + * lxpa_lock, lxpa_lock MUST be taken first and dropped before taking p_lock + * of any processes from the tracee list. + * + * It is NOT legal to take a tracee p_lock and then attempt to enter the + * accord mutex (or tracee list mutex) of its tracer. When running as the + * tracee LWP, the tracee's hold will prevent the accord from being freed. + * Use of the LX_PTRACE_STOPPING or LX_PTRACE_CLONING flag in the + * LWP-specific brand data prevents an exiting tracer from altering the + * tracee until the tracee has come to an orderly stop, without requiring the + * tracee to hold its own p_lock the entire time it is stopping. + * + * It is not safe, in general, to enter "pidlock" while holding the p_lock of + * any process. It is similarly illegal to hold any accord locks (lxpa_lock + * or lxpa_sublock) while attempting to enter "pidlock". As "pidlock" is a + * global mutex, it should be held for the shortest possible time. + */ + +#include <sys/types.h> +#include <sys/kmem.h> +#include <sys/ksynch.h> +#include <sys/sysmacros.h> +#include <sys/procfs.h> +#include <sys/cmn_err.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/wait.h> +#include <sys/prsystm.h> +#include <sys/note.h> + +#include <sys/brand.h> +#include <sys/lx_brand.h> +#include <sys/lx_impl.h> +#include <sys/lx_misc.h> +#include <sys/lx_pid.h> +#include <lx_syscall.h> +#include <lx_signum.h> + + +typedef enum lx_ptrace_cont_flags_t { + LX_PTC_NONE = 0x00, + LX_PTC_SYSCALL = 0x01, + LX_PTC_SINGLESTEP = 0x02 +} lx_ptrace_cont_flags_t; + +/* + * Macros for checking the state of an LWP via "br_ptrace_flags": + */ +#define LX_PTRACE_BUSY \ + (LX_PTRACE_EXITING | LX_PTRACE_STOPPING | LX_PTRACE_CLONING) + +#define VISIBLE(a) (((a)->br_ptrace_flags & LX_PTRACE_EXITING) == 0) +#define TRACEE_BUSY(a) (((a)->br_ptrace_flags & LX_PTRACE_BUSY) != 0) + +#define ACCORD_HELD(a) MUTEX_HELD(&(a)->lxpa_lock) + +static kcondvar_t lx_ptrace_busy_cv; +static kmem_cache_t *lx_ptrace_accord_cache; + +/* + * Enter the accord mutex. + */ +static void +lx_ptrace_accord_enter(lx_ptrace_accord_t *accord) +{ + VERIFY(MUTEX_NOT_HELD(&accord->lxpa_tracees_lock)); + + mutex_enter(&accord->lxpa_lock); +} + +/* + * Exit the accord mutex. If the reference count has dropped to zero, + * free the accord. + */ +static void +lx_ptrace_accord_exit(lx_ptrace_accord_t *accord) +{ + VERIFY(ACCORD_HELD(accord)); + + if (accord->lxpa_refcnt > 0) { + mutex_exit(&accord->lxpa_lock); + return; + } + + /* + * When the reference count drops to zero we must free the accord. + */ + VERIFY(accord->lxpa_tracer == NULL); + VERIFY(MUTEX_NOT_HELD(&accord->lxpa_tracees_lock)); + VERIFY(list_is_empty(&accord->lxpa_tracees)); + VERIFY(accord->lxpa_flags & LX_ACC_TOMBSTONE); + + mutex_destroy(&accord->lxpa_lock); + mutex_destroy(&accord->lxpa_tracees_lock); + + kmem_cache_free(lx_ptrace_accord_cache, accord); +} + +/* + * Drop our reference to this accord. If this drops the reference count + * to zero, the next lx_ptrace_accord_exit() will free the accord. + */ +static void +lx_ptrace_accord_rele(lx_ptrace_accord_t *accord) +{ + VERIFY(ACCORD_HELD(accord)); + + VERIFY(accord->lxpa_refcnt > 0); + accord->lxpa_refcnt--; +} + +/* + * Place an additional hold on an accord. 
+ */ +static void +lx_ptrace_accord_hold(lx_ptrace_accord_t *accord) +{ + VERIFY(ACCORD_HELD(accord)); + + accord->lxpa_refcnt++; +} + +/* + * Fetch the accord for this LWP. If one has not yet been created, and the + * process is not exiting, allocate it now. Must be called with p_lock held + * for the process containing the target LWP. + * + * If successful, we return holding the accord lock (lxpa_lock). + */ +static int +lx_ptrace_accord_get_locked(klwp_t *lwp, lx_ptrace_accord_t **accordp, + boolean_t allocate_one) +{ + lx_ptrace_accord_t *lxpa; + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + proc_t *p = lwptoproc(lwp); + + VERIFY(MUTEX_HELD(&p->p_lock)); + + /* + * If this LWP does not have an accord, we wish to allocate + * and install one. + */ + if ((lxpa = lwpd->br_ptrace_accord) == NULL) { + if (!allocate_one || !VISIBLE(lwpd)) { + /* + * Either we do not wish to allocate an accord, or this + * LWP has already begun exiting from a ptrace + * perspective. + */ + *accordp = NULL; + return (ESRCH); + } + + lxpa = kmem_cache_alloc(lx_ptrace_accord_cache, KM_SLEEP); + bzero(lxpa, sizeof (*lxpa)); + + /* + * The initial reference count is 1 because we are referencing + * it in from the soon-to-be tracer LWP. + */ + lxpa->lxpa_refcnt = 1; + mutex_init(&lxpa->lxpa_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&lxpa->lxpa_tracees_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&lxpa->lxpa_tracees, sizeof (lx_lwp_data_t), + offsetof(lx_lwp_data_t, br_ptrace_linkage)); + lxpa->lxpa_cvp = &p->p_cv; + + lxpa->lxpa_tracer = lwpd; + lwpd->br_ptrace_accord = lxpa; + } + + /* + * Lock the accord before returning it to the caller. + */ + lx_ptrace_accord_enter(lxpa); + + /* + * There should be at least one active reference to this accord, + * otherwise it should have been freed. + */ + VERIFY(lxpa->lxpa_refcnt > 0); + + *accordp = lxpa; + return (0); +} + +/* + * Accords belong to the tracer LWP. Get the accord for this tracer or return + * an error if it was not possible. To prevent deadlocks, the caller MUST NOT + * hold p_lock on its own or any other process. + * + * If successful, we return holding the accord lock (lxpa_lock). + */ +static int +lx_ptrace_accord_get_by_pid(pid_t lxpid, lx_ptrace_accord_t **accordp) +{ + int ret = ESRCH; + pid_t apid; + id_t atid; + proc_t *aproc; + kthread_t *athr; + klwp_t *alwp; + lx_lwp_data_t *alwpd; + + VERIFY(MUTEX_NOT_HELD(&curproc->p_lock)); + + /* + * Locate the process containing the tracer LWP based on its Linux pid + * and lock it. + */ + if (lx_lpid_to_spair(lxpid, &apid, &atid) != 0 || + (aproc = sprlock(apid)) == NULL) { + return (ESRCH); + } + + /* + * Locate the tracer LWP itself and ensure that it is visible to + * ptrace(2). + */ + if ((athr = idtot(aproc, atid)) == NULL || + (alwp = ttolwp(athr)) == NULL || + (alwpd = lwptolxlwp(alwp)) == NULL || + !VISIBLE(alwpd)) { + sprunlock(aproc); + return (ESRCH); + } + + /* + * We should not fetch our own accord this way. + */ + if (athr == curthread) { + sprunlock(aproc); + return (EPERM); + } + + /* + * Fetch (or allocate) the accord owned by this tracer LWP: + */ + ret = lx_ptrace_accord_get_locked(alwp, accordp, B_TRUE); + + /* + * Unlock the process and return. + */ + sprunlock(aproc); + return (ret); +} + +/* + * Get (or allocate) the ptrace(2) accord for the current LWP, acting as a + * tracer. The caller MUST NOT currently hold p_lock on the process containing + * this LWP. + * + * If successful, we return holding the accord lock (lxpa_lock). 
+ */ +static int +lx_ptrace_accord_get(lx_ptrace_accord_t **accordp, boolean_t allocate_one) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = lwptoproc(lwp); + int ret; + + VERIFY(MUTEX_NOT_HELD(&p->p_lock)); + + /* + * Lock the tracer (this LWP). + */ + mutex_enter(&p->p_lock); + + /* + * Fetch (or allocate) the accord for this LWP: + */ + ret = lx_ptrace_accord_get_locked(lwp, accordp, allocate_one); + + mutex_exit(&p->p_lock); + + return (ret); +} + +/* + * Restart an LWP if it is in "ptrace-stop". This function may induce sleep, + * so the caller MUST NOT hold any mutexes other than p_lock for the process + * containing the LWP. + */ +static void +lx_ptrace_restart_lwp(klwp_t *lwp) +{ + kthread_t *rt = lwptot(lwp); + proc_t *rproc = lwptoproc(lwp); + lx_lwp_data_t *rlwpd = lwptolxlwp(lwp); + + VERIFY(rt != curthread); + VERIFY(MUTEX_HELD(&rproc->p_lock)); + + /* + * Exclude potential meddling from procfs. + */ + prbarrier(rproc); + + /* + * Check that the LWP is still in "ptrace-stop" and, if so, restart it. + */ + thread_lock(rt); + if (BSTOPPED(rt) && rt->t_whystop == PR_BRAND) { + rt->t_schedflag |= TS_BSTART; + setrun_locked(rt); + + /* + * Clear stop reason. + */ + rlwpd->br_ptrace_whystop = 0; + rlwpd->br_ptrace_whatstop = 0; + rlwpd->br_ptrace_flags &= ~LX_PTRACE_CLDPEND; + } + thread_unlock(rt); +} + +static void +lx_winfo(lx_lwp_data_t *remote, k_siginfo_t *ip, boolean_t waitflag, + pid_t *event_ppid, pid_t *event_pid) +{ + int signo; + + /* + * Populate our k_siginfo_t with data about this "ptrace-stop" + * condition: + */ + bzero(ip, sizeof (*ip)); + ip->si_signo = SIGCLD; + ip->si_pid = remote->br_pid; + ip->si_code = CLD_TRAPPED; + + switch (remote->br_ptrace_whatstop) { + case LX_PR_SYSENTRY: + case LX_PR_SYSEXIT: + ip->si_status = SIGTRAP; + if (remote->br_ptrace_options & LX_PTRACE_O_TRACESYSGOOD) { + ip->si_status |= 0x80; + } + break; + + case LX_PR_SIGNALLED: + signo = remote->br_ptrace_stopsig; + if (signo < 1 || signo >= LX_NSIG) { + /* + * If this signal number is not valid, pretend it + * was a SIGTRAP. + */ + ip->si_status = SIGTRAP; + } else { + ip->si_status = ltos_signo[signo]; + } + break; + + case LX_PR_EVENT: + ip->si_status = SIGTRAP | remote->br_ptrace_event; + /* + * Record the Linux pid of both this LWP and the create + * event we are dispatching. We will use this information + * to unblock any subsequent ptrace(2) events that depend + * on this one. + */ + if (event_ppid != NULL) + *event_ppid = remote->br_pid; + if (event_pid != NULL) + *event_pid = (pid_t)remote->br_ptrace_eventmsg; + break; + + default: + cmn_err(CE_PANIC, "unxpected stop subreason: %d", + remote->br_ptrace_whatstop); + } + + /* + * If WNOWAIT was specified, do not mark the event as posted + * so that it may be re-fetched on another call to waitid(). + */ + if (waitflag) { + remote->br_ptrace_whystop = 0; + remote->br_ptrace_whatstop = 0; + remote->br_ptrace_flags &= ~LX_PTRACE_CLDPEND; + } +} + +/* + * Receive notification from stop() of a PR_BRAND stop. + */ +void +lx_stop_notify(proc_t *p, klwp_t *lwp, ushort_t why, ushort_t what) +{ + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + lx_ptrace_accord_t *accord; + klwp_t *plwp = NULL; + proc_t *pp = NULL; + lx_lwp_data_t *parent; + boolean_t cldpend = B_TRUE; + boolean_t cldpost = B_FALSE; + sigqueue_t *sqp = NULL; + + /* + * We currently only care about LX-specific stop reasons. 
+ */ + if (why != PR_BRAND) + return; + + switch (what) { + case LX_PR_SYSENTRY: + case LX_PR_SYSEXIT: + case LX_PR_SIGNALLED: + case LX_PR_EVENT: + break; + default: + cmn_err(CE_PANIC, "unexpected subreason for PR_BRAND" + " stop: %d", (int)what); + } + + /* + * We should be holding the lock on our containing process. The + * STOPPING flag should have been set by lx_ptrace_stop() for all + * PR_BRAND stops. + */ + VERIFY(MUTEX_HELD(&p->p_lock)); + VERIFY(lwpd->br_ptrace_flags & LX_PTRACE_STOPPING); + VERIFY((accord = lwpd->br_ptrace_tracer) != NULL); + + /* + * We must drop our process lock to take "pidlock". The + * LX_PTRACE_STOPPING flag protects us from an exiting tracer. + */ + mutex_exit(&p->p_lock); + + /* + * Allocate before we enter any mutexes. + */ + sqp = kmem_zalloc(sizeof (*sqp), KM_SLEEP); + + /* + * We take pidlock now, which excludes all callers of waitid() and + * prevents a detaching tracer from clearing critical accord members. + */ + mutex_enter(&pidlock); + mutex_enter(&p->p_lock); + + /* + * Get the ptrace(2) "parent" process, to which we may send + * a SIGCLD signal later. + */ + if ((parent = accord->lxpa_tracer) != NULL && + (plwp = parent->br_lwp) != NULL) { + pp = lwptoproc(plwp); + } + + /* + * Our tracer should not have been modified in our absence; the + * LX_PTRACE_STOPPING flag prevents it. + */ + VERIFY(lwpd->br_ptrace_tracer == accord); + + /* + * Stash data for this stop condition in the LWP data while we hold + * both pidlock and our p_lock. + */ + lwpd->br_ptrace_whystop = why; + lwpd->br_ptrace_whatstop = what; + + /* + * If this event does not depend on an event from the parent LWP, + * populate the siginfo_t for the event pending on this tracee LWP. + */ + if (!(lwpd->br_ptrace_flags & LX_PTRACE_PARENT_WAIT) && pp != NULL) { + cldpost = B_TRUE; + lx_winfo(lwpd, &sqp->sq_info, B_FALSE, NULL, NULL); + } + + /* + * Drop our p_lock so that we may lock the tracer. + */ + mutex_exit(&p->p_lock); + if (cldpost && pp != NULL) { + /* + * Post the SIGCLD to the tracer. + */ + mutex_enter(&pp->p_lock); + if (!sigismember(&pp->p_sig, SIGCLD)) { + sigaddqa(pp, plwp->lwp_thread, sqp); + cldpend = B_FALSE; + sqp = NULL; + } + mutex_exit(&pp->p_lock); + } + + /* + * We re-take our process lock now. The lock will be held until + * the thread is actually marked stopped, so we will not race with + * lx_ptrace_lock_if_stopped() or lx_waitid_helper(). + */ + mutex_enter(&p->p_lock); + + /* + * We clear the STOPPING flag; stop() continues to hold our p_lock + * until our thread stop state is visible. + */ + lwpd->br_ptrace_flags &= ~LX_PTRACE_STOPPING; + lwpd->br_ptrace_flags |= LX_PTRACE_STOPPED; + if (cldpend) { + /* + * We sent the SIGCLD for this new wait condition already. + */ + lwpd->br_ptrace_flags |= LX_PTRACE_CLDPEND; + } + + /* + * If lx_ptrace_exit_tracer() is trying to detach our tracer, it will + * be sleeping on this CV until LX_PTRACE_STOPPING is clear. Wake it + * now. + */ + cv_broadcast(&lx_ptrace_busy_cv); + + /* + * While still holding pidlock, we attempt to wake our tracer from a + * potential waitid() slumber. + */ + if (accord->lxpa_cvp != NULL) { + cv_broadcast(accord->lxpa_cvp); + } + + /* + * We release pidlock and return as we were called: with our p_lock + * held. + */ + mutex_exit(&pidlock); + + if (sqp != NULL) { + kmem_free(sqp, sizeof (*sqp)); + } +} + +/* + * For any restarting action (e.g. PTRACE_CONT, PTRACE_SYSCALL or + * PTRACE_DETACH) to be allowed, the tracee LWP must be in "ptrace-stop". 
This + * check must ONLY be run on tracees of the current LWP. If the check is + * successful, we return with the tracee p_lock held. + */ +static int +lx_ptrace_lock_if_stopped(lx_ptrace_accord_t *accord, lx_lwp_data_t *remote) +{ + klwp_t *rlwp = remote->br_lwp; + proc_t *rproc = lwptoproc(rlwp); + kthread_t *rt = lwptot(rlwp); + + /* + * We must never check that we, ourselves, are stopped. We must also + * have the accord tracee list locked while we lock our tracees. + */ + VERIFY(curthread != rt); + VERIFY(MUTEX_HELD(&accord->lxpa_tracees_lock)); + VERIFY(accord->lxpa_tracer == ttolxlwp(curthread)); + + /* + * Lock the process containing the tracee LWP. + */ + mutex_enter(&rproc->p_lock); + if (!VISIBLE(remote)) { + /* + * The tracee LWP is currently detaching itself as it exits. + * It is no longer visible to ptrace(2). + */ + mutex_exit(&rproc->p_lock); + return (ESRCH); + } + + /* + * We must only check whether tracees of the current LWP are stopped. + * We check this condition after confirming visibility as an exiting + * tracee may no longer be completely consistent. + */ + VERIFY(remote->br_ptrace_tracer == accord); + + if (!(remote->br_ptrace_flags & LX_PTRACE_STOPPED)) { + /* + * The tracee is not in "ptrace-stop", so we release the + * process. + */ + mutex_exit(&rproc->p_lock); + return (ESRCH); + } + + /* + * The tracee is stopped. We return holding its process lock so that + * the caller may manipulate it. + */ + return (0); +} + +static int +lx_ptrace_setoptions(lx_lwp_data_t *remote, uintptr_t options) +{ + /* + * Check for valid options. + */ + if ((options & ~LX_PTRACE_O_ALL) != 0) { + return (EINVAL); + } + + /* + * Set ptrace options on the target LWP. + */ + remote->br_ptrace_options = (lx_ptrace_options_t)options; + + return (0); +} + +static int +lx_ptrace_geteventmsg(lx_lwp_data_t *remote, void *umsgp) +{ + int error; + +#if defined(_SYSCALL32_IMPL) + if (get_udatamodel() != DATAMODEL_NATIVE) { + uint32_t tmp = remote->br_ptrace_eventmsg; + + error = copyout(&tmp, umsgp, sizeof (uint32_t)); + } else +#endif + { + error = copyout(&remote->br_ptrace_eventmsg, umsgp, + sizeof (ulong_t)); + } + + return (error); +} + +/* + * Implements the PTRACE_CONT subcommand of the Linux ptrace(2) interface. + */ +static int +lx_ptrace_cont(lx_lwp_data_t *remote, lx_ptrace_cont_flags_t flags, int signo) +{ + klwp_t *lwp = remote->br_lwp; + + if (flags & LX_PTC_SINGLESTEP) { + /* + * We do not currently support single-stepping. + */ + lx_unsupported("PTRACE_SINGLESTEP not currently implemented"); + return (EINVAL); + } + + /* + * The tracer may choose to suppress the delivery of a signal, or + * select an alternative signal for delivery. If this is an + * appropriate ptrace(2) "signal-delivery-stop", br_ptrace_stopsig + * will be used as the new signal number. + * + * As with so many other aspects of the Linux ptrace(2) interface, this + * may fail silently if the state machine is not aligned correctly. + */ + remote->br_ptrace_stopsig = signo; + + /* + * Handle the syscall-stop flag if this is a PTRACE_SYSCALL restart: + */ + if (flags & LX_PTC_SYSCALL) { + remote->br_ptrace_flags |= LX_PTRACE_SYSCALL; + } else { + remote->br_ptrace_flags &= ~LX_PTRACE_SYSCALL; + } + + lx_ptrace_restart_lwp(lwp); + + return (0); +} + +/* + * Implements the PTRACE_DETACH subcommand of the Linux ptrace(2) interface. + * + * The LWP identified by the Linux pid "lx_pid" will, if it as a tracee of the + * current LWP, be detached and set runnable. 
If the specified LWP is not + * currently in the "ptrace-stop" state, the routine will return ESRCH as if + * the LWP did not exist at all. + * + * The caller must not hold p_lock on any process. + */ +static int +lx_ptrace_detach(lx_ptrace_accord_t *accord, lx_lwp_data_t *remote, int signo, + boolean_t *release_hold) +{ + klwp_t *rlwp; + + rlwp = remote->br_lwp; + + /* + * The tracee LWP was in "ptrace-stop" and we now hold its p_lock. + * Detach the LWP from the accord and set it running. + */ + VERIFY(!TRACEE_BUSY(remote)); + remote->br_ptrace_flags &= ~(LX_PTRACE_SYSCALL | LX_PTRACE_INHERIT); + VERIFY(list_link_active(&remote->br_ptrace_linkage)); + list_remove(&accord->lxpa_tracees, remote); + + remote->br_ptrace_attach = LX_PTA_NONE; + remote->br_ptrace_tracer = NULL; + remote->br_ptrace_flags = 0; + *release_hold = B_TRUE; + + /* + * The tracer may, as described in lx_ptrace_cont(), choose to suppress + * or modify the delivered signal. + */ + remote->br_ptrace_stopsig = signo; + + lx_ptrace_restart_lwp(rlwp); + + return (0); +} + +/* + * This routine implements the PTRACE_ATTACH operation of the Linux ptrace(2) + * interface. + * + * This LWP is requesting to be attached as a tracer to another LWP -- the + * tracee. If a ptrace accord to track the list of tracees has not yet been + * allocated, one will be allocated and attached to this LWP now. + * + * The "br_ptrace_tracer" on the tracee LWP is set to this accord, and the + * tracee LWP is then added to the "lxpa_tracees" list in the accord. We drop + * locks between these two phases; the only consumer of trace events from this + * accord is this LWP, which obviously cannot be running waitpid(2) at the same + * time as this call to ptrace(2). + */ +static int +lx_ptrace_attach(pid_t lx_pid) +{ + int error = ESRCH; + int32_t one = 1; + /* + * Our (Tracer) LWP: + */ + lx_ptrace_accord_t *accord; + lx_lwp_data_t *lwpd = ttolxlwp(curthread); + /* + * Remote (Tracee) LWP: + */ + pid_t rpid; + id_t rtid; + proc_t *rproc; + kthread_t *rthr; + klwp_t *rlwp; + lx_lwp_data_t *rlwpd; + + if (lwpd->br_pid == lx_pid) { + /* + * We cannot trace ourselves. + */ + return (EPERM); + } + + /* + * Ensure that we have an accord and obtain a lock on it. This + * routine should not fail because the LWP cannot make ptrace(2) system + * calls after it has begun exiting. + */ + VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_EXITING); + VERIFY(lx_ptrace_accord_get(&accord, B_TRUE) == 0); + + /* + * Place speculative hold in case the attach is successful. + */ + lx_ptrace_accord_hold(accord); + lx_ptrace_accord_exit(accord); + + /* + * Locate the process containing the tracee LWP based on its Linux pid + * and lock it. + */ + if (lx_lpid_to_spair(lx_pid, &rpid, &rtid) != 0 || + (rproc = sprlock(rpid)) == NULL) { + /* + * We could not find the target process. + */ + goto errout; + } + + /* + * Locate the tracee LWP. + */ + if ((rthr = idtot(rproc, rtid)) == NULL || + (rlwp = ttolwp(rthr)) == NULL || + (rlwpd = lwptolxlwp(rlwp)) == NULL || + !VISIBLE(rlwpd)) { + /* + * The LWP could not be found, was not branded, or is not + * visible to ptrace(2) at this time. + */ + goto unlock_errout; + } + + /* + * We now hold the lock on the tracee. Attempt to install ourselves + * as the tracer. + */ + if (curproc != rproc && priv_proc_cred_perm(curproc->p_cred, rproc, + NULL, VWRITE) != 0) { + /* + * This process does not have permission to trace the remote + * process. 
+ */ + error = EPERM; + } else if (rlwpd->br_ptrace_tracer != NULL) { + /* + * This LWP is already being traced. + */ + VERIFY(list_link_active(&rlwpd->br_ptrace_linkage)); + VERIFY(rlwpd->br_ptrace_attach != LX_PTA_NONE); + error = EPERM; + } else { + lx_proc_data_t *rprocd; + + /* + * Bond the tracee to the accord. + */ + VERIFY0(rlwpd->br_ptrace_flags & LX_PTRACE_EXITING); + VERIFY(rlwpd->br_ptrace_attach == LX_PTA_NONE); + rlwpd->br_ptrace_attach = LX_PTA_ATTACH; + rlwpd->br_ptrace_tracer = accord; + + /* + * We had no tracer, and are thus not in the tracees list. + * It is safe to take the tracee list lock while we insert + * ourselves. + */ + mutex_enter(&accord->lxpa_tracees_lock); + VERIFY(!list_link_active(&rlwpd->br_ptrace_linkage)); + list_insert_tail(&accord->lxpa_tracees, rlwpd); + mutex_exit(&accord->lxpa_tracees_lock); + + /* + * Send a thread-directed SIGSTOP. + */ + sigtoproc(rproc, rthr, SIGSTOP); + + /* + * Set the in-kernel process-wide ptrace(2) enable flag. + * Attempt also to write the usermode trace flag so that the + * process knows to enter the kernel for potential ptrace(2) + * syscall-stops. + */ + rprocd = ttolxproc(rthr); + rprocd->l_ptrace = 1; + mutex_exit(&rproc->p_lock); + (void) uwrite(rproc, &one, sizeof (one), rprocd->l_traceflag); + mutex_enter(&rproc->p_lock); + + error = 0; + } + +unlock_errout: + /* + * Unlock the process containing the tracee LWP and the accord. + */ + sprunlock(rproc); + +errout: + if (error != 0) { + /* + * The attach was not successful. Remove our speculative + * hold. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); + } + + return (error); +} + +int +lx_ptrace_set_clone_inherit(int option, boolean_t inherit_flag) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = lwptoproc(lwp); + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + + mutex_enter(&p->p_lock); + + switch (option) { + case LX_PTRACE_O_TRACEFORK: + case LX_PTRACE_O_TRACEVFORK: + case LX_PTRACE_O_TRACECLONE: + lwpd->br_ptrace_clone_option = option; + break; + + default: + return (EINVAL); + } + + if (inherit_flag) { + lwpd->br_ptrace_flags |= LX_PTRACE_INHERIT; + } else { + lwpd->br_ptrace_flags &= ~LX_PTRACE_INHERIT; + } + + mutex_exit(&p->p_lock); + return (0); +} + +/* + * If the parent LWP is being traced, we want to attach ourselves to the + * same accord. + */ +void +lx_ptrace_inherit_tracer(lx_lwp_data_t *src, lx_lwp_data_t *dst) +{ + proc_t *srcp = lwptoproc(src->br_lwp); + proc_t *dstp = lwptoproc(dst->br_lwp); + lx_ptrace_accord_t *accord; + boolean_t unlock = B_FALSE; + + if (srcp == dstp) { + /* + * This is syslwp_create(), so the process p_lock is already + * held. + */ + VERIFY(MUTEX_HELD(&srcp->p_lock)); + } else { + unlock = B_TRUE; + mutex_enter(&srcp->p_lock); + } + + if ((accord = src->br_ptrace_tracer) == NULL) { + /* + * The source LWP does not have a tracer to inherit. + */ + goto out; + } + + /* + * There are two conditions to check when determining if the new + * child should inherit the same tracer (and tracing options) as its + * parent. Either condition is sufficient to trigger inheritance. + */ + dst->br_ptrace_attach = LX_PTA_NONE; + if ((src->br_ptrace_options & src->br_ptrace_clone_option) != 0) { + /* + * Condition 1: + * The clone(2), fork(2) and vfork(2) emulated system calls + * populate "br_ptrace_clone_option" with the specific + * ptrace(2) SETOPTIONS option that applies to this + * operation. If the relevant option has been enabled by the + * tracer then we inherit. 
+ */ + dst->br_ptrace_attach |= LX_PTA_INHERIT_OPTIONS; + + } else if ((src->br_ptrace_flags & LX_PTRACE_INHERIT) != 0) { + /* + * Condition 2: + * If the caller opted in to inheritance with the + * PTRACE_CLONE flag to clone(2), the LX_PTRACE_INHERIT flag + * will be set and we inherit. + */ + dst->br_ptrace_attach |= LX_PTA_INHERIT_CLONE; + } + + /* + * These values only apply for the duration of a single clone(2), et + * al, system call. + */ + src->br_ptrace_flags &= ~LX_PTRACE_INHERIT; + src->br_ptrace_clone_option = 0; + + if (dst->br_ptrace_attach == LX_PTA_NONE) { + /* + * No condition triggered inheritance. + */ + goto out; + } + + /* + * Set the LX_PTRACE_CLONING flag to prevent us from being detached + * while our p_lock is dropped. + */ + src->br_ptrace_flags |= LX_PTRACE_CLONING; + mutex_exit(&srcp->p_lock); + + /* + * Hold the accord for the new LWP. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_hold(accord); + lx_ptrace_accord_exit(accord); + + /* + * Install the tracer and copy the current PTRACE_SETOPTIONS options. + */ + dst->br_ptrace_tracer = accord; + dst->br_ptrace_options = src->br_ptrace_options; + + /* + * This flag prevents waitid() from seeing events for the new child + * until the parent is able to post the relevant ptrace event to + * the tracer. + */ + dst->br_ptrace_flags |= LX_PTRACE_PARENT_WAIT; + + mutex_enter(&accord->lxpa_tracees_lock); + VERIFY(list_link_active(&src->br_ptrace_linkage)); + VERIFY(!list_link_active(&dst->br_ptrace_linkage)); + list_insert_tail(&accord->lxpa_tracees, dst); + mutex_exit(&accord->lxpa_tracees_lock); + + /* + * Relock our process and clear our busy flag. + */ + mutex_enter(&srcp->p_lock); + src->br_ptrace_flags &= ~LX_PTRACE_CLONING; + + /* + * If lx_ptrace_exit_tracer() is trying to detach our tracer, it will + * be sleeping on this CV until LX_PTRACE_CLONING is clear. Wake it + * now. + */ + cv_broadcast(&lx_ptrace_busy_cv); + +out: + if (unlock) { + mutex_exit(&srcp->p_lock); + } +} + +static int +lx_ptrace_traceme(void) +{ + int error; + boolean_t did_attach = B_FALSE; + /* + * Our (Tracee) LWP: + */ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = lwptoproc(lwp); + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + /* + * Remote (Tracer) LWP: + */ + lx_ptrace_accord_t *accord; + + /* + * We are intending to be the tracee. Fetch (or allocate) the accord + * for our parent LWP. + */ + if ((error = lx_ptrace_accord_get_by_pid(lx_lwp_ppid(lwp, NULL, + NULL), &accord)) != 0) { + /* + * Could not determine the Linux pid of the parent LWP, or + * could not get the accord for that LWP. + */ + return (error); + } + + /* + * We now hold the accord lock. + */ + if (accord->lxpa_flags & LX_ACC_TOMBSTONE) { + /* + * The accord is marked for death; give up now. + */ + lx_ptrace_accord_exit(accord); + return (ESRCH); + } + + /* + * Bump the reference count so that the accord is not freed. We need + * to drop the accord lock before we take our own p_lock. + */ + lx_ptrace_accord_hold(accord); + lx_ptrace_accord_exit(accord); + + /* + * We now lock _our_ process and determine if we can install our parent + * as our tracer. + */ + mutex_enter(&p->p_lock); + if (lwpd->br_ptrace_tracer != NULL) { + /* + * This LWP is already being traced. + */ + VERIFY(lwpd->br_ptrace_attach != LX_PTA_NONE); + error = EPERM; + } else { + /* + * Bond ourselves to the accord. We already bumped the accord + * reference count. 
+ */ + VERIFY(lwpd->br_ptrace_attach == LX_PTA_NONE); + lwpd->br_ptrace_attach = LX_PTA_TRACEME; + lwpd->br_ptrace_tracer = accord; + did_attach = B_TRUE; + error = 0; + } + mutex_exit(&p->p_lock); + + /* + * Lock the accord tracee list and add this LWP. Once we are in the + * tracee list, it is the responsibility of the tracer to detach us. + */ + if (error == 0) { + lx_ptrace_accord_enter(accord); + mutex_enter(&accord->lxpa_tracees_lock); + + if (!(accord->lxpa_flags & LX_ACC_TOMBSTONE)) { + lx_proc_data_t *procd = ttolxproc(curthread); + + /* + * Put ourselves in the tracee list for this accord. + */ + VERIFY(!list_link_active(&lwpd->br_ptrace_linkage)); + list_insert_tail(&accord->lxpa_tracees, lwpd); + mutex_exit(&accord->lxpa_tracees_lock); + lx_ptrace_accord_exit(accord); + + /* + * Set the in-kernel process-wide ptrace(2) enable + * flag. Attempt also to write the usermode trace flag + * so that the process knows to enter the kernel for + * potential ptrace(2) syscall-stops. + */ + procd->l_ptrace = 1; + (void) suword32((void *)procd->l_traceflag, 1); + + return (0); + } + mutex_exit(&accord->lxpa_tracees_lock); + + /* + * The accord has been marked for death. We must + * untrace ourselves. + */ + error = ESRCH; + lx_ptrace_accord_exit(accord); + } + + /* + * Our optimism was unjustified: We were unable to attach. We need to + * lock the process containing this LWP again in order to remove the + * tracer. + */ + VERIFY(error != 0); + mutex_enter(&p->p_lock); + if (did_attach) { + /* + * Verify that things were as we left them: + */ + VERIFY(!list_link_active(&lwpd->br_ptrace_linkage)); + VERIFY(lwpd->br_ptrace_tracer == accord); + + lwpd->br_ptrace_attach = LX_PTA_NONE; + lwpd->br_ptrace_tracer = NULL; + } + mutex_exit(&p->p_lock); + + /* + * Remove our speculative hold on the accord, possibly causing it to be + * freed in the process. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); + + return (error); +} + +static boolean_t +lx_ptrace_stop_common(proc_t *p, lx_lwp_data_t *lwpd, ushort_t what) +{ + VERIFY(MUTEX_HELD(&p->p_lock)); + + /* + * Mark this LWP as stopping and call stop() to enter "ptrace-stop". + */ + VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_STOPPING); + lwpd->br_ptrace_flags |= LX_PTRACE_STOPPING; + stop(PR_BRAND, what); + + /* + * We are back from "ptrace-stop" with our process lock held. + */ + lwpd->br_ptrace_flags &= ~(LX_PTRACE_STOPPING | LX_PTRACE_STOPPED | + LX_PTRACE_CLDPEND); + cv_broadcast(&lx_ptrace_busy_cv); + mutex_exit(&p->p_lock); + + return (B_TRUE); +} + +int +lx_ptrace_stop_for_option(int option, boolean_t child, ulong_t msg) +{ + kthread_t *t = curthread; + klwp_t *lwp = ttolwp(t); + proc_t *p = lwptoproc(lwp); + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + + mutex_enter(&p->p_lock); + if (lwpd->br_ptrace_tracer == NULL) { + mutex_exit(&p->p_lock); + return (ESRCH); + } + + if (!child) { + /* + * Only the first event posted by a new process is to be held + * until the matching parent event is dispatched, and only if + * it is a "child" event. This is not a child event, so we + * clear the wait flag. + */ + lwpd->br_ptrace_flags &= ~LX_PTRACE_PARENT_WAIT; + } + + if (!(lwpd->br_ptrace_options & option)) { + if (option == LX_PTRACE_O_TRACEEXEC) { + /* + * Without PTRACE_O_TRACEEXEC, the Linux kernel will + * send SIGTRAP to the process. + */ + sigtoproc(p, t, SIGTRAP); + mutex_exit(&p->p_lock); + return (0); + } + + /* + * The flag for this trace event is not enabled, so we will not + * stop. 
+ */ + mutex_exit(&p->p_lock); + return (ESRCH); + } + + if (child) { + switch (option) { + case LX_PTRACE_O_TRACECLONE: + case LX_PTRACE_O_TRACEFORK: + case LX_PTRACE_O_TRACEVFORK: + /* + * Send the child LWP a directed SIGSTOP. + */ + sigtoproc(p, t, SIGSTOP); + mutex_exit(&p->p_lock); + return (0); + default: + goto nostop; + } + } + + lwpd->br_ptrace_eventmsg = msg; + + switch (option) { + case LX_PTRACE_O_TRACECLONE: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_CLONE; + break; + case LX_PTRACE_O_TRACEEXEC: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_EXEC; + lwpd->br_ptrace_eventmsg = 0; + break; + case LX_PTRACE_O_TRACEEXIT: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_EXIT; + break; + case LX_PTRACE_O_TRACEFORK: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_FORK; + break; + case LX_PTRACE_O_TRACEVFORK: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_VFORK; + break; + case LX_PTRACE_O_TRACEVFORKDONE: + lwpd->br_ptrace_event = LX_PTRACE_EVENT_VFORK_DONE; + lwpd->br_ptrace_eventmsg = 0; + break; + default: + goto nostop; + } + + /* + * p_lock for the process containing the tracee will be dropped by + * lx_ptrace_stop_common(). + */ + return (lx_ptrace_stop_common(p, lwpd, LX_PR_EVENT) ? 0 : ESRCH); + +nostop: + lwpd->br_ptrace_event = 0; + lwpd->br_ptrace_eventmsg = 0; + mutex_exit(&p->p_lock); + return (ESRCH); +} + +boolean_t +lx_ptrace_stop(ushort_t what) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = lwptoproc(lwp); + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + + VERIFY(what == LX_PR_SYSENTRY || what == LX_PR_SYSEXIT || + what == LX_PR_SIGNALLED); + + /* + * If we do not have an accord, bail out early. + */ + if (lwpd->br_ptrace_tracer == NULL) + return (B_FALSE); + + /* + * Lock this process and re-check the condition. + */ + mutex_enter(&p->p_lock); + if (lwpd->br_ptrace_tracer == NULL) { + VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_SYSCALL); + mutex_exit(&p->p_lock); + return (B_FALSE); + } + + if (what == LX_PR_SYSENTRY || what == LX_PR_SYSEXIT) { + /* + * This is a syscall-entry-stop or syscall-exit-stop point. + */ + if (!(lwpd->br_ptrace_flags & LX_PTRACE_SYSCALL)) { + /* + * A system call stop has not been requested. + */ + mutex_exit(&p->p_lock); + return (B_FALSE); + } + + /* + * The PTRACE_SYSCALL restart command applies only to the next + * system call entry or exit. The tracer must restart us with + * PTRACE_SYSCALL while we are in ptrace-stop for us to fire + * again at the next system call boundary. + */ + lwpd->br_ptrace_flags &= ~LX_PTRACE_SYSCALL; + } + + /* + * p_lock for the process containing the tracee will be dropped by + * lx_ptrace_stop_common(). + */ + return (lx_ptrace_stop_common(p, lwpd, what)); +} + +int +lx_issig_stop(proc_t *p, klwp_t *lwp) +{ + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + int lx_sig; + + VERIFY(MUTEX_HELD(&p->p_lock)); + + /* + * If we do not have an accord, bail out now. Additionally, if there + * is no valid signal then we have no reason to stop. + */ + if (lwpd->br_ptrace_tracer == NULL || lwp->lwp_cursig == SIGKILL || + (lwp->lwp_cursig == 0 || lwp->lwp_cursig > NSIG) || + (lx_sig = stol_signo[lwp->lwp_cursig]) < 1) { + return (0); + } + + /* + * We stash the signal on the LWP where our waitid_helper will find it + * and enter the ptrace "signal-delivery-stop" condition. + */ + lwpd->br_ptrace_stopsig = lx_sig; + (void) lx_ptrace_stop_common(p, lwpd, LX_PR_SIGNALLED); + mutex_enter(&p->p_lock); + + /* + * When we return, the signal may have been altered or suppressed. 
+ */ + if (lwpd->br_ptrace_stopsig != lx_sig) { + int native_sig; + lx_sig = lwpd->br_ptrace_stopsig; + + if (lx_sig >= LX_NSIG) { + lx_sig = 0; + } + + /* + * Translate signal from Linux signal number back to + * an illumos native signal. + */ + if (lx_sig >= LX_NSIG || lx_sig < 0 || (native_sig = + ltos_signo[lx_sig]) < 1) { + /* + * The signal is not deliverable. + */ + lwp->lwp_cursig = 0; + lwp->lwp_extsig = 0; + if (lwp->lwp_curinfo) { + siginfofree(lwp->lwp_curinfo); + lwp->lwp_curinfo = NULL; + } + } else { + /* + * Alter the currently dispatching signal. + */ + if (native_sig == SIGKILL) { + /* + * We mark ourselves the victim and request + * a restart of signal processing. + */ + p->p_flag |= SKILLED; + p->p_flag &= ~SEXTKILLED; + return (-1); + } + lwp->lwp_cursig = native_sig; + lwp->lwp_extsig = 0; + if (lwp->lwp_curinfo != NULL) { + lwp->lwp_curinfo->sq_info.si_signo = native_sig; + } + } + } + + lwpd->br_ptrace_stopsig = 0; + return (0); +} + +static void +lx_ptrace_exit_tracer(proc_t *p, lx_lwp_data_t *lwpd, + lx_ptrace_accord_t *accord) +{ + VERIFY(MUTEX_NOT_HELD(&p->p_lock)); + + lx_ptrace_accord_enter(accord); + /* + * Mark this accord for death. This means no new tracees can be + * attached to this accord. + */ + VERIFY0(accord->lxpa_flags & LX_ACC_TOMBSTONE); + accord->lxpa_flags |= LX_ACC_TOMBSTONE; + lx_ptrace_accord_exit(accord); + + /* + * Walk the list of tracees, detaching them and setting them runnable + * if they are stopped. + */ + for (;;) { + klwp_t *rlwp; + proc_t *rproc; + lx_lwp_data_t *remote; + kmutex_t *rmp; + + mutex_enter(&accord->lxpa_tracees_lock); + if (list_is_empty(&accord->lxpa_tracees)) { + mutex_exit(&accord->lxpa_tracees_lock); + break; + } + + /* + * Fetch the first tracee LWP in the list and lock the process + * which contains it. + */ + remote = list_head(&accord->lxpa_tracees); + rlwp = remote->br_lwp; + rproc = lwptoproc(rlwp); + /* + * The p_lock mutex persists beyond the life of the process + * itself. We save the address, here, to prevent the need to + * dereference the proc_t after awaking from sleep. + */ + rmp = &rproc->p_lock; + mutex_enter(rmp); + + if (TRACEE_BUSY(remote)) { + /* + * This LWP is currently detaching itself on exit, or + * mid-way through stop(). We must wait for this + * action to be completed. While we wait on the CV, we + * must drop the accord tracee list lock. + */ + mutex_exit(&accord->lxpa_tracees_lock); + cv_wait(&lx_ptrace_busy_cv, rmp); + + /* + * While we were waiting, some state may have changed. + * Restart the walk to be sure we don't miss anything. + */ + mutex_exit(rmp); + continue; + } + + /* + * We now hold p_lock on the process. Remove the tracee from + * the list. + */ + VERIFY(list_link_active(&remote->br_ptrace_linkage)); + list_remove(&accord->lxpa_tracees, remote); + + /* + * Unlink the accord and clear our trace flags. + */ + remote->br_ptrace_attach = LX_PTA_NONE; + remote->br_ptrace_tracer = NULL; + remote->br_ptrace_flags = 0; + + /* + * Let go of the list lock before we restart the LWP. We must + * not hold any locks other than the process p_lock when + * we call lx_ptrace_restart_lwp() as it will thread_lock + * the tracee. + */ + mutex_exit(&accord->lxpa_tracees_lock); + + /* + * Ensure that the LWP is not stopped on our account. + */ + lx_ptrace_restart_lwp(rlwp); + + /* + * Unlock the former tracee. + */ + mutex_exit(rmp); + + /* + * Drop the hold this tracee had on the accord. 
+ */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); + } + + mutex_enter(&p->p_lock); + lwpd->br_ptrace_accord = NULL; + mutex_exit(&p->p_lock); + + /* + * Clean up and release our hold on the accord If we completely + * detached all tracee LWPs, this will free the accord. Otherwise, it + * will be freed when they complete their cleanup. + * + * We hold "pidlock" while clearing these members for easy exclusion of + * waitid(), etc. + */ + mutex_enter(&pidlock); + lx_ptrace_accord_enter(accord); + accord->lxpa_cvp = NULL; + accord->lxpa_tracer = NULL; + mutex_exit(&pidlock); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); +} + +static void +lx_ptrace_exit_tracee(proc_t *p, lx_lwp_data_t *lwpd, + lx_ptrace_accord_t *accord) +{ + VERIFY(MUTEX_NOT_HELD(&p->p_lock)); + + /* + * We are the tracee LWP. Lock the accord tracee list and then our + * containing process. + */ + mutex_enter(&accord->lxpa_tracees_lock); + mutex_enter(&p->p_lock); + + /* + * Remove our reference to the accord. We will release our hold + * later. + */ + VERIFY(lwpd->br_ptrace_tracer == accord); + lwpd->br_ptrace_attach = LX_PTA_NONE; + lwpd->br_ptrace_tracer = NULL; + + /* + * Remove this LWP from the accord tracee list: + */ + VERIFY(list_link_active(&lwpd->br_ptrace_linkage)); + list_remove(&accord->lxpa_tracees, lwpd); + + /* + * Wake up any tracers waiting for us to detach from the accord. + */ + cv_broadcast(&lx_ptrace_busy_cv); + mutex_exit(&p->p_lock); + mutex_exit(&accord->lxpa_tracees_lock); + + /* + * Grab "pidlock" and wake the tracer if it is blocked in waitid(). + */ + mutex_enter(&pidlock); + if (accord->lxpa_cvp != NULL) { + cv_broadcast(accord->lxpa_cvp); + } + mutex_exit(&pidlock); + + /* + * Release our hold on the accord. + */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); +} + +/* + * This routine is called from lx_exitlwp() when an LWP is ready to exit. If + * this LWP is being traced, it will be detached from the tracer's accord. The + * routine will also detach any LWPs being traced by this LWP. + */ +void +lx_ptrace_exit(proc_t *p, klwp_t *lwp) +{ + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + lx_ptrace_accord_t *accord; + + VERIFY(MUTEX_HELD(&p->p_lock)); + + /* + * Mark our LWP as exiting from a ptrace perspective. This will + * prevent a new accord from being allocated if one does not exist + * already, and will make us invisible to PTRACE_ATTACH/PTRACE_TRACEME. + */ + VERIFY0(lwpd->br_ptrace_flags & LX_PTRACE_EXITING); + lwpd->br_ptrace_flags |= LX_PTRACE_EXITING; + + if ((accord = lwpd->br_ptrace_tracer) != NULL) { + /* + * We are traced by another LWP and must detach ourselves. + */ + mutex_exit(&p->p_lock); + lx_ptrace_exit_tracee(p, lwpd, accord); + mutex_enter(&p->p_lock); + } + + if ((accord = lwpd->br_ptrace_accord) != NULL) { + /* + * We have been tracing other LWPs, and must detach from + * them and clean up our accord. + */ + mutex_exit(&p->p_lock); + lx_ptrace_exit_tracer(p, lwpd, accord); + mutex_enter(&p->p_lock); + } +} + +/* + * Called when a SIGCLD signal is dispatched so that we may enqueue another. + * Return 0 if we enqueued a signal, or -1 if not. 
+ */ +int +lx_sigcld_repost(proc_t *pp, sigqueue_t *sqp) +{ + klwp_t *lwp = ttolwp(curthread); + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + lx_ptrace_accord_t *accord; + lx_lwp_data_t *remote; + klwp_t *rlwp; + proc_t *rproc; + boolean_t found = B_FALSE; + + VERIFY(MUTEX_HELD(&pidlock)); + VERIFY(MUTEX_NOT_HELD(&pp->p_lock)); + VERIFY(lwptoproc(lwp) == pp); + + mutex_enter(&pp->p_lock); + if ((accord = lwpd->br_ptrace_accord) == NULL) { + /* + * This LWP is not a tracer LWP, so there will be no + * SIGCLD. + */ + mutex_exit(&pp->p_lock); + return (-1); + } + mutex_exit(&pp->p_lock); + + mutex_enter(&accord->lxpa_tracees_lock); + for (remote = list_head(&accord->lxpa_tracees); remote != NULL; + remote = list_next(&accord->lxpa_tracees, remote)) { + rlwp = remote->br_lwp; + rproc = lwptoproc(rlwp); + + /* + * Check if this LWP is in "ptrace-stop". If in the correct + * stop condition, lock the process containing the tracee LWP. + */ + if (lx_ptrace_lock_if_stopped(accord, remote) != 0) { + continue; + } + + if (remote->br_ptrace_flags & LX_PTRACE_PARENT_WAIT) { + /* + * This event depends on waitid() clearing out the + * event of another LWP. Skip it for now. + */ + mutex_exit(&rproc->p_lock); + continue; + } + + if (!(remote->br_ptrace_flags & LX_PTRACE_CLDPEND)) { + /* + * No SIGCLD is required for this LWP. + */ + mutex_exit(&rproc->p_lock); + continue; + } + + if (remote->br_ptrace_whystop == 0 || + remote->br_ptrace_whatstop == 0) { + /* + * No (new) stop reason to post for this LWP. + */ + mutex_exit(&rproc->p_lock); + continue; + } + + /* + * We found a process of interest. Leave the process + * containing the tracee LWP locked and break out of the loop. + */ + found = B_TRUE; + break; + } + mutex_exit(&accord->lxpa_tracees_lock); + + if (!found) { + return (-1); + } + + /* + * Generate siginfo for this tracee LWP. + */ + lx_winfo(remote, &sqp->sq_info, B_FALSE, NULL, NULL); + remote->br_ptrace_flags &= ~LX_PTRACE_CLDPEND; + mutex_exit(&rproc->p_lock); + + mutex_enter(&pp->p_lock); + if (sigismember(&pp->p_sig, SIGCLD)) { + mutex_exit(&pp->p_lock); + + mutex_enter(&rproc->p_lock); + remote->br_ptrace_flags |= LX_PTRACE_CLDPEND; + mutex_exit(&rproc->p_lock); + + return (-1); + } + sigaddqa(pp, curthread, sqp); + mutex_exit(&pp->p_lock); + + return (0); +} + +/* + * Consume the next available ptrace(2) event queued against the accord for + * this LWP. The event will be emitted as if through waitid(), and converted + * by lx_waitpid() and friends before the return to usermode. + */ +int +lx_waitid_helper(idtype_t idtype, id_t id, k_siginfo_t *ip, int options, + boolean_t *brand_wants_wait, int *rval) +{ + lx_ptrace_accord_t *accord; + klwp_t *lwp = ttolwp(curthread); + proc_t *p = lwptoproc(lwp); + lx_lwp_data_t *local = lwptolxlwp(lwp); + lx_lwp_data_t *remote; + boolean_t found = B_FALSE; + klwp_t *rlwp = NULL; + proc_t *rproc = NULL; + pid_t event_pid = 0, event_ppid = 0; + boolean_t waitflag = !(options & WNOWAIT); + + VERIFY(MUTEX_HELD(&pidlock)); + VERIFY(MUTEX_NOT_HELD(&p->p_lock)); + + /* + * By default, we do not expect waitid() to block on our account. + */ + *brand_wants_wait = B_FALSE; + + if (!local->br_waitid_emulate) { + /* + * This waitid() call is not expecting emulated results. + */ + return (-1); + } + + switch (idtype) { + case P_ALL: + case P_PID: + case P_PGID: + break; + default: + /* + * This idtype has no power here. + */ + return (-1); + } + + if (lx_ptrace_accord_get(&accord, B_FALSE) != 0) { + /* + * This LWP does not have an accord; it cannot be tracing. 
+ */ + return (-1); + } + + /* + * We do not need an additional hold on the accord as it belongs to + * the running, tracer, LWP. + */ + lx_ptrace_accord_exit(accord); + + mutex_enter(&accord->lxpa_tracees_lock); + if (list_is_empty(&accord->lxpa_tracees)) { + /* + * Though it has an accord, there are currently no tracees in + * the list for this LWP. + */ + mutex_exit(&accord->lxpa_tracees_lock); + return (-1); + } + + /* + * Walk the list of tracees and determine if any of them have events to + * report. + */ + for (remote = list_head(&accord->lxpa_tracees); remote != NULL; + remote = list_next(&accord->lxpa_tracees, remote)) { + rlwp = remote->br_lwp; + rproc = lwptoproc(rlwp); + + /* + * If the __WALL option was passed, we unconditionally consider + * every possible child. + */ + if (!(local->br_waitid_flags & LX_WALL)) { + /* + * Otherwise, we check to see if this LWP matches an + * id we are waiting for. + */ + switch (idtype) { + case P_ALL: + break; + case P_PID: + if (remote->br_pid != id) + continue; + break; + case P_PGID: + if (rproc->p_pgrp != id) + continue; + break; + default: + cmn_err(CE_PANIC, "unexpected idtype: %d", + idtype); + } + } + + /* + * Check if this LWP is in "ptrace-stop". If in the correct + * stop condition, lock the process containing the tracee LWP. + */ + if (lx_ptrace_lock_if_stopped(accord, remote) != 0) { + continue; + } + + if (remote->br_ptrace_flags & LX_PTRACE_PARENT_WAIT) { + /* + * This event depends on waitid() clearing out the + * event of another LWP. Skip it for now. + */ + mutex_exit(&rproc->p_lock); + continue; + } + + if (remote->br_ptrace_whystop == 0 || + remote->br_ptrace_whatstop == 0) { + /* + * No (new) stop reason to post for this LWP. + */ + mutex_exit(&rproc->p_lock); + continue; + } + + /* + * We found a process of interest. Leave the process + * containing the tracee LWP locked and break out of the loop. + */ + found = B_TRUE; + break; + } + mutex_exit(&accord->lxpa_tracees_lock); + + if (!found) { + /* + * There were no events of interest, but we have tracees. + * Signal to waitid() that it should block if the provided + * flags allow for it. + */ + *brand_wants_wait = B_TRUE; + return (-1); + } + + /* + * Populate the signal information. + */ + lx_winfo(remote, ip, waitflag, &event_ppid, &event_pid); + + /* + * Unlock the tracee. + */ + mutex_exit(&rproc->p_lock); + + if (event_pid != 0 && event_ppid != 0) { + /* + * We need to do another pass around the tracee list and + * unblock any events that have a "happens after" relationship + * with this event. + */ + mutex_enter(&accord->lxpa_tracees_lock); + for (remote = list_head(&accord->lxpa_tracees); remote != NULL; + remote = list_next(&accord->lxpa_tracees, remote)) { + rlwp = remote->br_lwp; + rproc = lwptoproc(rlwp); + + mutex_enter(&rproc->p_lock); + + if (remote->br_pid != event_pid || + remote->br_ppid != event_ppid) { + mutex_exit(&rproc->p_lock); + continue; + } + + remote->br_ptrace_flags &= ~LX_PTRACE_PARENT_WAIT; + + mutex_exit(&rproc->p_lock); + } + mutex_exit(&accord->lxpa_tracees_lock); + } + + /* + * If we are consuming this wait state, we remove the SIGCLD from + * the queue and post another. + */ + if (waitflag) { + mutex_exit(&pidlock); + sigcld_delete(ip); + sigcld_repost(); + mutex_enter(&pidlock); + } + + *rval = 0; + return (0); +} + +/* + * Some PTRACE_* requests are handled in-kernel by this function. It is called + * through brandsys() via the B_PTRACE_KERNEL subcommand. 
+ */ +int +lx_ptrace_kernel(int ptrace_op, pid_t lxpid, uintptr_t addr, uintptr_t data) +{ + lx_lwp_data_t *local = ttolxlwp(curthread); + lx_ptrace_accord_t *accord; + lx_lwp_data_t *remote; + klwp_t *rlwp; + proc_t *rproc; + int error; + boolean_t found = B_FALSE; + boolean_t release_hold = B_FALSE; + + _NOTE(ARGUNUSED(addr)); + + /* + * These actions do not require the target LWP to be traced or stopped. + */ + switch (ptrace_op) { + case LX_PTRACE_TRACEME: + return (lx_ptrace_traceme()); + + case LX_PTRACE_ATTACH: + return (lx_ptrace_attach(lxpid)); + } + + /* + * Ensure that we have an accord and obtain a lock on it. This routine + * should not fail because the LWP cannot make ptrace(2) system calls + * after it has begun exiting. + */ + VERIFY0(local->br_ptrace_flags & LX_PTRACE_EXITING); + VERIFY(lx_ptrace_accord_get(&accord, B_TRUE) == 0); + + /* + * The accord belongs to this (the tracer) LWP, and we have a hold on + * it. We drop the lock so that we can take other locks. + */ + lx_ptrace_accord_exit(accord); + + /* + * Does the tracee list contain the pid in question? + */ + mutex_enter(&accord->lxpa_tracees_lock); + for (remote = list_head(&accord->lxpa_tracees); remote != NULL; + remote = list_next(&accord->lxpa_tracees, remote)) { + if (remote->br_pid == lxpid) { + found = B_TRUE; + break; + } + } + if (!found) { + /* + * The requested pid does not appear in the tracee list. + */ + mutex_exit(&accord->lxpa_tracees_lock); + return (ESRCH); + } + + /* + * Attempt to lock the target LWP. + */ + if ((error = lx_ptrace_lock_if_stopped(accord, remote)) != 0) { + /* + * The LWP was not in "ptrace-stop". + */ + mutex_exit(&accord->lxpa_tracees_lock); + return (error); + } + + /* + * The target LWP is in "ptrace-stop". We have the containing process + * locked. + */ + rlwp = remote->br_lwp; + rproc = lwptoproc(rlwp); + + /* + * Process the ptrace(2) request: + */ + switch (ptrace_op) { + case LX_PTRACE_DETACH: + error = lx_ptrace_detach(accord, remote, (int)data, + &release_hold); + break; + + case LX_PTRACE_CONT: + error = lx_ptrace_cont(remote, LX_PTC_NONE, (int)data); + break; + + case LX_PTRACE_SYSCALL: + error = lx_ptrace_cont(remote, LX_PTC_SYSCALL, (int)data); + break; + + case LX_PTRACE_SINGLESTEP: + error = lx_ptrace_cont(remote, LX_PTC_SINGLESTEP, (int)data); + break; + + case LX_PTRACE_SETOPTIONS: + error = lx_ptrace_setoptions(remote, data); + break; + + case LX_PTRACE_GETEVENTMSG: + error = lx_ptrace_geteventmsg(remote, (void *)data); + break; + + default: + error = EINVAL; + } + + /* + * Drop the lock on both the tracee process and the tracee list. + */ + mutex_exit(&rproc->p_lock); + mutex_exit(&accord->lxpa_tracees_lock); + + if (release_hold) { + /* + * Release a hold from the accord. 
+ */ + lx_ptrace_accord_enter(accord); + lx_ptrace_accord_rele(accord); + lx_ptrace_accord_exit(accord); + } + + return (error); +} + +void +lx_ptrace_init(void) +{ + cv_init(&lx_ptrace_busy_cv, NULL, CV_DEFAULT, NULL); + + lx_ptrace_accord_cache = kmem_cache_create("lx_ptrace_accord", + sizeof (lx_ptrace_accord_t), 0, NULL, NULL, NULL, NULL, NULL, 0); +} + +void +lx_ptrace_fini(void) +{ + cv_destroy(&lx_ptrace_busy_cv); + + kmem_cache_destroy(lx_ptrace_accord_cache); +} diff --git a/usr/src/uts/common/brand/lx/procfs/lx_proc.h b/usr/src/uts/common/brand/lx/procfs/lx_proc.h index 184a5211db..a5c2391c95 100644 --- a/usr/src/uts/common/brand/lx/procfs/lx_proc.h +++ b/usr/src/uts/common/brand/lx/procfs/lx_proc.h @@ -138,6 +138,7 @@ typedef enum lxpr_nodetype { LXPR_NET_IGMP, /* /proc/net/igmp */ LXPR_NET_IP_MR_CACHE, /* /proc/net/ip_mr_cache */ LXPR_NET_IP_MR_VIF, /* /proc/net/ip_mr_vif */ + LXPR_NET_IPV6_ROUTE, /* /proc/net/ipv6_route */ LXPR_NET_MCFILTER, /* /proc/net/mcfilter */ LXPR_NET_NETSTAT, /* /proc/net/netstat */ LXPR_NET_RAW, /* /proc/net/raw */ @@ -250,4 +251,11 @@ void lxpr_unlock(proc_t *); } #endif +#ifndef islower +#define islower(x) (((unsigned)(x) >= 'a') && ((unsigned)(x) <= 'z')) +#endif +#ifndef toupper +#define toupper(x) (islower(x) ? (x) - 'a' + 'A' : (x)) +#endif + #endif /* _LXPROC_H */ diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c index a15d852793..3d96a1ceb2 100644 --- a/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c +++ b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c @@ -482,8 +482,8 @@ lxpr_getnode(vnode_t *dp, lxpr_nodetype_t type, proc_t *p, int fd) case LXPR_PID_FD_FD: ASSERT(p != NULL); /* lxpr_realvp is set after we return */ - vp->v_type = VLNK; lxpnp->lxpr_mode = 0700; /* read-write-exe owner only */ + vp->v_type = VLNK; break; case LXPR_PID_FDDIR: diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c index df2a4d7fb5..758a9192d7 100644 --- a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c +++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c @@ -79,6 +79,8 @@ #include <inet/tcp.h> #include <inet/udp_impl.h> #include <inet/ipclassifier.h> +#include <sys/socketvar.h> +#include <fs/sockfs/socktpi.h> /* Dependent on procfs */ extern kthread_t *prchoose(proc_t *); @@ -108,6 +110,7 @@ static int lxpr_lookup(vnode_t *, char *, vnode_t **, static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *, caller_context_t *, int); static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *); +static int lxpr_readlink_pid_fd(lxpr_node_t *lxpnp, char *bp, size_t len); static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *); static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *); static int lxpr_sync(void); @@ -163,6 +166,7 @@ static void lxpr_read_net_if_inet6(lxpr_node_t *, lxpr_uiobuf_t *); static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *); static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *); static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_ipv6_route(lxpr_node_t *, lxpr_uiobuf_t *); static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *); static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *); static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *); @@ -320,6 +324,7 @@ static lxpr_dirent_t netdir[] = { { LXPR_NET_IGMP, "igmp" }, { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" }, { LXPR_NET_IP_MR_VIF, "ip_mr_vif" }, + { 
LXPR_NET_IPV6_ROUTE, "ipv6_route" }, { LXPR_NET_MCFILTER, "mcfilter" }, { LXPR_NET_NETSTAT, "netstat" }, { LXPR_NET_RAW, "raw" }, @@ -502,6 +507,7 @@ static void (*lxpr_read_function[LXPR_NFILES])() = { lxpr_read_net_igmp, /* /proc/net/igmp */ lxpr_read_net_ip_mr_cache, /* /proc/net/ip_mr_cache */ lxpr_read_net_ip_mr_vif, /* /proc/net/ip_mr_vif */ + lxpr_read_net_ipv6_route, /* /proc/net/ipv6_route */ lxpr_read_net_mcfilter, /* /proc/net/mcfilter */ lxpr_read_net_netstat, /* /proc/net/netstat */ lxpr_read_net_raw, /* /proc/net/raw */ @@ -579,6 +585,7 @@ static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = { lxpr_lookup_not_a_dir, /* /proc/net/igmp */ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */ + lxpr_lookup_not_a_dir, /* /proc/net/ipv6_route */ lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */ lxpr_lookup_not_a_dir, /* /proc/net/netstat */ lxpr_lookup_not_a_dir, /* /proc/net/raw */ @@ -656,6 +663,7 @@ static int (*lxpr_readdir_function[LXPR_NFILES])() = { lxpr_readdir_not_a_dir, /* /proc/net/igmp */ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */ + lxpr_readdir_not_a_dir, /* /proc/net/ipv6_route */ lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */ lxpr_readdir_not_a_dir, /* /proc/net/netstat */ lxpr_readdir_not_a_dir, /* /proc/net/raw */ @@ -976,7 +984,7 @@ lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) int maj = 0; int min = 0; - u_longlong_t inode = 0; + ino_t inode = 0; *buf = '\0'; if (pbuf->vp != NULL) { @@ -993,12 +1001,12 @@ lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) if (p->p_model == DATAMODEL_LP64) { lxpr_uiobuf_printf(uiobuf, - "%016llx-%16llx %s %016llx %02d:%03d %lld%s%s\n", + "%08llx-%08llx %s %08llx %02x:%02x %llu%s%s\n", pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset, maj, min, inode, *buf != '\0' ? " " : "", buf); } else { lxpr_uiobuf_printf(uiobuf, - "%08x-%08x %s %08x %02d:%03d %lld%s%s\n", + "%08x-%08x %s %08x %02x:%02x %llu%s%s\n", (uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr, pbuf->prot, (uint32_t)pbuf->offset, maj, min, inode, *buf != '\0' ? 
" " : "", buf); @@ -1768,9 +1776,9 @@ lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) } static void -lxpr_inet6_out(in6_addr_t addr, char buf[33]) +lxpr_inet6_out(const in6_addr_t *addr, char buf[33]) { - uint8_t *ip = addr.s6_addr; + const uint8_t *ip = addr->s6_addr; char digits[] = "0123456789abcdef"; int i; for (i = 0; i < 16; i++) { @@ -1811,7 +1819,7 @@ lxpr_read_net_if_inet6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) ipif_get_name(ipif, ifname, sizeof (ifname)); lx_ifname_convert(ifname, LX_IFNAME_FROMNATIVE); - lxpr_inet6_out(ipif->ipif_v6lcl_addr, ip6out); + lxpr_inet6_out(&ipif->ipif_v6lcl_addr, ip6out); /* Scope output is shifted on Linux */ scope = scope << 4; @@ -1841,6 +1849,66 @@ lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { } +static void +lxpr_format_route_ipv6(ire_t *ire, lxpr_uiobuf_t *uiobuf) +{ + uint32_t flags; + char name[IFNAMSIZ]; + char ipv6addr[33]; + + lxpr_inet6_out(&ire->ire_addr_v6, ipv6addr); + lxpr_uiobuf_printf(uiobuf, "%s %02x ", ipv6addr, + ip_mask_to_plen_v6(&ire->ire_mask_v6)); + + /* punt on this for now */ + lxpr_uiobuf_printf(uiobuf, "%s %02x ", + "00000000000000000000000000000000", 0); + + lxpr_inet6_out(&ire->ire_gateway_addr_v6, ipv6addr); + lxpr_uiobuf_printf(uiobuf, "%s", ipv6addr); + + flags = ire->ire_flags & + (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED); + /* Linux's RTF_LOCAL equivalent */ + if (ire->ire_metrics.iulp_local) + flags |= 0x80000000; + + if (ire->ire_ill != NULL) { + ill_get_name(ire->ire_ill, name, sizeof (name)); + lx_ifname_convert(name, LX_IFNAME_FROMNATIVE); + } else { + name[0] = '\0'; + } + + lxpr_uiobuf_printf(uiobuf, " %08x %08x %08x %08x %8s\n", + 0, /* metric */ + ire->ire_refcnt, + 0, + flags, + name); +} + +/* ARGSUSED */ +static void +lxpr_read_net_ipv6_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + netstack_t *ns; + ip_stack_t *ipst; + + ns = netstack_get_current(); + if (ns == NULL) + return; + ipst = ns->netstack_ip; + + /* + * LX branded zones are expected to have exclusive IP stack, hence + * using ALL_ZONES as the zoneid filter. + */ + ire_walk_v6(&lxpr_format_route_ipv6, uiobuf, ALL_ZONES, ipst); + + netstack_rele(ns); +} + /* ARGSUSED */ static void lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) @@ -1859,10 +1927,97 @@ lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { } +#define LXPR_SKIP_ROUTE(type) \ + (((IRE_IF_CLONE | IRE_BROADCAST | IRE_MULTICAST | \ + IRE_NOROUTE | IRE_LOOPBACK | IRE_LOCAL) & type) != 0) + +static void +lxpr_format_route_ipv4(ire_t *ire, lxpr_uiobuf_t *uiobuf) +{ + uint32_t flags; + char name[IFNAMSIZ]; + ill_t *ill; + ire_t *nire; + ipif_t *ipif; + ipaddr_t gateway; + + if (LXPR_SKIP_ROUTE(ire->ire_type) || ire->ire_testhidden != 0) + return; + + /* These route flags have direct Linux equivalents */ + flags = ire->ire_flags & + (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED); + + /* + * Search for a suitable IRE for naming purposes. + * On Linux, the default route is typically associated with the + * interface used to access gateway. The default IRE on Illumos + * typically lacks an ill reference but its parent might have one. 
+ */ + nire = ire; + do { + ill = nire->ire_ill; + nire = nire->ire_dep_parent; + } while (ill == NULL && nire != NULL); + if (ill != NULL) { + ill_get_name(ill, name, sizeof (name)); + lx_ifname_convert(name, LX_IFNAME_FROMNATIVE); + } else { + name[0] = '*'; + name[1] = '\0'; + } + + /* + * Linux suppresses the gateway address for directly connected + * interface networks. To emulate this behavior, we walk all addresses + * of a given route interface. If one matches the gateway, it is + * displayed as NULL. + */ + gateway = ire->ire_gateway_addr; + if ((ill = ire->ire_ill) != NULL) { + for (ipif = ill->ill_ipif; ipif != NULL; + ipif = ipif->ipif_next) { + if (ipif->ipif_lcl_addr == gateway) { + gateway = 0; + break; + } + } + } + + lxpr_uiobuf_printf(uiobuf, "%s\t%08X\t%08X\t%04X\t%d\t%u\t" + "%d\t%08X\t%d\t%u\t%u\n", + name, + ire->ire_addr, + gateway, + flags, 0, 0, + 0, /* priority */ + ire->ire_mask, + 0, 0, /* mss, window */ + ire->ire_metrics.iulp_rtt); +} + /* ARGSUSED */ static void lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { + netstack_t *ns; + ip_stack_t *ipst; + + lxpr_uiobuf_printf(uiobuf, "Iface\tDestination\tGateway \tFlags\t" + "RefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n"); + + ns = netstack_get_current(); + if (ns == NULL) + return; + ipst = ns->netstack_ip; + + /* + * LX branded zones are expected to have exclusive IP stack, hence + * using ALL_ZONES as the zoneid filter. + */ + ire_walk_v4(&lxpr_format_route_ipv4, uiobuf, ALL_ZONES, ipst); + + netstack_rele(ns); } /* ARGSUSED */ @@ -1883,10 +2038,146 @@ lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { } +typedef struct lxpr_snmp_table { + const char *lst_proto; + const char *lst_fields[]; +} lxpr_snmp_table_t; + +static lxpr_snmp_table_t lxpr_snmp_ip = { "ip", + { + "forwarding", "defaultTTL", "inReceives", "inHdrErrors", + "inAddrErrors", "forwDatagrams", "inUnknownProtos", "inDiscards", + "inDelivers", "outRequests", "outDiscards", "outNoRoutes", + "reasmTimeout", "reasmReqds", "reasmOKs", "reasmFails", "fragOKs", + "fragFails", "fragCreates", + NULL + } +}; +static lxpr_snmp_table_t lxpr_snmp_icmp = { "icmp", + { + "inMsgs", "inErrors", "inCsumErrors", "inDestUnreachs", "inTimeExcds", + "inParmProbs", "inSrcQuenchs", "inRedirects", "inEchos", "inEchoReps", + "inTimestamps", "inTimestampReps", "inAddrMasks", "inAddrMaskReps", + "outMsgs", "outErrors", "outDestUnreachs", "outTimeExcds", + "outParmProbs", "outSrcQuenchs", "outRedirects", "outEchos", + "outEchoReps", "outTimestamps", "outTimestampReps", "outAddrMasks", + "outAddrMaskReps", + NULL + } +}; +static lxpr_snmp_table_t lxpr_snmp_tcp = { "tcp", + { + "rtoAlgorithm", "rtoMin", "rtoMax", "maxConn", "activeOpens", + "passiveOpens", "attemptFails", "estabResets", "currEstab", "inSegs", + "outSegs", "retransSegs", "inErrs", "outRsts", "inCsumErrors", + NULL + } +}; +static lxpr_snmp_table_t lxpr_snmp_udp = { "udp", + { + "inDatagrams", "noPorts", "inErrors", "outDatagrams", "rcvbufErrors", + "sndbufErrors", "inCsumErrors", + NULL + } +}; + +static lxpr_snmp_table_t *lxpr_net_snmptab[] = { + &lxpr_snmp_ip, + &lxpr_snmp_icmp, + &lxpr_snmp_tcp, + &lxpr_snmp_udp, + NULL +}; + +static void +lxpr_kstat_print_tab(lxpr_uiobuf_t *uiobuf, lxpr_snmp_table_t *table, + kstat_t *kn) +{ + kstat_named_t *klist; + char upname[KSTAT_STRLEN], upfield[KSTAT_STRLEN]; + int i, j, num; + size_t size; + + klist = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num); + if (klist == NULL) + return; + + /* Print the header line, fields 
capitalized */ + (void) strncpy(upname, table->lst_proto, KSTAT_STRLEN); + upname[0] = toupper(upname[0]); + lxpr_uiobuf_printf(uiobuf, "%s:", upname); + for (i = 0; table->lst_fields[i] != NULL; i++) { + (void) strncpy(upfield, table->lst_fields[i], KSTAT_STRLEN); + upfield[0] = toupper(upfield[0]); + lxpr_uiobuf_printf(uiobuf, " %s", upfield); + } + lxpr_uiobuf_printf(uiobuf, "\n%s:", upname); + + /* Then loop back through to print the value line. */ + for (i = 0; table->lst_fields[i] != NULL; i++) { + kstat_named_t *kpoint = NULL; + for (j = 0; j < num; j++) { + if (strncmp(klist[j].name, table->lst_fields[i], + KSTAT_STRLEN) == 0) { + kpoint = &klist[j]; + break; + } + } + if (kpoint == NULL) { + /* Output 0 for unknown fields */ + lxpr_uiobuf_printf(uiobuf, " 0"); + } else { + switch (kpoint->data_type) { + case KSTAT_DATA_INT32: + lxpr_uiobuf_printf(uiobuf, " %d", + kpoint->value.i32); + break; + case KSTAT_DATA_UINT32: + lxpr_uiobuf_printf(uiobuf, " %u", + kpoint->value.ui32); + break; + case KSTAT_DATA_INT64: + lxpr_uiobuf_printf(uiobuf, " %ld", + kpoint->value.l); + break; + case KSTAT_DATA_UINT64: + lxpr_uiobuf_printf(uiobuf, " %lu", + kpoint->value.ul); + break; + } + } + } + lxpr_uiobuf_printf(uiobuf, "\n"); + kmem_free(klist, size); +} + /* ARGSUSED */ static void lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { + kstat_t *ksr; + kstat_t ks0; + lxpr_snmp_table_t **table = lxpr_net_snmptab; + int i, t, nidx; + size_t sidx; + + ks0.ks_kid = 0; + ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx); + if (ksr == NULL) + return; + + for (t = 0; table[t] != NULL; t++) { + for (i = 0; i < nidx; i++) { + if (strncmp(ksr[i].ks_class, "mib2", KSTAT_STRLEN) != 0) + continue; + if (strncmp(ksr[i].ks_name, table[t]->lst_proto, + KSTAT_STRLEN) == 0) { + lxpr_kstat_print_tab(uiobuf, table[t], &ksr[i]); + break; + } + } + } + kmem_free(ksr, sidx); } /* ARGSUSED */ @@ -1963,13 +2254,13 @@ lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) * - tx_queue * - rx_queue * - uid + * - inode * * Omitted/invalid fields * - tr * - tm->when * - retrnsmt * - timeout - * - inode */ ns = netstack_get_current(); @@ -1983,6 +2274,9 @@ lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) while ((connp = ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) { tcp_t *tcp; + vattr_t attr; + sonode_t *so = (sonode_t *)connp->conn_upper_handle; + vnode_t *vp = (so != NULL) ? 
so->so_vnode : NULL; if (connp->conn_ipversion != ipver) continue; tcp = connp->conn_tcp; @@ -2010,9 +2304,15 @@ lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) connp->conn_faddr_v6.s6_addr32[3], ntohs(connp->conn_fport)); } + + /* fetch the simulated inode for the socket */ + if (vp == NULL || + VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0) + attr.va_nodeid = 0; + lxpr_uiobuf_printf(uiobuf, "%02X %08X:%08X %02X:%08X %08X " - "%5u %8d %u %d %p %u %u %u %u %d\n", + "%5u %8d %lu %d %p %u %u %u %u %d\n", lxpr_convert_tcp_state(tcp->tcp_state), tcp->tcp_rcv_cnt, tcp->tcp_unsent, /* rx/tx queue */ 0, 0, /* tr, when */ @@ -2020,7 +2320,7 @@ lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) connp->conn_cred->cr_uid, 0, /* timeout */ /* inode + more */ - 0, 0, NULL, 0, 0, 0, 0, 0); + (ino_t)attr.va_nodeid, 0, NULL, 0, 0, 0, 0, 0); } } netstack_rele(ns); @@ -2093,6 +2393,9 @@ lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) ipcl_get_next_conn(connfp, connp, IPCL_UDPCONN)) != NULL) { udp_t *udp; int state = 0; + vattr_t attr; + sonode_t *so = (sonode_t *)connp->conn_upper_handle; + vnode_t *vp = (so != NULL) ? so->so_vnode : NULL; if (connp->conn_ipversion != ipver) continue; udp = connp->conn_udp; @@ -2120,6 +2423,7 @@ lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) connp->conn_faddr_v6.s6_addr32[3], ntohs(connp->conn_fport)); } + switch (udp->udp_state) { case TS_UNBND: case TS_IDLE: @@ -2129,9 +2433,15 @@ lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) state = 1; break; } + + /* fetch the simulated inode for the socket */ + if (vp == NULL || + VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0) + attr.va_nodeid = 0; + lxpr_uiobuf_printf(uiobuf, "%02X %08X:%08X %02X:%08X %08X " - "%5u %8d %u %d %p %d\n", + "%5u %8d %lu %d %p %d\n", state, 0, 0, /* rx/tx queue */ 0, 0, /* tr, when */ @@ -2139,7 +2449,7 @@ lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) connp->conn_cred->cr_uid, 0, /* timeout */ /* inode, ref, pointer, drops */ - 0, 0, NULL, 0); + (ino_t)attr.va_nodeid, 0, NULL, 0); } } netstack_rele(ns); @@ -2163,6 +2473,95 @@ lxpr_read_net_udp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) static void lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { + sonode_t *so; + zoneid_t zoneid = getzoneid(); + + lxpr_uiobuf_printf(uiobuf, "Num RefCount Protocol Flags Type " + "St Inode Path\n"); + + mutex_enter(&socklist.sl_lock); + for (so = socklist.sl_list; so != NULL; + so = _SOTOTPI(so)->sti_next_so) { + vnode_t *vp = so->so_vnode; + vattr_t attr; + sotpi_info_t *sti; + const char *name = NULL; + int status = 0; + int type = 0; + int flags = 0; + + /* Only process active sonodes in this zone */ + if (so->so_count == 0 || so->so_zoneid != zoneid) + continue; + + /* + * Grab the inode, if possible. + * This must be done before entering so_lock. 
+ */ + if (vp == NULL || + VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0) + attr.va_nodeid = 0; + + mutex_enter(&so->so_lock); + sti = _SOTOTPI(so); + + if (sti->sti_laddr_sa != NULL) + name = sti->sti_laddr_sa->sa_data; + else if (sti->sti_faddr_sa != NULL) + name = sti->sti_faddr_sa->sa_data; + + /* + * Derived from enum values in Linux kernel source: + * include/uapi/linux/net.h + */ + if ((so->so_state & SS_ISDISCONNECTING) != 0) { + status = 4; + } else if ((so->so_state & SS_ISCONNECTING) != 0) { + status = 2; + } else if ((so->so_state & SS_ISCONNECTED) != 0) { + status = 3; + } else { + status = 1; + /* Add ACC flag for stream-type server sockets */ + if (so->so_type != SOCK_DGRAM && + sti->sti_laddr_sa != NULL) + flags |= 0x10000; + } + + /* Convert to Linux type */ + switch (so->so_type) { + case SOCK_DGRAM: + type = 2; + break; + case SOCK_SEQPACKET: + type = 5; + break; + default: + type = 1; + } + + lxpr_uiobuf_printf(uiobuf, "%p: %08X %08X %08X %04X %02X %5llu", + so, + so->so_count, + 0, /* proto, always 0 */ + flags, + type, + status, + (ino_t)attr.va_nodeid); + + /* + * Due to shortcomings in the abstract socket emulation, they + * cannot be properly represented here (as @<path>). + * + * This will be the case until they are better implemented. + */ + if (name != NULL) + lxpr_uiobuf_printf(uiobuf, " %s\n", name); + else + lxpr_uiobuf_printf(uiobuf, "\n"); + mutex_exit(&so->so_lock); + } + mutex_exit(&socklist.sl_lock); } /* @@ -3170,6 +3569,13 @@ lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, vap->va_uid = crgetruid(curproc->p_cred); vap->va_gid = crgetrgid(curproc->p_cred); break; + case LXPR_PID_FD_FD: + /* + * Restore VLNK type for lstat-type activity. + * See lxpr_readlink for more details. + */ + if ((flags & FOLLOW) == 0) + vap->va_type = VLNK; default: break; } @@ -3451,17 +3857,15 @@ lxpr_lookup_fddir(vnode_t *dp, char *comp) */ lxpnp->lxpr_realvp = vp; VN_HOLD(lxpnp->lxpr_realvp); - if (lxpnp->lxpr_realvp->v_type == VFIFO) { - /* - * lxpr_getnode initially sets the type to be VLNK for - * the LXPR_PID_FD_FD option, but that breaks fifo - * file descriptors (which are unlinked named pipes). - * We set this as a regular file so that open.2 comes - * into lxpr_open so we can do more work. - */ - dp = LXPTOV(lxpnp); - dp->v_type = VREG; - } + /* + * For certain entries (sockets, pipes, etc), Linux expects a + * bogus-named symlink. If that's the case, report the type as + * VNON to bypass link-following elsewhere in the vfs system. + * + * See lxpr_readlink for more details. + */ + if (lxpr_readlink_pid_fd(lxpnp, NULL, 0) == 0) + LXPTOV(lxpnp)->v_type = VNON; } mutex_enter(&p->p_lock); @@ -4053,16 +4457,41 @@ lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct) pid_t pid; int error = 0; - /* must be a symbolic link file */ - if (vp->v_type != VLNK) + /* + * Linux does something very "clever" for /proc/<pid>/fd/<num> entries. + * Open FDs are represented as symlinks, the link contents + * corresponding to the open resource. For plain files or devices, + * this isn't absurd since one can dereference the symlink to query + * the underlying resource. For sockets or pipes, it becomes ugly in a + * hurry. To maintain this human-readable output, those FD symlinks + * point to bogus targets such as "socket:[<inodenum>]". This requires + * circumventing vfs since the stat/lstat behavior on those FD entries + * will be unusual. (A stat must retrieve information about the open + * socket or pipe. 
It cannot fail because the link contents point to + * an absent file.) + * + * To accomplish this, lxpr_getnode returns an vnode typed VNON for FD + * entries. This bypasses code paths which would normally + * short-circuit on symlinks and allows us to emulate the vfs behavior + * expected by /proc consumers. + */ + if (vp->v_type != VLNK && lxpnp->lxpr_type != LXPR_PID_FD_FD) return (EINVAL); /* Try to produce a symlink name for anything that has a realvp */ if (rvp != NULL) { if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0) return (error); - if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) - return (error); + if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) { + /* + * Special handling possible for /proc/<pid>/fd/<num> + * Generate <type>:[<inode>] links, if allowed. + */ + if (lxpnp->lxpr_type != LXPR_PID_FD_FD || + lxpr_readlink_pid_fd(lxpnp, bp, buflen) != 0) { + return (error); + } + } } else { switch (lxpnp->lxpr_type) { case LXPR_SELF: @@ -4104,6 +4533,37 @@ lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct) } /* + * Attempt to create Linux-proc-style fake symlinks contents for supported + * /proc/<pid>/fd/<#> entries. + */ +static int +lxpr_readlink_pid_fd(lxpr_node_t *lxpnp, char *bp, size_t len) +{ + const char *format; + vnode_t *rvp = lxpnp->lxpr_realvp; + vattr_t attr; + + switch (rvp->v_type) { + case VSOCK: + format = "socket:[%lu]"; + break; + case VFIFO: + format = "pipe:[%lu]"; + break; + default: + return (-1); + } + + /* Fetch the inode of the underlying vnode */ + if (VOP_GETATTR(rvp, &attr, 0, CRED(), NULL) != 0) + return (-1); + + if (bp != NULL) + (void) snprintf(bp, len, format, (ino_t)attr.va_nodeid); + return (0); +} + +/* * lxpr_inactive(): Vnode operation for VOP_INACTIVE() * Vnode is no longer referenced, deallocate the file * and all its resources. diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h index 942a6e3b44..e7f5ee9867 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_brand.h +++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h @@ -80,10 +80,10 @@ extern "C" { #define B_LPID_TO_SPAIR 128 #define B_SYSENTRY 129 #define B_SYSRETURN 130 -#define B_PTRACE_SYSCALL 131 +#define B_PTRACE_KERNEL 131 #define B_SET_AFFINITY_MASK 132 #define B_GET_AFFINITY_MASK 133 -#define B_PTRACE_EXT_OPTS 134 +#define B_PTRACE_CLONE_BEGIN 134 #define B_PTRACE_STOP_FOR_OPT 135 #define B_UNSUPPORTED 136 #define B_STORE_ARGS 137 @@ -91,37 +91,31 @@ extern "C" { #define B_SIGNAL_RETURN 139 #define B_UNWIND_NTV_SYSC_FLAG 140 #define B_EXIT_AS_SIG 141 -#define B_PTRACE_GETEVENTMSG 142 +#define B_HELPER_WAITID 142 #define B_IKE_SYSCALL 192 -/* B_PTRACE_EXT_OPTS subcommands */ -#define B_PTRACE_EXT_OPTS_SET 1 -#define B_PTRACE_EXT_OPTS_GET 2 -#define B_PTRACE_EXT_OPTS_EVT 3 -#define B_PTRACE_DETACH 4 - +#ifndef _ASM /* * Support for Linux PTRACE_SETOPTIONS handling. */ -#define LX_PTRACE_O_TRACESYSGOOD 0x0001 -#define LX_PTRACE_O_TRACEFORK 0x0002 -#define LX_PTRACE_O_TRACEVFORK 0x0004 -#define LX_PTRACE_O_TRACECLONE 0x0008 -#define LX_PTRACE_O_TRACEEXEC 0x0010 -#define LX_PTRACE_O_TRACEVFORKDONE 0x0020 -#define LX_PTRACE_O_TRACEEXIT 0x0040 -#define LX_PTRACE_O_TRACESECCOMP 0x0080 -/* - * lx emulation-specific flag to indicate this is a child process being stopped - * due to one of the PTRACE_SETOPTIONS above. 
- */ -#define EMUL_PTRACE_O_CHILD 0x8000 -/* - * lx emulation-specific flag to determine via B_PTRACE_EXT_OPTS_GET if a - * process is being traced because of one of the PTRACE_SETOPTIONS above. - */ -#define EMUL_PTRACE_IS_TRACED 0x8000 +typedef enum lx_ptrace_options { + LX_PTRACE_O_TRACESYSGOOD = 0x0001, + LX_PTRACE_O_TRACEFORK = 0x0002, + LX_PTRACE_O_TRACEVFORK = 0x0004, + LX_PTRACE_O_TRACECLONE = 0x0008, + LX_PTRACE_O_TRACEEXEC = 0x0010, + LX_PTRACE_O_TRACEVFORKDONE = 0x0020, + LX_PTRACE_O_TRACEEXIT = 0x0040, + LX_PTRACE_O_TRACESECCOMP = 0x0080 +} lx_ptrace_options_t; + +#define LX_PTRACE_O_ALL \ + (LX_PTRACE_O_TRACESYSGOOD | LX_PTRACE_O_TRACEFORK | \ + LX_PTRACE_O_TRACEVFORK | LX_PTRACE_O_TRACECLONE | \ + LX_PTRACE_O_TRACEEXEC | LX_PTRACE_O_TRACEVFORKDONE | \ + LX_PTRACE_O_TRACEEXIT | LX_PTRACE_O_TRACESECCOMP) +#endif /* !_ASM */ /* siginfo si_status for traced events */ #define LX_PTRACE_EVENT_FORK 0x100 @@ -132,6 +126,17 @@ extern "C" { #define LX_PTRACE_EVENT_EXIT 0x600 #define LX_PTRACE_EVENT_SECCOMP 0x700 +/* + * Brand-private values for the "pr_what" member of lwpstatus, for use with the + * PR_BRAND stop reason. These reasons are validated in lx_stop_notify(); + * update it if you add new reasons here. + */ +#define LX_PR_SYSENTRY 1 +#define LX_PR_SYSEXIT 2 +#define LX_PR_SIGNALLED 3 +#define LX_PR_EVENT 4 + + #define LX_VERSION_1 1 #define LX_VERSION LX_VERSION_1 @@ -154,6 +159,8 @@ extern "C" { #ifndef _ASM +extern struct brand lx_brand; + typedef struct lx_brand_registration { uint_t lxbr_version; /* version number */ void *lxbr_handler; /* base address of handler */ @@ -255,10 +262,6 @@ typedef struct lx_proc_data { uintptr_t l_traceflag; /* address of 32-bit tracing flag */ pid_t l_ppid; /* pid of originating parent proc */ uint64_t l_ptrace; /* process being observed with ptrace */ - uint_t l_ptrace_opts; /* process's extended ptrace options */ - uint_t l_ptrace_event; /* extended ptrace option trap event */ - uint_t l_ptrace_is_traced; /* set if traced due to ptrace setoptions */ - ulong_t l_ptrace_eventmsg; /* extended ptrace event msg */ lx_elf_data_t l_elf_data; /* ELF data for linux executable */ int l_signal; /* signal to deliver to parent when this */ /* thread group dies */ @@ -280,10 +283,70 @@ typedef ulong_t lx_affmask_t[LX_AFF_ULONGS]; #ifdef _KERNEL +typedef struct lx_lwp_data lx_lwp_data_t; + +/* + * Flag values for "lxpa_flags" on a ptrace(2) accord. + */ +typedef enum lx_accord_flags { + LX_ACC_TOMBSTONE = 0x01 +} lx_accord_flags_t; + +/* + * Flags values for "br_ptrace_flags" in the LWP-specific data. + */ +typedef enum lx_ptrace_state { + LX_PTRACE_SYSCALL = 0x01, + LX_PTRACE_EXITING = 0x02, + LX_PTRACE_STOPPING = 0x04, + LX_PTRACE_INHERIT = 0x08, + LX_PTRACE_STOPPED = 0x10, + LX_PTRACE_PARENT_WAIT = 0x20, + LX_PTRACE_CLDPEND = 0x40, + LX_PTRACE_CLONING = 0x80 +} lx_ptrace_state_t; + +/* + * A ptrace(2) accord represents the relationship between a tracer LWP and the + * set of LWPs that it is tracing: the tracees. This data structure belongs + * primarily to the tracer, but is reference counted so that it may be freed by + * whoever references it last. + */ +typedef struct lx_ptrace_accord { + kmutex_t lxpa_lock; + uint_t lxpa_refcnt; + lx_accord_flags_t lxpa_flags; + + /* + * The tracer must hold "pidlock" while clearing these fields for + * exclusion of waitid(), etc. + */ + lx_lwp_data_t *lxpa_tracer; + kcondvar_t *lxpa_cvp; + + /* + * The "lxpa_tracees_lock" mutex protects the tracee list. 
+ */ + kmutex_t lxpa_tracees_lock; + list_t lxpa_tracees; +} lx_ptrace_accord_t; + +/* + * These values are stored in the per-LWP data for a tracee when it is attached + * to a tracer. They record the method that was used to attach. + */ +typedef enum lx_ptrace_attach { + LX_PTA_NONE = 0x00, /* not attached */ + LX_PTA_ATTACH = 0x01, /* due to tracer using PTRACE_ATTACH */ + LX_PTA_TRACEME = 0x02, /* due to child using PTRACE_TRACEME */ + LX_PTA_INHERIT_CLONE = 0x04, /* due to PTRACE_CLONE clone(2) flag */ + LX_PTA_INHERIT_OPTIONS = 0x08 /* due to PTRACE_SETOPTIONS options */ +} lx_ptrace_attach_t; + /* * lx-specific data in the klwp_t */ -typedef struct lx_lwp_data { +struct lx_lwp_data { uint_t br_ntv_syscall; /* 1 = syscall from native libc */ uint_t br_lwp_flags; /* misc. flags */ klwp_t *br_lwp; /* back pointer to container lwp */ @@ -317,8 +380,26 @@ typedef struct lx_lwp_data { void *br_scall_args; int br_args_size; /* size in bytes of br_scall_args */ - uint_t br_ptrace; /* ptrace is active for this LWP */ -} lx_lwp_data_t; + boolean_t br_waitid_emulate; + int br_waitid_flags; + + lx_ptrace_state_t br_ptrace_flags; /* ptrace state for this LWP */ + lx_ptrace_options_t br_ptrace_options; /* PTRACE_SETOPTIONS options */ + lx_ptrace_options_t br_ptrace_clone_option; /* current clone(2) type */ + + lx_ptrace_attach_t br_ptrace_attach; /* how did we get attached */ + lx_ptrace_accord_t *br_ptrace_accord; /* accord for this tracer LWP */ + lx_ptrace_accord_t *br_ptrace_tracer; /* accord tracing this LWP */ + list_node_t br_ptrace_linkage; /* linkage for lxpa_tracees list */ + + ushort_t br_ptrace_whystop; /* stop reason, 0 for no stop */ + ushort_t br_ptrace_whatstop; /* stop sub-reason */ + + int32_t br_ptrace_stopsig; /* stop signal, 0 for no signal */ + + uint_t br_ptrace_event; + ulong_t br_ptrace_eventmsg; +}; /* * Upper limit on br_args_size, low because this value can persist until @@ -336,8 +417,13 @@ typedef struct lx_zone_data { #define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t)) #define lwptolxlwp(l) ((struct lx_lwp_data *)lwptolwpbrand(l)) -#define ttolxproc(t) ((struct lx_proc_data *)(t)->t_procp->p_brand_data) -#define ptolxproc(p) ((struct lx_proc_data *)(p)->p_brand_data) +#define ttolxproc(t) \ + (((t)->t_procp->p_brand == &lx_brand) ? \ + (struct lx_proc_data *)(t)->t_procp->p_brand_data : NULL) +#define ptolxproc(p) \ + (((p)->p_brand == &lx_brand) ? \ + (struct lx_proc_data *)(p)->p_brand_data : NULL) + /* Macro for converting to system call arguments. 
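/*
 * Illustrative sketch, not part of the patch: with the guarded forms of
 * ttolxproc() and ptolxproc() introduced above, a process that is not
 * branded with lx_brand now yields NULL, so callers are expected to check
 * before dereferencing.  The function below is hypothetical and only shows
 * the calling pattern.
 */
static pid_t
example_lx_parent_pid(proc_t *p)
{
	struct lx_proc_data *lxpd;

	if ((lxpd = ptolxproc(p)) == NULL)
		return (-1);		/* not an lx-branded process */

	return (lxpd->l_ppid);
}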
*/ #define LX_ARGS(scall) ((struct lx_##scall##_args *)\ (ttolxlwp(curthread)->br_scall_args)) diff --git a/usr/src/uts/common/brand/lx/sys/lx_misc.h b/usr/src/uts/common/brand/lx/sys/lx_misc.h index 56b5bb4047..7b77789c56 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_misc.h +++ b/usr/src/uts/common/brand/lx/sys/lx_misc.h @@ -46,6 +46,20 @@ extern boolean_t lx_wait_filter(proc_t *, proc_t *); extern void lx_ifname_convert(char *, int); +extern boolean_t lx_ptrace_stop(ushort_t); +extern void lx_stop_notify(proc_t *, klwp_t *, ushort_t, ushort_t); +extern void lx_ptrace_init(void); +extern void lx_ptrace_fini(void); +extern int lx_ptrace_kernel(int, pid_t, uintptr_t, uintptr_t); +extern int lx_waitid_helper(idtype_t, id_t, k_siginfo_t *, int, boolean_t *, + int *); +extern void lx_ptrace_exit(proc_t *, klwp_t *); +extern void lx_ptrace_inherit_tracer(lx_lwp_data_t *, lx_lwp_data_t *); +extern int lx_ptrace_stop_for_option(int, boolean_t, ulong_t); +extern int lx_ptrace_set_clone_inherit(int, boolean_t); +extern int lx_sigcld_repost(proc_t *, sigqueue_t *); +extern int lx_issig_stop(proc_t *, klwp_t *); + #endif #ifdef __cplusplus diff --git a/usr/src/uts/common/brand/lx/syscall/lx_clone.c b/usr/src/uts/common/brand/lx/syscall/lx_clone.c index 949db3a73b..d73c5f100b 100644 --- a/usr/src/uts/common/brand/lx/syscall/lx_clone.c +++ b/usr/src/uts/common/brand/lx/syscall/lx_clone.c @@ -21,7 +21,7 @@ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2014 Joyent, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. */ #include <sys/types.h> @@ -32,25 +32,10 @@ #include <sys/lx_ldt.h> #include <sys/lx_misc.h> #include <lx_signum.h> +#include <lx_syscall.h> #include <sys/x86_archext.h> #include <sys/controlregs.h> -#define LX_CSIGNAL 0x000000ff -#define LX_CLONE_VM 0x00000100 -#define LX_CLONE_FS 0x00000200 -#define LX_CLONE_FILES 0x00000400 -#define LX_CLONE_SIGHAND 0x00000800 -#define LX_CLONE_PID 0x00001000 -#define LX_CLONE_PTRACE 0x00002000 -#define LX_CLONE_PARENT 0x00008000 -#define LX_CLONE_THREAD 0x00010000 -#define LX_CLONE_SYSVSEM 0x00040000 -#define LX_CLONE_SETTLS 0x00080000 -#define LX_CLONE_PARENT_SETTID 0x00100000 -#define LX_CLONE_CHILD_CLEARTID 0x00200000 -#define LX_CLONE_DETACH 0x00400000 -#define LX_CLONE_CHILD_SETTID 0x01000000 - /* * Our lwp has already been created at this point, so this routine is * responsible for setting up all the state needed to track this as a diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c index cfc4c99f64..ae6c5eef16 100644 --- a/usr/src/uts/common/disp/thread.c +++ b/usr/src/uts/common/disp/thread.c @@ -87,7 +87,7 @@ struct kmem_cache *turnstile_cache; /* cache of free turnstiles */ * allthreads is only for use by kmem_readers. All kernel loops can use * the current thread as a start/end point. */ -static kthread_t *allthreads = &t0; /* circular list of all threads */ +kthread_t *allthreads = &t0; /* circular list of all threads */ static kcondvar_t reaper_cv; /* synchronization var */ kthread_t *thread_deathrow; /* circular list of reapable threads */ diff --git a/usr/src/uts/common/fs/lookup.c b/usr/src/uts/common/fs/lookup.c index 6819509d00..55ffb94805 100644 --- a/usr/src/uts/common/fs/lookup.c +++ b/usr/src/uts/common/fs/lookup.c @@ -20,6 +20,7 @@ */ /* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ @@ -217,7 +218,6 @@ lookuppnvp( cred_t *cr) /* user's credential */ { vnode_t *cvp; /* current component vp */ - vnode_t *tvp; /* addressable temp ptr */ char component[MAXNAMELEN]; /* buffer for component (incl null) */ int error; int nlink; @@ -373,7 +373,7 @@ checkforroot: /* * Perform a lookup in the current directory. */ - error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags, + error = VOP_LOOKUP(vp, component, &cvp, pnp, lookup_flags, rootvp, cr, NULL, NULL, pp); /* @@ -391,10 +391,9 @@ checkforroot: * directory inside NFS FS. */ if ((error == EACCES) && retry_with_kcred) - error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags, + error = VOP_LOOKUP(vp, component, &cvp, pnp, lookup_flags, rootvp, zone_kcred(), NULL, NULL, pp); - cvp = tvp; if (error) { cvp = NULL; /* @@ -440,20 +439,8 @@ checkforroot: * be atomic!) */ if (vn_mountedvfs(cvp) != NULL) { - tvp = cvp; - if ((error = traverse(&tvp)) != 0) { - /* - * It is required to assign cvp here, because - * traverse() will return a held vnode which - * may different than the vnode that was passed - * in (even in the error case). If traverse() - * changes the vnode it releases the original, - * and holds the new one. - */ - cvp = tvp; + if ((error = traverse(&cvp)) != 0) goto bad; - } - cvp = tvp; } /* diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv.c b/usr/src/uts/common/fs/nfs/nfs4_srv.c index 127d9e3f29..fe1a10b966 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv.c @@ -18,10 +18,11 @@ * * CDDL HEADER END */ + /* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. */ /* @@ -869,7 +870,7 @@ static nfsstat4 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) { int error, different_export = 0; - vnode_t *dvp, *vp, *tvp; + vnode_t *dvp, *vp; struct exportinfo *exi = NULL; fid_t fid; uint_t count, i; @@ -950,14 +951,12 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) * If it's a mountpoint, then traverse it. */ if (vn_ismntpt(vp)) { - tvp = vp; - if ((error = traverse(&tvp)) != 0) { + if ((error = traverse(&vp)) != 0) { VN_RELE(vp); return (puterrno4(error)); } /* remember that we had to traverse mountpoint */ did_traverse = TRUE; - vp = tvp; different_export = 1; } else if (vp->v_vfsp != dvp->v_vfsp) { /* @@ -2610,7 +2609,7 @@ do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs) { int error; int different_export = 0; - vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL; + vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL; struct exportinfo *exi = NULL, *pre_exi = NULL; nfsstat4 stat; fid_t fid; @@ -2708,13 +2707,11 @@ do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs) * need pre_tvp below if checkexport4 fails */ VN_HOLD(pre_tvp); - tvp = vp; - if ((error = traverse(&tvp)) != 0) { + if ((error = traverse(&vp)) != 0) { VN_RELE(vp); VN_RELE(pre_tvp); return (puterrno4(error)); } - vp = tvp; different_export = 1; } else if (vp->v_vfsp != cs->vp->v_vfsp) { /* diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c b/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c index 3069a98835..276d3b4f19 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c @@ -18,6 +18,11 @@ * * CDDL HEADER END */ + +/* + * Copyright 2015 Nexenta Systems, Inc. 
All rights reserved. + */ + /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -149,6 +154,7 @@ nfs4_readdir_getvp(vnode_t *dvp, char *d_name, vnode_t **vpp, VN_HOLD(pre_tvp); if ((error = traverse(&vp)) != 0) { + VN_RELE(vp); VN_RELE(pre_tvp); return (error); } diff --git a/usr/src/uts/common/fs/proc/prcontrol.c b/usr/src/uts/common/fs/proc/prcontrol.c index a5679a8afb..7e99d23b97 100644 --- a/usr/src/uts/common/fs/proc/prcontrol.c +++ b/usr/src/uts/common/fs/proc/prcontrol.c @@ -25,7 +25,7 @@ */ /* - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #include <sys/types.h> @@ -1481,7 +1481,7 @@ pr_setsig(prnode_t *pnp, siginfo_t *sip) } else if (t->t_state == TS_STOPPED && sig == SIGKILL) { /* If SIGKILL, set stopped lwp running */ p->p_stopsig = 0; - t->t_schedflag |= TS_XSTART | TS_PSTART; + t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART; t->t_dtrace_stop = 0; setrun_locked(t); } diff --git a/usr/src/uts/common/fs/proc/prsubr.c b/usr/src/uts/common/fs/proc/prsubr.c index 7801fd0ac8..284bf8cb88 100644 --- a/usr/src/uts/common/fs/proc/prsubr.c +++ b/usr/src/uts/common/fs/proc/prsubr.c @@ -21,7 +21,7 @@ /* * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -201,6 +201,7 @@ prchoose(proc_t *p) case PR_SYSEXIT: case PR_SIGNALLED: case PR_FAULTED: + case PR_BRAND: /* * Make an lwp calling exit() be the * last lwp seen in the process. diff --git a/usr/src/uts/common/fs/smbsrv/smb_common_open.c b/usr/src/uts/common/fs/smbsrv/smb_common_open.c index 3fa43d43cb..5eaa5865c6 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_common_open.c +++ b/usr/src/uts/common/fs/smbsrv/smb_common_open.c @@ -820,8 +820,8 @@ smb_open_subr(smb_request_t *sr) status = NT_STATUS_SUCCESS; - of = smb_ofile_open(sr->tid_tree, node, sr->smb_pid, op, SMB_FTYPE_DISK, - uniq_fid, &err); + of = smb_ofile_open(sr, node, sr->smb_pid, op, SMB_FTYPE_DISK, uniq_fid, + &err); if (of == NULL) { smbsr_error(sr, err.status, err.errcls, err.errcode); status = err.status; diff --git a/usr/src/uts/common/fs/smbsrv/smb_delete.c b/usr/src/uts/common/fs/smbsrv/smb_delete.c index 4930f741ef..14eff73896 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_delete.c +++ b/usr/src/uts/common/fs/smbsrv/smb_delete.c @@ -297,7 +297,7 @@ smb_delete_multiple_files(smb_request_t *sr, smb_error_t *err) if (odid == 0) return (-1); - if ((od = smb_tree_lookup_odir(sr->tid_tree, odid)) == NULL) + if ((od = smb_tree_lookup_odir(sr, odid)) == NULL) return (-1); for (;;) { diff --git a/usr/src/uts/common/fs/smbsrv/smb_dispatch.c b/usr/src/uts/common/fs/smbsrv/smb_dispatch.c index 1afcf18b28..9b1fed6f9a 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_dispatch.c +++ b/usr/src/uts/common/fs/smbsrv/smb_dispatch.c @@ -20,8 +20,8 @@ */ /* - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 
*/ /* @@ -694,16 +694,13 @@ andx_more: } sr->user_cr = smb_user_getcred(sr->uid_user); - - if (!(sdd->sdt_flags & SDDF_SUPPRESS_TID) && - (sr->tid_tree == NULL)) { - sr->tid_tree = smb_user_lookup_tree( - sr->uid_user, sr->smb_tid); - if (sr->tid_tree == NULL) { - smbsr_error(sr, 0, ERRSRV, ERRinvnid); - smbsr_cleanup(sr); - goto report_error; - } + } + if (!(sdd->sdt_flags & SDDF_SUPPRESS_TID) && (sr->tid_tree == NULL)) { + sr->tid_tree = smb_session_lookup_tree(session, sr->smb_tid); + if (sr->tid_tree == NULL) { + smbsr_error(sr, 0, ERRSRV, ERRinvnid); + smbsr_cleanup(sr); + goto report_error; } } @@ -1116,8 +1113,7 @@ void smbsr_lookup_file(smb_request_t *sr) { if (sr->fid_ofile == NULL) - sr->fid_ofile = smb_ofile_lookup_by_fid(sr->tid_tree, - sr->smb_fid); + sr->fid_ofile = smb_ofile_lookup_by_fid(sr, sr->smb_fid); } static int diff --git a/usr/src/uts/common/fs/smbsrv/smb_find.c b/usr/src/uts/common/fs/smbsrv/smb_find.c index 1dae4e8cb5..eecbeff4df 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_find.c +++ b/usr/src/uts/common/fs/smbsrv/smb_find.c @@ -306,7 +306,7 @@ smb_com_search(smb_request_t *sr) } } - od = smb_tree_lookup_odir(sr->tid_tree, odid); + od = smb_tree_lookup_odir(sr, odid); if (od == NULL) { smbsr_error(sr, NT_STATUS_INVALID_HANDLE, ERRDOS, ERROR_INVALID_HANDLE); @@ -452,7 +452,7 @@ smb_com_find(smb_request_t *sr) } } - od = smb_tree_lookup_odir(sr->tid_tree, odid); + od = smb_tree_lookup_odir(sr, odid); if (od == NULL) { smbsr_error(sr, NT_STATUS_INVALID_HANDLE, ERRDOS, ERROR_INVALID_HANDLE); @@ -575,7 +575,7 @@ smb_com_find_close(smb_request_t *sr) return (SDRC_ERROR); } - od = smb_tree_lookup_odir(sr->tid_tree, odid); + od = smb_tree_lookup_odir(sr, odid); if (od == NULL) { smbsr_error(sr, NT_STATUS_INVALID_HANDLE, ERRDOS, ERROR_INVALID_HANDLE); @@ -649,7 +649,7 @@ smb_com_find_unique(struct smb_request *sr) odid = smb_odir_open(sr, pn->pn_path, sattr, 0); if (odid == 0) return (SDRC_ERROR); - od = smb_tree_lookup_odir(sr->tid_tree, odid); + od = smb_tree_lookup_odir(sr, odid); if (od == NULL) return (SDRC_ERROR); diff --git a/usr/src/uts/common/fs/smbsrv/smb_fsops.c b/usr/src/uts/common/fs/smbsrv/smb_fsops.c index 2f4545e966..c64313fdbf 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_fsops.c +++ b/usr/src/uts/common/fs/smbsrv/smb_fsops.c @@ -805,7 +805,7 @@ smb_fsop_remove_streams(smb_request_t *sr, cred_t *cr, smb_node_t *fnode) return (-1); } - if ((od = smb_tree_lookup_odir(sr->tid_tree, odid)) == NULL) { + if ((od = smb_tree_lookup_odir(sr, odid)) == NULL) { smbsr_errno(sr, ENOENT); return (-1); } diff --git a/usr/src/uts/common/fs/smbsrv/smb_nt_create_andx.c b/usr/src/uts/common/fs/smbsrv/smb_nt_create_andx.c index c77c175fc1..037c1373b5 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_nt_create_andx.c +++ b/usr/src/uts/common/fs/smbsrv/smb_nt_create_andx.c @@ -264,8 +264,7 @@ smb_com_nt_create_andx(struct smb_request *sr) if (op->rootdirfid == 0) { op->fqi.fq_dnode = sr->tid_tree->t_snode; } else { - op->dir = smb_ofile_lookup_by_fid(sr->tid_tree, - (uint16_t)op->rootdirfid); + op->dir = smb_ofile_lookup_by_fid(sr, (uint16_t)op->rootdirfid); if (op->dir == NULL) { smbsr_error(sr, NT_STATUS_INVALID_HANDLE, ERRDOS, ERRbadfid); diff --git a/usr/src/uts/common/fs/smbsrv/smb_nt_transact_create.c b/usr/src/uts/common/fs/smbsrv/smb_nt_transact_create.c index fcc12f2fc8..dcfa469617 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_nt_transact_create.c +++ b/usr/src/uts/common/fs/smbsrv/smb_nt_transact_create.c @@ -173,8 +173,7 @@ smb_nt_transact_create(smb_request_t *sr, 
smb_xa_t *xa) if (op->rootdirfid == 0) { op->fqi.fq_dnode = sr->tid_tree->t_snode; } else { - op->dir = smb_ofile_lookup_by_fid(sr->tid_tree, - (uint16_t)op->rootdirfid); + op->dir = smb_ofile_lookup_by_fid(sr, (uint16_t)op->rootdirfid); if (op->dir == NULL) { smbsr_error(sr, NT_STATUS_INVALID_HANDLE, ERRDOS, ERRbadfid); diff --git a/usr/src/uts/common/fs/smbsrv/smb_odir.c b/usr/src/uts/common/fs/smbsrv/smb_odir.c index b8435d191a..16fffa6692 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_odir.c +++ b/usr/src/uts/common/fs/smbsrv/smb_odir.c @@ -39,15 +39,15 @@ * +-------------------+ +-------------------+ +-------------------+ * | SESSION |<----->| SESSION |......| SESSION | * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v - * +-------------------+ +-------------------+ +-------------------+ - * | USER |<----->| USER |......| USER | - * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v + * | | + * | | + * | v + * | +-------------------+ +-------------------+ +-------------------+ + * | | USER |<--->| USER |...| USER | + * | +-------------------+ +-------------------+ +-------------------+ + * | + * | + * v * +-------------------+ +-------------------+ +-------------------+ * | TREE |<----->| TREE |......| TREE | * +-------------------+ +-------------------+ +-------------------+ @@ -153,7 +153,7 @@ * and add it into the tree's list of odirs. * Return an identifier (odid) uniquely identifying the created odir. * - * smb_odir_t *odir = smb_tree_lookup_odir(odid) + * smb_odir_t *odir = smb_tree_lookup_odir(..., odid) * Find the odir corresponding to the specified odid in the tree's * list of odirs. Place a hold on the odir. * @@ -312,9 +312,9 @@ smb_odir_open(smb_request_t *sr, char *path, uint16_t sattr, uint32_t flags) } if (flags & SMB_ODIR_OPENF_BACKUP_INTENT) - cr = smb_user_getprivcred(tree->t_user); + cr = smb_user_getprivcred(sr->uid_user); else - cr = tree->t_user->u_cred; + cr = sr->uid_user->u_cred; odid = smb_odir_create(sr, dnode, pattern, sattr, cr); smb_node_release(dnode); @@ -888,6 +888,12 @@ smb_odir_create(smb_request_t *sr, smb_node_t *dnode, od->d_opened_by_pid = sr->smb_pid; od->d_session = tree->t_session; od->d_cred = cr; + /* + * grab a ref for od->d_user + * released in smb_odir_delete() + */ + smb_user_hold_internal(sr->uid_user); + od->d_user = sr->uid_user; od->d_tree = tree; od->d_dnode = dnode; smb_node_ref(dnode); @@ -947,6 +953,7 @@ smb_odir_delete(void *arg) od->d_magic = 0; smb_node_release(od->d_dnode); + smb_user_release(od->d_user); mutex_destroy(&od->d_mutex); kmem_cache_free(od->d_tree->t_server->si_cache_odir, od); } diff --git a/usr/src/uts/common/fs/smbsrv/smb_ofile.c b/usr/src/uts/common/fs/smbsrv/smb_ofile.c index 8987da2950..ee45f13c8b 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_ofile.c +++ b/usr/src/uts/common/fs/smbsrv/smb_ofile.c @@ -39,15 +39,15 @@ * +-------------------+ +-------------------+ +-------------------+ * | SESSION |<----->| SESSION |......| SESSION | * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v - * +-------------------+ +-------------------+ +-------------------+ - * | USER |<----->| USER |......| USER | - * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v + * | | + * | | + * | v + * | +-------------------+ +-------------------+ +-------------------+ + * | | USER |<--->| USER |...| USER | + * | +-------------------+ +-------------------+ +-------------------+ + * | + * | + * v * 
+-------------------+ +-------------------+ +-------------------+ * | TREE |<----->| TREE |......| TREE | * +-------------------+ +-------------------+ +-------------------+ @@ -175,7 +175,7 @@ static void smb_ofile_netinfo_fini(smb_netfileinfo_t *); */ smb_ofile_t * smb_ofile_open( - smb_tree_t *tree, + smb_request_t *sr, smb_node_t *node, uint16_t pid, struct open_param *op, @@ -183,10 +183,13 @@ smb_ofile_open( uint32_t uniqid, smb_error_t *err) { + smb_tree_t *tree = sr->tid_tree; smb_ofile_t *of; uint16_t fid; smb_attr_t attr; int rc; + enum errstates { EMPTY, FIDALLOC, CRHELD, MUTEXINIT }; + enum errstates state = EMPTY; if (smb_idpool_alloc(&tree->t_fid_pool, &fid)) { err->status = NT_STATUS_TOO_MANY_OPENED_FILES; @@ -194,6 +197,7 @@ smb_ofile_open( err->errcode = ERROR_TOO_MANY_OPEN_FILES; return (NULL); } + state = FIDALLOC; of = kmem_cache_alloc(tree->t_server->si_cache_ofile, KM_SLEEP); bzero(of, sizeof (smb_ofile_t)); @@ -206,16 +210,23 @@ smb_ofile_open( of->f_share_access = op->share_access; of->f_create_options = op->create_options; of->f_cr = (op->create_options & FILE_OPEN_FOR_BACKUP_INTENT) ? - smb_user_getprivcred(tree->t_user) : tree->t_user->u_cred; + smb_user_getprivcred(sr->uid_user) : sr->uid_user->u_cred; crhold(of->f_cr); + state = CRHELD; of->f_ftype = ftype; of->f_server = tree->t_server; - of->f_session = tree->t_user->u_session; - of->f_user = tree->t_user; + of->f_session = tree->t_session; + /* + * grab a ref for of->f_user + * released in smb_ofile_delete() + */ + smb_user_hold_internal(sr->uid_user); + of->f_user = sr->uid_user; of->f_tree = tree; of->f_node = node; mutex_init(&of->f_mutex, NULL, MUTEX_DEFAULT, NULL); + state = MUTEXINIT; of->f_state = SMB_OFILE_STATE_OPEN; if (ftype == SMB_FTYPE_MESG_PIPE) { @@ -232,15 +243,10 @@ smb_ofile_open( attr.sa_mask = SMB_AT_UID | SMB_AT_DOSATTR; rc = smb_node_getattr(NULL, node, of->f_cr, NULL, &attr); if (rc != 0) { - of->f_magic = 0; - mutex_destroy(&of->f_mutex); - crfree(of->f_cr); - smb_idpool_free(&tree->t_fid_pool, of->f_fid); - kmem_cache_free(tree->t_server->si_cache_ofile, of); err->status = NT_STATUS_INTERNAL_ERROR; err->errcls = ERRDOS; err->errcode = ERROR_INTERNAL_ERROR; - return (NULL); + goto errout; } if (crgetuid(of->f_cr) == attr.sa_vattr.va_uid) { /* @@ -254,16 +260,10 @@ smb_ofile_open( of->f_mode = smb_fsop_amask_to_omode(of->f_granted_access); if (smb_fsop_open(node, of->f_mode, of->f_cr) != 0) { - of->f_magic = 0; - mutex_destroy(&of->f_mutex); - crfree(of->f_cr); - smb_idpool_free(&tree->t_fid_pool, of->f_fid); - kmem_cache_free(tree->t_server->si_cache_ofile, - of); err->status = NT_STATUS_ACCESS_DENIED; err->errcls = ERRDOS; err->errcode = ERROR_ACCESS_DENIED; - return (NULL); + goto errout; } } @@ -290,6 +290,25 @@ smb_ofile_open( atomic_inc_32(&tree->t_open_files); atomic_inc_32(&of->f_session->s_file_cnt); return (of); + +errout: + switch (state) { + case MUTEXINIT: + mutex_destroy(&of->f_mutex); + smb_user_release(of->f_user); + /*FALLTHROUGH*/ + case CRHELD: + crfree(of->f_cr); + of->f_magic = 0; + kmem_cache_free(tree->t_server->si_cache_ofile, of); + /*FALLTHROUGH*/ + case FIDALLOC: + smb_idpool_free(&tree->t_fid_pool, fid); + /*FALLTHROUGH*/ + case EMPTY: + break; + } + return (NULL); } /* @@ -601,9 +620,10 @@ smb_ofile_request_complete(smb_ofile_t *of) */ smb_ofile_t * smb_ofile_lookup_by_fid( - smb_tree_t *tree, + smb_request_t *sr, uint16_t fid) { + smb_tree_t *tree = sr->tid_tree; smb_llist_t *of_list; smb_ofile_t *of; @@ -616,19 +636,32 @@ smb_ofile_lookup_by_fid( while 
(of) { ASSERT(of->f_magic == SMB_OFILE_MAGIC); ASSERT(of->f_tree == tree); - if (of->f_fid == fid) { - mutex_enter(&of->f_mutex); - if (of->f_state != SMB_OFILE_STATE_OPEN) { - mutex_exit(&of->f_mutex); - smb_llist_exit(of_list); - return (NULL); - } - of->f_refcnt++; - mutex_exit(&of->f_mutex); + if (of->f_fid == fid) break; - } of = smb_llist_next(of_list, of); } + if (of == NULL) + goto out; + + /* + * Only allow use of a given FID with the same UID that + * was used to open it. MS-CIFS 3.3.5.14 + */ + if (of->f_user != sr->uid_user) { + of = NULL; + goto out; + } + + mutex_enter(&of->f_mutex); + if (of->f_state != SMB_OFILE_STATE_OPEN) { + mutex_exit(&of->f_mutex); + of = NULL; + goto out; + } + of->f_refcnt++; + mutex_exit(&of->f_mutex); + +out: smb_llist_exit(of_list); return (of); } @@ -921,6 +954,7 @@ smb_ofile_delete(void *arg) of->f_magic = (uint32_t)~SMB_OFILE_MAGIC; mutex_destroy(&of->f_mutex); crfree(of->f_cr); + smb_user_release(of->f_user); kmem_cache_free(of->f_tree->t_server->si_cache_ofile, of); } diff --git a/usr/src/uts/common/fs/smbsrv/smb_opipe.c b/usr/src/uts/common/fs/smbsrv/smb_opipe.c index bb178f3952..90cb25aaa0 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_opipe.c +++ b/usr/src/uts/common/fs/smbsrv/smb_opipe.c @@ -130,8 +130,8 @@ smb_opipe_open(smb_request_t *sr) op->create_options = 0; - of = smb_ofile_open(sr->tid_tree, NULL, sr->smb_pid, op, - SMB_FTYPE_MESG_PIPE, SMB_UNIQ_FID(), &err); + of = smb_ofile_open(sr, NULL, sr->smb_pid, op, SMB_FTYPE_MESG_PIPE, + SMB_UNIQ_FID(), &err); if (of == NULL) return (err.status); diff --git a/usr/src/uts/common/fs/smbsrv/smb_process_exit.c b/usr/src/uts/common/fs/smbsrv/smb_process_exit.c index b8c835cd57..2839ca2807 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_process_exit.c +++ b/usr/src/uts/common/fs/smbsrv/smb_process_exit.c @@ -85,11 +85,11 @@ smb_com_process_exit(smb_request_t *sr) * to be the only thing that sends this request these days and * it doesn't provide a TID. */ - sr->tid_tree = smb_user_lookup_tree(sr->uid_user, sr->smb_tid); + sr->tid_tree = smb_session_lookup_tree(sr->session, sr->smb_tid); if (sr->tid_tree != NULL) smb_tree_close_pid(sr->tid_tree, sr->smb_pid); else - smb_user_close_pid(sr->uid_user, sr->smb_pid); + smb_session_close_pid(sr->session, sr->smb_pid); rc = smbsr_encode_empty_result(sr); return ((rc == 0) ? 
SDRC_SUCCESS : SDRC_ERROR); diff --git a/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c b/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c index bef69e7f61..70ac2e7b24 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c +++ b/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c @@ -662,7 +662,7 @@ smb_encode_stream_info(smb_request_t *sr, smb_xa_t *xa, smb_queryinfo_t *qinfo) odid = smb_odir_openat(sr, fnode); if (odid != 0) - od = smb_tree_lookup_odir(sr->tid_tree, odid); + od = smb_tree_lookup_odir(sr, odid); if (od != NULL) rc = smb_odir_read_streaminfo(sr, od, sinfo, &eos); diff --git a/usr/src/uts/common/fs/smbsrv/smb_server.c b/usr/src/uts/common/fs/smbsrv/smb_server.c index 3654744569..8687d42b18 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_server.c +++ b/usr/src/uts/common/fs/smbsrv/smb_server.c @@ -240,7 +240,8 @@ static void smb_event_cancel(smb_server_t *, uint32_t); static uint32_t smb_event_alloc_txid(void); static void smb_server_disconnect_share(smb_llist_t *, const char *); -static void smb_server_enum_private(smb_llist_t *, smb_svcenum_t *); +static void smb_server_enum_users(smb_llist_t *, smb_svcenum_t *); +static void smb_server_enum_trees(smb_llist_t *, smb_svcenum_t *); static int smb_server_session_disconnect(smb_llist_t *, const char *, const char *); static int smb_server_fclose(smb_llist_t *, uint32_t); @@ -833,15 +834,6 @@ smb_server_enum(smb_ioc_svcenum_t *ioc) smb_server_t *sv; int rc; - switch (svcenum->se_type) { - case SMB_SVCENUM_TYPE_USER: - case SMB_SVCENUM_TYPE_TREE: - case SMB_SVCENUM_TYPE_FILE: - break; - default: - return (EINVAL); - } - if ((rc = smb_server_lookup(&sv)) != 0) return (rc); @@ -849,11 +841,26 @@ smb_server_enum(smb_ioc_svcenum_t *ioc) svcenum->se_bused = 0; svcenum->se_nitems = 0; - smb_server_enum_private(&sv->sv_nbt_daemon.ld_session_list, svcenum); - smb_server_enum_private(&sv->sv_tcp_daemon.ld_session_list, svcenum); + switch (svcenum->se_type) { + case SMB_SVCENUM_TYPE_USER: + smb_server_enum_users(&sv->sv_nbt_daemon.ld_session_list, + svcenum); + smb_server_enum_users(&sv->sv_tcp_daemon.ld_session_list, + svcenum); + break; + case SMB_SVCENUM_TYPE_TREE: + case SMB_SVCENUM_TYPE_FILE: + smb_server_enum_trees(&sv->sv_nbt_daemon.ld_session_list, + svcenum); + smb_server_enum_trees(&sv->sv_tcp_daemon.ld_session_list, + svcenum); + break; + default: + rc = EINVAL; + } smb_server_release(sv); - return (0); + return (rc); } /* @@ -1694,7 +1701,7 @@ smb_server_release(smb_server_t *sv) * Enumerate the users associated with a session list. */ static void -smb_server_enum_private(smb_llist_t *ll, smb_svcenum_t *svcenum) +smb_server_enum_users(smb_llist_t *ll, smb_svcenum_t *svcenum) { smb_session_t *sn; smb_llist_t *ulist; @@ -1714,6 +1721,8 @@ smb_server_enum_private(smb_llist_t *ll, smb_svcenum_t *svcenum) if (smb_user_hold(user)) { rc = smb_user_enum(user, svcenum); smb_user_release(user); + if (rc != 0) + break; } user = smb_llist_next(ulist, user); @@ -1731,6 +1740,48 @@ smb_server_enum_private(smb_llist_t *ll, smb_svcenum_t *svcenum) } /* + * Enumerate the trees/files associated with a session list. 
+ */ +static void +smb_server_enum_trees(smb_llist_t *ll, smb_svcenum_t *svcenum) +{ + smb_session_t *sn; + smb_llist_t *tlist; + smb_tree_t *tree; + int rc = 0; + + smb_llist_enter(ll, RW_READER); + sn = smb_llist_head(ll); + + while (sn != NULL) { + SMB_SESSION_VALID(sn); + tlist = &sn->s_tree_list; + smb_llist_enter(tlist, RW_READER); + tree = smb_llist_head(tlist); + + while (tree != NULL) { + if (smb_tree_hold(tree)) { + rc = smb_tree_enum(tree, svcenum); + smb_tree_release(tree); + if (rc != 0) + break; + } + + tree = smb_llist_next(tlist, tree); + } + + smb_llist_exit(tlist); + + if (rc != 0) + break; + + sn = smb_llist_next(ll, sn); + } + + smb_llist_exit(ll); +} + +/* * Disconnect sessions associated with the specified client and username. * Empty strings are treated as wildcards. */ @@ -1796,8 +1847,8 @@ static int smb_server_fclose(smb_llist_t *ll, uint32_t uniqid) { smb_session_t *sn; - smb_llist_t *ulist; - smb_user_t *user; + smb_llist_t *tlist; + smb_tree_t *tree; int rc = ENOENT; smb_llist_enter(ll, RW_READER); @@ -1805,20 +1856,20 @@ smb_server_fclose(smb_llist_t *ll, uint32_t uniqid) while ((sn != NULL) && (rc == ENOENT)) { SMB_SESSION_VALID(sn); - ulist = &sn->s_user_list; - smb_llist_enter(ulist, RW_READER); - user = smb_llist_head(ulist); - - while ((user != NULL) && (rc == ENOENT)) { - if (smb_user_hold(user)) { - rc = smb_user_fclose(user, uniqid); - smb_user_release(user); + tlist = &sn->s_tree_list; + smb_llist_enter(tlist, RW_READER); + tree = smb_llist_head(tlist); + + while ((tree != NULL) && (rc == ENOENT)) { + if (smb_tree_hold(tree)) { + rc = smb_tree_fclose(tree, uniqid); + smb_tree_release(tree); } - user = smb_llist_next(ulist, user); + tree = smb_llist_next(tlist, tree); } - smb_llist_exit(ulist); + smb_llist_exit(tlist); sn = smb_llist_next(ll, sn); } diff --git a/usr/src/uts/common/fs/smbsrv/smb_session.c b/usr/src/uts/common/fs/smbsrv/smb_session.c index 0fdac10ca6..b8284b372f 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_session.c +++ b/usr/src/uts/common/fs/smbsrv/smb_session.c @@ -43,6 +43,7 @@ static int smb_session_message(smb_session_t *); static int smb_session_xprt_puthdr(smb_session_t *, smb_xprt_t *, uint8_t *, size_t); static smb_user_t *smb_session_lookup_user(smb_session_t *, char *, char *); +static smb_tree_t *smb_session_get_tree(smb_session_t *, smb_tree_t *); static void smb_session_logoff(smb_session_t *); static void smb_request_init_command_mbuf(smb_request_t *sr); void dump_smb_inaddr(smb_inaddr_t *ipaddr); @@ -624,6 +625,11 @@ smb_session_create(ksocket_t new_so, uint16_t port, smb_server_t *sv, kmem_cache_free(sv->si_cache_session, session); return (NULL); } + if (smb_idpool_constructor(&session->s_tid_pool)) { + smb_idpool_destructor(&session->s_uid_pool); + kmem_cache_free(sv->si_cache_session, session); + return (NULL); + } now = ddi_get_lbolt64(); @@ -642,6 +648,9 @@ smb_session_create(ksocket_t new_so, uint16_t port, smb_server_t *sv, smb_llist_constructor(&session->s_user_list, sizeof (smb_user_t), offsetof(smb_user_t, u_lnd)); + smb_llist_constructor(&session->s_tree_list, sizeof (smb_tree_t), + offsetof(smb_tree_t, t_lnd)); + smb_llist_constructor(&session->s_xa_list, sizeof (smb_xa_t), offsetof(smb_xa_t, xa_lnd)); @@ -719,6 +728,7 @@ smb_session_delete(smb_session_t *session) list_destroy(&session->s_oplock_brkreqs); smb_slist_destructor(&session->s_req_list); + smb_llist_destructor(&session->s_tree_list); smb_llist_destructor(&session->s_user_list); smb_llist_destructor(&session->s_xa_list); @@ -726,6 +736,7 @@ 
smb_session_delete(smb_session_t *session) ASSERT(session->s_file_cnt == 0); ASSERT(session->s_dir_cnt == 0); + smb_idpool_destructor(&session->s_tid_pool); smb_idpool_destructor(&session->s_uid_pool); if (session->sock != NULL) { if (session->s_local_port == IPPORT_NETBIOS_SSN) @@ -928,45 +939,306 @@ smb_session_post_user(smb_session_t *session, smb_user_t *user) } /* - * Logoff all users associated with the specified session. + * Find a tree by tree-id. */ -static void -smb_session_logoff(smb_session_t *session) +smb_tree_t * +smb_session_lookup_tree( + smb_session_t *session, + uint16_t tid) + { - smb_user_t *user; + smb_tree_t *tree; SMB_SESSION_VALID(session); - smb_llist_enter(&session->s_user_list, RW_READER); + smb_llist_enter(&session->s_tree_list, RW_READER); + tree = smb_llist_head(&session->s_tree_list); - user = smb_llist_head(&session->s_user_list); - while (user) { - SMB_USER_VALID(user); - ASSERT(user->u_session == session); + while (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); - if (smb_user_hold(user)) { - smb_user_logoff(user); - smb_user_release(user); + if (tree->t_tid == tid) { + if (smb_tree_hold(tree)) { + smb_llist_exit(&session->s_tree_list); + return (tree); + } else { + smb_llist_exit(&session->s_tree_list); + return (NULL); + } } - user = smb_llist_next(&session->s_user_list, user); + tree = smb_llist_next(&session->s_tree_list, tree); } - smb_llist_exit(&session->s_user_list); + smb_llist_exit(&session->s_tree_list); + return (NULL); +} + +/* + * Find the first connected tree that matches the specified sharename. + * If the specified tree is NULL the search starts from the beginning of + * the user's tree list. If a tree is provided the search starts just + * after that tree. + */ +smb_tree_t * +smb_session_lookup_share( + smb_session_t *session, + const char *sharename, + smb_tree_t *tree) +{ + SMB_SESSION_VALID(session); + ASSERT(sharename); + + smb_llist_enter(&session->s_tree_list, RW_READER); + + if (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); + tree = smb_llist_next(&session->s_tree_list, tree); + } else { + tree = smb_llist_head(&session->s_tree_list); + } + + while (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); + if (smb_strcasecmp(tree->t_sharename, sharename, 0) == 0) { + if (smb_tree_hold(tree)) { + smb_llist_exit(&session->s_tree_list); + return (tree); + } + } + tree = smb_llist_next(&session->s_tree_list, tree); + } + + smb_llist_exit(&session->s_tree_list); + return (NULL); +} + +/* + * Find the first connected tree that matches the specified volume name. + * If the specified tree is NULL the search starts from the beginning of + * the user's tree list. If a tree is provided the search starts just + * after that tree. 
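/*
 * Sketch of the cursor-style walk described in the comment above; the loop
 * itself is hypothetical, but it mirrors how smb_session_disconnect_share()
 * later in this patch drains its matches: each call returns a held tree,
 * the previously returned tree is passed back in to resume the search, and
 * every hold is released.
 */
static void
example_walk_volume_trees(smb_session_t *session, const char *name)
{
	smb_tree_t *tree, *next;

	tree = smb_session_lookup_volume(session, name, NULL);
	while (tree != NULL) {
		/* ... operate on the held tree here ... */
		next = smb_session_lookup_volume(session, name, tree);
		smb_tree_release(tree);
		tree = next;
	}
}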
+ */ +smb_tree_t * +smb_session_lookup_volume( + smb_session_t *session, + const char *name, + smb_tree_t *tree) +{ + SMB_SESSION_VALID(session); + ASSERT(name); + + smb_llist_enter(&session->s_tree_list, RW_READER); + + if (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); + tree = smb_llist_next(&session->s_tree_list, tree); + } else { + tree = smb_llist_head(&session->s_tree_list); + } + + while (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); + + if (smb_strcasecmp(tree->t_volume, name, 0) == 0) { + if (smb_tree_hold(tree)) { + smb_llist_exit(&session->s_tree_list); + return (tree); + } + } + + tree = smb_llist_next(&session->s_tree_list, tree); + } + + smb_llist_exit(&session->s_tree_list); + return (NULL); +} + +/* + * Disconnect all trees that match the specified client process-id. + */ +void +smb_session_close_pid( + smb_session_t *session, + uint16_t pid) +{ + smb_tree_t *tree; + + SMB_SESSION_VALID(session); + + tree = smb_session_get_tree(session, NULL); + while (tree) { + smb_tree_t *next; + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); + smb_tree_close_pid(tree, pid); + next = smb_session_get_tree(session, tree); + smb_tree_release(tree); + tree = next; + } +} + +static void +smb_session_tree_dtor(void *t) +{ + smb_tree_t *tree = (smb_tree_t *)t; + + smb_tree_disconnect(tree, B_TRUE); + /* release the ref acquired during the traversal loop */ + smb_tree_release(tree); } + /* - * Disconnect any trees associated with the specified share. - * Iterate through the users on this session and tell each user - * to disconnect from the share. + * Disconnect all trees that this user has connected. */ void -smb_session_disconnect_share(smb_session_t *session, const char *sharename) +smb_session_disconnect_owned_trees( + smb_session_t *session, + smb_user_t *owner) +{ + smb_tree_t *tree; + smb_llist_t *tree_list = &session->s_tree_list; + + SMB_SESSION_VALID(session); + SMB_USER_VALID(owner); + + smb_llist_enter(tree_list, RW_READER); + + tree = smb_llist_head(tree_list); + while (tree) { + if ((tree->t_owner == owner) && + smb_tree_hold(tree)) { + /* + * smb_tree_hold() succeeded, hence we are in state + * SMB_TREE_STATE_CONNECTED; schedule this tree + * for asynchronous disconnect, which will fire + * after we drop the llist traversal lock. + */ + smb_llist_post(tree_list, tree, smb_session_tree_dtor); + } + tree = smb_llist_next(tree_list, tree); + } + + /* drop the lock and flush the dtor queue */ + smb_llist_exit(tree_list); +} + +/* + * Disconnect all trees that this user has connected. + */ +void +smb_session_disconnect_trees( + smb_session_t *session) +{ + smb_tree_t *tree; + + SMB_SESSION_VALID(session); + + tree = smb_session_get_tree(session, NULL); + while (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); + smb_tree_disconnect(tree, B_TRUE); + smb_tree_release(tree); + tree = smb_session_get_tree(session, NULL); + } +} + +/* + * Disconnect all trees that match the specified share name. 
+ */ +void +smb_session_disconnect_share( + smb_session_t *session, + const char *sharename) +{ + smb_tree_t *tree; + smb_tree_t *next; + + SMB_SESSION_VALID(session); + + tree = smb_session_lookup_share(session, sharename, NULL); + while (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + ASSERT(tree->t_session == session); + smb_session_cancel_requests(session, tree, NULL); + smb_tree_disconnect(tree, B_TRUE); + next = smb_session_lookup_share(session, sharename, tree); + smb_tree_release(tree); + tree = next; + } +} + +void +smb_session_post_tree(smb_session_t *session, smb_tree_t *tree) +{ + SMB_SESSION_VALID(session); + SMB_TREE_VALID(tree); + ASSERT0(tree->t_refcnt); + ASSERT(tree->t_state == SMB_TREE_STATE_DISCONNECTED); + ASSERT(tree->t_session == session); + + smb_llist_post(&session->s_tree_list, tree, smb_tree_dealloc); +} + +/* + * Get the next connected tree in the list. A reference is taken on + * the tree, which can be released later with smb_tree_release(). + * + * If the specified tree is NULL the search starts from the beginning of + * the tree list. If a tree is provided the search starts just after + * that tree. + * + * Returns NULL if there are no connected trees in the list. + */ +static smb_tree_t * +smb_session_get_tree( + smb_session_t *session, + smb_tree_t *tree) +{ + smb_llist_t *tree_list; + + SMB_SESSION_VALID(session); + tree_list = &session->s_tree_list; + + smb_llist_enter(tree_list, RW_READER); + + if (tree) { + ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC); + tree = smb_llist_next(tree_list, tree); + } else { + tree = smb_llist_head(tree_list); + } + + while (tree) { + if (smb_tree_hold(tree)) + break; + + tree = smb_llist_next(tree_list, tree); + } + + smb_llist_exit(tree_list); + return (tree); +} + +/* + * Logoff all users associated with the specified session. 
+ */ +static void +smb_session_logoff(smb_session_t *session) { smb_user_t *user; SMB_SESSION_VALID(session); + smb_session_disconnect_trees(session); + smb_llist_enter(&session->s_user_list, RW_READER); user = smb_llist_head(&session->s_user_list); @@ -975,7 +1247,7 @@ smb_session_disconnect_share(smb_session_t *session, const char *sharename) ASSERT(user->u_session == session); if (smb_user_hold(user)) { - smb_user_disconnect_share(user, sharename); + smb_user_logoff(user); smb_user_release(user); } diff --git a/usr/src/uts/common/fs/smbsrv/smb_trans2_find.c b/usr/src/uts/common/fs/smbsrv/smb_trans2_find.c index 037b2a3b36..d0d60cea5d 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_trans2_find.c +++ b/usr/src/uts/common/fs/smbsrv/smb_trans2_find.c @@ -332,7 +332,7 @@ smb_com_trans2_find_first2(smb_request_t *sr, smb_xa_t *xa) return (SDRC_ERROR); } - od = smb_tree_lookup_odir(sr->tid_tree, odid); + od = smb_tree_lookup_odir(sr, odid); if (od == NULL) return (SDRC_ERROR); @@ -463,7 +463,7 @@ smb_com_trans2_find_next2(smb_request_t *sr, smb_xa_t *xa) if (args.fa_maxdata == 0) return (SDRC_ERROR); - od = smb_tree_lookup_odir(sr->tid_tree, odid); + od = smb_tree_lookup_odir(sr, odid); if (od == NULL) { smbsr_error(sr, NT_STATUS_INVALID_HANDLE, ERRDOS, ERROR_INVALID_HANDLE); @@ -943,7 +943,7 @@ smb_com_find_close2(smb_request_t *sr) if (smbsr_decode_vwv(sr, "w", &odid) != 0) return (SDRC_ERROR); - od = smb_tree_lookup_odir(sr->tid_tree, odid); + od = smb_tree_lookup_odir(sr, odid); if (od == NULL) { smbsr_error(sr, NT_STATUS_INVALID_HANDLE, ERRDOS, ERROR_INVALID_HANDLE); diff --git a/usr/src/uts/common/fs/smbsrv/smb_tree.c b/usr/src/uts/common/fs/smbsrv/smb_tree.c index 13adc2d803..b225c67623 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_tree.c +++ b/usr/src/uts/common/fs/smbsrv/smb_tree.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 
*/ /* @@ -40,15 +40,15 @@ * +-------------------+ +-------------------+ +-------------------+ * | SESSION |<----->| SESSION |......| SESSION | * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v - * +-------------------+ +-------------------+ +-------------------+ - * | USER |<----->| USER |......| USER | - * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v + * | | + * | | + * | v + * | +-------------------+ +-------------------+ +-------------------+ + * | | USER |<--->| USER |...| USER | + * | +-------------------+ +-------------------+ +-------------------+ + * | + * | + * v * +-------------------+ +-------------------+ +-------------------+ * | TREE |<----->| TREE |......| TREE | * +-------------------+ +-------------------+ +-------------------+ @@ -175,7 +175,7 @@ static smb_tree_t *smb_tree_connect_core(smb_request_t *); static smb_tree_t *smb_tree_connect_disk(smb_request_t *, const char *); static smb_tree_t *smb_tree_connect_printq(smb_request_t *, const char *); static smb_tree_t *smb_tree_connect_ipc(smb_request_t *, const char *); -static smb_tree_t *smb_tree_alloc(smb_user_t *, const smb_kshare_t *, +static smb_tree_t *smb_tree_alloc(smb_request_t *, const smb_kshare_t *, smb_node_t *, uint32_t, uint32_t); static boolean_t smb_tree_is_connected_locked(smb_tree_t *); static boolean_t smb_tree_is_disconnected(smb_tree_t *); @@ -269,6 +269,7 @@ smb_tree_connect_core(smb_request_t *sr) } smb_kshare_release(si); + return (tree); } @@ -361,7 +362,7 @@ smb_tree_release( smb_llist_flush(&tree->t_odir_list); if (smb_tree_is_disconnected(tree) && (tree->t_refcnt == 0)) - smb_user_post_tree(tree->t_user, tree); + smb_session_post_tree(tree->t_session, tree); mutex_exit(&tree->t_mutex); } @@ -428,7 +429,7 @@ smb_tree_enum(smb_tree_t *tree, smb_svcenum_t *svcenum) { smb_ofile_t *of; smb_ofile_t *next; - int rc; + int rc = 0; ASSERT(tree); ASSERT(tree->t_magic == SMB_TREE_MAGIC); @@ -712,8 +713,7 @@ smb_tree_connect_disk(smb_request_t *sr, const char *sharename) if (!smb_shortnames) sr->arg.tcon.optional_support |= SMB_UNIQUE_FILE_NAME; - tree = smb_tree_alloc(user, si, snode, access, - sr->sr_cfg->skc_execflags); + tree = smb_tree_alloc(sr, si, snode, access, sr->sr_cfg->skc_execflags); smb_node_release(snode); @@ -805,8 +805,7 @@ smb_tree_connect_printq(smb_request_t *sr, const char *sharename) sr->sr_tcon.optional_support = SMB_SUPPORT_SEARCH_BITS; - tree = smb_tree_alloc(user, si, snode, access, - sr->sr_cfg->skc_execflags); + tree = smb_tree_alloc(sr, si, snode, access, sr->sr_cfg->skc_execflags); smb_node_release(snode); @@ -846,7 +845,7 @@ smb_tree_connect_ipc(smb_request_t *sr, const char *name) sr->sr_tcon.optional_support = SMB_SUPPORT_SEARCH_BITS; - tree = smb_tree_alloc(user, si, NULL, ACE_ALL_PERMS, 0); + tree = smb_tree_alloc(sr, si, NULL, ACE_ALL_PERMS, 0); if (tree == NULL) { smb_tree_log(sr, name, "access denied"); smbsr_error(sr, NT_STATUS_ACCESS_DENIED, ERRSRV, ERRaccess); @@ -859,41 +858,45 @@ smb_tree_connect_ipc(smb_request_t *sr, const char *name) * Allocate a tree. 
*/ static smb_tree_t * -smb_tree_alloc(smb_user_t *user, const smb_kshare_t *si, smb_node_t *snode, - uint32_t access, uint32_t execflags) +smb_tree_alloc(smb_request_t *sr, const smb_kshare_t *si, + smb_node_t *snode, uint32_t access, uint32_t execflags) { + smb_session_t *session = sr->session; smb_tree_t *tree; uint32_t stype = si->shr_type; uint16_t tid; - if (smb_idpool_alloc(&user->u_tid_pool, &tid)) + if (smb_idpool_alloc(&session->s_tid_pool, &tid)) return (NULL); - tree = kmem_cache_alloc(user->u_server->si_cache_tree, KM_SLEEP); + tree = kmem_cache_alloc(session->s_server->si_cache_tree, KM_SLEEP); bzero(tree, sizeof (smb_tree_t)); - tree->t_user = user; - tree->t_session = user->u_session; - tree->t_server = user->u_server; + tree->t_session = session; + tree->t_server = session->s_server; + + /* grab a ref for tree->t_owner */ + smb_user_hold_internal(sr->uid_user); + tree->t_owner = sr->uid_user; if (STYPE_ISDSK(stype) || STYPE_ISPRN(stype)) { if (smb_tree_getattr(si, snode, tree) != 0) { - smb_idpool_free(&user->u_tid_pool, tid); - kmem_cache_free(user->u_server->si_cache_tree, tree); + smb_idpool_free(&session->s_tid_pool, tid); + kmem_cache_free(session->s_server->si_cache_tree, tree); return (NULL); } } if (smb_idpool_constructor(&tree->t_fid_pool)) { - smb_idpool_free(&user->u_tid_pool, tid); - kmem_cache_free(user->u_server->si_cache_tree, tree); + smb_idpool_free(&session->s_tid_pool, tid); + kmem_cache_free(session->s_server->si_cache_tree, tree); return (NULL); } if (smb_idpool_constructor(&tree->t_odid_pool)) { smb_idpool_destructor(&tree->t_fid_pool); - smb_idpool_free(&user->u_tid_pool, tid); - kmem_cache_free(user->u_server->si_cache_tree, tree); + smb_idpool_free(&session->s_tid_pool, tid); + kmem_cache_free(session->s_server->si_cache_tree, tree); return (NULL); } @@ -929,11 +932,11 @@ smb_tree_alloc(smb_user_t *user, const smb_kshare_t *si, smb_node_t *snode, tree->t_acltype = smb_fsop_acltype(snode); } - smb_llist_enter(&user->u_tree_list, RW_WRITER); - smb_llist_insert_head(&user->u_tree_list, tree); - smb_llist_exit(&user->u_tree_list); - atomic_inc_32(&user->u_session->s_tree_cnt); - smb_server_inc_trees(user->u_server); + smb_llist_enter(&session->s_tree_list, RW_WRITER); + smb_llist_insert_head(&session->s_tree_list, tree); + smb_llist_exit(&session->s_tree_list); + atomic_inc_32(&session->s_tree_cnt); + smb_server_inc_trees(session->s_server); return (tree); } @@ -947,19 +950,19 @@ smb_tree_alloc(smb_user_t *user, const smb_kshare_t *si, smb_node_t *snode, void smb_tree_dealloc(void *arg) { - smb_user_t *user; + smb_session_t *session; smb_tree_t *tree = (smb_tree_t *)arg; SMB_TREE_VALID(tree); ASSERT(tree->t_state == SMB_TREE_STATE_DISCONNECTED); ASSERT(tree->t_refcnt == 0); - user = tree->t_user; - smb_llist_enter(&user->u_tree_list, RW_WRITER); - smb_llist_remove(&user->u_tree_list, tree); - smb_idpool_free(&user->u_tid_pool, tree->t_tid); - atomic_dec_32(&tree->t_session->s_tree_cnt); - smb_llist_exit(&user->u_tree_list); + session = tree->t_session; + smb_llist_enter(&session->s_tree_list, RW_WRITER); + smb_llist_remove(&session->s_tree_list, tree); + smb_idpool_free(&session->s_tid_pool, tree->t_tid); + atomic_dec_32(&session->s_tree_cnt); + smb_llist_exit(&session->s_tree_list); mutex_enter(&tree->t_mutex); mutex_exit(&tree->t_mutex); @@ -974,6 +977,10 @@ smb_tree_dealloc(void *arg) smb_llist_destructor(&tree->t_odir_list); smb_idpool_destructor(&tree->t_fid_pool); smb_idpool_destructor(&tree->t_odid_pool); + + SMB_USER_VALID(tree->t_owner); + 
smb_user_release(tree->t_owner); + kmem_cache_free(tree->t_server->si_cache_tree, tree); } @@ -1234,27 +1241,38 @@ smb_tree_log(smb_request_t *sr, const char *sharename, const char *fmt, ...) * Returns NULL if odir not found or a hold cannot be obtained. */ smb_odir_t * -smb_tree_lookup_odir(smb_tree_t *tree, uint16_t odid) +smb_tree_lookup_odir(smb_request_t *sr, uint16_t odid) { smb_odir_t *od; smb_llist_t *od_list; + smb_tree_t *tree = sr->tid_tree; - ASSERT(tree); ASSERT(tree->t_magic == SMB_TREE_MAGIC); od_list = &tree->t_odir_list; - smb_llist_enter(od_list, RW_READER); + smb_llist_enter(od_list, RW_READER); od = smb_llist_head(od_list); while (od) { - if (od->d_odid == odid) { - if (!smb_odir_hold(od)) - od = NULL; + if (od->d_odid == odid) break; - } od = smb_llist_next(od_list, od); } + if (od == NULL) + goto out; + + /* + * Only allow use of a given Search ID with the same UID that + * was used to create it. MS-CIFS 3.3.5.14 + */ + if (od->d_user != sr->uid_user) { + od = NULL; + goto out; + } + if (!smb_odir_hold(od)) + od = NULL; +out: smb_llist_exit(od_list); return (od); } @@ -1377,15 +1395,16 @@ smb_tree_close_odirs(smb_tree_t *tree, uint16_t pid) } static void -smb_tree_set_execinfo(smb_tree_t *tree, smb_shr_execinfo_t *exec, int exec_type) +smb_tree_set_execinfo(smb_tree_t *tree, smb_shr_execinfo_t *exec, + int exec_type) { exec->e_sharename = tree->t_sharename; - exec->e_winname = tree->t_user->u_name; - exec->e_userdom = tree->t_user->u_domain; + exec->e_winname = tree->t_owner->u_name; + exec->e_userdom = tree->t_owner->u_domain; exec->e_srv_ipaddr = tree->t_session->local_ipaddr; exec->e_cli_ipaddr = tree->t_session->ipaddr; exec->e_cli_netbiosname = tree->t_session->workstation; - exec->e_uid = crgetuid(tree->t_user->u_cred); + exec->e_uid = crgetuid(tree->t_owner->u_cred); exec->e_type = exec_type; } @@ -1438,6 +1457,26 @@ smb_tree_netinfo_encode(smb_tree_t *tree, uint8_t *buf, size_t buflen, return (rc); } +static void +smb_tree_netinfo_username(smb_tree_t *tree, char **namestr, uint32_t *namelen) +{ + smb_user_t *user = tree->t_owner; + + /* + * u_domain_len and u_name_len include the '\0' in their + * lengths, hence the sum of the two lengths gives us room + * for both the '\\' and '\0' chars. 
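/*
 * Worked example of the arithmetic above, with hypothetical values:
 * u_domain = "SALES" gives u_domain_len = 6 (five characters plus the NUL)
 * and u_name = "bob" gives u_name_len = 4, so namelen = 10, which is exactly
 * enough for "SALES\bob" (nine characters) plus its terminating NUL.
 */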
+ */ + ASSERT(namestr); + ASSERT(namelen); + ASSERT(user->u_domain_len > 0); + ASSERT(user->u_name_len > 0); + *namelen = user->u_domain_len + user->u_name_len; + *namestr = kmem_alloc(*namelen, KM_SLEEP); + (void) snprintf(*namestr, *namelen, "%s\\%s", user->u_domain, + user->u_name); +} + /* * Note: ci_numusers should be the number of users connected to * the share rather than the number of references on the tree but @@ -1446,8 +1485,6 @@ smb_tree_netinfo_encode(smb_tree_t *tree, uint8_t *buf, size_t buflen, static void smb_tree_netinfo_init(smb_tree_t *tree, smb_netconnectinfo_t *info) { - smb_user_t *user; - ASSERT(tree); info->ci_id = tree->t_tid; @@ -1459,13 +1496,7 @@ smb_tree_netinfo_init(smb_tree_t *tree, smb_netconnectinfo_t *info) info->ci_sharelen = strlen(tree->t_sharename) + 1; info->ci_share = smb_mem_strdup(tree->t_sharename); - user = tree->t_user; - ASSERT(user); - - info->ci_namelen = user->u_domain_len + user->u_name_len + 2; - info->ci_username = kmem_alloc(info->ci_namelen, KM_SLEEP); - (void) snprintf(info->ci_username, info->ci_namelen, "%s\\%s", - user->u_domain, user->u_name); + smb_tree_netinfo_username(tree, &info->ci_username, &info->ci_namelen); } static void diff --git a/usr/src/uts/common/fs/smbsrv/smb_tree_connect.c b/usr/src/uts/common/fs/smbsrv/smb_tree_connect.c index 1ce9720f5d..19b857e834 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_tree_connect.c +++ b/usr/src/uts/common/fs/smbsrv/smb_tree_connect.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ #include <smbsrv/smb_kproto.h> @@ -362,8 +363,7 @@ smb_sdrc_t smb_pre_tree_disconnect(smb_request_t *sr) { sr->uid_user = smb_session_lookup_uid(sr->session, sr->smb_uid); - if (sr->uid_user != NULL) - sr->tid_tree = smb_user_lookup_tree(sr->uid_user, sr->smb_tid); + sr->tid_tree = smb_session_lookup_tree(sr->session, sr->smb_tid); DTRACE_SMB_1(op__TreeDisconnect__start, smb_request_t *, sr); return (SDRC_SUCCESS); diff --git a/usr/src/uts/common/fs/smbsrv/smb_user.c b/usr/src/uts/common/fs/smbsrv/smb_user.c index cc3fde7f38..09eaba699c 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_user.c +++ b/usr/src/uts/common/fs/smbsrv/smb_user.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 
*/ /* @@ -38,15 +39,15 @@ * +-------------------+ +-------------------+ +-------------------+ * | SESSION |<----->| SESSION |......| SESSION | * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v - * +-------------------+ +-------------------+ +-------------------+ - * | USER |<----->| USER |......| USER | - * +-------------------+ +-------------------+ +-------------------+ - * | - * | - * v + * | | + * | | + * | v + * | +-------------------+ +-------------------+ +-------------------+ + * | | USER |<--->| USER |...| USER | + * | +-------------------+ +-------------------+ +-------------------+ + * | + * | + * v * +-------------------+ +-------------------+ +-------------------+ * | TREE |<----->| TREE |......| TREE | * +-------------------+ +-------------------+ +-------------------+ @@ -170,7 +171,6 @@ static boolean_t smb_user_is_logged_in(smb_user_t *); static int smb_user_enum_private(smb_user_t *, smb_svcenum_t *); -static smb_tree_t *smb_user_get_tree(smb_llist_t *, smb_tree_t *); static void smb_user_setcred(smb_user_t *, cred_t *, uint32_t); static void smb_user_nonauth_logon(uint32_t); static void smb_user_auth_logoff(uint32_t); @@ -210,20 +210,15 @@ smb_user_login( user->u_audit_sid = audit_sid; if (!smb_idpool_alloc(&session->s_uid_pool, &user->u_uid)) { - if (!smb_idpool_constructor(&user->u_tid_pool)) { - smb_llist_constructor(&user->u_tree_list, - sizeof (smb_tree_t), offsetof(smb_tree_t, t_lnd)); - mutex_init(&user->u_mutex, NULL, MUTEX_DEFAULT, NULL); - smb_user_setcred(user, cr, privileges); - user->u_state = SMB_USER_STATE_LOGGED_IN; - user->u_magic = SMB_USER_MAGIC; - smb_llist_enter(&session->s_user_list, RW_WRITER); - smb_llist_insert_tail(&session->s_user_list, user); - smb_llist_exit(&session->s_user_list); - smb_server_inc_users(session->s_server); - return (user); - } - smb_idpool_free(&session->s_uid_pool, user->u_uid); + mutex_init(&user->u_mutex, NULL, MUTEX_DEFAULT, NULL); + smb_user_setcred(user, cr, privileges); + user->u_state = SMB_USER_STATE_LOGGED_IN; + user->u_magic = SMB_USER_MAGIC; + smb_llist_enter(&session->s_user_list, RW_WRITER); + smb_llist_insert_tail(&session->s_user_list, user); + smb_llist_exit(&session->s_user_list); + smb_server_inc_users(session->s_server); + return (user); } smb_mem_free(user->u_name); smb_mem_free(user->u_domain); @@ -279,10 +274,7 @@ smb_user_logoff( */ user->u_state = SMB_USER_STATE_LOGGING_OFF; mutex_exit(&user->u_mutex); - /* - * All the trees hanging off of this user are disconnected. - */ - smb_user_disconnect_trees(user); + smb_session_disconnect_owned_trees(user->u_session, user); smb_user_auth_logoff(user->u_audit_sid); mutex_enter(&user->u_mutex); user->u_state = SMB_USER_STATE_LOGGED_OFF; @@ -301,13 +293,13 @@ smb_user_logoff( } /* - * Take a reference on a user. + * Take a reference on a user. Do not return a reference unless the user is in + * the logged-in state. */ boolean_t smb_user_hold(smb_user_t *user) { - ASSERT(user); - ASSERT(user->u_magic == SMB_USER_MAGIC); + SMB_USER_VALID(user); mutex_enter(&user->u_mutex); @@ -322,6 +314,19 @@ smb_user_hold(smb_user_t *user) } /* + * Unconditionally take a reference on a user. + */ +void +smb_user_hold_internal(smb_user_t *user) +{ + SMB_USER_VALID(user); + + mutex_enter(&user->u_mutex); + user->u_refcnt++; + mutex_exit(&user->u_mutex); +} + +/* * Release a reference on a user. If the reference count falls to * zero and the user has logged off, post the object for deletion. 
 * Object deletion is deferred to avoid modifying a list while an
@@ -337,9 +342,6 @@ smb_user_release(
 	ASSERT(user->u_refcnt);
 	user->u_refcnt--;

-	/* flush the tree list's delete queue */
-	smb_llist_flush(&user->u_tree_list);
-
 	switch (user->u_state) {
 	case SMB_USER_STATE_LOGGED_OFF:
 		if (user->u_refcnt == 0)
@@ -357,248 +359,6 @@ smb_user_release(
 	mutex_exit(&user->u_mutex);
 }

-void
-smb_user_post_tree(smb_user_t *user, smb_tree_t *tree)
-{
-	SMB_USER_VALID(user);
-	SMB_TREE_VALID(tree);
-	ASSERT(tree->t_refcnt == 0);
-	ASSERT(tree->t_state == SMB_TREE_STATE_DISCONNECTED);
-	ASSERT(tree->t_user == user);
-
-	smb_llist_post(&user->u_tree_list, tree, smb_tree_dealloc);
-}
-
-
-/*
- * Find a tree by tree-id.
- */
-smb_tree_t *
-smb_user_lookup_tree(
-    smb_user_t	*user,
-    uint16_t	tid)
-
-{
-	smb_tree_t	*tree;
-
-	ASSERT(user);
-	ASSERT(user->u_magic == SMB_USER_MAGIC);
-
-	smb_llist_enter(&user->u_tree_list, RW_READER);
-	tree = smb_llist_head(&user->u_tree_list);
-
-	while (tree) {
-		ASSERT(tree->t_magic == SMB_TREE_MAGIC);
-		ASSERT(tree->t_user == user);
-
-		if (tree->t_tid == tid) {
-			if (smb_tree_hold(tree)) {
-				smb_llist_exit(&user->u_tree_list);
-				return (tree);
-			} else {
-				smb_llist_exit(&user->u_tree_list);
-				return (NULL);
-			}
-		}
-
-		tree = smb_llist_next(&user->u_tree_list, tree);
-	}
-
-	smb_llist_exit(&user->u_tree_list);
-	return (NULL);
-}
-
-/*
- * Find the first connected tree that matches the specified sharename.
- * If the specified tree is NULL the search starts from the beginning of
- * the user's tree list. If a tree is provided the search starts just
- * after that tree.
- */
-smb_tree_t *
-smb_user_lookup_share(
-    smb_user_t	*user,
-    const char	*sharename,
-    smb_tree_t	*tree)
-{
-	ASSERT(user);
-	ASSERT(user->u_magic == SMB_USER_MAGIC);
-	ASSERT(sharename);
-
-	smb_llist_enter(&user->u_tree_list, RW_READER);
-
-	if (tree) {
-		ASSERT(tree->t_magic == SMB_TREE_MAGIC);
-		ASSERT(tree->t_user == user);
-		tree = smb_llist_next(&user->u_tree_list, tree);
-	} else {
-		tree = smb_llist_head(&user->u_tree_list);
-	}
-
-	while (tree) {
-		ASSERT(tree->t_magic == SMB_TREE_MAGIC);
-		ASSERT(tree->t_user == user);
-		if (smb_strcasecmp(tree->t_sharename, sharename, 0) == 0) {
-			if (smb_tree_hold(tree)) {
-				smb_llist_exit(&user->u_tree_list);
-				return (tree);
-			}
-		}
-		tree = smb_llist_next(&user->u_tree_list, tree);
-	}
-
-	smb_llist_exit(&user->u_tree_list);
-	return (NULL);
-}
-
-/*
- * Find the first connected tree that matches the specified volume name.
- * If the specified tree is NULL the search starts from the beginning of
- * the user's tree list. If a tree is provided the search starts just
- * after that tree.
- */
-smb_tree_t *
-smb_user_lookup_volume(
-    smb_user_t	*user,
-    const char	*name,
-    smb_tree_t	*tree)
-{
-	ASSERT(user);
-	ASSERT(user->u_magic == SMB_USER_MAGIC);
-	ASSERT(name);
-
-	smb_llist_enter(&user->u_tree_list, RW_READER);
-
-	if (tree) {
-		ASSERT(tree->t_magic == SMB_TREE_MAGIC);
-		ASSERT(tree->t_user == user);
-		tree = smb_llist_next(&user->u_tree_list, tree);
-	} else {
-		tree = smb_llist_head(&user->u_tree_list);
-	}
-
-	while (tree) {
-		ASSERT(tree->t_magic == SMB_TREE_MAGIC);
-		ASSERT(tree->t_user == user);
-
-		if (smb_strcasecmp(tree->t_volume, name, 0) == 0) {
-			if (smb_tree_hold(tree)) {
-				smb_llist_exit(&user->u_tree_list);
-				return (tree);
-			}
-		}
-
-		tree = smb_llist_next(&user->u_tree_list, tree);
-	}
-
-	smb_llist_exit(&user->u_tree_list);
-	return (NULL);
-}
-
-/*
- * Disconnect all trees that match the specified client process-id.
- */
-void
-smb_user_close_pid(
-    smb_user_t	*user,
-    uint16_t	pid)
-{
-	smb_tree_t	*tree;
-
-	ASSERT(user);
-	ASSERT(user->u_magic == SMB_USER_MAGIC);
-
-	tree = smb_user_get_tree(&user->u_tree_list, NULL);
-	while (tree) {
-		smb_tree_t *next;
-		ASSERT(tree->t_user == user);
-		smb_tree_close_pid(tree, pid);
-		next = smb_user_get_tree(&user->u_tree_list, tree);
-		smb_tree_release(tree);
-		tree = next;
-	}
-}
-
-/*
- * Disconnect all trees that this user has connected.
- */
-void
-smb_user_disconnect_trees(
-    smb_user_t	*user)
-{
-	smb_tree_t	*tree;
-
-	ASSERT(user);
-	ASSERT(user->u_magic == SMB_USER_MAGIC);
-
-	tree = smb_user_get_tree(&user->u_tree_list, NULL);
-	while (tree) {
-		ASSERT(tree->t_user == user);
-		smb_tree_disconnect(tree, B_TRUE);
-		smb_tree_release(tree);
-		tree = smb_user_get_tree(&user->u_tree_list, NULL);
-	}
-}
-
-/*
- * Disconnect all trees that match the specified share name.
- */
-void
-smb_user_disconnect_share(
-    smb_user_t	*user,
-    const char	*sharename)
-{
-	smb_tree_t	*tree;
-	smb_tree_t	*next;
-
-	ASSERT(user);
-	ASSERT(user->u_magic == SMB_USER_MAGIC);
-	ASSERT(user->u_refcnt);
-
-	tree = smb_user_lookup_share(user, sharename, NULL);
-	while (tree) {
-		ASSERT(tree->t_magic == SMB_TREE_MAGIC);
-		smb_session_cancel_requests(user->u_session, tree, NULL);
-		smb_tree_disconnect(tree, B_TRUE);
-		next = smb_user_lookup_share(user, sharename, tree);
-		smb_tree_release(tree);
-		tree = next;
-	}
-}
-
-/*
- * Close a file by its unique id.
- */
-int
-smb_user_fclose(smb_user_t *user, uint32_t uniqid)
-{
-	smb_llist_t	*tree_list;
-	smb_tree_t	*tree;
-	int		rc = ENOENT;
-
-	ASSERT(user);
-	ASSERT(user->u_magic == SMB_USER_MAGIC);
-
-	tree_list = &user->u_tree_list;
-	ASSERT(tree_list);
-
-	smb_llist_enter(tree_list, RW_READER);
-	tree = smb_llist_head(tree_list);
-
-	while ((tree != NULL) && (rc == ENOENT)) {
-		ASSERT(tree->t_user == user);
-
-		if (smb_tree_hold(tree)) {
-			rc = smb_tree_fclose(tree, uniqid);
-			smb_tree_release(tree);
-		}
-
-		tree = smb_llist_next(tree_list, tree);
-	}
-
-	smb_llist_exit(tree_list);
-	return (rc);
-}
-
 /*
  * Determine whether or not the user is an administrator.
  * Members of the administrators group have administrative rights.
@@ -688,9 +448,7 @@ smb_user_namecmp(smb_user_t *user, const char *name)
 int
 smb_user_enum(smb_user_t *user, smb_svcenum_t *svcenum)
 {
-	smb_tree_t	*tree;
-	smb_tree_t	*next;
-	int		rc;
+	int		rc = 0;

 	ASSERT(user);
 	ASSERT(user->u_magic == SMB_USER_MAGIC);
@@ -698,21 +456,6 @@ smb_user_enum(smb_user_t *user, smb_svcenum_t *svcenum)
 	if (svcenum->se_type == SMB_SVCENUM_TYPE_USER)
 		return (smb_user_enum_private(user, svcenum));

-	tree = smb_user_get_tree(&user->u_tree_list, NULL);
-	while (tree) {
-		ASSERT(tree->t_user == user);
-
-		rc = smb_tree_enum(tree, svcenum);
-		if (rc != 0) {
-			smb_tree_release(tree);
-			break;
-		}
-
-		next = smb_user_get_tree(&user->u_tree_list, tree);
-		smb_tree_release(tree);
-		tree = next;
-	}
-
 	return (rc);
 }

@@ -769,8 +512,6 @@ smb_user_delete(void *arg)
 	user->u_magic = (uint32_t)~SMB_USER_MAGIC;
 	mutex_destroy(&user->u_mutex);
-	smb_llist_destructor(&user->u_tree_list);
-	smb_idpool_destructor(&user->u_tid_pool);
 	if (user->u_cred)
 		crfree(user->u_cred);
 	if (user->u_privcred)
@@ -780,43 +521,6 @@ smb_user_delete(void *arg)
 	kmem_cache_free(user->u_server->si_cache_user, user);
 }

-/*
- * Get the next connected tree in the list. A reference is taken on
- * the tree, which can be released later with smb_tree_release().
- *
- * If the specified tree is NULL the search starts from the beginning of
- * the tree list. If a tree is provided the search starts just after
- * that tree.
- *
- * Returns NULL if there are no connected trees in the list.
- */
-static smb_tree_t *
-smb_user_get_tree(
-    smb_llist_t	*tree_list,
-    smb_tree_t	*tree)
-{
-	ASSERT(tree_list);
-
-	smb_llist_enter(tree_list, RW_READER);
-
-	if (tree) {
-		ASSERT(tree->t_magic == SMB_TREE_MAGIC);
-		tree = smb_llist_next(tree_list, tree);
-	} else {
-		tree = smb_llist_head(tree_list);
-	}
-
-	while (tree) {
-		if (smb_tree_hold(tree))
-			break;
-
-		tree = smb_llist_next(tree_list, tree);
-	}
-
-	smb_llist_exit(tree_list);
-	return (tree);
-}
-
 cred_t *
 smb_user_getcred(smb_user_t *user)
 {
diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c
index ba9c766c65..b4ab4ec3fd 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c
@@ -363,8 +363,19 @@ dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx,
 boolean_t
 dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag)
 {
-	return (dmu_buf_try_add_ref(ds->ds_dbuf, dp->dp_meta_objset,
-	    ds->ds_object, DMU_BONUS_BLKID, tag));
+	dmu_buf_t *dbuf = ds->ds_dbuf;
+	boolean_t result = B_FALSE;
+
+	if (dbuf != NULL && dmu_buf_try_add_ref(dbuf, dp->dp_meta_objset,
+	    ds->ds_object, DMU_BONUS_BLKID, tag)) {
+
+		if (ds == dmu_buf_get_user(dbuf))
+			result = B_TRUE;
+		else
+			dmu_buf_rele(dbuf, tag);
+	}
+
+	return (result);
 }

 int
diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c
index 6a27544201..02844cef07 100644
--- a/usr/src/uts/common/os/exit.c
+++ b/usr/src/uts/common/os/exit.c
@@ -400,14 +400,36 @@ proc_exit(int why, int what)
 	if (z->zone_boot_err == 0 &&
 	    zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
 	    zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
-		if (z->zone_restart_init == B_TRUE) {
-			if (restart_init(what, why) == 0)
-				return (0);
-		}
-		z->zone_init_status = wstat(why, what);
-		(void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
-		    zone_kcred());
+		/*
+		 * If the init process should be restarted, the
+		 * "zone_restart_init" member will be set. Some init
+		 * programs in branded zones do not tolerate a restart
+		 * in the traditional manner; setting the
+		 * "zone_reboot_on_init_exit" member will cause the
+		 * entire zone to be rebooted instead. If neither of
+		 * these flags is set the zone will shut down.
+		 */
+		if (z->zone_reboot_on_init_exit == B_TRUE &&
+		    z->zone_restart_init == B_TRUE) {
+			/*
+			 * Trigger a zone reboot and continue
+			 * with exit processing.
+			 */
+			z->zone_init_status = wstat(why, what);
+			(void) zone_kadmin(A_REBOOT, 0, NULL,
+			    zone_kcred());
+
+		} else {
+			if (z->zone_restart_init == B_TRUE) {
+				if (restart_init(what, why) == 0)
+					return (0);
+			}
+
+			z->zone_init_status = wstat(why, what);
+			(void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
+			    zone_kcred());
+		}
 	}

 	/*
@@ -995,10 +1017,9 @@ winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
 int
 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
 {
-	int found;
 	proc_t *cp, *pp;
-	int proc_gone;
 	int waitflag = !(options & WNOWAIT);
+	boolean_t have_brand_helper = B_FALSE;

 	/*
 	 * Obsolete flag, defined here only for binary compatibility
@@ -1047,10 +1068,37 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
 		return (ECHILD);
 	}

-	while (pp->p_child != NULL) {
+	if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
+		have_brand_helper = B_TRUE;
+	}
+
+	while (pp->p_child != NULL || have_brand_helper) {
+		boolean_t brand_wants_wait = B_FALSE;
+		int proc_gone = 0;
+		int found = 0;
+
+		/*
+		 * Give the brand a chance to return synthetic results from
+		 * this waitid() call before we do the real thing.
+		 */
+		if (have_brand_helper) {
+			int ret;

-		proc_gone = 0;
+			if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
+			    &brand_wants_wait, &ret) == 0) {
+				mutex_exit(&pidlock);
+				return (ret);
+			}
+			if (pp->p_child == NULL) {
+				goto no_real_children;
+			}
+		}
+
+		/*
+		 * Look for interesting children in the newstate list.
+		 */
+		VERIFY(pp->p_child != NULL);
 		for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
 			if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
 				continue;
@@ -1107,7 +1155,6 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
 		 * Wow! None of the threads on the p_sibling_ns list were
 		 * interesting threads. Check all the kids!
 		 */
-		found = 0;
 		for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
 			if (idtype == P_PID && id != cp->p_pid)
 				continue;
@@ -1186,11 +1233,12 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
 			break;
 		}

+no_real_children:
 		/*
 		 * If we found no interesting processes at all,
 		 * break out and return ECHILD.
 		 */
-		if (found + proc_gone == 0)
+		if (!brand_wants_wait && (found + proc_gone == 0))
 			break;

 		if (options & WNOHANG) {
@@ -1209,7 +1257,7 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
 		 * change state while we wait, we don't wait at all.
 		 * Get out with ECHILD according to SVID.
 		 */
-		if (found == proc_gone)
+		if (!brand_wants_wait && (found == proc_gone))
 			break;

 		if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
diff --git a/usr/src/uts/common/os/logsubr.c b/usr/src/uts/common/os/logsubr.c
index 86e9045887..6a603c8982 100644
--- a/usr/src/uts/common/os/logsubr.c
+++ b/usr/src/uts/common/os/logsubr.c
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2013 Gary Mills
  * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
  */

 #include <sys/types.h>
@@ -250,7 +250,7 @@ log_init(void)
 	 */
 	printf("\rSunOS Release %s Version %s %u-bit\n",
 	    utsname.release, utsname.version, NBBY * (uint_t)sizeof (void *));
-	printf("Copyright (c) 2010-2014, Joyent Inc. All rights reserved.\n");
+	printf("Copyright (c) 2010-2015, Joyent Inc. All rights reserved.\n");
 #ifdef DEBUG
 	printf("DEBUG enabled\n");
 #endif
diff --git a/usr/src/uts/common/os/sig.c b/usr/src/uts/common/os/sig.c
index b117bf3584..ae643c280e 100644
--- a/usr/src/uts/common/os/sig.c
+++ b/usr/src/uts/common/os/sig.c
@@ -22,7 +22,7 @@
 /*
  * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
- * Copyright 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
  */

 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
@@ -194,7 +194,7 @@ eat_signal(kthread_t *t, int sig)
 	    !(ttoproc(t)->p_proc_flag & P_PR_LOCK)) {
 		ttoproc(t)->p_stopsig = 0;
 		t->t_dtrace_stop = 0;
-		t->t_schedflag |= TS_XSTART | TS_PSTART;
+		t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART;
 		setrun_locked(t);
 	} else if (t != curthread && t->t_state == TS_ONPROC) {
 		aston(t);	/* make it do issig promptly */
@@ -608,6 +608,21 @@ issig_forreal(void)
 		}

 		/*
+		 * Allow the brand the chance to alter (or suppress) delivery
+		 * of this signal.
+		 */
+		if (PROC_IS_BRANDED(p) && BROP(p)->b_issig_stop != NULL) {
+			/*
+			 * The brand hook will return 0 if it would like
+			 * us to drive on, or -1 if we should restart
+			 * the loop to check other conditions.
+			 */
+			if (BROP(p)->b_issig_stop(p, lwp) != 0) {
+				continue;
+			}
+		}
+
+		/*
 		 * Honor requested stop before dealing with the
 		 * current signal; a debugger may change it.
 		 * Do not want to go back to loop here since this is a special
@@ -939,6 +954,16 @@ stop(int why, int what)
 		}
 		break;

+	case PR_BRAND:
+		/*
+		 * We have been stopped by the brand code for a brand-private
+		 * reason. This is an asynchronous stop affecting only this
+		 * LWP.
+		 */
+		VERIFY(PROC_IS_BRANDED(p));
+		flags &= ~TS_BSTART;
+		break;
+
 	default:	/* /proc stop */
 		flags &= ~TS_PSTART;
 		/*
@@ -1050,7 +1075,7 @@ stop(int why, int what)
 		}
 	}

-	if (why != PR_JOBCONTROL && why != PR_CHECKPOINT) {
+	if (why != PR_JOBCONTROL && why != PR_CHECKPOINT && why != PR_BRAND) {
 		/*
 		 * Do process-level notification when all lwps are
 		 * either stopped on events of interest to /proc
@@ -1156,6 +1181,13 @@ stop(int why, int what)
 	if (why == PR_CHECKPOINT)
 		del_one_utstop();

+	/*
+	 * Allow the brand to post notification of this stop condition.
+	 */
+	if (PROC_IS_BRANDED(p) && BROP(p)->b_stop_notify != NULL) {
+		BROP(p)->b_stop_notify(p, lwp, why, what);
+	}
+
 	thread_lock(t);
 	ASSERT((t->t_schedflag & TS_ALLSTART) == 0);
 	t->t_schedflag |= flags;
@@ -1177,7 +1209,7 @@ stop(int why, int what)
 	    (p->p_flag & (SEXITLWPS|SKILLED))) {
 		p->p_stopsig = 0;
 		thread_lock(t);
-		t->t_schedflag |= TS_XSTART | TS_PSTART;
+		t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART;
 		setrun_locked(t);
 		thread_unlock_nopreempt(t);
 	} else if (why == PR_JOBCONTROL) {
@@ -1795,6 +1827,15 @@ sigcld_repost()
 	sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
 	mutex_enter(&pidlock);
+	if (PROC_IS_BRANDED(pp) && BROP(pp)->b_sigcld_repost != NULL) {
+		/*
+		 * Allow the brand to inject synthetic SIGCLD signals.
+		 */
+		if (BROP(pp)->b_sigcld_repost(pp, sqp) == 0) {
+			mutex_exit(&pidlock);
+			return;
+		}
+	}
 	for (cp = pp->p_child; cp; cp = cp->p_sibling) {
 		if (cp->p_pidflag & CLDPEND) {
 			post_sigcld(cp, sqp);
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index 285aeac032..347a90a022 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -2624,6 +2624,7 @@ zone_init(void)
 	zone0.zone_ntasks = 1;
 	mutex_exit(&p0.p_lock);
 	zone0.zone_restart_init = B_TRUE;
+	zone0.zone_reboot_on_init_exit = B_FALSE;
 	zone0.zone_init_status = -1;
 	zone0.zone_brand = &native_brand;
 	rctl_prealloc_destroy(gp);
@@ -4669,8 +4670,9 @@ parse_rctls(caddr_t ubuf, size_t buflen, nvlist_t **nvlp)
 		error = EINVAL;
 		name = nvpair_name(nvp);
-		if (strncmp(nvpair_name(nvp), "zone.", sizeof ("zone.") - 1)
-		    != 0 || nvpair_type(nvp) != DATA_TYPE_NVLIST_ARRAY) {
+		if ((strncmp(name, "zone.", sizeof ("zone.") - 1) != 0 &&
+		    strncmp(name, "project.", sizeof ("project.") - 1) != 0) ||
+		    nvpair_type(nvp) != DATA_TYPE_NVLIST_ARRAY) {
 			goto out;
 		}
 		if ((hndl = rctl_hndl_lookup(name)) == -1) {
@@ -4819,6 +4821,7 @@ zone_create(const char *zone_name, const char *zone_root,
 	zone->zone_ncpus = 0;
 	zone->zone_ncpus_online = 0;
 	zone->zone_restart_init = B_TRUE;
+	zone->zone_reboot_on_init_exit = B_FALSE;
 	zone->zone_init_status = -1;
 	zone->zone_brand = &native_brand;
 	zone->zone_initname = NULL;
@@ -5045,8 +5048,8 @@ zone_create(const char *zone_name, const char *zone_root,
 	/*
 	 * The process, task, and project rctls are probably wrong;
 	 * we need an interface to get the default values of all rctls,
-	 * and initialize zsched appropriately. I'm not sure that that
-	 * makes much of a difference, though.
+	 * and initialize zsched appropriately. However, we allow zoneadmd
+	 * to pass down both zone and project rctls for the zone's init.
 	 */
 	error = newproc(zsched, (void *)&zarg, syscid, minclsyspri, NULL, 0);
 	if (error != 0) {
diff --git a/usr/src/uts/common/smbsrv/smb_kproto.h b/usr/src/uts/common/smbsrv/smb_kproto.h
index f2de265176..ad7402fd80 100644
--- a/usr/src/uts/common/smbsrv/smb_kproto.h
+++ b/usr/src/uts/common/smbsrv/smb_kproto.h
@@ -521,6 +521,15 @@ void smb_session_disconnect_from_share(smb_llist_t *, char *);
 smb_user_t *smb_session_dup_user(smb_session_t *, char *, char *);
 smb_user_t *smb_session_lookup_uid(smb_session_t *, uint16_t);
 void smb_session_post_user(smb_session_t *, smb_user_t *);
+void smb_session_post_tree(smb_session_t *, smb_tree_t *);
+smb_tree_t *smb_session_lookup_tree(smb_session_t *, uint16_t);
+smb_tree_t *smb_session_lookup_share(smb_session_t *, const char *,
+    smb_tree_t *);
+smb_tree_t *smb_session_lookup_volume(smb_session_t *, const char *,
+    smb_tree_t *);
+void smb_session_close_pid(smb_session_t *, uint16_t);
+void smb_session_disconnect_owned_trees(smb_session_t *, smb_user_t *);
+void smb_session_disconnect_trees(smb_session_t *);
 void smb_session_disconnect_share(smb_session_t *, const char *);
 void smb_session_getclient(smb_session_t *, char *, size_t);
 boolean_t smb_session_isclient(smb_session_t *, const char *);
@@ -539,10 +548,10 @@ void smb_request_free(smb_request_t *);
 /*
  * ofile functions (file smb_ofile.c)
  */
-smb_ofile_t *smb_ofile_lookup_by_fid(smb_tree_t *, uint16_t);
+smb_ofile_t *smb_ofile_lookup_by_fid(smb_request_t *, uint16_t);
 smb_ofile_t *smb_ofile_lookup_by_uniqid(smb_tree_t *, uint32_t);
 boolean_t smb_ofile_disallow_fclose(smb_ofile_t *);
-smb_ofile_t *smb_ofile_open(smb_tree_t *, smb_node_t *, uint16_t,
+smb_ofile_t *smb_ofile_open(smb_request_t *, smb_node_t *, uint16_t,
     smb_arg_open_t *, uint16_t, uint32_t, smb_error_t *);
 void smb_ofile_close(smb_ofile_t *, int32_t);
 void smb_ofile_delete(void *);
@@ -603,18 +612,11 @@ smb_user_t *smb_user_login(smb_session_t *, cred_t *,
 smb_user_t *smb_user_dup(smb_user_t *);
 void smb_user_logoff(smb_user_t *);
 void smb_user_delete(void *);
-void smb_user_post_tree(smb_user_t *, smb_tree_t *);
-smb_tree_t *smb_user_lookup_tree(smb_user_t *, uint16_t);
-smb_tree_t *smb_user_lookup_share(smb_user_t *, const char *, smb_tree_t *);
-smb_tree_t *smb_user_lookup_volume(smb_user_t *, const char *, smb_tree_t *);
 boolean_t smb_user_is_admin(smb_user_t *);
 boolean_t smb_user_namecmp(smb_user_t *, const char *);
 int smb_user_enum(smb_user_t *, smb_svcenum_t *);
-void smb_user_close_pid(smb_user_t *, uint16_t);
-void smb_user_disconnect_trees(smb_user_t *user);
-void smb_user_disconnect_share(smb_user_t *, const char *);
-int smb_user_fclose(smb_user_t *, uint32_t);
 boolean_t smb_user_hold(smb_user_t *);
+void smb_user_hold_internal(smb_user_t *);
 void smb_user_release(smb_user_t *);
 cred_t *smb_user_getcred(smb_user_t *);
 cred_t *smb_user_getprivcred(smb_user_t *);
@@ -637,7 +639,7 @@ int smb_tree_enum(smb_tree_t *, smb_svcenum_t *);
 int smb_tree_fclose(smb_tree_t *, uint32_t);
 boolean_t smb_tree_hold(smb_tree_t *);
 void smb_tree_release(smb_tree_t *);
-smb_odir_t *smb_tree_lookup_odir(smb_tree_t *, uint16_t);
+smb_odir_t *smb_tree_lookup_odir(smb_request_t *, uint16_t);
 boolean_t smb_tree_is_connected(smb_tree_t *);

 #define	SMB_TREE_GET_TID(tree)		((tree)->t_tid)
diff --git a/usr/src/uts/common/smbsrv/smb_ktypes.h b/usr/src/uts/common/smbsrv/smb_ktypes.h
index 493e7130a7..2c5d102f62 100644
--- a/usr/src/uts/common/smbsrv/smb_ktypes.h
+++ b/usr/src/uts/common/smbsrv/smb_ktypes.h
@@ -908,7 +908,9 @@ typedef struct smb_session {
 	smb_slist_t		s_req_list;
 	smb_llist_t		s_xa_list;
 	smb_llist_t		s_user_list;
+	smb_llist_t		s_tree_list;
 	smb_idpool_t		s_uid_pool;
+	smb_idpool_t		s_tid_pool;
 	smb_txlst_t		s_txlst;

 	volatile uint32_t	s_tree_cnt;
@@ -975,9 +977,6 @@ typedef struct smb_user {
 	cred_t			*u_cred;
 	cred_t			*u_privcred;

-	smb_llist_t		u_tree_list;
-	smb_idpool_t		u_tid_pool;
-
 	uint32_t		u_refcnt;
 	uint32_t		u_flags;
 	uint32_t		u_privileges;
@@ -1028,7 +1027,11 @@ typedef struct smb_tree {
 	struct smb_server	*t_server;
 	smb_session_t		*t_session;
-	smb_user_t		*t_user;
+	/*
+	 * user whose uid was in the tree connect message
+	 * ("owner" in MS-CIFS parlance, see section 2.2.1.6 definition of FID)
+	 */
+	smb_user_t		*t_owner;
 	smb_node_t		*t_snode;

 	smb_llist_t		t_ofile_list;
@@ -1259,6 +1262,7 @@ typedef struct smb_odir {
 	list_node_t		d_lnd;
 	smb_odir_state_t	d_state;
 	smb_session_t		*d_session;
+	smb_user_t		*d_user;
 	smb_tree_t		*d_tree;
 	smb_node_t		*d_dnode;
 	cred_t			*d_cred;
diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h
index 3486ae864d..b3abada863 100644
--- a/usr/src/uts/common/sys/brand.h
+++ b/usr/src/uts/common/sys/brand.h
@@ -21,7 +21,7 @@
 /*
  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
  */

 #ifndef _SYS_BRAND_H
@@ -132,6 +132,11 @@ struct brand_ops {
 	boolean_t (*b_native_exec)(uint8_t, const char **);
 	void (*b_ptrace_exectrap)(proc_t *);
 	uint32_t (*b_map32limit)(proc_t *);
+	void (*b_stop_notify)(proc_t *, klwp_t *, ushort_t, ushort_t);
+	int (*b_waitid_helper)(idtype_t, id_t, k_siginfo_t *, int,
+	    boolean_t *, int *);
+	int (*b_sigcld_repost)(proc_t *, sigqueue_t *);
+	int (*b_issig_stop)(proc_t *, klwp_t *);
 };

 /*
diff --git a/usr/src/uts/common/sys/procfs.h b/usr/src/uts/common/sys/procfs.h
index f592fd9dcf..501af712ef 100644
--- a/usr/src/uts/common/sys/procfs.h
+++ b/usr/src/uts/common/sys/procfs.h
@@ -25,6 +25,7 @@
  */
 /*
  * Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
  */

 #ifndef _SYS_PROCFS_H
@@ -233,6 +234,7 @@ typedef struct pstatus {
 #define	PR_FAULTED	6
 #define	PR_SUSPENDED	7
 #define	PR_CHECKPOINT	8
+#define	PR_BRAND	9

 /*
  * lwp ps(1) information file. /proc/<pid>/lwp/<lwpid>/lwpsinfo
diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h
index 9f2e166fea..41ea2331df 100644
--- a/usr/src/uts/common/sys/thread.h
+++ b/usr/src/uts/common/sys/thread.h
@@ -419,8 +419,9 @@ typedef struct _kthread {
 #define	TS_RESUME	0x1000	/* setrun() by CPR resume process */
 #define	TS_CREATE	0x2000	/* setrun() by syslwp_create() */
 #define	TS_RUNQMATCH	0x4000	/* exact run queue balancing by setbackdq() */
+#define	TS_BSTART	0x8000	/* setrun() by brand */
 #define	TS_ALLSTART	\
-	(TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE)
+	(TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE|TS_BSTART)
 #define	TS_ANYWAITQ	(TS_PROJWAITQ|TS_ZONEWAITQ)

 /*
@@ -448,6 +449,10 @@ typedef struct _kthread {
 #define	ISTOPPED(t) ((t)->t_state == TS_STOPPED && \
 			!((t)->t_schedflag & TS_PSTART))

+/* True if thread is stopped for a brand-specific reason */
+#define	BSTOPPED(t) ((t)->t_state == TS_STOPPED && \
+			!((t)->t_schedflag & TS_BSTART))
+
 /* True if thread is asleep and wakeable */
 #define	ISWAKEABLE(t) (((t)->t_state == TS_SLEEP && \
 			((t)->t_flag & T_WAKEABLE)))
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 7ab9377e16..a5d1610842 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -594,6 +594,7 @@ typedef struct zone {
 	tsol_mlp_list_t	zone_mlps;	/* MLPs on zone-private addresses */

 	boolean_t	zone_restart_init;	/* Restart init if it dies? */
+	boolean_t	zone_reboot_on_init_exit;	/* Reboot if init dies? */
 	struct brand	*zone_brand;		/* zone's brand */
 	void		*zone_brand_data;	/* store brand specific data */
 	id_t		zone_defaultcid;	/* dflt scheduling class id */