summary refs log tree commit diff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r-- usr/src/common/brand/lx/lx_syscall.h 47
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/clone.c 28
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/wait.c 19
-rw-r--r-- usr/src/uts/common/brand/lx/os/lx_brand.c 54
-rw-r--r-- usr/src/uts/common/brand/lx/os/lx_misc.c 153
-rw-r--r-- usr/src/uts/common/brand/lx/sys/lx_brand.h 22
-rw-r--r-- usr/src/uts/common/brand/lx/syscall/lx_clone.c 109
-rw-r--r-- usr/src/uts/common/brand/sn1/sn1_brand.c 5
-rw-r--r-- usr/src/uts/common/brand/sngl/sngl_brand.c 8
-rw-r--r-- usr/src/uts/common/brand/solaris10/s10_brand.c 5
-rw-r--r-- usr/src/uts/common/os/brand.c 3
-rw-r--r-- usr/src/uts/common/os/exit.c 79
-rw-r--r-- usr/src/uts/common/sys/brand.h 4
-rw-r--r-- usr/src/uts/common/sys/proc.h 11
-rw-r--r-- usr/src/uts/i86pc/ml/offsets.in 2
-rw-r--r-- usr/src/uts/intel/brand/common/brand_asm.h 2
-rw-r--r-- usr/src/uts/sun4/brand/common/brand_solaris.s 2
-rw-r--r-- usr/src/uts/sun4/ml/offsets.in 2
18 files changed, 473 insertions, 82 deletions
diff --git a/usr/src/common/brand/lx/lx_syscall.h b/usr/src/common/brand/lx/lx_syscall.h
new file mode 100644
index 0000000000..7bfc0537c9
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_syscall.h
@@ -0,0 +1,47 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LX_SYSCALL_H
+#define _LX_SYSCALL_H
+
+#include <sys/lx_brand.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The br_scall_args field of lx_lwp_data is going to be populated with
+ * pointers to structs. The types of these structs should be defined in this
+ * header file. These are Linux specific arguments to system calls that don't
+ * exist in illumos. Each section should be labelled with which system call it
+ * belongs to.
+ */
+
+/*
+ * arguments for waitpid(2)
+ *
+ * LX_WNOTHREAD, LX_WALL and LX_WCLONE are Linux-only wait options. The
+ * userland wait emulation masks them out of the caller's options, places
+ * them in an lx_waitid_args_t and hands that to the kernel with the
+ * B_STORE_ARGS brand command. The kernel-side lx_wait_filter() reads
+ * waitid_flags back through LX_ARGS(waitid); it examines LX_WALL and
+ * LX_WCLONE.
+ */
+/* see comments in usr/src/lib/brand/lx/lx_brand/common/wait.c */
+#define LX_WNOTHREAD 0x20000000 /* Do not wait on siblings' children */
+#define LX_WALL 0x40000000 /* Wait on all children */
+#define LX_WCLONE 0x80000000 /* Wait only on clone children */
+typedef struct lx_waitid_args {
+ int waitid_flags; /* subset of LX_WNOTHREAD | LX_WALL | LX_WCLONE */
+} lx_waitid_args_t;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_SYSCALL_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/common/clone.c b/usr/src/lib/brand/lx/lx_brand/common/clone.c
index 8752d0b02f..abd5fdea04 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/clone.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/clone.c
@@ -48,6 +48,7 @@
#include <sys/lx_brand.h>
#include <sys/lx_debug.h>
#include <sys/lx_thread.h>
+#include <sys/fork.h>
#define LX_CSIGNAL 0x000000ff
#define LX_CLONE_VM 0x00000100
@@ -67,7 +68,8 @@
#define LX_CLONE_CHILD_SETTID 0x01000000
#define SHARED_AS \
- (LX_CLONE_VM | LX_CLONE_FS | LX_CLONE_FILES | LX_CLONE_SIGHAND)
+ (LX_CLONE_VM | LX_CLONE_FS | LX_CLONE_FILES | LX_CLONE_SIGHAND \
+ | LX_CLONE_THREAD)
#define CLONE_VFORK (LX_CLONE_VM | LX_CLONE_VFORK)
#define CLONE_TD (LX_CLONE_THREAD|LX_CLONE_DETACH)
@@ -353,6 +355,7 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
int pid;
lx_regs_t *rp;
sigset_t sigmask;
+ int fork_flags = 0;
if (flags & LX_CLONE_SETTLS) {
lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p ldt=0x%p "
@@ -413,13 +416,16 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
if (flags & LX_CLONE_PTRACE)
lx_ptrace_fork();
+ if ((flags & LX_CSIGNAL) == 0)
+ fork_flags |= FORK_NOSIGCHLD;
+
if (flags & LX_CLONE_VFORK) {
is_vforked++;
- rval = vfork();
+ rval = vforkx(fork_flags);
if (rval != 0)
is_vforked--;
} else {
- rval = fork1();
+ rval = forkx(fork_flags);
if (rval == 0 && lx_is_rpm)
(void) sleep(lx_rpm_delay);
}
@@ -449,12 +455,24 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
if (rval != 0)
return ((rval < 0) ? -errno : rval);
+
+ /*
+ * Set up additional data in the lx_proc_data structure as
+ * necessary.
+ */
+ rval = syscall(SYS_brand, B_EMULATE_SYSCALL + LX_SYS_clone,
+ flags, cldstk, ptidp, ldtinfo, ctidp, NULL);
+ if (rval < 0) {
+ return (rval);
+ }
+
/*
* If provided, the child needs its new stack set up.
*/
if (cldstk)
lx_setup_clone(rp->lxr_gs, (void *)rp->lxr_eip, cldstk);
+ /* lx_setup_clone() doesn't return */
lx_ptrace_stop_if_option(LX_PTRACE_O_TRACECLONE);
return (0);
}
@@ -465,8 +483,8 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
*/
if (((flags & SHARED_AS) != SHARED_AS)) {
lx_unsupported(gettext(
- "clone(2) requires that all or none of CLONE_VM "
- "CLONE_FS, CLONE_FILES, and CLONE_SIGHAND be set.\n"));
+ "clone(2) requires that all or none of CLONE_VM, CLONE_FS,"
+ "CLONE_FILES, CLONE_THREAD and CLONE_SIGHAND be set.\n"));
return (-ENOTSUP);
}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/wait.c b/usr/src/lib/brand/lx/lx_brand/common/wait.c
index 7512838bf3..7485808a2b 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/wait.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/wait.c
@@ -72,10 +72,12 @@
#include <sys/lx_signal.h>
#include <sys/lx_misc.h>
#include <sys/lx_syscall.h>
+#include <sys/syscall.h>
#include <sys/times.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>
+#include <lx_syscall.h>
/*
* Convert between Linux options and Solaris options, returning -1 if any
@@ -99,13 +101,28 @@ static int
ltos_options(uintptr_t options)
{
int newoptions = 0;
+ int rval;
+ lx_waitid_args_t extra;
if (((options) & ~(LX_WNOHANG | LX_WUNTRACED | LX_WEXITED |
LX_WCONTINUED | LX_WNOWAIT | LX_WNOTHREAD | LX_WALL |
LX_WCLONE)) != 0) {
return (-1);
}
- /* XXX implement LX_WNOTHREAD, LX_WALL, LX_WCLONE */
+ /*
+ * We use the B_STORE_ARGS command to store any of LX_WNOTHREAD,
+ * LX_WALL, and LX_WCLONE that have been set as options on this waitid
+ * call. These flags are stored as part of the lwp_brand_data, so that
+ * when there is a later syscall to waitid, the brand code there can
+ * detect that we added extra flags here and use them as appropriate.
+ * We pass them in here rather than the normal channel for flags to
+ * prevent polluting the namespace.
+ */
+ extra.waitid_flags = options & (LX_WNOTHREAD | LX_WALL | LX_WCLONE);
+ rval = syscall(SYS_brand, B_STORE_ARGS, &extra,
+ sizeof (lx_waitid_args_t), NULL, NULL, NULL, NULL);
+ if (rval < 0)
+ return (rval);
if (options & LX_WNOHANG)
newoptions |= WNOHANG;
diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c
index 1ddc0367c8..972f76431e 100644
--- a/usr/src/uts/common/brand/lx/os/lx_brand.c
+++ b/usr/src/uts/common/brand/lx/os/lx_brand.c
@@ -58,6 +58,7 @@
#include <sys/zone.h>
#include <sys/brand.h>
#include <sys/sdt.h>
+#include <lx_signum.h>
int lx_debug = 0;
@@ -78,6 +79,8 @@ extern int lx_initlwp(klwp_t *);
extern void lx_forklwp(klwp_t *, klwp_t *);
extern void lx_exitlwp(klwp_t *);
extern void lx_freelwp(klwp_t *);
+extern void lx_exit_with_sig(proc_t *, sigqueue_t *, void *);
+extern boolean_t lx_wait_filter(proc_t *, proc_t *);
extern greg_t lx_fixsegreg(greg_t, model_t);
extern int lx_sched_affinity(int, uintptr_t, int, uintptr_t, int64_t *);
@@ -112,6 +115,8 @@ struct brand_ops lx_brops = {
NULL,
NULL,
NSIG,
+ lx_exit_with_sig,
+ lx_wait_filter,
};
struct brand_mach_ops lx_mops = {
@@ -127,7 +132,8 @@ struct brand lx_brand = {
BRAND_VER_1,
"lx",
&lx_brops,
- &lx_mops
+ &lx_mops,
+ sizeof (struct lx_proc_data)
};
static struct modlbrand modlbrand = {
@@ -142,6 +148,7 @@ void
lx_proc_exit(proc_t *p, klwp_t *lwp)
{
zone_t *z = p->p_zone;
+ int sig = ptolxproc(p)->l_signal;
ASSERT(p->p_brand != NULL);
ASSERT(p->p_brand_data != NULL);
@@ -167,8 +174,7 @@ lx_proc_exit(proc_t *p, klwp_t *lwp)
*/
if (lwp != NULL)
lx_exitlwp(lwp);
- kmem_free(p->p_brand_data, sizeof (struct lx_proc_data));
- p->p_brand_data = NULL;
+ p->p_exit_data = sig;
}
void
@@ -181,6 +187,7 @@ lx_setbrand(proc_t *p)
ASSERT(ttolxlwp(curthread) == NULL);
p->p_brand_data = kmem_zalloc(sizeof (struct lx_proc_data), KM_SLEEP);
+ ptolxproc(p)->l_signal = stol_signo[SIGCHLD];
/*
* This routine can only be called for single-threaded processes.
@@ -760,6 +767,47 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
return (0);
+ case B_STORE_ARGS:
+ /*
+ * B_STORE_ARGS subcommand
+ * arg1 = address of struct to be copied in
+ * arg2 = size of the struct being copied in
+ * arg3-arg6 ignored
+ * rval = the amount of data copied.
+ */
+ {
+ int err;
+ lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+ void *buf;
+
+ /* only have upper limit because arg2 is unsigned */
+ if (arg2 > LX_BR_ARGS_SIZE_MAX) {
+ return (EINVAL);
+ }
+
+ buf = kmem_alloc(arg2, KM_SLEEP);
+ if ((err = copyin((void *)arg1, buf, arg2)) != 0) {
+ lx_print("Failed to copyin scall arg at 0x%p\n",
+ (void *) arg1);
+ kmem_free(buf, arg2);
+ /*
+ * Purposely not setting br_scall_args to NULL
+ * to preserve data for debugging.
+ */
+ return (EFAULT);
+ }
+
+ if (lwpd->br_scall_args != NULL) {
+ ASSERT(lwpd->br_args_size > 0);
+ kmem_free(lwpd->br_scall_args,
+ lwpd->br_args_size);
+ }
+
+ lwpd->br_scall_args = buf;
+ lwpd->br_args_size = arg2;
+ *rval = arg2;
+ return (0);
+ }
default:
linux_call = cmd - B_EMULATE_SYSCALL;
/*
diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c
index 797ac399d0..2d565a4902 100644
--- a/usr/src/uts/common/brand/lx/os/lx_misc.c
+++ b/usr/src/uts/common/brand/lx/os/lx_misc.c
@@ -38,6 +38,12 @@
#include <sys/lx_brand.h>
#include <sys/lx_pid.h>
#include <sys/lx_futex.h>
+#include <sys/cmn_err.h>
+#include <sys/siginfo.h>
+#include <sys/contract/process_impl.h>
+#include <lx_signum.h>
+#include <lx_syscall.h>
+#include <sys/proc.h>
/* Linux specific functions and definitions */
void lx_setrval(klwp_t *, int, int);
@@ -184,6 +190,10 @@ lx_exitlwp(klwp_t *lwp)
}
free:
+ if (lwpd->br_scall_args != NULL) {
+ ASSERT(lwpd->br_args_size > 0);
+ kmem_free(lwpd->br_scall_args, lwpd->br_args_size);
+ }
if (sqp)
kmem_free(sqp, sizeof (sigqueue_t));
@@ -221,7 +231,8 @@ lx_initlwp(klwp_t *lwp)
lwpd->br_set_ctidp = NULL;
lwpd->br_signal = 0;
/*
- * lwpd->br_affinitymask was zeroed by kmem_zalloc().
+ * lwpd->br_affinitymask was zeroed by kmem_zalloc()
+ * as were lwpd->br_scall_args and lwpd->br_args_size.
*/
/*
@@ -280,7 +291,7 @@ lx_forklwp(klwp_t *srclwp, klwp_t *dstlwp)
* copy only these flags
*/
dst->br_lwp_flags = src->br_lwp_flags & BR_CPU_BOUND;
- dst->br_clone_args = NULL;
+ dst->br_scall_args = NULL;
}
/*
@@ -372,3 +383,141 @@ lx_fixsegreg(greg_t sr, model_t datamodel)
return (sr | SEL_TI_LDT | SEL_UPL);
#endif /* __amd64 */
}
+
+/*
+ * These two functions simulate winfo and post_sigcld for the lx brand. The
+ * difference is delivering a designated signal as opposed to always SIGCLD.
+ *
+ * lx_winfo() zeroes *ip and fills it in for the process pp whose status is
+ * being reported (lx_post_exit_sig() passes the exiting child). The signal
+ * number is dat->l_signal, a Linux signal number, translated through
+ * ltos_signo[]; the wait code, pid, contract id, zone id, wait status and
+ * system/user times are copied from pp. It asserts that pidlock is held.
+ *
+ * lx_post_exit_sig() builds that siginfo inside sqp for the exiting child
+ * cp and queues it on cp's parent with sigaddqa(), holding the parent's
+ * p_lock around both steps. It asserts that pidlock is held and that sqp is
+ * not NULL. The caller in this file treats sqp as consumed afterwards.
+ */
+void
+lx_winfo(proc_t *pp, k_siginfo_t *ip, struct lx_proc_data *dat)
+{
+ ASSERT(MUTEX_HELD(&pidlock));
+ bzero(ip, sizeof (k_siginfo_t));
+ ip->si_signo = ltos_signo[dat->l_signal];
+ ip->si_code = pp->p_wcode;
+ ip->si_pid = pp->p_pid;
+ ip->si_ctid = PRCTID(pp);
+ ip->si_zoneid = pp->p_zone->zone_id;
+ ip->si_status = pp->p_wdata;
+ ip->si_stime = pp->p_stime;
+ ip->si_utime = pp->p_utime;
+}
+
+void
+lx_post_exit_sig(proc_t *cp, sigqueue_t *sqp, struct lx_proc_data *dat)
+{
+ proc_t *pp = cp->p_parent;
+
+ ASSERT(MUTEX_HELD(&pidlock));
+ mutex_enter(&pp->p_lock);
+ /*
+ * Since Linux doesn't queue SIGCHLD, or any other non RT
+ * signals, we just blindly deliver whatever signal we can.
+ */
+ ASSERT(sqp != NULL);
+ lx_winfo(cp, &sqp->sq_info, dat);
+ sigaddqa(pp, NULL, sqp);
+ sqp = NULL; /* clears only this function's by-value copy of the pointer */
+ mutex_exit(&pp->p_lock);
+}
+
+
+/*
+ * Brand specific code for exiting and sending a signal to the parent, as
+ * opposed to sigcld().
+ *
+ * cp is the exiting child, sqp a sigqueue_t supplied by the caller and
+ * brand_data the child's struct lx_proc_data (proc_exit() passes the brand
+ * data pointer it saved before clearing the brand). The function asserts
+ * that pidlock is held.
+ *
+ * For CLD_EXITED, CLD_DUMPED and CLD_KILLED the child is added to the
+ * parent's newstate list and waiters on the parent's p_cv are woken. Then:
+ * - if the parent has SNOWAIT set or ignores SIGCLD, the child is freed
+ *   immediately unless CLDWAITPID is set on it;
+ * - otherwise, if CLDNOSIGCHLD is clear on the child and l_signal is
+ *   non-zero, the signal is posted with lx_post_exit_sig() and sqp is
+ *   treated as consumed.
+ * An sqp that was not consumed is released with siginfofree(). The
+ * CLD_STOPPED, CLD_CONTINUED and CLD_TRAPPED codes are not expected here
+ * and cause a panic.
+ */
+void
+lx_exit_with_sig(proc_t *cp, sigqueue_t *sqp, void *brand_data)
+{
+ proc_t *pp = cp->p_parent;
+ struct lx_proc_data *lx_brand_data = brand_data;
+ ASSERT(MUTEX_HELD(&pidlock));
+
+ switch (cp->p_wcode) {
+ case CLD_EXITED:
+ case CLD_DUMPED:
+ case CLD_KILLED:
+ ASSERT(cp->p_stat == SZOMB);
+ /*
+ * The broadcast on p_srwchan_cv is a kludge to
+ * wakeup a possible thread in uadmin(A_SHUTDOWN).
+ */
+ cv_broadcast(&cp->p_srwchan_cv);
+
+ /*
+ * Add to newstate list of the parent
+ */
+ add_ns(pp, cp);
+
+ cv_broadcast(&pp->p_cv);
+ if ((pp->p_flag & SNOWAIT) ||
+ PTOU(pp)->u_signal[SIGCLD - 1] == SIG_IGN) {
+ if (!(cp->p_pidflag & CLDWAITPID))
+ freeproc(cp);
+ } else if (!(cp->p_pidflag & CLDNOSIGCHLD) &&
+ lx_brand_data->l_signal != 0) {
+ lx_post_exit_sig(cp, sqp, lx_brand_data);
+ sqp = NULL;
+ }
+ break;
+
+ case CLD_STOPPED:
+ case CLD_CONTINUED:
+ case CLD_TRAPPED:
+ panic("Should not be called in this case");
+ }
+
+ if (sqp)
+ siginfofree(sqp);
+}
+
+/*
+ * Filters based on arguments that have been passed in by a separate syscall
+ * using the B_STORE_ARGS mechanism. If the __WALL flag is set, no filter is
+ * applied, otherwise we look at the difference between a clone and non-clone
+ * process.
+ * The definition of a clone process in Linux is a thread that does not deliver
+ * SIGCHLD to its parent. The option __WCLONE indicates to wait only on clone
+ * processes. Without that option, a process should only wait on normal
+ * children. The following table shows the cases.
+ *
+ *                 default    __WCLONE
+ *   no SIGCHLD       -           X
+ *   SIGCHLD          X           -
+ *
+ * This is an XOR of __WCLONE being set, and SIGCHLD being the signal sent on
+ * process exit. Since (flags & __WCLONE) is not guaranteed to have the
+ * least-significant bit set when the flag is enabled, !! is used to place
+ * that bit into the least significant bit. Then, the bitwise XOR can be
+ * used, because there is no logical XOR in the C language.
+ *
+ * pp is the waiting process (not examined here) and cp is the candidate
+ * child. Returns B_TRUE when cp may be reported by the wait and B_FALSE when
+ * waitid() should skip it. When the current thread has no stored arguments
+ * every child passes. For a zombie or native-branded child the exit signal
+ * is read from p_exit_data; otherwise it is read from the child's
+ * lx_proc_data l_signal. cp->p_lock is held while those fields are read.
+ *
+ * NOTE(review): LX_ARGS(waitid) only casts br_scall_args; nothing visible
+ * here checks that br_args_size is at least sizeof (lx_waitid_args_t), and
+ * B_STORE_ARGS enforces only an upper bound on the size -- confirm that a
+ * short buffer cannot reach this code.
+ *
+ * More information on wait in lx brands can be found at
+ * usr/src/lib/brand/lx/lx_brand/common/wait.c.
+ */
+boolean_t
+lx_wait_filter(proc_t *pp, proc_t *cp)
+{
+ int flags;
+ boolean_t ret;
+
+ if (LX_ARGS(waitid) != NULL) {
+ flags = LX_ARGS(waitid)->waitid_flags;
+ mutex_enter(&cp->p_lock);
+ if (flags & LX_WALL) {
+ ret = B_TRUE;
+ } else if (cp->p_stat == SZOMB ||
+ cp->p_brand == &native_brand) {
+ ret = (((!!(flags & LX_WCLONE)) ^
+ (stol_signo[SIGCHLD] == cp->p_exit_data))
+ ? B_TRUE : B_FALSE);
+ } else {
+ ret = (((!!(flags & LX_WCLONE)) ^
+ (stol_signo[SIGCHLD] == ptolxproc(cp)->l_signal))
+ ? B_TRUE : B_FALSE);
+ }
+ mutex_exit(&cp->p_lock);
+ return (ret);
+ } else {
+ return (B_TRUE);
+ }
+}
diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h
index dcbd37c21b..49d3d0a446 100644
--- a/usr/src/uts/common/brand/lx/sys/lx_brand.h
+++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h
@@ -78,6 +78,7 @@ extern "C" {
#define B_PTRACE_EXT_OPTS 134
#define B_PTRACE_STOP_FOR_OPT 135
#define B_UNSUPPORTED 136
+#define B_STORE_ARGS 137
#define B_EMULATE_SYSCALL 192
@@ -186,12 +187,13 @@ typedef struct lx_proc_data {
uintptr_t l_handler; /* address of user-space handler */
uintptr_t l_tracehandler; /* address of user-space traced handler */
uintptr_t l_traceflag; /* address of 32-bit tracing flag */
- void (*l_sigrestorer[MAXSIG])(void); /* array of sigrestorer fns */
pid_t l_ppid; /* pid of originating parent proc */
uint64_t l_ptrace; /* process being observed with ptrace */
uint_t l_ptrace_opts; /* process's extended ptrace options */
uint_t l_ptrace_event; /* extended ptrace option trap event */
lx_elf_data_t l_elf_data; /* ELF data for linux executable */
+ int l_signal; /* signal to deliver to parent when this */
+ /* thread group dies */
} lx_proc_data_t;
#endif /* _KERNEL */
@@ -231,14 +233,22 @@ typedef struct lx_lwp_data {
void *br_set_ctidp; /* clone thread id ptr */
/*
- * The following struct is used by lx_clone()
- * to pass info into fork()
+ * The following struct is used by some system calls to pass extra
+ * flags into the kernel without impinging on the namespace for
+ * illumos.
*/
- void *br_clone_args;
+ void *br_scall_args;
+ int br_args_size; /* size in bytes of br_scall_args */
uint_t br_ptrace; /* ptrace is active for this LWP */
} lx_lwp_data_t;
+/*
+ * Upper limit on br_args_size, low because this value can persist until
+ * overridden with another value, and the size is given from userland.
+ */
+#define LX_BR_ARGS_SIZE_MAX (1024)
+
/* brand specific data */
typedef struct lx_zone_data {
char lxzd_kernel_version[LX_VERS_MAX];
@@ -250,6 +260,10 @@ typedef struct lx_zone_data {
#define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t))
#define lwptolxlwp(l) ((struct lx_lwp_data *)lwptolwpbrand(l))
#define ttolxproc(t) ((struct lx_proc_data *)(t)->t_procp->p_brand_data)
+#define ptolxproc(p) ((struct lx_proc_data *)(p)->p_brand_data)
+/* Macro for converting to system call arguments. */
+#define LX_ARGS(scall) ((struct lx_##scall##_args *)\
+ (ttolxlwp(curthread)->br_scall_args))
void lx_brand_int80_callback(void);
int64_t lx_emulate_syscall(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_clone.c b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
index 3c5c7464ab..4006e1187f 100644
--- a/usr/src/uts/common/brand/lx/syscall/lx_clone.c
+++ b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
@@ -30,6 +30,7 @@
#include <sys/brand.h>
#include <sys/lx_brand.h>
#include <sys/lx_ldt.h>
+#include <lx_signum.h>
#define LX_CSIGNAL 0x000000ff
#define LX_CLONE_VM 0x00000100
@@ -57,6 +58,7 @@ long
lx_clone(int flags, void *stkp, void *ptidp, void *ldtinfo, void *ctidp)
{
struct lx_lwp_data *lwpd = ttolxlwp(curthread);
+ struct lx_proc_data *lproc = ttolxproc(curthread);
struct ldt_info info;
struct user_desc descr;
int tls_index;
@@ -64,62 +66,65 @@ lx_clone(int flags, void *stkp, void *ptidp, void *ldtinfo, void *ctidp)
int signo;
signo = flags & LX_CSIGNAL;
- if (signo < 0 || signo > MAXSIG)
+ if (signo < 0 || signo > LX_NSIG)
return (set_errno(EINVAL));
- if (flags & LX_CLONE_SETTLS) {
- if (copyin((caddr_t)ldtinfo, &info, sizeof (info)))
- return (set_errno(EFAULT));
-
- if (LDT_INFO_EMPTY(&info))
- return (set_errno(EINVAL));
-
- entry = info.entry_number;
- if (entry < GDT_TLSMIN || entry > GDT_TLSMAX)
- return (set_errno(EINVAL));
-
- tls_index = entry - GDT_TLSMIN;
-
- /*
- * Convert the user-space structure into a real x86
- * descriptor and copy it into this LWP's TLS array. We
- * also load it into the GDT.
- */
- LDT_INFO_TO_DESC(&info, &descr);
- bcopy(&descr, &lwpd->br_tls[tls_index], sizeof (descr));
- lx_set_gdt(entry, &lwpd->br_tls[tls_index]);
+ if (!(flags & LX_CLONE_THREAD)) {
+ lproc->l_signal = signo;
} else {
- tls_index = -1;
- bzero(&descr, sizeof (descr));
- }
-
- lwpd->br_clear_ctidp =
- (flags & LX_CLONE_CHILD_CLEARTID) ? ctidp : NULL;
-
- if (signo && ! (flags & LX_CLONE_DETACH))
- lwpd->br_signal = signo;
- else
- lwpd->br_signal = 0;
-
- if (flags & LX_CLONE_THREAD)
- lwpd->br_tgid = curthread->t_procp->p_pid;
-
- if (flags & LX_CLONE_PARENT)
- lwpd->br_ppid = 0;
-
- if ((flags & LX_CLONE_CHILD_SETTID) && (ctidp != NULL) &&
- (suword32(ctidp, lwpd->br_pid) != 0)) {
- if (entry >= 0)
- lx_clear_gdt(entry);
- return (set_errno(EFAULT));
- }
- if ((flags & LX_CLONE_PARENT_SETTID) && (ptidp != NULL) &&
- (suword32(ptidp, lwpd->br_pid) != 0)) {
- if (entry >= 0)
- lx_clear_gdt(entry);
- return (set_errno(EFAULT));
+ if (flags & LX_CLONE_SETTLS) {
+ if (copyin((caddr_t)ldtinfo, &info, sizeof (info)))
+ return (set_errno(EFAULT));
+
+ if (LDT_INFO_EMPTY(&info))
+ return (set_errno(EINVAL));
+
+ entry = info.entry_number;
+ if (entry < GDT_TLSMIN || entry > GDT_TLSMAX)
+ return (set_errno(EINVAL));
+
+ tls_index = entry - GDT_TLSMIN;
+
+ /*
+ * Convert the user-space structure into a real x86
+ * descriptor and copy it into this LWP's TLS array. We
+ * also load it into the GDT.
+ */
+ LDT_INFO_TO_DESC(&info, &descr);
+ bcopy(&descr, &lwpd->br_tls[tls_index], sizeof (descr));
+ lx_set_gdt(entry, &lwpd->br_tls[tls_index]);
+ } else {
+ tls_index = -1;
+ bzero(&descr, sizeof (descr));
+ }
+
+ lwpd->br_clear_ctidp =
+ (flags & LX_CLONE_CHILD_CLEARTID) ? ctidp : NULL;
+
+ if (signo && ! (flags & LX_CLONE_DETACH))
+ lwpd->br_signal = signo;
+ else
+ lwpd->br_signal = 0;
+
+ if (flags & LX_CLONE_THREAD)
+ lwpd->br_tgid = curthread->t_procp->p_pid;
+
+ if (flags & LX_CLONE_PARENT)
+ lwpd->br_ppid = 0;
+
+ if ((flags & LX_CLONE_CHILD_SETTID) && (ctidp != NULL) &&
+ (suword32(ctidp, lwpd->br_pid) != 0)) {
+ if (entry >= 0)
+ lx_clear_gdt(entry);
+ return (set_errno(EFAULT));
+ }
+ if ((flags & LX_CLONE_PARENT_SETTID) && (ptidp != NULL) &&
+ (suword32(ptidp, lwpd->br_pid) != 0)) {
+ if (entry >= 0)
+ lx_clear_gdt(entry);
+ return (set_errno(EFAULT));
+ }
}
-
return (lwpd->br_pid);
}
diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.c b/usr/src/uts/common/brand/sn1/sn1_brand.c
index ab733a07cc..65a7760f63 100644
--- a/usr/src/uts/common/brand/sn1/sn1_brand.c
+++ b/usr/src/uts/common/brand/sn1/sn1_brand.c
@@ -79,6 +79,8 @@ struct brand_ops sn1_brops = {
NULL,
NULL,
NSIG,
+ NULL,
+ NULL,
};
#ifdef sparc
@@ -119,7 +121,8 @@ struct brand sn1_brand = {
BRAND_VER_1,
"sn1",
&sn1_brops,
- &sn1_mops
+ &sn1_mops,
+ sizeof (brand_proc_data_t),
};
static struct modlbrand modlbrand = {
diff --git a/usr/src/uts/common/brand/sngl/sngl_brand.c b/usr/src/uts/common/brand/sngl/sngl_brand.c
index 23e23286ea..b0d658a1d0 100644
--- a/usr/src/uts/common/brand/sngl/sngl_brand.c
+++ b/usr/src/uts/common/brand/sngl/sngl_brand.c
@@ -81,6 +81,8 @@ struct brand_ops sngl_brops = {
NULL,
NULL,
NSIG,
+ NULL,
+ NULL,
};
#ifdef __amd64
@@ -106,7 +108,8 @@ struct brand sngl_brand = {
BRAND_VER_1,
"sngl",
&sngl_brops,
- &sngl_mops
+ &sngl_mops,
+ sizeof (brand_proc_data_t),
};
static struct modlbrand modlbrand = {
@@ -147,7 +150,8 @@ sngl_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
int res;
*rval = 0;
- res = brand_solaris_cmd(cmd, arg1, arg2, arg3, &sngl_brand, SNGL_VERSION);
+ res = brand_solaris_cmd(cmd, arg1, arg2, arg3, &sngl_brand,
+ SNGL_VERSION);
if (res >= 0)
return (res);
diff --git a/usr/src/uts/common/brand/solaris10/s10_brand.c b/usr/src/uts/common/brand/solaris10/s10_brand.c
index 2e2309a33e..23ba010af7 100644
--- a/usr/src/uts/common/brand/solaris10/s10_brand.c
+++ b/usr/src/uts/common/brand/solaris10/s10_brand.c
@@ -84,6 +84,8 @@ struct brand_ops s10_brops = {
s10_sigset_native_to_s10,
s10_sigset_s10_to_native,
S10_NSIG,
+ NULL,
+ NULL,
};
#ifdef sparc
@@ -124,7 +126,8 @@ struct brand s10_brand = {
BRAND_VER_1,
"solaris10",
&s10_brops,
- &s10_mops
+ &s10_mops,
+ sizeof (brand_proc_data_t),
};
static struct modlbrand modlbrand = {
diff --git a/usr/src/uts/common/os/brand.c b/usr/src/uts/common/os/brand.c
index 2e7c604277..688c152eca 100644
--- a/usr/src/uts/common/os/brand.c
+++ b/usr/src/uts/common/os/brand.c
@@ -54,7 +54,8 @@ brand_t native_brand = {
BRAND_VER_1,
"native",
NULL,
- &native_mach_ops
+ &native_mach_ops,
+ 0,
};
/*
diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c
index 7c5b8323e3..612e936540 100644
--- a/usr/src/uts/common/os/exit.c
+++ b/usr/src/uts/common/os/exit.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2011, 2014 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -345,6 +345,8 @@ proc_exit(int why, int what)
refstr_t *cwd;
hrtime_t hrutime, hrstime;
int evaporate;
+ brand_t *orig_brand = NULL;
+ void *brand_data = NULL;
/*
* Stop and discard the process's lwps except for the current one,
@@ -374,10 +376,16 @@ proc_exit(int why, int what)
* is always the last lwp, will also perform lwp_exit and free brand
* data
*/
+ mutex_enter(&p->p_lock);
if (PROC_IS_BRANDED(p)) {
+ orig_brand = p->p_brand;
+ if (p->p_brand_data != NULL && orig_brand->b_data_size > 0) {
+ brand_data = p->p_brand_data;
+ }
lwp_detach_brand_hdlrs(lwp);
brand_clearbrand(p, B_FALSE);
}
+ mutex_exit(&p->p_lock);
/*
* Don't let init exit unless zone_start_init() failed its exec, or
@@ -839,8 +847,60 @@ proc_exit(int why, int what)
mutex_exit(&p->p_lock);
if (!evaporate) {
- p->p_pidflag &= ~CLDPEND;
- sigcld(p, sqp);
+ /*
+ * The brand specific code only runs when the brand has a
+ * function to call in place of sigcld, the brand data still
+ * exists, and the parent of the exiting process is not the
+ * global zone init. If the parent is the global zone init,
+ * then the process was reparented, and we don't want brand
+ * code delivering possibly strange signals to init. Also, init
+ * is not branded, so any brand specific exit data will not be
+ * picked up by init anyway.
+ * It is assumed by this code that any brand where
+ * b_exit_with_sig == NULL, will free its own brand_data rather
+ * than letting this piece of code free it.
+ */
+ if (orig_brand != NULL &&
+ orig_brand->b_ops->b_exit_with_sig != NULL &&
+ brand_data != NULL && p->p_ppid != 1) {
+ /*
+ * The code for _fini that could unload the brand_t
+ * blocks until the count of zones using the module
+ * reaches zero. Zones decrement the refcount on their
+ * brands only after all user tasks in that zone have
+ * exited and been waited on. The decrement on the
+ * brand's refcount happens in zone_destroy(). That
+ * depends on zone_shutdown() having been completed.
+ * zone_shutdown() includes a call to zone_empty(),
+ * where the zone waits for itself to reach the state
+ * ZONE_IS_EMPTY. This state is only set in either
+ * zone_shutdown(), when there are no user processes as
+ * the zone enters this function, or in
+ * zone_task_rele(). zone_task_rele() is called from
+ * code triggered by waiting on processes, not by the
+ * processes exiting through proc_exit(). This means
+ * all the branded processes that could exist for a
+ * specific brand_t must exit and get reaped before the
+ * refcount on the brand_t can reach 0. _fini will
+ * never unload the corresponding brand module before
+ * proc_exit finishes execution for all processes
+ * branded with a particular brand_t, which makes the
+ * operation below safe to do. Brands that wish to use
+ * this mechanism must wait in _fini as described
+ * above.
+ */
+ orig_brand->b_ops->b_exit_with_sig(p,
+ sqp, brand_data);
+ } else {
+ p->p_pidflag &= ~CLDPEND;
+ sigcld(p, sqp);
+ }
+ if (brand_data != NULL) {
+ kmem_free(brand_data, orig_brand->b_data_size);
+ brand_data = NULL;
+ orig_brand = NULL;
+ }
+
} else {
/*
* Do what sigcld() would do if the disposition
@@ -950,7 +1010,8 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
pp = ttoproc(curthread);
/*
- * lock parent mutex so that sibling chain can be searched.
+ * Anytime you are looking for a process, you take pidlock to prevent
+ * things from changing as you look.
*/
mutex_enter(&pidlock);
@@ -981,6 +1042,11 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
continue;
if (idtype == P_PGID && id != cp->p_pgrp)
continue;
+ if (PROC_IS_BRANDED(pp)) {
+ if (BROP(pp)->b_wait_filter != NULL &&
+ BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
+ continue;
+ }
switch (cp->p_wcode) {
@@ -1031,6 +1097,11 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
continue;
if (idtype == P_PGID && id != cp->p_pgrp)
continue;
+ if (PROC_IS_BRANDED(pp)) {
+ if (BROP(pp)->b_wait_filter != NULL &&
+ BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
+ continue;
+ }
switch (cp->p_wcode) {
case CLD_TRAPPED:
diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h
index badc3faff8..dfbbd870db 100644
--- a/usr/src/uts/common/sys/brand.h
+++ b/usr/src/uts/common/sys/brand.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_BRAND_H
@@ -125,6 +126,8 @@ struct brand_ops {
void (*b_sigset_native_to_brand)(sigset_t *);
void (*b_sigset_brand_to_native)(sigset_t *);
int b_nsig;
+ void (*b_exit_with_sig)(proc_t *, sigqueue_t *, void *);
+ boolean_t (*b_wait_filter)(proc_t *, proc_t *);
};
/*
@@ -135,6 +138,7 @@ typedef struct brand {
char *b_name;
struct brand_ops *b_ops;
struct brand_mach_ops *b_machops;
+ size_t b_data_size;
} brand_t;
extern brand_t native_brand;
diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h
index f1a2fc5485..ee5892066b 100644
--- a/usr/src/uts/common/sys/proc.h
+++ b/usr/src/uts/common/sys/proc.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -346,7 +347,12 @@ typedef struct proc {
struct zone *p_zone; /* zone in which process lives */
struct vnode *p_execdir; /* directory that p_exec came from */
struct brand *p_brand; /* process's brand */
- void *p_brand_data; /* per-process brand state */
+
+ /* per-process brand state */
+ union {
+ void *__brand_data;
+ int __exit_data;
+ } __p_brand_data;
/* additional lock to protect p_sessp (but not its contents) */
kmutex_t p_splock;
@@ -361,7 +367,8 @@ typedef struct proc {
*/
struct user p_user; /* (see sys/user.h) */
} proc_t;
-
+#define p_brand_data __p_brand_data.__brand_data
+#define p_exit_data __p_brand_data.__exit_data
#define PROC_T /* headers relying on proc_t are OK */
#ifdef _KERNEL
diff --git a/usr/src/uts/i86pc/ml/offsets.in b/usr/src/uts/i86pc/ml/offsets.in
index 50a27b3d30..9922891e56 100644
--- a/usr/src/uts/i86pc/ml/offsets.in
+++ b/usr/src/uts/i86pc/ml/offsets.in
@@ -80,7 +80,7 @@ proc PROCSIZE
p_agenttp
p_zone
p_brand
- p_brand_data
+ __p_brand_data
_kthread THREAD_SIZE
t_pcb T_LABEL
diff --git a/usr/src/uts/intel/brand/common/brand_asm.h b/usr/src/uts/intel/brand/common/brand_asm.h
index 1d540db2a9..c820d8e187 100644
--- a/usr/src/uts/intel/brand/common/brand_asm.h
+++ b/usr/src/uts/intel/brand/common/brand_asm.h
@@ -161,7 +161,7 @@ extern "C" {
#define GET_P_BRAND_DATA(sp, pcnt, reg) \
GET_PROCP(sp, pcnt, reg); \
- mov P_BRAND_DATA(reg), reg /* get p_brand_data */
+ mov __P_BRAND_DATA(reg), reg /* get p_brand_data */
/*
* Each of the following macros returns to the standard syscall codepath if
diff --git a/usr/src/uts/sun4/brand/common/brand_solaris.s b/usr/src/uts/sun4/brand/common/brand_solaris.s
index 889218bc5f..9097273036 100644
--- a/usr/src/uts/sun4/brand/common/brand_solaris.s
+++ b/usr/src/uts/sun4/brand/common/brand_solaris.s
@@ -236,7 +236,7 @@ _emulation_check:
#endif /* sun4v */
ldn [%g2 + CPU_THREAD], %g3; /* get thread ptr */
ldn [%g3 + T_PROCP], %g4; /* get proc ptr */
- ldn [%g4 + P_BRAND_DATA], %g5; /* get brand data ptr */
+ ldn [%g4 + __P_BRAND_DATA], %g5; /* get brand data ptr */
ldn [%g5 + SPD_HANDLER], %g5; /* get userland brnd hdlr ptr */
brz %g5, _exit; /* has it been set? */
nop;
diff --git a/usr/src/uts/sun4/ml/offsets.in b/usr/src/uts/sun4/ml/offsets.in
index 4f6d19ba01..de214274ee 100644
--- a/usr/src/uts/sun4/ml/offsets.in
+++ b/usr/src/uts/sun4/ml/offsets.in
@@ -109,7 +109,7 @@ proc PROCSIZE
p_utraps
p_agenttp
p_brand
- p_brand_data
+ __p_brand_data
\#define P_UTRAP4 (UT_ILLTRAP_INSTRUCTION * CPTRSIZE)
\#define P_UTRAP7 (UT_FP_DISABLED * CPTRSIZE)