summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/exec
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/exec')
-rw-r--r--usr/src/uts/common/exec/aout/aout.c5
-rw-r--r--usr/src/uts/common/exec/elf/elf.c429
-rw-r--r--usr/src/uts/common/exec/intp/intp.c35
-rw-r--r--usr/src/uts/common/exec/java/java.c3
-rw-r--r--usr/src/uts/common/exec/shbin/shbin.c5
5 files changed, 403 insertions, 74 deletions
diff --git a/usr/src/uts/common/exec/aout/aout.c b/usr/src/uts/common/exec/aout/aout.c
index fc45bd9544..5dbb2ed28c 100644
--- a/usr/src/uts/common/exec/aout/aout.c
+++ b/usr/src/uts/common/exec/aout/aout.c
@@ -22,6 +22,7 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2011 Bayard G. Bell. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -54,7 +55,7 @@
static int aoutexec(vnode_t *vp, execa_t *uap, uarg_t *args,
intpdata_t *idatap, int level, long *execsz, int setid,
- caddr_t exec_file, cred_t *cred, int brand_action);
+ caddr_t exec_file, cred_t *cred, int *brand_action);
static int get_aout_head(struct vnode **vpp, struct exdata *edp, long *execsz,
int *isdyn);
static int aoutcore(vnode_t *vp, proc_t *pp, cred_t *credp,
@@ -130,7 +131,7 @@ _info(struct modinfo *modinfop)
static int
aoutexec(vnode_t *vp, struct execa *uap, struct uarg *args,
struct intpdata *idatap, int level, long *execsz, int setid,
- caddr_t exec_file, cred_t *cred, int brand_action)
+ caddr_t exec_file, cred_t *cred, int *brand_action)
{
auxv32_t auxflags_auxv32;
int error;
diff --git a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c
index dc04b292b0..d74737dead 100644
--- a/usr/src/uts/common/exec/elf/elf.c
+++ b/usr/src/uts/common/exec/elf/elf.c
@@ -26,7 +26,7 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
#include <sys/types.h>
@@ -66,6 +66,11 @@
#include <sys/sdt.h>
#include <sys/siginfo.h>
+#if defined(__x86) && !defined(__xpv)
+#include <sys/comm_page.h>
+#endif /* defined(__x86) && !defined(__xpv) */
+
+
extern int at_flags;
#define ORIGIN_STR "ORIGIN"
@@ -163,12 +168,16 @@ dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
}
/*
- * Map in the executable pointed to by vp. Returns 0 on success.
+ * Map in the executable pointed to by vp. Returns 0 on success. Note that
+ * this function currently has the maximum number of arguments allowed by
+ * modstubs on x86 (MAXNARG)! Do _not_ add to this function signature without
+ * adding to MAXNARG. (Better yet, do not add to this monster of a function
+ * signature!)
*/
int
mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
- intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
- caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap)
+ intptr_t *voffset, caddr_t exec_file, char **interpp, caddr_t *bssbase,
+ caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap, uintptr_t *minaddrp)
{
size_t len;
struct vattr vat;
@@ -180,6 +189,7 @@ mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
Phdr *junk = NULL;
Phdr *dynphdr = NULL;
Phdr *dtrphdr = NULL;
+ char *interp = NULL;
uintptr_t lddata;
long execsz;
intptr_t minaddr;
@@ -187,6 +197,9 @@ mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
if (lddatap != NULL)
*lddatap = NULL;
+ if (minaddrp != NULL)
+ *minaddrp = NULL;
+
if (error = execpermissions(vp, &vat, args)) {
uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
return (error);
@@ -212,25 +225,89 @@ mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
&junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr,
len, &execsz, brksize)) {
uprintf("%s: Cannot map %s\n", exec_file, args->pathname);
+ if (uphdr != NULL && uphdr->p_flags == 0)
+ kmem_free(uphdr, sizeof (Phdr));
kmem_free(phdrbase, phdrsize);
return (error);
}
+ if (minaddrp != NULL)
+ *minaddrp = minaddr;
+
/*
- * Inform our caller if the executable needs an interpreter.
+ * If the executable requires an interpreter, determine its name.
*/
- *interp = (dynphdr == NULL) ? 0 : 1;
+ if (dynphdr != NULL) {
+ ssize_t resid;
+
+ if (dynphdr->p_filesz > MAXPATHLEN || dynphdr->p_filesz == 0) {
+ uprintf("%s: Invalid interpreter\n", exec_file);
+ kmem_free(phdrbase, phdrsize);
+ return (ENOEXEC);
+ }
+
+ interp = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ if ((error = vn_rdwr(UIO_READ, vp, interp, dynphdr->p_filesz,
+ (offset_t)dynphdr->p_offset, UIO_SYSSPACE, 0,
+ (rlim64_t)0, CRED(), &resid)) != 0 || resid != 0 ||
+ interp[dynphdr->p_filesz - 1] != '\0') {
+ uprintf("%s: Cannot obtain interpreter pathname\n",
+ exec_file);
+ kmem_free(interp, MAXPATHLEN);
+ kmem_free(phdrbase, phdrsize);
+ return (error != 0 ? error : ENOEXEC);
+ }
+ }
/*
* If this is a statically linked executable, voffset should indicate
* the address of the executable itself (it normally holds the address
* of the interpreter).
*/
- if (ehdr->e_type == ET_EXEC && *interp == 0)
+ if (ehdr->e_type == ET_EXEC && interp == NULL)
*voffset = minaddr;
+ /*
+ * If the caller has asked for the interpreter name, return it (it's
+ * up to the caller to free it); if the caller hasn't asked for it,
+ * free it ourselves.
+ */
+ if (interpp != NULL) {
+ *interpp = interp;
+ } else if (interp != NULL) {
+ kmem_free(interp, MAXPATHLEN);
+ }
+
if (uphdr != NULL) {
*uphdr_vaddr = uphdr->p_vaddr;
+
+ if (uphdr->p_flags == 0)
+ kmem_free(uphdr, sizeof (Phdr));
+ } else if (ehdr->e_type == ET_DYN) {
+ /*
+ * If we don't have a uphdr, we'll apply the logic found
+ * in mapelfexec() and use the p_vaddr of the first PT_LOAD
+ * section as the base address of the object.
+ */
+ Phdr *phdr = (Phdr *)phdrbase;
+ int i, hsize = ehdr->e_phentsize;
+
+ for (i = nphdrs; i > 0; i--) {
+ if (phdr->p_type == PT_LOAD) {
+ *uphdr_vaddr = (uintptr_t)phdr->p_vaddr +
+ ehdr->e_phoff;
+ break;
+ }
+
+ phdr = (Phdr *)((caddr_t)phdr + hsize);
+ }
+
+ /*
+ * If we don't have a PT_LOAD segment, we should have returned
+ * ENOEXEC when elfsize() returned 0, above.
+ */
+ VERIFY(i > 0);
} else {
*uphdr_vaddr = (Addr)-1;
}
@@ -243,13 +320,13 @@ mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
int
elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
- int brand_action)
+ int *brand_action)
{
caddr_t phdrbase = NULL;
caddr_t bssbase = 0;
caddr_t brkbase = 0;
size_t brksize = 0;
- ssize_t dlnsize;
+ ssize_t dlnsize, nsize = 0;
aux_entry_t *aux;
int error;
ssize_t resid;
@@ -273,6 +350,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
int hasauxv = 0;
int hasdy = 0;
int branded = 0;
+ int dynuphdr = 0;
struct proc *p = ttoproc(curthread);
struct user *up = PTOU(p);
@@ -327,7 +405,9 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
*execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1);
} else {
args->to_model = DATAMODEL_LP64;
- args->stk_prot &= ~PROT_EXEC;
+ if (!args->stk_prot_override) {
+ args->stk_prot &= ~PROT_EXEC;
+ }
#if defined(__i386) || defined(__amd64)
args->dat_prot &= ~PROT_EXEC;
#endif
@@ -339,11 +419,25 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
#endif /* _LP64 */
/*
- * We delay invoking the brand callback until we've figured out
- * what kind of elf binary we're trying to run, 32-bit or 64-bit.
- * We do this because now the brand library can just check
- * args->to_model to see if the target is 32-bit or 64-bit without
- * having do duplicate all the code above.
+ * We delay invoking the brand callback until we've figured out what
+ * kind of elf binary we're trying to run, 32-bit or 64-bit. We do this
+ * because now the brand library can just check args->to_model to see if
+ * the target is 32-bit or 64-bit without having to duplicate all the
+ * code above.
+ *
+ * We also give the brand a chance to indicate that based on the ELF
+ * OSABI of the target binary it should become unbranded and optionally
+ * indicate that it should be treated as existing in a specific prefix.
+ *
+ * Note that if a brand opts to go down this route it does not actually
+ * end up being debranded. In other words, future programs that exec
+ * will still be considered for branding unless this escape hatch is
+ * used. Consider the case of lx brand for example. If a user runs
+ * /native/usr/sbin/dtrace -c /bin/ls, the isaexec and normal executable
+ * of DTrace that's in /native will take this escape hatch and be run
+ * and interpreted using the normal system call table; however, the
+ * execution of a non-illumos binary in the form of /bin/ls will still
+ * be branded and be subject to all of the normal actions of the brand.
*
* The level checks associated with brand handling below are used to
* prevent a loop since the brand elfexec function typically comes back
@@ -351,8 +445,20 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
* handling in the #! interpreter code will increment the level before
* calling gexec to run the final elfexec interpreter.
*/
+ if ((level <= INTP_MAXDEPTH) && (*brand_action != EBA_NATIVE) &&
+ (PROC_IS_BRANDED(p)) && (BROP(p)->b_native_exec != NULL)) {
+ if (BROP(p)->b_native_exec(ehdrp->e_ident[EI_OSABI],
+ &args->brand_nroot) == B_TRUE) {
+ ASSERT(ehdrp->e_ident[EI_OSABI]);
+ *brand_action = EBA_NATIVE;
+ /* Add one for the trailing '/' in the path */
+ if (args->brand_nroot != NULL)
+ nsize = strlen(args->brand_nroot) + 1;
+ }
+ }
+
if ((level <= INTP_MAXDEPTH) &&
- (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
+ (*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
error = BROP(p)->b_elfexec(vp, uap, args,
idatap, level + 1, execsz, setid, exec_file, cred,
brand_action);
@@ -423,14 +529,15 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
* AT_BASE
* AT_FLAGS
* AT_PAGESZ
+ * AT_RANDOM (added in stk_copyout)
* AT_SUN_AUXFLAGS
* AT_SUN_HWCAP
* AT_SUN_HWCAP2
- * AT_SUN_PLATFORM (added in stk_copyout)
- * AT_SUN_EXECNAME (added in stk_copyout)
+ * AT_SUN_PLATFORM (added in stk_copyout)
+ * AT_SUN_EXECNAME (added in stk_copyout)
* AT_NULL
*
- * total == 9
+ * total == 10
*/
if (hasdy && hasu) {
/*
@@ -445,7 +552,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
*
* total = 5
*/
- args->auxsize = (9 + 5) * sizeof (aux_entry_t);
+ args->auxsize = (10 + 5) * sizeof (aux_entry_t);
} else if (hasdy) {
/*
* Has PT_INTERP but no PT_PHDR
@@ -455,9 +562,9 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
*
* total = 2
*/
- args->auxsize = (9 + 2) * sizeof (aux_entry_t);
+ args->auxsize = (10 + 2) * sizeof (aux_entry_t);
} else {
- args->auxsize = 9 * sizeof (aux_entry_t);
+ args->auxsize = 10 * sizeof (aux_entry_t);
}
} else {
args->auxsize = 0;
@@ -470,13 +577,41 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
if (args->emulator != NULL)
args->auxsize += sizeof (aux_entry_t);
- if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
+ /*
+ * If this is a native binary that's been given a modified interpreter
+ * root, inform it that the native system exists at that root.
+ */
+ if (args->brand_nroot != NULL) {
+ args->auxsize += sizeof (aux_entry_t);
+ }
+
+
+ /*
+ * On supported kernels (64-bit, non-xpv) make room in the auxv for the
+ * AT_SUN_COMMPAGE entry.
+ */
+#if defined(__amd64) && !defined(__xpv)
+ args->auxsize += sizeof (aux_entry_t);
+#endif /* defined(__amd64) && !defined(__xpv) */
+
+ /*
+ * If we have user credentials, we'll supply the following entries:
+ * AT_SUN_UID
+ * AT_SUN_RUID
+ * AT_SUN_GID
+ * AT_SUN_RGID
+ */
+ if (cred != NULL) {
+ args->auxsize += 4 * sizeof (aux_entry_t);
+ }
+
+ if ((*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
branded = 1;
/*
- * We will be adding 4 entries to the aux vectors. One for
- * the the brandname and 3 for the brand specific aux vectors.
+ * We will be adding 5 entries to the aux vectors. One for
+ * the brandname and 4 for the brand specific aux vectors.
*/
- args->auxsize += 4 * sizeof (aux_entry_t);
+ args->auxsize += 5 * sizeof (aux_entry_t);
}
/* Hardware/Software capabilities */
@@ -507,7 +642,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
aux = bigwad->elfargs;
/*
* Move args to the user's stack.
- * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
+ * This can fill in the AT_SUN_PLATFORM, AT_SUN_EXECNAME and AT_RANDOM
+ * aux entries.
*/
if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
if (error == -1) {
@@ -534,6 +670,14 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
len, execsz, &brksize)) != 0)
goto bad;
+ if (uphdr != NULL) {
+ /*
+ * Our uphdr has been dynamically allocated if (and only if)
+ * its program header flags are clear.
+ */
+ dynuphdr = (uphdr->p_flags == 0);
+ }
+
if (uphdr != NULL && dyphdr == NULL)
goto bad;
@@ -548,17 +692,22 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
char *p;
struct vnode *nvp;
- dlnsize = dyphdr->p_filesz;
+ dlnsize = dyphdr->p_filesz + nsize;
if (dlnsize > MAXPATHLEN || dlnsize <= 0)
goto bad;
+ if (nsize != 0) {
+ bcopy(args->brand_nroot, dlnp, nsize - 1);
+ dlnp[nsize - 1] = '/';
+ }
+
/*
* Read in "interpreter" pathname.
*/
- if ((error = vn_rdwr(UIO_READ, vp, dlnp, dyphdr->p_filesz,
- (offset_t)dyphdr->p_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
- CRED(), &resid)) != 0) {
+ if ((error = vn_rdwr(UIO_READ, vp, dlnp + nsize,
+ dyphdr->p_filesz, (offset_t)dyphdr->p_offset, UIO_SYSSPACE,
+ 0, (rlim64_t)0, CRED(), &resid)) != 0) {
uprintf("%s: Cannot obtain interpreter pathname\n",
exec_file);
goto bad;
@@ -703,9 +852,10 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
dtrphdr = NULL;
- error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, &junk, &junk,
+ error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, NULL, &junk,
&junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len,
execsz, NULL);
+
if (error || junk != NULL) {
VN_RELE(nvp);
uprintf("%s: Cannot map %s\n", exec_file, dlnp);
@@ -732,9 +882,10 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
if (hasauxv) {
int auxf = AF_SUN_HWCAPVERIFY;
+
/*
- * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via
- * exec_args()
+ * Note: AT_SUN_PLATFORM, AT_SUN_EXECNAME and AT_RANDOM were
+ * filled in via exec_args()
*/
ADDAUX(aux, AT_BASE, voffset)
ADDAUX(aux, AT_FLAGS, at_flags)
@@ -762,7 +913,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
* malicious user within the zone from crafting a wrapper to
* run native suid commands with unsecure libraries interposed.
*/
- if ((brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
+ if ((*brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
(setid &= ~EXECSETID_SETID) != 0))
auxf &= ~AF_SUN_SETUGID;
@@ -775,6 +926,18 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
((char *)&aux->a_type -
(char *)bigwad->elfargs));
ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
+
+ /*
+ * Record information about the real and effective user and
+ * group IDs.
+ */
+ if (cred != NULL) {
+ ADDAUX(aux, AT_SUN_UID, crgetuid(cred));
+ ADDAUX(aux, AT_SUN_RUID, crgetruid(cred));
+ ADDAUX(aux, AT_SUN_GID, crgetgid(cred));
+ ADDAUX(aux, AT_SUN_RGID, crgetrgid(cred));
+ }
+
/*
* Hardware capability flag word (performance hints)
* Used for choosing faster library routines.
@@ -804,8 +967,19 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
ADDAUX(aux, AT_SUN_BRAND_AUX1, 0)
ADDAUX(aux, AT_SUN_BRAND_AUX2, 0)
ADDAUX(aux, AT_SUN_BRAND_AUX3, 0)
+ ADDAUX(aux, AT_SUN_BRAND_AUX4, 0)
}
+ /*
+ * Add the comm page auxv entry, mapping it in if needed.
+ */
+#if defined(__amd64) && !defined(__xpv)
+ if (args->commpage != NULL ||
+ (args->commpage = (uintptr_t)comm_page_mapin()) != NULL) {
+ ADDAUX(aux, AT_SUN_COMMPAGE, args->commpage)
+ }
+#endif /* defined(__amd64) && !defined(__xpv) */
+
ADDAUX(aux, AT_NULL, 0)
postfixsize = (char *)aux - (char *)bigwad->elfargs;
@@ -845,6 +1019,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
}
bzero(up->u_auxv, sizeof (up->u_auxv));
+ up->u_commpagep = args->commpage;
if (postfixsize) {
int num_auxv;
@@ -911,6 +1086,8 @@ bad:
if (error == 0)
error = ENOEXEC;
out:
+ if (dynuphdr)
+ kmem_free(uphdr, sizeof (Phdr));
if (phdrbase != NULL)
kmem_free(phdrbase, phdrsize);
if (cap != NULL)
@@ -1177,6 +1354,29 @@ getelfshdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr,
return (0);
}
+
+#ifdef _ELF32_COMPAT
+int
+elf32readhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs,
+ caddr_t *phbasep, ssize_t *phsizep)
+#else
+int
+elfreadhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs,
+ caddr_t *phbasep, ssize_t *phsizep)
+#endif
+{
+ int error, nshdrs, shstrndx;
+
+ if ((error = getelfhead(vp, credp, ehdrp, &nshdrs, &shstrndx,
+ nphdrs)) != 0 ||
+ (error = getelfphdr(vp, credp, ehdrp, *nphdrs, phbasep,
+ phsizep)) != 0) {
+ return (error);
+ }
+ return (0);
+}
+
+
static int
mapelfexec(
vnode_t *vp,
@@ -1197,7 +1397,7 @@ mapelfexec(
size_t *brksize)
{
Phdr *phdr;
- int i, prot, error;
+ int i, prot, error, lastprot = 0;
caddr_t addr = NULL;
size_t zfodsz;
int ptload = 0;
@@ -1205,43 +1405,78 @@ mapelfexec(
off_t offset;
int hsize = ehdr->e_phentsize;
caddr_t mintmp = (caddr_t)-1;
+ uintptr_t lastaddr = NULL;
extern int use_brk_lpg;
if (ehdr->e_type == ET_DYN) {
- /*
- * Obtain the virtual address of a hole in the
- * address space to map the "interpreter".
- */
- map_addr(&addr, len, (offset_t)0, 1, 0);
- if (addr == NULL)
- return (ENOMEM);
- *voffset = (intptr_t)addr;
+ caddr_t vaddr;
/*
- * Calculate the minimum vaddr so it can be subtracted out.
- * According to the ELF specification, since PT_LOAD sections
- * must be sorted by increasing p_vaddr values, this is
- * guaranteed to be the first PT_LOAD section.
+ * Despite the fact that mmapobj(2) refuses to load them, we
+ * need to support executing ET_DYN objects that have a
+ * non-NULL p_vaddr. When found in the wild, these objects
+ * are likely to be due to an old (and largely obviated) Linux
+ * facility, prelink(8), that rewrites shared objects to
+ * prefer specific (disjoint) virtual address ranges. (Yes,
+ * this is putatively for performance -- and yes, it has
+ * limited applicability, many edge conditions and grisly
+ * failure modes; even for Linux, it's insane.) As ELF
+ * mandates that the PT_LOAD segments be in p_vaddr order, we
+ * find the lowest p_vaddr by finding the first PT_LOAD
+ * segment.
*/
phdr = (Phdr *)phdrbase;
for (i = nphdrs; i > 0; i--) {
if (phdr->p_type == PT_LOAD) {
- *voffset -= (uintptr_t)phdr->p_vaddr;
+ addr = (caddr_t)(uintptr_t)phdr->p_vaddr;
break;
}
phdr = (Phdr *)((caddr_t)phdr + hsize);
}
+ /*
+ * We have a non-zero p_vaddr in the first PT_LOAD segment --
+ * presumably because we're directly executing a prelink(8)'d
+ * ld-linux.so. While we could correctly execute such an
+ * object without locating it at its desired p_vaddr (it is,
+ * after all, still relocatable), our inner antiquarian
+ * derives a perverse pleasure in accommodating the steampunk
+ * prelink(8) contraption -- goggles on!
+ */
+ if ((vaddr = addr) != NULL) {
+ if (as_gap(curproc->p_as, len,
+ &addr, &len, AH_LO, NULL) == -1 || addr != vaddr) {
+ addr = NULL;
+ }
+ }
+
+ if (addr == NULL) {
+ /*
+ * We either have a NULL p_vaddr (the common case, by
+ * many orders of magnitude) or we have a non-NULL
+ * p_vaddr and we were unable to obtain the specified
+ * VA range (presumably because it's an illegal
+ * address). Either way, obtain an address in which
+ * to map the interpreter.
+ */
+ map_addr(&addr, len, (offset_t)0, 1, 0);
+ if (addr == NULL)
+ return (ENOMEM);
+ }
+
+ /*
+ * Our voffset is the difference between where we landed and
+ * where we wanted to be.
+ */
+ *voffset = (uintptr_t)addr - (uintptr_t)vaddr;
} else {
*voffset = 0;
}
+
phdr = (Phdr *)phdrbase;
for (i = nphdrs; i > 0; i--) {
switch (phdr->p_type) {
case PT_LOAD:
- if ((*dyphdr != NULL) && (*uphdr == NULL))
- return (0);
-
ptload = 1;
prot = PROT_USER;
if (phdr->p_flags & PF_R)
@@ -1253,6 +1488,34 @@ mapelfexec(
addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
+ if ((*dyphdr != NULL) && uphdr != NULL &&
+ (*uphdr == NULL)) {
+ /*
+ * The PT_PHDR program header is, strictly
+ * speaking, optional. If we find that this
+ * is missing, we will determine the location
+ * of the program headers based on the address
+ * of the lowest PT_LOAD segment (namely, this
+ * one): we subtract the p_offset to get to
+ * the ELF header and then add back the program
+ * header offset to get to the program headers.
+ * We then cons up a Phdr that corresponds to
+ * the (missing) PT_PHDR, setting the flags
+ * to 0 to denote that this is artificial and
+ * should (must) be freed by the caller.
+ */
+ Phdr *cons;
+
+ cons = kmem_zalloc(sizeof (Phdr), KM_SLEEP);
+
+ cons->p_flags = 0;
+ cons->p_type = PT_PHDR;
+ cons->p_vaddr = ((uintptr_t)addr -
+ phdr->p_offset) + ehdr->e_phoff;
+
+ *uphdr = cons;
+ }
+
/*
* Keep track of the segment with the lowest starting
* address.
@@ -1260,6 +1523,41 @@ mapelfexec(
if (addr < mintmp)
mintmp = addr;
+ /*
+ * Segments need not correspond to page boundaries:
+ * they are permitted to share a page. If two PT_LOAD
+ * segments share the same page, and the permissions
+ * of the segments differ, the behavior is historically
+ * that the permissions of the latter segment are used
+ * for the page that the two segments share. This is
+ * also historically a non-issue: binaries generated
+ * by most anything will make sure that two PT_LOAD
+ * segments with differing permissions don't actually
+ * share any pages. However, there exist some crazy
+ * things out there (including at least an obscure
+ * Portuguese teaching language called G-Portugol) that
+ * actually do the wrong thing and expect it to work:
+ * they have a segment with execute permission share
+ * a page with a subsequent segment that does not
+ * have execute permissions and expect the resulting
+ * shared page to in fact be executable. To accommodate
+ * such broken link editors, we take advantage of a
+ * latitude explicitly granted to the loader: it is
+ * permitted to make _any_ PT_LOAD segment executable
+ * (provided that it is readable or writable). If we
+ * see that we're sharing a page and that the previous
+ * page was executable, we will add execute permissions
+ * to our segment.
+ */
+ if (btop(lastaddr) == btop((uintptr_t)addr) &&
+ (phdr->p_flags & (PF_R | PF_W)) &&
+ (lastprot & PROT_EXEC)) {
+ prot |= PROT_EXEC;
+ }
+
+ lastaddr = (uintptr_t)addr + phdr->p_filesz;
+ lastprot = prot;
+
zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;
offset = phdr->p_offset;
@@ -1324,8 +1622,22 @@ mapelfexec(
break;
case PT_INTERP:
- if (ptload)
- goto bad;
+ /*
+ * The ELF specification is unequivocal about the
+ * PT_INTERP program header with respect to any PT_LOAD
+ * program header: "If it is present, it must precede
+ * any loadable segment entry." Linux, however, makes
+ * no attempt to enforce this -- which has allowed some
+ * binary editing tools to get away with generating
+ * invalid ELF binaries in the respect that PT_INTERP
+ * occurs after the first PT_LOAD program header. This
+ * is unfortunate (and of course, disappointing) but
+ * it's no worse than that: there is no reason that we
+ * can't process the PT_INTERP entry (if present) after
+ * one or more PT_LOAD entries. We therefore
+ * deliberately do not check ptload here and always
+ * store dyphdr to be the PT_INTERP program header.
+ */
*dyphdr = phdr;
break;
@@ -1334,9 +1646,12 @@ mapelfexec(
break;
case PT_PHDR:
- if (ptload)
+ if (ptload || phdr->p_flags == 0)
goto bad;
- *uphdr = phdr;
+
+ if (uphdr != NULL)
+ *uphdr = phdr;
+
break;
case PT_NULL:
@@ -2185,7 +2500,7 @@ static struct modlexec modlexec = {
extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
intpdata_t *idatap, int level, long *execsz,
int setid, caddr_t exec_file, cred_t *cred,
- int brand_action);
+ int *brand_action);
extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
rlim64_t rlimit, int sig, core_content_t content);
diff --git a/usr/src/uts/common/exec/intp/intp.c b/usr/src/uts/common/exec/intp/intp.c
index 269ba86b1b..512cab2b66 100644
--- a/usr/src/uts/common/exec/intp/intp.c
+++ b/usr/src/uts/common/exec/intp/intp.c
@@ -22,6 +22,7 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright 2012 Milan Jurik. All rights reserved.
+ * Copyright 2016, Joyent, Inc.
*/
/* Copyright (c) 1988 AT&T */
@@ -47,6 +48,7 @@
#include <sys/kmem.h>
#include <sys/note.h>
#include <sys/sdt.h>
+#include <sys/brand.h>
/*
* This is the loadable module wrapper.
@@ -54,7 +56,7 @@
#include <sys/modctl.h>
extern int intpexec(struct vnode *, struct execa *, struct uarg *,
- struct intpdata *, int, long *, int, caddr_t, struct cred *, int);
+ struct intpdata *, int, long *, int, caddr_t, struct cred *, int *);
static struct execsw esw = {
intpmagicstr,
@@ -126,13 +128,20 @@ getintphead(struct vnode *vp, struct intpdata *idatap)
*cp = '\0';
/*
- * Locate the beginning and end of the interpreter name.
- * In addition to the name, one additional argument may
- * optionally be included here, to be prepended to the
- * arguments provided on the command line. Thus, for
- * example, you can say
+ * Locate the beginning and end of the interpreter name. Historically,
+ * for illumos and its predecessors, in addition to the name, one
+ * additional argument may optionally be included here, to be prepended
+ * to the arguments provided on the command line. Thus, for example,
+ * you can say
*
* #! /usr/bin/awk -f
+ *
+ * However, handling of interpreter arguments varies across operating
+ * systems and other systems allow more than one argument. In
+ * particular, Linux allows more than one and delivers all arguments
+ * as a single string (argv[1] is "-arg1 -arg2 ..."). We support this
+ * style of argument handling as a brand-specific option (setting
+ * b_intp_parse_arg to B_FALSE).
*/
for (cp = &linep[2]; *cp == ' '; cp++)
;
@@ -151,9 +160,12 @@ getintphead(struct vnode *vp, struct intpdata *idatap)
idatap->intp_arg[0] = NULL;
else {
idatap->intp_arg[0] = cp;
- while (*cp && *cp != ' ')
- cp++;
- *cp = '\0';
+ if (!PROC_IS_BRANDED(curproc) ||
+ BROP(curproc)->b_intp_parse_arg) {
+ while (*cp && *cp != ' ')
+ cp++;
+ *cp = '\0';
+ }
}
}
return (0);
@@ -188,9 +200,8 @@ intpexec(
int setid,
caddr_t exec_file,
struct cred *cred,
- int brand_action)
+ int *brand_action)
{
- _NOTE(ARGUNUSED(brand_action))
vnode_t *nvp;
int error = 0;
struct intpdata idata;
@@ -281,7 +292,7 @@ intpexec(
}
error = gexec(&nvp, uap, args, &idata, ++level, execsz, exec_file, cred,
- EBA_NONE);
+ brand_action);
if (!error) {
/*
diff --git a/usr/src/uts/common/exec/java/java.c b/usr/src/uts/common/exec/java/java.c
index fdc327dcbb..5170fda5cb 100644
--- a/usr/src/uts/common/exec/java/java.c
+++ b/usr/src/uts/common/exec/java/java.c
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
/*
@@ -85,7 +86,7 @@ char *jexec_arg = "-jar";
static int
javaexec(vnode_t *vp, struct execa *uap, struct uarg *args,
struct intpdata *idatap, int level, long *execsz, int setid,
- caddr_t execfile, cred_t *cred, int brand_action)
+ caddr_t execfile, cred_t *cred, int *brand_action)
{
struct intpdata idata;
int error;
diff --git a/usr/src/uts/common/exec/shbin/shbin.c b/usr/src/uts/common/exec/shbin/shbin.c
index ee5060a07e..016d87b9ef 100644
--- a/usr/src/uts/common/exec/shbin/shbin.c
+++ b/usr/src/uts/common/exec/shbin/shbin.c
@@ -22,6 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -58,7 +59,7 @@ shbinexec(
int setid,
caddr_t exec_file,
struct cred *cred,
- int brand_action);
+ int *brand_action);
#define SHBIN_CNTL(x) ((x)&037)
#define SHBINMAGIC_LEN 4
@@ -162,7 +163,7 @@ shbinexec(
int setid,
caddr_t exec_file,
struct cred *cred,
- int brand_action)
+ int *brand_action)
{
_NOTE(ARGUNUSED(brand_action))
vnode_t *nvp;