Diffstat (limited to 'usr/src/uts/common/exec/elf')
-rw-r--r--   usr/src/uts/common/exec/elf/elf.c         473
-rw-r--r--   usr/src/uts/common/exec/elf/elf_notes.c     2
2 files changed, 355 insertions, 120 deletions
diff --git a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c
index 73d302aaa5..53bbd078ba 100644
--- a/usr/src/uts/common/exec/elf/elf.c
+++ b/usr/src/uts/common/exec/elf/elf.c
@@ -26,7 +26,7 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
- * Copyright 2019, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
* Copyright 2022 Oxide Computer Company
*/
@@ -94,7 +94,6 @@ static int mapelfexec(vnode_t *, Ehdr *, uint_t, caddr_t, Phdr **, Phdr **,
Phdr **, Phdr **, Phdr *, caddr_t *, caddr_t *, intptr_t *, uintptr_t *,
size_t, size_t *, size_t *);
-
#ifdef _ELF32_COMPAT
/* Link against the non-compat instances when compiling the 32-bit version. */
extern size_t elf_datasz_max;
@@ -181,12 +180,16 @@ elf_ctx_resize_scratch(elf_core_ctx_t *ctx, size_t sz)
#endif /* _ELF32_COMPAT */
/*
- * Map in the executable pointed to by vp. Returns 0 on success.
+ * Map in the executable pointed to by vp. Returns 0 on success. Note that
+ * this function currently has the maximum number of arguments allowed by
+ * modstubs on x86 (MAXNARG)! Do _not_ add to this function signature without
+ * adding to MAXNARG. (Better yet, do not add to this monster of a function
+ * signature!)
*/
int
mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
- intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
- caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap)
+ intptr_t *voffset, caddr_t exec_file, char **interpp, caddr_t *bssbase,
+ caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap, uintptr_t *minaddrp)
{
size_t len, phdrsize;
struct vattr vat;
@@ -197,12 +200,16 @@ mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
Phdr *junk = NULL;
Phdr *dynphdr = NULL;
Phdr *dtrphdr = NULL;
+ char *interp = NULL;
uintptr_t lddata, minaddr;
size_t execsz;
if (lddatap != NULL)
*lddatap = 0;
+ if (minaddrp != NULL)
+ *minaddrp = (uintptr_t)NULL;
+
if (error = execpermissions(vp, &vat, args)) {
uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
return (error);
@@ -234,24 +241,85 @@ mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
return (error);
}
+ if (minaddrp != NULL)
+ *minaddrp = minaddr;
+
/*
- * Inform our caller if the executable needs an interpreter.
+ * If the executable requires an interpreter, determine its name.
*/
- *interp = (dynphdr == NULL) ? 0 : 1;
+ if (dynphdr != NULL) {
+ ssize_t resid;
+
+ if (dynphdr->p_filesz > MAXPATHLEN || dynphdr->p_filesz == 0) {
+ uprintf("%s: Invalid interpreter\n", exec_file);
+ kmem_free(phdrbase, phdrsize);
+ return (ENOEXEC);
+ }
+
+ interp = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ if ((error = vn_rdwr(UIO_READ, vp, interp,
+ (ssize_t)dynphdr->p_filesz,
+ (offset_t)dynphdr->p_offset, UIO_SYSSPACE, 0,
+ (rlim64_t)0, CRED(), &resid)) != 0 || resid != 0 ||
+ interp[dynphdr->p_filesz - 1] != '\0') {
+ uprintf("%s: Cannot obtain interpreter pathname\n",
+ exec_file);
+ kmem_free(interp, MAXPATHLEN);
+ kmem_free(phdrbase, phdrsize);
+ return (error != 0 ? error : ENOEXEC);
+ }
+ }
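
For reference, a minimal user-space sketch of the same PT_INTERP lookup, assuming a 64-bit object and raw pread(2) reads; read_interp() and the PATH_MAX bound are illustrative stand-ins for the kernel's vn_rdwr()/MAXPATHLEN path, not part of this change:

#include <elf.h>
#include <fcntl.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>

static char *
read_interp(const char *path)
{
	Elf64_Ehdr ehdr;
	int fd = open(path, O_RDONLY);

	if (fd < 0 || pread(fd, &ehdr, sizeof (ehdr), 0) != sizeof (ehdr))
		goto fail;

	for (int i = 0; i < ehdr.e_phnum; i++) {
		Elf64_Phdr phdr;
		off_t off = ehdr.e_phoff + (off_t)i * ehdr.e_phentsize;

		if (pread(fd, &phdr, sizeof (phdr), off) != sizeof (phdr))
			goto fail;
		if (phdr.p_type != PT_INTERP)
			continue;

		/* Same sanity checks as above: non-empty, bounded, NUL-terminated. */
		if (phdr.p_filesz == 0 || phdr.p_filesz > PATH_MAX)
			goto fail;

		char *interp = malloc(PATH_MAX);

		if (interp == NULL ||
		    pread(fd, interp, phdr.p_filesz, phdr.p_offset) !=
		    (ssize_t)phdr.p_filesz ||
		    interp[phdr.p_filesz - 1] != '\0') {
			free(interp);
			goto fail;
		}
		(void) close(fd);
		return (interp);
	}
fail:
	if (fd >= 0)
		(void) close(fd);
	return (NULL);
}
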
/*
* If this is a statically linked executable, voffset should indicate
* the address of the executable itself (it normally holds the address
* of the interpreter).
*/
- if (ehdr->e_type == ET_EXEC && *interp == 0)
+ if (ehdr->e_type == ET_EXEC && interp == NULL)
*voffset = minaddr;
+ /*
+ * If the caller has asked for the interpreter name, return it (it's
+ * up to the caller to free it); if the caller hasn't asked for it,
+ * free it ourselves.
+ */
+ if (interpp != NULL) {
+ *interpp = interp;
+ } else if (interp != NULL) {
+ kmem_free(interp, MAXPATHLEN);
+ }
+
if (uphdr != NULL) {
*uphdr_vaddr = uphdr->p_vaddr;
if (uphdr->p_flags == 0)
kmem_free(uphdr, sizeof (Phdr));
+ } else if (ehdr->e_type == ET_DYN) {
+ /*
+ * If we don't have a uphdr, we'll apply the logic found
+ * in mapelfexec() and use the p_vaddr of the first PT_LOAD
+ * section as the base address of the object.
+ */
+ const Phdr *phdr = (Phdr *)phdrbase;
+ const uint_t hsize = ehdr->e_phentsize;
+ uint_t i;
+
+ for (i = nphdrs; i > 0; i--) {
+ if (phdr->p_type == PT_LOAD) {
+ *uphdr_vaddr = (uintptr_t)phdr->p_vaddr +
+ ehdr->e_phoff;
+ break;
+ }
+
+ phdr = (Phdr *)((caddr_t)phdr + hsize);
+ }
+
+ /*
+ * If we don't have a PT_LOAD segment, we should have returned
+ * ENOEXEC when elfsize() returned 0, above.
+ */
+ VERIFY(i > 0);
} else {
*uphdr_vaddr = (Addr)-1;
}
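
That fallback in isolation, as a sketch that assumes the program headers are packed at sizeof (Elf64_Phdr) (the kernel steps by e_phentsize explicitly); phdr_table_vaddr() is an illustrative name:

#include <elf.h>

/*
 * Approximate the program header table's virtual address for an ET_DYN
 * object with no PT_PHDR entry: the first PT_LOAD segment's p_vaddr plus
 * e_phoff, mirroring the fallback in mapexec_brand().
 */
static int
phdr_table_vaddr(const Elf64_Ehdr *ehdr, const Elf64_Phdr *phdrs,
    Elf64_Addr *vaddrp)
{
	for (int i = 0; i < ehdr->e_phnum; i++) {
		if (phdrs[i].p_type == PT_LOAD) {
			*vaddrp = phdrs[i].p_vaddr + ehdr->e_phoff;
			return (0);
		}
	}
	return (-1);	/* no PT_LOAD: not a loadable object at all */
}
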
@@ -263,13 +331,13 @@ mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
int
elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
int level, size_t *execsz, int setid, caddr_t exec_file, cred_t *cred,
- int brand_action)
+ int *brand_action)
{
caddr_t phdrbase = NULL;
caddr_t bssbase = 0;
caddr_t brkbase = 0;
size_t brksize = 0;
- size_t dlnsize;
+ size_t dlnsize, nsize = 0;
aux_entry_t *aux;
int error;
ssize_t resid;
@@ -349,7 +417,9 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
*execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1);
} else {
args->to_model = DATAMODEL_LP64;
- args->stk_prot &= ~PROT_EXEC;
+ if (!args->stk_prot_override) {
+ args->stk_prot &= ~PROT_EXEC;
+ }
#if defined(__x86)
args->dat_prot &= ~PROT_EXEC;
#endif
@@ -361,11 +431,25 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
#endif /* _LP64 */
/*
- * We delay invoking the brand callback until we've figured out
- * what kind of elf binary we're trying to run, 32-bit or 64-bit.
- * We do this because now the brand library can just check
- * args->to_model to see if the target is 32-bit or 64-bit without
- * having do duplicate all the code above.
+ * We delay invoking the brand callback until we've figured out what
+ * kind of elf binary we're trying to run, 32-bit or 64-bit. We do this
+ * because now the brand library can just check args->to_model to see if
+ * the target is 32-bit or 64-bit without having to duplicate all the
+ * code above.
+ *
+ * We also give the brand a chance to indicate that based on the ELF
+ * OSABI of the target binary it should become unbranded and optionally
+ * indicate that it should be treated as existing in a specific prefix.
+ *
+ * Note that if a brand opts to go down this route it does not actually
+ * end up being debranded. In other words, future programs that exec
+ * will still be considered for branding unless this escape hatch is
+ * used. Consider the case of lx brand for example. If a user runs
+ * /native/usr/sbin/dtrace -c /bin/ls, the isaexec and normal executable
+ * of DTrace that's in /native will take this escape hatch and be run
+ * and interpreted using the normal system call table; however, the
+ * execution of a non-illumos binary in the form of /bin/ls will still
+ * be branded and be subject to all of the normal actions of the brand.
*
* The level checks associated with brand handling below are used to
* prevent a loop since the brand elfexec function typically comes back
@@ -373,8 +457,20 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
* handling in the #! interpreter code will increment the level before
* calling gexec to run the final elfexec interpreter.
*/
+ if ((level <= INTP_MAXDEPTH) && (*brand_action != EBA_NATIVE) &&
+ (PROC_IS_BRANDED(p)) && (BROP(p)->b_native_exec != NULL)) {
+ if (BROP(p)->b_native_exec(ehdrp->e_ident[EI_OSABI],
+ &args->brand_nroot) == B_TRUE) {
+ ASSERT(ehdrp->e_ident[EI_OSABI]);
+ *brand_action = EBA_NATIVE;
+ /* Add one for the trailing '/' in the path */
+ if (args->brand_nroot != NULL)
+ nsize = strlen(args->brand_nroot) + 1;
+ }
+ }
+
if ((level <= INTP_MAXDEPTH) &&
- (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
+ (*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
error = BROP(p)->b_elfexec(vp, uap, args,
idatap, level + 1, execsz, setid, exec_file, cred,
brand_action);
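
To illustrate the input to that escape hatch, a small user-space classifier that inspects the same byte handed to the b_native_exec hook. Treating ELFOSABI_SOLARIS as "native" and using a "/native" prefix are brand policy assumed for this sketch, not behavior the kernel mandates:

#include <elf.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(int argc, char *argv[])
{
	unsigned char ident[EI_NIDENT];
	int fd;

	if (argc != 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return (1);
	if (read(fd, ident, sizeof (ident)) != sizeof (ident) ||
	    memcmp(ident, ELFMAG, SELFMAG) != 0) {
		(void) close(fd);
		return (1);
	}
	(void) close(fd);

	if (ident[EI_OSABI] == ELFOSABI_SOLARIS) {
		/* Where a brand's hook might return B_TRUE and "/native". */
		(void) printf("native OSABI: brand could exec this unbranded\n");
	} else {
		(void) printf("OSABI %d: stays branded\n", (int)ident[EI_OSABI]);
	}
	return (0);
}
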
@@ -448,6 +544,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
* AT_BASE
* AT_FLAGS
* AT_PAGESZ
+ * AT_RANDOM (added in stk_copyout)
* AT_SUN_AUXFLAGS
* AT_SUN_HWCAP
* AT_SUN_HWCAP2
@@ -456,7 +553,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
* AT_SUN_EXECNAME (added in stk_copyout)
* AT_NULL
*
- * total == 10
+ * total == 11
*/
if (hasintp && hasu) {
/*
@@ -471,7 +568,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
*
* total = 5
*/
- args->auxsize = (10 + 5) * sizeof (aux_entry_t);
+ args->auxsize = (11 + 5) * sizeof (aux_entry_t);
} else if (hasintp) {
/*
* Has PT_INTERP but no PT_PHDR
@@ -481,9 +578,9 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
*
* total = 2
*/
- args->auxsize = (10 + 2) * sizeof (aux_entry_t);
+ args->auxsize = (11 + 2) * sizeof (aux_entry_t);
} else {
- args->auxsize = 10 * sizeof (aux_entry_t);
+ args->auxsize = 11 * sizeof (aux_entry_t);
}
} else {
args->auxsize = 0;
@@ -497,6 +594,15 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
args->auxsize += sizeof (aux_entry_t);
/*
+ * If this is a native binary that's been given a modified interpreter
+ * root, inform it that the native system exists at that root.
+ */
+ if (args->brand_nroot != NULL) {
+ args->auxsize += sizeof (aux_entry_t);
+ }
+
+
+ /*
* On supported kernels (x86_64) make room in the auxv for the
* AT_SUN_COMMPAGE entry. This will go unpopulated on i86xpv systems
* which do not provide such functionality.
@@ -508,13 +614,24 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
args->auxsize += 3 * sizeof (aux_entry_t);
#endif /* defined(__amd64) */
- if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
+ /*
+ * If we have user credentials, we'll supply the following entries:
+ * AT_SUN_UID
+ * AT_SUN_RUID
+ * AT_SUN_GID
+ * AT_SUN_RGID
+ */
+ if (cred != NULL) {
+ args->auxsize += 4 * sizeof (aux_entry_t);
+ }
+
+ if ((*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
branded = 1;
/*
- * We will be adding 4 entries to the aux vectors. One for
- * the the brandname and 3 for the brand specific aux vectors.
+ * We will be adding 5 entries to the aux vectors. One for
+ * the brandname and 4 for the brand specific aux vectors.
*/
- args->auxsize += 4 * sizeof (aux_entry_t);
+ args->auxsize += 5 * sizeof (aux_entry_t);
}
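
The new entries are visible to user code through the aux vector; a sketch that dumps it from the illumos /proc auxv file, assuming the auxv_t layout from <sys/auxv.h>. The AT_SUN_UID family and AT_SUN_BRAND_AUX4 appear only on kernels carrying this change:

#include <sys/auxv.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	auxv_t av;
	int fd = open("/proc/self/auxv", O_RDONLY);

	if (fd < 0)
		return (1);
	while (read(fd, &av, sizeof (av)) == sizeof (av) &&
	    av.a_type != AT_NULL) {
		(void) printf("type %d\tval 0x%lx\n", av.a_type,
		    (unsigned long)av.a_un.a_val);
	}
	(void) close(fd);
	return (0);
}
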
/* If the binary has an explicit ASLR flag, it must be honoured */
@@ -595,7 +712,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
aux = bigwad->elfargs;
/*
* Move args to the user's stack.
- * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
+ * This can fill in the AT_SUN_PLATFORM, AT_SUN_EXECNAME and AT_RANDOM
+ * aux entries.
*/
if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
if (error == -1) {
@@ -645,7 +763,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
char *p;
struct vnode *nvp;
- dlnsize = intphdr->p_filesz;
+ dlnsize = intphdr->p_filesz + nsize;
/*
* Make sure none of the component pieces of dlnsize result in
@@ -656,10 +774,15 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
goto bad;
}
+ if (nsize != 0) {
+ bcopy(args->brand_nroot, dlnp, nsize - 1);
+ dlnp[nsize - 1] = '/';
+ }
+
/*
* Read in "interpreter" pathname.
*/
- if ((error = vn_rdwr(UIO_READ, vp, dlnp,
+ if ((error = vn_rdwr(UIO_READ, vp, dlnp + nsize,
(ssize_t)intphdr->p_filesz, (offset_t)intphdr->p_offset,
UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) {
uprintf("%s: Cannot obtain interpreter pathname\n",
@@ -842,8 +965,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
#endif /* defined(__amd64) */
/*
- * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via
- * exec_args()
+ * Note: AT_SUN_PLATFORM, AT_SUN_EXECNAME and AT_RANDOM were
+ * filled in via exec_args()
*/
ADDAUX(aux, AT_BASE, voffset)
ADDAUX(aux, AT_FLAGS, at_flags)
@@ -871,7 +994,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
* malicious user within the zone from crafting a wrapper to
* run native suid commands with unsecure libraries interposed.
*/
- if ((brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
+ if ((*brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
(setid &= ~EXECSETID_SETID) != 0))
auxf &= ~AF_SUN_SETUGID;
@@ -886,6 +1009,17 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
/*
+ * Record information about the real and effective user and
+ * group IDs.
+ */
+ if (cred != NULL) {
+ ADDAUX(aux, AT_SUN_UID, crgetuid(cred));
+ ADDAUX(aux, AT_SUN_RUID, crgetruid(cred));
+ ADDAUX(aux, AT_SUN_GID, crgetgid(cred));
+ ADDAUX(aux, AT_SUN_RGID, crgetrgid(cred));
+ }
+
+ /*
* Hardware capability flag word (performance hints)
* Used for choosing faster library routines.
* (Potentially different between 32-bit and 64-bit ABIs)
@@ -912,6 +1046,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
ADDAUX(aux, AT_SUN_BRAND_AUX1, 0)
ADDAUX(aux, AT_SUN_BRAND_AUX2, 0)
ADDAUX(aux, AT_SUN_BRAND_AUX3, 0)
+ ADDAUX(aux, AT_SUN_BRAND_AUX4, 0)
}
/*
@@ -1119,10 +1254,10 @@ getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, uint_t *nshdrs,
* We got here by the first two bytes in ident,
* now read the entire ELF header.
*/
- if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)ehdr,
- sizeof (Ehdr), (offset_t)0, UIO_SYSSPACE, 0,
- (rlim64_t)0, credp, &resid)) != 0)
+ if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)ehdr, sizeof (Ehdr),
+ (offset_t)0, UIO_SYSSPACE, 0, (rlim64_t)0, credp, &resid)) != 0) {
return (error);
+ }
/*
* Since a separate version is compiled for handling 32-bit and
@@ -1131,8 +1266,9 @@ getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, uint_t *nshdrs,
*/
if (resid != 0 ||
ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
- ehdr->e_ident[EI_MAG3] != ELFMAG3)
+ ehdr->e_ident[EI_MAG3] != ELFMAG3) {
return (ENOEXEC);
+ }
if ((ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) ||
#if defined(_ILP32) || defined(_ELF32_COMPAT)
@@ -1141,8 +1277,9 @@ getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, uint_t *nshdrs,
ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
#endif
!elfheadcheck(ehdr->e_ident[EI_DATA], ehdr->e_machine,
- ehdr->e_flags))
+ ehdr->e_flags)) {
return (EINVAL);
+ }
*nshdrs = ehdr->e_shnum;
*shstrndx = ehdr->e_shstrndx;
@@ -1162,9 +1299,8 @@ getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, uint_t *nshdrs,
if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&shdr,
sizeof (shdr), (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0,
- (rlim64_t)0, credp, NULL)) != 0) {
+ (rlim64_t)0, credp, NULL)) != 0)
return (error);
- }
if (*nshdrs == 0)
*nshdrs = shdr.sh_size;
@@ -1335,7 +1471,7 @@ mapelfexec(
size_t *brksize)
{
Phdr *phdr;
- int error, page, prot;
+ int error, page, prot, lastprot = 0;
caddr_t addr = NULL;
caddr_t minaddr = (caddr_t)UINTPTR_MAX;
uint_t i;
@@ -1343,9 +1479,11 @@ mapelfexec(
boolean_t ptload = B_FALSE;
off_t offset;
const uint_t hsize = ehdr->e_phentsize;
+ uintptr_t lastaddr = 0;
extern int use_brk_lpg;
if (ehdr->e_type == ET_DYN) {
+ caddr_t vaddr;
secflagset_t flags = 0;
/*
* Obtain the virtual address of a hole in the
@@ -1357,23 +1495,65 @@ mapelfexec(
map_addr(&addr, len, (offset_t)0, 1, flags);
if (addr == NULL)
return (ENOMEM);
- *voffset = (intptr_t)addr;
/*
- * Calculate the minimum vaddr so it can be subtracted out.
- * According to the ELF specification, since PT_LOAD sections
- * must be sorted by increasing p_vaddr values, this is
- * guaranteed to be the first PT_LOAD section.
+ * Despite the fact that mmapobj(2) refuses to load them, we
+ * need to support executing ET_DYN objects that have a
+ * non-NULL p_vaddr. When found in the wild, these objects
+ * are likely to be due to an old (and largely obviated) Linux
+ * facility, prelink(8), that rewrites shared objects to
+ * prefer specific (disjoint) virtual address ranges. (Yes,
+ * this is putatively for performance -- and yes, it has
+ * limited applicability, many edge conditions and grisly
+ * failure modes; even for Linux, it's insane.) As ELF
+ * mandates that the PT_LOAD segments be in p_vaddr order, we
+ * find the lowest p_vaddr by finding the first PT_LOAD
+ * segment.
*/
phdr = (Phdr *)phdrbase;
for (i = nphdrs; i > 0; i--) {
if (phdr->p_type == PT_LOAD) {
- *voffset -= (uintptr_t)phdr->p_vaddr;
+ addr = (caddr_t)(uintptr_t)phdr->p_vaddr;
break;
}
phdr = (Phdr *)((caddr_t)phdr + hsize);
}
+ /*
+ * We have a non-zero p_vaddr in the first PT_LOAD segment --
+ * presumably because we're directly executing a prelink(8)'d
+ * ld-linux.so. While we could correctly execute such an
+ * object without locating it at its desired p_vaddr (it is,
+ * after all, still relocatable), our inner antiquarian
+ * derives a perverse pleasure in accommodating the steampunk
+ * prelink(8) contraption -- goggles on!
+ */
+ if ((vaddr = addr) != NULL) {
+ if (as_gap(curproc->p_as, len, &addr, &len,
+ AH_LO, NULL) == -1 || addr != vaddr) {
+ addr = NULL;
+ }
+ }
+
+ if (addr == NULL) {
+ /*
+ * We either have a NULL p_vaddr (the common case, by
+ * many orders of magnitude) or we have a non-NULL
+ * p_vaddr and we were unable to obtain the specified
+ * VA range (presumably because it's an illegal
+ * address). Either way, obtain an address in which
+ * to map the interpreter.
+ */
+ map_addr(&addr, len, (offset_t)0, 1, 0);
+ if (addr == NULL)
+ return (ENOMEM);
+ }
+
+ /*
+ * Our voffset is the difference between where we landed and
+ * where we wanted to be.
+ */
+ *voffset = (uintptr_t)addr - (uintptr_t)vaddr;
} else {
*voffset = 0;
}
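
The voffset arithmetic reduced to its essence: the bias applied to every p_vaddr is the address the object actually landed at minus the address its first PT_LOAD requested, which degenerates to the old behavior (voffset equals the mapping address) when p_vaddr is zero. A minimal sketch with illustrative names:

#include <stdint.h>

/* Bias between the address obtained and the address the object requested. */
static inline intptr_t
elf_voffset(uintptr_t mapped_at, uintptr_t first_load_vaddr)
{
	return ((intptr_t)(mapped_at - first_load_vaddr));
}

/* Every segment then maps at p_vaddr + voffset. */
static inline uintptr_t
segment_addr(uintptr_t p_vaddr, intptr_t voffset)
{
	return (p_vaddr + (uintptr_t)voffset);
}
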
@@ -1437,6 +1617,41 @@ mapelfexec(
if (addr < minaddr)
minaddr = addr;
+ /*
+ * Segments need not correspond to page boundaries:
+ * they are permitted to share a page. If two PT_LOAD
+ * segments share the same page, and the permissions
+ * of the segments differ, the behavior is historically
+ * that the permissions of the latter segment are used
+ * for the page that the two segments share. This is
+ * also historically a non-issue: binaries generated
+ * by most anything will make sure that two PT_LOAD
+ * segments with differing permissions don't actually
+ * share any pages. However, there exist some crazy
+ * things out there (including at least an obscure
+ * Portuguese teaching language called G-Portugol) that
+ * actually do the wrong thing and expect it to work:
+ * they have a segment with execute permission share
+ * a page with a subsequent segment that does not
+ * have execute permissions and expect the resulting
+ * shared page to in fact be executable. To accommodate
+ * such broken link editors, we take advantage of a
+ * latitude explicitly granted to the loader: it is
+ * permitted to make _any_ PT_LOAD segment executable
+ * (provided that it is readable or writable). If we
+ * see that we're sharing a page and that the previous
+ * page was executable, we will add execute permissions
+ * to our segment.
+ */
+ if (btop(lastaddr) == btop((uintptr_t)addr) &&
+ (phdr->p_flags & (PF_R | PF_W)) &&
+ (lastprot & PROT_EXEC)) {
+ prot |= PROT_EXEC;
+ }
+
+ lastaddr = (uintptr_t)addr + phdr->p_filesz;
+ lastprot = prot;
+
zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;
offset = phdr->p_offset;
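
The sharing test as a stand-alone predicate, with the sysconf(3C) page size standing in for the kernel's btop(); lastend corresponds to lastaddr above (the previous segment's address plus p_filesz), and inherit_exec() is an illustrative name:

#include <elf.h>
#include <sys/mman.h>
#include <stdint.h>
#include <unistd.h>

static int
inherit_exec(uintptr_t lastend, int lastprot, uintptr_t addr, uint32_t pflags)
{
	uintptr_t pgsz = (uintptr_t)sysconf(_SC_PAGESIZE);

	return ((lastend / pgsz) == (addr / pgsz) &&	/* same page */
	    (pflags & (PF_R | PF_W)) != 0 &&		/* readable or writable */
	    (lastprot & PROT_EXEC) != 0);		/* prior segment executable */
}
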
@@ -1521,8 +1736,22 @@ mapelfexec(
break;
case PT_INTERP:
- if (ptload)
- goto bad;
+ /*
+ * The ELF specification is unequivocal about the
+ * PT_INTERP program header with respect to any PT_LOAD
+ * program header: "If it is present, it must precede
+ * any loadable segment entry." Linux, however, makes
+ * no attempt to enforce this -- which has allowed some
+ * binary editing tools to get away with generating
+ * invalid ELF binaries in the respect that PT_INTERP
+ * occurs after the first PT_LOAD program header. This
+ * is unfortunate (and of course, disappointing) but
+ * it's no worse than that: there is no reason that we
+ * can't process the PT_INTERP entry (if present) after
+ * one or more PT_LOAD entries. We therefore
+ * deliberately do not check ptload here and always
+ * store intphdr as the PT_INTERP program header.
+ */
*intphdr = phdr;
break;
@@ -1629,6 +1858,7 @@ elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
return (0);
}
+
/*
* Copy the section data from one vnode to the section of another vnode.
*/
@@ -1676,28 +1906,38 @@ elf_copy_scn(elf_core_ctx_t *ctx, const Shdr *src, vnode_t *src_vp, Shdr *dst)
}
/*
+ * The design of this check is intentional.
+ * In particular, we want to capture any sections that begin with '.debug_' for
+ * a few reasons:
+ *
+ * 1) Various revisions to the DWARF spec end up changing the set of section
+ * headers that exist. This ensures that we don't need to change the kernel
+ * to get a new version.
+ *
+ * 2) Other software uses .debug_ sections for things which aren't DWARF. This
+ * allows them to be captured as well.
+ */
+#define IS_DEBUGSECTION(name) (strncmp(name, ".debug_", strlen(".debug_")) == 0)
+
+/*
* Walk sections for a given ELF object, counting (or copying) those of
* interest (CTF, symtab, strtab, .debug_*).
*/
-static int
+static uint_t
elf_process_obj_scns(elf_core_ctx_t *ctx, vnode_t *mvp, caddr_t saddr,
- Shdr *v, uint_t idx, uint_t remain, shstrtab_t *shstrtab, uint_t *countp)
+ Shdr *v, uint_t idx, uint_t remain, shstrtab_t *shstrtab, int *errp)
{
Ehdr ehdr;
const core_content_t content = ctx->ecc_content;
cred_t *credp = ctx->ecc_credp;
Shdr *ctf = NULL, *symtab = NULL, *strtab = NULL;
uintptr_t off = 0;
- uint_t nshdrs, shstrndx, nphdrs, count = 0;
+ uint_t nshdrs, shstrndx, nphdrs, ndebug, count = 0;
u_offset_t *doffp = &ctx->ecc_doffset;
boolean_t ctf_link = B_FALSE;
caddr_t shbase;
size_t shsize, shstrsize;
char *shstrbase;
- int error = 0;
- const boolean_t justcounting = v == NULL;
-
- *countp = 0;
if ((content &
(CC_CONTENT_CTF | CC_CONTENT_SYMTAB | CC_CONTENT_DEBUG)) == 0) {
@@ -1712,6 +1952,7 @@ elf_process_obj_scns(elf_core_ctx_t *ctx, vnode_t *mvp, caddr_t saddr,
/* Starting at index 1 skips SHT_NULL which is expected at index 0 */
off = ehdr.e_shentsize;
+ ndebug = 0;
for (uint_t i = 1; i < nshdrs; i++, off += ehdr.e_shentsize) {
Shdr *shdr, *symchk = NULL, *strchk;
const char *name;
@@ -1739,51 +1980,8 @@ elf_process_obj_scns(elf_core_ctx_t *ctx, vnode_t *mvp, caddr_t saddr,
strcmp(name, shstrtab_data[STR_SYMTAB]) == 0) {
symchk = shdr;
} else if ((content & CC_CONTENT_DEBUG) != 0 &&
- strncmp(name, ".debug_", strlen(".debug_")) == 0) {
- /*
- * The design of the above check is intentional. In
- * particular, we want to capture any sections that
- * begin with '.debug_' for a few reasons:
- *
- * 1) Various revisions to the DWARF spec end up
- * changing the set of section headers that exist. This
- * ensures that we don't need to change the kernel to
- * get a new version.
- *
- * 2) Other software uses .debug_ sections for things
- * which aren't DWARF. This allows them to be captured
- * as well.
- */
- count++;
-
- if (!justcounting) {
- if (count > remain) {
- error = ENOMEM;
- goto done;
- }
-
- elf_ctx_resize_scratch(ctx, shdr->sh_size);
-
- if (!shstrtab_ndx(shstrtab,
- name, &v[idx].sh_name)) {
- error = ENOMEM;
- goto done;
- }
-
- v[idx].sh_addr = (Addr)(uintptr_t)saddr;
- v[idx].sh_type = shdr->sh_type;
- v[idx].sh_addralign = shdr->sh_addralign;
- *doffp = roundup(*doffp, v[idx].sh_addralign);
- v[idx].sh_offset = *doffp;
- v[idx].sh_size = shdr->sh_size;
- v[idx].sh_link = 0;
- v[idx].sh_entsize = shdr->sh_entsize;
- v[idx].sh_info = shdr->sh_info;
-
- elf_copy_scn(ctx, shdr, mvp, &v[idx]);
- idx++;
- }
-
+ IS_DEBUGSECTION(name)) {
+ ndebug++;
continue;
} else {
continue;
@@ -1815,24 +2013,19 @@ elf_process_obj_scns(elf_core_ctx_t *ctx, vnode_t *mvp, caddr_t saddr,
count += 1;
if (symtab != NULL)
count += 2;
-
- if (count > remain) {
- count = remain;
- if (!justcounting)
- error = ENOMEM;
+ count += ndebug;
+ if (v == NULL || count == 0 || count > remain) {
+ count = MIN(count, remain);
goto done;
}
- if (justcounting)
- goto done;
-
/* output CTF section */
if (ctf != NULL) {
elf_ctx_resize_scratch(ctx, ctf->sh_size);
if (!shstrtab_ndx(shstrtab,
shstrtab_data[STR_CTF], &v[idx].sh_name)) {
- error = ENOMEM;
+ *errp = ENOMEM;
goto done;
}
v[idx].sh_addr = (Addr)(uintptr_t)saddr;
@@ -1875,12 +2068,12 @@ elf_process_obj_scns(elf_core_ctx_t *ctx, vnode_t *mvp, caddr_t saddr,
if (!shstrtab_ndx(shstrtab,
shstrtab_data[symtab_type], &symtab_name)) {
- error = ENOMEM;
+ *errp = ENOMEM;
goto done;
}
if (!shstrtab_ndx(shstrtab,
shstrtab_data[strtab_type], &strtab_name)) {
- error = ENOMEM;
+ *errp = ENOMEM;
goto done;
}
@@ -1915,14 +2108,52 @@ elf_process_obj_scns(elf_core_ctx_t *ctx, vnode_t *mvp, caddr_t saddr,
idx++;
}
+ if (ndebug == 0)
+ goto done;
+
+ /* output DEBUG sections */
+ off = 0;
+ for (uint_t i = 1; i < nshdrs; i++, off += ehdr.e_shentsize) {
+ const char *name;
+ Shdr *shdr;
+
+ shdr = (Shdr *)(shbase + off);
+ if (shdr->sh_name >= shstrsize || shdr->sh_type == SHT_NULL)
+ continue;
+
+ name = shstrbase + shdr->sh_name;
+
+ if (!IS_DEBUGSECTION(name))
+ continue;
+
+ elf_ctx_resize_scratch(ctx, shdr->sh_size);
+
+ if (!shstrtab_ndx(shstrtab, name, &v[idx].sh_name)) {
+ *errp = ENOMEM;
+ goto done;
+ }
+
+ v[idx].sh_addr = (Addr)(uintptr_t)saddr;
+ v[idx].sh_type = shdr->sh_type;
+ v[idx].sh_addralign = shdr->sh_addralign;
+ *doffp = roundup(*doffp, v[idx].sh_addralign);
+ v[idx].sh_offset = *doffp;
+ v[idx].sh_size = shdr->sh_size;
+ v[idx].sh_link = 0;
+ v[idx].sh_entsize = shdr->sh_entsize;
+ v[idx].sh_info = shdr->sh_info;
+
+ elf_copy_scn(ctx, shdr, mvp, &v[idx]);
+ idx++;
+
+ if (--ndebug == 0)
+ break;
+ }
+
done:
kmem_free(shstrbase, shstrsize);
kmem_free(shbase, shsize);
-
- if (error == 0)
- *countp = count;
-
- return (error);
+ return (count);
}
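
The reworked flow in miniature: count matching sections first, cap to the caller's remaining slots, then (only when a destination is supplied) fill exactly that many entries. The desc_t type, the names[] and sizes[] arrays, and collect_debug() are illustrative stand-ins for the Shdr bookkeeping, not part of the change:

#include <string.h>
#include <sys/types.h>

#define	IS_DEBUGSECTION(name) (strncmp(name, ".debug_", strlen(".debug_")) == 0)

typedef struct {
	const char *d_name;
	size_t d_size;
} desc_t;

static uint_t
collect_debug(const char **names, const size_t *sizes, uint_t nsec,
    desc_t *v, uint_t remain)
{
	uint_t count = 0, idx = 0;

	/* Pass 1: count the sections of interest. */
	for (uint_t i = 0; i < nsec; i++) {
		if (IS_DEBUGSECTION(names[i]))
			count++;
	}
	if (v == NULL || count == 0 || count > remain)
		return (count < remain ? count : remain);

	/* Pass 2: emit exactly the counted entries. */
	for (uint_t i = 0; i < nsec && idx < count; i++) {
		if (!IS_DEBUGSECTION(names[i]))
			continue;
		v[idx].d_name = names[i];
		v[idx].d_size = sizes[i];
		idx++;
	}
	return (count);
}
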
/*
@@ -1979,8 +2210,9 @@ elf_process_scns(elf_core_ctx_t *ctx, Shdr *v, uint_t nv, uint_t *nshdrsp)
if (seg->s_ops != &segvn_ops ||
SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
mvp == lastvp || mvp == NULL || mvp->v_type != VREG ||
- (segsize = pr_getsegsize(seg, 1)) == 0)
+ (segsize = pr_getsegsize(seg, 1)) == 0) {
continue;
+ }
eaddr = saddr + segsize;
prot = pr_getprot(seg, 1, &tmp, &saddr, &naddr, eaddr);
@@ -1993,8 +2225,8 @@ elf_process_scns(elf_core_ctx_t *ctx, Shdr *v, uint_t nv, uint_t *nshdrsp)
if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC)
continue;
- error = elf_process_obj_scns(ctx, mvp, saddr, v, idx, remain,
- &shstrtab, &count);
+ count = elf_process_obj_scns(ctx, mvp, saddr, v, idx, remain,
+ &shstrtab, &error);
if (error != 0)
goto done;
@@ -2106,8 +2338,9 @@ top:
* we overflow the 16 bits allotted to the program header count in
* the ELF header, we'll need that program header at index zero.
*/
- if (nshdrs == 0 && nphdrs >= PN_XNUM)
+ if (nshdrs == 0 && nphdrs >= PN_XNUM) {
nshdrs = 1;
+ }
/*
* Allocate a buffer which is sized adequately to hold the ehdr, phdrs
@@ -2556,7 +2789,7 @@ static struct modlexec modlexec = {
extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
intpdata_t *idatap, int level, size_t *execsz,
int setid, caddr_t exec_file, cred_t *cred,
- int brand_action);
+ int *brand_action);
extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
rlim64_t rlimit, int sig, core_content_t content);
diff --git a/usr/src/uts/common/exec/elf/elf_notes.c b/usr/src/uts/common/exec/elf/elf_notes.c
index 78305cc076..0a0d405eba 100644
--- a/usr/src/uts/common/exec/elf/elf_notes.c
+++ b/usr/src/uts/common/exec/elf/elf_notes.c
@@ -347,11 +347,13 @@ write_elfnotes(proc_t *p, int sig, vnode_t *vp, offset_t offset,
/* open file table */
+ mutex_enter(&p->p_lock);
vroot = PTOU(p)->u_rdir;
if (vroot == NULL)
vroot = rootdir;
VN_HOLD(vroot);
+ mutex_exit(&p->p_lock);
fip = P_FINFO(p);