summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Bryan Cantrill <bryan@joyent.com> 2013-03-11 17:58:53 +0000
committer Bryan Cantrill <bryan@joyent.com> 2013-03-11 17:58:53 +0000
commit 077880e88374f944d97873eb2b40105b7a775345 (patch)
tree dfce1ef31dd564ff1f8f4d98647194623e532f36
parent f92ce8dbf9f6816354b4fcf332853620b6c08e06 (diff)
download illumos-joyent-077880e88374f944d97873eb2b40105b7a775345.tar.gz
OS-1764 core dumping is abysmally slow
Reviewed by: Keith Wesolowski <keith.wesolowski@joyent.com>
Reviewed by: Joshua M. Clulow <josh@sysmgr.org>
-rw-r--r-- usr/src/cmd/sgs/include/conv.h 2
-rw-r--r-- usr/src/cmd/sgs/libconv/common/phdr.c 6
-rw-r--r-- usr/src/cmd/sgs/libconv/common/phdr.msg 4
-rw-r--r-- usr/src/lib/libproc/common/Pcontrol.h 2
-rw-r--r-- usr/src/lib/libproc/common/Pcore.c 75
-rw-r--r-- usr/src/man/man4/core.4 10
-rw-r--r-- usr/src/uts/common/exec/elf/elf.c 80
-rw-r--r-- usr/src/uts/common/os/core.c 33
-rw-r--r-- usr/src/uts/common/sys/elf.h 2
-rw-r--r-- usr/src/uts/common/sys/proc.h 6
10 files changed, 194 insertions, 26 deletions
diff --git a/usr/src/cmd/sgs/include/conv.h b/usr/src/cmd/sgs/include/conv.h
index 611feb403e..8f4dd4a584 100644
--- a/usr/src/cmd/sgs/include/conv.h
+++ b/usr/src/cmd/sgs/include/conv.h
@@ -277,7 +277,7 @@ typedef union {
} Conv_bnd_obj_buf_t;
/* conv_phdr_flags() */
-#define CONV_PHDR_FLAGS_BUFSIZE 57
+#define CONV_PHDR_FLAGS_BUFSIZE 88
typedef union {
Conv_inv_buf_t inv_buf;
char buf[CONV_PHDR_FLAGS_BUFSIZE];
diff --git a/usr/src/cmd/sgs/libconv/common/phdr.c b/usr/src/cmd/sgs/libconv/common/phdr.c
index c7522da44a..05aeb0a89b 100644
--- a/usr/src/cmd/sgs/libconv/common/phdr.c
+++ b/usr/src/cmd/sgs/libconv/common/phdr.c
@@ -215,6 +215,8 @@ conv_phdr_flags_strings(Conv_fmt_flags_t fmt_flags)
MSG_PF_W_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
MSG_PF_R_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
MSG_PF_SUNW_FAILURE_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
+ MSG_PF_SUNW_KILLED_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
+ MSG_PF_SUNW_SIGINFO_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
CONV_INV_BUFSIZE + CONV_EXPN_FIELD_DEF_SUFFIX_SIZE
/*
@@ -241,6 +243,8 @@ conv_phdr_flags_strings(Conv_fmt_flags_t fmt_flags)
{ PF_W, ALL, MSG_PF_W_CF },
{ PF_R, ALL, MSG_PF_R_CF },
{ PF_SUNW_FAILURE, SOL, MSG_PF_SUNW_FAILURE_CF },
+ { PF_SUNW_KILLED, SOL, MSG_PF_SUNW_KILLED_CF },
+ { PF_SUNW_SIGINFO, SOL, MSG_PF_SUNW_SIGINFO_CF },
{ 0 }
};
static const Val_desc2 vda_nf[] = {
@@ -248,6 +252,8 @@ conv_phdr_flags_strings(Conv_fmt_flags_t fmt_flags)
{ PF_W, ALL, MSG_PF_W_NF },
{ PF_R, ALL, MSG_PF_R_NF },
{ PF_SUNW_FAILURE, SOL, MSG_PF_SUNW_FAILURE_NF },
+ { PF_SUNW_KILLED, SOL, MSG_PF_SUNW_KILLED_NF },
+ { PF_SUNW_SIGINFO, SOL, MSG_PF_SUNW_SIGINFO_NF },
{ 0 }
};
diff --git a/usr/src/cmd/sgs/libconv/common/phdr.msg b/usr/src/cmd/sgs/libconv/common/phdr.msg
index c4af313935..789832a16a 100644
--- a/usr/src/cmd/sgs/libconv/common/phdr.msg
+++ b/usr/src/cmd/sgs/libconv/common/phdr.msg
@@ -105,5 +105,9 @@
@ MSG_PF_R_NF "r"
@ MSG_PF_SUNW_FAILURE_CF "PF_SUNW_FAILURE" # 0x00100000
@ MSG_PF_SUNW_FAILURE_NF "sunw_failure"
+@ MSG_PF_SUNW_KILLED_CF "PF_SUNW_KILLED" # 0x00200000
+@ MSG_PF_SUNW_KILLED_NF "sunw_killed"
+@ MSG_PF_SUNW_SIGINFO_CF "PF_SUNW_SIGINFO" # 0x00400000
+@ MSG_PF_SUNW_SIGINFO_NF "sunw_siginfo"
@ MSG_GBL_ZERO "0"
diff --git a/usr/src/lib/libproc/common/Pcontrol.h b/usr/src/lib/libproc/common/Pcontrol.h
index 4774093b6d..b3e3dbb930 100644
--- a/usr/src/lib/libproc/common/Pcontrol.h
+++ b/usr/src/lib/libproc/common/Pcontrol.h
@@ -233,6 +233,8 @@ struct ps_prochandle {
char *zoneroot; /* cached path to zone root */
plist_t fd_head; /* head of file desc info list */
int num_fd; /* number of file descs in list */
+ uintptr_t map_missing; /* first missing mapping in core due to sig */
+ siginfo_t killinfo; /* signal that interrupted core dump */
};
/* flags */
diff --git a/usr/src/lib/libproc/common/Pcore.c b/usr/src/lib/libproc/common/Pcore.c
index 3e0350eb04..4eff92a8d6 100644
--- a/usr/src/lib/libproc/common/Pcore.c
+++ b/usr/src/lib/libproc/common/Pcore.c
@@ -29,6 +29,7 @@
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/sysmacros.h>
+#include <sys/proc.h>
#include <alloca.h>
#include <rtld_db.h>
@@ -40,6 +41,7 @@
#include <errno.h>
#include <gelf.h>
#include <stddef.h>
+#include <signal.h>
#include "libproc.h"
#include "Pcontrol.h"
@@ -727,6 +729,69 @@ static int (*nhdlrs[])(struct ps_prochandle *, size_t) = {
note_fdinfo, /* 22 NT_FDINFO */
};
+/*
+ * Report why the data for the mapping described by a PF_SUNW_FAILURE program
+ * header is absent from the core file.
+ *
+ * Two cases are distinguished by the header's p_flags:
+ *
+ *  - PF_SUNW_KILLED clear: dumping this one segment failed.  The int found
+ *    at p_offset is treated as the errno and reported.
+ *
+ *  - PF_SUNW_KILLED set: the dump was aborted by a signal.  Only the header
+ *    that also carries PF_SUNW_SIGINFO is reported; the data at its p_offset
+ *    is read as a prkillinfo_t.  The mapping address and the siginfo are
+ *    stored in P->map_missing and P->killinfo.
+ *
+ * Reads from the core file are best-effort, but their results are checked so
+ * that a truncated or sparse core cannot produce a garbage errno or a bogus
+ * signal name.
+ */
+static void
+core_report_mapping(struct ps_prochandle *P, GElf_Phdr *php)
+{
+	prkillinfo_t killinfo;
+	siginfo_t *si = &killinfo.prk_info;
+	char signame[SIG2STR_MAX], sig[64], info[64];
+	void *addr = (void *)(uintptr_t)php->p_vaddr;
+	ssize_t nread;
+
+	const char *errfmt = "core file data for mapping at %p not saved: %s\n";
+	const char *incfmt = "core file incomplete due to %s%s\n";
+	const char *msgfmt = "mappings at and above %p are missing\n";
+
+	if (!(php->p_flags & PF_SUNW_KILLED)) {
+		int err = 0;
+
+		/*
+		 * A partial read would leave err half-written; fall back to
+		 * 0 in that case, exactly as for a read that fails outright.
+		 */
+		nread = pread64(P->asfd, &err,
+		    sizeof (err), (off64_t)php->p_offset);
+
+		if (nread != (ssize_t)sizeof (err))
+			err = 0;
+
+		Perror_printf(P, errfmt, addr, strerror(err));
+		dprintf(errfmt, addr, strerror(err));
+		return;
+	}
+
+	if (!(php->p_flags & PF_SUNW_SIGINFO))
+		return;
+
+	/*
+	 * Zero the buffer first so that whatever a short read does not cover
+	 * is well-defined.
+	 */
+	(void) memset(&killinfo, 0, sizeof (killinfo));
+
+	nread = pread64(P->asfd, &killinfo,
+	    sizeof (killinfo), (off64_t)php->p_offset);
+
+	/*
+	 * While there is (or at least should be) only one segment that has
+	 * PF_SUNW_SIGINFO set, the signal information there is globally
+	 * useful (even if only to those debugging libproc consumers); we hang
+	 * the signal information gleaned here off of the ps_prochandle.
+	 */
+	P->map_missing = php->p_vaddr;
+	P->killinfo = killinfo.prk_info;
+
+	if (nread <= 0 || si->si_signo == 0) {
+		/*
+		 * Nothing usable was recovered: the read failed, or the
+		 * record was never written and we read zeroes.  Signal 0 is
+		 * never a delivered signal, so say only what the program
+		 * header flags tell us rather than naming one.
+		 */
+		(void) snprintf(sig, sizeof (sig), "a signal");
+		(void) snprintf(info, sizeof (info),
+		    " (siginfo could not be read)");
+	} else {
+		if (sig2str(si->si_signo, signame) == -1) {
+			(void) snprintf(sig, sizeof (sig),
+			    "<Unknown signal: 0x%x>, ", si->si_signo);
+		} else {
+			(void) snprintf(sig, sizeof (sig), "SIG%s, ", signame);
+		}
+
+		/* Sender details are reported only for these two codes. */
+		if (si->si_code == SI_USER || si->si_code == SI_QUEUE) {
+			(void) snprintf(info, sizeof (info),
+			    "pid=%d uid=%d zone=%d ctid=%d",
+			    si->si_pid, si->si_uid, si->si_zoneid,
+			    si->si_ctid);
+		} else {
+			(void) snprintf(info, sizeof (info),
+			    "code=%d", si->si_code);
+		}
+	}
+
+	Perror_printf(P, incfmt, sig, info);
+	Perror_printf(P, msgfmt, addr);
+
+	dprintf(incfmt, sig, info);
+	dprintf(msgfmt, addr);
+}
+
+
/*
* Add information on the address space mapping described by the given
* PT_LOAD program header. We fill in more information on the mapping later.
@@ -734,7 +799,6 @@ static int (*nhdlrs[])(struct ps_prochandle *, size_t) = {
static int
core_add_mapping(struct ps_prochandle *P, GElf_Phdr *php)
{
- int err = 0;
prmap_t pmap;
dprintf("mapping base %llx filesz %llu memsz %llu offset %llu\n",
@@ -749,14 +813,7 @@ core_add_mapping(struct ps_prochandle *P, GElf_Phdr *php)
* PF_SUNW_FAILURE in the Phdr and try to stash away the errno for us.
*/
if (php->p_flags & PF_SUNW_FAILURE) {
- (void) pread64(P->asfd, &err,
- sizeof (err), (off64_t)php->p_offset);
-
- Perror_printf(P, "core file data for mapping at %p not saved: "
- "%s\n", (void *)(uintptr_t)php->p_vaddr, strerror(err));
- dprintf("core file data for mapping at %p not saved: %s\n",
- (void *)(uintptr_t)php->p_vaddr, strerror(err));
-
+ core_report_mapping(P, php);
} else if (php->p_filesz != 0 && php->p_offset >= P->core->core_size) {
Perror_printf(P, "core file may be corrupt -- data for mapping "
"at %p is missing\n", (void *)(uintptr_t)php->p_vaddr);
diff --git a/usr/src/man/man4/core.4 b/usr/src/man/man4/core.4
index c6bc9ea373..3c773a3a6c 100644
--- a/usr/src/man/man4/core.4
+++ b/usr/src/man/man4/core.4
@@ -130,9 +130,13 @@ represent mappings whose data is included in the core file have their
.sp
.LP
A mapping's data can be excluded due to the core file content settings (see
-\fBcoreadm\fR(1M)), or due to some failure. If the data is excluded because of
-a failure, the program header entry will have the \fBPF_SUNW_FAILURE\fR flag
-set in its \fBp_flags\fR field.
+\fBcoreadm\fR(1M)), due to a failure, or due to a signal received after
+core dump initiation but before its completion. If the data is excluded
+because of a failure, the program header entry will have the
+\fBPF_SUNW_FAILURE\fR flag
+set in its \fBp_flags\fR field; if the data is excluded because of a signal,
+the segment's \fBp_flags\fR field will have the \fBPF_SUNW_KILLED\fR
+flag set.
.sp
.LP
The program headers of an \fBELF\fR core file also contain entries for two
diff --git a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c
index ab46ff3924..e5a5294233 100644
--- a/usr/src/uts/common/exec/elf/elf.c
+++ b/usr/src/uts/common/exec/elf/elf.c
@@ -64,6 +64,7 @@
#include <sys/brand.h>
#include "elf_impl.h"
#include <sys/sdt.h>
+#include <sys/siginfo.h>
extern int at_flags;
@@ -1726,6 +1727,7 @@ elfcore(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig,
caddr_t stkbase;
size_t stksize;
int ntries = 0;
+ klwp_t *lwp = ttolwp(curthread);
top:
/*
@@ -1989,6 +1991,10 @@ exclude:
goto done;
for (i = 2; i < nphdrs; i++) {
+ prkillinfo_t killinfo;
+ sigqueue_t *sq;
+ int sig, j;
+
if (v[i].p_filesz == 0)
continue;
@@ -2002,9 +2008,13 @@ exclude:
*/
if ((error = core_seg(p, vp, v[i].p_offset,
(caddr_t)(uintptr_t)v[i].p_vaddr, v[i].p_filesz,
- rlimit, credp)) != 0) {
+ rlimit, credp)) == 0) {
+ continue;
+ }
+ if ((sig = lwp->lwp_cursig) == 0) {
/*
+ * We failed due to something other than a signal.
* Since the space reserved for the segment is now
* unused, we stash the errno in the first four
* bytes. This undocumented interface will let us
@@ -2019,7 +2029,75 @@ exclude:
poffset + sizeof (v[i]) * i, &v[i], sizeof (v[i]),
rlimit, credp)) != 0)
goto done;
+
+ continue;
+ }
+
+ /*
+ * We took a signal. We want to abort the dump entirely, but
+ * we also want to indicate what failed and why. We therefore
+ * use the space reserved for the first failing segment to
+ * write our error (which, for purposes of compatibility with
+ * older core dump readers, we set to EINTR) followed by any
+ * siginfo associated with the signal.
+ */
+ bzero(&killinfo, sizeof (killinfo));
+ killinfo.prk_error = EINTR;
+
+ sq = sig == SIGKILL ? curproc->p_killsqp : lwp->lwp_curinfo;
+
+ if (sq != NULL) {
+ bcopy(&sq->sq_info, &killinfo.prk_info,
+ sizeof (killinfo.prk_info));
+ } else {
+ killinfo.prk_info.si_signo = lwp->lwp_cursig;
+ killinfo.prk_info.si_code = SI_NOINFO;
}
+
+#if (defined(_SYSCALL32_IMPL) || defined(_LP64))
+ /*
+ * If this is a 32-bit process, we need to translate from the
+ * native siginfo to the 32-bit variant. (Core readers must
+ * always have the same data model as their target or must
+ * be aware of -- and compensate for -- data model differences.)
+ */
+ if (curproc->p_model == DATAMODEL_ILP32) {
+ siginfo32_t si32;
+
+ siginfo_kto32((k_siginfo_t *)&killinfo.prk_info, &si32);
+ bcopy(&si32, &killinfo.prk_info, sizeof (si32));
+ }
+#endif
+
+ (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
+ &killinfo, sizeof (killinfo), rlimit, credp);
+
+ /*
+ * For the segment on which we took the signal, indicate that
+ * its data now refers to a siginfo.
+ */
+ v[i].p_filesz = 0;
+ v[i].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED |
+ PF_SUNW_SIGINFO;
+
+ /*
+ * And for every other segment, indicate that its absence
+ * is due to a signal.
+ */
+ for (j = i + 1; j < nphdrs; j++) {
+ v[j].p_filesz = 0;
+ v[j].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED;
+ }
+
+ /*
+ * Finally, write out our modified program headers.
+ */
+ if ((error = core_write(vp, UIO_SYSSPACE,
+ poffset + sizeof (v[i]) * i, &v[i],
+ sizeof (v[i]) * (nphdrs - i), rlimit, credp)) != 0)
+ goto done;
+
+ break;
}
if (nshdrs > 0) {
diff --git a/usr/src/uts/common/os/core.c b/usr/src/uts/common/os/core.c
index 3b3935a772..05deaf4151 100644
--- a/usr/src/uts/common/os/core.c
+++ b/usr/src/uts/common/os/core.c
@@ -64,6 +64,7 @@
#include <sys/contract/process_impl.h>
#include <sys/ddi.h>
+extern int yield(void);
/*
* Processes running within a zone potentially dump core in 3 locations,
* based on the per-process, per-zone, and the global zone's core settings.
@@ -659,11 +660,11 @@ core(int sig, int ext)
}
/*
- * Block all signals except SIGHUP, SIGINT, SIGKILL, and SIGTERM.
- * These signals are allowed to interrupt the core dump.
- * SIGQUIT is not allowed because it is supposed to make a core.
- * Additionally, get current limit on core file size for handling later
- * error reporting.
+ * Block all signals except SIGHUP, SIGINT, SIGKILL, and SIGTERM; no
+ * other signal may interrupt a core dump. For each signal, we
+ * explicitly unblock it and set it in p_siginfo to allow for some
+ * minimal error reporting. Additionally, we get the current limit on
+ * core file size for handling later error reporting.
*/
mutex_enter(&p->p_lock);
@@ -679,6 +680,12 @@ core(int sig, int ext)
sigdelset(&sighold, SIGKILL);
if (!sigismember(&sigmask, SIGTERM))
sigdelset(&sighold, SIGTERM);
+
+ sigaddset(&p->p_siginfo, SIGHUP);
+ sigaddset(&p->p_siginfo, SIGINT);
+ sigaddset(&p->p_siginfo, SIGKILL);
+ sigaddset(&p->p_siginfo, SIGTERM);
+
curthread->t_hold = sighold;
rlimit = rctl_enforced_value(rctlproc_legacy[RLIMIT_CORE], p->p_rctls,
@@ -797,6 +804,7 @@ core_seg(proc_t *p, vnode_t *vp, offset_t offset, caddr_t addr, size_t size,
len = eaddr - base;
if (as_memory(p->p_as, &base, &len) != 0)
return (0);
+
/*
* Reduce len to a reasonable value so that we don't
* overwhelm the VM system with a monstrously large
@@ -808,16 +816,17 @@ core_seg(proc_t *p, vnode_t *vp, offset_t offset, caddr_t addr, size_t size,
err = core_write(vp, UIO_USERSPACE,
offset + (size_t)(base - addr), base, len, rlimit, credp);
- if (err == 0) {
- /*
- * Give pageout a chance to run.
- * Also allow core dumping to be interruptible.
- */
- err = delay_sig(drv_usectohz(core_delay_usec));
- }
if (err)
return (err);
+
+ /*
+ * If we have taken a signal, return EINTR to allow the dump
+ * to be aborted.
+ */
+ if (issig(JUSTLOOKING) && issig(FORREAL))
+ return (EINTR);
}
+
return (0);
}
diff --git a/usr/src/uts/common/sys/elf.h b/usr/src/uts/common/sys/elf.h
index bc25aee9c4..9bda8486e4 100644
--- a/usr/src/uts/common/sys/elf.h
+++ b/usr/src/uts/common/sys/elf.h
@@ -366,6 +366,8 @@ typedef struct {
#define PF_MASKPROC 0xf0000000 /* processor specific values */
#define PF_SUNW_FAILURE 0x00100000 /* mapping absent due to failure */
+#define PF_SUNW_KILLED 0x00200000 /* signal received during dump */
+#define PF_SUNW_SIGINFO 0x00400000 /* segment has killing sig's siginfo */
#define PN_XNUM 0xffff /* extended program header index */
diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h
index 762e7149b2..f1a2fc5485 100644
--- a/usr/src/uts/common/sys/proc.h
+++ b/usr/src/uts/common/sys/proc.h
@@ -574,6 +574,12 @@ typedef enum {
LWP_STAT_MSGSND
} lwp_stat_id_t;
+typedef struct prkillinfo { /* record stored in a core segment when a signal aborts the dump */
+ int32_t prk_error; /* errno kept for older core readers; the writer stores EINTR */
+ int32_t prk_pad; /* pad (presumably keeps prk_info aligned -- TODO confirm) */
+ siginfo_t prk_info; /* siginfo of killing signal */
+} prkillinfo_t;
+
#ifdef _KERNEL
/* user profiling functions */