diff options
author | Bryan Cantrill <bryan@joyent.com> | 2013-03-11 17:58:53 +0000 |
---|---|---|
committer | Bryan Cantrill <bryan@joyent.com> | 2013-03-11 17:58:53 +0000 |
commit | 077880e88374f944d97873eb2b40105b7a775345 (patch) | |
tree | dfce1ef31dd564ff1f8f4d98647194623e532f36 | |
parent | f92ce8dbf9f6816354b4fcf332853620b6c08e06 (diff) | |
download | illumos-joyent-077880e88374f944d97873eb2b40105b7a775345.tar.gz |
OS-1764 core dumping is abysmally slow
Reviewed by: Keith Wesolowski <keith.wesolowski@joyent.com>
Reviewed by: Joshua M. Clulow <josh@sysmgr.org>
-rw-r--r-- | usr/src/cmd/sgs/include/conv.h | 2 | ||||
-rw-r--r-- | usr/src/cmd/sgs/libconv/common/phdr.c | 6 | ||||
-rw-r--r-- | usr/src/cmd/sgs/libconv/common/phdr.msg | 4 | ||||
-rw-r--r-- | usr/src/lib/libproc/common/Pcontrol.h | 2 | ||||
-rw-r--r-- | usr/src/lib/libproc/common/Pcore.c | 75 | ||||
-rw-r--r-- | usr/src/man/man4/core.4 | 10 | ||||
-rw-r--r-- | usr/src/uts/common/exec/elf/elf.c | 80 | ||||
-rw-r--r-- | usr/src/uts/common/os/core.c | 33 | ||||
-rw-r--r-- | usr/src/uts/common/sys/elf.h | 2 | ||||
-rw-r--r-- | usr/src/uts/common/sys/proc.h | 6 |
10 files changed, 194 insertions, 26 deletions
diff --git a/usr/src/cmd/sgs/include/conv.h b/usr/src/cmd/sgs/include/conv.h index 611feb403e..8f4dd4a584 100644 --- a/usr/src/cmd/sgs/include/conv.h +++ b/usr/src/cmd/sgs/include/conv.h @@ -277,7 +277,7 @@ typedef union { } Conv_bnd_obj_buf_t; /* conv_phdr_flags() */ -#define CONV_PHDR_FLAGS_BUFSIZE 57 +#define CONV_PHDR_FLAGS_BUFSIZE 88 typedef union { Conv_inv_buf_t inv_buf; char buf[CONV_PHDR_FLAGS_BUFSIZE]; diff --git a/usr/src/cmd/sgs/libconv/common/phdr.c b/usr/src/cmd/sgs/libconv/common/phdr.c index c7522da44a..05aeb0a89b 100644 --- a/usr/src/cmd/sgs/libconv/common/phdr.c +++ b/usr/src/cmd/sgs/libconv/common/phdr.c @@ -215,6 +215,8 @@ conv_phdr_flags_strings(Conv_fmt_flags_t fmt_flags) MSG_PF_W_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \ MSG_PF_R_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \ MSG_PF_SUNW_FAILURE_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \ + MSG_PF_SUNW_KILLED_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \ + MSG_PF_SUNW_SIGINFO_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \ CONV_INV_BUFSIZE + CONV_EXPN_FIELD_DEF_SUFFIX_SIZE /* @@ -241,6 +243,8 @@ conv_phdr_flags_strings(Conv_fmt_flags_t fmt_flags) { PF_W, ALL, MSG_PF_W_CF }, { PF_R, ALL, MSG_PF_R_CF }, { PF_SUNW_FAILURE, SOL, MSG_PF_SUNW_FAILURE_CF }, + { PF_SUNW_KILLED, SOL, MSG_PF_SUNW_KILLED_CF }, + { PF_SUNW_SIGINFO, SOL, MSG_PF_SUNW_SIGINFO_CF }, { 0 } }; static const Val_desc2 vda_nf[] = { @@ -248,6 +252,8 @@ conv_phdr_flags_strings(Conv_fmt_flags_t fmt_flags) { PF_W, ALL, MSG_PF_W_NF }, { PF_R, ALL, MSG_PF_R_NF }, { PF_SUNW_FAILURE, SOL, MSG_PF_SUNW_FAILURE_NF }, + { PF_SUNW_KILLED, SOL, MSG_PF_SUNW_KILLED_NF }, + { PF_SUNW_SIGINFO, SOL, MSG_PF_SUNW_SIGINFO_NF }, { 0 } }; diff --git a/usr/src/cmd/sgs/libconv/common/phdr.msg b/usr/src/cmd/sgs/libconv/common/phdr.msg index c4af313935..789832a16a 100644 --- a/usr/src/cmd/sgs/libconv/common/phdr.msg +++ b/usr/src/cmd/sgs/libconv/common/phdr.msg @@ -105,5 +105,9 @@ @ MSG_PF_R_NF "r" @ MSG_PF_SUNW_FAILURE_CF "PF_SUNW_FAILURE" # 0x00100000 @ 
MSG_PF_SUNW_FAILURE_NF "sunw_failure" +@ MSG_PF_SUNW_KILLED_CF "PF_SUNW_KILLED" # 0x00200000 +@ MSG_PF_SUNW_KILLED_NF "sunw_killed" +@ MSG_PF_SUNW_SIGINFO_CF "PF_SUNW_SIGINFO" # 0x00400000 +@ MSG_PF_SUNW_SIGINFO_NF "sunw_siginfo" @ MSG_GBL_ZERO "0" diff --git a/usr/src/lib/libproc/common/Pcontrol.h b/usr/src/lib/libproc/common/Pcontrol.h index 4774093b6d..b3e3dbb930 100644 --- a/usr/src/lib/libproc/common/Pcontrol.h +++ b/usr/src/lib/libproc/common/Pcontrol.h @@ -233,6 +233,8 @@ struct ps_prochandle { char *zoneroot; /* cached path to zone root */ plist_t fd_head; /* head of file desc info list */ int num_fd; /* number of file descs in list */ + uintptr_t map_missing; /* first missing mapping in core due to sig */ + siginfo_t killinfo; /* signal that interrupted core dump */ }; /* flags */ diff --git a/usr/src/lib/libproc/common/Pcore.c b/usr/src/lib/libproc/common/Pcore.c index 3e0350eb04..4eff92a8d6 100644 --- a/usr/src/lib/libproc/common/Pcore.c +++ b/usr/src/lib/libproc/common/Pcore.c @@ -29,6 +29,7 @@ #include <sys/types.h> #include <sys/utsname.h> #include <sys/sysmacros.h> +#include <sys/proc.h> #include <alloca.h> #include <rtld_db.h> @@ -40,6 +41,7 @@ #include <errno.h> #include <gelf.h> #include <stddef.h> +#include <signal.h> #include "libproc.h" #include "Pcontrol.h" @@ -727,6 +729,69 @@ static int (*nhdlrs[])(struct ps_prochandle *, size_t) = { note_fdinfo, /* 22 NT_FDINFO */ }; +static void +core_report_mapping(struct ps_prochandle *P, GElf_Phdr *php) +{ + prkillinfo_t killinfo; + siginfo_t *si = &killinfo.prk_info; + char signame[SIG2STR_MAX], sig[64], info[64]; + void *addr = (void *)(uintptr_t)php->p_vaddr; + + const char *errfmt = "core file data for mapping at %p not saved: %s\n"; + const char *incfmt = "core file incomplete due to %s%s\n"; + const char *msgfmt = "mappings at and above %p are missing\n"; + + if (!(php->p_flags & PF_SUNW_KILLED)) { + int err = 0; + + (void) pread64(P->asfd, &err, + sizeof (err), (off64_t)php->p_offset); + + 
Perror_printf(P, errfmt, addr, strerror(err)); + dprintf(errfmt, addr, strerror(err)); + return; + } + + if (!(php->p_flags & PF_SUNW_SIGINFO)) + return; + + (void) memset(&killinfo, 0, sizeof (killinfo)); + + (void) pread64(P->asfd, &killinfo, + sizeof (killinfo), (off64_t)php->p_offset); + + /* + * While there is (or at least should be) only one segment that has + * PF_SUNW_SIGINFO set, the signal information there is globally + * useful (even if only to those debugging libproc consumers); we hang + * the signal information gleaned here off of the ps_prochandle. + */ + P->map_missing = php->p_vaddr; + P->killinfo = killinfo.prk_info; + + if (sig2str(si->si_signo, signame) == -1) { + (void) snprintf(sig, sizeof (sig), + "<Unknown signal: 0x%x>, ", si->si_signo); + } else { + (void) snprintf(sig, sizeof (sig), "SIG%s, ", signame); + } + + if (si->si_code == SI_USER || si->si_code == SI_QUEUE) { + (void) snprintf(info, sizeof (info), + "pid=%d uid=%d zone=%d ctid=%d", + si->si_pid, si->si_uid, si->si_zoneid, si->si_ctid); + } else { + (void) snprintf(info, sizeof (info), + "code=%d", si->si_code); + } + + Perror_printf(P, incfmt, sig, info); + Perror_printf(P, msgfmt, addr); + + dprintf(incfmt, sig, info); + dprintf(msgfmt, addr); +} + /* * Add information on the address space mapping described by the given * PT_LOAD program header. We fill in more information on the mapping later. @@ -734,7 +799,6 @@ static int (*nhdlrs[])(struct ps_prochandle *, size_t) = { static int core_add_mapping(struct ps_prochandle *P, GElf_Phdr *php) { - int err = 0; prmap_t pmap; dprintf("mapping base %llx filesz %llu memsz %llu offset %llu\n", @@ -749,14 +813,7 @@ core_add_mapping(struct ps_prochandle *P, GElf_Phdr *php) * PF_SUNW_FAILURE in the Phdr and try to stash away the errno for us. 
*/ if (php->p_flags & PF_SUNW_FAILURE) { - (void) pread64(P->asfd, &err, - sizeof (err), (off64_t)php->p_offset); - - Perror_printf(P, "core file data for mapping at %p not saved: " - "%s\n", (void *)(uintptr_t)php->p_vaddr, strerror(err)); - dprintf("core file data for mapping at %p not saved: %s\n", - (void *)(uintptr_t)php->p_vaddr, strerror(err)); - + core_report_mapping(P, php); } else if (php->p_filesz != 0 && php->p_offset >= P->core->core_size) { Perror_printf(P, "core file may be corrupt -- data for mapping " "at %p is missing\n", (void *)(uintptr_t)php->p_vaddr); diff --git a/usr/src/man/man4/core.4 b/usr/src/man/man4/core.4 index c6bc9ea373..3c773a3a6c 100644 --- a/usr/src/man/man4/core.4 +++ b/usr/src/man/man4/core.4 @@ -130,9 +130,13 @@ represent mappings whose data is included in the core file have their .sp .LP A mapping's data can be excluded due to the core file content settings (see -\fBcoreadm\fR(1M)), or due to some failure. If the data is excluded because of -a failure, the program header entry will have the \fBPF_SUNW_FAILURE\fR flag -set in its \fBp_flags\fR field. +\fBcoreadm\fR(1M)), due to a failure, or due to a signal received after +core dump initiation but before its completion. If the data is excluded +because of a failure, the program header entry will have the +\fBPF_SUNW_FAILURE\fR flag +set in its \fBp_flags\fR field; if the data is excluded because of a signal, +the segment's \fBp_flags\fR field will have the \fBPF_SUNW_KILLED\fR +flag set. 
.sp .LP The program headers of an \fBELF\fR core file also contain entries for two diff --git a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c index ab46ff3924..e5a5294233 100644 --- a/usr/src/uts/common/exec/elf/elf.c +++ b/usr/src/uts/common/exec/elf/elf.c @@ -64,6 +64,7 @@ #include <sys/brand.h> #include "elf_impl.h" #include <sys/sdt.h> +#include <sys/siginfo.h> extern int at_flags; @@ -1726,6 +1727,7 @@ elfcore(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig, caddr_t stkbase; size_t stksize; int ntries = 0; + klwp_t *lwp = ttolwp(curthread); top: /* @@ -1989,6 +1991,10 @@ exclude: goto done; for (i = 2; i < nphdrs; i++) { + prkillinfo_t killinfo; + sigqueue_t *sq; + int sig, j; + if (v[i].p_filesz == 0) continue; @@ -2002,9 +2008,13 @@ exclude: */ if ((error = core_seg(p, vp, v[i].p_offset, (caddr_t)(uintptr_t)v[i].p_vaddr, v[i].p_filesz, - rlimit, credp)) != 0) { + rlimit, credp)) == 0) { + continue; + } + if ((sig = lwp->lwp_cursig) == 0) { /* + * We failed due to something other than a signal. * Since the space reserved for the segment is now * unused, we stash the errno in the first four * bytes. This undocumented interface will let us @@ -2019,7 +2029,75 @@ exclude: poffset + sizeof (v[i]) * i, &v[i], sizeof (v[i]), rlimit, credp)) != 0) goto done; + + continue; + } + + /* + * We took a signal. We want to abort the dump entirely, but + * we also want to indicate what failed and why. We therefore + * use the space reserved for the first failing segment to + * write our error (which, for purposes of compatibility with + * older core dump readers, we set to EINTR) followed by any + * siginfo associated with the signal. + */ + bzero(&killinfo, sizeof (killinfo)); + killinfo.prk_error = EINTR; + + sq = sig == SIGKILL ? 
curproc->p_killsqp : lwp->lwp_curinfo; + + if (sq != NULL) { + bcopy(&sq->sq_info, &killinfo.prk_info, + sizeof (killinfo.prk_info)); + } else { + killinfo.prk_info.si_signo = lwp->lwp_cursig; + killinfo.prk_info.si_code = SI_NOINFO; } + +#if (defined(_SYSCALL32_IMPL) || defined(_LP64)) + /* + * If this is a 32-bit process, we need to translate from the + * native siginfo to the 32-bit variant. (Core readers must + * always have the same data model as their target or must + * be aware of -- and compensate for -- data model differences.) + */ + if (curproc->p_model == DATAMODEL_ILP32) { + siginfo32_t si32; + + siginfo_kto32((k_siginfo_t *)&killinfo.prk_info, &si32); + bcopy(&si32, &killinfo.prk_info, sizeof (si32)); + } +#endif + + (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset, + &killinfo, sizeof (killinfo), rlimit, credp); + + /* + * For the segment on which we took the signal, indicate that + * its data now refers to a siginfo. + */ + v[i].p_filesz = 0; + v[i].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED | + PF_SUNW_SIGINFO; + + /* + * And for every other segment, indicate that its absence + * is due to a signal. + */ + for (j = i + 1; j < nphdrs; j++) { + v[j].p_filesz = 0; + v[j].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED; + } + + /* + * Finally, write out our modified program headers. + */ + if ((error = core_write(vp, UIO_SYSSPACE, + poffset + sizeof (v[i]) * i, &v[i], + sizeof (v[i]) * (nphdrs - i), rlimit, credp)) != 0) + goto done; + + break; } if (nshdrs > 0) { diff --git a/usr/src/uts/common/os/core.c b/usr/src/uts/common/os/core.c index 3b3935a772..05deaf4151 100644 --- a/usr/src/uts/common/os/core.c +++ b/usr/src/uts/common/os/core.c @@ -64,6 +64,7 @@ #include <sys/contract/process_impl.h> #include <sys/ddi.h> +extern int yield(void); /* * Processes running within a zone potentially dump core in 3 locations, * based on the per-process, per-zone, and the global zone's core settings. 
@@ -659,11 +660,11 @@ core(int sig, int ext) } /* - * Block all signals except SIGHUP, SIGINT, SIGKILL, and SIGTERM. - * These signals are allowed to interrupt the core dump. - * SIGQUIT is not allowed because it is supposed to make a core. - * Additionally, get current limit on core file size for handling later - * error reporting. + * Block all signals except SIGHUP, SIGINT, SIGKILL, and SIGTERM; no + * other signal may interrupt a core dump. For each signal, we + * explicitly unblock it and set it in p_siginfo to allow for some + * minimal error reporting. Additionally, we get the current limit on + * core file size for handling later error reporting. */ mutex_enter(&p->p_lock); @@ -679,6 +680,12 @@ core(int sig, int ext) sigdelset(&sighold, SIGKILL); if (!sigismember(&sigmask, SIGTERM)) sigdelset(&sighold, SIGTERM); + + sigaddset(&p->p_siginfo, SIGHUP); + sigaddset(&p->p_siginfo, SIGINT); + sigaddset(&p->p_siginfo, SIGKILL); + sigaddset(&p->p_siginfo, SIGTERM); + curthread->t_hold = sighold; rlimit = rctl_enforced_value(rctlproc_legacy[RLIMIT_CORE], p->p_rctls, @@ -797,6 +804,7 @@ core_seg(proc_t *p, vnode_t *vp, offset_t offset, caddr_t addr, size_t size, len = eaddr - base; if (as_memory(p->p_as, &base, &len) != 0) return (0); + /* * Reduce len to a reasonable value so that we don't * overwhelm the VM system with a monstrously large @@ -808,16 +816,17 @@ core_seg(proc_t *p, vnode_t *vp, offset_t offset, caddr_t addr, size_t size, err = core_write(vp, UIO_USERSPACE, offset + (size_t)(base - addr), base, len, rlimit, credp); - if (err == 0) { - /* - * Give pageout a chance to run. - * Also allow core dumping to be interruptible. - */ - err = delay_sig(drv_usectohz(core_delay_usec)); - } if (err) return (err); + + /* + * If we have taken a signal, return EINTR to allow the dump + * to be aborted. 
+ */ + if (issig(JUSTLOOKING) && issig(FORREAL)) + return (EINTR); } + return (0); } diff --git a/usr/src/uts/common/sys/elf.h b/usr/src/uts/common/sys/elf.h index bc25aee9c4..9bda8486e4 100644 --- a/usr/src/uts/common/sys/elf.h +++ b/usr/src/uts/common/sys/elf.h @@ -366,6 +366,8 @@ typedef struct { #define PF_MASKPROC 0xf0000000 /* processor specific values */ #define PF_SUNW_FAILURE 0x00100000 /* mapping absent due to failure */ +#define PF_SUNW_KILLED 0x00200000 /* signal received during dump */ +#define PF_SUNW_SIGINFO 0x00400000 /* segment has killing sig's siginfo */ #define PN_XNUM 0xffff /* extended program header index */ diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h index 762e7149b2..f1a2fc5485 100644 --- a/usr/src/uts/common/sys/proc.h +++ b/usr/src/uts/common/sys/proc.h @@ -574,6 +574,12 @@ typedef enum { LWP_STAT_MSGSND } lwp_stat_id_t; +typedef struct prkillinfo { + int32_t prk_error; /* errno */ + int32_t prk_pad; /* pad */ + siginfo_t prk_info; /* siginfo of killing signal */ +} prkillinfo_t; + #ifdef _KERNEL /* user profiling functions */ |