summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorRichard Lowe <richlowe@richlowe.net>2021-10-03 18:50:39 -0500
committerRichard Lowe <richlowe@richlowe.net>2021-11-06 14:54:00 -0500
commit252adeb303174e992b64771bf9639e63a4d55418 (patch)
tree0faba8d6b598747a2dac61ce2cd458fd43bf3928 /usr/src
parentc53c97f77356a767b8a3cec554ede591cf4074d9 (diff)
downloadillumos-gate-252adeb303174e992b64771bf9639e63a4d55418.tar.gz
14155 ld(1) string table merging could be much faster
14157 ld(1) string table merging should follow gABI more closely Reviewed by: Toomas Soome <tsoome@me.com> Reviewed by: Robert Mustacchi <rm@fingolfin.org> Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/sgs/common/string_table.c179
-rw-r--r--usr/src/cmd/sgs/include/libld.h3
-rw-r--r--usr/src/cmd/sgs/include/string_table.h4
-rw-r--r--usr/src/cmd/sgs/libld/common/sections.c449
-rw-r--r--usr/src/cmd/sgs/tools/SUNWonld-README2
-rw-r--r--usr/src/pkg/manifests/system-test-elftest.mf8
-rw-r--r--usr/src/test/elf-tests/runfiles/default.run5
-rw-r--r--usr/src/test/elf-tests/tests/Makefile1
-rw-r--r--usr/src/test/elf-tests/tests/string-merge/Makefile47
-rw-r--r--usr/src/test/elf-tests/tests/string-merge/simple.sh56
-rw-r--r--usr/src/test/elf-tests/tests/string-merge/str1.s6
-rw-r--r--usr/src/test/elf-tests/tests/string-merge/str2.s4
-rw-r--r--usr/src/tools/sgs/sgsmsg/Makefile1
13 files changed, 521 insertions, 244 deletions
diff --git a/usr/src/cmd/sgs/common/string_table.c b/usr/src/cmd/sgs/common/string_table.c
index c15473150e..9cbf23e54b 100644
--- a/usr/src/cmd/sgs/common/string_table.c
+++ b/usr/src/cmd/sgs/common/string_table.c
@@ -227,12 +227,15 @@ st_insert(Str_tbl *stp, const char *str)
if ((lnp = avl_find(stp->st_lentree, &ln, &where)) == NULL) {
if ((lnp = calloc(sizeof (*lnp), 1)) == NULL)
return (-1);
- lnp->ln_strlen = len;
- avl_insert(stp->st_lentree, lnp, where);
if ((lnp->ln_strtree = calloc(sizeof (*lnp->ln_strtree), 1)) ==
- NULL)
- return (0);
+ NULL) {
+ free(lnp);
+ return (-1);
+ }
+
+ lnp->ln_strlen = len;
+ avl_insert(stp->st_lentree, lnp, where);
avl_create(lnp->ln_strtree, &avl_str_compare, sizeof (StrNode),
SGSOFFSETOF(StrNode, sn_avlnode));
@@ -337,10 +340,47 @@ st_destroy(Str_tbl *stp)
free(stp);
}
+/*
+ * Hash a single additional character into hashval, separately so we can
+ * iteratively get suffix hashes. See st_string_hash and st_hash_insert
+ */
+static inline uint_t
+st_string_hashround(uint_t hashval, char c)
+{
+ /* h = ((h * 33) + c) */
+ return (((hashval << 5) + hashval) + c);
+}
/*
- * For a given string - copy it into the buffer associated with
- * the string table - and return the offset it has been assigned.
+ * We use a classic 'Bernstein k=33' hash function. But
+ * instead of hashing from the start of the string to the
+ * end, we do it in reverse.
+ *
+ * This way we are essentially building all of the
+ * suffix hashvalues as we go. We can check to see if
+ * any suffixes already exist in the tree as we generate
+ * the hash.
+ */
+static inline uint_t
+st_string_hash(const char *str)
+{
+ uint_t hashval = HASHSEED;
+ size_t stlen = strlen(str);
+
+ /* We should never be hashing the NUL string */
+ assert(stlen > 0);
+
+ for (int i = stlen; i >= 0; i--) {
+ assert(i <= stlen); /* not unsigned->signed truncated */
+ hashval = st_string_hashround(hashval, str[i]);
+ }
+
+ return (hashval);
+}
+
+/*
+ * For a given string - copy it into the buffer associated with the string
+ * table - and return the offset it has been assigned in stoff.
*
* If a value of '-1' is returned - the string was not found in
* the Str_tbl.
@@ -352,12 +392,11 @@ st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
uint_t hashval;
Str_hash *sthash;
Str_master *mstr;
- int i;
/*
* String table *must* have been previously cooked
*/
- assert(stp->st_strbuf);
+ assert(stp->st_strbuf != NULL);
assert(stp->st_flags & FLG_STTAB_COOKED);
stlen = strlen(str);
@@ -365,7 +404,8 @@ st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
* Null string always points to head of string table
*/
if (stlen == 0) {
- *stoff = 0;
+ if (stoff != NULL)
+ *stoff = 0;
return (0);
}
@@ -380,7 +420,8 @@ st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
if ((_stoff + stlen) > stp->st_fullstrsize)
return (-1);
memcpy(stp->st_strbuf + _stoff, str, stlen);
- *stoff = _stoff;
+ if (stoff != NULL)
+ *stoff = _stoff;
stp->st_nextoff += stlen;
return (0);
}
@@ -388,11 +429,7 @@ st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
/*
* Calculate reverse hash for string.
*/
- hashval = HASHSEED;
- for (i = stlen; i >= 0; i--) {
- hashval = ((hashval << 5) + hashval) +
- str[i]; /* h = ((h * 33) + c) */
- }
+ hashval = st_string_hash(str);
for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt]; sthash;
sthash = sthash->hi_next) {
@@ -436,12 +473,12 @@ st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
/*
* Calculate offset of (sub)string.
*/
- *stoff = mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen;
+ if (stoff != NULL)
+ *stoff = mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen;
return (0);
}
-
static int
st_hash_insert(Str_tbl *stp, const char *str, size_t len)
{
@@ -452,19 +489,12 @@ st_hash_insert(Str_tbl *stp, const char *str, size_t len)
Str_hash *sthash;
Str_master *mstr = 0;
- /*
- * We use a classic 'Bernstein k=33' hash function. But
- * instead of hashing from the start of the string to the
- * end, we do it in reverse.
- *
- * This way - we are essentially building all of the
- * suffix hashvalues as we go. We can check to see if
- * any suffixes already exist in the tree as we generate
- * the hash.
- */
for (i = len; i >= 0; i--) {
- hashval = ((hashval << 5) + hashval) +
- str[i]; /* h = ((h * 33) + c) */
+ /*
+ * Build up 'hashval' character by character, so we always
+ * have the hash of the current string suffix
+ */
+ hashval = st_string_hashround(hashval, str[i]);
for (sthash = hashbcks[hashval % bckcnt];
sthash; sthash = sthash->hi_next) {
@@ -646,15 +676,16 @@ st_getstrtab_sz(Str_tbl *stp)
return (stp->st_strsize);
}
-/*
- * Associate a buffer with a string table.
- */
const char *
st_getstrbuf(Str_tbl *stp)
{
return (stp->st_strbuf);
}
+
+/*
+ * Associate a buffer with a string table.
+ */
int
st_setstrbuf(Str_tbl *stp, char *stbuf, size_t bufsize)
{
@@ -682,3 +713,87 @@ st_setstrbuf(Str_tbl *stp, char *stbuf, size_t bufsize)
#endif
return (0);
}
+
+/*
+ * Populate the buffer with all strings from stp.
+ * The table must be compressed and cooked
+ */
+void
+st_setallstrings(Str_tbl *stp)
+{
+ assert(stp->st_strbuf != NULL);
+ assert((stp->st_flags & FLG_STTAB_COOKED));
+ assert((stp->st_flags & FLG_STTAB_COMPRESS));
+
+ for (Str_master *str = stp->st_mstrlist; str != NULL;
+ str = str->sm_next) {
+ int res = 0;
+
+ res = st_setstring(stp, str->sm_str, NULL);
+ assert(res == 0);
+ }
+}
+
+/*
+ * Find str in the given table
+ * return it's offset, or -1
+ */
+off_t
+st_findstring(Str_tbl *stp, const char *needle)
+{
+ uint_t hashval;
+ Str_hash *sthash;
+ Str_master *mstr;
+
+ assert(stp->st_strbuf != NULL);
+ assert((stp->st_flags & FLG_STTAB_COOKED));
+
+ /* The NUL string is always first */
+ if (needle[0] == '\0')
+ return (0);
+
+ /* In the uncompressed case we must linear search */
+ if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
+ const char *str, *end;
+
+ end = stp->st_strbuf + stp->st_fullstrsize;
+
+ for (str = stp->st_strbuf; str < end;
+ str += strlen(str) + 1) {
+ if (strcmp(str, needle) == 0)
+ return (str - stp->st_strbuf);
+ }
+
+ return (-1);
+ }
+
+ hashval = st_string_hash(needle);
+
+ for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt];
+ sthash != NULL;
+ sthash = sthash->hi_next) {
+ const char *hstr;
+
+ if (sthash->hi_hashval != hashval)
+ continue;
+
+ hstr = &sthash->hi_mstr->sm_str[sthash->hi_mstr->sm_strlen -
+ sthash->hi_strlen];
+ if (strcmp(needle, hstr) == 0)
+ break;
+ }
+
+ /*
+ * Did we find the string?
+ */
+ if (sthash == NULL)
+ return (-1);
+
+ mstr = sthash->hi_mstr;
+ assert(mstr->sm_stroff != 0);
+
+ /*
+ * Calculate offset of (sub)string.
+ */
+ return (mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen);
+}
diff --git a/usr/src/cmd/sgs/include/libld.h b/usr/src/cmd/sgs/include/libld.h
index 11a73d949a..01734885f0 100644
--- a/usr/src/cmd/sgs/include/libld.h
+++ b/usr/src/cmd/sgs/include/libld.h
@@ -999,6 +999,9 @@ struct os_desc { /* Output section descriptor */
Xword os_szoutrels; /* size of output relocation section */
uint_t os_namehash; /* hash on section name */
uchar_t os_flags; /* various flags */
+ APlist *os_mstrsyms; /* symbols affected by string merge */
+ APlist *os_mstrrels; /* section relocs affected by... */
+ Str_tbl *os_mstrtab; /* merged string table */
};
#define FLG_OS_KEY 0x01 /* section requires sort keys */
diff --git a/usr/src/cmd/sgs/include/string_table.h b/usr/src/cmd/sgs/include/string_table.h
index e42f81779f..951df25c6f 100644
--- a/usr/src/cmd/sgs/include/string_table.h
+++ b/usr/src/cmd/sgs/include/string_table.h
@@ -26,8 +26,6 @@
#ifndef _STRING_TABLE_DOT_H
#define _STRING_TABLE_DOT_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#ifdef __cplusplus
@@ -50,6 +48,8 @@ extern int st_insert(Str_tbl *, const char *);
extern Str_tbl *st_new(uint_t);
extern int st_setstrbuf(Str_tbl *, char *, size_t);
extern int st_setstring(Str_tbl *, const char *, size_t *);
+extern void st_setallstrings(Str_tbl *);
+extern off_t st_findstring(Str_tbl *, const char *);
/*
* Exported flags values for st_new().
diff --git a/usr/src/cmd/sgs/libld/common/sections.c b/usr/src/cmd/sgs/libld/common/sections.c
index 22b2655105..ec147c69b1 100644
--- a/usr/src/cmd/sgs/libld/common/sections.c
+++ b/usr/src/cmd/sgs/libld/common/sections.c
@@ -32,6 +32,8 @@
#define ELF_TARGET_AMD64
+#include <sys/debug.h>
+
#include <string.h>
#include <strings.h>
#include <stdio.h>
@@ -2742,47 +2744,48 @@ strmerge_get_reloc_str(Ofl_desc *ofl, Rel_desc *rsp)
/*
* First pass over the relocation records for string table merging.
* Build lists of relocations and symbols that will need modification,
- * and insert the strings they reference into the mstrtab string table.
+ * and insert the strings they reference into the output string table.
*
* entry:
- * ofl, osp - As passed to ld_make_strmerge().
- * mstrtab - String table to receive input strings. This table
- * must be in its first (initialization) pass and not
- * yet cooked (st_getstrtab_sz() not yet called).
- * rel_alpp - APlist to receive pointer to any relocation
- * descriptors with STT_SECTION symbols that reference
- * one of the input sections being merged.
- * sym_alpp - APlist to receive pointer to any symbols that reference
- * one of the input sections being merged.
- * rcp - Pointer to cache of relocation descriptors to examine.
- * Either &ofl->ofl_actrels (active relocations)
- * or &ofl->ofl_outrels (output relocations).
+ * ofl - Output file descriptor
*
- * exit:
- * On success, rel_alpp and sym_alpp are updated, and
- * any strings in the mergeable input sections referenced by
- * a relocation has been entered into mstrtab. True (1) is returned.
+ * exit: On success, the string merging specific members of each output
+ * section descriptor in ofl are updated based on information from the
+ * relocation entries, and 0 is returned.
*
- * On failure, False (0) is returned.
+ * On error, S_ERROR is returned.
*/
-static int
-strmerge_pass1(Ofl_desc *ofl, Os_desc *osp, Str_tbl *mstrtab,
- APlist **rel_alpp, APlist **sym_alpp, Rel_cache *rcp)
+static uintptr_t
+ld_gather_strmerge(Ofl_desc *ofl, Rel_cache *cache)
{
- Aliste idx;
- Rel_cachebuf *rcbp;
- Sym_desc *sdp;
- Sym_desc *last_sdp = NULL;
- Rel_desc *rsp;
- const char *name;
+ Rel_cachebuf *rbcp;
+ Rel_desc *rsp;
+ Sym_desc *last_sdp = NULL;
+ Aliste idx1;
- REL_CACHE_TRAVERSE(rcp, idx, rcbp, rsp) {
- sdp = rsp->rel_sym;
+ /*
+ * Pass 1:
+ *
+ * Build lists of relocations and symbols that will need
+ * modification, and insert the strings they reference into
+ * the output string table.
+ */
+ REL_CACHE_TRAVERSE(cache, idx1, rbcp, rsp) {
+ Sym_desc *sdp = rsp->rel_sym;
+ Os_desc *osp;
+ const char *name;
+
+ /*
+ * If there's no input section, or the input section is
+ * discarded or does not contain mergable strings, we have
+ * nothing to do.
+ */
if ((sdp->sd_isc == NULL) || ((sdp->sd_isc->is_flags &
- (FLG_IS_DISCARD | FLG_IS_INSTRMRG)) != FLG_IS_INSTRMRG) ||
- (sdp->sd_isc->is_osdesc != osp))
+ (FLG_IS_DISCARD | FLG_IS_INSTRMRG)) != FLG_IS_INSTRMRG))
continue;
+ osp = sdp->sd_isc->is_osdesc;
+
/*
* Remember symbol for use in the third pass. There is no
* reason to save a given symbol more than once, so we take
@@ -2791,78 +2794,108 @@ strmerge_pass1(Ofl_desc *ofl, Os_desc *osp, Str_tbl *mstrtab,
* we saved last time, don't bother.
*/
if (last_sdp != sdp) {
- if (aplist_append(sym_alpp, sdp, AL_CNT_STRMRGSYM) ==
- NULL)
- return (0);
+ if (aplist_append(&osp->os_mstrsyms, sdp,
+ AL_CNT_STRMRGSYM) == NULL)
+ return (S_ERROR);
last_sdp = sdp;
}
+ if ((osp->os_mstrtab == NULL) &&
+ (osp->os_mstrtab = st_new(FLG_STNEW_COMPRESS)) == NULL)
+ return (S_ERROR);
+
/* Enter the string into our new string table */
name = strmerge_get_reloc_str(ofl, rsp);
- if (st_insert(mstrtab, name) == -1)
- return (0);
+ if (st_insert(osp->os_mstrtab, name) == -1)
+ return (S_ERROR);
/*
* If this is an STT_SECTION symbol, then the second pass
* will need to modify this relocation, so hang on to it.
*/
if ((ELF_ST_TYPE(sdp->sd_sym->st_info) == STT_SECTION) &&
- (aplist_append(rel_alpp, rsp, AL_CNT_STRMRGREL) == NULL))
- return (0);
+ (aplist_append(&osp->os_mstrrels, rsp,
+ AL_CNT_STRMRGREL) == NULL)) {
+ return (S_ERROR);
+ }
}
- return (1);
+ return (0);
}
/*
- * If the output section has any SHF_MERGE|SHF_STRINGS input sections,
- * replace them with a single merged/compressed input section.
+ * If any an output section has more than one SHF_MERGE|SHF_STRINGS input
+ * section, replace them with a single merged/compressed input section.
+ *
+ * This is done by making a Str_tbl (as we use for managing SHT_STRTAB
+ * sections) per output section with compression enabled to manage all strings
+ * in the mergeable input sections. We then discard all inputs which
+ * contributed to this table and replace them with an input section we create
+ * taking data from this Str_tbl. References to the now discarded sections
+ * are then updated to refer to our new merged input section, and the string
+ * table and other metadata are freed.
+ *
+ * This process is done in 3 passes. For efficiency reasons half of pass 1 is
+ * done by ld_strmerge_gather() so relocations only need to be processed once.
+ * Steps 1.5 onward are performed here. The steps are:
+ *
+ * 1) In ld_strmerge_gather() examine all relocations, insert strings
+ * from relocations to the mergeable input sections into the string
+ * table.
+ * 1.5) Gather every string from the mergeable input sections, regardless
+ * of whether it is referenced from a relocation. All strings
+ * must be processed, and relocations may point into the middle
+ * of an actual NUL-terminated string, so we must enter both the
+ * precise strings referenced by relocations and full strings
+ * within the section.
+ * 2) Modify the relocation values to be correct for the
+ * new merged section.
+ * 3) Modify the symbols used by the relocations to reference
+ * the new section.
+ *
+ * These passes cannot be combined:
+ * - The string table code works in two passes, and all
+ * strings have to be loaded in pass one before the
+ * offset of any strings can be determined.
+ * - Multiple relocations reference a single symbol, so the
+ * symbol cannot be modified until all relocations are
+ * fixed.
*
* entry:
* ofl - Output file descriptor
- * osp - Output section descriptor
- * rel_alpp, sym_alpp, - Address of 2 APlists, to be used
- * for internal processing. On the initial call to
- * ld_make_strmerge, these list pointers must be NULL.
- * The caller is encouraged to pass the same lists back for
- * successive calls to this function without freeing
- * them in between calls. This causes a single pair of
- * memory allocations to be reused multiple times.
+ * osp - Outputs section descriptor
*
* exit:
- * If section merging is possible, it is done. If no errors are
- * encountered, True (1) is returned. On error, S_ERROR.
+ * If section merging is possible for this output section, it is done.
+ * If no errors are encountered, 0 is returned. On error, S_ERROR is
+ * returned.
*
- * The contents of rel_alpp and sym_alpp on exit are
- * undefined. The caller can free them, or pass them back to a subsequent
- * call to this routine, but should not examine their contents.
+ * The contents of the string-merging specific members of this output
+ * section descriptor are undefined after this function returns.
*/
static uintptr_t
-ld_make_strmerge(Ofl_desc *ofl, Os_desc *osp, APlist **rel_alpp,
- APlist **sym_alpp)
+ld_strmerge_sec(Ofl_desc *ofl, Os_desc *osp)
{
- Str_tbl *mstrtab; /* string table for string merge secs */
- Is_desc *mstrsec; /* Generated string merge section */
- Is_desc *isp;
- Shdr *mstr_shdr;
- Elf_Data *mstr_data;
+ Is_desc *isp = NULL;
Sym_desc *sdp;
Rel_desc *rsp;
- Aliste idx;
+ Is_desc *mstrsec = NULL; /* Generated string merge section */
+ Shdr *mstr_shdr = NULL;
+ Elf_Data *mstr_data = NULL;
size_t data_size;
- int st_setstring_status;
- size_t stoff;
-
- /* If string table compression is disabled, there's nothing to do */
- if ((ofl->ofl_flags1 & FLG_OF1_NCSTTAB) != 0)
- return (1);
+ Aliste idx;
+ uintptr_t ret = 0;
+ Boolean placed = FALSE;
/*
- * Pass over the mergeable input sections, and if they haven't
- * all been discarded, create a string table.
+ * Pass 1.5: Add all strings from all mergeable input sections.
+ *
+ * The last section we find also serves as the template for our
+ * replacement merged section, providing the section attributes, etc.
*/
- mstrtab = NULL;
for (APLIST_TRAVERSE(osp->os_mstrisdescs, idx, isp)) {
+ const char *str, *end;
+
if (isdesc_discarded(isp))
continue;
@@ -2873,93 +2906,66 @@ ld_make_strmerge(Ofl_desc *ofl, Os_desc *osp, APlist **rel_alpp,
if (isp->is_shdr->sh_size == 0)
continue;
- /*
- * We have at least one non-discarded section.
- * Create a string table descriptor.
- */
- if ((mstrtab = st_new(FLG_STNEW_COMPRESS)) == NULL)
- return (S_ERROR);
- break;
- }
-
- /* If no string table was created, we have no mergeable sections */
- if (mstrtab == NULL)
- return (1);
+ if ((osp->os_mstrtab == NULL) &&
+ (osp->os_mstrtab = st_new(FLG_STNEW_COMPRESS)) == NULL) {
+ ret = S_ERROR;
+ goto out;
+ }
- /*
- * This routine has to make 3 passes:
- *
- * 1) Examine all relocations, insert strings from relocations
- * to the mergeable input sections into the string table.
- * 2) Modify the relocation values to be correct for the
- * new merged section.
- * 3) Modify the symbols used by the relocations to reference
- * the new section.
- *
- * These passes cannot be combined:
- * - The string table code works in two passes, and all
- * strings have to be loaded in pass one before the
- * offset of any strings can be determined.
- * - Multiple relocations reference a single symbol, so the
- * symbol cannot be modified until all relocations are
- * fixed.
- *
- * The number of relocations related to section merging is usually
- * a mere fraction of the overall active and output relocation lists,
- * and the number of symbols is usually a fraction of the number
- * of related relocations. We therefore build APlists for the
- * relocations and symbols in the first pass, and then use those
- * lists to accelerate the operation of pass 2 and 3.
- *
- * Reinitialize the lists to a completely empty state.
- */
- aplist_reset(*rel_alpp);
- aplist_reset(*sym_alpp);
+ end = isp->is_indata->d_buf + isp->is_indata->d_size;
+ for (str = isp->is_indata->d_buf; str < end;
+ str += strlen(str) + 1) {
+ if (st_insert(osp->os_mstrtab, str) != 0) {
+ ret = S_ERROR;
+ goto out;
+ }
+ }
+ }
- /*
- * Pass 1:
- *
- * Every relocation related to this output section (and the input
- * sections that make it up) is found in either the active, or the
- * output relocation list, depending on whether the relocation is to
- * be processed by this invocation of the linker, or inserted into the
- * output object.
- *
- * Build lists of relocations and symbols that will need modification,
- * and insert the strings they reference into the mstrtab string table.
- */
- if (strmerge_pass1(ofl, osp, mstrtab, rel_alpp, sym_alpp,
- &ofl->ofl_actrels) == 0)
- goto return_s_error;
- if (strmerge_pass1(ofl, osp, mstrtab, rel_alpp, sym_alpp,
- &ofl->ofl_outrels) == 0)
- goto return_s_error;
+ IMPLY(osp->os_mstrtab != NULL, isp != NULL);
+ if (osp->os_mstrtab == NULL) {
+ ret = 0;
+ goto out;
+ }
/*
- * Get the size of the new input section. Requesting the
- * string table size "cooks" the table, and finalizes its contents.
+ * Get the size of the new input section. Requesting the string
+ * table size "cooks" the table, and finalizes its contents.
*/
- data_size = st_getstrtab_sz(mstrtab);
+ data_size = st_getstrtab_sz(osp->os_mstrtab);
/* Create a new input section to hold the merged strings */
if (new_section_from_template(ofl, isp, data_size,
- &mstrsec, &mstr_shdr, &mstr_data) == S_ERROR)
- goto return_s_error;
+ &mstrsec, &mstr_shdr, &mstr_data) == S_ERROR) {
+ ret = S_ERROR;
+ goto out;
+ }
mstrsec->is_flags |= FLG_IS_GNSTRMRG;
/*
- * Allocate a data buffer for the new input section.
- * Then, associate the buffer with the string table descriptor.
+ * Allocate a data buffer for the new input section, associate the
+ * buffer with the string table descriptor, and fill it from the
+ * string table.
*/
- if ((mstr_data->d_buf = libld_malloc(data_size)) == NULL)
- goto return_s_error;
- if (st_setstrbuf(mstrtab, mstr_data->d_buf, data_size) == -1)
- goto return_s_error;
+ if ((mstr_data->d_buf = libld_malloc(data_size)) == NULL) {
+ ret = S_ERROR;
+ goto out;
+ }
+ if ((st_setstrbuf(osp->os_mstrtab, mstr_data->d_buf,
+ data_size) == -1)) {
+ ret = S_ERROR;
+ goto out;
+ }
+
+ st_setallstrings(osp->os_mstrtab);
/* Add the new section to the output image */
if (ld_place_section(ofl, mstrsec, NULL, osp->os_identndx, NULL) ==
- (Os_desc *)S_ERROR)
- goto return_s_error;
+ (Os_desc *)S_ERROR) {
+ ret = S_ERROR;
+ goto out;
+ }
+ placed = TRUE;
/*
* Pass 2:
@@ -2969,20 +2975,17 @@ ld_make_strmerge(Ofl_desc *ofl, Os_desc *osp, APlist **rel_alpp,
* record so that the offset it contains is for the new section
* instead of the original.
*/
- for (APLIST_TRAVERSE(*rel_alpp, idx, rsp)) {
+ for (APLIST_TRAVERSE(osp->os_mstrrels, idx, rsp)) {
const char *name;
+ size_t stoff;
- /* Put the string into the merged string table */
+ /*
+ * Find the string to the merged table's buffer and get its
+ * offset.
+ */
name = strmerge_get_reloc_str(ofl, rsp);
- st_setstring_status = st_setstring(mstrtab, name, &stoff);
- if (st_setstring_status == -1) {
- /*
- * A failure to insert at this point means that
- * something is corrupt. This isn't a resource issue.
- */
- assert(st_setstring_status != -1);
- goto return_s_error;
- }
+ stoff = st_findstring(osp->os_mstrtab, name);
+ VERIFY3S(stoff, !=, -1);
/*
* Alter the relocation to access the string at the
@@ -3002,18 +3005,23 @@ ld_make_strmerge(Ofl_desc *ofl, Os_desc *osp, APlist **rel_alpp,
* ld_do_activerelocs(). The FLG_REL_NADDEND flag
* tells them that this is the case.
*/
- if ((rsp->rel_flags & FLG_REL_RELA) == 0) /* REL */
+ if ((rsp->rel_flags & FLG_REL_RELA) == 0) {
+ /* REL */
rsp->rel_flags |= FLG_REL_NADDEND;
+ }
rsp->rel_raddend = (Sxword)stoff;
/*
* Generate a symbol name string for STT_SECTION symbols
* that might reference our merged section. This shows up
* in debug output and helps show how the relocation has
- * changed from its original input section to our merged one.
+ * changed from its original input section to our merged
+ * one.
*/
- if (ld_stt_section_sym_name(mstrsec) == NULL)
- goto return_s_error;
+ if (ld_stt_section_sym_name(mstrsec) == NULL) {
+ ret = S_ERROR;
+ goto out;
+ }
}
/*
@@ -3023,13 +3031,13 @@ ld_make_strmerge(Ofl_desc *ofl, Os_desc *osp, APlist **rel_alpp,
* so that they reference the new input section containing the
* merged strings instead of the original input sections.
*/
- for (APLIST_TRAVERSE(*sym_alpp, idx, sdp)) {
+ for (APLIST_TRAVERSE(osp->os_mstrsyms, idx, sdp)) {
/*
- * If we've already processed this symbol, don't do it
- * twice. strmerge_pass1() uses a heuristic (relocations to
- * the same symbol clump together) to avoid inserting a
- * given symbol more than once, but repeat symbols in
- * the list can occur.
+ * If we've already redirected this symbol to the merged data,
+ * don't do it again. ld_gather_strmerge() uses a heuristic
+ * (relocations to the same symbol clump together) to avoid
+ * inserting a given symbol more than once, but repeat symbols
+ * in the list can occur.
*/
if ((sdp->sd_isc->is_flags & FLG_IS_INSTRMRG) == 0)
continue;
@@ -3041,23 +3049,22 @@ ld_make_strmerge(Ofl_desc *ofl, Os_desc *osp, APlist **rel_alpp,
* input section. Update the address to reflect
* the address in our new merged section.
*/
- const char *name = sdp->sd_sym->st_value +
+ const char *name;
+ size_t stoff;
+
+ /*
+ * Find the string in the merged table's buffer and get
+ * its offset.
+ */
+ name = sdp->sd_sym->st_value +
(char *)sdp->sd_isc->is_indata->d_buf;
+ stoff = st_findstring(osp->os_mstrtab, name);
+ VERIFY3S(stoff, !=, -1);
- st_setstring_status =
- st_setstring(mstrtab, name, &stoff);
- if (st_setstring_status == -1) {
- /*
- * A failure to insert at this point means
- * something is corrupt. This isn't a
- * resource issue.
- */
- assert(st_setstring_status != -1);
- goto return_s_error;
+ if (ld_sym_copy(sdp) == S_ERROR) {
+ ret = S_ERROR;
+ goto out;
}
-
- if (ld_sym_copy(sdp) == S_ERROR)
- goto return_s_error;
sdp->sd_sym->st_value = (Word)stoff;
}
@@ -3085,12 +3092,64 @@ ld_make_strmerge(Ofl_desc *ofl, Os_desc *osp, APlist **rel_alpp,
DBG_CALL(Dbg_sec_genstr_compress(ofl->ofl_lml, osp->os_name, data_size,
mstr_data->d_size));
- st_destroy(mstrtab);
- return (1);
+out:
+ if ((ret == S_ERROR) && !placed) {
+ libld_free(mstrsec);
+ if (mstr_data != NULL)
+ libld_free(mstr_data->d_buf);
+ libld_free(mstr_data);
+ libld_free(mstr_shdr);
+ }
+
+ libld_free(osp->os_mstrsyms);
+ osp->os_mstrsyms = NULL;
+ libld_free(osp->os_mstrrels);
+ osp->os_mstrrels = NULL;
+
+ if (osp->os_mstrtab != NULL) {
+ st_destroy(osp->os_mstrtab);
+ osp->os_mstrtab = NULL;
+ }
+
+ return (ret);
+}
+
+/*
+ * If any output section has SHF_MERGE|SHF_STRINGS input sections,
+ * replace them with a single merged/compressed input section.
+ *
+ * entry:
+ * ofl - Output file descriptor
+ *
+ * exit:
+ * If section merging is possible, it is done. If no errors are
+ * encountered, 0 is returned. On error, S_ERROR is returned.
+ *
+ * The contents of the string-merging specific members of any output
+ * section descriptor are undefined after this function returns.
+ */
+static uintptr_t
+ld_make_strmerge(Ofl_desc *ofl)
+{
+ Sg_desc *sgp;
+ Aliste idx1;
+
+ if (ld_gather_strmerge(ofl, &ofl->ofl_actrels) == S_ERROR)
+ return (S_ERROR);
+ if (ld_gather_strmerge(ofl, &ofl->ofl_outrels) == S_ERROR)
+ return (S_ERROR);
-return_s_error:
- st_destroy(mstrtab);
- return (S_ERROR);
+ for (APLIST_TRAVERSE(ofl->ofl_segs, idx1, sgp)) {
+ Os_desc *osp;
+ Aliste idx2;
+
+ for (APLIST_TRAVERSE(sgp->sg_osdescs, idx2, osp)) {
+ if (ld_strmerge_sec(ofl, osp) == S_ERROR)
+ return (S_ERROR);
+ }
+ }
+
+ return (0);
}
/*
@@ -3197,29 +3256,7 @@ ld_make_sections(Ofl_desc *ofl)
* for this processing, for all the output sections.
*/
if ((ofl->ofl_flags1 & FLG_OF1_NCSTTAB) == 0) {
- int error_seen = 0;
- APlist *rel_alpp = NULL;
- APlist *sym_alpp = NULL;
- Aliste idx1;
-
- for (APLIST_TRAVERSE(ofl->ofl_segs, idx1, sgp)) {
- Os_desc *osp;
- Aliste idx2;
-
- for (APLIST_TRAVERSE(sgp->sg_osdescs, idx2, osp))
- if ((osp->os_mstrisdescs != NULL) &&
- (ld_make_strmerge(ofl, osp,
- &rel_alpp, &sym_alpp) ==
- S_ERROR)) {
- error_seen = 1;
- break;
- }
- }
- if (rel_alpp != NULL)
- libld_free(rel_alpp);
- if (sym_alpp != NULL)
- libld_free(sym_alpp);
- if (error_seen != 0)
+ if (ld_make_strmerge(ofl) == S_ERROR)
return (S_ERROR);
}
diff --git a/usr/src/cmd/sgs/tools/SUNWonld-README b/usr/src/cmd/sgs/tools/SUNWonld-README
index c0f2b139d1..da5af4dba8 100644
--- a/usr/src/cmd/sgs/tools/SUNWonld-README
+++ b/usr/src/cmd/sgs/tools/SUNWonld-README
@@ -1677,3 +1677,5 @@ Bugid Risk Synopsis
14090 backout: turns over big rocks, discovers big bugs
14152 ld(1) should be more careful about empty alists
14127 ld(1) can double free when cleaning up
+14155 ld(1) string table merging could be much faster
+14157 ld(1) string table merging should follow gABI more closely
diff --git a/usr/src/pkg/manifests/system-test-elftest.mf b/usr/src/pkg/manifests/system-test-elftest.mf
index c6300add29..92bb6c1148 100644
--- a/usr/src/pkg/manifests/system-test-elftest.mf
+++ b/usr/src/pkg/manifests/system-test-elftest.mf
@@ -9,9 +9,7 @@
# http://www.illumos.org/license/CDDL.
#
-#
-# Copyright 2018, Richard Lowe.
-#
+# Copyright 2021, Richard Lowe.
set name=pkg.fmri value=pkg:/system/test/elftest@$(PKGVERS)
set name=pkg.description value="ELF Unit Tests"
@@ -28,6 +26,7 @@ dir path=opt/elf-tests/tests/linker-sets
dir path=opt/elf-tests/tests/mapfiles
dir path=opt/elf-tests/tests/mapfiles/assert
dir path=opt/elf-tests/tests/mapfiles/parser
+dir path=opt/elf-tests/tests/string-merge
dir path=opt/elf-tests/tests/tls
dir path=opt/elf-tests/tests/tls/amd64
dir path=opt/elf-tests/tests/tls/amd64/ie
@@ -84,6 +83,9 @@ file path=opt/elf-tests/tests/mapfiles/parser/mapfile.sizemult.twobegin \
file path=opt/elf-tests/tests/mapfiles/parser/mapfile.sizemult.wrong mode=0444
file path=opt/elf-tests/tests/mapfiles/parser/object.c mode=0444
file path=opt/elf-tests/tests/mapfiles/parser/test-parser mode=0555
+file path=opt/elf-tests/tests/string-merge/simple mode=0555
+file path=opt/elf-tests/tests/string-merge/str1.s mode=0444
+file path=opt/elf-tests/tests/string-merge/str2.s mode=0444
file path=opt/elf-tests/tests/tls/amd64/ie/Makefile.test mode=0444
file path=opt/elf-tests/tests/tls/amd64/ie/amd64-ie-test mode=0555
file path=opt/elf-tests/tests/tls/amd64/ie/style1-func-with-r12.s mode=0444
diff --git a/usr/src/test/elf-tests/runfiles/default.run b/usr/src/test/elf-tests/runfiles/default.run
index 87b7cd352e..d5010ef2bc 100644
--- a/usr/src/test/elf-tests/runfiles/default.run
+++ b/usr/src/test/elf-tests/runfiles/default.run
@@ -10,7 +10,7 @@
# http://www.illumos.org/license/CDDL.
#
-# Copyright 2018, Richard Lowe.
+# Copyright 2021, Richard Lowe.
[DEFAULT]
pre =
@@ -32,6 +32,9 @@ tests = ['test-parser']
[/opt/elf-tests/tests/mapfiles/assert]
tests = ['test-assert']
+[/opt/elf-tests/tests/string-merge/]
+tests = ['simple']
+
[/opt/elf-tests/tests/tls/amd64/ie]
arch = i86pc
tests = ['amd64-ie-test']
diff --git a/usr/src/test/elf-tests/tests/Makefile b/usr/src/test/elf-tests/tests/Makefile
index 060d1c2e3e..47f18e8093 100644
--- a/usr/src/test/elf-tests/tests/Makefile
+++ b/usr/src/test/elf-tests/tests/Makefile
@@ -18,6 +18,7 @@ SUBDIRS = \
assert-deflib \
linker-sets \
mapfiles \
+ string-merge \
tls
include $(SRC)/test/Makefile.com
diff --git a/usr/src/test/elf-tests/tests/string-merge/Makefile b/usr/src/test/elf-tests/tests/string-merge/Makefile
new file mode 100644
index 0000000000..809eac39bf
--- /dev/null
+++ b/usr/src/test/elf-tests/tests/string-merge/Makefile
@@ -0,0 +1,47 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+# Copyright 2021, Richard Lowe.
+
+include $(SRC)/cmd/Makefile.cmd
+include $(SRC)/test/Makefile.com
+
+PROG = simple
+
+DATAFILES = str1.s str2.s
+
+ROOTOPTPKG = $(ROOT)/opt/elf-tests
+TESTDIR = $(ROOTOPTPKG)/tests/string-merge
+
+CMDS = $(PROG:%=$(TESTDIR)/%)
+$(CMDS) := FILEMODE = 0555
+
+
+DATA = $(DATAFILES:%=$(TESTDIR)/%)
+$(DATA) := FILEMODE = 0444
+
+all: $(PROG)
+
+install: all $(CMDS) $(DATA)
+
+clobber: clean
+ -$(RM) $(PROG)
+
+clean:
+ -$(RM) $(CLEANFILES)
+
+$(CMDS): $(TESTDIR) $(PROG)
+
+$(TESTDIR):
+ $(INS.dir)
+
+$(TESTDIR)/%: %
+ $(INS.file)
diff --git a/usr/src/test/elf-tests/tests/string-merge/simple.sh b/usr/src/test/elf-tests/tests/string-merge/simple.sh
new file mode 100644
index 0000000000..5715fa5a97
--- /dev/null
+++ b/usr/src/test/elf-tests/tests/string-merge/simple.sh
@@ -0,0 +1,56 @@
+#!/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+# Copyright 2021, Richard Lowe.
+
+TESTDIR=$(dirname $0)
+
+tmpdir=/tmp/test.$$
+mkdir $tmpdir
+cd $tmpdir
+
+cleanup() {
+ cd /
+ rm -fr $tmpdir
+}
+
+trap 'cleanup' EXIT
+
+if [[ $PWD != $tmpdir ]]; then
+ print -u2 "Failed to create temporary directory: $tmpdir"
+ exit 1;
+fi
+
+assemble() {
+ gcc -c -o $2 $1
+ if (( $? != 0 )); then
+ print -u2 "assembly of ${1} failed";
+ exit 1;
+ fi
+}
+
+# We expect any alternate linker to be in LD_ALTEXEC for us already
+assemble ${TESTDIR}/str1.s str1.o
+assemble ${TESTDIR}/str2.s str2.o
+
+gcc -shared -o strmerge.so str1.o str2.o
+if (( $? != 0 )); then
+ print -u2 "link of ${TESTDIR}/str[12].o failed";
+ exit 1;
+fi
+
+elfdump -N.test -w /dev/stdout strmerge.so | tr '\0' ' ' | grep -q '^ buffalo bills $'
+if (( $? != 0 )); then
+ print -u2 "Merged section contains unexpected data";
+ elfdump -N.test -w /dev/stdout strmerge.so | tr '\0' ' ' >&2
+ exit 1;
+fi
diff --git a/usr/src/test/elf-tests/tests/string-merge/str1.s b/usr/src/test/elf-tests/tests/string-merge/str1.s
new file mode 100644
index 0000000000..69c5e462e5
--- /dev/null
+++ b/usr/src/test/elf-tests/tests/string-merge/str1.s
@@ -0,0 +1,6 @@
+ .section .test,"aSM",@progbits,1
+ .asciz "buffalo"
+ .asciz "buffalo"
+ .asciz "buffalo"
+ .asciz "buffalo"
+ .asciz "buffalo"
diff --git a/usr/src/test/elf-tests/tests/string-merge/str2.s b/usr/src/test/elf-tests/tests/string-merge/str2.s
new file mode 100644
index 0000000000..d143d1c10a
--- /dev/null
+++ b/usr/src/test/elf-tests/tests/string-merge/str2.s
@@ -0,0 +1,4 @@
+ .section .test,"aSM",@progbits,1
+ .asciz "bills"
+ .asciz "uffalo"
+ .asciz "ills"
diff --git a/usr/src/tools/sgs/sgsmsg/Makefile b/usr/src/tools/sgs/sgsmsg/Makefile
index ecde42eb3c..6231dc552f 100644
--- a/usr/src/tools/sgs/sgsmsg/Makefile
+++ b/usr/src/tools/sgs/sgsmsg/Makefile
@@ -55,6 +55,7 @@ FILEMODE= 0755
CPPFLAGS = $(NATIVE_CPPFLAGS)
CFLAGS = $(NATIVE_CFLAGS)
NATIVE_LIBS += libc.so
+CSTD = $(CSTD_GNU99)
CW_LINKER =
# not linted