summaryrefslogtreecommitdiff
path: root/usr/src/cmd/sgs/common/string_table.c
diff options
context:
space:
mode:
authorRichard Lowe <richlowe@richlowe.net>2021-10-03 18:50:39 -0500
committerRichard Lowe <richlowe@richlowe.net>2021-11-06 14:54:00 -0500
commit252adeb303174e992b64771bf9639e63a4d55418 (patch)
tree0faba8d6b598747a2dac61ce2cd458fd43bf3928 /usr/src/cmd/sgs/common/string_table.c
parentc53c97f77356a767b8a3cec554ede591cf4074d9 (diff)
downloadillumos-gate-252adeb303174e992b64771bf9639e63a4d55418.tar.gz
14155 ld(1) string table merging could be much faster
14157 ld(1) string table merging should follow gABI more closely Reviewed by: Toomas Soome <tsoome@me.com> Reviewed by: Robert Mustacchi <rm@fingolfin.org> Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src/cmd/sgs/common/string_table.c')
-rw-r--r--usr/src/cmd/sgs/common/string_table.c179
1 files changed, 147 insertions, 32 deletions
diff --git a/usr/src/cmd/sgs/common/string_table.c b/usr/src/cmd/sgs/common/string_table.c
index c15473150e..9cbf23e54b 100644
--- a/usr/src/cmd/sgs/common/string_table.c
+++ b/usr/src/cmd/sgs/common/string_table.c
@@ -227,12 +227,15 @@ st_insert(Str_tbl *stp, const char *str)
if ((lnp = avl_find(stp->st_lentree, &ln, &where)) == NULL) {
if ((lnp = calloc(sizeof (*lnp), 1)) == NULL)
return (-1);
- lnp->ln_strlen = len;
- avl_insert(stp->st_lentree, lnp, where);
if ((lnp->ln_strtree = calloc(sizeof (*lnp->ln_strtree), 1)) ==
- NULL)
- return (0);
+ NULL) {
+ free(lnp);
+ return (-1);
+ }
+
+ lnp->ln_strlen = len;
+ avl_insert(stp->st_lentree, lnp, where);
avl_create(lnp->ln_strtree, &avl_str_compare, sizeof (StrNode),
SGSOFFSETOF(StrNode, sn_avlnode));
@@ -337,10 +340,47 @@ st_destroy(Str_tbl *stp)
free(stp);
}
+/*
+ * Hash a single additional character into hashval, separately so we can
+ * iteratively get suffix hashes. See st_string_hash and st_hash_insert
+ */
+static inline uint_t
+st_string_hashround(uint_t hashval, char c)
+{
+ /* h = ((h * 33) + c) */
+ return (((hashval << 5) + hashval) + c);
+}
/*
- * For a given string - copy it into the buffer associated with
- * the string table - and return the offset it has been assigned.
+ * We use a classic 'Bernstein k=33' hash function. But
+ * instead of hashing from the start of the string to the
+ * end, we do it in reverse.
+ *
+ * This way we are essentially building all of the
+ * suffix hashvalues as we go. We can check to see if
+ * any suffixes already exist in the tree as we generate
+ * the hash.
+ */
+static inline uint_t
+st_string_hash(const char *str)
+{
+ uint_t hashval = HASHSEED;
+ size_t stlen = strlen(str);
+
+ /* We should never be hashing the NUL string */
+ assert(stlen > 0);
+
+ for (int i = stlen; i >= 0; i--) {
+ assert(i <= stlen); /* not unsigned->signed truncated */
+ hashval = st_string_hashround(hashval, str[i]);
+ }
+
+ return (hashval);
+}
+
+/*
+ * For a given string - copy it into the buffer associated with the string
+ * table - and return the offset it has been assigned in stoff.
*
* If a value of '-1' is returned - the string was not found in
* the Str_tbl.
@@ -352,12 +392,11 @@ st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
uint_t hashval;
Str_hash *sthash;
Str_master *mstr;
- int i;
/*
* String table *must* have been previously cooked
*/
- assert(stp->st_strbuf);
+ assert(stp->st_strbuf != NULL);
assert(stp->st_flags & FLG_STTAB_COOKED);
stlen = strlen(str);
@@ -365,7 +404,8 @@ st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
* Null string always points to head of string table
*/
if (stlen == 0) {
- *stoff = 0;
+ if (stoff != NULL)
+ *stoff = 0;
return (0);
}
@@ -380,7 +420,8 @@ st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
if ((_stoff + stlen) > stp->st_fullstrsize)
return (-1);
memcpy(stp->st_strbuf + _stoff, str, stlen);
- *stoff = _stoff;
+ if (stoff != NULL)
+ *stoff = _stoff;
stp->st_nextoff += stlen;
return (0);
}
@@ -388,11 +429,7 @@ st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
/*
* Calculate reverse hash for string.
*/
- hashval = HASHSEED;
- for (i = stlen; i >= 0; i--) {
- hashval = ((hashval << 5) + hashval) +
- str[i]; /* h = ((h * 33) + c) */
- }
+ hashval = st_string_hash(str);
for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt]; sthash;
sthash = sthash->hi_next) {
@@ -436,12 +473,12 @@ st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
/*
* Calculate offset of (sub)string.
*/
- *stoff = mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen;
+ if (stoff != NULL)
+ *stoff = mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen;
return (0);
}
-
static int
st_hash_insert(Str_tbl *stp, const char *str, size_t len)
{
@@ -452,19 +489,12 @@ st_hash_insert(Str_tbl *stp, const char *str, size_t len)
Str_hash *sthash;
Str_master *mstr = 0;
- /*
- * We use a classic 'Bernstein k=33' hash function. But
- * instead of hashing from the start of the string to the
- * end, we do it in reverse.
- *
- * This way - we are essentially building all of the
- * suffix hashvalues as we go. We can check to see if
- * any suffixes already exist in the tree as we generate
- * the hash.
- */
for (i = len; i >= 0; i--) {
- hashval = ((hashval << 5) + hashval) +
- str[i]; /* h = ((h * 33) + c) */
+ /*
+ * Build up 'hashval' character by character, so we always
+ * have the hash of the current string suffix
+ */
+ hashval = st_string_hashround(hashval, str[i]);
for (sthash = hashbcks[hashval % bckcnt];
sthash; sthash = sthash->hi_next) {
@@ -646,15 +676,16 @@ st_getstrtab_sz(Str_tbl *stp)
return (stp->st_strsize);
}
-/*
- * Associate a buffer with a string table.
- */
const char *
st_getstrbuf(Str_tbl *stp)
{
return (stp->st_strbuf);
}
+
+/*
+ * Associate a buffer with a string table.
+ */
int
st_setstrbuf(Str_tbl *stp, char *stbuf, size_t bufsize)
{
@@ -682,3 +713,87 @@ st_setstrbuf(Str_tbl *stp, char *stbuf, size_t bufsize)
#endif
return (0);
}
+
+/*
+ * Populate the buffer with all strings from stp.
+ * The table must be compressed and cooked
+ */
+void
+st_setallstrings(Str_tbl *stp)
+{
+ assert(stp->st_strbuf != NULL);
+ assert((stp->st_flags & FLG_STTAB_COOKED));
+ assert((stp->st_flags & FLG_STTAB_COMPRESS));
+
+ for (Str_master *str = stp->st_mstrlist; str != NULL;
+ str = str->sm_next) {
+ int res = 0;
+
+ res = st_setstring(stp, str->sm_str, NULL);
+ assert(res == 0);
+ }
+}
+
+/*
+ * Find str in the given table
+ * return it's offset, or -1
+ */
+off_t
+st_findstring(Str_tbl *stp, const char *needle)
+{
+ uint_t hashval;
+ Str_hash *sthash;
+ Str_master *mstr;
+
+ assert(stp->st_strbuf != NULL);
+ assert((stp->st_flags & FLG_STTAB_COOKED));
+
+ /* The NUL string is always first */
+ if (needle[0] == '\0')
+ return (0);
+
+ /* In the uncompressed case we must linear search */
+ if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
+ const char *str, *end;
+
+ end = stp->st_strbuf + stp->st_fullstrsize;
+
+ for (str = stp->st_strbuf; str < end;
+ str += strlen(str) + 1) {
+ if (strcmp(str, needle) == 0)
+ return (str - stp->st_strbuf);
+ }
+
+ return (-1);
+ }
+
+ hashval = st_string_hash(needle);
+
+ for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt];
+ sthash != NULL;
+ sthash = sthash->hi_next) {
+ const char *hstr;
+
+ if (sthash->hi_hashval != hashval)
+ continue;
+
+ hstr = &sthash->hi_mstr->sm_str[sthash->hi_mstr->sm_strlen -
+ sthash->hi_strlen];
+ if (strcmp(needle, hstr) == 0)
+ break;
+ }
+
+ /*
+ * Did we find the string?
+ */
+ if (sthash == NULL)
+ return (-1);
+
+ mstr = sthash->hi_mstr;
+ assert(mstr->sm_stroff != 0);
+
+ /*
+ * Calculate offset of (sub)string.
+ */
+ return (mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen);
+}