summaryrefslogtreecommitdiff
path: root/usr/src/cmd/sgs/common/string_table.c
diff options
context:
space:
mode:
authorDan McDonald <danmcd@joyent.com>2021-11-08 10:44:53 -0500
committerDan McDonald <danmcd@joyent.com>2021-11-08 10:44:53 -0500
commita26cfd37a56ba50d6d034ca439a223a0278264d0 (patch)
tree12c970aa156115b8acfd3a2e2c21a2dd962b0693 /usr/src/cmd/sgs/common/string_table.c
parentb5c9b2a8a4b90f5425926c057b12db4d078d106b (diff)
parentdb2effc6fa1e364418090bfc0ca0cfd267792bea (diff)
downloadillumos-joyent-a26cfd37a56ba50d6d034ca439a223a0278264d0.tar.gz
[illumos-gate merge]
commit db2effc6fa1e364418090bfc0ca0cfd267792bea 14200 refhash could be used outside of mpt_sas commit da0001592ab4792956d927cb6a8dc2c02c7e6719 14221 logname(1) should only be delivered once commit 83c2c0baa22bd77bc77facf1e1ef091642673ce2 13679 rdist: error() and fatal() only do work in server commit 252adeb303174e992b64771bf9639e63a4d55418 14155 ld(1) string table merging could be much faster 14157 ld(1) string table merging should follow gABI more closely commit c53c97f77356a767b8a3cec554ede591cf4074d9 14189 want support for dd status= 14190 dd could include a human byte size commit 01aad2697e36a09a93fa18833b39bcc0486de567 14197 Implement id_space as a library commit 1e8d79d21400b4e47d64ce367181e7e5ce992649 13707 remove C99LMODE cruft 13708 remove lint cruft from Makefile.master commit 6538c7b4c76e1d53fc801540cfe1dfe59d26bf29 14121 loader: net_open() should not replace f->f_devdata commit 4fd0933306bf532a1642c8821ccc6e886949df54 14217 shbin and java exec modules do not work after 6826 Conflicts: usr/src/lib/libidspace/Makefile usr/src/lib/libidspace/Makefile.com usr/src/lib/libidspace/amd64/Makefile usr/src/lib/libidspace/common/mapfile-vers usr/src/lib/libidspace/i386/Makefile usr/src/uts/common/Makefile.files usr/src/uts/common/Makefile.rules usr/src/uts/common/sys/refhash.h
Diffstat (limited to 'usr/src/cmd/sgs/common/string_table.c')
-rw-r--r--usr/src/cmd/sgs/common/string_table.c179
1 files changed, 147 insertions, 32 deletions
diff --git a/usr/src/cmd/sgs/common/string_table.c b/usr/src/cmd/sgs/common/string_table.c
index c15473150e..9cbf23e54b 100644
--- a/usr/src/cmd/sgs/common/string_table.c
+++ b/usr/src/cmd/sgs/common/string_table.c
@@ -227,12 +227,15 @@ st_insert(Str_tbl *stp, const char *str)
if ((lnp = avl_find(stp->st_lentree, &ln, &where)) == NULL) {
if ((lnp = calloc(sizeof (*lnp), 1)) == NULL)
return (-1);
- lnp->ln_strlen = len;
- avl_insert(stp->st_lentree, lnp, where);
if ((lnp->ln_strtree = calloc(sizeof (*lnp->ln_strtree), 1)) ==
- NULL)
- return (0);
+ NULL) {
+ free(lnp);
+ return (-1);
+ }
+
+ lnp->ln_strlen = len;
+ avl_insert(stp->st_lentree, lnp, where);
avl_create(lnp->ln_strtree, &avl_str_compare, sizeof (StrNode),
SGSOFFSETOF(StrNode, sn_avlnode));
@@ -337,10 +340,47 @@ st_destroy(Str_tbl *stp)
free(stp);
}
+/*
+ * Hash a single additional character into hashval, separately so we can
+ * iteratively get suffix hashes. See st_string_hash and st_hash_insert
+ */
+static inline uint_t
+st_string_hashround(uint_t hashval, char c)
+{
+ /* h = ((h * 33) + c) */
+ return (((hashval << 5) + hashval) + c);
+}
/*
- * For a given string - copy it into the buffer associated with
- * the string table - and return the offset it has been assigned.
+ * We use a classic 'Bernstein k=33' hash function. But
+ * instead of hashing from the start of the string to the
+ * end, we do it in reverse.
+ *
+ * This way we are essentially building all of the
+ * suffix hashvalues as we go. We can check to see if
+ * any suffixes already exist in the tree as we generate
+ * the hash.
+ */
+static inline uint_t
+st_string_hash(const char *str)
+{
+ uint_t hashval = HASHSEED;
+ size_t stlen = strlen(str);
+
+ /* We should never be hashing the NUL string */
+ assert(stlen > 0);
+
+ for (int i = stlen; i >= 0; i--) {
+ assert(i <= stlen); /* not unsigned->signed truncated */
+ hashval = st_string_hashround(hashval, str[i]);
+ }
+
+ return (hashval);
+}
+
+/*
+ * For a given string - copy it into the buffer associated with the string
+ * table - and return the offset it has been assigned in stoff.
*
* If a value of '-1' is returned - the string was not found in
* the Str_tbl.
@@ -352,12 +392,11 @@ st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
uint_t hashval;
Str_hash *sthash;
Str_master *mstr;
- int i;
/*
* String table *must* have been previously cooked
*/
- assert(stp->st_strbuf);
+ assert(stp->st_strbuf != NULL);
assert(stp->st_flags & FLG_STTAB_COOKED);
stlen = strlen(str);
@@ -365,7 +404,8 @@ st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
* Null string always points to head of string table
*/
if (stlen == 0) {
- *stoff = 0;
+ if (stoff != NULL)
+ *stoff = 0;
return (0);
}
@@ -380,7 +420,8 @@ st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
if ((_stoff + stlen) > stp->st_fullstrsize)
return (-1);
memcpy(stp->st_strbuf + _stoff, str, stlen);
- *stoff = _stoff;
+ if (stoff != NULL)
+ *stoff = _stoff;
stp->st_nextoff += stlen;
return (0);
}
@@ -388,11 +429,7 @@ st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
/*
* Calculate reverse hash for string.
*/
- hashval = HASHSEED;
- for (i = stlen; i >= 0; i--) {
- hashval = ((hashval << 5) + hashval) +
- str[i]; /* h = ((h * 33) + c) */
- }
+ hashval = st_string_hash(str);
for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt]; sthash;
sthash = sthash->hi_next) {
@@ -436,12 +473,12 @@ st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
/*
* Calculate offset of (sub)string.
*/
- *stoff = mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen;
+ if (stoff != NULL)
+ *stoff = mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen;
return (0);
}
-
static int
st_hash_insert(Str_tbl *stp, const char *str, size_t len)
{
@@ -452,19 +489,12 @@ st_hash_insert(Str_tbl *stp, const char *str, size_t len)
Str_hash *sthash;
Str_master *mstr = 0;
- /*
- * We use a classic 'Bernstein k=33' hash function. But
- * instead of hashing from the start of the string to the
- * end, we do it in reverse.
- *
- * This way - we are essentially building all of the
- * suffix hashvalues as we go. We can check to see if
- * any suffixes already exist in the tree as we generate
- * the hash.
- */
for (i = len; i >= 0; i--) {
- hashval = ((hashval << 5) + hashval) +
- str[i]; /* h = ((h * 33) + c) */
+ /*
+ * Build up 'hashval' character by character, so we always
+ * have the hash of the current string suffix
+ */
+ hashval = st_string_hashround(hashval, str[i]);
for (sthash = hashbcks[hashval % bckcnt];
sthash; sthash = sthash->hi_next) {
@@ -646,15 +676,16 @@ st_getstrtab_sz(Str_tbl *stp)
return (stp->st_strsize);
}
-/*
- * Associate a buffer with a string table.
- */
const char *
st_getstrbuf(Str_tbl *stp)
{
return (stp->st_strbuf);
}
+
+/*
+ * Associate a buffer with a string table.
+ */
int
st_setstrbuf(Str_tbl *stp, char *stbuf, size_t bufsize)
{
@@ -682,3 +713,87 @@ st_setstrbuf(Str_tbl *stp, char *stbuf, size_t bufsize)
#endif
return (0);
}
+
+/*
+ * Populate the buffer with all strings from stp.
+ * The table must be compressed and cooked
+ */
+void
+st_setallstrings(Str_tbl *stp)
+{
+ assert(stp->st_strbuf != NULL);
+ assert((stp->st_flags & FLG_STTAB_COOKED));
+ assert((stp->st_flags & FLG_STTAB_COMPRESS));
+
+ for (Str_master *str = stp->st_mstrlist; str != NULL;
+ str = str->sm_next) {
+ int res = 0;
+
+ res = st_setstring(stp, str->sm_str, NULL);
+ assert(res == 0);
+ }
+}
+
+/*
+ * Find str in the given table
+ * return it's offset, or -1
+ */
+off_t
+st_findstring(Str_tbl *stp, const char *needle)
+{
+ uint_t hashval;
+ Str_hash *sthash;
+ Str_master *mstr;
+
+ assert(stp->st_strbuf != NULL);
+ assert((stp->st_flags & FLG_STTAB_COOKED));
+
+ /* The NUL string is always first */
+ if (needle[0] == '\0')
+ return (0);
+
+ /* In the uncompressed case we must linear search */
+ if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
+ const char *str, *end;
+
+ end = stp->st_strbuf + stp->st_fullstrsize;
+
+ for (str = stp->st_strbuf; str < end;
+ str += strlen(str) + 1) {
+ if (strcmp(str, needle) == 0)
+ return (str - stp->st_strbuf);
+ }
+
+ return (-1);
+ }
+
+ hashval = st_string_hash(needle);
+
+ for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt];
+ sthash != NULL;
+ sthash = sthash->hi_next) {
+ const char *hstr;
+
+ if (sthash->hi_hashval != hashval)
+ continue;
+
+ hstr = &sthash->hi_mstr->sm_str[sthash->hi_mstr->sm_strlen -
+ sthash->hi_strlen];
+ if (strcmp(needle, hstr) == 0)
+ break;
+ }
+
+ /*
+ * Did we find the string?
+ */
+ if (sthash == NULL)
+ return (-1);
+
+ mstr = sthash->hi_mstr;
+ assert(mstr->sm_stroff != 0);
+
+ /*
+ * Calculate offset of (sub)string.
+ */
+ return (mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen);
+}