summaryrefslogtreecommitdiff
path: root/dict.c
diff options
context:
space:
mode:
Diffstat (limited to 'dict.c')
-rw-r--r--dict.c357
1 files changed, 279 insertions, 78 deletions
diff --git a/dict.c b/dict.c
index c071887..20bd310 100644
--- a/dict.c
+++ b/dict.c
@@ -20,18 +20,44 @@
#include "libxml.h"
#include <string.h>
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#elif defined(WIN32)
+typedef unsigned __int32 uint32_t;
+#endif
#include <libxml/tree.h>
#include <libxml/dict.h>
#include <libxml/xmlmemory.h>
#include <libxml/xmlerror.h>
#include <libxml/globals.h>
-#define MAX_HASH_LEN 4
+/* #define DEBUG_GROW */
+/* #define DICT_DEBUG_PATTERNS */
+
+#define MAX_HASH_LEN 3
#define MIN_DICT_SIZE 128
#define MAX_DICT_HASH 8 * 2048
-
-/* #define ALLOW_REMOVAL */
-/* #define DEBUG_GROW */
+#define WITH_BIG_KEY
+
+#ifdef WITH_BIG_KEY
+#define xmlDictComputeKey(dict, name, len) \
+ (((dict)->size == MIN_DICT_SIZE) ? \
+ xmlDictComputeFastKey(name, len) : \
+ xmlDictComputeBigKey(name, len))
+
+#define xmlDictComputeQKey(dict, prefix, plen, name, len) \
+ (((prefix) == NULL) ? \
+ (xmlDictComputeKey(dict, name, len)) : \
+ (((dict)->size == MIN_DICT_SIZE) ? \
+ xmlDictComputeFastQKey(prefix, plen, name, len) : \
+ xmlDictComputeBigQKey(prefix, plen, name, len)))
+
+#else /* !WITH_BIG_KEY */
+#define xmlDictComputeKey(dict, name, len) \
+ xmlDictComputeFastKey(name, len)
+#define xmlDictComputeQKey(dict, prefix, plen, name, len) \
+ xmlDictComputeFastQKey(prefix, plen, name, len)
+#endif /* WITH_BIG_KEY */
/*
* An entry in the dictionnary
@@ -43,6 +69,7 @@ struct _xmlDictEntry {
const xmlChar *name;
int len;
int valid;
+ unsigned long okey;
};
typedef struct _xmlDictStrings xmlDictStrings;
@@ -129,6 +156,9 @@ xmlDictAddString(xmlDictPtr dict, const xmlChar *name, int namelen) {
const xmlChar *ret;
int size = 0; /* + sizeof(_xmlDictStrings) == 1024 */
+#ifdef DICT_DEBUG_PATTERNS
+ fprintf(stderr, "-");
+#endif
pool = dict->strings;
while (pool != NULL) {
if (pool->end - pool->free > namelen)
@@ -153,12 +183,16 @@ xmlDictAddString(xmlDictPtr dict, const xmlChar *name, int namelen) {
pool->end = &pool->array[size];
pool->next = dict->strings;
dict->strings = pool;
+#ifdef DICT_DEBUG_PATTERNS
+ fprintf(stderr, "+");
+#endif
}
found_pool:
ret = pool->free;
memcpy(pool->free, name, namelen);
pool->free += namelen;
*(pool->free++) = 0;
+ pool->nbStrings++;
return(ret);
}
@@ -166,6 +200,7 @@ found_pool:
* xmlDictAddQString:
* @dict: the dictionnary
* @prefix: the prefix of the userdata
+ * @plen: the prefix length
* @name: the name of the userdata
* @len: the length of the name, if -1 it is recomputed
*
@@ -174,20 +209,21 @@ found_pool:
* Returns the pointer of the local string, or NULL in case of error.
*/
static const xmlChar *
-xmlDictAddQString(xmlDictPtr dict, const xmlChar *prefix,
+xmlDictAddQString(xmlDictPtr dict, const xmlChar *prefix, int plen,
const xmlChar *name, int namelen)
{
xmlDictStringsPtr pool;
const xmlChar *ret;
int size = 0; /* + sizeof(_xmlDictStrings) == 1024 */
- int plen;
if (prefix == NULL) return(xmlDictAddString(dict, name, namelen));
- plen = xmlStrlen(prefix);
+#ifdef DICT_DEBUG_PATTERNS
+ fprintf(stderr, "=");
+#endif
pool = dict->strings;
while (pool != NULL) {
- if (pool->end - pool->free > namelen)
+ if (pool->end - pool->free > namelen + plen + 1)
goto found_pool;
if (pool->size > size) size = pool->size;
pool = pool->next;
@@ -198,8 +234,8 @@ xmlDictAddQString(xmlDictPtr dict, const xmlChar *prefix,
if (pool == NULL) {
if (size == 0) size = 1000;
else size *= 4; /* exponential growth */
- if (size < 4 * namelen)
- size = 4 * namelen; /* just in case ! */
+ if (size < 4 * (namelen + plen + 1))
+ size = 4 * (namelen + plen + 1); /* just in case ! */
pool = (xmlDictStringsPtr) xmlMalloc(sizeof(xmlDictStrings) + size);
if (pool == NULL)
return(NULL);
@@ -209,27 +245,106 @@ xmlDictAddQString(xmlDictPtr dict, const xmlChar *prefix,
pool->end = &pool->array[size];
pool->next = dict->strings;
dict->strings = pool;
+#ifdef DICT_DEBUG_PATTERNS
+ fprintf(stderr, "+");
+#endif
}
found_pool:
ret = pool->free;
memcpy(pool->free, prefix, plen);
pool->free += plen;
*(pool->free++) = ':';
- namelen -= plen + 1;
memcpy(pool->free, name, namelen);
pool->free += namelen;
*(pool->free++) = 0;
+ pool->nbStrings++;
return(ret);
}
+#ifdef WITH_BIG_KEY
+/*
+ * xmlDictComputeBigKey:
+ *
+ * Calculate a hash key using a good hash function that works well for
+ * larger hash table sizes.
+ *
+ * Hash function by "One-at-a-Time Hash" see
+ * http://burtleburtle.net/bob/hash/doobs.html
+ */
+
+static uint32_t
+xmlDictComputeBigKey(const xmlChar* data, int namelen) {
+ uint32_t hash;
+ int i;
+
+ if (namelen <= 0 || data == NULL) return(0);
+
+ hash = 0;
+
+ for (i = 0;i < namelen; i++) {
+ hash += data[i];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+
+ return hash;
+}
+
+/*
+ * xmlDictComputeBigQKey:
+ *
+ * Calculate a hash key for two strings using a good hash function
+ * that works well for larger hash table sizes.
+ *
+ * Hash function by "One-at-a-Time Hash" see
+ * http://burtleburtle.net/bob/hash/doobs.html
+ *
+ * Neither of the two strings must be NULL.
+ */
+static unsigned long
+xmlDictComputeBigQKey(const xmlChar *prefix, int plen,
+ const xmlChar *name, int len)
+{
+ uint32_t hash;
+ int i;
+
+ hash = 0;
+
+ for (i = 0;i < plen; i++) {
+ hash += prefix[i];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+ hash += ':';
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+
+ for (i = 0;i < len; i++) {
+ hash += name[i];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+
+ return hash;
+}
+#endif /* WITH_BIG_KEY */
+
/*
- * xmlDictComputeKey:
- * Calculate the hash key
+ * xmlDictComputeFastKey:
+ *
+ * Calculate a hash key using a fast hash function that works well
+ * for low hash table fill.
*/
static unsigned long
-xmlDictComputeKey(const xmlChar *name, int namelen) {
+xmlDictComputeFastKey(const xmlChar *name, int namelen) {
unsigned long value = 0L;
-
+
if (name == NULL) return(0);
value = *name;
value <<= 5;
@@ -253,24 +368,24 @@ xmlDictComputeKey(const xmlChar *name, int namelen) {
}
/*
- * xmlDictComputeQKey:
- * Calculate the hash key
+ * xmlDictComputeFastQKey:
+ *
+ * Calculate a hash key for two strings using a fast hash function
+ * that works well for low hash table fill.
+ *
+ * Neither of the two strings must be NULL.
*/
static unsigned long
-xmlDictComputeQKey(const xmlChar *prefix, const xmlChar *name, int len)
+xmlDictComputeFastQKey(const xmlChar *prefix, int plen,
+ const xmlChar *name, int len)
{
unsigned long value = 0L;
- int plen;
-
- if (prefix == NULL)
- return(xmlDictComputeKey(name, len));
- plen = xmlStrlen(prefix);
if (plen == 0)
value += 30 * (unsigned long) ':';
else
value += 30 * (*prefix);
-
+
if (len > 10) {
value += name[len - (plen + 1 + 1)];
len = 10;
@@ -325,7 +440,11 @@ xmlDictCreate(void) {
if (!xmlDictInitialized)
if (!xmlInitializeDict())
return(NULL);
-
+
+#ifdef DICT_DEBUG_PATTERNS
+ fprintf(stderr, "C");
+#endif
+
dict = xmlMalloc(sizeof(xmlDict));
if (dict) {
dict->ref_counter = 1;
@@ -358,8 +477,11 @@ xmlDictCreate(void) {
xmlDictPtr
xmlDictCreateSub(xmlDictPtr sub) {
xmlDictPtr dict = xmlDictCreate();
-
+
if ((dict != NULL) && (sub != NULL)) {
+#ifdef DICT_DEBUG_PATTERNS
+ fprintf(stderr, "R");
+#endif
dict->subdict = sub;
xmlDictReference(dict->subdict);
}
@@ -398,14 +520,16 @@ xmlDictReference(xmlDictPtr dict) {
*/
static int
xmlDictGrow(xmlDictPtr dict, int size) {
- unsigned long key;
+ unsigned long key, okey;
int oldsize, i;
xmlDictEntryPtr iter, next;
struct _xmlDictEntry *olddict;
#ifdef DEBUG_GROW
unsigned long nbElem = 0;
#endif
-
+ int ret = 0;
+ int keep_keys = 1;
+
if (dict == NULL)
return(-1);
if (size < 8)
@@ -413,11 +537,17 @@ xmlDictGrow(xmlDictPtr dict, int size) {
if (size > 8 * 2048)
return(-1);
+#ifdef DICT_DEBUG_PATTERNS
+ fprintf(stderr, "*");
+#endif
+
oldsize = dict->size;
olddict = dict->dict;
if (olddict == NULL)
return(-1);
-
+ if (oldsize == MIN_DICT_SIZE)
+ keep_keys = 0;
+
dict->dict = xmlMalloc(size * sizeof(xmlDictEntry));
if (dict->dict == NULL) {
dict->dict = olddict;
@@ -427,17 +557,44 @@ xmlDictGrow(xmlDictPtr dict, int size) {
dict->size = size;
/* If the two loops are merged, there would be situations where
- a new entry needs to allocated and data copied into it from
- the main dict. So instead, we run through the array twice, first
- copying all the elements in the main array (where we can't get
- conflicts) and then the rest, so we only free (and don't allocate)
+ a new entry needs to allocated and data copied into it from
+ the main dict. It is nicer to run through the array twice, first
+ copying all the elements in the main array (less probability of
+ allocate) and then the rest, so we only free in the second loop.
*/
for (i = 0; i < oldsize; i++) {
- if (olddict[i].valid == 0)
+ if (olddict[i].valid == 0)
continue;
- key = xmlDictComputeKey(olddict[i].name, olddict[i].len) % dict->size;
- memcpy(&(dict->dict[key]), &(olddict[i]), sizeof(xmlDictEntry));
- dict->dict[key].next = NULL;
+
+ if (keep_keys)
+ okey = olddict[i].okey;
+ else
+ okey = xmlDictComputeKey(dict, olddict[i].name, olddict[i].len);
+ key = okey % dict->size;
+
+ if (dict->dict[key].valid == 0) {
+ memcpy(&(dict->dict[key]), &(olddict[i]), sizeof(xmlDictEntry));
+ dict->dict[key].next = NULL;
+ dict->dict[key].okey = okey;
+ } else {
+ xmlDictEntryPtr entry;
+
+ entry = xmlMalloc(sizeof(xmlDictEntry));
+ if (entry != NULL) {
+ entry->name = olddict[i].name;
+ entry->len = olddict[i].len;
+ entry->okey = okey;
+ entry->next = dict->dict[key].next;
+ entry->valid = 1;
+ dict->dict[key].next = entry;
+ } else {
+ /*
+ * we don't have much ways to alert from herei
+ * result is loosing an entry and unicity garantee
+ */
+ ret = -1;
+ }
+ }
#ifdef DEBUG_GROW
nbElem++;
#endif
@@ -452,15 +609,21 @@ xmlDictGrow(xmlDictPtr dict, int size) {
* put back the entry in the new dict
*/
- key = xmlDictComputeKey(iter->name, iter->len) % dict->size;
+ if (keep_keys)
+ okey = iter->okey;
+ else
+ okey = xmlDictComputeKey(dict, iter->name, iter->len);
+ key = okey % dict->size;
if (dict->dict[key].valid == 0) {
memcpy(&(dict->dict[key]), iter, sizeof(xmlDictEntry));
dict->dict[key].next = NULL;
dict->dict[key].valid = 1;
+ dict->dict[key].okey = okey;
xmlFree(iter);
} else {
- iter->next = dict->dict[key].next;
- dict->dict[key].next = iter;
+ iter->next = dict->dict[key].next;
+ iter->okey = okey;
+ dict->dict[key].next = iter;
}
#ifdef DEBUG_GROW
@@ -478,7 +641,7 @@ xmlDictGrow(xmlDictPtr dict, int size) {
"xmlDictGrow : from %d to %d, %d elems\n", oldsize, size, nbElem);
#endif
- return(0);
+ return(ret);
}
/**
@@ -565,12 +728,12 @@ xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) {
return(NULL);
if (len < 0)
- len = xmlStrlen(name);
+ len = strlen((const char *) name);
/*
* Check for duplicate and insertion location.
*/
- okey = xmlDictComputeKey(name, len);
+ okey = xmlDictComputeKey(dict, name, len);
key = okey % dict->size;
if (dict->dict[key].valid == 0) {
insert = NULL;
@@ -578,55 +741,66 @@ xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) {
for (insert = &(dict->dict[key]); insert->next != NULL;
insert = insert->next) {
#ifdef __GNUC__
- if (insert->len == len) {
+ if ((insert->okey == okey) && (insert->len == len)) {
if (!memcmp(insert->name, name, len))
return(insert->name);
}
#else
- if ((insert->len == len) &&
+ if ((insert->okey == okey) && (insert->len == len) &&
(!xmlStrncmp(insert->name, name, len)))
return(insert->name);
#endif
nbi++;
}
#ifdef __GNUC__
- if (insert->len == len) {
+ if ((insert->okey == okey) && (insert->len == len)) {
if (!memcmp(insert->name, name, len))
return(insert->name);
}
#else
- if ((insert->len == len) &&
+ if ((insert->okey == okey) && (insert->len == len) &&
(!xmlStrncmp(insert->name, name, len)))
return(insert->name);
#endif
}
if (dict->subdict) {
- key = okey % dict->subdict->size;
+ unsigned long skey;
+
+ /* we cannot always reuse the same okey for the subdict */
+ if (((dict->size == MIN_DICT_SIZE) &&
+ (dict->subdict->size != MIN_DICT_SIZE)) ||
+ ((dict->size != MIN_DICT_SIZE) &&
+ (dict->subdict->size == MIN_DICT_SIZE)))
+ skey = xmlDictComputeKey(dict->subdict, name, len);
+ else
+ skey = okey;
+
+ key = skey % dict->subdict->size;
if (dict->subdict->dict[key].valid != 0) {
xmlDictEntryPtr tmp;
for (tmp = &(dict->subdict->dict[key]); tmp->next != NULL;
tmp = tmp->next) {
#ifdef __GNUC__
- if (tmp->len == len) {
+ if ((tmp->okey == skey) && (tmp->len == len)) {
if (!memcmp(tmp->name, name, len))
return(tmp->name);
}
#else
- if ((tmp->len == len) &&
+ if ((tmp->okey == skey) && (tmp->len == len) &&
(!xmlStrncmp(tmp->name, name, len)))
return(tmp->name);
#endif
nbi++;
}
#ifdef __GNUC__
- if (tmp->len == len) {
+ if ((tmp->okey == skey) && (tmp->len == len)) {
if (!memcmp(tmp->name, name, len))
return(tmp->name);
}
#else
- if ((tmp->len == len) &&
+ if ((tmp->okey == skey) && (tmp->len == len) &&
(!xmlStrncmp(tmp->name, name, len)))
return(tmp->name);
#endif
@@ -648,6 +822,7 @@ xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) {
entry->len = len;
entry->next = NULL;
entry->valid = 1;
+ entry->okey = okey;
if (insert != NULL)
@@ -656,8 +831,10 @@ xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) {
dict->nbElems++;
if ((nbi > MAX_HASH_LEN) &&
- (dict->size <= ((MAX_DICT_HASH / 2) / MAX_HASH_LEN)))
- xmlDictGrow(dict, MAX_HASH_LEN * 2 * dict->size);
+ (dict->size <= ((MAX_DICT_HASH / 2) / MAX_HASH_LEN))) {
+ if (xmlDictGrow(dict, MAX_HASH_LEN * 2 * dict->size) != 0)
+ return(NULL);
+ }
/* Note that entry may have been freed at this point by xmlDictGrow */
return(ret);
@@ -682,12 +859,12 @@ xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) {
return(NULL);
if (len < 0)
- len = xmlStrlen(name);
+ len = strlen((const char *) name);
/*
* Check for duplicate and insertion location.
*/
- okey = xmlDictComputeKey(name, len);
+ okey = xmlDictComputeKey(dict, name, len);
key = okey % dict->size;
if (dict->dict[key].valid == 0) {
insert = NULL;
@@ -695,60 +872,70 @@ xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) {
for (insert = &(dict->dict[key]); insert->next != NULL;
insert = insert->next) {
#ifdef __GNUC__
- if (insert->len == len) {
+ if ((insert->okey == okey) && (insert->len == len)) {
if (!memcmp(insert->name, name, len))
return(insert->name);
}
#else
- if ((insert->len == len) &&
+ if ((insert->okey == okey) && (insert->len == len) &&
(!xmlStrncmp(insert->name, name, len)))
return(insert->name);
#endif
nbi++;
}
#ifdef __GNUC__
- if (insert->len == len) {
+ if ((insert->okey == okey) && (insert->len == len)) {
if (!memcmp(insert->name, name, len))
return(insert->name);
}
#else
- if ((insert->len == len) &&
+ if ((insert->okey == okey) && (insert->len == len) &&
(!xmlStrncmp(insert->name, name, len)))
return(insert->name);
#endif
}
if (dict->subdict) {
- key = okey % dict->subdict->size;
+ unsigned long skey;
+
+ /* we cannot always reuse the same okey for the subdict */
+ if (((dict->size == MIN_DICT_SIZE) &&
+ (dict->subdict->size != MIN_DICT_SIZE)) ||
+ ((dict->size != MIN_DICT_SIZE) &&
+ (dict->subdict->size == MIN_DICT_SIZE)))
+ skey = xmlDictComputeKey(dict->subdict, name, len);
+ else
+ skey = okey;
+
+ key = skey % dict->subdict->size;
if (dict->subdict->dict[key].valid != 0) {
xmlDictEntryPtr tmp;
for (tmp = &(dict->subdict->dict[key]); tmp->next != NULL;
tmp = tmp->next) {
#ifdef __GNUC__
- if (tmp->len == len) {
+ if ((tmp->okey == skey) && (tmp->len == len)) {
if (!memcmp(tmp->name, name, len))
return(tmp->name);
}
#else
- if ((tmp->len == len) &&
+ if ((tmp->okey == skey) && (tmp->len == len) &&
(!xmlStrncmp(tmp->name, name, len)))
return(tmp->name);
#endif
nbi++;
}
#ifdef __GNUC__
- if (tmp->len == len) {
+ if ((tmp->okey == skey) && (tmp->len == len)) {
if (!memcmp(tmp->name, name, len))
return(tmp->name);
}
#else
- if ((tmp->len == len) &&
+ if ((tmp->okey == skey) && (tmp->len == len) &&
(!xmlStrncmp(tmp->name, name, len)))
return(tmp->name);
#endif
}
- key = okey % dict->size;
}
/* not found */
@@ -758,7 +945,7 @@ xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) {
/**
* xmlDictQLookup:
* @dict: the dictionnary
- * @prefix: the prefix
+ * @prefix: the prefix
* @name: the name
*
* Add the QName @prefix:@name to the hash @dict if not present.
@@ -771,54 +958,67 @@ xmlDictQLookup(xmlDictPtr dict, const xmlChar *prefix, const xmlChar *name) {
xmlDictEntryPtr entry;
xmlDictEntryPtr insert;
const xmlChar *ret;
- int len;
+ int len, plen, l;
if ((dict == NULL) || (name == NULL))
return(NULL);
+ if (prefix == NULL)
+ return(xmlDictLookup(dict, name, -1));
- len = xmlStrlen(name);
- if (prefix != NULL)
- len += 1 + xmlStrlen(prefix);
+ l = len = strlen((const char *) name);
+ plen = strlen((const char *) prefix);
+ len += 1 + plen;
/*
* Check for duplicate and insertion location.
*/
- okey = xmlDictComputeQKey(prefix, name, len);
+ okey = xmlDictComputeQKey(dict, prefix, plen, name, l);
key = okey % dict->size;
if (dict->dict[key].valid == 0) {
insert = NULL;
} else {
for (insert = &(dict->dict[key]); insert->next != NULL;
insert = insert->next) {
- if ((insert->len == len) &&
+ if ((insert->okey == okey) && (insert->len == len) &&
(xmlStrQEqual(prefix, name, insert->name)))
return(insert->name);
nbi++;
}
- if ((insert->len == len) &&
+ if ((insert->okey == okey) && (insert->len == len) &&
(xmlStrQEqual(prefix, name, insert->name)))
return(insert->name);
}
if (dict->subdict) {
- key = okey % dict->subdict->size;
+ unsigned long skey;
+
+ /* we cannot always reuse the same okey for the subdict */
+ if (((dict->size == MIN_DICT_SIZE) &&
+ (dict->subdict->size != MIN_DICT_SIZE)) ||
+ ((dict->size != MIN_DICT_SIZE) &&
+ (dict->subdict->size == MIN_DICT_SIZE)))
+ skey = xmlDictComputeQKey(dict->subdict, prefix, plen, name, l);
+ else
+ skey = okey;
+
+ key = skey % dict->subdict->size;
if (dict->subdict->dict[key].valid != 0) {
xmlDictEntryPtr tmp;
for (tmp = &(dict->subdict->dict[key]); tmp->next != NULL;
tmp = tmp->next) {
- if ((tmp->len == len) &&
+ if ((tmp->okey == skey) && (tmp->len == len) &&
(xmlStrQEqual(prefix, name, tmp->name)))
return(tmp->name);
nbi++;
}
- if ((tmp->len == len) &&
+ if ((tmp->okey == skey) && (tmp->len == len) &&
(xmlStrQEqual(prefix, name, tmp->name)))
return(tmp->name);
}
key = okey % dict->size;
}
- ret = xmlDictAddQString(dict, prefix, name, len);
+ ret = xmlDictAddQString(dict, prefix, plen, name, l);
if (ret == NULL)
return(NULL);
if (insert == NULL) {
@@ -832,6 +1032,7 @@ xmlDictQLookup(xmlDictPtr dict, const xmlChar *prefix, const xmlChar *name) {
entry->len = len;
entry->next = NULL;
entry->valid = 1;
+ entry->okey = okey;
if (insert != NULL)
insert->next = entry;