summaryrefslogtreecommitdiff
path: root/src/libknot/hash/cuckoo-hash-table.h
blob: dd78294f7c5346f120a7ffdf03cc1ea0c8faeac2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
/*!
 * \file cuckoo-hash-table.h
 *
 * \author Lubos Slovak <lubos.slovak@nic.cz>
 *
 * \brief Implementation of Cuckoo hashing scheme.
 *
 * Uses d-ary Cuckoo hashing with stash.
 *
 * \todo Maybe provide some way to resize the whole table if the number of items
 *       grows too much.
 * \todo Check size of integers, the table size may be larger than unsigned int.
 * \todo Maybe do not return ck_hash_table_item from ck_find_item(), but only
 *       its value.
 * \todo When hashing an item, only the first table is tried for this item.
 *       We may try all tables. (But it is not neccessary.)
 *
 * \addtogroup hashing
 * @{
 */
/*  Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _KNOT_CUCKOO_HASH_TABLE_H_
#define _KNOT_CUCKOO_HASH_TABLE_H_

#include <stdint.h> /* uint32_t */
#include <stdlib.h> /* size_t */
#include <pthread.h>

#include "hash/universal-system.h"
#include "common/dynamic-array.h"

/*----------------------------------------------------------------------------*/

/*! \brief Macro for getting one hash table size. */
#define hashsize(n) ((uint32_t)1 << (n))

/*!
 * \brief Max number of hash tables - must be the same as number of the hash
 *        functions in each generation of the universal system.
 */
#define MAX_TABLES US_FNC_COUNT

/*! \brief Default stash size. */
static const uint STASH_SIZE = 10;

/*! \brief Maximum stash size. When achieved, rehashing is needed. */
static const uint STASH_SIZE_MAX = 30;

/*----------------------------------------------------------------------------*/
/* Public structures                                                          */
/*----------------------------------------------------------------------------*/
/*!
 * \brief Structure for storing the hashed data.
 */
struct ck_hash_table_item {
	const char *key; /*!< Key of the item, used for hashing. */

	size_t key_length; /*!< Length of the key in octets. */

	void *value; /*!< The actual item stored in the table. */

	/*!
	 * \brief Flags. Currently used for keeping the generation of the item,
	 *        i.e. the generation of the functions used for hashing this
	 *        item.
	 *
	 * Form: 000000xy;
	 * xy - generation; may be 01 (1) or 10 (2).
	 */
	uint8_t timestamp;
};

typedef struct ck_hash_table_item ck_hash_table_item_t;

struct ck_stash_item {
	ck_hash_table_item_t *item;
	struct ck_stash_item *next;
};

typedef struct ck_stash_item ck_stash_item_t;

/*----------------------------------------------------------------------------*/
/*!
 * \brief Hash table structure which uses cuckoo hashing.
 *
 * Keys are expected to be strings of characters (char *), not necesarily
 * null-terminated. It uses the Fowler/Noll/Vo (FNV) hash function to
 * obtain a 32bit unsigned integer from the character data and a function
 * randomly chosen from an universal system (see universal-system.h) to obtain
 * the final hash. The FNV hash was taken from
 * http://home.comcast.net/~bretm/hash/6.html and the universal system is
 * constructed according to Katajainen J., Lykke M., Experiments with universal
 * hashing (obtained from
 * http://www.diku.dk/OLD/publikationer/tekniske.rapporter/rapporter/96-08.pdf).
 *
 * The table uses either 3-ary or 4-ary cuckoo hashing (and thus 3 or 4 tables)
 * with stash, according to the number of items provided to ck_create_table()
 * function. The number of table pointers is however set to be the larger value
 * (4) always, so the \a tables array may be statically allocated. Size of one
 * table is always a power of 2 (due to the character of the hash function).
 * The stash has a default size STASH_SIZE, but can be resized if needed.
 * However, the resizing is only done in rehashing process, if the items do not
 * fit into the table and the original stash.
 *
 * Rehashing is done when the stash gets full (actually, last item is always
 * free and is used in the rehashing process as a temporary variable).
 */
struct ck_hash_table {
	uint table_count; /*!< Actual number of hash tables used. */

	/*!
	 * \brief Exponent of one table's size (2^table_size_exp is table size).
	 */
	int table_size_exp;

	ck_hash_table_item_t **tables[MAX_TABLES]; /*!< Array of hash tables. */

	//da_array_t stash; /*!< Stash implemented as a dynamic array. */
	ck_stash_item_t *stash;

	/*! \brief Temporary storage for item being hashed. */
	ck_hash_table_item_t *hashed;

	/*! \brief Mutex for avoiding multiple insertions / rehashes at once. */
	pthread_mutex_t mtx_table;

	/*!
	 * \brief Flags used for determining which hash functions are currently
	 *        used
	 *
	 * Form: 00000xyz.
	 * x - rehash flag (1 if rehashing is in progress)
	 * yz - generation (may be 10 = 2, or 01 = 1)
	 *
	 * There are always two sets of hash functions available via the
	 * us_hash() function (see universal-hashing.h). Normally all items in
	 * the table are hashed using one set of functions. However, during
	 * rehash, the other set is used for rehashing. In this case the rehash
	 * flag (x) is set, so the lookup function (ck_find_item()) tries to use
	 * both sets of functions when searching for item.
	 */
	uint8_t generation;

	us_system_t hash_system; /*!< Universal system of hash functions. */
	
	size_t items;
	size_t items_in_stash;
};

typedef struct ck_hash_table ck_hash_table_t;

/*----------------------------------------------------------------------------*/
/* API functions                                                              */
/*----------------------------------------------------------------------------*/
/*!
 * \brief Creates and initializes the hash table structure.
 *
 * All hash tables are allocated and their items initialized to 0 (NULL).
 * A stash of default size is also created. The \a generation flags are set to
 * 0.
 *
 * \param items Number of items to be hashed to the table. This number
 *              determines the size of the hash table that will be created.
 *
 *
 * \return Pointer to the initialized hash table.
 */
ck_hash_table_t *ck_create_table(uint items);

/*----------------------------------------------------------------------------*/
/*!
 * \brief Destroys the whole hash table together with the saved values.
 *
 * \param table Pointer to pointer to the hash table.
 * \param dtor_value Destructor function for the values that are be stored in
 *                   the hash table. Set to NULL if you do not want the values
 *                   to be deleted.
 * \param delete_key Set to 0 if you do not want the function to delete the
 *                   key of the item (e.g. when used elsewhere). Set to any
 *                   other value otherwise.
 *
 * \note Make sure the table and its items are not used anymore when calling
 * this function.
 */
void ck_destroy_table(ck_hash_table_t **table,
                      void (*dtor_value)(void *value), int delete_key);

/*!
 * \brief Destroys the table structures, but does not remove the individual
 *        hash table items.
 */
void ck_table_free(ck_hash_table_t **table);

/*----------------------------------------------------------------------------*/
/*!
 * \brief Inserts item into the hash table.
 *
 * Insertion starts always by trying to hash the item into the first table. The
 * possible displaced item is then hashed into randomly chosen other table,
 * etc., until a free place is found or a loop occured. A loop occurs when one
 * position in one table is tried more than twice.
 *
 * \param table Hash table the item should be inserted into.
 * \param key Item's key. It can be any string of octets. The key is not copied
 *            by the function.
 * \param length Length of the key in bytes (octets).
 * \param value Pointer to the actual item to be inserted into the hash table.
 *
 * \note This function does not copy the key.
 * \note This function may trigger rehash of the whole table in case the stash
 *       gets full.
 *
 * \retval 0 No error.
 * \retval -1 Insertion failed. This may occur only when the rehashing fails.
 *            In this case it is necessary to somehow manually force another
 *            rehash as no other rehash would be possible.
 */
int ck_insert_item(ck_hash_table_t *table, const char *key, size_t length,
                   void *value);

/*----------------------------------------------------------------------------*/
/*!
 * \brief Finds item in table.
 *
 * \param table Hash table to search in.
 * \param key Key of the item. It can be an arbitrary string of octets.
 * \param length Length of the key in bytes (octets).
 *
 * \return Pointer to the item if found. NULL otherwise.
 */
const ck_hash_table_item_t *ck_find_item(const ck_hash_table_t *table,
                                         const char *key, size_t length);

/*----------------------------------------------------------------------------*/
/*!
 * \brief Updates item with the given key by replacing its value.
 *
 * The update process is synchronized using RCU mechanism, so the old item's
 * value will not be deleted while some thread is using it.
 *
 * \param table Hash table where to search for the item.
 * \param key Key of the item to be updated. It can be an arbitrary string of
 *            octets.
 * \param length Length of the key in bytes (octets).
 * \param new_value New value for the item with key \a key.
 * \param dtor_value Destructor function for the values that are be stored in
 *                   the hash table. Set to NULL if you do not want the values
 *                   to be deleted.
 *
 * \retval 0 If successful.
 * \retval -1 If the item was not found in the table. No changes are made.
 */
int ck_update_item(const ck_hash_table_t *table, const char *key, size_t length,
                   void *new_value, void (*dtor_value)(void *value));

/*----------------------------------------------------------------------------*/
/*!
 * \brief Removes item with the given key from table.
 *
 * The deletion process is synchronized using RCU mechanism, so the old item
 * will not be deleted while some thread is using it.
 *
 * \param table Hash table where to search for the item.
 * \param key Key of the item to be removed. It can be an arbitrary string of
 *            octets.
 * \param length Length of the key in bytes (octets).
 * \param dtor_value Destructor function for the values that are be stored in
 *                   the hash table. Set to NULL if you do not want the values
 *                   to be deleted.
 * \param delete_key Set to 0 if you do not want the function to delete the
 *                   key of the item (e.g. when used elsewhere). Set to any
 *                   other value otherwise.
 *
 * \retval 0 If successful.
 * \retval -1 If the item was not found in the table.
 */
int ck_delete_item(const ck_hash_table_t *table, const char *key, size_t length,
                   void (*dtor_value)(void *value), int delete_key);

ck_hash_table_item_t *ck_remove_item(ck_hash_table_t *table, const char *key, 
                                     size_t length);

/*!
 * \brief Creates a shallow copy of the cuckoo hash table.
 *
 * This function creates just the ck_hash_table_t structure and its tables and
 * stash. It does not copy individual ck_hash_table_item_t structures.
 *
 * \param from Table to copy.
 * \param to The new copy will be stored here.
 *
 * \retval 0 if successful.
 * \retval
 */
int ck_shallow_copy(const ck_hash_table_t *from, ck_hash_table_t **to);

int ck_apply(ck_hash_table_t *table, 
             void (*function)(ck_hash_table_item_t *item, void *data), 
             void *data);

/*----------------------------------------------------------------------------*/

int ck_rehash(ck_hash_table_t *table);

// for testing purposes only
int ck_resize_table(ck_hash_table_t *table);

/*----------------------------------------------------------------------------*/
/*!
 * \brief Dumps the whole hash table to the standard output.
 */
void ck_dump_table(const ck_hash_table_t *table);

/*----------------------------------------------------------------------------*/

#endif /* _KNOT_CUCKOO_HASH_TABLE_H_ */

/*! @} */