path: root/src/common/slab
author     Ondřej Surý <ondrej@sury.org>  2011-11-02 22:44:12 +0100
committer  Ondřej Surý <ondrej@sury.org>  2011-11-02 22:44:12 +0100
commit     c8d5977bb546dae9ed59d81556639c49badd8121 (patch)
tree       4c86750db26c1c3502b60f2cd78ca9611cfa01d6 /src/common/slab
download   knot-upstream/0.8.0_pre1.tar.gz

Imported Upstream version 0.8.0~pre1 (tag: upstream/0.8.0_pre1)
Diffstat (limited to 'src/common/slab')
-rw-r--r--  src/common/slab/alloc-common.h |  61
-rw-r--r--  src/common/slab/malloc.c       |  60
-rw-r--r--  src/common/slab/malloc.h       |  43
-rw-r--r--  src/common/slab/slab.c         | 732
-rw-r--r--  src/common/slab/slab.h         | 353
5 files changed, 1249 insertions, 0 deletions
diff --git a/src/common/slab/alloc-common.h b/src/common/slab/alloc-common.h
new file mode 100644
index 0000000..32878ab
--- /dev/null
+++ b/src/common/slab/alloc-common.h
@@ -0,0 +1,61 @@
+/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*!
+ * \file alloc-common.h
+ *
+ * \author Lubos Slovak <lubos.slovak@nic.cz>
+ *
+ * \brief Common macros for alloc.
+ *
+ * \addtogroup alloc
+ * @{
+ */
+
+#ifndef _KNOTD_COMMON_ALLOC_COMMON_H_
+#define _KNOTD_COMMON_ALLOC_COMMON_H_
+
+#include <stdio.h>
+
+//#define MEM_DEBUG
+//#define MEM_NOSLAB
+//#define MEM_POISON
+#define MEM_SLAB_CAP 5 // Cap slab_cache empty slab count (undefined = inf)
+#define MEM_COLORING // Slab cache coloring
+//#define MEM_SLAB_DEPOT // Use slab depot for slab caching (not thread-safe)
+
+/* Eliminate compiler warning with unused parameters. */
+#ifndef UNUSED
+#define UNUSED(param) (void)(param)
+#endif
+
+/* Optimisation macros. */
+#ifndef likely
+#define likely(x) __builtin_expect((x),1)
+#endif
+#ifndef unlikely
+#define unlikely(x) __builtin_expect((x),0)
+#endif
+
+#ifdef MEM_DEBUG
+#define dbg_mem(msg...) fprintf(stderr, msg)
+#else
+#define dbg_mem(msg...)
+#endif
+
+
+#endif /* _KNOTD_COMMON_ALLOC_COMMON_H_ */
+
+/*! @} */
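
As an illustration (not part of the imported patch; the file and function names below are hypothetical), the macros declared in alloc-common.h would typically be used like this:

/* macros-example.c: illustrative sketch only, not part of the upstream import. */
#include "common/slab/alloc-common.h"

static int parse_block(const char *data, void *ctx)
{
	UNUSED(ctx);                           /* silence unused-parameter warnings */

	if (unlikely(data == 0)) {             /* hint that the error path is unexpected */
		dbg_mem("parse_block: no data\n"); /* compiled out unless MEM_DEBUG is set */
		return -1;
	}
	return 0;
}

int main(void)
{
	return parse_block("x", 0);
}
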
diff --git a/src/common/slab/malloc.c b/src/common/slab/malloc.c
new file mode 100644
index 0000000..ec5a68d
--- /dev/null
+++ b/src/common/slab/malloc.c
@@ -0,0 +1,60 @@
+/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+/*
+ * Memory usage statistics (the destructor hook is only registered with MEM_DEBUG).
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/resource.h>
+
+#include "common/slab/alloc-common.h"
+
+#ifdef MEM_DEBUG
+/*
+ * ((destructor)) attribute executes this function after main().
+ * \see http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
+ */
+void __attribute__ ((destructor)) usage_dump()
+#else
+void usage_dump()
+#endif
+{
+ /* Get resource usage. */
+ struct rusage usage;
+ if (getrusage(RUSAGE_SELF, &usage) < 0) {
+ memset(&usage, 0, sizeof(struct rusage));
+ }
+
+ fprintf(stderr, "\nMemory statistics:");
+ fprintf(stderr, "\n==================\n");
+
+ fprintf(stderr, "User time: %.03lf ms\nSystem time: %.03lf ms\n",
+ usage.ru_utime.tv_sec * (double) 1000.0
+ + usage.ru_utime.tv_usec / (double)1000.0,
+ usage.ru_stime.tv_sec * (double) 1000.0
+ + usage.ru_stime.tv_usec / (double)1000.0);
+ fprintf(stderr, "Major page faults: %lu (required I/O)\nMinor page faults: %lu\n",
+ usage.ru_majflt, usage.ru_minflt);
+ fprintf(stderr, "Number of swaps: %lu\n",
+ usage.ru_nswap);
+ fprintf(stderr, "Voluntary context switches: %lu\nInvoluntary context switches: %lu\n",
+ usage.ru_nvcsw,
+ usage.ru_nivcsw);
+ fprintf(stderr, "==================\n");
+}
diff --git a/src/common/slab/malloc.h b/src/common/slab/malloc.h
new file mode 100644
index 0000000..8ca9f58
--- /dev/null
+++ b/src/common/slab/malloc.h
@@ -0,0 +1,43 @@
+/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*!
+ * \file malloc.h
+ *
+ * \author Marek Vavrusa <marek.vavrusa@nic.cz>
+ *
+ * \brief Memory allocation related functions.
+ *
+ * \addtogroup alloc
+ * @{
+ */
+
+#ifndef _KNOTD_COMMON_MALLOC_H_
+#define _KNOTD_COMMON_MALLOC_H_
+
+#include <stdlib.h>
+
+/*! \brief Print usage statistics.
+ *
+ * \note This function has destructor attribute set if MEM_DEBUG is enabled.
+ *
+ * \warning Not all printed statistics are available on every OS;
+ *          consult the getrusage(2) manual page.
+ */
+void usage_dump();
+
+#endif // _KNOTD_COMMON_MALLOC_H_
+
+/*! @} */
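
Since the destructor attribute is only attached when MEM_DEBUG is defined, usage_dump() can also be called explicitly. A minimal sketch (not part of the imported patch; the file name is hypothetical and the include path assumes the src/ layout from the diffstat):

/* usage-example.c: illustrative sketch only, not part of the upstream import. */
#include <stdlib.h>
#include "common/slab/malloc.h"

int main(void)
{
	/* Do some work that generates page faults and context switches. */
	for (int i = 0; i < 1024; ++i) {
		void *p = malloc(4096);
		free(p);
	}

	/* Without MEM_DEBUG the statistics are not printed automatically
	 * at exit, so request them explicitly. */
	usage_dump();
	return 0;
}
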
diff --git a/src/common/slab/slab.c b/src/common/slab/slab.c
new file mode 100644
index 0000000..ccdf7ca
--- /dev/null
+++ b/src/common/slab/slab.c
@@ -0,0 +1,732 @@
+/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "common/slab/alloc-common.h"
+#include "common/slab/slab.h"
+
+/*
+ * Magic constants.
+ */
+#define SLAB_MAGIC 0x51 /*!< "Sl" magic byte (slab type). */
+#define LOBJ_MAGIC 0x0B /*!< "Ob" magic byte (object type). */
+#define POISON_DWORD 0xdeadbeef /*!< Memory boundary guard magic. */
+#define SLAB_MINCOLOR 64 /*!< Minimum space reserved for cache coloring. */
+#define SLAB_HEADER sizeof(slab_t) /*!< Slab header size. */
+#define ALIGN_PTRSZ __attribute__ ((__aligned__(sizeof(void*))))
+
+/*! \brief Fast cache id lookup table.
+ *
+ * Provides O(1) lookup.
+ * Pre-seeded with a few default values and
+ * otherwise filled on demand.
+ */
+unsigned ALIGN_PTRSZ SLAB_CACHE_LUT[SLAB_SIZE] = {
+ [24] = SLAB_GP_COUNT + 1,
+ [800] = SLAB_GP_COUNT + 2
+};
+
+/*! \brief Find the next highest power of 2. */
+static inline unsigned get_next_pow2(unsigned v)
+{
+ // Next highest power of 2
+ --v;
+ v |= v >> 1; v |= v >> 2;
+ v |= v >> 4; v |= v >> 8;
+ v |= v >> 16;
+ ++v;
+
+ return v;
+}
+
+/*! \brief Return binary logarithm of a number, which is a power of 2. */
+static inline unsigned fastlog2(unsigned v)
+{
+ // Works if we know the size is a power of 2
+ register unsigned int r = (v & 0xAAAAAAAA) != 0;
+ r |= ((v & 0xFFFF0000) != 0) << 4;
+ r |= ((v & 0xFF00FF00) != 0) << 3;
+ r |= ((v & 0xF0F0F0F0) != 0) << 2;
+ r |= ((v & 0xCCCCCCCC) != 0) << 1;
+ return r;
+}
+
+/*!
+ * \brief Fast hashing function.
+ *
+ * Finds the next highest power of 2 and returns binary logarithm.
+ * Values are stored in LUT cache for future access.
+ */
+static unsigned slab_cache_id(unsigned size)
+{
+ // Assert cache id of the smallest bufsize is 0
+ if(size <= SLAB_MIN_BUFLEN) {
+ return 0;
+ }
+
+ // Check LUT
+ unsigned id = 0;
+ if ((id = SLAB_CACHE_LUT[size])) {
+ return id;
+ } else {
+
+ // Compute binary logarithm
+ // Next highest power of 2
+ id = fastlog2(get_next_pow2(size));
+
+ // Shift cacheid of SLAB_MIN_BUFLEN to 0
+ id -= SLAB_EXP_OFFSET;
+
+ // Store
+ SLAB_CACHE_LUT[size] = id;
+ }
+
+ return id;
+}
+
+/*
+ * Slab run-time constants.
+ */
+
+size_t SLAB_MASK = 0; /*!< \brief Slab address mask (for computing offsets). */
+static unsigned SLAB_LOGSIZE = 0; /*!< \brief Binary logarithm of slab size. */
+
+/*!
+ * The depot is a caching sub-allocator of slabs.
+ * It mitigates the performance impact of sequentially allocating and freeing
+ * from a slab with just a few items by caching N slabs before returning
+ * them to the system.
+ *
+ * \todo With wider use, locking or RCU will be necessary.
+ */
+#ifdef MEM_SLAB_DEPOT
+static slab_depot_t _depot_g; /*! \brief Global slab depot. */
+#endif // MEM_SLAB_DEPOT
+
+/*!
+ * \brief Allocate a slab of given bufsize from depot.
+ *
+ * \retval Reserved memory for slab on success.
+ * \retval NULL on errors.
+ */
+static void* slab_depot_alloc(size_t bufsize)
+{
+ void *page = 0;
+#ifdef MEM_SLAB_DEPOT
+ if (_depot_g.available) {
+ for (int i = _depot_g.available - 1; i > -1 ; --i) {
+ if(_depot_g.cache[i]->bufsize == bufsize) {
+ page = _depot_g.cache[i];
+ _depot_g.cache[i] = _depot_g.cache[--_depot_g.available];
+ return page;
+ }
+ }
+ page = _depot_g.cache[--_depot_g.available];
+ } else {
+ if(posix_memalign(&page, SLAB_SIZE, SLAB_SIZE) == 0) {
+ ((slab_t*)page)->bufsize = 0;
+ } else {
+ page = 0;
+ }
+
+ }
+#else // MEM_SLAB_DEPOT
+ if(posix_memalign(&page, SLAB_SIZE, SLAB_SIZE) == 0) {
+ ((slab_t*)page)->bufsize = 0;
+ } else {
+ page = 0;
+ }
+#endif // MEM_SLAB_DEPOT
+
+ return page;
+}
+
+/*!
+ * \brief Return a slab to the depot.
+ *
+ * \note If the depot is full, the slab is freed immediately.
+ */
+static inline void slab_depot_free(void* slab)
+{
+#ifdef MEM_SLAB_DEPOT
+ if (_depot_g.available < SLAB_DEPOT_SIZE) {
+ _depot_g.cache[_depot_g.available++] = slab;
+ } else {
+ free(slab);
+ }
+#else // MEM_SLAB_DEPOT
+ free(slab);
+#endif // MEM_SLAB_DEPOT
+}
+
+/*! \brief Initialize slab depot. */
+static void slab_depot_init()
+{
+#ifdef MEM_SLAB_DEPOT
+ _depot_g.available = 0;
+#endif // MEM_SLAB_DEPOT
+}
+
+/*! \brief Destroy slab depot. */
+static void slab_depot_destroy()
+{
+#ifdef MEM_SLAB_DEPOT
+ while(_depot_g.available) {
+ free(_depot_g.cache[--_depot_g.available]);
+ }
+#endif // MEM_SLAB_DEPOT
+}
+
+/*
+ * Initializers.
+ */
+
+/*! \brief Initializes slab subsystem (it is called automatically). */
+void __attribute__ ((constructor)) slab_init()
+{
+	// Compute binary logarithm of the slab size
+ SLAB_LOGSIZE = fastlog2(SLAB_SIZE);
+
+ // Compute slab page mask
+ SLAB_MASK = 0;
+ for (int i = 0; i < SLAB_LOGSIZE; ++i) {
+ SLAB_MASK |= 1 << i;
+ }
+ SLAB_MASK = ~SLAB_MASK;
+
+ // Initialize depot
+ slab_depot_init();
+}
+
+/*! \brief Deinitializes slab subsystem (it is called automatically). */
+void __attribute__ ((destructor)) slab_deinit()
+{
+ // Deinitialize global allocator
+ if (SLAB_LOGSIZE) {
+ slab_depot_destroy();
+ SLAB_LOGSIZE = SLAB_MASK = 0;
+ }
+}
+
+/*
+ * Cache helper functions.
+ */
+
+/* \note Not used right now.
+static void slab_dump(slab_t* slab) {
+
+ printf("%s: buffers (bufsize=%zuB, %u/%u free): \n",
+ __func__, slab->cache->bufsize, slab->bufs_free,
+ slab->bufs_count);
+
+ void** buf = slab->head;
+ int i = 0, n = 0;
+ while(buf != 0) {
+ size_t diff = (size_t)((char*)buf - (char*)slab->base);
+ printf("-> %lu", diff / slab->cache->bufsize);
+ buf = (void**)(*buf);
+ if (++i == 10) {
+ printf("\n");
+ i = 0;
+ }
+ ++n;
+ }
+
+ printf("\n");
+}
+*/
+
+/*!
+ * \brief Free all slabs from a slab cache.
+ * \return Number of freed slabs.
+ */
+static inline int slab_cache_free_slabs(slab_t* slab)
+{
+ int count = 0;
+ while (slab) {
+ slab_t* next = slab->next;
+ slab_destroy(&slab);
+ ++count;
+ slab = next;
+
+ }
+ return count;
+}
+
+/*
+ * Slab helper functions.
+ */
+
+/*! \brief Return number of slabs in a linked list. */
+static inline unsigned slab_list_walk(slab_t* slab)
+{
+ unsigned count = 0;
+ while(slab) {
+ slab = slab->next;
+ ++count;
+ }
+ return count;
+}
+
+/*! \brief Remove slab from a linked list. */
+static void slab_list_remove(slab_t* slab)
+{
+ // Disconnect from list
+ if (slab->prev) {
+ slab->prev->next = slab->next;
+ }
+ if(slab->next) {
+ slab->next->prev = slab->prev;
+ }
+
+ // Disconnect from cache
+ slab_cache_t* cache = slab->cache;
+ {
+ if (cache->slabs_free == slab) {
+ cache->slabs_free = slab->next;
+ } else if (cache->slabs_full == slab) {
+ cache->slabs_full = slab->next;
+ }
+ }
+}
+
+/*! \brief Insert slab into a linked list. */
+static void slab_list_insert(slab_t** list, slab_t* item)
+{
+ // If list exists, push to the top
+ item->prev = 0;
+ item->next = *list;
+ if(*list) {
+ (*list)->prev = item;
+ }
+ *list = item;
+}
+
+/*! \brief Move slab from one linked list to another. */
+static inline void slab_list_move(slab_t** target, slab_t* slab)
+{
+ slab_list_remove(slab);
+ slab_list_insert(target, slab);
+}
+
+/*
+ * API functions.
+ */
+
+slab_t* slab_create(slab_cache_t* cache)
+{
+ const size_t size = SLAB_SIZE;
+
+ slab_t* slab = slab_depot_alloc(cache->bufsize);
+
+	if (unlikely(slab == 0)) {
+ dbg_mem("%s: failed to allocate aligned memory block\n",
+ __func__);
+ return 0;
+ }
+
+ /* Initialize slab. */
+ slab->magic = SLAB_MAGIC;
+ slab->cache = cache;
+ slab_list_insert(&cache->slabs_free, slab);
+#ifdef MEM_SLAB_CAP
+ ++cache->empty;
+#endif
+
+ /* Already initialized? */
+ if (slab->bufsize == cache->bufsize) {
+ return slab;
+ } else {
+ slab->bufsize = cache->bufsize;
+ }
+
+	/* Ensure the item size can hold at least the size of a pointer. */
+ size_t item_size = slab->bufsize;
+ if (unlikely(item_size < SLAB_MIN_BUFLEN)) {
+ item_size = SLAB_MIN_BUFLEN;
+ }
+
+ /* Ensure at least some space for coloring */
+ size_t data_size = size - sizeof(slab_t);
+#ifdef MEM_COLORING
+ size_t free_space = data_size % item_size;
+ if (unlikely(free_space < SLAB_MINCOLOR)) {
+ free_space = SLAB_MINCOLOR;
+ }
+
+
+ /// unsigned short color = __sync_fetch_and_add(&cache->color, 1);
+ unsigned short color = (cache->color += sizeof(void*));
+ color = color % free_space;
+#else
+ const unsigned short color = 0;
+#endif
+
+	/* Calculate usable data size */
+ data_size -= color;
+ slab->bufs_count = data_size / item_size;
+ slab->bufs_free = slab->bufs_count;
+
+ // Save first item as next free
+ slab->base = (char*)slab + sizeof(slab_t) + color;
+ slab->head = (void**)slab->base;
+
+ // Create freelist, skip last member, which is set to NULL
+ char* item = (char*)slab->head;
+ for(unsigned i = 0; i < slab->bufs_count - 1; ++i) {
+ *((void**)item) = item + item_size;
+ item += item_size;
+ }
+
+ // Set last buf to NULL (tail)
+ *((void**)item) = (void*)0;
+
+ // Ensure the last item has a NULL next
+ dbg_mem("%s: created slab (%p, %p) (%zu B)\n",
+		__func__, slab, (char*)slab + size, size);
+ return slab;
+}
+
+void slab_destroy(slab_t** slab)
+{
+ /* Disconnect from the list */
+ slab_list_remove(*slab);
+
+ /* Free slab */
+ slab_depot_free(*slab);
+
+ /* Invalidate pointer. */
+ dbg_mem("%s: deleted slab %p\n", __func__, *slab);
+ *slab = 0;
+}
+
+void* slab_alloc(slab_t* slab)
+{
+ // Fetch first free item
+ void **item = 0;
+ {
+ if((item = slab->head)) {
+ slab->head = (void**)*item;
+ --slab->bufs_free;
+ } else {
+ // No more free items
+ return 0;
+ }
+ }
+
+#ifdef MEM_DEBUG
+ // Increment statistics
+ __sync_add_and_fetch(&slab->cache->stat_allocs, 1);
+#endif
+
+ // Move to full?
+ if (unlikely(slab->bufs_free == 0)) {
+ slab_list_move(&slab->cache->slabs_full, slab);
+ } else {
+#ifdef MEM_SLAB_CAP
+ // Mark not empty?
+ if (unlikely(slab->bufs_free == slab->bufs_count - 1)) {
+ --slab->cache->empty;
+ }
+#endif
+ }
+
+ return item;
+}
+
+void slab_free(void* ptr)
+{
+ // Null pointer check
+ if (unlikely(!ptr)) {
+ return;
+ }
+
+ // Get slab start address
+ slab_t* slab = slab_from_ptr(ptr);
+ assert(slab);
+
+ // Check if it exists in directory
+ if (slab->magic == SLAB_MAGIC) {
+
+ // Return buf to slab
+ *((void**)ptr) = (void*)slab->head;
+ slab->head = (void**)ptr;
+ ++slab->bufs_free;
+
+#ifdef MEM_DEBUG
+ // Increment statistics
+ __sync_add_and_fetch(&slab->cache->stat_frees, 1);
+#endif
+
+ // Return to partial
+ if(unlikely(slab->bufs_free == 1)) {
+ slab_list_move(&slab->cache->slabs_free, slab);
+ } else {
+#ifdef MEM_SLAB_CAP
+ // Recycle if empty
+ if(unlikely(slab_isempty(slab))) {
+ if(slab->cache->empty == MEM_SLAB_CAP) {
+ slab_destroy(&slab);
+ } else {
+ ++slab->cache->empty;
+ }
+ }
+#endif
+ }
+
+ } else {
+
+ // Pointer is not a slab
+ // Presuming it's a large block
+ slab_obj_t* bs = (slab_obj_t*)ptr - 1;
+
+#ifdef MEM_POISON
+ // Remove memory barrier
+ mprotect(ptr + bs->size, sizeof(int), PROT_READ|PROT_WRITE);
+#endif
+
+ // Unmap
+ dbg_mem("%s: unmapping large block of %zu bytes at %p\n",
+ __func__, bs->size, ptr);
+ free(bs);
+ }
+}
+
+int slab_cache_init(slab_cache_t* cache, size_t bufsize)
+{
+ if (unlikely(!bufsize)) {
+ return -1;
+ }
+
+ cache->empty = 0;
+ cache->bufsize = bufsize;
+ cache->slabs_free = cache->slabs_full = 0;
+ cache->color = 0;
+
+ /* Initialize stats */
+ cache->stat_allocs = cache->stat_frees = 0;
+
+ dbg_mem("%s: created cache of size %zu\n",
+ __func__, bufsize);
+
+ return 0;
+}
+
+void slab_cache_destroy(slab_cache_t* cache) {
+
+ // Free slabs
+ unsigned free_s = slab_cache_free_slabs(cache->slabs_free);
+ unsigned full_s = slab_cache_free_slabs(cache->slabs_full);
+#ifndef MEM_DEBUG
+ UNUSED(free_s);
+ UNUSED(full_s);
+#else
+	dbg_mem("%s: %u empty/partial, %u full slabs\n",
+ __func__, free_s, full_s);
+#endif
+
+ // Invalidate cache
+ cache->bufsize = 0;
+ cache->slabs_free = cache->slabs_full = 0;
+}
+
+void* slab_cache_alloc(slab_cache_t* cache)
+{
+ slab_t* slab = cache->slabs_free;
+ if(!cache->slabs_free) {
+ slab = slab_create(cache);
+ if (slab == NULL) {
+ return NULL;
+ }
+ }
+
+
+ return slab_alloc(slab);
+}
+
+int slab_cache_reap(slab_cache_t* cache)
+{
+ // For now, just free empty slabs
+ slab_t* slab = cache->slabs_free;
+ int count = 0;
+ while (slab) {
+ slab_t* next = slab->next;
+ if (slab_isempty(slab)) {
+ slab_destroy(&slab);
+ ++count;
+ }
+ slab = next;
+
+ }
+
+ cache->empty = 0;
+ return count;
+}
+
+int slab_alloc_init(slab_alloc_t* alloc)
+{
+ // Invalidate
+ memset(alloc, 0, sizeof(slab_alloc_t));
+
+ // Initialize descriptors cache
+ slab_cache_init(&alloc->descriptors, sizeof(slab_cache_t));
+
+ return 0;
+}
+
+void slab_alloc_destroy(slab_alloc_t* alloc)
+{
+ // Destroy all caches
+ for (unsigned i = 0; i < SLAB_CACHE_COUNT; ++i) {
+ if (alloc->caches[i] != 0) {
+ slab_cache_destroy(alloc->caches[i]);
+ }
+ }
+
+ // Destroy cache for descriptors
+ slab_cache_destroy(&alloc->descriptors);
+}
+
+void* slab_alloc_alloc(slab_alloc_t* alloc, size_t size)
+{
+ // Invalid size check
+ if (unlikely(!size)) {
+ return 0;
+ }
+
+#ifdef MEM_POISON
+ // Reserve memory for poison
+ size += sizeof(int);
+#endif
+ // Directly map large block
+ if (unlikely(size > SLAB_SIZE/2)) {
+
+ // Map block
+ size += sizeof(slab_obj_t);
+		slab_obj_t* p = malloc(size);
+		if (unlikely(p == 0)) {
+			return 0;
+		}
+
+ dbg_mem("%s: mapping large block of %zu bytes at %p\n",
+ __func__, size, p + 1);
+
+ /* Initialize. */
+ p->magic = LOBJ_MAGIC;
+ p->size = size - sizeof(slab_obj_t);
+
+#ifdef MEM_POISON
+ // Reduce real size
+ p->size -= sizeof(int);
+
+ // Memory barrier
+ int* pb = (int*)((char*)p + size - sizeof(int));
+ *pb = POISON_DWORD;
+ mprotect(pb, sizeof(int), PROT_NONE);
+#endif
+
+ return p + 1;
+ }
+
+ // Get cache id from size
+ unsigned cache_id = slab_cache_id(size);
+
+ // Check if associated cache exists
+ if (unlikely(alloc->caches[cache_id] == 0)) {
+
+ // Assert minimum cache size
+ if (unlikely(size < SLAB_MIN_BUFLEN)) {
+ size = SLAB_MIN_BUFLEN;
+ }
+
+ // Calculate cache bufsize
+ size_t bufsize = size;
+ if (cache_id < SLAB_GP_COUNT) {
+ bufsize = get_next_pow2(size);
+ }
+
+ // Create cache
+ dbg_mem("%s: creating cache of %zuB (req. %zuB) (id=%u)\n",
+ __func__, bufsize, size, cache_id);
+
+ slab_cache_t* cache = slab_cache_alloc(&alloc->descriptors);
+ slab_cache_init(cache, bufsize);
+ alloc->caches[cache_id] = cache;
+ }
+
+ // Allocate from cache
+ void* mem = slab_cache_alloc(alloc->caches[cache_id]);
+
+#ifdef MEM_POISON
+ // Memory barrier
+ /*! \todo Broken, need to store the barrier byte size. */
+ //int* pb = (int*)((char*)mem + size - sizeof(int));
+ //mprotect(pb, sizeof(int), PROT_NONE);
+#endif
+ return mem;
+}
+
+void *slab_alloc_realloc(slab_alloc_t* alloc, void *ptr, size_t size)
+{
+ // realloc(0) equals to free(ptr)
+ if (!size) {
+ slab_free(ptr);
+ return 0;
+ }
+
+ // Allocate new buf
+ void *nptr = slab_alloc_alloc(alloc, size);
+ assert(nptr);
+
+ // Copy memory if present
+ if (ptr) {
+ slab_t* slab = slab_from_ptr(ptr);
+ memcpy(nptr, ptr, slab->cache->bufsize);
+
+ // Free old buf
+ slab_free(ptr);
+ }
+
+ return nptr;
+}
+
+void slab_alloc_stats(slab_alloc_t* alloc)
+{
+#ifdef MEM_DEBUG
+ printf("Cache usage:\n");
+ for (int i = 0; i < SLAB_CACHE_COUNT; ++i) {
+
+ if (!alloc->caches[i])
+ continue;
+
+ slab_cache_t* cache = alloc->caches[i];
+ unsigned free_s = slab_list_walk(cache->slabs_free);
+ unsigned full_s = slab_list_walk(cache->slabs_full);
+ printf("%4zu: allocs=%lu frees=%lu "
+ "(%u empty+partial, %u full)\n",
+ cache->bufsize, cache->stat_allocs,
+ cache->stat_frees, free_s, full_s);
+ }
+#else
+ printf("Cache usage: not available, enable MEM_DEBUG and recompile.\n");
+#endif
+}
+
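
To make the size-to-cache mapping in slab_cache_id() concrete, here is a standalone sketch (not part of the imported patch) that copies the two static helpers above and prints the computed cache id for a few request sizes. Note that the allocator additionally pre-seeds sizes 24 and 800 to user-specified caches via SLAB_CACHE_LUT, which overrides this computation for those sizes.

/* cacheid-example.c: standalone sketch, not part of the upstream import.
 * It copies the two static helpers from slab.c to show how a request size
 * maps to a general-purpose cache id in slab_alloc_alloc(). */
#include <stdio.h>

#define SLAB_MIN_BUFLEN 8   /* as in slab.h */
#define SLAB_EXP_OFFSET 3   /* log2(SLAB_MIN_BUFLEN) */

static unsigned get_next_pow2(unsigned v)
{
	--v;
	v |= v >> 1; v |= v >> 2;
	v |= v >> 4; v |= v >> 8;
	v |= v >> 16;
	++v;
	return v;
}

static unsigned fastlog2(unsigned v)
{
	unsigned r = (v & 0xAAAAAAAA) != 0;
	r |= ((v & 0xFFFF0000) != 0) << 4;
	r |= ((v & 0xFF00FF00) != 0) << 3;
	r |= ((v & 0xF0F0F0F0) != 0) << 2;
	r |= ((v & 0xCCCCCCCC) != 0) << 1;
	return r;
}

int main(void)
{
	unsigned sizes[] = { 8, 16, 100, 500, 4096 };
	for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i) {
		unsigned size = sizes[i];
		unsigned id = (size <= SLAB_MIN_BUFLEN)
		              ? 0
		              : fastlog2(get_next_pow2(size)) - SLAB_EXP_OFFSET;
		printf("request %4uB -> cache id %u (bufsize %uB)\n",
		       size, id, get_next_pow2(size));
	}
	return 0;
}
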
diff --git a/src/common/slab/slab.h b/src/common/slab/slab.h
new file mode 100644
index 0000000..d64188e
--- /dev/null
+++ b/src/common/slab/slab.h
@@ -0,0 +1,353 @@
+/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*!
+ * \file slab.h
+ *
+ * \author Marek Vavrusa <marek.vavrusa@nic.cz>
+ *
+ * \brief SLAB allocator.
+ *
+ * SLAB caches work with either custom SLAB sizes or
+ * Next-Highest-Power-Of-2 sizes.
+ *
+ * The slab size is a multiple of PAGE_SIZE; larger blocks are
+ * passed to the system allocator.
+ *
+ * Allocated SLABs are PAGE_SIZE aligned for a fast O(1)
+ * address-from-item lookup. This results in almost no memory
+ * overhead for very small blocks (<64B), but it requires the
+ * underlying allocator to be efficient at allocating page size aligned memory
+ * as well. The major disadvantage is that each slab must be aligned to its
+ * size, as opposed to boundary tags.
+ *
+ * The slab implements a simple coloring mechanism to improve
+ * cache line utilisation.
+ *
+ * \ref SLAB_SIZE is the fixed size of a slab. As a rule of thumb, the slab is
+ * effective when the maximum allocated block size is below 1/4 of SLAB_SIZE,
+ * e.g. a 16kB SLAB is most effective up to a 4kB block size.
+ *
+ * The \ref MEM_POISON flag enables detection of reads/writes past the end of
+ * an allocated block by raising a segmentation fault. This poses a significant
+ * time and space overhead. Enable only when debugging.
+ *
+ * \ref MEM_SLAB_CAP defines the maximum number of empty slabs that a cache
+ * can keep at a time. This results in a slight performance regression,
+ * but actively recycles unused memory.
+ *
+ * \ref SLAB_DEPOT_SIZE defines how many recycled slabs are cached for later
+ * use instead of being returned to the OS immediately. This significantly
+ * reduces the number of syscalls in some cases,
+ * e.g. 16 means a 16 * SLAB_SIZE cache; for 16kB slabs that is a 256kB cache.
+ *
+ * \ref MEM_COLORING enables simple cache coloring. This is generally a useful
+ * feature since all slabs are page size aligned and
+ * (depending on the architecture) it slightly improves performance
+ * and cacheline usage at the cost of at least 64 bytes of overhead per slab.
+ * Undefine MEM_COLORING in alloc-common.h to disable coloring.
+ *
+ * Optimal usage for a specific allocation pattern (similar allocation sizes):
+ * \code
+ * slab_cache_t cache;
+ * slab_cache_init(&cache, N); // Initialize, N means cache chunk size
+ * ...
+ * void* mem = slab_cache_alloc(&cache); // Allocate N bytes
+ * ...
+ * slab_free(mem); // Recycle memory
+ * ...
+ * slab_cache_destroy(&cache); // Deinitialize cache
+ * \endcode
+ *
+ *
+ * \todo Allocate slab headers elsewhere and use just first sizeof(void*) bytes
+ * in each slab as a pointer to slab header. This could improve the
+ * performance.
+ *
+ * \note Slab allocation is not thread safe for performance reasons.
+ *
+ * \addtogroup alloc
+ * @{
+ */
+
+#ifndef _KNOTD_COMMON_SLAB_H_
+#define _KNOTD_COMMON_SLAB_H_
+
+#include <pthread.h>
+#include <stdint.h>
+
+/* Constants. */
+#define SLAB_SIZE (4096*4) //!< Slab size (16K blocks)
+#define SLAB_MIN_BUFLEN 8 //!< Minimal allocation block size is 8B.
+#define SLAB_EXP_OFFSET 3 //!< Minimal allocation size is 8B = 2^3, exp is 3.
+#define SLAB_GP_COUNT 10 //!< General-purpose caches count.
+#define SLAB_US_COUNT 10 //!< User-specified caches count.
+#define SLAB_DEPOT_SIZE 16 //!< N slabs cached = N*SLAB_SIZE bytes cap
+#define SLAB_CACHE_COUNT (SLAB_GP_COUNT + SLAB_US_COUNT) //!< Slab cache count.
+struct slab_cache_t;
+
+/* Macros. */
+
+/*! \brief Return slab base address from pointer. */
+#define slab_from_ptr(p) ((void*)((size_t)(p) & SLAB_MASK))
+
+/*! \brief Return true if slab is empty. */
+#define slab_isempty(s) ((s)->bufs_free == (s)->bufs_count)
+
+/*!
+ * \brief Slab descriptor.
+ *
+ * A slab is a block of memory used for allocating
+ * smaller objects (bufs) later on.
+ * Each slab is currently aligned to the page size so that the
+ * slab address can easily be determined from a buf pointer.
+ *
+ * \warning Do not use slab_t directly as it cannot grow, see slab_cache_t.
+ */
+typedef struct slab_t {
+ char magic; /*!< Identifies memory block type. */
+ unsigned short bufsize; /*!< Slab bufsize. */
+ struct slab_cache_t *cache; /*!< Owner cache. */
+ struct slab_t *prev, *next; /*!< Neighbours in slab lists. */
+ unsigned bufs_count; /*!< Number of bufs in slab. */
+ unsigned bufs_free; /*!< Number of available bufs. */
+ void **head; /*!< Pointer to first available buf. */
+ char* base; /*!< Base address for bufs. */
+} slab_t;
+
+/*!
+ * \brief Slab depot.
+ *
+ * To mitigate slab initialization costs, the depot keeps a finite number of
+ * stacked slabs before returning them to the system.
+ */
+typedef struct slab_depot_t {
+ size_t available; /*!< Number of available pages. */
+ slab_t* cache[SLAB_DEPOT_SIZE]; /*!< Stack of free slabs. */
+} slab_depot_t;
+
+/*!
+ * \brief Large object descriptor.
+ *
+ * A large object differs from a slab by its magic byte and
+ * stores the object size.
+ *
+ * The magic byte needs to come first so that it overlaps with the slab_t magic byte.
+ */
+typedef struct slab_obj_t {
+ char magic; /*!< Identifies memory block type. */
+ size_t size; /*!< Object size. */
+} slab_obj_t;
+
+/*!
+ * \brief Slab cache descriptor.
+ *
+ * A slab cache is a list of 0..n slabs with the same buf size.
+ * It is responsible for keeping track of slab state.
+ *
+ * Once a slab is created, it is moved to the free list.
+ * When it becomes full, it is moved to the full list.
+ * Once a buf from a full slab is freed, the slab is moved back to
+ * the free list (there may be some hysteresis to mitigate
+ * sequential alloc/free patterns).
+ *
+ * Allocation of new slabs is on-demand, empty slabs are reused if possible.
+ *
+ * \note The slab implementation differs from Bonwick (Usenix 2001)
+ *       http://www.usenix.org/event/usenix01/bonwick.html
+ *       as it doesn't feature separate empty and partial lists.
+ *       This is due to the fact that a user space allocator rarely
+ *       needs to count free slabs, and there is no way the OS could
+ *       notify the application that memory is scarce.
+ *       A slight performance increase was measured in benchmarks.
+ *
+ * \note Statistics are only available if MEM_DEBUG is enabled.
+ */
+typedef struct slab_cache_t {
+ unsigned short color; /*!< Current cache color. */
+ unsigned short empty; /*!< Number of empty slabs. */
+ size_t bufsize; /*!< Cache object (buf) size. */
+ slab_t *slabs_free; /*!< List of free slabs. */
+ slab_t *slabs_full; /*!< List of full slabs. */
+
+ /* Statistics. */
+ unsigned long stat_allocs; /*!< Allocation count. */
+ unsigned long stat_frees; /*!< Free count. */
+} slab_cache_t;
+
+/*!
+ * \brief Slab allocator descriptor.
+ *
+ * \note For the number of slab caches, consult SLAB_GP_COUNT
+ *       and the number of specific records in the SLAB_CACHE_LUT lookup table.
+ *
+ * \warning It is currently not advised to use this general purpose allocator,
+ *          as it usually doesn't yield the expected performance due to higher
+ *          bookkeeping costs, and its efficiency also depends on the allocation
+ *          behavior. Prefer slab_cache_t for specialized use in most cases.
+ */
+typedef struct slab_alloc_t {
+ slab_cache_t descriptors; /*!< Slab cache for cache descriptors. */
+	slab_cache_t* caches[SLAB_CACHE_COUNT]; /*!< Array of slab caches. */
+} slab_alloc_t;
+
+/*!
+ * \brief Create a slab of predefined size.
+ *
+ * At the moment, slabs have a fixed size (SLAB_SIZE) and are aligned to it.
+ * This enables quick and efficient buf to slab lookup by pointer arithmetic.
+ *
+ * The slab uses a simple coloring scheme, and the memory block is always
+ * sizeof(void*) aligned.
+ *
+ * \param cache Parent cache.
+ * \retval Slab instance on success.
+ * \retval NULL on error.
+ */
+slab_t* slab_create(slab_cache_t* cache);
+
+/*!
+ * \brief Destroy slab instance.
+ *
+ * Slab is disconnected from any list and freed.
+ * Dereferenced slab parameter is set to NULL.
+ *
+ * \param slab Pointer to given slab.
+ */
+void slab_destroy(slab_t** slab);
+
+/*!
+ * \brief Allocate a buf from slab.
+ *
+ * Returns a pointer to allocated memory or NULL on error.
+ *
+ * \param slab Given slab instance.
+ * \retval Pointer to allocated memory.
+ * \retval NULL on error.
+ */
+void* slab_alloc(slab_t* slab);
+
+/*!
+ * \brief Recycle memory.
+ *
+ * Given memory is returned to owner slab.
+ * Memory content may be rewritten.
+ *
+ * \param ptr Returned memory.
+ */
+void slab_free(void* ptr);
+
+/*!
+ * \brief Create a slab cache.
+ *
+ * Create a slab cache with no allocated slabs.
+ * Slabs are allocated on-demand.
+ *
+ * \param cache Pointer to uninitialized cache.
+ * \param bufsize Single item size for later allocs.
+ * \retval 0 on success.
+ * \retval -1 on error.
+ */
+int slab_cache_init(slab_cache_t* cache, size_t bufsize);
+
+/*!
+ * \brief Destroy a slab cache.
+ *
+ * Destroy a slab cache and all associated slabs.
+ *
+ * \param cache Pointer to slab cache.
+ */
+void slab_cache_destroy(slab_cache_t* cache);
+
+/*!
+ * \brief Allocate from the cache.
+ *
+ * It tries to use partially free slabs first,
+ * empty slabs second, and allocates a new slab
+ * as a last resort.
+ *
+ * \param cache Given slab cache.
+ * \retval Pointer to allocated memory.
+ * \retval NULL on error.
+ */
+void* slab_cache_alloc(slab_cache_t* cache);
+
+/*!
+ * \brief Free unused slabs from cache.
+ *
+ * \param cache Given slab cache.
+ * \return Number of freed slabs.
+ */
+int slab_cache_reap(slab_cache_t* cache);
+
+/*!
+ * \brief Create a general purpose slab allocator.
+ *
+ * \note Please consult struct slab_alloc_t for performance hints.
+ *
+ * \retval 0 on success.
+ * \retval -1 on error.
+ */
+int slab_alloc_init(slab_alloc_t* alloc);
+
+/*!
+ * \brief Delete slab allocator.
+ *
+ * This destroys all associated caches and frees memory.
+ *
+ * \param alloc Given allocator instance.
+ */
+void slab_alloc_destroy(slab_alloc_t* alloc);
+
+/*!
+ * \brief Allocate a block of memory.
+ *
+ * Returns a block of allocated memory.
+ *
+ * \note At least SLAB_MIN_BUFLEN bytes are allocated.
+ *
+ * \note Please consult struct slab_alloc_t for performance hints.
+ *
+ * \param alloc Allocator instance.
+ * \param size Requested block size.
+ * \retval Pointer to allocated memory.
+ * \retval NULL on error.
+ */
+void* slab_alloc_alloc(slab_alloc_t* alloc, size_t size);
+
+/*!
+ * \brief Reallocate data from one slab to another.
+ *
+ * \param alloc Allocator instance.
+ * \param ptr Pointer to allocated memory.
+ * \param size Requested memory block size.
+ * \retval Pointer to newly allocated memory.
+ * \retval NULL on error.
+ *
+ * \todo Realloc could probably be implemented more efficiently.
+ */
+void *slab_alloc_realloc(slab_alloc_t* alloc, void *ptr, size_t size);
+
+/*!
+ *
+ * \brief Dump allocator stats.
+ *
+ * \param alloc Allocator instance.
+ */
+void slab_alloc_stats(slab_alloc_t* alloc);
+
+#endif /* _KNOTD_COMMON_SLAB_H_ */
+
+/*! @} */
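
For completeness, a minimal sketch of the general purpose slab_alloc_t interface declared above (illustrative only, not part of the imported patch; the file name is hypothetical and the include path assumes the src/ layout from the diffstat):

/* alloc-example.c: illustrative sketch only, not part of the upstream import. */
#include <string.h>
#include "common/slab/slab.h"

int main(void)
{
	slab_alloc_t alloc;
	slab_alloc_init(&alloc);                     /* prepare the descriptor cache */

	char *buf = slab_alloc_alloc(&alloc, 100);   /* served from a 128B cache */
	if (buf == 0) {
		return 1;
	}
	strcpy(buf, "hello");

	buf = slab_alloc_realloc(&alloc, buf, 300);  /* data moves to a 512B cache */

	slab_free(buf);                              /* return buf to its owner slab */
	slab_alloc_stats(&alloc);                    /* per-cache stats need MEM_DEBUG */
	slab_alloc_destroy(&alloc);                  /* free all caches and slabs */
	return 0;
}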