path: root/src/common/slab
author     Ondřej Surý <ondrej@sury.org>  2011-11-02 22:44:12 +0100
committer  Ondřej Surý <ondrej@sury.org>  2011-11-02 22:44:12 +0100
commit     c8d5977bb546dae9ed59d81556639c49badd8121 (patch)
tree       4c86750db26c1c3502b60f2cd78ca9611cfa01d6 /src/common/slab
download   knot-upstream/0.8.0_pre1.tar.gz

Imported Upstream version 0.8.0~pre1 (tag: upstream/0.8.0_pre1)
Diffstat (limited to 'src/common/slab')
-rw-r--r--  src/common/slab/alloc-common.h |  61
-rw-r--r--  src/common/slab/malloc.c       |  60
-rw-r--r--  src/common/slab/malloc.h       |  43
-rw-r--r--  src/common/slab/slab.c         | 732
-rw-r--r--  src/common/slab/slab.h         | 353
5 files changed, 1249 insertions, 0 deletions
diff --git a/src/common/slab/alloc-common.h b/src/common/slab/alloc-common.h
new file mode 100644
index 0000000..32878ab
--- /dev/null
+++ b/src/common/slab/alloc-common.h
@@ -0,0 +1,61 @@
+/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*!
+ * \file alloc-common.h
+ *
+ * \author Lubos Slovak <lubos.slovak@nic.cz>
+ *
+ * \brief Common macros for alloc.
+ *
+ * \addtogroup alloc
+ * @{
+ */
+
+#ifndef _KNOTD_COMMON_ALLOC_COMMON_H_
+#define _KNOTD_COMMON_ALLOC_COMMON_H_
+
+#include <stdio.h>
+
+//#define MEM_DEBUG
+//#define MEM_NOSLAB
+//#define MEM_POISON
+#define MEM_SLAB_CAP 5 // Cap slab_cache empty slab count (undefined = inf)
+#define MEM_COLORING // Slab cache coloring
+//#define MEM_SLAB_DEPOT // Use slab depot for slab caching (not thread-safe)
+
+/* Eliminate compiler warning with unused parameters. */
+#ifndef UNUSED
+#define UNUSED(param) (void)(param)
+#endif
+
+/* Optimisation macros. */
+#ifndef likely
+#define likely(x) __builtin_expect((x),1)
+#endif
+#ifndef unlikely
+#define unlikely(x) __builtin_expect((x),0)
+#endif
+
+#ifdef MEM_DEBUG
+#define dbg_mem(msg...) fprintf(stderr, msg)
+#else
+#define dbg_mem(msg...)
+#endif
+
+
+#endif /* _KNOTD_COMMON_ALLOC_COMMON_H_ */
+
+/*! @} */
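
As an illustration (not part of the imported patch; the file and function names below are hypothetical), the macros declared in alloc-common.h would typically be used like this:

/* macros-example.c: illustrative sketch only, not part of the upstream import. */
#include "common/slab/alloc-common.h"

static int parse_block(const char *data, void *ctx)
{
	UNUSED(ctx);                           /* silence unused-parameter warnings */

	if (unlikely(data == 0)) {             /* hint that the error path is unexpected */
		dbg_mem("parse_block: no data\n"); /* compiled out unless MEM_DEBUG is set */
		return -1;
	}
	return 0;
}

int main(void)
{
	return parse_block("x", 0);
}
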
diff --git a/src/common/slab/malloc.c b/src/common/slab/malloc.c
new file mode 100644
index 0000000..ec5a68d
--- /dev/null
+++ b/src/common/slab/malloc.c
@@ -0,0 +1,60 @@
+/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+/*
+ * Memory usage statistics (the destructor hook is only registered with MEM_DEBUG).
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/resource.h>
+
+#include "common/slab/alloc-common.h"
+
+#ifdef MEM_DEBUG
+/*
+ * ((destructor)) attribute executes this function after main().
+ * \see http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
+ */
+void __attribute__ ((destructor)) usage_dump()
+#else
+void usage_dump()
+#endif
+{
+ /* Get resource usage. */
+ struct rusage usage;
+ if (getrusage(RUSAGE_SELF, &usage) < 0) {
+ memset(&usage, 0, sizeof(struct rusage));
+ }
+
+ fprintf(stderr, "\nMemory statistics:");
+ fprintf(stderr, "\n==================\n");
+
+ fprintf(stderr, "User time: %.03lf ms\nSystem time: %.03lf ms\n",
+ usage.ru_utime.tv_sec * (double) 1000.0
+ + usage.ru_utime.tv_usec / (double)1000.0,
+ usage.ru_stime.tv_sec * (double) 1000.0
+ + usage.ru_stime.tv_usec / (double)1000.0);
+ fprintf(stderr, "Major page faults: %lu (required I/O)\nMinor page faults: %lu\n",
+ usage.ru_majflt, usage.ru_minflt);
+ fprintf(stderr, "Number of swaps: %lu\n",
+ usage.ru_nswap);
+ fprintf(stderr, "Voluntary context switches: %lu\nInvoluntary context switches: %lu\n",
+ usage.ru_nvcsw,
+ usage.ru_nivcsw);
+ fprintf(stderr, "==================\n");
+}
diff --git a/src/common/slab/malloc.h b/src/common/slab/malloc.h
new file mode 100644
index 0000000..8ca9f58
--- /dev/null
+++ b/src/common/slab/malloc.h
@@ -0,0 +1,43 @@
+/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*!
+ * \file malloc.h
+ *
+ * \author Marek Vavrusa <marek.vavrusa@nic.cz>
+ *
+ * \brief Memory allocation related functions.
+ *
+ * \addtogroup alloc
+ * @{
+ */
+
+#ifndef _KNOTD_COMMON_MALLOC_H_
+#define _KNOTD_COMMON_MALLOC_H_
+
+#include <stdlib.h>
+
+/*! \brief Print usage statistics.
+ *
+ * \note This function has destructor attribute set if MEM_DEBUG is enabled.
+ *
+ * \warning Not all printed statistics are available on every OS;
+ *          consult the getrusage(2) manual page.
+ */
+void usage_dump();
+
+#endif // _KNOTD_COMMON_MALLOC_H_
+
+/*! @} */
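
Since the destructor attribute is only attached when MEM_DEBUG is defined, usage_dump() can also be called explicitly. A minimal sketch (not part of the imported patch; the file name is hypothetical and the include path assumes the src/ layout from the diffstat):

/* usage-example.c: illustrative sketch only, not part of the upstream import. */
#include <stdlib.h>
#include "common/slab/malloc.h"

int main(void)
{
	/* Do some work that generates page faults and context switches. */
	for (int i = 0; i < 1024; ++i) {
		void *p = malloc(4096);
		free(p);
	}

	/* Without MEM_DEBUG the statistics are not printed automatically
	 * at exit, so request them explicitly. */
	usage_dump();
	return 0;
}
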
diff --git a/src/common/slab/slab.c b/src/common/slab/slab.c
new file mode 100644
index 0000000..ccdf7ca
--- /dev/null
+++ b/src/common/slab/slab.c
@@ -0,0 +1,732 @@
+/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "common/slab/alloc-common.h"
+#include "common/slab/slab.h"
+
+/*
+ * Magic constants.
+ */
+#define SLAB_MAGIC 0x51 /*!< "Sl" magic byte (slab type). */
+#define LOBJ_MAGIC 0x0B /*!< "Ob" magic byte (object type). */
+#define POISON_DWORD 0xdeadbeef /*!< Memory boundary guard magic. */
+#define SLAB_MINCOLOR 64 /*!< Minimum space reserved for cache coloring. */
+#define SLAB_HEADER sizeof(slab_t) /*!< Slab header size. */
+#define ALIGN_PTRSZ __attribute__ ((__aligned__(sizeof(void*))))
+
+/*! \brief Fast cache id lookup table.
+ *
+ * Provides O(1) lookup.
+ * Pre-seeded with a few default values and
+ * otherwise filled on demand.
+ */
+unsigned ALIGN_PTRSZ SLAB_CACHE_LUT[SLAB_SIZE] = {
+ [24] = SLAB_GP_COUNT + 1,
+ [800] = SLAB_GP_COUNT + 2
+};
+
+/*! \brief Find the next highest power of 2. */
+static inline unsigned get_next_pow2(unsigned v)
+{
+ // Next highest power of 2
+ --v;
+ v |= v >> 1; v |= v >> 2;
+ v |= v >> 4; v |= v >> 8;
+ v |= v >> 16;
+ ++v;
+
+ return v;
+}
+
+/*! \brief Return binary logarithm of a number, which is a power of 2. */
+static inline unsigned fastlog2(unsigned v)
+{
+ // Works if we know the size is a power of 2
+ register unsigned int r = (v & 0xAAAAAAAA) != 0;
+ r |= ((v & 0xFFFF0000) != 0) << 4;
+ r |= ((v & 0xFF00FF00) != 0) << 3;
+ r |= ((v & 0xF0F0F0F0) != 0) << 2;
+ r |= ((v & 0xCCCCCCCC) != 0) << 1;
+ return r;
+}
+
+/*!
+ * \brief Fast hashing function.
+ *
+ * Finds the next highest power of 2 and returns binary logarithm.
+ * Values are stored in LUT cache for future access.
+ */
+static unsigned slab_cache_id(unsigned size)
+{
+ // Assert cache id of the smallest bufsize is 0
+ if(size <= SLAB_MIN_BUFLEN) {
+ return 0;
+ }
+
+ // Check LUT
+ unsigned id = 0;
+ if ((id = SLAB_CACHE_LUT[size])) {
+ return id;
+ } else {
+
+ // Compute binary logarithm
+ // Next highest power of 2
+ id = fastlog2(get_next_pow2(size));
+
+ // Shift cacheid of SLAB_MIN_BUFLEN to 0
+ id -= SLAB_EXP_OFFSET;
+
+ // Store
+ SLAB_CACHE_LUT[size] = id;
+ }
+
+ return id;
+}
+
+/*
+ * Slab run-time constants.
+ */
+
+size_t SLAB_MASK = 0; /*!< \brief Slab address mask (for computing offsets). */
+static unsigned SLAB_LOGSIZE = 0; /*!< \brief Binary logarithm of slab size. */
+
+/*!
+ * The depot is a caching sub-allocator of slabs.
+ * It mitigates the performance impact of sequentially allocating and freeing
+ * from a slab with just a few items by caching N slabs before returning
+ * them to the system.
+ *
+ * \todo With wider use, locking or RCU will be necessary.
+ */
+#ifdef MEM_SLAB_DEPOT
+static slab_depot_t _depot_g; /*! \brief Global slab depot. */
+#endif // MEM_SLAB_DEPOT
+
+/*!
+ * \brief Allocate a slab of given bufsize from depot.
+ *
+ * \retval Reserved memory for slab on success.
+ * \retval NULL on errors.
+ */
+static void* slab_depot_alloc(size_t bufsize)
+{
+ void *page = 0;
+#ifdef MEM_SLAB_DEPOT
+ if (_depot_g.available) {
+ for (int i = _depot_g.available - 1; i > -1 ; --i) {
+ if(_depot_g.cache[i]->bufsize == bufsize) {
+ page = _depot_g.cache[i];
+ _depot_g.cache[i] = _depot_g.cache[--_depot_g.available];
+ return page;
+ }
+ }
+ page = _depot_g.cache[--_depot_g.available];
+ } else {
+ if(posix_memalign(&page, SLAB_SIZE, SLAB_SIZE) == 0) {
+ ((slab_t*)page)->bufsize = 0;
+ } else {
+ page = 0;
+ }
+
+ }
+#else // MEM_SLAB_DEPOT
+ if(posix_memalign(&page, SLAB_SIZE, SLAB_SIZE) == 0) {
+ ((slab_t*)page)->bufsize = 0;
+ } else {
+ page = 0;
+ }
+#endif // MEM_SLAB_DEPOT
+
+ return page;
+}
+
+/*!
+ * \brief Return a slab to the depot.
+ *
+ * \note If the depot is full, the slab is freed immediately.
+ */
+static inline void slab_depot_free(void* slab)
+{
+#ifdef MEM_SLAB_DEPOT
+ if (_depot_g.available < SLAB_DEPOT_SIZE) {
+ _depot_g.cache[_depot_g.available++] = slab;
+ } else {
+ free(slab);
+ }
+#else // MEM_SLAB_DEPOT
+ free(slab);
+#endif // MEM_SLAB_DEPOT
+}
+
+/*! \brief Initialize slab depot. */
+static void slab_depot_init()
+{
+#ifdef MEM_SLAB_DEPOT
+ _depot_g.available = 0;
+#endif // MEM_SLAB_DEPOT
+}
+
+/*! \brief Destroy slab depot. */
+static void slab_depot_destroy()
+{
+#ifdef MEM_SLAB_DEPOT
+ while(_depot_g.available) {
+ free(_depot_g.cache[--_depot_g.available]);
+ }
+#endif // MEM_SLAB_DEPOT
+}
+
+/*
+ * Initializers.
+ */
+
+/*! \brief Initializes slab subsystem (it is called automatically). */
+void __attribute__ ((constructor)) slab_init()
+{
+	// Compute binary logarithm of the slab size
+ SLAB_LOGSIZE = fastlog2(SLAB_SIZE);
+
+ // Compute slab page mask
+ SLAB_MASK = 0;
+ for (int i = 0; i < SLAB_LOGSIZE; ++i) {
+ SLAB_MASK |= 1 << i;
+ }
+ SLAB_MASK = ~SLAB_MASK;
+
+ // Initialize depot
+ slab_depot_init();
+}
+
+/*! \brief Deinitializes slab subsystem (it is called automatically). */
+void __attribute__ ((destructor)) slab_deinit()
+{
+ // Deinitialize global allocator
+ if (SLAB_LOGSIZE) {
+ slab_depot_destroy();
+ SLAB_LOGSIZE = SLAB_MASK = 0;
+ }
+}
+
+/*
+ * Cache helper functions.
+ */
+
+/* \note Not used right now.
+static void slab_dump(slab_t* slab) {
+
+ printf("%s: buffers (bufsize=%zuB, %u/%u free): \n",
+ __func__, slab->cache->bufsize, slab->bufs_free,
+ slab->bufs_count);
+
+ void** buf = slab->head;
+ int i = 0, n = 0;
+ while(buf != 0) {
+ size_t diff = (size_t)((char*)buf - (char*)slab->base);
+ printf("-> %lu", diff / slab->cache->bufsize);
+ buf = (void**)(*buf);
+ if (++i == 10) {
+ printf("\n");
+ i = 0;
+ }
+ ++n;
+ }
+
+ printf("\n");
+}
+*/
+
+/*!
+ * \brief Free all slabs from a slab cache.
+ * \return Number of freed slabs.
+ */
+static inline int slab_cache_free_slabs(slab_t* slab)
+{
+ int count = 0;
+ while (slab) {
+ slab_t* next = slab->next;
+ slab_destroy(&slab);
+ ++count;
+ slab = next;
+
+ }
+ return count;
+}
+
+/*
+ * Slab helper functions.
+ */
+
+/*! \brief Return number of slabs in a linked list. */
+static inline unsigned slab_list_walk(slab_t* slab)
+{
+ unsigned count = 0;
+ while(slab) {
+ slab = slab->next;
+ ++count;
+ }
+ return count;
+}
+
+/*! \brief Remove slab from a linked list. */
+static void slab_list_remove(slab_t* slab)
+{
+ // Disconnect from list
+ if (slab->prev) {
+ slab->prev->next = slab->next;
+ }
+ if(slab->next) {
+ slab->next->prev = slab->prev;
+ }
+
+ // Disconnect from cache
+ slab_cache_t* cache = slab->cache;
+ {
+ if (cache->slabs_free == slab) {
+ cache->slabs_free = slab->next;
+ } else if (cache->slabs_full == slab) {
+ cache->slabs_full = slab->next;
+ }
+ }
+}
+
+/*! \brief Insert slab into a linked list. */
+static void slab_list_insert(slab_t** list, slab_t* item)
+{
+ // If list exists, push to the top
+ item->prev = 0;
+ item->next = *list;
+ if(*list) {
+ (*list)->prev = item;
+ }
+ *list = item;
+}
+
+/*! \brief Move slab from one linked list to another. */
+static inline void slab_list_move(slab_t** target, slab_t* slab)
+{
+ slab_list_remove(slab);
+ slab_list_insert(target, slab);
+}
+
+/*
+ * API functions.
+ */
+
+slab_t* slab_create(slab_cache_t* cache)
+{
+ const size_t size = SLAB_SIZE;
+
+ slab_t* slab = slab_depot_alloc(cache->bufsize);
+
+	if (unlikely(slab == 0)) {
+ dbg_mem("%s: failed to allocate aligned memory block\n",
+ __func__);
+ return 0;
+ }
+
+ /* Initialize slab. */
+ slab->magic = SLAB_MAGIC;
+ slab->cache = cache;
+ slab_list_insert(&cache->slabs_free, slab);
+#ifdef MEM_SLAB_CAP
+ ++cache->empty;
+#endif
+
+ /* Already initialized? */
+ if (slab->bufsize == cache->bufsize) {
+ return slab;
+ } else {
+ slab->bufsize = cache->bufsize;
+ }
+
+	/* Ensure the item size can hold at least the size of a pointer. */
+ size_t item_size = slab->bufsize;
+ if (unlikely(item_size < SLAB_MIN_BUFLEN)) {
+ item_size = SLAB_MIN_BUFLEN;
+ }
+
+ /* Ensure at least some space for coloring */
+ size_t data_size = size - sizeof(slab_t);
+#ifdef MEM_COLORING
+ size_t free_space = data_size % item_size;
+ if (unlikely(free_space < SLAB_MINCOLOR)) {
+ free_space = SLAB_MINCOLOR;
+ }
+
+
+ /// unsigned short color = __sync_fetch_and_add(&cache->color, 1);
+ unsigned short color = (cache->color += sizeof(void*));
+ color = color % free_space;
+#else
+ const unsigned short color = 0;
+#endif
+
+	/* Calculate usable data size */
+ data_size -= color;
+ slab->bufs_count = data_size / item_size;
+ slab->bufs_free = slab->bufs_count;
+
+ // Save first item as next free
+ slab->base = (char*)slab + sizeof(slab_t) + color;
+ slab->head = (void**)slab->base;
+
+ // Create freelist, skip last member, which is set to NULL
+ char* item = (char*)slab->head;
+ for(unsigned i = 0; i < slab->bufs_count - 1; ++i) {
+ *((void**)item) = item + item_size;
+ item += item_size;
+ }
+
+ // Set last buf to NULL (tail)
+ *((void**)item) = (void*)0;
+
+ // Ensure the last item has a NULL next
+ dbg_mem("%s: created slab (%p, %p) (%zu B)\n",
+		__func__, slab, (char*)slab + size, size);
+ return slab;
+}
+
+void slab_destroy(slab_t** slab)
+{
+ /* Disconnect from the list */
+ slab_list_remove(*slab);
+
+ /* Free slab */
+ slab_depot_free(*slab);
+
+ /* Invalidate pointer. */
+ dbg_mem("%s: deleted slab %p\n", __func__, *slab);
+ *slab = 0;
+}
+
+void* slab_alloc(slab_t* slab)
+{
+ // Fetch first free item
+ void **item = 0;
+ {
+ if((item = slab->head)) {
+ slab->head = (void**)*item;
+ --slab->bufs_free;
+ } else {
+ // No more free items
+ return 0;
+ }
+ }
+
+#ifdef MEM_DEBUG
+ // Increment statistics
+ __sync_add_and_fetch(&slab->cache->stat_allocs, 1);
+#endif
+
+ // Move to full?
+ if (unlikely(slab->bufs_free == 0)) {
+ slab_list_move(&slab->cache->slabs_full, slab);
+ } else {
+#ifdef MEM_SLAB_CAP
+ // Mark not empty?
+ if (unlikely(slab->bufs_free == slab->bufs_count - 1)) {
+ --slab->cache->empty;
+ }
+#endif
+ }
+
+ return item;
+}
+
+void slab_free(void* ptr)
+{
+ // Null pointer check
+ if (unlikely(!ptr)) {
+ return;
+ }
+
+ // Get slab start address
+ slab_t* slab = slab_from_ptr(ptr);
+ assert(slab);
+
+ // Check if it exists in directory
+ if (slab->magic == SLAB_MAGIC) {
+
+ // Return buf to slab
+ *((void**)ptr) = (void*)slab->head;
+ slab->head = (void**)ptr;
+ ++slab->bufs_free;
+
+#ifdef MEM_DEBUG
+ // Increment statistics
+ __sync_add_and_fetch(&slab->cache->stat_frees, 1);
+#endif
+
+ // Return to partial
+ if(unlikely(slab->bufs_free == 1)) {
+ slab_list_move(&slab->cache->slabs_free, slab);
+ } else {
+#ifdef MEM_SLAB_CAP
+ // Recycle if empty
+ if(unlikely(slab_isempty(slab))) {
+ if(slab->cache->empty == MEM_SLAB_CAP) {
+ slab_destroy(&slab);
+ } else {
+ ++slab->cache->empty;
+ }
+ }
+#endif
+ }
+
+ } else {
+
+ // Pointer is not a slab
+ // Presuming it's a large block
+ slab_obj_t* bs = (slab_obj_t*)ptr - 1;
+
+#ifdef MEM_POISON
+ // Remove memory barrier
+ mprotect(ptr + bs->size, sizeof(int), PROT_READ|PROT_WRITE);
+#endif
+
+ // Unmap
+ dbg_mem("%s: unmapping large block of %zu bytes at %p\n",
+ __func__, bs->size, ptr);
+ free(bs);
+ }
+}
+
+int slab_cache_init(slab_cache_t* cache, size_t bufsize)
+{
+ if (unlikely(!bufsize)) {
+ return -1;
+ }
+
+ cache->empty = 0;
+ cache->bufsize = bufsize;
+ cache->slabs_free = cache->slabs_full = 0;
+ cache->color = 0;
+
+ /* Initialize stats */
+ cache->stat_allocs = cache->stat_frees = 0;
+
+ dbg_mem("%s: created cache of size %zu\n",
+ __func__, bufsize);
+
+ return 0;
+}
+
+void slab_cache_destroy(slab_cache_t* cache) {
+
+ // Free slabs
+ unsigned free_s = slab_cache_free_slabs(cache->slabs_free);
+ unsigned full_s = slab_cache_free_slabs(cache->slabs_full);
+#ifndef MEM_DEBUG
+ UNUSED(free_s);
+ UNUSED(full_s);
+#else
+	dbg_mem("%s: %u empty/partial, %u full slabs\n",
+ __func__, free_s, full_s);
+#endif
+
+ // Invalidate cache
+ cache->bufsize = 0;
+ cache->slabs_free = cache->slabs_full = 0;
+}
+
+void* slab_cache_alloc(slab_cache_t* cache)
+{
+ slab_t* slab = cache->slabs_free;
+ if(!cache->slabs_free) {
+ slab = slab_create(cache);
+ if (slab == NULL) {
+ return NULL;
+ }
+ }
+
+
+ return slab_alloc(slab);
+}
+
+int slab_cache_reap(slab_cache_t* cache)
+{
+ // For now, just free empty slabs
+ slab_t* slab = cache->slabs_free;
+ int count = 0;
+ while (slab) {
+ slab_t* next = slab->next;
+ if (slab_isempty(slab)) {
+ slab_destroy(&slab);
+ ++count;
+ }
+ slab = next;
+
+ }
+
+ cache->empty = 0;
+ return count;
+}
+
+int slab_alloc_init(slab_alloc_t* alloc)
+{
+ // Invalidate
+ memset(alloc, 0, sizeof(slab_alloc_t));
+
+ // Initialize descriptors cache
+ slab_cache_init(&alloc->descriptors, sizeof(slab_cache_t));
+
+ return 0;
+}
+
+void slab_alloc_destroy(slab_alloc_t* alloc)
+{
+ // Destroy all caches
+ for (unsigned i = 0; i < SLAB_CACHE_COUNT; ++i) {
+ if (alloc->caches[i] != 0) {
+ slab_cache_destroy(alloc->caches[i]);
+ }
+ }
+
+ // Destroy cache for descriptors
+ slab_cache_destroy(&alloc->descriptors);
+}
+
+void* slab_alloc_alloc(slab_alloc_t* alloc, size_t size)
+{
+ // Invalid size check
+ if (unlikely(!size)) {
+ return 0;
+ }
+
+#ifdef MEM_POISON
+ // Reserve memory for poison
+ size += sizeof(int);
+#endif
+ // Directly map large block
+ if (unlikely(size > SLAB_SIZE/2)) {
+
+ // Map block
+ size += sizeof(slab_obj_t);
+		slab_obj_t* p = malloc(size);
+		if (unlikely(p == 0)) {
+			return 0;
+		}
+
+ dbg_mem("%s: mapping large block of %zu bytes at %p\n",
+ __func__, size, p + 1);
+
+ /* Initialize. */
+ p->magic = LOBJ_MAGIC;
+ p->size = size - sizeof(slab_obj_t);
+
+#ifdef MEM_POISON
+ // Reduce real size
+ p->size -= sizeof(int);
+
+ // Memory barrier
+ int* pb = (int*)((char*)p + size - sizeof(int));
+ *pb = POISON_DWORD;
+ mprotect(pb, sizeof(int), PROT_NONE);
+#endif
+
+ return p + 1;
+ }
+
+ // Get cache id from size
+ unsigned cache_id = slab_cache_id(size);
+
+ // Check if associated cache exists
+ if (unlikely(alloc->caches[cache_id] == 0)) {
+
+ // Assert minimum cache size
+ if (unlikely(size < SLAB_MIN_BUFLEN)) {
+ size = SLAB_MIN_BUFLEN;
+ }
+
+ // Calculate cache bufsize
+ size_t bufsize = size;
+ if (cache_id < SLAB_GP_COUNT) {
+ bufsize = get_next_pow2(size);
+ }
+
+ // Create cache
+ dbg_mem("%s: creating cache of %zuB (req. %zuB) (id=%u)\n",
+ __func__, bufsize, size, cache_id);
+
+ slab_cache_t* cache = slab_cache_alloc(&alloc->descriptors);
+ slab_cache_init(cache, bufsize);
+ alloc->caches[cache_id] = cache;
+ }
+
+ // Allocate from cache
+ void* mem = slab_cache_alloc(alloc->caches[cache_id]);
+
+#ifdef MEM_POISON
+ // Memory barrier
+ /*! \todo Broken, need to store the barrier byte size. */
+ //int* pb = (int*)((char*)mem + size - sizeof(int));
+ //mprotect(pb, sizeof(int), PROT_NONE);
+#endif
+ return mem;
+}
+
+void *slab_alloc_realloc(slab_alloc_t* alloc, void *ptr, size_t size)
+{
+ // realloc(0) equals to free(ptr)
+ if (!size) {
+ slab_free(ptr);
+ return 0;
+ }
+
+ // Allocate new buf
+ void *nptr = slab_alloc_alloc(alloc, size);
+ assert(nptr);
+
+ // Copy memory if present
+ if (ptr) {
+ slab_t* slab = slab_from_ptr(ptr);
+ memcpy(nptr, ptr, slab->cache->bufsize);
+
+ // Free old buf
+ slab_free(ptr);
+ }
+
+ return nptr;
+}
+
+void slab_alloc_stats(slab_alloc_t* alloc)
+{
+#ifdef MEM_DEBUG
+ printf("Cache usage:\n");
+ for (int i = 0; i < SLAB_CACHE_COUNT; ++i) {
+
+ if (!alloc->caches[i])
+ continue;
+
+ slab_cache_t* cache = alloc->caches[i];
+ unsigned free_s = slab_list_walk(cache->slabs_free);
+ unsigned full_s = slab_list_walk(cache->slabs_full);
+ printf("%4zu: allocs=%lu frees=%lu "
+ "(%u empty+partial, %u full)\n",
+ cache->bufsize, cache->stat_allocs,
+ cache->stat_frees, free_s, full_s);
+ }
+#else
+ printf("Cache usage: not available, enable MEM_DEBUG and recompile.\n");
+#endif
+}
+
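
To make the size-to-cache mapping in slab_cache_id() concrete, here is a standalone sketch (not part of the imported patch) that copies the two static helpers above and prints the computed cache id for a few request sizes. Note that the allocator additionally pre-seeds sizes 24 and 800 to user-specified caches via SLAB_CACHE_LUT, which overrides this computation for those sizes.

/* cacheid-example.c: standalone sketch, not part of the upstream import.
 * It copies the two static helpers from slab.c to show how a request size
 * maps to a general-purpose cache id in slab_alloc_alloc(). */
#include <stdio.h>

#define SLAB_MIN_BUFLEN 8   /* as in slab.h */
#define SLAB_EXP_OFFSET 3   /* log2(SLAB_MIN_BUFLEN) */

static unsigned get_next_pow2(unsigned v)
{
	--v;
	v |= v >> 1; v |= v >> 2;
	v |= v >> 4; v |= v >> 8;
	v |= v >> 16;
	++v;
	return v;
}

static unsigned fastlog2(unsigned v)
{
	unsigned r = (v & 0xAAAAAAAA) != 0;
	r |= ((v & 0xFFFF0000) != 0) << 4;
	r |= ((v & 0xFF00FF00) != 0) << 3;
	r |= ((v & 0xF0F0F0F0) != 0) << 2;
	r |= ((v & 0xCCCCCCCC) != 0) << 1;
	return r;
}

int main(void)
{
	unsigned sizes[] = { 8, 16, 100, 500, 4096 };
	for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i) {
		unsigned size = sizes[i];
		unsigned id = (size <= SLAB_MIN_BUFLEN)
		              ? 0
		              : fastlog2(get_next_pow2(size)) - SLAB_EXP_OFFSET;
		printf("request %4uB -> cache id %u (bufsize %uB)\n",
		       size, id, get_next_pow2(size));
	}
	return 0;
}
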
diff --git a/src/common/slab/slab.h b/src/common/slab/slab.h
new file mode 100644
index 0000000..d64188e
--- /dev/null
+++ b/src/common/slab/slab.h
@@ -0,0 +1,353 @@
+/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*!
+ * \file slab.h
+ *
+ * \author Marek Vavrusa <marek.vavrusa@nic.cz>
+ *
+ * \brief SLAB allocator.
+ *
+ * SLAB caches work with either custom SLAB sizes or
+ * Next-Highest-Power-Of-2 sizes.
+ *
+ * The slab size is a multiple of PAGE_SIZE; larger blocks are
+ * passed to the system allocator.
+ *
+ * Allocated SLABs are PAGE_SIZE aligned for a fast O(1)
+ * address-from-item lookup. This results in almost no memory
+ * overhead for very small blocks (<64B), but it requires the
+ * underlying allocator to be efficient at allocating page size aligned memory
+ * as well. The major disadvantage is that each slab must be aligned to its
+ * size, as opposed to boundary tags.
+ *
+ * The slab implements a simple coloring mechanism to improve
+ * cache line utilisation.
+ *
+ * \ref SLAB_SIZE is the fixed size of a slab. As a rule of thumb, the slab is
+ * effective when the maximum allocated block size is below 1/4 of SLAB_SIZE,
+ * e.g. a 16kB SLAB is most effective up to a 4kB block size.
+ *
+ * The \ref MEM_POISON flag enables detection of reads/writes past the end of
+ * an allocated block by raising a segmentation fault. This poses a significant
+ * time and space overhead. Enable only when debugging.
+ *
+ * \ref MEM_SLAB_CAP defines the maximum number of empty slabs that a cache
+ * can keep at a time. This results in a slight performance regression,
+ * but actively recycles unused memory.
+ *
+ * \ref SLAB_DEPOT_SIZE defines how many recycled slabs are cached for later
+ * use instead of being returned to the OS immediately. This significantly
+ * reduces the number of syscalls in some cases,
+ * e.g. 16 means a 16 * SLAB_SIZE cache; for 16kB slabs that is a 256kB cache.
+ *
+ * \ref MEM_COLORING enables simple cache coloring. This is generally a useful
+ * feature since all slabs are page size aligned and
+ * (depending on the architecture) it slightly improves performance
+ * and cacheline usage at the cost of at least 64 bytes of overhead per slab.
+ * Undefine MEM_COLORING in alloc-common.h to disable coloring.
+ *
+ * Optimal usage for a specific allocation pattern (similar allocation sizes):
+ * \code
+ * slab_cache_t cache;
+ * slab_cache_init(&cache, N); // Initialize, N means cache chunk size
+ * ...
+ * void* mem = slab_cache_alloc(&cache); // Allocate N bytes
+ * ...
+ * slab_free(mem); // Recycle memory
+ * ...
+ * slab_cache_destroy(&cache); // Deinitialize cache
+ * \endcode
+ *
+ *
+ * \todo Allocate slab headers elsewhere and use just first sizeof(void*) bytes
+ * in each slab as a pointer to slab header. This could improve the
+ * performance.
+ *
+ * \note Slab allocation is not thread safe for performance reasons.
+ *
+ * \addtogroup alloc
+ * @{
+ */
+
+#ifndef _KNOTD_COMMON_SLAB_H_
+#define _KNOTD_COMMON_SLAB_H_
+
+#include <pthread.h>
+#include <stdint.h>
+
+/* Constants. */
+#define SLAB_SIZE (4096*4) //!< Slab size (16K blocks)
+#define SLAB_MIN_BUFLEN 8 //!< Minimal allocation block size is 8B.
+#define SLAB_EXP_OFFSET 3 //!< Minimal allocation size is 8B = 2^3, exp is 3.
+#define SLAB_GP_COUNT 10 //!< General-purpose caches count.
+#define SLAB_US_COUNT 10 //!< User-specified caches count.
+#define SLAB_DEPOT_SIZE 16 //!< N slabs cached = N*SLAB_SIZE bytes cap
+#define SLAB_CACHE_COUNT (SLAB_GP_COUNT + SLAB_US_COUNT) //!< Slab cache count.
+struct slab_cache_t;
+
+/* Macros. */
+
+/*! \brief Return slab base address from pointer. */
+#define slab_from_ptr(p) ((void*)((size_t)(p) & SLAB_MASK))
+
+/*! \brief Return true if slab is empty. */
+#define slab_isempty(s) ((s)->bufs_free == (s)->bufs_count)
+
+/*!
+ * \brief Slab descriptor.
+ *
+ * A slab is a block of memory used for allocating
+ * smaller objects (bufs) later on.
+ * Each slab is currently aligned to the page size so that the
+ * slab address can easily be determined from a buf pointer.
+ *
+ * \warning Do not use slab_t directly as it cannot grow, see slab_cache_t.
+ */
+typedef struct slab_t {
+ char magic; /*!< Identifies memory block type. */
+ unsigned short bufsize; /*!< Slab bufsize. */
+ struct slab_cache_t *cache; /*!< Owner cache. */
+ struct slab_t *prev, *next; /*!< Neighbours in slab lists. */
+ unsigned bufs_count; /*!< Number of bufs in slab. */
+ unsigned bufs_free; /*!< Number of available bufs. */
+ void **head; /*!< Pointer to first available buf. */
+ char* base; /*!< Base address for bufs. */
+} slab_t;
+
+/*!
+ * \brief Slab depot.
+ *
+ * To mitigate slab initialization costs, the depot keeps a finite number of
+ * stacked slabs before returning them to the system.
+ */
+typedef struct slab_depot_t {
+ size_t available; /*!< Number of available pages. */
+ slab_t* cache[SLAB_DEPOT_SIZE]; /*!< Stack of free slabs. */
+} slab_depot_t;
+
+/*!
+ * \brief Large object descriptor.
+ *
+ * A large object differs from a slab by its magic byte and
+ * stores the object size.
+ *
+ * The magic byte needs to come first so that it overlaps with the slab_t magic byte.
+ */
+typedef struct slab_obj_t {
+ char magic; /*!< Identifies memory block type. */
+ size_t size; /*!< Object size. */
+} slab_obj_t;
+
+/*!
+ * \brief Slab cache descriptor.
+ *
+ * A slab cache is a list of 0..n slabs with the same buf size.
+ * It is responsible for keeping track of slab state.
+ *
+ * Once a slab is created, it is moved to the free list.
+ * When it becomes full, it is moved to the full list.
+ * Once a buf from a full slab is freed, the slab is moved back to
+ * the free list (there may be some hysteresis to mitigate
+ * sequential alloc/free patterns).
+ *
+ * Allocation of new slabs is on-demand, empty slabs are reused if possible.
+ *
+ * \note The slab implementation differs from Bonwick (Usenix 2001)
+ *       http://www.usenix.org/event/usenix01/bonwick.html
+ *       as it doesn't feature separate empty and partial lists.
+ *       This is due to the fact that a user space allocator rarely
+ *       needs to count free slabs, and there is no way the OS could
+ *       notify the application that memory is scarce.
+ *       A slight performance increase was measured in benchmarks.
+ *
+ * \note Statistics are only available if MEM_DEBUG is enabled.
+ */
+typedef struct slab_cache_t {
+ unsigned short color; /*!< Current cache color. */
+ unsigned short empty; /*!< Number of empty slabs. */
+ size_t bufsize; /*!< Cache object (buf) size. */
+ slab_t *slabs_free; /*!< List of free slabs. */
+ slab_t *slabs_full; /*!< List of full slabs. */
+
+ /* Statistics. */
+ unsigned long stat_allocs; /*!< Allocation count. */
+ unsigned long stat_frees; /*!< Free count. */
+} slab_cache_t;
+
+/*!
+ * \brief Slab allocator descriptor.
+ *
+ * \note For the number of slab caches, consult SLAB_GP_COUNT
+ *       and the number of specific records in the SLAB_CACHE_LUT lookup table.
+ *
+ * \warning It is currently not advised to use this general purpose allocator,
+ *          as it usually doesn't yield the expected performance due to higher
+ *          bookkeeping costs, and its efficiency also depends on the allocation
+ *          behavior. Prefer slab_cache_t for specialized use in most cases.
+ */
+typedef struct slab_alloc_t {
+ slab_cache_t descriptors; /*!< Slab cache for cache descriptors. */
+	slab_cache_t* caches[SLAB_CACHE_COUNT]; /*!< Array of slab caches. */
+} slab_alloc_t;
+
+/*!
+ * \brief Create a slab of predefined size.
+ *
+ * At the moment, slabs have a fixed size (SLAB_SIZE) and are aligned to it.
+ * This enables quick and efficient buf to slab lookup by pointer arithmetic.
+ *
+ * The slab uses a simple coloring scheme, and the memory block is always
+ * sizeof(void*) aligned.
+ *
+ * \param cache Parent cache.
+ * \retval Slab instance on success.
+ * \retval NULL on error.
+ */
+slab_t* slab_create(slab_cache_t* cache);
+
+/*!
+ * \brief Destroy slab instance.
+ *
+ * Slab is disconnected from any list and freed.
+ * Dereferenced slab parameter is set to NULL.
+ *
+ * \param slab Pointer to given slab.
+ */
+void slab_destroy(slab_t** slab);
+
+/*!
+ * \brief Allocate a buf from slab.
+ *
+ * Returns a pointer to allocated memory or NULL on error.
+ *
+ * \param slab Given slab instance.
+ * \retval Pointer to allocated memory.
+ * \retval NULL on error.
+ */
+void* slab_alloc(slab_t* slab);
+
+/*!
+ * \brief Recycle memory.
+ *
+ * Given memory is returned to owner slab.
+ * Memory content may be rewritten.
+ *
+ * \param ptr Returned memory.
+ */
+void slab_free(void* ptr);
+
+/*!
+ * \brief Create a slab cache.
+ *
+ * Create a slab cache with no allocated slabs.
+ * Slabs are allocated on-demand.
+ *
+ * \param cache Pointer to uninitialized cache.
+ * \param bufsize Single item size for later allocs.
+ * \retval 0 on success.
+ * \retval -1 on error.
+ */
+int slab_cache_init(slab_cache_t* cache, size_t bufsize);
+
+/*!
+ * \brief Destroy a slab cache.
+ *
+ * Destroy a slab cache and all associated slabs.
+ *
+ * \param cache Pointer to slab cache.
+ */
+void slab_cache_destroy(slab_cache_t* cache);
+
+/*!
+ * \brief Allocate from the cache.
+ *
+ * It tries to use partially free slabs first,
+ * empty slabs second, and allocates a new slab
+ * as a last resort.
+ *
+ * \param cache Given slab cache.
+ * \retval Pointer to allocated memory.
+ * \retval NULL on error.
+ */
+void* slab_cache_alloc(slab_cache_t* cache);
+
+/*!
+ * \brief Free unused slabs from cache.
+ *
+ * \param cache Given slab cache.
+ * \return Number of freed slabs.
+ */
+int slab_cache_reap(slab_cache_t* cache);
+
+/*!
+ * \brief Create a general purpose slab allocator.
+ *
+ * \note Please consult struct slab_alloc_t for performance hints.
+ *
+ * \retval 0 on success.
+ * \retval -1 on error.
+ */
+int slab_alloc_init(slab_alloc_t* alloc);
+
+/*!
+ * \brief Delete slab allocator.
+ *
+ * This destroys all associated caches and frees memory.
+ *
+ * \param alloc Given allocator instance.
+ */
+void slab_alloc_destroy(slab_alloc_t* alloc);
+
+/*!
+ * \brief Allocate a block of memory.
+ *
+ * Returns a block of allocated memory.
+ *
+ * \note At least SLAB_MIN_BUFLEN bytes are allocated.
+ *
+ * \note Please consult struct slab_alloc_t for performance hints.
+ *
+ * \param alloc Allocator instance.
+ * \param size Requested block size.
+ * \retval Pointer to allocated memory.
+ * \retval NULL on error.
+ */
+void* slab_alloc_alloc(slab_alloc_t* alloc, size_t size);
+
+/*!
+ * \brief Reallocate data from one slab to another.
+ *
+ * \param alloc Allocator instance.
+ * \param ptr Pointer to allocated memory.
+ * \param size Requested memory block size.
+ * \retval Pointer to newly allocated memory.
+ * \retval NULL on error.
+ *
+ * \todo Realloc could probably be implemented more efficiently.
+ */
+void *slab_alloc_realloc(slab_alloc_t* alloc, void *ptr, size_t size);
+
+/*!
+ *
+ * \brief Dump allocator stats.
+ *
+ * \param alloc Allocator instance.
+ */
+void slab_alloc_stats(slab_alloc_t* alloc);
+
+#endif /* _KNOTD_COMMON_SLAB_H_ */
+
+/*! @} */
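
For completeness, a minimal sketch of the general purpose slab_alloc_t interface declared above (illustrative only, not part of the imported patch; the file name is hypothetical and the include path assumes the src/ layout from the diffstat):

/* alloc-example.c: illustrative sketch only, not part of the upstream import. */
#include <string.h>
#include "common/slab/slab.h"

int main(void)
{
	slab_alloc_t alloc;
	slab_alloc_init(&alloc);                     /* prepare the descriptor cache */

	char *buf = slab_alloc_alloc(&alloc, 100);   /* served from a 128B cache */
	if (buf == 0) {
		return 1;
	}
	strcpy(buf, "hello");

	buf = slab_alloc_realloc(&alloc, buf, 300);  /* data moves to a 512B cache */

	slab_free(buf);                              /* return buf to its owner slab */
	slab_alloc_stats(&alloc);                    /* per-cache stats need MEM_DEBUG */
	slab_alloc_destroy(&alloc);                  /* free all caches and slabs */
	return 0;
}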