Diffstat (limited to 'src/common/slab/slab.c')
-rw-r--r--   src/common/slab/slab.c   732
1 file changed, 732 insertions, 0 deletions
diff --git a/src/common/slab/slab.c b/src/common/slab/slab.c
new file mode 100644
index 0000000..ccdf7ca
--- /dev/null
+++ b/src/common/slab/slab.c
@@ -0,0 +1,732 @@
+/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "common/slab/alloc-common.h"
+#include "common/slab/slab.h"
+
+/*
+ * Magic constants.
+ */
+#define SLAB_MAGIC 0x51 /*!< "Sl" magic byte (slab type). */
+#define LOBJ_MAGIC 0x0B /*!< "Ob" magic byte (object type). */
+#define POISON_DWORD 0xdeadbeef /*!< Memory boundary guard magic. */
+#define SLAB_MINCOLOR 64 /*!< Minimum space reserved for cache coloring. */
+#define SLAB_HEADER sizeof(slab_t) /*!< Slab header size. */
+#define ALIGN_PTRSZ __attribute__ ((__aligned__(sizeof(void*))))
+
+/*! \brief Fast cache id lookup table.
+ *
+ * Provides O(1) lookup.
+ * Pre-filled with a few frequently used sizes by default;
+ * the remaining entries are filled on demand.
+ */
+unsigned ALIGN_PTRSZ SLAB_CACHE_LUT[SLAB_SIZE] = {
+ [24] = SLAB_GP_COUNT + 1,
+ [800] = SLAB_GP_COUNT + 2
+};
+
+/*! \brief Find the next highest power of 2. */
+static inline unsigned get_next_pow2(unsigned v)
+{
+ // Next highest power of 2
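+ // e.g. get_next_pow2(24) == 32; powers of two map to themselves (32 -> 32)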
+ --v;
+ v |= v >> 1; v |= v >> 2;
+ v |= v >> 4; v |= v >> 8;
+ v |= v >> 16;
+ ++v;
+
+ return v;
+}
+
+/*! \brief Return binary logarithm of a number, which is a power of 2. */
+static inline unsigned fastlog2(unsigned v)
+{
+ // Works if we know the size is a power of 2
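+ // e.g. fastlog2(32) == 5, fastlog2(4096) == 12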
+ register unsigned int r = (v & 0xAAAAAAAA) != 0;
+ r |= ((v & 0xFFFF0000) != 0) << 4;
+ r |= ((v & 0xFF00FF00) != 0) << 3;
+ r |= ((v & 0xF0F0F0F0) != 0) << 2;
+ r |= ((v & 0xCCCCCCCC) != 0) << 1;
+ return r;
+}
+
+/*!
+ * \brief Fast hashing function.
+ *
+ * Finds the next highest power of 2 and returns binary logarithm.
+ * Values are stored in LUT cache for future access.
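+ *
+ * For example, a 100 B request rounds up to 128 B (2^7), so it maps to
+ * id 7 - SLAB_EXP_OFFSET; the offset (defined in slab.h) shifts the id
+ * of the smallest bufsize, SLAB_MIN_BUFLEN, down to 0.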
+ */
+static unsigned slab_cache_id(unsigned size)
+{
+ // Sizes up to the smallest bufsize map to cache id 0
+ if(size <= SLAB_MIN_BUFLEN) {
+ return 0;
+ }
+
+ // Check LUT
+ unsigned id = 0;
+ if ((id = SLAB_CACHE_LUT[size])) {
+ return id;
+ } else {
+
+ // Compute binary logarithm
+ // Next highest power of 2
+ id = fastlog2(get_next_pow2(size));
+
+ // Shift cacheid of SLAB_MIN_BUFLEN to 0
+ id -= SLAB_EXP_OFFSET;
+
+ // Store
+ SLAB_CACHE_LUT[size] = id;
+ }
+
+ return id;
+}
+
+/*
+ * Slab run-time constants.
+ */
+
+size_t SLAB_MASK = 0; /*!< \brief Slab address mask (for computing offsets). */
+static unsigned SLAB_LOGSIZE = 0; /*!< \brief Binary logarithm of slab size. */
+
+/*!
+ * Depot is a caching sub-allocator of slabs.
+ * It mitigates the performance impact of repeatedly allocating and freeing
+ * slabs that hold only a few items by caching up to N slabs before returning
+ * them to the system.
+ *
+ * \todo With wider use, locking or RCU will be necessary.
+ */
+#ifdef MEM_SLAB_DEPOT
+static slab_depot_t _depot_g; /*! \brief Global slab depot. */
+#endif // MEM_SLAB_DEPOT
+
+/*!
+ * \brief Allocate a slab of given bufsize from depot.
+ *
+ * \retval Reserved memory for slab on success.
+ * \retval NULL on errors.
+ */
+static void* slab_depot_alloc(size_t bufsize)
+{
+ void *page = 0;
+#ifdef MEM_SLAB_DEPOT
+ if (_depot_g.available) {
+ for (int i = _depot_g.available - 1; i > -1 ; --i) {
+ if(_depot_g.cache[i]->bufsize == bufsize) {
+ page = _depot_g.cache[i];
+ _depot_g.cache[i] = _depot_g.cache[--_depot_g.available];
+ return page;
+ }
+ }
+ page = _depot_g.cache[--_depot_g.available];
+ } else {
+ if(posix_memalign(&page, SLAB_SIZE, SLAB_SIZE) == 0) {
+ ((slab_t*)page)->bufsize = 0;
+ } else {
+ page = 0;
+ }
+
+ }
+#else // MEM_SLAB_DEPOT
+ if(posix_memalign(&page, SLAB_SIZE, SLAB_SIZE) == 0) {
+ ((slab_t*)page)->bufsize = 0;
+ } else {
+ page = 0;
+ }
+#endif // MEM_SLAB_DEPOT
+
+ return page;
+}
+
+/*!
+ * \brief Return a slab to the depot.
+ *
+ * \note If the depot is full, the slab is freed immediately.
+ */
+static inline void slab_depot_free(void* slab)
+{
+#ifdef MEM_SLAB_DEPOT
+ if (_depot_g.available < SLAB_DEPOT_SIZE) {
+ _depot_g.cache[_depot_g.available++] = slab;
+ } else {
+ free(slab);
+ }
+#else // MEM_SLAB_DEPOT
+ free(slab);
+#endif // MEM_SLAB_DEPOT
+}
+
+/*! \brief Initialize slab depot. */
+static void slab_depot_init()
+{
+#ifdef MEM_SLAB_DEPOT
+ _depot_g.available = 0;
+#endif // MEM_SLAB_DEPOT
+}
+
+/*! \brief Destroy slab depot. */
+static void slab_depot_destroy()
+{
+#ifdef MEM_SLAB_DEPOT
+ while(_depot_g.available) {
+ free(_depot_g.cache[--_depot_g.available]);
+ }
+#endif // MEM_SLAB_DEPOT
+}
+
+/*
+ * Initializers.
+ */
+
+/*! \brief Initializes slab subsystem (it is called automatically). */
+void __attribute__ ((constructor)) slab_init()
+{
+ // Fetch page size
+ SLAB_LOGSIZE = fastlog2(SLAB_SIZE);
+
+ // Compute slab page mask
+ SLAB_MASK = 0;
+ for (int i = 0; i < SLAB_LOGSIZE; ++i) {
+ SLAB_MASK |= 1 << i;
+ }
+ SLAB_MASK = ~SLAB_MASK;
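+ // e.g. with SLAB_SIZE == 4096, SLAB_LOGSIZE == 12 and SLAB_MASK == ~0xFFFUL;
+ // masking a pointer into a slab clears the in-slab offset and yields the
+ // slab base, since slabs are SLAB_SIZE-aligned (see posix_memalign above)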
+
+ // Initialize depot
+ slab_depot_init();
+}
+
+/*! \brief Deinitializes slab subsystem (it is called automatically). */
+void __attribute__ ((destructor)) slab_deinit()
+{
+ // Deinitialize global allocator
+ if (SLAB_LOGSIZE) {
+ slab_depot_destroy();
+ SLAB_LOGSIZE = SLAB_MASK = 0;
+ }
+}
+
+/*
+ * Cache helper functions.
+ */
+
+/* \note Not used right now.
+static void slab_dump(slab_t* slab) {
+
+ printf("%s: buffers (bufsize=%zuB, %u/%u free): \n",
+ __func__, slab->cache->bufsize, slab->bufs_free,
+ slab->bufs_count);
+
+ void** buf = slab->head;
+ int i = 0, n = 0;
+ while(buf != 0) {
+ size_t diff = (size_t)((char*)buf - (char*)slab->base);
+ printf("-> %lu", diff / slab->cache->bufsize);
+ buf = (void**)(*buf);
+ if (++i == 10) {
+ printf("\n");
+ i = 0;
+ }
+ ++n;
+ }
+
+ printf("\n");
+}
+*/
+
+/*!
+ * \brief Free all slabs from a slab cache.
+ * \return Number of freed slabs.
+ */
+static inline int slab_cache_free_slabs(slab_t* slab)
+{
+ int count = 0;
+ while (slab) {
+ slab_t* next = slab->next;
+ slab_destroy(&slab);
+ ++count;
+ slab = next;
+
+ }
+ return count;
+}
+
+/*
+ * Slab helper functions.
+ */
+
+/*! \brief Return number of slabs in a linked list. */
+static inline unsigned slab_list_walk(slab_t* slab)
+{
+ unsigned count = 0;
+ while(slab) {
+ slab = slab->next;
+ ++count;
+ }
+ return count;
+}
+
+/*! \brief Remove slab from a linked list. */
+static void slab_list_remove(slab_t* slab)
+{
+ // Disconnect from list
+ if (slab->prev) {
+ slab->prev->next = slab->next;
+ }
+ if(slab->next) {
+ slab->next->prev = slab->prev;
+ }
+
+ // Disconnect from cache
+ slab_cache_t* cache = slab->cache;
+ {
+ if (cache->slabs_free == slab) {
+ cache->slabs_free = slab->next;
+ } else if (cache->slabs_full == slab) {
+ cache->slabs_full = slab->next;
+ }
+ }
+}
+
+/*! \brief Insert slab into a linked list. */
+static void slab_list_insert(slab_t** list, slab_t* item)
+{
+ // If list exists, push to the top
+ item->prev = 0;
+ item->next = *list;
+ if(*list) {
+ (*list)->prev = item;
+ }
+ *list = item;
+}
+
+/*! \brief Move slab from one linked list to another. */
+static inline void slab_list_move(slab_t** target, slab_t* slab)
+{
+ slab_list_remove(slab);
+ slab_list_insert(target, slab);
+}
+
+/*
+ * API functions.
+ */
+
+slab_t* slab_create(slab_cache_t* cache)
+{
+ const size_t size = SLAB_SIZE;
+
+ slab_t* slab = slab_depot_alloc(cache->bufsize);
+
+ if (unlikely(slab == 0)) {
+ dbg_mem("%s: failed to allocate aligned memory block\n",
+ __func__);
+ return 0;
+ }
+
+ /* Initialize slab. */
+ slab->magic = SLAB_MAGIC;
+ slab->cache = cache;
+ slab_list_insert(&cache->slabs_free, slab);
+#ifdef MEM_SLAB_CAP
+ ++cache->empty;
+#endif
+
+ /* Already initialized? */
+ if (slab->bufsize == cache->bufsize) {
+ return slab;
+ } else {
+ slab->bufsize = cache->bufsize;
+ }
+
+ /* Ensure the item size can hold at least a size of ptr. */
+ size_t item_size = slab->bufsize;
+ if (unlikely(item_size < SLAB_MIN_BUFLEN)) {
+ item_size = SLAB_MIN_BUFLEN;
+ }
+
+ /* Ensure at least some space for coloring */
+ size_t data_size = size - sizeof(slab_t);
+#ifdef MEM_COLORING
+ size_t free_space = data_size % item_size;
+ if (unlikely(free_space < SLAB_MINCOLOR)) {
+ free_space = SLAB_MINCOLOR;
+ }
+
+
+ /// unsigned short color = __sync_fetch_and_add(&cache->color, 1);
+ unsigned short color = (cache->color += sizeof(void*));
+ color = color % free_space;
+#else
+ const unsigned short color = 0;
+#endif
+
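+ /* Slab layout being set up below (illustrative):
+  *
+  *   [ slab_t | color padding | buf 0 | buf 1 | ... | buf N-1 | waste ]
+  *
+  * Each buf is item_size bytes; free bufs are chained through their first
+  * word, with slab->head pointing at the first free one.
+  */
+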
+ /* Calculate usable data size */
+ data_size -= color;
+ slab->bufs_count = data_size / item_size;
+ slab->bufs_free = slab->bufs_count;
+
+ // Save first item as next free
+ slab->base = (char*)slab + sizeof(slab_t) + color;
+ slab->head = (void**)slab->base;
+
+ // Create freelist, skip last member, which is set to NULL
+ char* item = (char*)slab->head;
+ for(unsigned i = 0; i < slab->bufs_count - 1; ++i) {
+ *((void**)item) = item + item_size;
+ item += item_size;
+ }
+
+ // Set last buf to NULL (tail)
+ *((void**)item) = (void*)0;
+
+ dbg_mem("%s: created slab (%p, %p) (%zu B)\n",
+ __func__, slab, (char*)slab + size, size);
+ return slab;
+}
+
+void slab_destroy(slab_t** slab)
+{
+ /* Disconnect from the list */
+ slab_list_remove(*slab);
+
+ /* Free slab */
+ slab_depot_free(*slab);
+
+ /* Invalidate pointer. */
+ dbg_mem("%s: deleted slab %p\n", __func__, *slab);
+ *slab = 0;
+}
+
+void* slab_alloc(slab_t* slab)
+{
+ // Fetch first free item
+ void **item = 0;
+ {
+ if((item = slab->head)) {
+ slab->head = (void**)*item;
+ --slab->bufs_free;
+ } else {
+ // No more free items
+ return 0;
+ }
+ }
+
+#ifdef MEM_DEBUG
+ // Increment statistics
+ __sync_add_and_fetch(&slab->cache->stat_allocs, 1);
+#endif
+
+ // Move to full?
+ if (unlikely(slab->bufs_free == 0)) {
+ slab_list_move(&slab->cache->slabs_full, slab);
+ } else {
+#ifdef MEM_SLAB_CAP
+ // First allocation from a previously empty slab
+ if (unlikely(slab->bufs_free == slab->bufs_count - 1)) {
+ --slab->cache->empty;
+ }
+#endif
+ }
+
+ return item;
+}
+
+void slab_free(void* ptr)
+{
+ // Null pointer check
+ if (unlikely(!ptr)) {
+ return;
+ }
+
+ // Get slab start address
+ slab_t* slab = slab_from_ptr(ptr);
+ assert(slab);
+
+ // Check the magic byte to see if this is a slab-managed buffer
+ if (slab->magic == SLAB_MAGIC) {
+
+ // Return buf to slab
+ *((void**)ptr) = (void*)slab->head;
+ slab->head = (void**)ptr;
+ ++slab->bufs_free;
+
+#ifdef MEM_DEBUG
+ // Increment statistics
+ __sync_add_and_fetch(&slab->cache->stat_frees, 1);
+#endif
+
+ // Return to partial
+ if(unlikely(slab->bufs_free == 1)) {
+ slab_list_move(&slab->cache->slabs_free, slab);
+ } else {
+#ifdef MEM_SLAB_CAP
+ // Recycle if empty
+ if(unlikely(slab_isempty(slab))) {
+ if(slab->cache->empty == MEM_SLAB_CAP) {
+ slab_destroy(&slab);
+ } else {
+ ++slab->cache->empty;
+ }
+ }
+#endif
+ }
+
+ } else {
+
+ // Pointer is not a slab
+ // Presuming it's a large block
+ slab_obj_t* bs = (slab_obj_t*)ptr - 1;
+
+#ifdef MEM_POISON
+ // Unprotect the poison guard at the end of the block
+ mprotect((char*)ptr + bs->size, sizeof(int), PROT_READ|PROT_WRITE);
+#endif
+
+ // Unmap
+ dbg_mem("%s: unmapping large block of %zu bytes at %p\n",
+ __func__, bs->size, ptr);
+ free(bs);
+ }
+}
+
+int slab_cache_init(slab_cache_t* cache, size_t bufsize)
+{
+ if (unlikely(!bufsize)) {
+ return -1;
+ }
+
+ cache->empty = 0;
+ cache->bufsize = bufsize;
+ cache->slabs_free = cache->slabs_full = 0;
+ cache->color = 0;
+
+ /* Initialize stats */
+ cache->stat_allocs = cache->stat_frees = 0;
+
+ dbg_mem("%s: created cache of size %zu\n",
+ __func__, bufsize);
+
+ return 0;
+}
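+
+/*
+ * Caller-side sketch of a dedicated cache (struct foo is hypothetical):
+ *
+ *   slab_cache_t cache;
+ *   slab_cache_init(&cache, sizeof(struct foo));
+ *   struct foo *f = slab_cache_alloc(&cache);
+ *   ...
+ *   slab_free(f);               // returns the buffer to its slab
+ *   slab_cache_destroy(&cache);
+ */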
+
+void slab_cache_destroy(slab_cache_t* cache) {
+
+ // Free slabs
+ unsigned free_s = slab_cache_free_slabs(cache->slabs_free);
+ unsigned full_s = slab_cache_free_slabs(cache->slabs_full);
+#ifndef MEM_DEBUG
+ UNUSED(free_s);
+ UNUSED(full_s);
+#else
+ dbg_mem("%s: %u empty/partial, %u full caches\n",
+ __func__, free_s, full_s);
+#endif
+
+ // Invalidate cache
+ cache->bufsize = 0;
+ cache->slabs_free = cache->slabs_full = 0;
+}
+
+void* slab_cache_alloc(slab_cache_t* cache)
+{
+ slab_t* slab = cache->slabs_free;
+ if(!cache->slabs_free) {
+ slab = slab_create(cache);
+ if (slab == NULL) {
+ return NULL;
+ }
+ }
+
+
+ return slab_alloc(slab);
+}
+
+int slab_cache_reap(slab_cache_t* cache)
+{
+ // For now, just free empty slabs
+ slab_t* slab = cache->slabs_free;
+ int count = 0;
+ while (slab) {
+ slab_t* next = slab->next;
+ if (slab_isempty(slab)) {
+ slab_destroy(&slab);
+ ++count;
+ }
+ slab = next;
+
+ }
+
+ cache->empty = 0;
+ return count;
+}
+
+int slab_alloc_init(slab_alloc_t* alloc)
+{
+ // Invalidate
+ memset(alloc, 0, sizeof(slab_alloc_t));
+
+ // Initialize descriptors cache
+ slab_cache_init(&alloc->descriptors, sizeof(slab_cache_t));
+
+ return 0;
+}
+
+void slab_alloc_destroy(slab_alloc_t* alloc)
+{
+ // Destroy all caches
+ for (unsigned i = 0; i < SLAB_CACHE_COUNT; ++i) {
+ if (alloc->caches[i] != 0) {
+ slab_cache_destroy(alloc->caches[i]);
+ }
+ }
+
+ // Destroy cache for descriptors
+ slab_cache_destroy(&alloc->descriptors);
+}
+
+void* slab_alloc_alloc(slab_alloc_t* alloc, size_t size)
+{
+ // Invalid size check
+ if (unlikely(!size)) {
+ return 0;
+ }
+
+#ifdef MEM_POISON
+ // Reserve memory for poison
+ size += sizeof(int);
+#endif
+ // Directly map large block
+ if (unlikely(size > SLAB_SIZE/2)) {
+
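+ // The block is laid out as
+ //   [ slab_obj_t header | user data | poison guard (MEM_POISON only) ]
+ // and the caller receives the address just past the header (p + 1).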
+ // Map block
+ size += sizeof(slab_obj_t);
+ slab_obj_t* p = malloc(size);
+ if (unlikely(p == 0)) {
+ return 0;
+ }
+
+ dbg_mem("%s: mapping large block of %zu bytes at %p\n",
+ __func__, size, p + 1);
+
+ /* Initialize. */
+ p->magic = LOBJ_MAGIC;
+ p->size = size - sizeof(slab_obj_t);
+
+#ifdef MEM_POISON
+ // Reduce real size
+ p->size -= sizeof(int);
+
+ // Write a poison guard word past the usable area and protect it
+ int* pb = (int*)((char*)p + size - sizeof(int));
+ *pb = POISON_DWORD;
+ mprotect(pb, sizeof(int), PROT_NONE);
+#endif
+
+ return p + 1;
+ }
+
+ // Get cache id from size
+ unsigned cache_id = slab_cache_id(size);
+
+ // Check if associated cache exists
+ if (unlikely(alloc->caches[cache_id] == 0)) {
+
+ // Assert minimum cache size
+ if (unlikely(size < SLAB_MIN_BUFLEN)) {
+ size = SLAB_MIN_BUFLEN;
+ }
+
+ // Calculate cache bufsize
+ size_t bufsize = size;
+ if (cache_id < SLAB_GP_COUNT) {
+ bufsize = get_next_pow2(size);
+ }
+
+ // Create cache
+ dbg_mem("%s: creating cache of %zuB (req. %zuB) (id=%u)\n",
+ __func__, bufsize, size, cache_id);
+
+ slab_cache_t* cache = slab_cache_alloc(&alloc->descriptors);
+ if (unlikely(cache == 0)) {
+ return 0;
+ }
+ slab_cache_init(cache, bufsize);
+ alloc->caches[cache_id] = cache;
+ }
+
+ // Allocate from cache
+ void* mem = slab_cache_alloc(alloc->caches[cache_id]);
+
+#ifdef MEM_POISON
+ // Memory barrier
+ /*! \todo Broken, need to store the barrier byte size. */
+ //int* pb = (int*)((char*)mem + size - sizeof(int));
+ //mprotect(pb, sizeof(int), PROT_NONE);
+#endif
+ return mem;
+}
+
+void *slab_alloc_realloc(slab_alloc_t* alloc, void *ptr, size_t size)
+{
+ // realloc() with size 0 is equivalent to free(ptr)
+ if (!size) {
+ slab_free(ptr);
+ return 0;
+ }
+
+ // Allocate new buf
+ void *nptr = slab_alloc_alloc(alloc, size);
+ assert(nptr);
+
+ // Copy memory if present
+ if (ptr) {
+ slab_t* slab = slab_from_ptr(ptr);
+ size_t copy = slab->cache->bufsize < size ? slab->cache->bufsize : size;
+ memcpy(nptr, ptr, copy);
+
+ // Free old buf
+ slab_free(ptr);
+ }
+
+ return nptr;
+}
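+
+/*
+ * Caller-side sketch of the general-purpose allocator (sizes illustrative):
+ *
+ *   slab_alloc_t alloc;
+ *   slab_alloc_init(&alloc);
+ *   void *buf = slab_alloc_alloc(&alloc, 100);   // served from a 128 B cache
+ *   buf = slab_alloc_realloc(&alloc, buf, 300);  // moves to a larger cache
+ *   slab_free(buf);
+ *   slab_alloc_destroy(&alloc);
+ */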
+
+void slab_alloc_stats(slab_alloc_t* alloc)
+{
+#ifdef MEM_DEBUG
+ printf("Cache usage:\n");
+ for (int i = 0; i < SLAB_CACHE_COUNT; ++i) {
+
+ if (!alloc->caches[i])
+ continue;
+
+ slab_cache_t* cache = alloc->caches[i];
+ unsigned free_s = slab_list_walk(cache->slabs_free);
+ unsigned full_s = slab_list_walk(cache->slabs_full);
+ printf("%4zu: allocs=%lu frees=%lu "
+ "(%u empty+partial, %u full)\n",
+ cache->bufsize, cache->stat_allocs,
+ cache->stat_frees, free_s, full_s);
+ }
+#else
+ printf("Cache usage: not available, enable MEM_DEBUG and recompile.\n");
+#endif
+}
+