author    | Ondřej Surý <ondrej@sury.org> | 2011-11-02 22:44:12 +0100
committer | Ondřej Surý <ondrej@sury.org> | 2011-11-02 22:44:12 +0100
commit    | c8d5977bb546dae9ed59d81556639c49badd8121 (patch)
tree      | 4c86750db26c1c3502b60f2cd78ca9611cfa01d6 /src/common/slab
download  | knot-upstream/0.8.0_pre1.tar.gz
Imported Upstream version 0.8.0~pre1 (tag: upstream/0.8.0_pre1)
Diffstat (limited to 'src/common/slab')
-rw-r--r-- | src/common/slab/alloc-common.h |  61
-rw-r--r-- | src/common/slab/malloc.c       |  60
-rw-r--r-- | src/common/slab/malloc.h       |  43
-rw-r--r-- | src/common/slab/slab.c         | 732
-rw-r--r-- | src/common/slab/slab.h         | 353
5 files changed, 1249 insertions, 0 deletions
diff --git a/src/common/slab/alloc-common.h b/src/common/slab/alloc-common.h new file mode 100644 index 0000000..32878ab --- /dev/null +++ b/src/common/slab/alloc-common.h @@ -0,0 +1,61 @@ +/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +/*! + * \file alloc-common.h + * + * \author Lubos Slovak <lubos.slovak@nic.cz> + * + * \brief Common macros for alloc. + * + * \addtogroup alloc + * @{ + */ + +#ifndef _KNOTD_COMMON_ALLOC_COMMON_H_ +#define _KNOTD_COMMON_ALLOC_COMMON_H_ + +#include <stdio.h> + +//#define MEM_DEBUG +//#define MEM_NOSLAB +//#define MEM_POISON +#define MEM_SLAB_CAP 5 // Cap slab_cache empty slab count (undefined = inf) +#define MEM_COLORING // Slab cache coloring +//#define MEM_SLAB_DEPOT // Use slab depot for slab caching (not thread-safe) + +/* Eliminate compiler warning with unused parameters. */ +#ifndef UNUSED +#define UNUSED(param) (void)(param) +#endif + +/* Optimisation macros. */ +#ifndef likely +#define likely(x) __builtin_expect((x),1) +#endif +#ifndef unlikely +#define unlikely(x) __builtin_expect((x),0) +#endif + +#ifdef MEM_DEBUG +#define dbg_mem(msg...) fprintf(stderr, msg) +#else +#define dbg_mem(msg...) +#endif + + +#endif /* _KNOTD_COMMON_ALLOC_COMMON_H_ */ + +/*! @} */ diff --git a/src/common/slab/malloc.c b/src/common/slab/malloc.c new file mode 100644 index 0000000..ec5a68d --- /dev/null +++ b/src/common/slab/malloc.c @@ -0,0 +1,60 @@ +/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +/* + * Skip unit if not debugging memory. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/resource.h> + +#include "common/slab/alloc-common.h" + +#ifdef MEM_DEBUG +/* + * ((destructor)) attribute executes this function after main(). + * \see http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html + */ +void __attribute__ ((destructor)) usage_dump() +#else +void usage_dump() +#endif +{ + /* Get resource usage. 
*/ + struct rusage usage; + if (getrusage(RUSAGE_SELF, &usage) < 0) { + memset(&usage, 0, sizeof(struct rusage)); + } + + fprintf(stderr, "\nMemory statistics:"); + fprintf(stderr, "\n==================\n"); + + fprintf(stderr, "User time: %.03lf ms\nSystem time: %.03lf ms\n", + usage.ru_utime.tv_sec * (double) 1000.0 + + usage.ru_utime.tv_usec / (double)1000.0, + usage.ru_stime.tv_sec * (double) 1000.0 + + usage.ru_stime.tv_usec / (double)1000.0); + fprintf(stderr, "Major page faults: %lu (required I/O)\nMinor page faults: %lu\n", + usage.ru_majflt, usage.ru_minflt); + fprintf(stderr, "Number of swaps: %lu\n", + usage.ru_nswap); + fprintf(stderr, "Voluntary context switches: %lu\nInvoluntary context switches: %lu\n", + usage.ru_nvcsw, + usage.ru_nivcsw); + fprintf(stderr, "==================\n"); +} diff --git a/src/common/slab/malloc.h b/src/common/slab/malloc.h new file mode 100644 index 0000000..8ca9f58 --- /dev/null +++ b/src/common/slab/malloc.h @@ -0,0 +1,43 @@ +/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +/*! + * \file malloc.h + * + * \author Marek Vavrusa <marek.vavrusa@nic.cz> + * + * \brief Memory allocation related functions. + * + * \addtogroup alloc + * @{ + */ + +#ifndef _KNOTD_COMMON_MALLOC_H_ +#define _KNOTD_COMMON_MALLOC_H_ + +#include <stdlib.h> + +/*! \brief Print usage statistics. + * + * \note This function has destructor attribute set if MEM_DEBUG is enabled. + * + * \warning Not all printed statistics are available on every OS, + * consult manual page for getrusage(2). + */ +void usage_dump(); + +#endif // _KNOTD_COMMON_MALLOC_H_ + +/*! @} */ diff --git a/src/common/slab/slab.c b/src/common/slab/slab.c new file mode 100644 index 0000000..ccdf7ca --- /dev/null +++ b/src/common/slab/slab.c @@ -0,0 +1,732 @@ +/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <assert.h> +#include <stdlib.h> +#include <sys/mman.h> + +#include "common/slab/alloc-common.h" +#include "common/slab/slab.h" + +/* + * Magic constants. + */ +#define SLAB_MAGIC 0x51 /*!< "Sl" magic byte (slab type). */ +#define LOBJ_MAGIC 0x0B /*!< "Ob" magic byte (object type). 
*/ +#define POISON_DWORD 0xdeadbeef /*!< Memory boundary guard magic. */ +#define SLAB_MINCOLOR 64 /*!< Minimum space reserved for cache coloring. */ +#define SLAB_HEADER sizeof(slab_t) /*!< Slab header size. */ +#define ALIGN_PTRSZ __attribute__ ((__aligned__(sizeof(void*)))) + +/*! \brief Fast cache id lookup table. + * + * Provides O(1) lookup. + * Filled with interesting values from default + * or on-demand. + */ +unsigned ALIGN_PTRSZ SLAB_CACHE_LUT[SLAB_SIZE] = { + [24] = SLAB_GP_COUNT + 1, + [800] = SLAB_GP_COUNT + 2 +}; + +/*! \brief Find the next highest power of 2. */ +static inline unsigned get_next_pow2(unsigned v) +{ + // Next highest power of 2 + --v; + v |= v >> 1; v |= v >> 2; + v |= v >> 4; v |= v >> 8; + v |= v >> 16; + ++v; + + return v; +} + +/*! \brief Return binary logarithm of a number, which is a power of 2. */ +static inline unsigned fastlog2(unsigned v) +{ + // Works if we know the size is a power of 2 + register unsigned int r = (v & 0xAAAAAAAA) != 0; + r |= ((v & 0xFFFF0000) != 0) << 4; + r |= ((v & 0xFF00FF00) != 0) << 3; + r |= ((v & 0xF0F0F0F0) != 0) << 2; + r |= ((v & 0xCCCCCCCC) != 0) << 1; + return r; +} + +/*! + * \brief Fast hashing function. + * + * Finds the next highest power of 2 and returns binary logarithm. + * Values are stored in LUT cache for future access. + */ +static unsigned slab_cache_id(unsigned size) +{ + // Assert cache id of the smallest bufsize is 0 + if(size <= SLAB_MIN_BUFLEN) { + return 0; + } + + // Check LUT + unsigned id = 0; + if ((id = SLAB_CACHE_LUT[size])) { + return id; + } else { + + // Compute binary logarithm + // Next highest power of 2 + id = fastlog2(get_next_pow2(size)); + + // Shift cacheid of SLAB_MIN_BUFLEN to 0 + id -= SLAB_EXP_OFFSET; + + // Store + SLAB_CACHE_LUT[size] = id; + } + + return id; +} + +/* + * Slab run-time constants. + */ + +size_t SLAB_MASK = 0; /*!< \brief Slab address mask (for computing offsets). */ +static unsigned SLAB_LOGSIZE = 0; /*!< \brief Binary logarithm of slab size. */ + +/*! + * Depot is a caching sub-allocator of slabs. + * It mitigates performance impact of sequentially allocating and freeing + * from a slab with just a few slab items by caching N slabs before returning + * them to the system. + * + * \todo With wider use, locking or RCU will be necessary. + */ +#ifdef MEM_SLAB_DEPOT +static slab_depot_t _depot_g; /*! \brief Global slab depot. */ +#endif // MEM_SLAB_DEPOT + +/*! + * \brief Allocate a slab of given bufsize from depot. + * + * \retval Reserved memory for slab on success. + * \retval NULL on errors. + */ +static void* slab_depot_alloc(size_t bufsize) +{ + void *page = 0; +#ifdef MEM_SLAB_DEPOT + if (_depot_g.available) { + for (int i = _depot_g.available - 1; i > -1 ; --i) { + if(_depot_g.cache[i]->bufsize == bufsize) { + page = _depot_g.cache[i]; + _depot_g.cache[i] = _depot_g.cache[--_depot_g.available]; + return page; + } + } + page = _depot_g.cache[--_depot_g.available]; + } else { + if(posix_memalign(&page, SLAB_SIZE, SLAB_SIZE) == 0) { + ((slab_t*)page)->bufsize = 0; + } else { + page = 0; + } + + } +#else // MEM_SLAB_DEPOT + if(posix_memalign(&page, SLAB_SIZE, SLAB_SIZE) == 0) { + ((slab_t*)page)->bufsize = 0; + } else { + page = 0; + } +#endif // MEM_SLAB_DEPOT + + return page; +} + +/*! + * \brief Return a slab to the depot. + * + * \note If the depot is full, slab gets immediately freed. 
+ */ +static inline void slab_depot_free(void* slab) +{ +#ifdef MEM_SLAB_DEPOT + if (_depot_g.available < SLAB_DEPOT_SIZE) { + _depot_g.cache[_depot_g.available++] = slab; + } else { + free(slab); + } +#else // MEM_SLAB_DEPOT + free(slab); +#endif // MEM_SLAB_DEPOT +} + +/*! \brief Initialize slab depot. */ +static void slab_depot_init() +{ +#ifdef MEM_SLAB_DEPOT + _depot_g.available = 0; +#endif // MEM_SLAB_DEPOT +} + +/*! \brief Destroy slab depot. */ +static void slab_depot_destroy() +{ +#ifdef MEM_SLAB_DEPOT + while(_depot_g.available) { + free(_depot_g.cache[--_depot_g.available]); + } +#endif // MEM_SLAB_DEPOT +} + +/* + * Initializers. + */ + +/*! \brief Initializes slab subsystem (it is called automatically). */ +void __attribute__ ((constructor)) slab_init() +{ + // Fetch page size + SLAB_LOGSIZE = fastlog2(SLAB_SIZE); + + // Compute slab page mask + SLAB_MASK = 0; + for (int i = 0; i < SLAB_LOGSIZE; ++i) { + SLAB_MASK |= 1 << i; + } + SLAB_MASK = ~SLAB_MASK; + + // Initialize depot + slab_depot_init(); +} + +/*! \brief Deinitializes slab subsystem (it is called automatically). */ +void __attribute__ ((destructor)) slab_deinit() +{ + // Deinitialize global allocator + if (SLAB_LOGSIZE) { + slab_depot_destroy(); + SLAB_LOGSIZE = SLAB_MASK = 0; + } +} + +/* + * Cache helper functions. + */ + +/* \notice Not used right now. +static void slab_dump(slab_t* slab) { + + printf("%s: buffers (bufsize=%zuB, %u/%u free): \n", + __func__, slab->cache->bufsize, slab->bufs_free, + slab->bufs_count); + + void** buf = slab->head; + int i = 0, n = 0; + while(buf != 0) { + size_t diff = (size_t)((char*)buf - (char*)slab->base); + printf("-> %lu", diff / slab->cache->bufsize); + buf = (void**)(*buf); + if (++i == 10) { + printf("\n"); + i = 0; + } + ++n; + } + + printf("\n"); +} +*/ + +/*! + * \brief Free all slabs from a slab cache. + * \return Number of freed slabs. + */ +static inline int slab_cache_free_slabs(slab_t* slab) +{ + int count = 0; + while (slab) { + slab_t* next = slab->next; + slab_destroy(&slab); + ++count; + slab = next; + + } + return count; +} + +/* + * Slab helper functions. + */ + +/*! \brief Return number of slabs in a linked list. */ +static inline unsigned slab_list_walk(slab_t* slab) +{ + unsigned count = 0; + while(slab) { + slab = slab->next; + ++count; + } + return count; +} + +/*! \brief Remove slab from a linked list. */ +static void slab_list_remove(slab_t* slab) +{ + // Disconnect from list + if (slab->prev) { + slab->prev->next = slab->next; + } + if(slab->next) { + slab->next->prev = slab->prev; + } + + // Disconnect from cache + slab_cache_t* cache = slab->cache; + { + if (cache->slabs_free == slab) { + cache->slabs_free = slab->next; + } else if (cache->slabs_full == slab) { + cache->slabs_full = slab->next; + } + } +} + +/*! \brief Insert slab into a linked list. */ +static void slab_list_insert(slab_t** list, slab_t* item) +{ + // If list exists, push to the top + item->prev = 0; + item->next = *list; + if(*list) { + (*list)->prev = item; + } + *list = item; +} + +/*! \brief Move slab from one linked list to another. */ +static inline void slab_list_move(slab_t** target, slab_t* slab) +{ + slab_list_remove(slab); + slab_list_insert(target, slab); +} + +/* + * API functions. + */ + +slab_t* slab_create(slab_cache_t* cache) +{ + const size_t size = SLAB_SIZE; + + slab_t* slab = slab_depot_alloc(cache->bufsize); + + if (unlikely(slab < 0)) { + dbg_mem("%s: failed to allocate aligned memory block\n", + __func__); + return 0; + } + + /* Initialize slab. 
*/ + slab->magic = SLAB_MAGIC; + slab->cache = cache; + slab_list_insert(&cache->slabs_free, slab); +#ifdef MEM_SLAB_CAP + ++cache->empty; +#endif + + /* Already initialized? */ + if (slab->bufsize == cache->bufsize) { + return slab; + } else { + slab->bufsize = cache->bufsize; + } + + /* Ensure the item size can hold at least a size of ptr. */ + size_t item_size = slab->bufsize; + if (unlikely(item_size < SLAB_MIN_BUFLEN)) { + item_size = SLAB_MIN_BUFLEN; + } + + /* Ensure at least some space for coloring */ + size_t data_size = size - sizeof(slab_t); +#ifdef MEM_COLORING + size_t free_space = data_size % item_size; + if (unlikely(free_space < SLAB_MINCOLOR)) { + free_space = SLAB_MINCOLOR; + } + + + /// unsigned short color = __sync_fetch_and_add(&cache->color, 1); + unsigned short color = (cache->color += sizeof(void*)); + color = color % free_space; +#else + const unsigned short color = 0; +#endif + + /* Calculate useable data size */ + data_size -= color; + slab->bufs_count = data_size / item_size; + slab->bufs_free = slab->bufs_count; + + // Save first item as next free + slab->base = (char*)slab + sizeof(slab_t) + color; + slab->head = (void**)slab->base; + + // Create freelist, skip last member, which is set to NULL + char* item = (char*)slab->head; + for(unsigned i = 0; i < slab->bufs_count - 1; ++i) { + *((void**)item) = item + item_size; + item += item_size; + } + + // Set last buf to NULL (tail) + *((void**)item) = (void*)0; + + // Ensure the last item has a NULL next + dbg_mem("%s: created slab (%p, %p) (%zu B)\n", + __func__, slab, slab + size, size); + return slab; +} + +void slab_destroy(slab_t** slab) +{ + /* Disconnect from the list */ + slab_list_remove(*slab); + + /* Free slab */ + slab_depot_free(*slab); + + /* Invalidate pointer. */ + dbg_mem("%s: deleted slab %p\n", __func__, *slab); + *slab = 0; +} + +void* slab_alloc(slab_t* slab) +{ + // Fetch first free item + void **item = 0; + { + if((item = slab->head)) { + slab->head = (void**)*item; + --slab->bufs_free; + } else { + // No more free items + return 0; + } + } + +#ifdef MEM_DEBUG + // Increment statistics + __sync_add_and_fetch(&slab->cache->stat_allocs, 1); +#endif + + // Move to full? + if (unlikely(slab->bufs_free == 0)) { + slab_list_move(&slab->cache->slabs_full, slab); + } else { +#ifdef MEM_SLAB_CAP + // Mark not empty? 
+ if (unlikely(slab->bufs_free == slab->bufs_count - 1)) { + --slab->cache->empty; + } +#endif + } + + return item; +} + +void slab_free(void* ptr) +{ + // Null pointer check + if (unlikely(!ptr)) { + return; + } + + // Get slab start address + slab_t* slab = slab_from_ptr(ptr); + assert(slab); + + // Check if it exists in directory + if (slab->magic == SLAB_MAGIC) { + + // Return buf to slab + *((void**)ptr) = (void*)slab->head; + slab->head = (void**)ptr; + ++slab->bufs_free; + +#ifdef MEM_DEBUG + // Increment statistics + __sync_add_and_fetch(&slab->cache->stat_frees, 1); +#endif + + // Return to partial + if(unlikely(slab->bufs_free == 1)) { + slab_list_move(&slab->cache->slabs_free, slab); + } else { +#ifdef MEM_SLAB_CAP + // Recycle if empty + if(unlikely(slab_isempty(slab))) { + if(slab->cache->empty == MEM_SLAB_CAP) { + slab_destroy(&slab); + } else { + ++slab->cache->empty; + } + } +#endif + } + + } else { + + // Pointer is not a slab + // Presuming it's a large block + slab_obj_t* bs = (slab_obj_t*)ptr - 1; + +#ifdef MEM_POISON + // Remove memory barrier + mprotect(ptr + bs->size, sizeof(int), PROT_READ|PROT_WRITE); +#endif + + // Unmap + dbg_mem("%s: unmapping large block of %zu bytes at %p\n", + __func__, bs->size, ptr); + free(bs); + } +} + +int slab_cache_init(slab_cache_t* cache, size_t bufsize) +{ + if (unlikely(!bufsize)) { + return -1; + } + + cache->empty = 0; + cache->bufsize = bufsize; + cache->slabs_free = cache->slabs_full = 0; + cache->color = 0; + + /* Initialize stats */ + cache->stat_allocs = cache->stat_frees = 0; + + dbg_mem("%s: created cache of size %zu\n", + __func__, bufsize); + + return 0; +} + +void slab_cache_destroy(slab_cache_t* cache) { + + // Free slabs + unsigned free_s = slab_cache_free_slabs(cache->slabs_free); + unsigned full_s = slab_cache_free_slabs(cache->slabs_full); +#ifndef MEM_DEBUG + UNUSED(free_s); + UNUSED(full_s); +#else + dbg_mem("%s: %u empty/partial, %u full caches\n", + __func__, free_s, full_s); +#endif + + // Invalidate cache + cache->bufsize = 0; + cache->slabs_free = cache->slabs_full = 0; +} + +void* slab_cache_alloc(slab_cache_t* cache) +{ + slab_t* slab = cache->slabs_free; + if(!cache->slabs_free) { + slab = slab_create(cache); + if (slab == NULL) { + return NULL; + } + } + + + return slab_alloc(slab); +} + +int slab_cache_reap(slab_cache_t* cache) +{ + // For now, just free empty slabs + slab_t* slab = cache->slabs_free; + int count = 0; + while (slab) { + slab_t* next = slab->next; + if (slab_isempty(slab)) { + slab_destroy(&slab); + ++count; + } + slab = next; + + } + + cache->empty = 0; + return count; +} + +int slab_alloc_init(slab_alloc_t* alloc) +{ + // Invalidate + memset(alloc, 0, sizeof(slab_alloc_t)); + + // Initialize descriptors cache + slab_cache_init(&alloc->descriptors, sizeof(slab_cache_t)); + + return 0; +} + +void slab_alloc_destroy(slab_alloc_t* alloc) +{ + // Destroy all caches + for (unsigned i = 0; i < SLAB_CACHE_COUNT; ++i) { + if (alloc->caches[i] != 0) { + slab_cache_destroy(alloc->caches[i]); + } + } + + // Destroy cache for descriptors + slab_cache_destroy(&alloc->descriptors); +} + +void* slab_alloc_alloc(slab_alloc_t* alloc, size_t size) +{ + // Invalid size check + if (unlikely(!size)) { + return 0; + } + +#ifdef MEM_POISON + // Reserve memory for poison + size += sizeof(int); +#endif + // Directly map large block + if (unlikely(size > SLAB_SIZE/2)) { + + // Map block + size += sizeof(slab_obj_t); + slab_obj_t* p = 0; + p = malloc(size); + + dbg_mem("%s: mapping large block of %zu bytes at 
%p\n", + __func__, size, p + 1); + + /* Initialize. */ + p->magic = LOBJ_MAGIC; + p->size = size - sizeof(slab_obj_t); + +#ifdef MEM_POISON + // Reduce real size + p->size -= sizeof(int); + + // Memory barrier + int* pb = (int*)((char*)p + size - sizeof(int)); + *pb = POISON_DWORD; + mprotect(pb, sizeof(int), PROT_NONE); +#endif + + return p + 1; + } + + // Get cache id from size + unsigned cache_id = slab_cache_id(size); + + // Check if associated cache exists + if (unlikely(alloc->caches[cache_id] == 0)) { + + // Assert minimum cache size + if (unlikely(size < SLAB_MIN_BUFLEN)) { + size = SLAB_MIN_BUFLEN; + } + + // Calculate cache bufsize + size_t bufsize = size; + if (cache_id < SLAB_GP_COUNT) { + bufsize = get_next_pow2(size); + } + + // Create cache + dbg_mem("%s: creating cache of %zuB (req. %zuB) (id=%u)\n", + __func__, bufsize, size, cache_id); + + slab_cache_t* cache = slab_cache_alloc(&alloc->descriptors); + slab_cache_init(cache, bufsize); + alloc->caches[cache_id] = cache; + } + + // Allocate from cache + void* mem = slab_cache_alloc(alloc->caches[cache_id]); + +#ifdef MEM_POISON + // Memory barrier + /*! \todo Broken, need to store the barrier byte size. */ + //int* pb = (int*)((char*)mem + size - sizeof(int)); + //mprotect(pb, sizeof(int), PROT_NONE); +#endif + return mem; +} + +void *slab_alloc_realloc(slab_alloc_t* alloc, void *ptr, size_t size) +{ + // realloc(0) equals to free(ptr) + if (!size) { + slab_free(ptr); + return 0; + } + + // Allocate new buf + void *nptr = slab_alloc_alloc(alloc, size); + assert(nptr); + + // Copy memory if present + if (ptr) { + slab_t* slab = slab_from_ptr(ptr); + memcpy(nptr, ptr, slab->cache->bufsize); + + // Free old buf + slab_free(ptr); + } + + return nptr; +} + +void slab_alloc_stats(slab_alloc_t* alloc) +{ +#ifdef MEM_DEBUG + printf("Cache usage:\n"); + for (int i = 0; i < SLAB_CACHE_COUNT; ++i) { + + if (!alloc->caches[i]) + continue; + + slab_cache_t* cache = alloc->caches[i]; + unsigned free_s = slab_list_walk(cache->slabs_free); + unsigned full_s = slab_list_walk(cache->slabs_full); + printf("%4zu: allocs=%lu frees=%lu " + "(%u empty+partial, %u full)\n", + cache->bufsize, cache->stat_allocs, + cache->stat_frees, free_s, full_s); + } +#else + printf("Cache usage: not available, enable MEM_DEBUG and recompile.\n"); +#endif +} + diff --git a/src/common/slab/slab.h b/src/common/slab/slab.h new file mode 100644 index 0000000..d64188e --- /dev/null +++ b/src/common/slab/slab.h @@ -0,0 +1,353 @@ +/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +/*! + * \file slab.h + * + * \author Marek Vavrusa <marek.vavusa@nic.cz> + * + * \brief SLAB allocator. + * + * SLAB cache works with either custom SLAB sizes and + * Next-Highest-Power-Of-2 sizes. + * + * Slab size is a multiple of PAGE_SIZE and uses + * system allocator for larger blocks. 
+ * + * Allocated SLABs are PAGE_SIZE aligned for a fast O(1) + * address-from-item lookup. This results in nearly none memory + * overhead for a very small blocks (<64B), but it requires the + * underlying allocator to be effective in allocating page size aligned memory + * as well. The major disadvantage is that each Slab must be aligned to it's + * size as opposed to boundary tags. + * + * Slab implements simple coloring mechanism to improve + * cache line utilisation. + * + * \ref SLAB_SIZE is a fixed size of a slab. As a rule of thumb, the slab is + * effective when the maximum allocated block size is below 1/4 of a SLAB_SIZE. + * f.e. 16kB SLAB is most effective up to 4kB block size. + * + * \ref MEM_POISON flag enables checking read/writes after the allocated memory + * and segmentation fault. This poses a significant time and space overhead. + * Enable only when debugging. + * + * \ref MEM_SLAB_CAP defines a maximum limit of a number of empty slabs that a cache + * can keep at a time. This results in a slight performance regression, + * but actively recycles unuse memory. + * + * \ref MEM_DEPOT_COUNT defines how many recycled slabs will be cached for a later + * use instead of returning them immediately to the OS. This significantly + * reduces a number of syscalls in some cases. + * f.e. 16 means 16 * SLAB_SIZE cache, for 16kB slabs = 256kB cache + * + * \ref MEM_COLORING enables simple cache coloring. This is generally a useful + * feature since all slabs are page size aligned and + * (depending on architecture) this slightly improves performance + * and cacheline usage at the cost of a minimum of 64 bytes per slab of + * overhead. Undefine MEM_COLORING in common.h to disable coloring. + * + * Optimal usage for a specific behavior (similar allocation sizes): + * \code + * slab_cache_t cache; + * slab_cache_init(&cache, N); // Initialize, N means cache chunk size + * ... + * void* mem = slab_cache_alloc(&cache); // Allocate N bytes + * ... + * slab_free(mem); // Recycle memory + * ... + * slab_cache_destroy(&cache); // Deinitialize cache + * \endcode + * + * + * \todo Allocate slab headers elsewhere and use just first sizeof(void*) bytes + * in each slab as a pointer to slab header. This could improve the + * performance. + * + * \note Slab allocation is not thread safe for performance reasons. + * + * \addtogroup alloc + * @{ + */ + +#ifndef _KNOTD_COMMON_SLAB_H_ +#define _KNOTD_COMMON_SLAB_H_ + +#include <pthread.h> +#include <stdint.h> + +/* Constants. */ +#define SLAB_SIZE (4096*4) //!< Slab size (16K blocks) +#define SLAB_MIN_BUFLEN 8 //!< Minimal allocation block size is 8B. +#define SLAB_EXP_OFFSET 3 //!< Minimal allocation size is 8B = 2^3, exp is 3. +#define SLAB_GP_COUNT 10 //!< General-purpose caches count. +#define SLAB_US_COUNT 10 //!< User-specified caches count. +#define SLAB_DEPOT_SIZE 16 //!< N slabs cached = N*SLAB_SIZE kB cap +#define SLAB_CACHE_COUNT (SLAB_GP_COUNT + SLAB_US_COUNT) //!< Slab cache count. +struct slab_cache_t; + +/* Macros. */ + +/*! \brief Return slab base address from pointer. */ +#define slab_from_ptr(p) ((void*)((size_t)(p) & SLAB_MASK)) + +/*! \brief Return true if slab is empty. */ +#define slab_isempty(s) ((s)->bufs_free == (s)->bufs_count) + +/*! + * \brief Slab descriptor. + * + * Slab is a block of memory used for an allocation of + * smaller objects (bufs) later on. + * Each slab is currently aligned to page size to easily + * determine slab address from buf pointer. 
+ * + * \warning Do not use slab_t directly as it cannot grow, see slab_cache_t. + */ +typedef struct slab_t { + char magic; /*!< Identifies memory block type. */ + unsigned short bufsize; /*!< Slab bufsize. */ + struct slab_cache_t *cache; /*!< Owner cache. */ + struct slab_t *prev, *next; /*!< Neighbours in slab lists. */ + unsigned bufs_count; /*!< Number of bufs in slab. */ + unsigned bufs_free; /*!< Number of available bufs. */ + void **head; /*!< Pointer to first available buf. */ + char* base; /*!< Base address for bufs. */ +} slab_t; + +/*! + * \brief Slab depot. + * + * To mitigate slab initialization costs, depot keeps a finite number of + * stacked slabs before returning them to the system. + */ +typedef struct slab_depot_t { + size_t available; /*!< Number of available pages. */ + slab_t* cache[SLAB_DEPOT_SIZE]; /*!< Stack of free slabs. */ +} slab_depot_t; + +/*! + * \brief Large object descriptor. + * + * Large object differs from slab with magic byte and + * contains object size. + * + * Magic needs to be first to overlap with slab_t magic byte. + */ +typedef struct slab_obj_t { + char magic; /*!< Identifies memory block type. */ + size_t size; /*!< Object size. */ +} slab_obj_t; + +/*! + * \brief Slab cache descriptor. + * + * Slab cache is a list of 0..n slabs with the same buf size. + * It is responsible for slab state keeping. + * + * Once a slab is created, it is moved to free list. + * When it is full, it is moved to full list. + * Once a buf from full slab is freed, the slab is moved to + * free list again (there may be some hysteresis for mitigating + * a sequential alloc/free). + * + * Allocation of new slabs is on-demand, empty slabs are reused if possible. + * + * \note Slab implementation is different from Bonwick (Usenix 2001) + * http://www.usenix.org/event/usenix01/bonwick.html + * as it doesn't feature empty and partial list. + * This is due to fact, that user space allocator rarely + * needs to count free slabs. There is no way the OS could + * notify the application, that the memory is scarce. + * A slight performance increased is measured in benchmark. + * + * \note Statistics are only available if MEM_DEBUG is enabled. + */ +typedef struct slab_cache_t { + unsigned short color; /*!< Current cache color. */ + unsigned short empty; /*!< Number of empty slabs. */ + size_t bufsize; /*!< Cache object (buf) size. */ + slab_t *slabs_free; /*!< List of free slabs. */ + slab_t *slabs_full; /*!< List of full slabs. */ + + /* Statistics. */ + unsigned long stat_allocs; /*!< Allocation count. */ + unsigned long stat_frees; /*!< Free count. */ +} slab_cache_t; + +/*! + * \brief Slab allocator descriptor. + * + * \note For a number of slab caches, consult SLAB_GP_COUNT + * and a number of specific records in SLAB_CACHE_LUT lookup table. + * + * \warning It is currently not advised to use this general purpose allocator, + * as it usually doesn't yield an expected performance for higher + * bookkeeping costs and it also depends on the allocation behavior + * as well. Look for slab_cache for a specialized use in most cases. + */ +typedef struct slab_alloc_t { + slab_cache_t descriptors; /*!< Slab cache for cache descriptors. */ + slab_cache_t* caches[SLAB_CACHE_COUNT]; /*!< Number of slab caches. */ +} slab_alloc_t; + +/*! + * \brief Create a slab of predefined size. + * + * At the moment, slabs are equal to page size and page size aligned. + * This enables quick and efficient buf to slab lookup by pointer arithmetic. 
+ * + * Slab uses simple coloring scheme with and the memory block is always + * sizeof(void*) aligned. + * + * \param cache Parent cache. + * \retval Slab instance on success. + * \retval NULL on error. + */ +slab_t* slab_create(slab_cache_t* cache); + +/*! + * \brief Destroy slab instance. + * + * Slab is disconnected from any list and freed. + * Dereferenced slab parameter is set to NULL. + * + * \param slab Pointer to given slab. + */ +void slab_destroy(slab_t** slab); + +/*! + * \brief Allocate a buf from slab. + * + * Returns a pointer to allocated memory or NULL on error. + * + * \param slab Given slab instance. + * \retval Pointer to allocated memory. + * \retval NULL on error. + */ +void* slab_alloc(slab_t* slab); + +/*! + * \brief Recycle memory. + * + * Given memory is returned to owner slab. + * Memory content may be rewritten. + * + * \param ptr Returned memory. + */ +void slab_free(void* ptr); + +/*! + * \brief Create a slab cache. + * + * Create a slab cache with no allocated slabs. + * Slabs are allocated on-demand. + * + * \param cache Pointer to uninitialized cache. + * \param bufsize Single item size for later allocs. + * \retval 0 on success. + * \retval -1 on error; + */ +int slab_cache_init(slab_cache_t* cache, size_t bufsize); + +/*! + * \brief Destroy a slab cache. + * + * Destroy a slab cache and all associated slabs. + * + * \param cache Pointer to slab cache. + */ +void slab_cache_destroy(slab_cache_t* cache); + +/*! + * \brief Allocate from the cache. + * + * It tries to use partially free caches first, + * empty caches second and allocates a new cache + * as a last resort. + * + * \param cache Given slab cache. + * \retval Pointer to allocated memory. + * \retval NULL on error. + */ +void* slab_cache_alloc(slab_cache_t* cache); + +/*! + * \brief Free unused slabs from cache. + * + * \param cache Given slab cache. + * \return Number of freed slabs. + */ +int slab_cache_reap(slab_cache_t* cache); + +/*! + * \brief Create a general purpose slab allocator. + * + * \note Please consult struct slab_alloc_t for performance hints. + * + * \retval 0 on success. + * \retval -1 on error. + */ +int slab_alloc_init(slab_alloc_t* alloc); + +/*! + * \brief Delete slab allocator. + * + * This destroys all associated caches and frees memory. + * + * \param alloc Given allocator instance. + */ +void slab_alloc_destroy(slab_alloc_t* alloc); + +/*! + * \brief Allocate a block of memory. + * + * Returns a block of allocated memory. + * + * \note At least SLAB_MIN_BUFSIZE bytes is allocated. + * + * \note Please consult struct slab_alloc_t for performance hints. + * + * \param alloc Allocator instance. + * \param size Requested block size. + * \retval Pointer to allocated memory. + * \retval NULL on error. + */ +void* slab_alloc_alloc(slab_alloc_t* alloc, size_t size); + +/*! + * \brief Reallocate data from one slab to another. + * + * \param alloc Allocator instance. + * \param ptr Pointer to allocated memory. + * \param size Requested memory block size. + * \retval Pointer to newly allocated memory. + * \retval NULL on error. + * + * \todo Realloc could be probably implement more effectively. + */ +void *slab_alloc_realloc(slab_alloc_t* alloc, void *ptr, size_t size); + +/*! + * + * \brief Dump allocator stats. + * + * \param alloc Allocator instance. + */ +void slab_alloc_stats(slab_alloc_t* alloc); + +#endif /* _KNOTD_COMMON_SLAB_H_ */ + +/*! @} */ |
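The files above expose two allocation paths: a specialized `slab_cache_t`, where every buf shares one size, and the general-purpose `slab_alloc_t`, which routes each request to a power-of-two cache and falls back to `malloc()` for blocks larger than half a slab. The sketch below is a usage example assembled only from the declarations in src/common/slab/slab.h; it is not part of this import, and the test program, its include path and the 64 B / 100 B / 300 B sizes are assumptions for illustration.

```c
/* Usage sketch for the imported slab allocator (assumed test harness,
 * built with the src/ directory on the include path). */
#include <string.h>
#include "common/slab/slab.h"

int main(void)
{
	/* Specialized cache: all bufs are 64 B (similar allocation sizes). */
	slab_cache_t cache;
	if (slab_cache_init(&cache, 64) != 0) {
		return 1;
	}

	void *obj = slab_cache_alloc(&cache);    /* allocates a slab on demand */
	if (obj != NULL) {
		memset(obj, 0, 64);
		slab_free(obj);                  /* buf goes back to its slab */
	}

	slab_cache_reap(&cache);                 /* release empty slabs */
	slab_cache_destroy(&cache);

	/* General-purpose allocator: bufsize is rounded to the next power
	 * of two, so 100 B lands in a 128 B cache and 300 B in a 512 B one. */
	slab_alloc_t alloc;
	slab_alloc_init(&alloc);

	char *buf = slab_alloc_alloc(&alloc, 100);
	buf = slab_alloc_realloc(&alloc, buf, 300);  /* copies, frees old buf */
	slab_free(buf);                              /* works for any slab buf */

	slab_alloc_stats(&alloc);     /* per-cache counters need MEM_DEBUG */
	slab_alloc_destroy(&alloc);
	return 0;
}
```

As the notes in slab.h point out, the per-size cache is the intended fast path; the general-purpose allocator is kept mainly for convenience, since its extra bookkeeping and cache lookup usually cost more than a dedicated `slab_cache_t`.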