author | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700
---|---|---
committer | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700
commit | 7c478bd95313f5f23a4c958a745db2134aa03244 | (patch)
tree | c871e58545497667cbb4b0a4f2daf204743e1fe7 | usr/src/lib/libmtmalloc/common/mtmalloc.c
OpenSolaris Launch
Diffstat (limited to 'usr/src/lib/libmtmalloc/common/mtmalloc.c')
-rw-r--r-- | usr/src/lib/libmtmalloc/common/mtmalloc.c | 1508 |
1 files changed, 1508 insertions, 0 deletions
diff --git a/usr/src/lib/libmtmalloc/common/mtmalloc.c b/usr/src/lib/libmtmalloc/common/mtmalloc.c
new file mode 100644
index 0000000000..c71024abc8
--- /dev/null
+++ b/usr/src/lib/libmtmalloc/common/mtmalloc.c
@@ -0,0 +1,1508 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <mtmalloc.h>
+#include "mtmalloc_impl.h"
+#include <unistd.h>
+#include <synch.h>
+#include <thread.h>
+#include <stdio.h>
+#include <limits.h>
+#include <errno.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+
+/*
+ * To turn on the asserts, just compile with -DDEBUG.
+ */
+
+#ifndef DEBUG
+#define NDEBUG
+#endif
+
+#include <assert.h>
+
+/*
+ * The MT hot malloc implementation contained herein is designed to be
+ * plug-compatible with the libc version of malloc. It is not intended
+ * to replace that implementation until we decide that it is ok to break
+ * customer apps (Solaris 3.0).
+ *
+ * For requests up to 2^^16, the allocator initializes itself into NCPUS
+ * worth of chains of caches. When a memory request is made, the calling
+ * thread is vectored into one of NCPUS worth of caches. The LWP id gives
+ * us a cheap, contention-reducing index to use; eventually, this should
+ * be replaced with the actual CPU sequence number, once an interface to
+ * get it is available.
+ *
+ * Once the thread is vectored into one of the lists of caches, the real
+ * allocation of the memory begins. The size is examined to determine which
+ * bucket the allocation should be satisfied from. The management of free
+ * buckets is done via a bitmask. A free bucket is represented by a 1. The
+ * first free bit represents the first free bucket. The position of the bit
+ * represents the position of the bucket in the arena.
+ *
+ * When the memory from the arena is handed out, the address of the cache
+ * control structure is written in the word preceding the returned memory.
+ * This cache control address is used during free() to mark the buffer free
+ * in the cache control structure.
+ *
+ * When all available memory in a cache has been depleted, a new chunk of
+ * memory is allocated via sbrk(). The new cache is allocated from this
+ * chunk of memory and initialized in the function create_cache(). New
+ * caches are installed at the front of a singly linked list of memory
+ * pools of the same size. This helps to ensure that there will tend to be
+ * available memory at the beginning of the list.
+ *
+ * Long linked lists hurt performance. To decrease this effect, there is a
+ * tunable, requestsize, that bumps up the sbrk allocation size and thus
+ * increases the number of available blocks within an arena. We also keep
+ * a "hint" for each cache list, which is the last cache in the list
+ * allocated from. This lowers the cost of searching if there are a lot of
+ * fully allocated blocks at the front of the list.
+ *
+ * For requests greater than 2^^16 (oversize allocations), there are two
+ * pieces of overhead: the OVERHEAD used to hold the cache addr
+ * (&oversize_list), plus an oversize_t structure to further describe the
+ * block.
+ *
+ * The oversize list is kept as defragmented as possible by coalescing
+ * freed oversized allocations with adjacent neighbors.
+ *
+ * Addresses handed out are stored in a hash table, and are aligned on
+ * MTMALLOC_MIN_ALIGN-byte boundaries at both ends. Request sizes are
+ * rounded up where necessary in order to achieve this. This eases the
+ * implementation of MTDEBUGPATTERN and MTINITPATTERN, particularly where
+ * coalescing occurs.
+ *
+ * A memalign allocation carries memalign header overhead. There are two
+ * types of memalign headers, distinguished by MTMALLOC_MEMALIGN_MAGIC
+ * and MTMALLOC_MEMALIGN_MIN_MAGIC. When the distance from the malloc'ed
+ * address to the aligned address is exactly OVERHEAD, we create a header
+ * taking only one OVERHEAD space, with magic number
+ * MTMALLOC_MEMALIGN_MIN_MAGIC; subtracting OVERHEAD from the memaligned
+ * address then gets us back to the malloc'ed address. Otherwise, we
+ * create a memalign header taking two OVERHEAD spaces: one stores the
+ * MTMALLOC_MEMALIGN_MAGIC magic number, and the other points back to the
+ * malloc'ed address.
+ */
+
+#if defined(__i386) || defined(__amd64)
+#include <arpa/inet.h>	/* for htonl() */
+#endif
+
+static void * morecore(size_t);
+static int setup_caches(void);
+static void create_cache(cache_t *, size_t bufsize, uint_t hunks);
+static void * malloc_internal(size_t, percpu_t *);
+static void * oversize(size_t);
+static oversize_t *find_oversize(size_t);
+static void add_oversize(oversize_t *);
+static void copy_pattern(uint32_t, void *, size_t);
+static void * verify_pattern(uint32_t, void *, size_t);
+static void reinit_cpu_list(void);
+static void reinit_cache(cache_t *);
+static void free_oversize(oversize_t *);
+static oversize_t *oversize_header_alloc(uintptr_t, size_t);
+
+/*
+ * oversize hash table stuff
+ */
+#define NUM_BUCKETS	67	/* must be prime */
+#define HASH_OVERSIZE(caddr)	((uintptr_t)(caddr) % NUM_BUCKETS)
+oversize_t *ovsz_hashtab[NUM_BUCKETS];
+
+/*
+ * Gets a decent "current cpu identifier", to be used to reduce contention.
+ * Eventually, this should be replaced by an interface to get the actual
+ * CPU sequence number in libthread/liblwp.
+ */
+extern uint_t _thr_self();
+#pragma weak _thr_self
+#define get_curcpu_func() (curcpu_func)_thr_self
+
+#define ALIGN(x, a)	((((uintptr_t)(x) + ((uintptr_t)(a) - 1)) \
+			& ~((uintptr_t)(a) - 1)))
+
+/* need this to deal with the little endianness of x86 */
+#if defined(__i386) || defined(__amd64)
+#define FLIP_EM(x)	htonl((x))
+#else
+#define FLIP_EM(x)	(x)
+#endif
+
+#define INSERT_ONLY			0
+#define COALESCE_LEFT			0x00000001
+#define COALESCE_RIGHT			0x00000002
+#define COALESCE_WITH_BOTH_SIDES	(COALESCE_LEFT | COALESCE_RIGHT)
+
+#define OVERHEAD	8	/* size needed to write cache addr */
+#define HUNKSIZE	8192	/* just a multiplier */
+
+#define MAX_CACHED_SHIFT	16	/* 64K is the max cached size */
+#define MAX_CACHED		(1 << MAX_CACHED_SHIFT)
+#define MIN_CACHED_SHIFT	4	/* smaller requests rounded up */
+#define MTMALLOC_MIN_ALIGN	8	/* min guaranteed alignment */
+
+#define NUM_CACHES	(MAX_CACHED_SHIFT - MIN_CACHED_SHIFT + 1)
+#define CACHELIST_SIZE	ALIGN(NUM_CACHES * sizeof (cache_head_t), \
+    CACHE_COHERENCY_UNIT)
+
+#define MINSIZE		9	/* for requestsize, tunable */
+#define MAXSIZE		256	/* arbitrary, big enough, for requestsize */
+
+#define FREEPATTERN	0xdeadbeef /* debug fill pattern for free buf */
+#define INITPATTERN	0xbaddcafe /* debug fill pattern for new buf */
+
+#define misaligned(p)	((unsigned)(p) & (sizeof (int) - 1))
+#define IS_OVERSIZE(x, y)	(((x) < (y)) && (((x) > MAX_CACHED)? 1 : 0))
+
+static long requestsize = MINSIZE; /* 9 pages per cache; tunable; 9 is min */
+
+static uint_t cpu_mask;
+static curcpu_func curcpu;
+
+static int32_t debugopt;
+static int32_t reinit;
+
+static percpu_t *cpu_list;
+static oversize_t oversize_list;
+static mutex_t oversize_lock;
+
+static int ncpus;
+
+#define MTMALLOC_OVERSIZE_MAGIC		((uintptr_t)&oversize_list)
+#define MTMALLOC_MEMALIGN_MAGIC		((uintptr_t)&oversize_list + 1)
+#define MTMALLOC_MEMALIGN_MIN_MAGIC	((uintptr_t)&oversize_list + 2)
+
+/*
+ * We require allocations handed out to be aligned on MTMALLOC_MIN_ALIGN-byte
+ * boundaries. We round up sizeof (oversize_t) (when necessary) to ensure
+ * that this is achieved.
+ */
+#define OVSZ_SIZE	(ALIGN(sizeof (oversize_t), MTMALLOC_MIN_ALIGN))
+#define OVSZ_HEADER_SIZE	(OVSZ_SIZE + OVERHEAD)
+
+/*
+ * A memalign header takes 2 OVERHEAD spaces: one for the memalign magic, and
+ * the other points back to the start address of the originally allocated
+ * space.
+ */
+#define MEMALIGN_HEADER_SIZE	2 * OVERHEAD
+#define MEMALIGN_HEADER_ALLOC(x, shift, malloc_addr)\
+	if (shift == OVERHEAD)\
+		*((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
+			MTMALLOC_MEMALIGN_MIN_MAGIC; \
+	else {\
+		*((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
+			MTMALLOC_MEMALIGN_MAGIC; \
+		*((uintptr_t *)((caddr_t)x - 2 * OVERHEAD)) = \
+			(uintptr_t)malloc_addr; \
+	}
+
+void *
+malloc(size_t bytes)
+{
+	percpu_t *list_rotor;
+	uint_t list_index;
+
+	/*
+	 * This test is due to linking with libthread.
+	 * There are malloc calls prior to this library
+	 * being initialized.
+ * + * If setup_caches fails, we set ENOMEM and return NULL + */ + if (cpu_list == (percpu_t *)NULL) { + if (setup_caches() == 0) { + errno = ENOMEM; + return (NULL); + } + } + + if (bytes > MAX_CACHED) + return (oversize(bytes)); + + list_index = (curcpu() & cpu_mask); + + list_rotor = &cpu_list[list_index]; + + return (malloc_internal(bytes, list_rotor)); +} + +void * +realloc(void * ptr, size_t bytes) +{ + void *new, *data_ptr; + cache_t *cacheptr; + caddr_t mem; + size_t shift = 0; + + if (ptr == NULL) + return (malloc(bytes)); + + if (bytes == 0) { + free(ptr); + return (NULL); + } + + data_ptr = ptr; + mem = (caddr_t)ptr - OVERHEAD; + + new = malloc(bytes); + + if (new == NULL) + return (NULL); + + /* + * If new == ptr, ptr has previously been freed. Passing a freed pointer + * to realloc() is not allowed - unless the caller specifically states + * otherwise, in which case we must avoid freeing ptr (ie new) before we + * return new. There is (obviously) no requirement to memcpy() ptr to + * new before we return. + */ + if (new == ptr) { + if (!(debugopt & MTDOUBLEFREE)) + abort(); + return (new); + } + + if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) { + mem -= OVERHEAD; + ptr = (void *)*(uintptr_t *)mem; + mem = (caddr_t)ptr - OVERHEAD; + shift = (size_t)((uintptr_t)data_ptr - (uintptr_t)ptr); + } else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) { + ptr = (void *) mem; + mem -= OVERHEAD; + shift = OVERHEAD; + } + + if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) { + oversize_t *old; + + old = (oversize_t *)(mem - OVSZ_SIZE); + (void) memcpy(new, data_ptr, MIN(bytes, old->size - shift)); + free(ptr); + return (new); + } + + cacheptr = (cache_t *)*(uintptr_t *)mem; + + (void) memcpy(new, data_ptr, + MIN(cacheptr->mt_size - OVERHEAD - shift, bytes)); + free(ptr); + + return (new); +} + +void * +calloc(size_t nelem, size_t bytes) +{ + void * ptr; + size_t size = nelem * bytes; + + ptr = malloc(size); + if (ptr == NULL) + return (NULL); + bzero(ptr, size); + + return (ptr); +} + +void +free(void * ptr) +{ + cache_t *cacheptr; + caddr_t mem; + int32_t i; + caddr_t freeblocks; + uintptr_t offset; + uchar_t mask; + int32_t which_bit, num_bytes; + + if (ptr == NULL) + return; + + mem = (caddr_t)ptr - OVERHEAD; + + if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) { + mem -= OVERHEAD; + ptr = (void *)*(uintptr_t *)mem; + mem = (caddr_t)ptr - OVERHEAD; + } else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) { + ptr = (void *) mem; + mem -= OVERHEAD; + } + + if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) { + oversize_t *big, **opp; + int bucket; + + big = (oversize_t *)(mem - OVSZ_SIZE); + (void) mutex_lock(&oversize_lock); + + bucket = HASH_OVERSIZE(big->addr); + for (opp = &ovsz_hashtab[bucket]; *opp != NULL; + opp = &(*opp)->hash_next) + if (*opp == big) + break; + + if (*opp == NULL) { + if (!(debugopt & MTDOUBLEFREE)) + abort(); + (void) mutex_unlock(&oversize_lock); + return; + } + + *opp = big->hash_next; /* remove big from the hash table */ + big->hash_next = NULL; + + if (debugopt & MTDEBUGPATTERN) + copy_pattern(FREEPATTERN, ptr, big->size); + add_oversize(big); + (void) mutex_unlock(&oversize_lock); + return; + } + + cacheptr = (cache_t *)*(uintptr_t *)mem; + freeblocks = cacheptr->mt_freelist; + + /* + * This is the distance measured in bits into the arena. + * The value of offset is in bytes but there is a 1-1 correlation + * between distance into the arena and distance into the + * freelist bitmask. 
+ */ + offset = mem - cacheptr->mt_arena; + + /* + * i is total number of bits to offset into freelist bitmask. + */ + + i = offset / cacheptr->mt_size; + + num_bytes = i >> 3; + + /* + * which_bit is the bit offset into the byte in the freelist. + * if our freelist bitmask looks like 0xf3 and we are freeing + * block 5 (ie: the 6th block) our mask will be 0xf7 after + * the free. Things go left to right that's why the mask is 0x80 + * and not 0x01. + */ + which_bit = i - (num_bytes << 3); + + mask = 0x80 >> which_bit; + + freeblocks += num_bytes; + + if (debugopt & MTDEBUGPATTERN) + copy_pattern(FREEPATTERN, ptr, cacheptr->mt_size - OVERHEAD); + + (void) mutex_lock(&cacheptr->mt_cache_lock); + + if (*freeblocks & mask) { + if (!(debugopt & MTDOUBLEFREE)) + abort(); + } else { + *freeblocks |= mask; + cacheptr->mt_nfree++; + } + + (void) mutex_unlock(&cacheptr->mt_cache_lock); +} + +void * +memalign(size_t alignment, size_t size) +{ + size_t alloc_size; + uintptr_t offset; + void *alloc_buf; + void *ret_buf; + + if (size == 0 || alignment == 0 || + misaligned(alignment) || + (alignment & (alignment - 1)) != 0) { + errno = EINVAL; + return (NULL); + } + + /* <= MTMALLOC_MIN_ALIGN, malloc can provide directly */ + if (alignment <= MTMALLOC_MIN_ALIGN) + return (malloc(size)); + + alloc_size = size + alignment - MTMALLOC_MIN_ALIGN; + + if (alloc_size < size) { /* overflow */ + errno = ENOMEM; + return (NULL); + } + + alloc_buf = malloc(alloc_size); + + if (alloc_buf == NULL) + /* malloc sets errno */ + return (NULL); + + /* + * If alloc_size > MAX_CACHED, malloc() will have returned a multiple of + * MTMALLOC_MIN_ALIGN, having rounded-up alloc_size if necessary. Since + * we will use alloc_size to return the excess fragments to the free + * list, we also round-up alloc_size if necessary. + */ + if ((alloc_size > MAX_CACHED) && + (alloc_size & (MTMALLOC_MIN_ALIGN - 1))) + alloc_size = ALIGN(alloc_size, MTMALLOC_MIN_ALIGN); + + if ((offset = (uintptr_t)alloc_buf & (alignment - 1)) == 0) { + /* aligned correctly */ + + size_t frag_size = alloc_size - + (size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE); + + /* + * If the leftover piece of the memory > MAX_CACHED, + * split off the piece and return it back to the freelist. + */ + if (IS_OVERSIZE(frag_size, alloc_size)) { + oversize_t *orig, *tail; + uintptr_t taddr; + size_t data_size; + taddr = ALIGN((uintptr_t)alloc_buf + size, + MTMALLOC_MIN_ALIGN); + data_size = taddr - (uintptr_t)alloc_buf; + orig = (oversize_t *)((uintptr_t)alloc_buf - + OVSZ_HEADER_SIZE); + frag_size = orig->size - data_size - + OVSZ_HEADER_SIZE; + orig->size = data_size; + tail = oversize_header_alloc(taddr, frag_size); + free_oversize(tail); + } + ret_buf = alloc_buf; + } else { + uchar_t oversize_bits = 0; + size_t head_sz, data_sz, tail_sz; + uintptr_t ret_addr, taddr, shift, tshift; + oversize_t *orig, *tail; + size_t tsize; + + /* needs to be aligned */ + shift = alignment - offset; + + assert(shift >= MTMALLOC_MIN_ALIGN); + + ret_addr = ((uintptr_t)alloc_buf + shift); + ret_buf = (void *)ret_addr; + + if (alloc_size <= MAX_CACHED) { + MEMALIGN_HEADER_ALLOC(ret_addr, shift, alloc_buf); + return (ret_buf); + } + + /* + * Only check for the fragments when the memory is allocted + * from oversize_list. Split off a fragment and return it + * to the oversize freelist when it's > MAX_CACHED. 
+ */ + + head_sz = shift - MAX(MEMALIGN_HEADER_SIZE, OVSZ_HEADER_SIZE); + + tail_sz = alloc_size - + (shift + size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE); + + oversize_bits |= IS_OVERSIZE(head_sz, alloc_size) | + IS_OVERSIZE(size, alloc_size) << DATA_SHIFT | + IS_OVERSIZE(tail_sz, alloc_size) << TAIL_SHIFT; + + switch (oversize_bits) { + case NONE_OVERSIZE: + case DATA_OVERSIZE: + MEMALIGN_HEADER_ALLOC(ret_addr, shift, + alloc_buf); + break; + case HEAD_OVERSIZE: + /* + * If we can extend data > MAX_CACHED and have + * head still > MAX_CACHED, we split head-end + * as the case of head-end and data oversized, + * otherwise just create memalign header. + */ + tsize = (shift + size) - (MAX_CACHED + 8 + + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE); + + if (!IS_OVERSIZE(tsize, alloc_size)) { + MEMALIGN_HEADER_ALLOC(ret_addr, shift, + alloc_buf); + break; + } else { + tsize += OVSZ_HEADER_SIZE; + taddr = ALIGN((uintptr_t)alloc_buf + + tsize, MTMALLOC_MIN_ALIGN); + tshift = ret_addr - taddr; + MEMALIGN_HEADER_ALLOC(ret_addr, tshift, + taddr); + ret_addr = taddr; + shift = ret_addr - (uintptr_t)alloc_buf; + } + /* FALLTHROUGH */ + case HEAD_AND_DATA_OVERSIZE: + /* + * Split off the head fragment and + * return it back to oversize freelist. + * Create oversize header for the piece + * of (data + tail fragment). + */ + orig = (oversize_t *)((uintptr_t)alloc_buf - + OVSZ_HEADER_SIZE); + (void) oversize_header_alloc(ret_addr - + OVSZ_HEADER_SIZE, + (orig->size - shift)); + orig->size = shift - OVSZ_HEADER_SIZE; + + /* free up the head fragment */ + free_oversize(orig); + break; + case TAIL_OVERSIZE: + /* + * If we can extend data > MAX_CACHED and have + * tail-end still > MAX_CACHED, we split tail + * end, otherwise just create memalign header. + */ + orig = (oversize_t *)((uintptr_t)alloc_buf - + OVSZ_HEADER_SIZE); + tsize = orig->size - (MAX_CACHED + 8 + + shift + OVSZ_HEADER_SIZE + + MTMALLOC_MIN_ALIGN); + if (!IS_OVERSIZE(tsize, alloc_size)) { + MEMALIGN_HEADER_ALLOC(ret_addr, shift, + alloc_buf); + break; + } else { + size = MAX_CACHED + 8; + } + /* FALLTHROUGH */ + case DATA_AND_TAIL_OVERSIZE: + /* + * Split off the tail fragment and + * return it back to oversize freelist. + * Create memalign header and adjust + * the size for the piece of + * (head fragment + data). + */ + taddr = ALIGN(ret_addr + size, + MTMALLOC_MIN_ALIGN); + data_sz = (size_t)(taddr - + (uintptr_t)alloc_buf); + orig = (oversize_t *)((uintptr_t)alloc_buf - + OVSZ_HEADER_SIZE); + tsize = orig->size - data_sz; + orig->size = data_sz; + MEMALIGN_HEADER_ALLOC(ret_buf, shift, + alloc_buf); + tsize -= OVSZ_HEADER_SIZE; + tail = oversize_header_alloc(taddr, tsize); + free_oversize(tail); + break; + case HEAD_AND_TAIL_OVERSIZE: + /* + * Split off the head fragment. + * We try to free up tail-end when we can + * extend data size to (MAX_CACHED + 8) + * and remain tail-end oversized. + * The bottom line is all split pieces + * should be oversize in size. + */ + orig = (oversize_t *)((uintptr_t)alloc_buf - + OVSZ_HEADER_SIZE); + tsize = orig->size - (MAX_CACHED + 8 + + OVSZ_HEADER_SIZE + shift + + MTMALLOC_MIN_ALIGN); + + if (!IS_OVERSIZE(tsize, alloc_size)) { + /* + * If the chunk is not big enough + * to make both data and tail oversize + * we just keep them as one piece. 
+ */ + (void) oversize_header_alloc(ret_addr - + OVSZ_HEADER_SIZE, + orig->size - shift); + orig->size = shift - + OVSZ_HEADER_SIZE; + free_oversize(orig); + break; + } else { + /* + * extend data size > MAX_CACHED + * and handle it as head, data, tail + * are all oversized. + */ + size = MAX_CACHED + 8; + } + /* FALLTHROUGH */ + case ALL_OVERSIZE: + /* + * split off the head and tail fragments, + * return them back to the oversize freelist. + * Alloc oversize header for data seg. + */ + orig = (oversize_t *)((uintptr_t)alloc_buf - + OVSZ_HEADER_SIZE); + tsize = orig->size; + orig->size = shift - OVSZ_HEADER_SIZE; + free_oversize(orig); + + taddr = ALIGN(ret_addr + size, + MTMALLOC_MIN_ALIGN); + data_sz = taddr - ret_addr; + assert(tsize > (shift + data_sz + + OVSZ_HEADER_SIZE)); + tail_sz = tsize - + (shift + data_sz + OVSZ_HEADER_SIZE); + + /* create oversize header for data seg */ + (void) oversize_header_alloc(ret_addr - + OVSZ_HEADER_SIZE, data_sz); + + /* create oversize header for tail fragment */ + tail = oversize_header_alloc(taddr, tail_sz); + free_oversize(tail); + break; + default: + /* should not reach here */ + assert(0); + } + } + return (ret_buf); +} + + +void * +valloc(size_t size) +{ + static unsigned pagesize; + + if (size == 0) + return (NULL); + + if (!pagesize) + pagesize = sysconf(_SC_PAGESIZE); + + return (memalign(pagesize, size)); +} + +void +mallocctl(int cmd, long value) +{ + switch (cmd) { + + case MTDEBUGPATTERN: + /* + * Reinitialize free blocks in case malloc() is called prior + * to mallocctl(). + */ + if (value && !(debugopt & cmd)) { + reinit++; + debugopt |= cmd; + reinit_cpu_list(); + } + /*FALLTHRU*/ + case MTDOUBLEFREE: + case MTINITBUFFER: + if (value) + debugopt |= cmd; + else + debugopt &= ~cmd; + break; + case MTCHUNKSIZE: + if (value >= MINSIZE && value <= MAXSIZE) + requestsize = value; + break; + default: + break; + } +} + +/* + * if this function is changed, update the fallback code in setup_caches to + * set ncpus to the number of possible return values. (currently 1) + */ +static uint_t +fallback_curcpu(void) +{ + return (0); +} + +/* + * Returns non-zero on success, zero on failure. + * + * This carefully doesn't set cpu_list until initialization is finished. + */ +static int +setup_caches(void) +{ + static mutex_t init_lock = DEFAULTMUTEX; + + uintptr_t oldbrk; + uintptr_t newbrk; + + size_t cache_space_needed; + size_t padding; + + curcpu_func new_curcpu; + uint_t new_cpu_mask; + percpu_t *new_cpu_list; + + uint_t i, j; + uintptr_t list_addr; + + (void) mutex_lock(&init_lock); + if (cpu_list != NULL) { + (void) mutex_unlock(&init_lock); + return (1); /* success -- already initialized */ + } + + new_curcpu = get_curcpu_func(); + if (new_curcpu == NULL) { + new_curcpu = fallback_curcpu; + ncpus = 1; + } else { + if ((ncpus = 2 * sysconf(_SC_NPROCESSORS_CONF)) <= 0) + ncpus = 4; /* decent default value */ + } + assert(ncpus > 0); + + /* round ncpus up to a power of 2 */ + while (ncpus & (ncpus - 1)) + ncpus++; + + new_cpu_mask = ncpus - 1; /* create the cpu mask */ + + /* + * We now do some magic with the brk. What we want to get in the + * end is a bunch of well-aligned stuff in a big initial allocation. + * Along the way, we do sanity checks to make sure no one else has + * touched the brk (which shouldn't happen, but it's always good to + * check) + * + * First, make sure sbrk is sane, and store the current brk in oldbrk. 
+ */ + oldbrk = (uintptr_t)sbrk(0); + if ((void *)oldbrk == (void *)-1) { + (void) mutex_unlock(&init_lock); + return (0); /* sbrk is broken -- we're doomed. */ + } + + /* + * Now, align the brk to a multiple of CACHE_COHERENCY_UNIT, so that + * the percpu structures and cache lists will be properly aligned. + * + * 2. All hunks will be page-aligned, assuming HUNKSIZE >= PAGESIZE, + * so they can be paged out individually. + */ + newbrk = ALIGN(oldbrk, CACHE_COHERENCY_UNIT); + if (newbrk != oldbrk && (uintptr_t)sbrk(newbrk - oldbrk) != oldbrk) { + (void) mutex_unlock(&init_lock); + return (0); /* someone else sbrked */ + } + + /* + * For each cpu, there is one percpu_t and a list of caches + */ + cache_space_needed = ncpus * (sizeof (percpu_t) + CACHELIST_SIZE); + + new_cpu_list = (percpu_t *)sbrk(cache_space_needed); + + if (new_cpu_list == (percpu_t *)-1 || + (uintptr_t)new_cpu_list != newbrk) { + (void) mutex_unlock(&init_lock); + return (0); /* someone else sbrked */ + } + + /* + * Finally, align the brk to HUNKSIZE so that all hunks are + * page-aligned, to avoid edge-effects. + */ + + newbrk = (uintptr_t)new_cpu_list + cache_space_needed; + + padding = ALIGN(newbrk, HUNKSIZE) - newbrk; + + if (padding > 0 && (uintptr_t)sbrk(padding) != newbrk) { + (void) mutex_unlock(&init_lock); + return (0); /* someone else sbrked */ + } + + list_addr = ((uintptr_t)new_cpu_list + (sizeof (percpu_t) * ncpus)); + + /* initialize the percpu list */ + for (i = 0; i < ncpus; i++) { + new_cpu_list[i].mt_caches = (cache_head_t *)list_addr; + for (j = 0; j < NUM_CACHES; j++) { + new_cpu_list[i].mt_caches[j].mt_cache = NULL; + new_cpu_list[i].mt_caches[j].mt_hint = NULL; + } + + bzero(&new_cpu_list[i].mt_parent_lock, sizeof (mutex_t)); + + /* get the correct cache list alignment */ + list_addr += CACHELIST_SIZE; + } + + /* + * Initialize oversize listhead + */ + oversize_list.next_bysize = &oversize_list; + oversize_list.prev_bysize = &oversize_list; + oversize_list.next_byaddr = &oversize_list; + oversize_list.prev_byaddr = &oversize_list; + oversize_list.addr = NULL; + oversize_list.size = 0; /* sentinal */ + + /* + * now install the global variables, leaving cpu_list for last, so that + * there aren't any race conditions. + */ + curcpu = new_curcpu; + cpu_mask = new_cpu_mask; + cpu_list = new_cpu_list; + + (void) mutex_unlock(&init_lock); + + return (1); +} + +static void +create_cache(cache_t *cp, size_t size, uint_t chunksize) +{ + long nblocks; + + bzero(&cp->mt_cache_lock, sizeof (mutex_t)); + cp->mt_size = size; + cp->mt_freelist = ((caddr_t)cp + sizeof (cache_t)); + cp->mt_span = chunksize * HUNKSIZE - sizeof (cache_t); + cp->mt_hunks = chunksize; + /* + * rough calculation. We will need to adjust later. 
+ */ + nblocks = cp->mt_span / cp->mt_size; + nblocks >>= 3; + if (nblocks == 0) { /* less than 8 free blocks in this pool */ + int32_t numblocks = 0; + long i = cp->mt_span; + size_t sub = cp->mt_size; + uchar_t mask = 0; + + while (i > sub) { + numblocks++; + i -= sub; + } + nblocks = numblocks; + cp->mt_arena = (caddr_t)ALIGN(cp->mt_freelist + 8, 8); + cp->mt_nfree = numblocks; + while (numblocks--) { + mask |= 0x80 >> numblocks; + } + *(cp->mt_freelist) = mask; + } else { + cp->mt_arena = (caddr_t)ALIGN((caddr_t)cp->mt_freelist + + nblocks, 32); + /* recompute nblocks */ + nblocks = (uintptr_t)((caddr_t)cp->mt_freelist + + cp->mt_span - cp->mt_arena) / cp->mt_size; + cp->mt_nfree = ((nblocks >> 3) << 3); + /* Set everything to free */ + (void) memset(cp->mt_freelist, 0xff, nblocks >> 3); + } + + if (debugopt & MTDEBUGPATTERN) + copy_pattern(FREEPATTERN, cp->mt_arena, cp->mt_size * nblocks); + + cp->mt_next = NULL; +} + +static void +reinit_cpu_list(void) +{ + oversize_t *wp = oversize_list.next_bysize; + percpu_t *cpuptr; + cache_t *thiscache; + cache_head_t *cachehead; + + if (wp == NULL || cpu_list == NULL) { + reinit = 0; + return; + } + + /* Reinitialize free oversize blocks. */ + (void) mutex_lock(&oversize_lock); + if (debugopt & MTDEBUGPATTERN) + for (; wp != &oversize_list; wp = wp->next_bysize) + copy_pattern(FREEPATTERN, wp->addr, wp->size); + (void) mutex_unlock(&oversize_lock); + + /* Reinitialize free blocks. */ + for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) { + (void) mutex_lock(&cpuptr->mt_parent_lock); + for (cachehead = &cpuptr->mt_caches[0]; cachehead < + &cpuptr->mt_caches[NUM_CACHES]; cachehead++) { + for (thiscache = cachehead->mt_cache; thiscache != NULL; + thiscache = thiscache->mt_next) { + (void) mutex_lock(&thiscache->mt_cache_lock); + if (thiscache->mt_nfree == 0) { + (void) mutex_unlock( + &thiscache->mt_cache_lock); + continue; + } + if (thiscache != NULL) + reinit_cache(thiscache); + (void) mutex_unlock(&thiscache->mt_cache_lock); + } + } + (void) mutex_unlock(&cpuptr->mt_parent_lock); + } + reinit = 0; +} + +static void +reinit_cache(cache_t *thiscache) +{ + uint32_t *freeblocks; /* not a uintptr_t on purpose */ + int32_t i, n; + caddr_t ret; + + freeblocks = (uint32_t *)thiscache->mt_freelist; + while (freeblocks < (uint32_t *)thiscache->mt_arena) { + if (*freeblocks & 0xffffffff) { + for (i = 0; i < 32; i++) { + if (FLIP_EM(*freeblocks) & (0x80000000 >> i)) { + n = (uintptr_t)(((freeblocks - + (uint32_t *)thiscache->mt_freelist) << 5) + + i) * thiscache->mt_size; + ret = thiscache->mt_arena + n; + ret += OVERHEAD; + copy_pattern(FREEPATTERN, ret, + thiscache->mt_size); + } + } + } + freeblocks++; + } +} + +static void * +malloc_internal(size_t size, percpu_t *cpuptr) +{ + cache_head_t *cachehead; + cache_t *thiscache, *hintcache; + int32_t i, n, logsz, bucket; + uint32_t index; + uint32_t *freeblocks; /* not a uintptr_t on purpose */ + caddr_t ret; + + logsz = MIN_CACHED_SHIFT; + + while (size > (1 << logsz)) + logsz++; + + bucket = logsz - MIN_CACHED_SHIFT; + + (void) mutex_lock(&cpuptr->mt_parent_lock); + + /* + * Find a cache of the appropriate size with free buffers. + * + * We don't need to lock each cache as we check their mt_nfree count, + * since: + * 1. We are only looking for caches with mt_nfree > 0. If a + * free happens during our search, it will increment mt_nfree, + * which will not effect the test. + * 2. Allocations can decrement mt_nfree, but they can't happen + * as long as we hold mt_parent_lock. 
+ */ + + cachehead = &cpuptr->mt_caches[bucket]; + + /* Search through the list, starting at the mt_hint */ + thiscache = cachehead->mt_hint; + + while (thiscache != NULL && thiscache->mt_nfree == 0) + thiscache = thiscache->mt_next; + + if (thiscache == NULL) { + /* wrap around -- search up to the hint */ + thiscache = cachehead->mt_cache; + hintcache = cachehead->mt_hint; + + while (thiscache != NULL && thiscache != hintcache && + thiscache->mt_nfree == 0) + thiscache = thiscache->mt_next; + + if (thiscache == hintcache) + thiscache = NULL; + } + + + if (thiscache == NULL) { /* there are no free caches */ + int32_t thisrequest = requestsize; + int32_t buffer_size = (1 << logsz) + OVERHEAD; + + thiscache = (cache_t *)morecore(thisrequest * HUNKSIZE); + + if (thiscache == (cache_t *)-1) { + (void) mutex_unlock(&cpuptr->mt_parent_lock); + errno = EAGAIN; + return (NULL); + } + create_cache(thiscache, buffer_size, thisrequest); + + /* link in the new block at the beginning of the list */ + thiscache->mt_next = cachehead->mt_cache; + cachehead->mt_cache = thiscache; + } + + /* update the hint to the cache we found or created */ + cachehead->mt_hint = thiscache; + + /* thiscache now points to a cache with available space */ + (void) mutex_lock(&thiscache->mt_cache_lock); + + freeblocks = (uint32_t *)thiscache->mt_freelist; + while (freeblocks < (uint32_t *)thiscache->mt_arena) { + if (*freeblocks & 0xffffffff) + break; + freeblocks++; + if (freeblocks < (uint32_t *)thiscache->mt_arena && + *freeblocks & 0xffffffff) + break; + freeblocks++; + if (freeblocks < (uint32_t *)thiscache->mt_arena && + *freeblocks & 0xffffffff) + break; + freeblocks++; + if (freeblocks < (uint32_t *)thiscache->mt_arena && + *freeblocks & 0xffffffff) + break; + freeblocks++; + } + + /* + * the offset from mt_freelist to freeblocks is the offset into + * the arena. Be sure to include the offset into freeblocks + * of the bitmask. n is the offset. + */ + for (i = 0; i < 32; ) { + if (FLIP_EM(*freeblocks) & (0x80000000 >> i++)) + break; + if (FLIP_EM(*freeblocks) & (0x80000000 >> i++)) + break; + if (FLIP_EM(*freeblocks) & (0x80000000 >> i++)) + break; + if (FLIP_EM(*freeblocks) & (0x80000000 >> i++)) + break; + } + index = 0x80000000 >> --i; + + + *freeblocks &= FLIP_EM(~index); + + thiscache->mt_nfree--; + + (void) mutex_unlock(&thiscache->mt_cache_lock); + (void) mutex_unlock(&cpuptr->mt_parent_lock); + + n = (uintptr_t)(((freeblocks - (uint32_t *)thiscache->mt_freelist) << 5) + + i) * thiscache->mt_size; + /* + * Now you have the offset in n, you've changed the free mask + * in the freelist. Nothing left to do but find the block + * in the arena and put the value of thiscache in the word + * ahead of the handed out address and return the memory + * back to the user. + */ + ret = thiscache->mt_arena + n; + + /* Store the cache addr for this buf. Makes free go fast. */ + *(uintptr_t *)ret = (uintptr_t)thiscache; + + /* + * This assert makes sure we don't hand out memory that is not + * owned by this cache. 
+ */ + assert(ret + thiscache->mt_size <= thiscache->mt_freelist + + thiscache->mt_span); + + ret += OVERHEAD; + + assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */ + + if (reinit == 0 && (debugopt & MTDEBUGPATTERN)) + if (verify_pattern(FREEPATTERN, ret, size)) + abort(); /* reference after free */ + + if (debugopt & MTINITBUFFER) + copy_pattern(INITPATTERN, ret, size); + return ((void *)ret); +} + +static void * +morecore(size_t bytes) +{ + void * ret; + + if (bytes > LONG_MAX) { + intptr_t wad; + /* + * The request size is too big. We need to do this in + * chunks. Sbrk only takes an int for an arg. + */ + if (bytes == ULONG_MAX) + return ((void *)-1); + + ret = sbrk(0); + wad = LONG_MAX; + while (wad > 0) { + if (sbrk(wad) == (void *)-1) { + if (ret != sbrk(0)) + (void) sbrk(-LONG_MAX); + return ((void *)-1); + } + bytes -= LONG_MAX; + wad = bytes; + } + } else + ret = sbrk(bytes); + + return (ret); +} + + +static void * +oversize(size_t size) +{ + caddr_t ret; + oversize_t *big; + int bucket; + + /* + * The idea with the global lock is that we are sure to + * block in the kernel anyway since given an oversize alloc + * we are sure to have to call morecore(); + */ + (void) mutex_lock(&oversize_lock); + + /* + * Since we ensure every address we hand back is + * MTMALLOC_MIN_ALIGN-byte aligned, ALIGNing size ensures that the + * memory handed out is MTMALLOC_MIN_ALIGN-byte aligned at both ends. + * This eases the implementation of MTDEBUGPATTERN and MTINITPATTERN, + * particularly where coalescing occurs. + */ + size = ALIGN(size, MTMALLOC_MIN_ALIGN); + + if ((big = find_oversize(size)) != NULL) { + if (reinit == 0 && (debugopt & MTDEBUGPATTERN)) + if (verify_pattern(FREEPATTERN, big->addr, size)) + abort(); /* reference after free */ + } else { + /* Get more 8-byte aligned memory from heap */ + ret = morecore(size + OVSZ_HEADER_SIZE); + if (ret == (caddr_t)-1) { + (void) mutex_unlock(&oversize_lock); + errno = ENOMEM; + return (NULL); + } + big = oversize_header_alloc((uintptr_t)ret, size); + } + ret = big->addr; + + /* Add big to the hash table at the head of the relevant bucket. 
*/ + bucket = HASH_OVERSIZE(ret); + big->hash_next = ovsz_hashtab[bucket]; + ovsz_hashtab[bucket] = big; + + if (debugopt & MTINITBUFFER) + copy_pattern(INITPATTERN, ret, size); + + (void) mutex_unlock(&oversize_lock); + assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */ + return ((void *)ret); +} + +static void +insert_oversize(oversize_t *op, oversize_t *nx) +{ + oversize_t *sp; + + /* locate correct insertion point in size-ordered list */ + for (sp = oversize_list.next_bysize; + sp != &oversize_list && (op->size > sp->size); + sp = sp->next_bysize) + ; + + /* link into size-ordered list */ + op->next_bysize = sp; + op->prev_bysize = sp->prev_bysize; + op->prev_bysize->next_bysize = op; + op->next_bysize->prev_bysize = op; + + /* + * link item into address-ordered list + * (caller provides insertion point as an optimization) + */ + op->next_byaddr = nx; + op->prev_byaddr = nx->prev_byaddr; + op->prev_byaddr->next_byaddr = op; + op->next_byaddr->prev_byaddr = op; + +} + +static void +unlink_oversize(oversize_t *lp) +{ + /* unlink from address list */ + lp->prev_byaddr->next_byaddr = lp->next_byaddr; + lp->next_byaddr->prev_byaddr = lp->prev_byaddr; + + /* unlink from size list */ + lp->prev_bysize->next_bysize = lp->next_bysize; + lp->next_bysize->prev_bysize = lp->prev_bysize; +} + +static void +position_oversize_by_size(oversize_t *op) +{ + oversize_t *sp; + + if (op->size > op->next_bysize->size || + op->size < op->prev_bysize->size) { + + /* unlink from size list */ + op->prev_bysize->next_bysize = op->next_bysize; + op->next_bysize->prev_bysize = op->prev_bysize; + + /* locate correct insertion point in size-ordered list */ + for (sp = oversize_list.next_bysize; + sp != &oversize_list && (op->size > sp->size); + sp = sp->next_bysize) + ; + + /* link into size-ordered list */ + op->next_bysize = sp; + op->prev_bysize = sp->prev_bysize; + op->prev_bysize->next_bysize = op; + op->next_bysize->prev_bysize = op; + } +} + +static void +add_oversize(oversize_t *lp) +{ + int merge_flags = INSERT_ONLY; + oversize_t *nx; /* ptr to item right of insertion point */ + oversize_t *pv; /* ptr to item left of insertion point */ + uint_t size_lp, size_pv, size_nx; + uintptr_t endp_lp, endp_pv, endp_nx; + + /* + * Locate insertion point in address-ordered list + */ + + for (nx = oversize_list.next_byaddr; + nx != &oversize_list && (lp->addr > nx->addr); + nx = nx->next_byaddr) + ; + + /* + * Determine how to add chunk to oversize freelist + */ + + size_lp = OVSZ_HEADER_SIZE + lp->size; + endp_lp = ALIGN((uintptr_t)lp + size_lp, MTMALLOC_MIN_ALIGN); + size_lp = endp_lp - (uintptr_t)lp; + + pv = nx->prev_byaddr; + + if (pv->size) { + + size_pv = OVSZ_HEADER_SIZE + pv->size; + endp_pv = ALIGN((uintptr_t)pv + size_pv, + MTMALLOC_MIN_ALIGN); + size_pv = endp_pv - (uintptr_t)pv; + + /* Check for adjacency with left chunk */ + if ((uintptr_t)lp == endp_pv) + merge_flags |= COALESCE_LEFT; + } + + if (nx->size) { + + /* Check for adjacency with right chunk */ + if ((uintptr_t)nx == endp_lp) { + size_nx = OVSZ_HEADER_SIZE + nx->size; + endp_nx = ALIGN((uintptr_t)nx + size_nx, + MTMALLOC_MIN_ALIGN); + size_nx = endp_nx - (uintptr_t)nx; + merge_flags |= COALESCE_RIGHT; + } + } + + /* + * If MTDEBUGPATTERN==1, lp->addr will have been overwritten with + * FREEPATTERN for lp->size bytes. 
If we can merge, the oversize + * header(s) that will also become part of the memory available for + * reallocation (ie lp and/or nx) must also be overwritten with + * FREEPATTERN or we will SIGABRT when this memory is next reallocated. + */ + switch (merge_flags) { + + case INSERT_ONLY: /* Coalescing not possible */ + insert_oversize(lp, nx); + break; + case COALESCE_LEFT: + pv->size += size_lp; + position_oversize_by_size(pv); + if (debugopt & MTDEBUGPATTERN) + copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE); + break; + case COALESCE_RIGHT: + unlink_oversize(nx); + lp->size += size_nx; + insert_oversize(lp, pv->next_byaddr); + if (debugopt & MTDEBUGPATTERN) + copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE); + break; + case COALESCE_WITH_BOTH_SIDES: /* Merge (with right) to the left */ + pv->size += size_lp + size_nx; + unlink_oversize(nx); + position_oversize_by_size(pv); + if (debugopt & MTDEBUGPATTERN) { + copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE); + copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE); + } + break; + } +} + +/* + * Find memory on our list that is at least size big. If we find a block that is + * big enough, we break it up and return the associated oversize_t struct back + * to the calling client. Any leftover piece of that block is returned to the + * freelist. + */ +static oversize_t * +find_oversize(size_t size) +{ + oversize_t *wp = oversize_list.next_bysize; + while (wp != &oversize_list && size > wp->size) + wp = wp->next_bysize; + + if (wp == &oversize_list) /* empty list or nothing big enough */ + return (NULL); + /* breaking up a chunk of memory */ + if ((long)((wp->size - (size + OVSZ_HEADER_SIZE + MTMALLOC_MIN_ALIGN))) + > MAX_CACHED) { + caddr_t off; + oversize_t *np; + size_t osize; + off = (caddr_t)ALIGN(wp->addr + size, + MTMALLOC_MIN_ALIGN); + osize = wp->size; + wp->size = (size_t)(off - wp->addr); + np = oversize_header_alloc((uintptr_t)off, + osize - (wp->size + OVSZ_HEADER_SIZE)); + if ((long)np->size < 0) + abort(); + unlink_oversize(wp); + add_oversize(np); + } else { + unlink_oversize(wp); + } + return (wp); +} + +static void +copy_pattern(uint32_t pattern, void *buf_arg, size_t size) +{ + uint32_t *bufend = (uint32_t *)((char *)buf_arg + size); + uint32_t *buf = buf_arg; + + while (buf < bufend - 3) { + buf[3] = buf[2] = buf[1] = buf[0] = pattern; + buf += 4; + } + while (buf < bufend) + *buf++ = pattern; +} + +static void * +verify_pattern(uint32_t pattern, void *buf_arg, size_t size) +{ + uint32_t *bufend = (uint32_t *)((char *)buf_arg + size); + uint32_t *buf; + + for (buf = buf_arg; buf < bufend; buf++) + if (*buf != pattern) + return (buf); + return (NULL); +} + +static void +free_oversize(oversize_t *ovp) +{ + assert(((uintptr_t)ovp->addr & 7) == 0); /* are we 8 byte aligned */ + assert(ovp->size > MAX_CACHED); + + ovp->next_bysize = ovp->prev_bysize = NULL; + ovp->next_byaddr = ovp->prev_byaddr = NULL; + (void) mutex_lock(&oversize_lock); + add_oversize(ovp); + (void) mutex_unlock(&oversize_lock); +} + +static oversize_t * +oversize_header_alloc(uintptr_t mem, size_t size) +{ + oversize_t *ovsz_hdr; + + assert(size > MAX_CACHED); + + ovsz_hdr = (oversize_t *)mem; + ovsz_hdr->prev_bysize = NULL; + ovsz_hdr->next_bysize = NULL; + ovsz_hdr->prev_byaddr = NULL; + ovsz_hdr->next_byaddr = NULL; + ovsz_hdr->hash_next = NULL; + ovsz_hdr->size = size; + mem += OVSZ_SIZE; + *(uintptr_t *)mem = MTMALLOC_OVERSIZE_MAGIC; + mem += OVERHEAD; + assert(((uintptr_t)mem & 7) == 0); /* are we 8 byte aligned */ + ovsz_hdr->addr = (caddr_t)mem; + return 
(ovsz_hdr);
+}
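
The file above documents the consumer-visible tunables (mallocctl() with MTDEBUGPATTERN, MTDOUBLEFREE, MTINITBUFFER, MTCHUNKSIZE) and the oversize path for requests larger than MAX_CACHED. The sketch below is not part of this commit; it is a minimal, hedged example of how an application might exercise those interfaces, assuming the declarations in <mtmalloc.h> and linking with -lmtmalloc.

/*
 * Consumer-side sketch (not part of the commit): exercise the tunables
 * and the oversize/memalign paths described in the header comment.
 * Assumes <mtmalloc.h> and linking with -lmtmalloc.
 */
#include <stdio.h>
#include <stdlib.h>
#include <mtmalloc.h>	/* mallocctl(), MTDEBUGPATTERN, MTCHUNKSIZE, ... */

int
main(void)
{
	void *p, *aligned;

	/* Fill freed buffers with FREEPATTERN so reuse-after-free aborts. */
	mallocctl(MTDEBUGPATTERN, 1);

	/* Bump requestsize (valid range MINSIZE..MAXSIZE, i.e. 9..256). */
	mallocctl(MTCHUNKSIZE, 16);

	p = malloc(100);		/* served from a per-CPU cache */
	aligned = memalign(64, 1 << 17);	/* > MAX_CACHED: oversize path */

	(void) printf("p=%p aligned=%p\n", p, aligned);

	free(aligned);
	free(p);
	return (0);
}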