author     Robert Mustacchi <rm@joyent.com>    2013-09-29 13:06:51 -0700
committer  Robert Mustacchi <rm@joyent.com>    2014-01-21 18:20:39 -0800
commit     4f364e7c95ee7fd9d5bbeddc1940e92405bb0e72
tree       8f95ebd8dfeb9ab49e53704d900b2d0f0f217b37
parent     38849194df07385a46363bb46861688fde59a98a
4489 need ptcumem
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Garrett D'Amore <garrett@damore.org>
31 files changed, 2184 insertions, 55 deletions
diff --git a/exception_lists/check_rtime b/exception_lists/check_rtime index ce606dc0ea..fce897b09b 100644 --- a/exception_lists/check_rtime +++ b/exception_lists/check_rtime @@ -63,6 +63,8 @@ SKIP ^usr/lib/sysevent/modules/picl_slm.so$ # Objects that are allowed to have executable data segments EXEC_DATA ^MACH(lib)/ld\.so\.1$ EXEC_DATA ^lib/libc\.so\.1$ # 6524709, 32-bit, needed for x86 only +EXEC_DATA ^lib/amd64/libumem\.so\.1$ # ptcumem +EXEC_DATA ^lib/libumem\.so\.1$ # ptcumem EXEC_DATA ^opt/SUNWdtrt/tst/.*/ustack/tst\.helper\.exe$ EXEC_DATA ^platform/.*/MACH(kernel)/unix$ EXEC_DATA ^platform/.*/multiboot$ diff --git a/usr/src/cmd/mdb/common/kmdb/kmdb_umemglue.c b/usr/src/cmd/mdb/common/kmdb/kmdb_umemglue.c index 4193b0dcd5..c2289ec7f7 100644 --- a/usr/src/cmd/mdb/common/kmdb/kmdb_umemglue.c +++ b/usr/src/cmd/mdb/common/kmdb/kmdb_umemglue.c @@ -24,8 +24,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <mdb/mdb_debug.h> #include <mdb/mdb_err.h> #include <mdb/mdb_io.h> @@ -101,6 +99,14 @@ umem_atomic_add_64(uint64_t *target, int64_t delta) *target = *target + delta; } +uint64_t +umem_atomic_swap_64(volatile uint64_t *t, uint64_t v) +{ + uint64_t old = *t; + *t = v; + return (old); +} + /* * Standalone umem must be manually initialized */ diff --git a/usr/src/cmd/mdb/common/modules/libc/libc.c b/usr/src/cmd/mdb/common/modules/libc/libc.c index 27dcade228..44e4f49b87 100644 --- a/usr/src/cmd/mdb/common/modules/libc/libc.c +++ b/usr/src/cmd/mdb/common/modules/libc/libc.c @@ -23,6 +23,9 @@ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. */ +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ #include <sys/mdb_modapi.h> #include <mdb/mdb_whatis.h> @@ -681,6 +684,12 @@ d_ulwp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) prt_addr((void *)(addr + OFFSET(ul_spinlock)), 1), prt_addr((void *)(addr + OFFSET(ul_fpuenv)), 0)); + HD("tmem.size &tmem.roots"); + mdb_printf(OFFSTR "%-21H %s\n", + OFFSET(ul_tmem), + ulwp.ul_tmem.tm_size, + prt_addr((void *)(addr + OFFSET(ul_tmem) + sizeof (size_t)), 0)); + return (DCMD_OK); } diff --git a/usr/src/cmd/mdb/common/modules/libumem/libumem.c b/usr/src/cmd/mdb/common/modules/libumem/libumem.c index 4a77c5aa82..0984edbdf0 100644 --- a/usr/src/cmd/mdb/common/modules/libumem/libumem.c +++ b/usr/src/cmd/mdb/common/modules/libumem/libumem.c @@ -23,6 +23,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
+ */ + #include "umem.h" #include <libproc.h> #include <mdb/mdb_modapi.h> @@ -34,6 +38,8 @@ #include <umem_impl.h> #include <sys/vmem_impl_user.h> +#include <thr_uberdata.h> +#include <stdio.h> #include "umem_pagesize.h" @@ -44,24 +50,33 @@ typedef struct datafmt { char *fmt; } datafmt_t; +static datafmt_t ptcfmt[] = { + { " ", "tid", "---", "%3u " }, + { " memory", " cached", "-------", "%7lH " }, + { " %", "cap", "---", "%3u " }, + { " %", NULL, "---", "%3u " }, + { NULL, NULL, NULL, NULL } +}; + static datafmt_t umemfmt[] = { { "cache ", "name ", "-------------------------", "%-25s " }, { " buf", " size", "------", "%6u " }, - { " buf", "in use", "------", "%6u " }, - { " buf", " total", "------", "%6u " }, - { " memory", " in use", "---------", "%9u " }, + { " buf", " in use", "-------", "%7u " }, + { " buf", " in ptc", "-------", "%7s " }, + { " buf", " total", "-------", "%7u " }, + { " memory", " in use", "-------", "%7H " }, { " alloc", " succeed", "---------", "%9u " }, - { "alloc", " fail", "-----", "%5llu " }, + { "alloc", " fail", "-----", "%5llu" }, { NULL, NULL, NULL, NULL } }; static datafmt_t vmemfmt[] = { { "vmem ", "name ", "-------------------------", "%-*s " }, - { " memory", " in use", "---------", "%9llu " }, - { " memory", " total", "----------", "%10llu " }, - { " memory", " import", "---------", "%9llu " }, + { " memory", " in use", "---------", "%9H " }, + { " memory", " total", "----------", "%10H " }, + { " memory", " import", "---------", "%9H " }, { " alloc", " succeed", "---------", "%9llu " }, { "alloc", " fail", "-----", "%5llu " }, { NULL, NULL, NULL, NULL } @@ -105,14 +120,105 @@ typedef struct umastat_vmem { int kv_fail; } umastat_vmem_t; +/*ARGSUSED*/ +static int +umastat_cache_nptc(uintptr_t addr, const umem_cache_t *cp, int *nptc) +{ + if (!(cp->cache_flags & UMF_PTC)) + return (WALK_NEXT); + + (*nptc)++; + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_cache_hdr(uintptr_t addr, const umem_cache_t *cp, void *ignored) +{ + if (!(cp->cache_flags & UMF_PTC)) + return (WALK_NEXT); + + mdb_printf("%3d ", cp->cache_bufsize); + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_lwp_ptc(uintptr_t addr, void *buf, int *nbufs) +{ + (*nbufs)++; + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_lwp_cache(uintptr_t addr, const umem_cache_t *cp, ulwp_t *ulwp) +{ + char walk[60]; + int nbufs = 0; + + if (!(cp->cache_flags & UMF_PTC)) + return (WALK_NEXT); + + (void) snprintf(walk, sizeof (walk), "umem_ptc_%d", cp->cache_bufsize); + + if (mdb_pwalk(walk, (mdb_walk_cb_t)umastat_lwp_ptc, + &nbufs, (uintptr_t)ulwp->ul_self) == -1) { + mdb_warn("unable to walk '%s'", walk); + return (WALK_ERR); + } + + mdb_printf("%3d ", ulwp->ul_tmem.tm_size ? 
+ (nbufs * cp->cache_bufsize * 100) / ulwp->ul_tmem.tm_size : 0); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_lwp(uintptr_t addr, const ulwp_t *ulwp, void *ignored) +{ + size_t size; + datafmt_t *dfp = ptcfmt; + + mdb_printf((dfp++)->fmt, ulwp->ul_lwpid); + mdb_printf((dfp++)->fmt, ulwp->ul_tmem.tm_size); + + if (umem_readvar(&size, "umem_ptc_size") == -1) { + mdb_warn("unable to read 'umem_ptc_size'"); + return (WALK_ERR); + } + + mdb_printf((dfp++)->fmt, (ulwp->ul_tmem.tm_size * 100) / size); + + if (mdb_walk("umem_cache", + (mdb_walk_cb_t)umastat_lwp_cache, (void *)ulwp) == -1) { + mdb_warn("can't walk 'umem_cache'"); + return (WALK_ERR); + } + + mdb_printf("\n"); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_cache_ptc(uintptr_t addr, const void *ignored, int *nptc) +{ + (*nptc)++; + return (WALK_NEXT); +} + static int umastat_cache(uintptr_t addr, const umem_cache_t *cp, umastat_vmem_t **kvp) { umastat_vmem_t *kv; datafmt_t *dfp = umemfmt; + char buf[10]; int magsize; - int avail, alloc, total; + int avail, alloc, total, nptc = 0; size_t meminuse = (cp->cache_slab_create - cp->cache_slab_destroy) * cp->cache_slabsize; @@ -130,6 +236,21 @@ umastat_cache(uintptr_t addr, const umem_cache_t *cp, umastat_vmem_t **kvp) (void) mdb_pwalk("umem_cpu_cache", cpu_avail, &avail, addr); (void) mdb_pwalk("umem_slab_partial", slab_avail, &avail, addr); + if (cp->cache_flags & UMF_PTC) { + char walk[60]; + + (void) snprintf(walk, sizeof (walk), + "umem_ptc_%d", cp->cache_bufsize); + + if (mdb_walk(walk, + (mdb_walk_cb_t)umastat_cache_ptc, &nptc) == -1) { + mdb_warn("unable to walk '%s'", walk); + return (WALK_ERR); + } + + (void) snprintf(buf, sizeof (buf), "%d", nptc); + } + for (kv = *kvp; kv != NULL; kv = kv->kv_next) { if (kv->kv_addr == (uintptr_t)cp->cache_arena) goto out; @@ -147,6 +268,7 @@ out: mdb_printf((dfp++)->fmt, cp->cache_name); mdb_printf((dfp++)->fmt, cp->cache_bufsize); mdb_printf((dfp++)->fmt, total - avail); + mdb_printf((dfp++)->fmt, cp->cache_flags & UMF_PTC ? buf : "-"); mdb_printf((dfp++)->fmt, total); mdb_printf((dfp++)->fmt, meminuse); mdb_printf((dfp++)->fmt, alloc); @@ -165,8 +287,8 @@ umastat_vmem_totals(uintptr_t addr, const vmem_t *v, umastat_vmem_t *kv) if (kv == NULL || kv->kv_alloc == 0) return (WALK_NEXT); - mdb_printf("Total [%s]%*s %6s %6s %6s %9u %9u %5u\n", v->vm_name, - 17 - strlen(v->vm_name), "", "", "", "", + mdb_printf("Total [%s]%*s %6s %7s %7s %7s %7H %9u %5u\n", v->vm_name, + 17 - strlen(v->vm_name), "", "", "", "", "", kv->kv_meminuse, kv->kv_alloc, kv->kv_fail); return (WALK_NEXT); @@ -209,20 +331,67 @@ umastat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { umastat_vmem_t *kv = NULL; datafmt_t *dfp; + int nptc = 0, i; if (argc != 0) return (DCMD_USAGE); + /* + * We need to determine if we have any caches that have per-thread + * caching enabled. 
+ */ + if (mdb_walk("umem_cache", + (mdb_walk_cb_t)umastat_cache_nptc, &nptc) == -1) { + mdb_warn("can't walk 'umem_cache'"); + return (DCMD_ERR); + } + + if (nptc) { + for (dfp = ptcfmt; dfp->hdr2 != NULL; dfp++) + mdb_printf("%s ", dfp->hdr1); + + for (i = 0; i < nptc; i++) + mdb_printf("%s ", dfp->hdr1); + + mdb_printf("\n"); + + for (dfp = ptcfmt; dfp->hdr2 != NULL; dfp++) + mdb_printf("%s ", dfp->hdr2); + + if (mdb_walk("umem_cache", + (mdb_walk_cb_t)umastat_cache_hdr, NULL) == -1) { + mdb_warn("can't walk 'umem_cache'"); + return (DCMD_ERR); + } + + mdb_printf("\n"); + + for (dfp = ptcfmt; dfp->hdr2 != NULL; dfp++) + mdb_printf("%s ", dfp->dashes); + + for (i = 0; i < nptc; i++) + mdb_printf("%s ", dfp->dashes); + + mdb_printf("\n"); + + if (mdb_walk("ulwp", (mdb_walk_cb_t)umastat_lwp, NULL) == -1) { + mdb_warn("can't walk 'ulwp'"); + return (DCMD_ERR); + } + + mdb_printf("\n"); + } + for (dfp = umemfmt; dfp->hdr1 != NULL; dfp++) - mdb_printf("%s ", dfp->hdr1); + mdb_printf("%s%s", dfp == umemfmt ? "" : " ", dfp->hdr1); mdb_printf("\n"); for (dfp = umemfmt; dfp->hdr1 != NULL; dfp++) - mdb_printf("%s ", dfp->hdr2); + mdb_printf("%s%s", dfp == umemfmt ? "" : " ", dfp->hdr2); mdb_printf("\n"); for (dfp = umemfmt; dfp->hdr1 != NULL; dfp++) - mdb_printf("%s ", dfp->dashes); + mdb_printf("%s%s", dfp == umemfmt ? "" : " ", dfp->dashes); mdb_printf("\n"); if (mdb_walk("umem_cache", (mdb_walk_cb_t)umastat_cache, &kv) == -1) { @@ -231,7 +400,7 @@ umastat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) } for (dfp = umemfmt; dfp->hdr1 != NULL; dfp++) - mdb_printf("%s ", dfp->dashes); + mdb_printf("%s%s", dfp == umemfmt ? "" : " ", dfp->dashes); mdb_printf("\n"); if (mdb_walk("vmem", (mdb_walk_cb_t)umastat_vmem_totals, kv) == -1) { diff --git a/usr/src/cmd/mdb/common/modules/libumem/umem.c b/usr/src/cmd/mdb/common/modules/libumem/umem.c index 26a62c7b52..73dd4d6e89 100644 --- a/usr/src/cmd/mdb/common/modules/libumem/umem.c +++ b/usr/src/cmd/mdb/common/modules/libumem/umem.c @@ -24,7 +24,7 @@ */ /* - * Copyright 2011 Joyent, Inc. All rights reserved. + * Copyright 2012 Joyent, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. */ @@ -36,6 +36,8 @@ #include <alloca.h> #include <limits.h> #include <mdb/mdb_whatis.h> +#include <thr_uberdata.h> +#include <stdio.h> #include "misc.h" #include "leaky.h" @@ -104,12 +106,58 @@ umem_update_variables(void) return (0); } +static int +umem_ptc_walk_init(mdb_walk_state_t *wsp) +{ + if (wsp->walk_addr == NULL) { + if (mdb_layered_walk("ulwp", wsp) == -1) { + mdb_warn("couldn't walk 'ulwp'"); + return (WALK_ERR); + } + } + + return (WALK_NEXT); +} + +static int +umem_ptc_walk_step(mdb_walk_state_t *wsp) +{ + uintptr_t this; + int rval; + + if (wsp->walk_layer != NULL) { + this = (uintptr_t)((ulwp_t *)wsp->walk_layer)->ul_self + + (uintptr_t)wsp->walk_arg; + } else { + this = wsp->walk_addr + (uintptr_t)wsp->walk_arg; + } + + for (;;) { + if (mdb_vread(&this, sizeof (void *), this) == -1) { + mdb_warn("couldn't read ptc buffer at %p", this); + return (WALK_ERR); + } + + if (this == NULL) + break; + + rval = wsp->walk_callback(this, &this, wsp->walk_cbdata); + + if (rval != WALK_NEXT) + return (rval); + } + + return (wsp->walk_layer != NULL ? 
WALK_NEXT : WALK_DONE); +} + /*ARGSUSED*/ static int -umem_init_walkers(uintptr_t addr, const umem_cache_t *c, void *ignored) +umem_init_walkers(uintptr_t addr, const umem_cache_t *c, int *sizes) { mdb_walker_t w; char descr[64]; + char name[64]; + int i; (void) mdb_snprintf(descr, sizeof (descr), "walk the %s cache", c->cache_name); @@ -124,6 +172,45 @@ umem_init_walkers(uintptr_t addr, const umem_cache_t *c, void *ignored) if (mdb_add_walker(&w) == -1) mdb_warn("failed to add %s walker", c->cache_name); + if (!(c->cache_flags & UMF_PTC)) + return (WALK_NEXT); + + /* + * For the per-thread cache walker, the address is the offset in the + * tm_roots[] array of the ulwp_t. + */ + for (i = 0; sizes[i] != 0; i++) { + if (sizes[i] == c->cache_bufsize) + break; + } + + if (sizes[i] == 0) { + mdb_warn("cache %s is cached per-thread, but could not find " + "size in umem_alloc_sizes\n", c->cache_name); + return (WALK_NEXT); + } + + if (i >= NTMEMBASE) { + mdb_warn("index for %s (%d) exceeds root slots (%d)\n", + c->cache_name, i, NTMEMBASE); + return (WALK_NEXT); + } + + (void) mdb_snprintf(name, sizeof (name), + "umem_ptc_%d", c->cache_bufsize); + (void) mdb_snprintf(descr, sizeof (descr), + "walk the per-thread cache for %s", c->cache_name); + + w.walk_name = name; + w.walk_descr = descr; + w.walk_init = umem_ptc_walk_init; + w.walk_step = umem_ptc_walk_step; + w.walk_fini = NULL; + w.walk_init_arg = (void *)offsetof(ulwp_t, ul_tmem.tm_roots[i]); + + if (mdb_add_walker(&w) == -1) + mdb_warn("failed to add %s walker", w.walk_name); + return (WALK_NEXT); } @@ -132,6 +219,8 @@ static void umem_statechange_cb(void *arg) { static int been_ready = 0; + GElf_Sym sym; + int *sizes; #ifndef _KMDB leaky_cleanup(1); /* state changes invalidate leaky state */ @@ -147,7 +236,25 @@ umem_statechange_cb(void *arg) return; been_ready = 1; - (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, NULL); + + /* + * In order to determine the tm_roots offset of any cache that is + * cached per-thread, we need to have the umem_alloc_sizes array. + * Read this, assuring that it is zero-terminated. 
+ */ + if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) { + mdb_warn("unable to lookup 'umem_alloc_sizes'"); + return; + } + + sizes = mdb_zalloc(sym.st_size + sizeof (int), UM_SLEEP | UM_GC); + + if (mdb_vread(sizes, sym.st_size, (uintptr_t)sym.st_value) == -1) { + mdb_warn("couldn't read 'umem_alloc_sizes'"); + return; + } + + (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, sizes); } int @@ -788,9 +895,9 @@ umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp) } \ } -int +static int umem_read_magazines(umem_cache_t *cp, uintptr_t addr, - void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags) + void ***maglistp, size_t *magcntp, size_t *magmaxp) { umem_magazine_t *ump, *mp; void **maglist = NULL; @@ -807,7 +914,7 @@ umem_read_magazines(umem_cache_t *cp, uintptr_t addr, *maglistp = NULL; *magcntp = 0; *magmaxp = 0; - return (WALK_NEXT); + return (0); } /* @@ -828,11 +935,11 @@ umem_read_magazines(umem_cache_t *cp, uintptr_t addr, if (magbsize >= PAGESIZE / 2) { mdb_warn("magazine size for cache %p unreasonable (%x)\n", addr, magbsize); - return (WALK_ERR); + return (-1); } - maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags); - mp = mdb_alloc(magbsize, alloc_flags); + maglist = mdb_alloc(magmax * sizeof (void *), UM_SLEEP); + mp = mdb_alloc(magbsize, UM_SLEEP); if (mp == NULL || maglist == NULL) goto fail; @@ -875,23 +982,80 @@ umem_read_magazines(umem_cache_t *cp, uintptr_t addr, dprintf(("magazine layer: %d buffers\n", magcnt)); - if (!(alloc_flags & UM_GC)) - mdb_free(mp, magbsize); + mdb_free(mp, magbsize); *maglistp = maglist; *magcntp = magcnt; *magmaxp = magmax; - return (WALK_NEXT); + return (0); fail: - if (!(alloc_flags & UM_GC)) { - if (mp) - mdb_free(mp, magbsize); - if (maglist) - mdb_free(maglist, magmax * sizeof (void *)); + if (mp) + mdb_free(mp, magbsize); + if (maglist) + mdb_free(maglist, magmax * sizeof (void *)); + + return (-1); +} + +typedef struct umem_read_ptc_walk { + void **urpw_buf; + size_t urpw_cnt; + size_t urpw_max; +} umem_read_ptc_walk_t; + +/*ARGSUSED*/ +static int +umem_read_ptc_walk_buf(uintptr_t addr, + const void *ignored, umem_read_ptc_walk_t *urpw) +{ + if (urpw->urpw_cnt == urpw->urpw_max) { + size_t nmax = urpw->urpw_max ? 
(urpw->urpw_max << 1) : 1; + void **new = mdb_zalloc(nmax * sizeof (void *), UM_SLEEP); + + if (nmax > 1) { + size_t osize = urpw->urpw_max * sizeof (void *); + bcopy(urpw->urpw_buf, new, osize); + mdb_free(urpw->urpw_buf, osize); + } + + urpw->urpw_buf = new; + urpw->urpw_max = nmax; } - return (WALK_ERR); + + urpw->urpw_buf[urpw->urpw_cnt++] = (void *)addr; + + return (WALK_NEXT); +} + +static int +umem_read_ptc(umem_cache_t *cp, + void ***buflistp, size_t *bufcntp, size_t *bufmaxp) +{ + umem_read_ptc_walk_t urpw; + char walk[60]; + int rval; + + if (!(cp->cache_flags & UMF_PTC)) + return (0); + + (void) snprintf(walk, sizeof (walk), "umem_ptc_%d", cp->cache_bufsize); + + urpw.urpw_buf = *buflistp; + urpw.urpw_cnt = *bufcntp; + urpw.urpw_max = *bufmaxp; + + if ((rval = mdb_walk(walk, + (mdb_walk_cb_t)umem_read_ptc_walk_buf, &urpw)) == -1) { + mdb_warn("couldn't walk %s", walk); + } + + *buflistp = urpw.urpw_buf; + *bufcntp = urpw.urpw_cnt; + *bufmaxp = urpw.urpw_max; + + return (rval); } static int @@ -1022,13 +1186,19 @@ umem_walk_init_common(mdb_walk_state_t *wsp, int type) /* * Read in the contents of the magazine layer */ - if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax, - UM_SLEEP) == WALK_ERR) + if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax) != 0) + goto out2; + + /* + * Read in the contents of the per-thread caches, if any + */ + if (umem_read_ptc(cp, &maglist, &magcnt, &magmax) != 0) goto out2; /* - * We have all of the buffers from the magazines; if we are walking - * allocated buffers, sort them so we can bsearch them later. + * We have all of the buffers from the magazines and from the + * per-thread cache (if any); if we are walking allocated buffers, + * sort them so we can bsearch them later. */ if (type & UM_ALLOCATED) qsort(maglist, magcnt, sizeof (void *), addrcmp); diff --git a/usr/src/cmd/mdb/intel/amd64/libumem/Makefile b/usr/src/cmd/mdb/intel/amd64/libumem/Makefile index a8352ff865..704ff65873 100644 --- a/usr/src/cmd/mdb/intel/amd64/libumem/Makefile +++ b/usr/src/cmd/mdb/intel/amd64/libumem/Makefile @@ -42,6 +42,7 @@ include ../../../../Makefile.cmd include ../../../../Makefile.cmd.64 CPPFLAGS += -I$(SRC)/lib/libumem/common +CPPFLAGS += -I$(SRC)/lib/libc/inc CPPFLAGS += -I$(MODSRCS_DIR) include ../../Makefile.amd64 diff --git a/usr/src/cmd/mdb/intel/ia32/libumem/Makefile b/usr/src/cmd/mdb/intel/ia32/libumem/Makefile index 2399f51d31..a1ab338f40 100644 --- a/usr/src/cmd/mdb/intel/ia32/libumem/Makefile +++ b/usr/src/cmd/mdb/intel/ia32/libumem/Makefile @@ -40,6 +40,7 @@ MODSRCS = \ include ../../../../Makefile.cmd +CPPFLAGS += -I$(SRC)/lib/libc/inc CPPFLAGS += -I$(SRC)/lib/libumem/common CPPFLAGS += -I$(MODSRCS_DIR) diff --git a/usr/src/cmd/mdb/sparc/v7/libumem/Makefile b/usr/src/cmd/mdb/sparc/v7/libumem/Makefile index 4553b15eba..906d05d5ea 100644 --- a/usr/src/cmd/mdb/sparc/v7/libumem/Makefile +++ b/usr/src/cmd/mdb/sparc/v7/libumem/Makefile @@ -41,6 +41,7 @@ MODSRCS = \ include ../../../../Makefile.cmd CPPFLAGS += -I$(SRC)/lib/libumem/common +CPPFLAGS += -I$(SRC)/lib/libc/inc CPPFLAGS += -I$(MODSRCS_DIR) include ../../Makefile.sparcv7 diff --git a/usr/src/cmd/mdb/sparc/v9/libumem/Makefile b/usr/src/cmd/mdb/sparc/v9/libumem/Makefile index 2cbeb25f5e..09ea0473c6 100644 --- a/usr/src/cmd/mdb/sparc/v9/libumem/Makefile +++ b/usr/src/cmd/mdb/sparc/v9/libumem/Makefile @@ -54,6 +54,7 @@ KMOD_SOURCES_DIFFERENT=$(POUND_SIGN) include ../../../../Makefile.cmd CPPFLAGS += -I$(SRC)/lib/libumem/common +CPPFLAGS += -I$(SRC)/lib/libc/inc CPPFLAGS += 
-I$(MODSRCS_DIR) include ../../../../Makefile.cmd.64 diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile index 9ddd748eb6..873c2ded87 100644 --- a/usr/src/lib/libc/amd64/Makefile +++ b/usr/src/lib/libc/amd64/Makefile @@ -20,6 +20,7 @@ # # # Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2012, Joyent, Inc. All rights reserved. # # Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. # Copyright 2011 Nexenta Systems, Inc. All rights reserved. @@ -798,6 +799,7 @@ THREADSOBJS= \ assfail.o \ cancel.o \ door_calls.o \ + tmem.o \ pthr_attr.o \ pthr_barrier.o \ pthr_cond.o \ @@ -1119,6 +1121,7 @@ TIL= \ thread_pool.o \ thrp_unwind.o \ tls.o \ + tmem.o \ tsd.o $(TIL:%=pics/%) := CFLAGS64 += $(LIBCBASE)/threads/amd64.il diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com index 31a7bc945f..b21f87a0d6 100644 --- a/usr/src/lib/libc/i386/Makefile.com +++ b/usr/src/lib/libc/i386/Makefile.com @@ -20,6 +20,7 @@ # # # Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2013, Joyent, Inc. All rights reserved. # Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. # # Copyright 2011 Nexenta Systems, Inc. All rights reserved. @@ -840,6 +841,7 @@ THREADSOBJS= \ assfail.o \ cancel.o \ door_calls.o \ + tmem.o \ pthr_attr.o \ pthr_barrier.o \ pthr_cond.o \ @@ -1177,6 +1179,7 @@ TIL= \ thread_pool.o \ tls.o \ tsd.o \ + tmem.o \ unwind.o THREADS_INLINES = $(LIBCBASE)/threads/i386.il diff --git a/usr/src/lib/libc/inc/thr_uberdata.h b/usr/src/lib/libc/inc/thr_uberdata.h index 42c08049b2..de0d4a6b05 100644 --- a/usr/src/lib/libc/inc/thr_uberdata.h +++ b/usr/src/lib/libc/inc/thr_uberdata.h @@ -22,6 +22,9 @@ /* * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ #ifndef _THR_UBERDATA_H #define _THR_UBERDATA_H @@ -488,6 +491,29 @@ typedef struct { #endif /* _SYSCALL32 */ /* + * As part of per-thread caching libumem (ptcumem), we add a small amount to the + * thread's uberdata to facilitate it. The tm_roots are the roots of linked + * lists which is used by libumem to chain together allocations. tm_size is used + * to track the total amount of data stored across those linked lists. For more + * information, see libumem's big theory statement. + */ +#define NTMEMBASE 16 + +typedef struct { + size_t tm_size; + void *tm_roots[NTMEMBASE]; +} tumem_t; + +#ifdef _SYSCALL32 +typedef struct { + uint32_t tm_size; + caddr32_t tm_roots[NTMEMBASE]; +} tumem32_t; +#endif + +typedef void (*tmem_func_t)(void *, int); + +/* * Maximum number of read locks allowed for one thread on one rwlock. * This could be as large as INT_MAX, but the SUSV3 test suite would * take an inordinately long time to complete. This is big enough. 
@@ -653,6 +679,7 @@ typedef struct ulwp { #if defined(sparc) void *ul_unwind_ret; /* used only by _ex_clnup_handler() */ #endif + tumem_t ul_tmem; /* used only by umem */ } ulwp_t; #define ul_cursig ul_cp.s.cursig /* deferred signal number */ @@ -1083,6 +1110,7 @@ typedef struct ulwp32 { #if defined(sparc) caddr32_t ul_unwind_ret; /* used only by _ex_clnup_handler() */ #endif + tumem32_t ul_tmem; /* used only by umem */ } ulwp32_t; #define REPLACEMENT_SIZE32 ((size_t)&((ulwp32_t *)NULL)->ul_sigmask) @@ -1205,6 +1233,7 @@ extern ulwp_t *find_lwp(thread_t); extern void finish_init(void); extern void update_sched(ulwp_t *); extern void queue_alloc(void); +extern void tmem_exit(void); extern void tsd_exit(void); extern void tsd_free(ulwp_t *); extern void tls_setup(void); diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers index 1cdc95be16..1882a337d5 100644 --- a/usr/src/lib/libc/port/mapfile-vers +++ b/usr/src/lib/libc/port/mapfile-vers @@ -25,6 +25,7 @@ # Use is subject to license terms. # # Copyright (c) 2012 by Delphix. All rights reserved. +# Copyright (c) 2012, Joyent, Inc. All rights reserved. # Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. # Copyright (c) 2013 Gary Mills # @@ -2903,6 +2904,9 @@ $endif thr_wait_mutator; _thr_wait_mutator; __tls_get_addr; + _tmem_get_base; + _tmem_get_nentries; + _tmem_set_cleanup; tpool_create; tpool_dispatch; tpool_destroy; diff --git a/usr/src/lib/libc/port/threads/thr.c b/usr/src/lib/libc/port/threads/thr.c index ae55fbddf5..b5d848449d 100644 --- a/usr/src/lib/libc/port/threads/thr.c +++ b/usr/src/lib/libc/port/threads/thr.c @@ -22,6 +22,9 @@ /* * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ #include "lint.h" #include "thr_uberdata.h" @@ -771,6 +774,7 @@ _thrp_exit() } lmutex_unlock(&udp->link_lock); + tmem_exit(); /* deallocate tmem allocations */ tsd_exit(); /* deallocate thread-specific data */ tls_exit(); /* deallocate thread-local storage */ heldlock_exit(); /* deal with left-over held locks */ diff --git a/usr/src/lib/libc/port/threads/tmem.c b/usr/src/lib/libc/port/threads/tmem.c new file mode 100644 index 0000000000..00203de593 --- /dev/null +++ b/usr/src/lib/libc/port/threads/tmem.c @@ -0,0 +1,85 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ + +#include "lint.h" +#include "thr_uberdata.h" + +/* + * This file implements the private interface with libumem for per-thread + * caching umem (ptcumem). 
For the full details on how tcumem works and how + * these functions work, see section 8.4 of the big theory statement in + * lib/libumem/common/umem.c. + */ +static tmem_func_t tmem_cleanup = NULL; + +uintptr_t +_tmem_get_base(void) +{ + return ((uintptr_t)&curthread->ul_tmem - (uintptr_t)curthread); +} + +int +_tmem_get_nentries(void) +{ + return (NTMEMBASE); +} + +void +_tmem_set_cleanup(tmem_func_t f) +{ + tmem_cleanup = f; +} + +/* + * This is called by _thrp_exit() to clean up any per-thread allocations that + * are still hanging around and haven't been cleaned up. + */ +void +tmem_exit(void) +{ + int ii; + void *buf, *next; + tumem_t *tp = &curthread->ul_tmem; + + + if (tp->tm_size == 0) + return; + + /* + * Since we have something stored here, we need to ensure we declared a + * clean up handler. If we haven't that's broken and our single private + * consumer should be shot. + */ + if (tmem_cleanup == NULL) + abort(); + for (ii = 0; ii < NTMEMBASE; ii++) { + buf = tp->tm_roots[ii]; + while (buf != NULL) { + next = *(void **)buf; + tmem_cleanup(buf, ii); + buf = next; + } + } +} diff --git a/usr/src/lib/libc/sparc/Makefile.com b/usr/src/lib/libc/sparc/Makefile.com index cc6bae0df4..25482d7324 100644 --- a/usr/src/lib/libc/sparc/Makefile.com +++ b/usr/src/lib/libc/sparc/Makefile.com @@ -20,6 +20,7 @@ # # # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2012, Joyent, Inc. All rights reserved. # Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. # # Copyright 2011 Nexenta Systems, Inc. All rights reserved. @@ -875,6 +876,7 @@ THREADSOBJS= \ assfail.o \ cancel.o \ door_calls.o \ + tmem.o \ pthr_attr.o \ pthr_barrier.o \ pthr_cond.o \ diff --git a/usr/src/lib/libc/sparcv9/Makefile.com b/usr/src/lib/libc/sparcv9/Makefile.com index 54b3258fe9..fe6844273e 100644 --- a/usr/src/lib/libc/sparcv9/Makefile.com +++ b/usr/src/lib/libc/sparcv9/Makefile.com @@ -20,6 +20,7 @@ # # # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2012, Joyent, Inc. All rights reserved. # Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. # # Copyright 2011 Nexenta Systems, Inc. All rights reserved. @@ -822,6 +823,7 @@ THREADSOBJS= \ assfail.o \ cancel.o \ door_calls.o \ + tmem.o \ pthr_attr.o \ pthr_barrier.o \ pthr_cond.o \ diff --git a/usr/src/lib/libumem/Makefile.com b/usr/src/lib/libumem/Makefile.com index 0e726c5646..61f7e9503d 100644 --- a/usr/src/lib/libumem/Makefile.com +++ b/usr/src/lib/libumem/Makefile.com @@ -22,6 +22,8 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # +# Copyright (c) 2012, Joyent, Inc. All rights reserved. 
+# # # The build process for libumem is sightly different from that used by other @@ -65,10 +67,12 @@ SRCS_standalone = $(OBJECTS_standalone:%.o=../common/%.c) # Architecture-dependent files common to both versions of libumem OBJECTS_common_isadep = \ - asm_subr.o + asm_subr.o \ + umem_genasm.o SRCS_common_isadep = \ - $(ISASRCDIR)/asm_subr.s + $(ISASRCDIR)/asm_subr.s \ + $(ISASRCDIR)/umem_genasm.c # Architecture-independent files common to both versions of libumem OBJECTS_common_common = \ @@ -140,6 +144,7 @@ DTS_ERRNO= STAND_RENAMED_FUNCS= \ atomic_add_64 \ atomic_add_32_nv \ + atomic_swap_64 \ snprintf \ vsnprintf diff --git a/usr/src/lib/libumem/amd64/umem_genasm.c b/usr/src/lib/libumem/amd64/umem_genasm.c new file mode 100644 index 0000000000..00cc18ab67 --- /dev/null +++ b/usr/src/lib/libumem/amd64/umem_genasm.c @@ -0,0 +1,604 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2013 Joyent, Inc. All rights reserved. + */ + +/* + * Don't Panic! If you find the blocks of assembly that follow confusing and + * you're questioning why they exist, please go read section 8 of the umem.c big + * theory statement. Next familiarize yourself with the malloc and free + * implementations in libumem's malloc.c. + * + * What follows is the amd64 implementation of the thread caching automatic + * assembly generation. The amd64 calling conventions are documented in the + * 64-bit System V ABI. For our purposes what matters is that our first argument + * will come in rdi. Our functions have to preserve rbp, rbx, and r12->r15. We + * are free to do whatever we want with rax, rcx, rdx, rsi, rdi, and r8->r11. + * + * For both our implementation of malloc and free we only use the registers we + * don't have to preserve. + * + * Malloc register usage: + * o. rdi: Original size to malloc. This never changes and is preserved. + * o. rsi: Adjusted malloc size for malloc_data_tag(s). + * o. rcx: Pointer to the tmem_t in the ulwp_t. + * o. rdx: Pointer to the tmem_t array of roots + * o. r8: Size of the cache + * o. r9: Scratch register + * + * Free register usage: + * o. rdi: Original buffer to free. This never changes and is preserved. + * o. rax: The actual buffer, adjusted for the hidden malloc_data_t(s). + * o. rcx: Pointer to the tmem_t in the ulwp_t. + * o. rdx: Pointer to the tmem_t array of roots + * o. r8: Size of the cache + * o. r9: Scratch register + * + * Once we determine what cache we are using, we increment %rdx to the + * appropriate offset and set %r8 with the size of the cache. This means that + * when we break out to the normal buffer allocation point %rdx contains the + * head of the linked list and %r8 is the amount that we have to adjust the + * thread's cached amount by. 
+ * + * Each block of assembly has psuedocode that describes its purpose. + */ + +#include <atomic.h> +#include <inttypes.h> +#include <sys/types.h> +#include <strings.h> +#include <umem_impl.h> +#include "umem_base.h" + +#include <stdio.h> + +const int umem_genasm_supported = 1; +static uintptr_t umem_genasm_mptr = (uintptr_t)&_malloc; +static size_t umem_genasm_msize = 576; +static uintptr_t umem_genasm_fptr = (uintptr_t)&_free; +static size_t umem_genasm_fsize = 576; +static uintptr_t umem_genasm_omptr = (uintptr_t)umem_malloc; +static uintptr_t umem_genasm_ofptr = (uintptr_t)umem_malloc_free; + +#define UMEM_GENASM_MAX64 (UINT32_MAX / sizeof (uintptr_t)) +#define PTC_JMPADDR(dest, src) (dest - (src + 4)) +#define PTC_ROOT_SIZE sizeof (uintptr_t) +#define MULTINOP 0x0000441f0f + +/* + * void *ptcmalloc(size_t orig_size); + * + * size_t size = orig_size + 8; + * if (size > UMEM_SECOND_ALIGN) + * size += 8; + * + * if (size < orig_size) + * goto tomalloc; ! This is overflow + * + * if (size > cache_max) + * goto tomalloc + * + * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset; + * void **roots = t->tm_roots; + */ +#define PTC_MALINIT_JOUT 0x13 +#define PTC_MALINIT_MCS 0x1a +#define PTC_MALINIT_JOV 0x20 +#define PTC_MALINIT_SOFF 0x30 +static const uint8_t malinit[] = { + 0x48, 0x8d, 0x77, 0x08, /* leaq 0x8(%rdi),%rsi */ + 0x48, 0x83, 0xfe, 0x10, /* cmpq $0x10, %rsi */ + 0x76, 0x04, /* jbe +0x4 */ + 0x48, 0x8d, 0x77, 0x10, /* leaq 0x10(%rdi),%rsi */ + 0x48, 0x39, 0xfe, /* cmpq %rdi,%rsi */ + 0x0f, 0x82, 0x00, 0x00, 0x00, 0x00, /* jb +errout */ + 0x48, 0x81, 0xfe, + 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ + 0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +errout */ + 0x64, 0x48, 0x8b, 0x0c, 0x25, + 0x00, 0x00, 0x00, 0x00, /* movq %fs:0x0,%rcx */ + 0x48, 0x81, 0xc1, + 0x00, 0x00, 0x00, 0x00, /* addq $SOFF, %rcx */ + 0x48, 0x8d, 0x51, 0x08, /* leaq 0x8(%rcx),%rdx */ +}; + +/* + * void ptcfree(void *buf); + * + * if (buf == NULL) + * return; + * + * malloc_data_t *tag = buf; + * tag--; + * int size = tag->malloc_size; + * int tagval = UMEM_MALLOC_DECODE(tag->malloc_tag, size); + * if (tagval == MALLOC_SECOND_MAGIC) { + * tag--; + * } else if (tagval != MALLOC_MAGIC) { + * goto tofree; + * } + * + * if (size > cache_max) + * goto tofree; + * + * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset; + * void **roots = t->tm_roots; + */ +#define PTC_FRINI_JDONE 0x05 +#define PTC_FRINI_JFREE 0x25 +#define PTC_FRINI_MCS 0x30 +#define PTC_FRINI_JOV 0x36 +#define PTC_FRINI_SOFF 0x46 +static const uint8_t freeinit[] = { + 0x48, 0x85, 0xff, /* testq %rdi,%rdi */ + 0x0f, 0x84, 0x00, 0x00, 0x00, 0x00, /* jmp $JDONE (done) */ + 0x8b, 0x77, 0xf8, /* movl -0x8(%rdi),%esi */ + 0x8b, 0x47, 0xfc, /* movl -0x4(%rdi),%eax */ + 0x01, 0xf0, /* addl %esi,%eax */ + 0x3d, 0x00, 0x70, 0xba, 0x16, /* cmpl $MALLOC_2_MAGIC, %eax */ + 0x75, 0x06, /* jne +0x6 (checkover) */ + 0x48, 0x8d, 0x47, 0xf0, /* leaq -0x10(%rdi),%eax */ + 0xeb, 0x0f, /* jmp +0xf (freebuf) */ + 0x3d, 0x00, 0xc0, 0x10, 0x3a, /* cmpl $MALLOC_MAGIC, %eax */ + 0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, /* jmp +JFREE (goto torfree) */ + 0x48, 0x8d, 0x47, 0xf8, /* leaq -0x8(%rdi),%rax */ + 0x48, 0x81, 0xfe, + 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ + 0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +errout */ + 0x64, 0x48, 0x8b, 0x0c, 0x25, + 0x00, 0x00, 0x00, 0x00, /* movq %fs:0x0,%rcx */ + 0x48, 0x81, 0xc1, + 0x00, 0x00, 0x00, 0x00, /* addq $SOFF, %rcx */ + 0x48, 0x8d, 0x51, 0x08, /* leaq 0x8(%rcx),%rdx */ +}; + +/* + * if (size <= 
$CACHE_SIZE) { + * csize = $CACHE_SIZE; + * } else ... ! goto next cache + */ +#define PTC_INICACHE_CMP 0x03 +#define PTC_INICACHE_SIZE 0x0c +#define PTC_INICACHE_JMP 0x11 +static const uint8_t inicache[] = { + 0x48, 0x81, 0xfe, + 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ + 0x77, 0x0c, /* ja +0xc (next cache) */ + 0x49, 0xc7, 0xc0, + 0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */ + 0xe9, 0x00, 0x00, 0x00, 0x00, /* jmp $JMP (allocbuf) */ +}; + +/* + * if (size <= $CACHE_SIZE) { + * csize = $CACHE_SIZE; + * roots += $CACHE_NUM; + * } else ... ! goto next cache + */ +#define PTC_GENCACHE_CMP 0x03 +#define PTC_GENCACHE_SIZE 0x0c +#define PTC_GENCACHE_NUM 0x13 +#define PTC_GENCACHE_JMP 0x18 +static const uint8_t gencache[] = { + 0x48, 0x81, 0xfe, + 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ + 0x77, 0x14, /* ja +0xc (next cache) */ + 0x49, 0xc7, 0xc0, + 0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */ + 0x48, 0x81, 0xc2, + 0x00, 0x00, 0x00, 0x00, /* addq $8*ii, %rdx */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp +$JMP (allocbuf ) */ +}; + +/* + * else if (size <= $CACHE_SIZE) { + * csize = $CACHE_SIZE; + * roots += $CACHE_NUM; + * } else { + * goto tofunc; ! goto tomalloc if ptcmalloc. + * } ! goto tofree if ptcfree. + */ +#define PTC_FINCACHE_CMP 0x03 +#define PTC_FINCACHE_JMP 0x08 +#define PTC_FINCACHE_SIZE 0x0c +#define PTC_FINCACHE_NUM 0x13 +static const uint8_t fincache[] = { + 0x48, 0x81, 0xfe, + 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ + 0x77, 0x00, /* ja +JMP (to real malloc) */ + 0x49, 0xc7, 0xc0, + 0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */ + 0x48, 0x81, 0xc2, + 0x00, 0x00, 0x00, 0x00, /* addq $8*ii, %rdx */ + +}; + +/* + * if (*root == NULL) + * goto tomalloc; + * + * malloc_data_t *ret = *root; + * *root = *(void **)ret; + * t->tm_size += csize; + * ret->malloc_size = size; + * + * if (size > UMEM_SECOND_ALIGN) { + * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size); + * ret += 2; + * } else { + * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size); + * ret += 1; + * } + * + * return ((void *)ret); + * tomalloc: + * return (malloc(orig_size)); + */ +#define PTC_MALFINI_ALLABEL 0x00 +#define PTC_MALFINI_JMLABEL 0x40 +#define PTC_MALFINI_JMADDR 0x41 +static const uint8_t malfini[] = { + 0x48, 0x8b, 0x02, /* movl (%rdx),%rax */ + 0x48, 0x85, 0xc0, /* testq %rax,%rax */ + 0x74, 0x38, /* je +0x38 (errout) */ + 0x4c, 0x8b, 0x08, /* movq (%rax),%r9 */ + 0x4c, 0x89, 0x0a, /* movq %r9,(%rdx) */ + 0x4c, 0x29, 0x01, /* subq %rsi,(%rcx) */ + 0x48, 0x83, 0xfe, 0x10, /* cmpq $0x10,%rsi */ + 0x76, 0x15, /* jbe +0x15 */ + 0x41, 0xb9, 0x00, 0x70, 0xba, 0x16, /* movl $MALLOC_MAGIC_2, %r9d */ + 0x89, 0x70, 0x08, /* movl %r9d,0x8(%rax) */ + 0x41, 0x29, 0xf1, /* subl %esi, %r9d */ + 0x44, 0x89, 0x48, 0x0c, /* movl %r9d, 0xc(%rax) */ + 0x48, 0x83, 0xc0, 0x10, /* addq $0x10, %rax */ + 0xc3, /* ret */ + 0x41, 0xb9, 0x00, 0xc0, 0x10, 0x3a, /* movl %MALLOC_MAGIC, %r9d */ + 0x89, 0x30, /* movl %esi,(%rax) */ + 0x41, 0x29, 0xf1, /* subl %esi,%r9d */ + 0x44, 0x89, 0x48, 0x04, /* movl %r9d,0x4(%rax) */ + 0x48, 0x83, 0xc0, 0x08, /* addq $0x8,%rax */ + 0xc3, /* ret */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp $MALLOC */ +}; + +/* + * if (t->tm_size + csize > umem_ptc_size) + * goto tofree; + * + * t->tm_size += csize + * *(void **)tag = *root; + * *root = tag; + * return; + * tofree: + * free(buf); + * return; + */ +#define PTC_FRFINI_RBUFLABEL 0x00 +#define PTC_FRFINI_CACHEMAX 0x09 +#define PTC_FRFINI_DONELABEL 0x1b 
+#define PTC_FRFINI_JFLABEL 0x1c +#define PTC_FRFINI_JFADDR 0x1d +static const uint8_t freefini[] = { + 0x4c, 0x8b, 0x09, /* movq (%rcx),%r9 */ + 0x4d, 0x01, 0xc1, /* addq %r8, %r9 */ + 0x49, 0x81, 0xf9, + 0x00, 0x00, 0x00, 0x00, /* cmpl $THR_CACHE_MAX, %r9 */ + 0x77, 0x0d, /* jae +0xd (torfree) */ + 0x4c, 0x01, 0x01, /* addq %r8,(%rcx) */ + 0x4c, 0x8b, 0x0a, /* movq (%rdx),%r9 */ + 0x4c, 0x89, 0x08, /* movq %r9,(%rax) */ + 0x48, 0x89, 0x02, /* movq %rax,(%rdx) */ + 0xc3, /* ret */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp free */ +}; + +/* + * Construct the initial part of malloc. off contains the offset from curthread + * to the root of the tmem structure. ep is the address of the label to error + * and jump to free. csize is the size of the largest umem_cache in ptcumem. + */ +static int +genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize) +{ + uint32_t addr; + + bcopy(malinit, bp, sizeof (malinit)); + addr = PTC_JMPADDR(ep, PTC_MALINIT_JOUT); + bcopy(&addr, bp + PTC_MALINIT_JOUT, sizeof (addr)); + bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize)); + addr = PTC_JMPADDR(ep, PTC_MALINIT_JOV); + bcopy(&addr, bp + PTC_MALINIT_JOV, sizeof (addr)); + bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off)); + + return (sizeof (malinit)); +} + +static int +genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mcs) +{ + uint32_t addr; + + bcopy(freeinit, bp, sizeof (freeinit)); + addr = PTC_JMPADDR(dp, PTC_FRINI_JDONE); + bcopy(&addr, bp + PTC_FRINI_JDONE, sizeof (addr)); + addr = PTC_JMPADDR(ep, PTC_FRINI_JFREE); + bcopy(&addr, bp + PTC_FRINI_JFREE, sizeof (addr)); + bcopy(&mcs, bp + PTC_FRINI_MCS, sizeof (mcs)); + addr = PTC_JMPADDR(ep, PTC_FRINI_JOV); + bcopy(&addr, bp + PTC_FRINI_JOV, sizeof (addr)); + bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off)); + return (sizeof (freeinit)); +} + + +/* + * Create the initial cache entry of the specified size. The value of ap tells + * us what the address of the label to try and allocate a buffer. This value is + * an offset from the current base to that value. 
+ */ +static int +genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap) +{ + uint32_t addr; + + bcopy(inicache, bp, sizeof (inicache)); + bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize)); + bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize)); + addr = PTC_JMPADDR(ap, PTC_INICACHE_JMP); + ASSERT(addr != 0); + bcopy(&addr, bp + PTC_INICACHE_JMP, sizeof (addr)); + + return (sizeof (inicache)); +} + +static int +genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap) +{ + uint32_t addr; + uint32_t coff; + + ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num); + ASSERT(num != 0); + bcopy(gencache, bp, sizeof (gencache)); + bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize)); + bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize)); + coff = num * PTC_ROOT_SIZE; + bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff)); + addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP); + bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr)); + + return (sizeof (gencache)); +} + +static int +genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep) +{ + uint8_t eap; + uint32_t coff; + + ASSERT(ep <= 0xff && ep > 7); + ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num); + bcopy(fincache, bp, sizeof (fincache)); + bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize)); + bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize)); + coff = num * PTC_ROOT_SIZE; + bcopy(&coff, bp + PTC_FINCACHE_NUM, sizeof (coff)); + eap = ep - PTC_FINCACHE_JMP - 1; + bcopy(&eap, bp + PTC_FINCACHE_JMP, sizeof (eap)); + + return (sizeof (fincache)); +} + +static int +genasm_malfini(uint8_t *bp, uintptr_t mptr) +{ + uint32_t addr; + + bcopy(malfini, bp, sizeof (malfini)); + addr = PTC_JMPADDR(mptr, ((uintptr_t)bp + PTC_MALFINI_JMADDR)); + bcopy(&addr, bp + PTC_MALFINI_JMADDR, sizeof (addr)); + + return (sizeof (malfini)); +} + +static int +genasm_frfini(uint8_t *bp, uint32_t maxthr, uintptr_t fptr) +{ + uint32_t addr; + + bcopy(freefini, bp, sizeof (freefini)); + bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr)); + addr = PTC_JMPADDR(fptr, ((uintptr_t)bp + PTC_FRFINI_JFADDR)); + bcopy(&addr, bp + PTC_FRFINI_JFADDR, sizeof (addr)); + + return (sizeof (freefini)); +} + +/* + * The malloc inline assembly is constructed as follows: + * + * o Malloc prologue assembly + * o Generic first-cache check + * o n Generic cache checks (where n = _tmem_get_entries() - 2) + * o Generic last-cache check + * o Malloc epilogue assembly + * + * Generally there are at least three caches. When there is only one cache we + * only use the generic last-cache. In the case where there are two caches, we + * just leave out the middle ones. 
+ */ +static int +genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes) +{ + int ii, off; + uint8_t *bp; + size_t total; + uint32_t allocoff, erroff; + + total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache); + + if (nents >= 2) + total += sizeof (inicache) + sizeof (gencache) * (nents - 2); + + if (total > len) + return (1); + + erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL; + allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL; + + bp = base; + + off = genasm_malinit(bp, umem_tmem_off, erroff, + umem_alloc_sizes[nents-1]); + bp += off; + allocoff -= off; + erroff -= off; + + if (nents > 1) { + off = genasm_firstcache(bp, umem_alloc_sizes[0], allocoff); + bp += off; + allocoff -= off; + erroff -= off; + } + + for (ii = 1; ii < nents - 1; ii++) { + off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], allocoff); + bp += off; + allocoff -= off; + erroff -= off; + } + + bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1], + erroff); + bp += genasm_malfini(bp, umem_genasm_omptr); + ASSERT(((uintptr_t)bp - total) == (uintptr_t)base); + + return (0); +} + +static int +genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes) +{ + uint8_t *bp; + int ii, off; + size_t total; + uint32_t rbufoff, retoff, erroff; + + /* Assume that nents has already been audited for us */ + total = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache); + if (nents >= 2) + total += sizeof (inicache) + sizeof (gencache) * (nents - 2); + + if (total > len) + return (1); + + erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL); + rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL); + retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL); + + bp = base; + + off = genasm_frinit(bp, umem_tmem_off, retoff, erroff, + umem_alloc_sizes[nents - 1]); + bp += off; + erroff -= off; + rbufoff -= off; + + if (nents > 1) { + off = genasm_firstcache(bp, umem_alloc_sizes[0], rbufoff); + bp += off; + erroff -= off; + rbufoff -= off; + } + + for (ii = 1; ii < nents - 1; ii++) { + off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], rbufoff); + bp += off; + rbufoff -= off; + erroff -= off; + } + + bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1], + erroff); + bp += genasm_frfini(bp, umem_ptc_size, umem_genasm_ofptr); + ASSERT(((uintptr_t)bp - total) == (uintptr_t)base); + + return (0); +} + +/*ARGSUSED*/ +int +umem_genasm(int *cp, umem_cache_t **caches, int nc) +{ + int nents, i; + uint8_t *mptr; + uint8_t *fptr; + uint64_t v, *vptr; + + mptr = (void *)((uintptr_t)umem_genasm_mptr + 5); + fptr = (void *)((uintptr_t)umem_genasm_fptr + 5); + if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 || + umem_genasm_fptr == 0 || umem_genasm_fsize == 0) + return (1); + + /* + * The total number of caches that we can service is the minimum of: + * o the amount supported by libc + * o the total number of umem caches + * o we use a single byte addl, so it's MAX_UINT32 / sizeof (uintptr_t) + * For 64-bit, this is MAX_UINT32 >> 3, a lot. 
+ */ + nents = _tmem_get_nentries(); + + if (UMEM_GENASM_MAX64 < nents) + nents = UMEM_GENASM_MAX64; + + if (nc < nents) + nents = nc; + + /* Based on our constraints, this is not an error */ + if (nents == 0 || umem_ptc_size == 0) + return (0); + + /* Take into account the jump */ + if (genasm_malloc(mptr, umem_genasm_msize, nents, cp) != 0) + return (1); + + if (genasm_free(fptr, umem_genasm_fsize, nents, cp) != 0) + return (1); + + + /* nop out the jump with a multibyte jump */ + vptr = (void *)umem_genasm_mptr; + v = MULTINOP; + v |= *vptr & (0xffffffULL << 40); + (void) atomic_swap_64(vptr, v); + vptr = (void *)umem_genasm_fptr; + v = MULTINOP; + v |= *vptr & (0xffffffULL << 40); + (void) atomic_swap_64(vptr, v); + + for (i = 0; i < nents; i++) + caches[i]->cache_flags |= UMF_PTC; + + return (0); +} diff --git a/usr/src/lib/libumem/common/envvar.c b/usr/src/lib/libumem/common/envvar.c index fc3d490a01..0c4d872814 100644 --- a/usr/src/lib/libumem/common/envvar.c +++ b/usr/src/lib/libumem/common/envvar.c @@ -22,7 +22,10 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2012 Joyent, Inc. All rights reserved. + */ + +/* + * Copyright (c) 2012 Joyent, Inc. All rights reserved. */ #include <ctype.h> @@ -151,7 +154,10 @@ static umem_env_item_t umem_options_items[] = { NULL, 0, NULL, &vmem_sbrk_pagesize }, #endif - + { "perthread_cache", "Evolving", ITEM_SIZE, + "Size (in bytes) of per-thread allocation cache", + NULL, 0, NULL, &umem_ptc_size + }, { NULL, "-- end of UMEM_OPTIONS --", ITEM_INVALID } }; diff --git a/usr/src/lib/libumem/common/linktest_stand.c b/usr/src/lib/libumem/common/linktest_stand.c index 8ae9fdbec8..dd8333828b 100644 --- a/usr/src/lib/libumem/common/linktest_stand.c +++ b/usr/src/lib/libumem/common/linktest_stand.c @@ -24,8 +24,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * This file is used to verify that the standalone's external dependencies * haven't changed in a way that'll break things that use it. @@ -34,6 +32,7 @@ void __umem_assert_failed(void) {} void atomic_add_64(void) {} void atomic_add_32_nv(void) {} +void atomic_swap_64(void) {} void dladdr1(void) {} void bcopy(void) {} void bzero(void) {} diff --git a/usr/src/lib/libumem/common/malloc.c b/usr/src/lib/libumem/common/malloc.c index 906f369d29..3d19e5b320 100644 --- a/usr/src/lib/libumem/common/malloc.c +++ b/usr/src/lib/libumem/common/malloc.c @@ -24,8 +24,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <unistd.h> #include <errno.h> #include <string.h> @@ -50,8 +48,17 @@ typedef struct malloc_data { uint32_t malloc_stat; /* = UMEM_MALLOC_ENCODE(state, malloc_size) */ } malloc_data_t; +/* + * Because we do not support ptcumem on non-x86 today, we have to create these + * weak aliases. + */ +#ifndef _x86 +#pragma weak malloc = umem_malloc +#pragma weak free = umem_malloc_free +#endif /* !_x86 */ + void * -malloc(size_t size_arg) +umem_malloc(size_t size_arg) { #ifdef _LP64 uint32_t high_size = 0; @@ -369,7 +376,7 @@ process_memalign: } void -free(void *buf) +umem_malloc_free(void *buf) { if (buf == NULL) return; diff --git a/usr/src/lib/libumem/common/mapfile-vers b/usr/src/lib/libumem/common/mapfile-vers index 102bd989f7..888a1570f2 100644 --- a/usr/src/lib/libumem/common/mapfile-vers +++ b/usr/src/lib/libumem/common/mapfile-vers @@ -20,6 +20,7 @@ # # # Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2012, Joyent, Inc. 
All rights reserved.
 #
 #
@@ -38,6 +39,17 @@
 $mapfile_version 2
 
+$if _x86
+LOAD_SEGMENT umem {
+	FLAGS = READ WRITE EXECUTE;
+	ASSIGN_SECTION {
+		IS_NAME = .text;
+		FILE_BASENAME = asm_subr.o
+	};
+};
+$endif
+
+
 SYMBOL_VERSION SUNW_1.1 {
     global:
 	calloc		{ FLAGS = NODIRECT };
diff --git a/usr/src/lib/libumem/common/stub_stand.c b/usr/src/lib/libumem/common/stub_stand.c
index 54635558c3..2c82364ef1 100644
--- a/usr/src/lib/libumem/common/stub_stand.c
+++ b/usr/src/lib/libumem/common/stub_stand.c
@@ -23,6 +23,9 @@
  * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
 
 /*
  * Stubs for the standalone to reduce the dependence on external libraries
@@ -125,3 +128,21 @@ issetugid(void)
 {
 	return (1);
 }
+
+int
+_tmem_get_nentries(void)
+{
+	return (0);
+}
+
+uintptr_t
+_tmem_get_base(void)
+{
+	return (0);
+}
+
+/*ARGSUSED*/
+void
+_tmem_set_cleanup(void (*f)(int, void *))
+{
+}
diff --git a/usr/src/lib/libumem/common/umem.c b/usr/src/lib/libumem/common/umem.c
index 9ee030dd47..00028e5f80 100644
--- a/usr/src/lib/libumem/common/umem.c
+++ b/usr/src/lib/libumem/common/umem.c
@@ -21,11 +21,14 @@
 
 /*
  * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Copyright 2012 Joyent, Inc. All rights reserved.
  * Use is subject to license terms.
  */
 
 /*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
  * based on usr/src/uts/common/os/kmem.c r1.64 from 2001/12/18
  *
  * The slab allocator, as described in the following two papers:
@@ -43,7 +46,7 @@
  *
  * 1. Overview
  * -----------
- * umem is very close to kmem in implementation. There are four major
+ * umem is very close to kmem in implementation. There are seven major
  * areas of divergence:
  *
  *	* Initialization
@@ -56,6 +59,10 @@
  *
  *	* lock ordering
  *
+ *	* changing UMEM_MAXBUF
+ *
+ *	* Per-thread caching for malloc/free
+ *
  * 2. Initialization
  * -----------------
  * kmem is initialized early on in boot, and knows that no one will call
@@ -365,6 +372,232 @@
  *
  * The second place to update, which is not required, is the umem_alloc_sizes.
  * These determine the default cache sizes that we're going to support.
+ *
+ * 8. Per-thread caching for malloc/free
+ * -------------------------------------
+ *
+ * "Time is an illusion. Lunchtime doubly so." -- Douglas Adams
+ *
+ * Time may be an illusion, but CPU cycles aren't. While libumem is designed
+ * to be a highly scalable allocator, that scalability comes with a fixed
+ * cycle penalty even in the absence of contention: libumem must acquire (and
+ * release) a per-CPU lock for each allocation. When contention is low and
+ * malloc(3C) frequency is high, this overhead can dominate execution time.
+ * To alleviate this, we allow for per-thread caching, a lock-free means of
+ * caching recent deallocations on a per-thread basis for use in satisfying
+ * subsequent calls to malloc(3C).
+ *
+ * In addition to improving performance, we also want to:
+ *	* Minimize fragmentation
+ *	* Not add additional memory overhead (no larger malloc tags)
+ *
+ * In the ulwp_t of each thread there is a private data structure called a
+ * tmem_t that looks like:
+ *
+ *	typedef struct {
+ *		size_t	tm_size;
+ *		void	*tm_roots[NTMEMBASE];	(Currently 16)
+ *	} tmem_t;
+ *
+ * Each of the roots is treated as the head of a linked list. Each entry in
+ * the list can be thought of as a void ** which points to the next entry,
+ * until one of them points to NULL. If the head points to NULL, the list is
+ * empty.
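The list discipline just described can be sketched in a few lines of C. This is purely illustrative and not part of the commit (the helper names are hypothetical); it shows how each free buffer's first word doubles as the link to the next buffer, which is why the scheme adds no per-buffer memory overhead:

	static void
	tmem_push(void **root, void *buf)
	{
		*(void **)buf = *root;	/* buf's first word links to old head */
		*root = buf;		/* buf becomes the new head */
	}

	static void *
	tmem_pop(void **root)
	{
		void *buf = *root;

		if (buf != NULL)
			*root = *(void **)buf;	/* next entry becomes head */
		return (buf);
	}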
+ *
+ * Each head corresponds to a umem_cache. Currently there is a linear mapping
+ * where the first root corresponds to the first cache, second root to the
+ * second cache, etc. This works because every allocation that malloc makes
+ * to umem_alloc that can be satisfied by a umem_cache will actually return
+ * a number of bytes equal to the size of that cache. Because of this
+ * property and the one-to-one mapping between caches and roots, we can
+ * guarantee that every entry in a given root's list will be able to satisfy
+ * the same requests as the corresponding cache.
+ *
+ * The choice of sixteen roots is based on where we believe we get the
+ * biggest bang for our buck. The per-thread caches will cache allocations
+ * up to 256 bytes on ILP32 and up to 448 bytes on LP64. Generally,
+ * applications plan more carefully how they do larger allocations than
+ * smaller ones. Therefore sixteen roots is a reasonable compromise between
+ * the additional per-thread overhead and the likelihood that a program will
+ * benefit from it.
+ *
+ * The maximum amount of memory that can be cached in each thread is
+ * determined by the perthread_cache UMEM_OPTION. It corresponds to the
+ * umem_ptc_size value. The default value is currently 1 MB. Once
+ * umem_init() has finished, this cannot be tuned without directly modifying
+ * the instruction text. If, upon calling free(3C), the amount cached would
+ * exceed this maximum, the buffer is returned to the umem_cache instead of
+ * being held onto in the thread.
+ *
+ * When a thread calls malloc(3C) it first determines which umem_cache it
+ * would be serviced by. If the allocation is not covered by ptcumem, it
+ * goes to the normal malloc instead. Next, it checks whether the
+ * corresponding tm_roots list is empty. If it is empty, the memory is
+ * allocated from umem_alloc. If it is not empty, the head of the list is
+ * removed, the appropriate malloc tags are set, and that buffer is
+ * returned.
+ *
+ * When a thread calls free(3C) it first looks at the malloc tag; if the tag
+ * is invalid or the allocation exceeds the largest cache in ptcumem, the
+ * buffer is sent off to the original free() to handle and clean up
+ * appropriately. Next, it checks whether the allocation size is covered by
+ * one of the per-thread roots; if it isn't, the buffer is passed off to the
+ * original free() to be released. Finally, before inserting this buffer as
+ * the head, it checks whether adding this buffer would put the thread over
+ * its maximum cache size. If it would, the buffer is freed back to the
+ * umem_cache. Otherwise the thread's total cached amount is incremented and
+ * the buffer becomes the new head of the appropriate tm_roots entry.
+ *
+ * When a thread exits, all of the buffers that it has in its per-thread
+ * cache will be passed to umem_free() and returned to the appropriate
+ * umem_cache.
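The linear cache-to-root mapping is easy to make concrete. The sketch below mirrors the scan that umem_init_walkers() performs earlier in this diff over a zero-terminated copy of umem_alloc_sizes[]; the function name and table values are illustrative, not the commit's code:

	/* Map a cache's buffer size to its tm_roots[] slot; -1 if uncached. */
	static int
	tmem_root_index(const int *sizes, int bufsize)
	{
		int i;

		for (i = 0; sizes[i] != 0; i++) {
			if (sizes[i] == bufsize)
				return (i);	/* cache i uses tm_roots[i] */
		}
		return (-1);
	}

With a size table beginning { 8, 16, 32, ... }, a 32-byte cache would map to tm_roots[2]; an index at or beyond NTMEMBASE (16) cannot be backed per-thread, which is exactly the bound umem_init_walkers() checks.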
+ * Because the set of caches can only be determined once umem_init() has been
+ * called, and because we have the additional goals of minimizing
+ * fragmentation and metadata space overhead in the malloc tags, our hand is
+ * forced down a slightly different path: the one trod by fasttrap and
+ * trapstat.
+ *
+ * During umem_init() we dynamically construct new versions of malloc(3C) and
+ * free(3C) that use the known cache sizes, and then ensure that ptcmalloc
+ * and ptcfree replace malloc and free as entries in the PLT. If ptcmalloc
+ * and ptcfree cannot handle a request, they simply jump to the original
+ * libumem implementations.
+ *
+ * After creating all of the umem_caches, but before making them visible,
+ * umem_cache_init checks that umem_genasm_supported is non-zero. This value
+ * is set by each architecture in $ARCH/umem_genasm.c to indicate whether or
+ * not it supports this. If the value is zero, this process is skipped.
+ * Similarly, if the per-thread cache size has been tuned to zero via
+ * UMEM_OPTIONS, this is also skipped.
+ *
+ * In umem_genasm.c, each architecture implements a single function called
+ * umem_genasm() that is responsible for generating the appropriate versions
+ * of ptcmalloc() and ptcfree(), placing them in the appropriate memory
+ * location, and finally performing the switch from malloc() and free() to
+ * ptcmalloc() and ptcfree(). Once the change has been made, there is no way
+ * to switch back, short of restarting the program or modifying program text
+ * with mdb.
+ *
+ * 8.2 Modifying the Procedure Linkage Table (PLT)
+ * -----------------------------------------------
+ *
+ * The last piece of this puzzle is how we actually jam ptcmalloc() into the
+ * PLT. To handle this, we have defined two functions, _malloc and _free, and
+ * used a special mapfile directive to place them into a readable, writeable,
+ * and executable segment. Next we use a standard #pragma weak for malloc and
+ * free and direct them to those symbols. By default, the text of each of
+ * those symbols is a jump to the default malloc or free function, followed
+ * by nops that reserve space for the functions we will generate.
+ *
+ * When umem_genasm() is called, it goes through and generates new malloc()
+ * and free() functions in the text provided by _malloc and _free, just after
+ * the jump. Once both have been successfully generated, umem_genasm() nops
+ * over the original jump so that calls now fall through into the generated
+ * versions of these functions.
+ *
+ * 8.3 umem_genasm()
+ * -----------------
+ *
+ * umem_genasm() is currently implemented for i386 and amd64. This section
+ * describes the theory behind the construction. For the specific byte code
+ * to assembly instructions and niceish C and asm versions of ptcmalloc and
+ * ptcfree, see the individual umem_genasm.c files. The layout consists of
+ * the following sections:
+ *
+ *	o. function-specific prologue
+ *	o. function-generic cache-selecting elements
+ *	o. function-specific epilogue
+ *
+ * There are three different generic cache elements that exist:
+ *
+ *	o. the last or only cache
+ *	o. the intermediary caches, if there are more than two
+ *	o. the first one, if there is more than one cache
+ *
+ * The malloc and free prologues and epilogues mimic the necessary portions
+ * of libumem's malloc and free. This includes things like checking for size
+ * overflow and setting and verifying the malloc tags; a C-level sketch of
+ * the generated logic follows.
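+ *
+ * This sketch is hand-written for illustration only (it is not the generated
+ * code; tag_and_return() is a stand-in for the tag-setting epilogue, and
+ * tmem_pop() is the helper sketched in the introduction to section 8):
+ *
+ *	void *
+ *	ptcmalloc(size_t orig_size)
+ *	{
+ *		size_t size = orig_size + 8;	(room for the malloc tag)
+ *		if (size < orig_size)		(overflow: punt)
+ *			return (malloc(orig_size));
+ *		tmem_t *t = (tmem_t *)((uintptr_t)curthread() +
+ *		    umem_tmem_off);
+ *		for (int i = 0; i < ncaches; i++) {
+ *			if (size <= cache_size[i]) {
+ *				void *buf = tmem_pop(t, i, cache_size[i]);
+ *				if (buf == NULL)	(root empty: punt)
+ *					break;
+ *				return (tag_and_return(buf, size));
+ *			}
+ *		}
+ *		return (malloc(orig_size));
+ *	}
+ *
+ * ptcfree is symmetric: validate the malloc tag, find the matching root, and
+ * either push the buffer onto it or, if the thread is at its cache cap, hand
+ * the buffer to the original free().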
+ *
+ * It is an important constraint that these functions do not make use of the
+ * call instruction. The only jmp outside of the individual functions is to
+ * the original libumem malloc and free, respectively. Because doing things
+ * like setting errno or raising an internal umem error on improper malloc
+ * tags would require using calls into the PLT, whenever we encounter one of
+ * those cases we simply jump to the original malloc and free functions,
+ * reusing the same stack frame.
+ *
+ * Each of the above sections, the three caches, and the malloc and free
+ * prologue and epilogue are implemented as blocks of machine code with the
+ * corresponding assembly in comments. There are known offsets into each
+ * block that correspond to the locations of data and addresses that we only
+ * know at run time. These blocks are copied as necessary and the blanks
+ * filled in appropriately.
+ *
+ * As mentioned in section 8.2, the trampoline code uses specifically named
+ * variables to communicate the buffers and sizes to use. These variables
+ * are:
+ *
+ *	o. umem_genasm_mptr: The buffer for ptcmalloc
+ *	o. umem_genasm_msize: The size in bytes of the above buffer
+ *	o. umem_genasm_fptr: The buffer for ptcfree
+ *	o. umem_genasm_fsize: The size in bytes of the above buffer
+ *
+ * Finally, to enable the generated assembly we need to remove the previous
+ * jump to the actual malloc that exists at the start of these buffers. On
+ * x86, this is a five byte region. We could zero out the jump offset so that
+ * it becomes a jmp +0, but a single five-byte nop is faster, so that is what
+ * we use on x86. When porting ptcumem to other architectures, the various
+ * opcode changes and options should be analyzed.
+ *
+ * 8.4 Interface with libc.so
+ * --------------------------
+ *
+ * The tmem_t structure, described at the beginning of section 8, is part of
+ * a private interface with libc. Three functions exist to cover this. They
+ * are not documented in man pages or header files; they are in the
+ * SUNWprivate part of libc's mapfile.
+ *
+ *	o. _tmem_get_base(void)
+ *
+ *	Returns the offset from the ulwp_t (curthread) to the tmem_t
+ *	structure. This is a constant for all threads and is effectively a
+ *	way to do ::offsetof ulwp_t ul_tmem without having to know the
+ *	specifics of the structure outside of libc.
+ *
+ *	o. _tmem_get_nentries(void)
+ *
+ *	Returns the number of roots that exist in the tmem_t. This is one
+ *	part of the cap on the number of umem_caches that we can back with
+ *	tmem.
+ *
+ *	o. _tmem_set_cleanup(void (*)(void *, int))
+ *
+ *	This sets a cleanup handler that gets called back when a thread
+ *	exits. There is one call per buffer: the void * is a pointer to the
+ *	buffer on the list, and the int is the index into the roots array for
+ *	this buffer.
+ *
+ * 8.5 Tuning and disabling per-thread caching
+ * -------------------------------------------
+ *
+ * There is only one tunable for per-thread caching: the amount of memory
+ * each thread should be able to cache. This is specified via the
+ * perthread_cache UMEM_OPTION. No attempt is made to sanity check the
+ * specified value; the limit is simply the maximum value of a size_t.
+ *
+ * If the perthread_cache UMEM_OPTION is set to zero, nomagazines was
+ * requested, or UMEM_DEBUG has been turned on, we will never call into
+ * umem_genasm; however, the trampoline and its jump will still be in place.
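+ *
+ * For example (sizes here are purely illustrative), the cap can be raised,
+ * or per-thread caching disabled outright, for a single invocation:
+ *
+ *	UMEM_OPTIONS=perthread_cache=16m ./app	(raise the cap to 16 MB)
+ *	UMEM_OPTIONS=perthread_cache=0 ./app	(disable ptcumem entirely)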
+ *
+ * 8.6 Observing efficacy of per-thread caching
+ * --------------------------------------------
+ *
+ * To understand the efficacy of per-thread caching, use the ::umastat dcmd
+ * to see the percentage of capacity consumed on a per-thread basis, the
+ * degree to which each umem cache contributes to per-thread cache
+ * consumption, and the number of buffers in per-thread caches on a per-umem
+ * cache basis. If more detail is required, the specific buffers in a
+ * per-thread cache can be iterated over with the umem_ptc_* walkers. (These
+ * walkers allow an optional ulwp_t to be specified to iterate only over a
+ * particular thread's cache.)
 */
 
 #include <umem_impl.h>
@@ -473,8 +706,10 @@
 size_t umem_lite_minsize = 0;	/* minimum buffer size for UMF_LITE */
 size_t umem_lite_maxalign = 1024; /* maximum buffer alignment for UMF_LITE */
 size_t umem_maxverify;	/* maximum bytes to inspect in debug routines */
 size_t umem_minfirewall;	/* hardware-enforced redzone threshold */
+size_t umem_ptc_size = 1048576;	/* size of per-thread cache (in bytes) */
 
 uint_t umem_flags = 0;
+uintptr_t umem_tmem_off;
 
 mutex_t		umem_init_lock;		/* locks initialization */
 cond_t		umem_init_cv;		/* initialization CV */
@@ -482,6 +717,8 @@ thread_t	umem_init_thr;		/* thread initializing */
 int		umem_init_env_ready;	/* environ pre-initted */
 int		umem_ready = UMEM_READY_STARTUP;
 
+int		umem_ptc_enabled;	/* per-thread caching enabled */
+
 static umem_nofail_callback_t *nofail_callback;
 static mutex_t	umem_nofail_exit_lock;
 static thread_t	umem_nofail_exit_thr;
@@ -2838,6 +3075,24 @@ umem_alloc_sizes_remove(size_t size)
 	umem_alloc_sizes[i] = 0;
 }
 
+/*
+ * We've been called back from libc to indicate that a thread is terminating
+ * and that it needs to release the per-thread memory that it holds. We are
+ * told which entry in the thread's tmem array the allocation came from.
+ * Currently this refers to the first n umem_caches, which makes this a
+ * pretty simple indexing job.
+ */
+static void
+umem_cache_tmem_cleanup(void *buf, int entry)
+{
+	size_t size;
+	umem_cache_t *cp;
+
+	size = umem_alloc_sizes[entry];
+	cp = umem_alloc_table[(size - 1) >> UMEM_ALIGN_SHIFT];
+	_umem_cache_free(cp, buf);
+}
+
 static int
 umem_cache_init(void)
 {
@@ -2953,6 +3208,16 @@ umem_cache_init(void)
 		umem_alloc_caches[i] = cp;
 	}
 
+	umem_tmem_off = _tmem_get_base();
+	_tmem_set_cleanup(umem_cache_tmem_cleanup);
+
+	if (umem_genasm_supported && !(umem_flags & UMF_DEBUG) &&
+	    !(umem_flags & UMF_NOMAGAZINE) &&
+	    umem_ptc_size > 0) {
+		umem_ptc_enabled = umem_genasm(umem_alloc_sizes,
+		    umem_alloc_caches, i) == 0 ? 1 : 0;
+	}
+
 	/*
 	 * Initialization cannot fail at this point. Make the caches
 	 * visible to umem_alloc() and friends.
diff --git a/usr/src/lib/libumem/common/umem_base.h b/usr/src/lib/libumem/common/umem_base.h
index e78bebfb58..c845331fbc 100644
--- a/usr/src/lib/libumem/common/umem_base.h
+++ b/usr/src/lib/libumem/common/umem_base.h
@@ -22,12 +22,13 @@
  * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
 
 #ifndef _UMEM_BASE_H
 #define	_UMEM_BASE_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <umem_impl.h>
 
 #ifdef	__cplusplus
@@ -75,6 +76,8 @@ extern volatile uint32_t umem_reaping;
 #define	UMEM_REAP_ADDING	0x00000001	/* umem_reap() is active */
 #define	UMEM_REAP_ACTIVE	0x00000002	/* update thread is reaping */
 
+extern uintptr_t umem_tmem_off;
+
 /*
  * umem.c: tunables
  */
@@ -97,6 +100,7 @@ extern size_t umem_lite_minsize;
 extern size_t umem_lite_maxalign;
 extern size_t umem_maxverify;
 extern size_t umem_minfirewall;
+extern size_t umem_ptc_size;
 
 extern uint32_t umem_flags;
 
@@ -139,6 +143,20 @@ extern int umem_create_update_thread(void);
 void umem_setup_envvars(int);
 void umem_process_envvars(void);
 
+/*
+ * umem_genasm.c: private interfaces
+ */
+extern const int umem_genasm_supported;
+extern int umem_genasm(int *, umem_cache_t **, int);
+
+/*
+ * malloc.c: traditional malloc/free interface for genasm
+ */
+extern void *umem_malloc(size_t);
+extern void umem_malloc_free(void *);
+extern void *_malloc(size_t);
+extern void _free(void *);
+
 #ifdef	__cplusplus
 }
 #endif
diff --git a/usr/src/lib/libumem/common/umem_impl.h b/usr/src/lib/libumem/common/umem_impl.h
index 84313c32ed..f63246e166 100644
--- a/usr/src/lib/libumem/common/umem_impl.h
+++ b/usr/src/lib/libumem/common/umem_impl.h
@@ -21,10 +21,13 @@
  */
 /*
  * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Copyright 2012 Joyent, Inc. All rights reserved.
  * Use is subject to license terms.
  */
 
+/*
+ * Copyright (c) 2012 Joyent, Inc. All rights reserved.
+ */
+
 #ifndef _UMEM_IMPL_H
 #define	_UMEM_IMPL_H
 
@@ -63,6 +66,7 @@ extern "C" {
 
 #define	UMF_HASH	0x00000200	/* cache has hash table */
 #define	UMF_RANDOMIZE	0x00000400	/* randomize other umem_flags */
+#define	UMF_PTC		0x00000800	/* cache has per-thread caching */
 
 #define	UMF_BUFTAG	(UMF_DEADBEEF | UMF_REDZONE)
 #define	UMF_TOUCH	(UMF_BUFTAG | UMF_LITE | UMF_CONTENTS)
@@ -395,6 +399,13 @@ extern void umem_startup(caddr_t, size_t, size_t, caddr_t, caddr_t);
 extern int umem_add(caddr_t, size_t);
 #endif
 
+/*
+ * Private interface with libc for ptcumem.
+ */
+extern uintptr_t _tmem_get_base(void);
+extern int _tmem_get_nentries(void);
+extern void _tmem_set_cleanup(void (*)(void *, int));
+
 #ifdef	__cplusplus
 }
 #endif
diff --git a/usr/src/lib/libumem/i386/asm_subr.s b/usr/src/lib/libumem/i386/asm_subr.s
index 2edb2b49b5..5ad5345c6d 100644
--- a/usr/src/lib/libumem/i386/asm_subr.s
+++ b/usr/src/lib/libumem/i386/asm_subr.s
@@ -24,10 +24,32 @@
  * Use is subject to license terms.
 */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/asm_linkage.h>
 
+#define	NOP4	\
+	nop;	\
+	nop;	\
+	nop;	\
+	nop;
+
+#define	NOP16	\
+	NOP4	\
+	NOP4	\
+	NOP4	\
+	NOP4
+
+#define	NOP64	\
+	NOP16	\
+	NOP16	\
+	NOP16	\
+	NOP16
+
+#define	NOP256	\
+	NOP64	\
+	NOP64	\
+	NOP64	\
+	NOP64
+
 #if defined(lint)
 
 void *
@@ -69,4 +91,25 @@ _breakpoint(void)
 	SET_SIZE(_breakpoint)
 
 #endif
+	ENTRY(_malloc)
+	jmp	umem_malloc;
+	NOP256
+	NOP256
+#if defined(__amd64)
+	NOP64
+#endif
+	SET_SIZE(_malloc)
+
+	ENTRY(_free)
+	jmp	umem_malloc_free;
+	NOP256
+	NOP256
+#if defined(__amd64)
+	NOP64
+#endif
+	SET_SIZE(_free)
+
+	ANSI_PRAGMA_WEAK2(malloc,_malloc,function)
+	ANSI_PRAGMA_WEAK2(free,_free,function)
+
 #endif /* lint */
diff --git a/usr/src/lib/libumem/i386/umem_genasm.c b/usr/src/lib/libumem/i386/umem_genasm.c
new file mode 100644
index 0000000000..530a83e486
--- /dev/null
+++ b/usr/src/lib/libumem/i386/umem_genasm.c
@@ -0,0 +1,595 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Don't Panic! If you find the blocks of assembly that follow confusing and
+ * you're questioning why they exist, please go read section 8 of the umem.c
+ * big theory statement. Next familiarize yourself with the malloc and free
+ * implementations in libumem's malloc.c.
+ *
+ * What follows is the i386 implementation of the thread caching automatic
+ * assembly generation. On i386, a function has only three registers it is
+ * allowed to change without restoring them: eax, ecx, and edx. All others
+ * have to be preserved. Since the set of registers we have available is so
+ * small, we have to make use of esi, ebx, and edi and save their original
+ * values to the stack.
+ *
+ * Malloc register usage:
+ *	o. esi: Size of the malloc (passed into us and modified)
+ *	o. edi: Size of the cache
+ *	o. eax: Buffer to return
+ *	o. ebx: Scratch space and temporary values
+ *	o. ecx: Pointer to the tmem_t in the ulwp_t.
+ *	o. edx: Pointer to the tmem_t's array of roots
+ *
+ * Free register usage:
+ *	o. esi: Size of the allocation (loaded from the buffer's malloc tag)
+ *	o. edi: Size of the cache
+ *	o. eax: Buffer to free
+ *	o. ebx: Scratch space and temporary values
+ *	o. ecx: Pointer to the tmem_t in the ulwp_t.
+ *	o. edx: Pointer to the tmem_t's array of roots
+ *
+ * Once we determine which cache we are using, we increment %edx to the
+ * appropriate offset and set %edi with the size of the cache. This means
+ * that when we break out to the normal buffer allocation point, %edx points
+ * to the head of the linked list and %edi is the amount by which we have to
+ * adjust the total amount cached by the thread.
+ *
+ * Each block of assembly has pseudocode that describes its purpose.
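+ *
+ * For illustration only, with two hypothetical caches of 16 and 32 bytes
+ * (the real blocks below get their immediates and jump offsets filled in at
+ * run time), the cache-selection cascade amounts to:
+ *
+ *	cmpl	$16, %esi		(size <= 16?)
+ *	ja	1f			(no: try the next cache)
+ *	movl	$16, %edi		(csize = 16; roots += 0)
+ *	jmp	allocbuf
+ * 1:	cmpl	$32, %esi		(size <= 32?)
+ *	ja	errout			(no: fall back to the original code)
+ *	addl	$4, %edx		(roots += 1)
+ *	movl	$32, %edi		(csize = 32; fall into allocbuf)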
+ */
+
+#include <inttypes.h>
+#include <strings.h>
+#include <umem_impl.h>
+#include "umem_base.h"
+
+#include <atomic.h>
+
+const int umem_genasm_supported = 1;
+static uintptr_t umem_genasm_mptr = (uintptr_t)&_malloc;
+static size_t umem_genasm_msize = 512;
+static uintptr_t umem_genasm_fptr = (uintptr_t)&_free;
+static size_t umem_genasm_fsize = 512;
+static uintptr_t umem_genasm_omptr = (uintptr_t)umem_malloc;
+static uintptr_t umem_genasm_ofptr = (uintptr_t)umem_malloc_free;
+/*
+ * The maximum number of caches we can support. We use a single byte addl, so
+ * this is 255 (UINT8_MAX) / sizeof (uintptr_t). In this case, 63.
+ */
+#define	UMEM_GENASM_MAX32	63
+
+#define	PTC_JMPADDR(dest, src)	(dest - (src + 4))
+#define	PTC_ROOT_SIZE	sizeof (uintptr_t)
+#define	MULTINOP	0x0000441f0f
+
+/*
+ * void *ptcmalloc(size_t orig_size);
+ *
+ * size_t size = orig_size + 8;
+ *
+ * if (size < orig_size)
+ *	goto tomalloc;		! This is overflow
+ *
+ * if (size > cache_size)
+ *	goto tomalloc;
+ *
+ * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
+ * void **roots = t->tm_roots;
+ */
+#define	PTC_MALINIT_JOUT	0x0e
+#define	PTC_MALINIT_MCS	0x14
+#define	PTC_MALINIT_JOV	0x1a
+#define	PTC_MALINIT_SOFF	0x27
+static const uint8_t malinit[] = {
+	0x55,					/* pushl %ebp */
+	0x89, 0xe5,				/* movl %esp, %ebp */
+	0x57,					/* pushl %edi */
+	0x56,					/* pushl %esi */
+	0x53,					/* pushl %ebx */
+	0x8b, 0x75, 0x08,			/* movl 0x8(%ebp), %esi */
+	0x83, 0xc6, 0x08,			/* addl $0x8,%esi */
+	0x0f, 0x82, 0x00, 0x00, 0x00, 0x00,	/* jc +$JMP (errout) */
+	0x81, 0xfe, 0x00, 0x00, 0x00, 0x00,	/* cmpl sizeof ($C0), %esi */
+	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +$JMP (errout) */
+	0x65, 0x8b, 0x0d, 0x00, 0x00, 0x00, 0x00, /* movl %gs:0x0,%ecx */
+	0x81, 0xc1, 0x00, 0x00, 0x00, 0x00,	/* addl $OFF, %ecx */
+	0x8d, 0x51, 0x04			/* leal 0x4(%ecx), %edx */
+};
+
+/*
+ * void ptcfree(void *buf);
+ *
+ * if (buf == NULL)
+ *	return;
+ *
+ * malloc_data_t *tag = buf;
+ * tag--;
+ * int size = tag->malloc_size;
+ * int tagval = UMEM_MALLOC_DECODE(tag->malloc_tag, size);
+ *
+ * if (tagval != MALLOC_MAGIC)
+ *	goto tofree;
+ *
+ * if (size > cache_max)
+ *	goto tofree;
+ *
+ * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
+ * void **roots = t->tm_roots;
+ */
+#define	PTC_FRINI_JDONE	0x0d
+#define	PTC_FRINI_JFREE	0x23
+#define	PTC_FRINI_MCS	0x29
+#define	PTC_FRINI_JOV	0x2f
+#define	PTC_FRINI_SOFF	0x3c
+static const uint8_t freeinit[] = {
+	0x55,					/* pushl %ebp */
+	0x89, 0xe5,				/* movl %esp, %ebp */
+	0x57,					/* pushl %edi */
+	0x56,					/* pushl %esi */
+	0x53,					/* pushl %ebx */
+	0x8b, 0x45, 0x08,			/* movl 0x8(%ebp), %eax */
+	0x85, 0xc0,				/* testl %eax, %eax */
+	0x0f, 0x84, 0x00, 0x00, 0x00, 0x00,	/* je $JDONE (done) */
+	0x83, 0xe8, 0x08,			/* subl $0x8,%eax */
+	0x8b, 0x30,				/* movl (%eax),%esi */
+	0x8b, 0x50, 0x04,			/* movl 0x4(%eax),%edx */
+	0x01, 0xf2,				/* addl %esi,%edx */
+	0x81, 0xfa, 0x00, 0xc0, 0x10, 0x3a,	/* cmpl MAGIC32, %edx */
+	0x0f, 0x85, 0x00, 0x00, 0x00, 0x00,	/* jne +JFREE (goto freebuf) */
+
+	0x81, 0xfe, 0x00, 0x00, 0x00, 0x00,	/* cmpl sizeof ($C0), %esi */
+	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +$JMP (errout) */
+	0x65, 0x8b, 0x0d, 0x00, 0x00, 0x00, 0x00, /* movl %gs:0x0,%ecx */
+	0x81, 0xc1, 0x00, 0x00, 0x00, 0x00,	/* addl $OFF, %ecx */
+	0x8d, 0x51, 0x04			/* leal 0x4(%ecx),%edx */
+};
+
+/*
+ * if (size <= $CACHE_SIZE) {
+ *	csize = $CACHE_SIZE;
+ * } else ...			! goto next cache
+ */
+#define	PTC_INICACHE_CMP	0x02
+#define	PTC_INICACHE_SIZE	0x09
+#define	PTC_INICACHE_JMP	0x0e
+static const uint8_t inicache[] = {
+	0x81, 0xfe, 0xff, 0x00, 0x00, 0x00,	/* cmpl sizeof ($C0), %esi */
+	0x77, 0x0a,				/* ja +0xa */
+	0xbf, 0xff, 0x00, 0x00, 0x00,		/* movl sizeof ($C0), %edi */
+	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp +$JMP (allocbuf) */
+};
+
+/*
+ * if (size <= $CACHE_SIZE) {
+ *	csize = $CACHE_SIZE;
+ *	roots += $CACHE_NUM;
+ * } else ...			! goto next cache
+ */
+#define	PTC_GENCACHE_CMP	0x02
+#define	PTC_GENCACHE_NUM	0x0a
+#define	PTC_GENCACHE_SIZE	0x0c
+#define	PTC_GENCACHE_JMP	0x11
+static const uint8_t gencache[] = {
+	0x81, 0xfe, 0x00, 0x00, 0x00, 0x00,	/* cmpl sizeof ($CACHE), %esi */
+	0x77, 0x0d,				/* ja +0xd (next cache) */
+	0x83, 0xc2, 0x00,			/* addl $4*$ii, %edx */
+	0xbf, 0x00, 0x00, 0x00, 0x00,		/* movl sizeof ($CACHE), %edi */
+	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp +$JMP (allocbuf) */
+};
+
+/*
+ * else if (size <= $CACHE_SIZE) {
+ *	csize = $CACHE_SIZE;
+ *	roots += $CACHE_NUM;
+ * } else {
+ *	goto tofunc;		! goto tomalloc if ptcmalloc.
+ * }				! goto tofree if ptcfree.
+ */
+#define	PTC_FINCACHE_CMP	0x02
+#define	PTC_FINCACHE_JMP	0x07
+#define	PTC_FINCACHE_NUM	0x0a
+#define	PTC_FINCACHE_SIZE	0x0c
+static const uint8_t fincache[] = {
+	0x81, 0xfe, 0xff, 0x00, 0x00, 0x00,	/* cmpl sizeof ($CLAST), %esi */
+	0x77, 0x00,				/* ja +$JMP (to errout) */
+	0x83, 0xc2, 0x00,			/* addl $4*($NCACHES-1), %edx */
+	0xbf, 0x00, 0x00, 0x00, 0x00,		/* movl sizeof ($CLAST), %edi */
+};
+
+/*
+ * if (*root == NULL)
+ *	goto tomalloc;
+ *
+ * malloc_data_t *ret = *root;
+ * *root = *(void **)ret;
+ * t->tm_size -= csize;
+ * ret->malloc_size = size;
+ *
+ * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size);
+ * ret++;
+ *
+ * return ((void *)ret);
+ * tomalloc:
+ *	return (malloc(orig_size));
+ */
+#define	PTC_MALFINI_ALLABEL	0x00
+#define	PTC_MALFINI_JMLABEL	0x20
+#define	PTC_MALFINI_JMADDR	0x25
+static const uint8_t malfini[] = {
+	/* allocbuf: */
+	0x8b, 0x02,				/* movl (%edx), %eax */
+	0x85, 0xc0,				/* testl %eax, %eax */
+	0x74, 0x1a,				/* je +0x1a (errout) */
+	0x8b, 0x18,				/* movl (%eax), %ebx */
+	0x89, 0x1a,				/* movl %ebx, (%edx) */
+	0x29, 0x39,				/* subl %edi, (%ecx) */
+	0x89, 0x30,				/* movl %esi, (%eax) */
+	0xba, 0x00, 0xc0, 0x10, 0x3a,		/* movl $0x3a10c000,%edx */
+	0x29, 0xf2,				/* subl %esi, %edx */
+	0x89, 0x50, 0x04,			/* movl %edx, 0x4(%eax) */
+	0x83, 0xc0, 0x08,			/* addl $0x8, %eax */
+	0x5b,					/* popl %ebx */
+	0x5e,					/* popl %esi */
+	0x5f,					/* popl %edi */
+	0xc9,					/* leave */
+	0xc3,					/* ret */
+	/* errout: */
+	0x5b,					/* popl %ebx */
+	0x5e,					/* popl %esi */
+	0x5f,					/* popl %edi */
+	0xc9,					/* leave */
+	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp $malloc */
+};
+
+/*
+ * if (t->tm_size + csize > umem_ptc_size)
+ *	goto tofree;
+ *
+ * t->tm_size += csize;
+ * *(void **)tag = *root;
+ * *root = tag;
+ * return;
+ * tofree:
+ *	free(buf);
+ *	return;
+ */
+#define	PTC_FRFINI_RBUFLABEL	0x00
+#define	PTC_FRFINI_CACHEMAX	0x06
+#define	PTC_FRFINI_DONELABEL	0x14
+#define	PTC_FRFINI_JFLABEL	0x19
+#define	PTC_FRFINI_JFADDR	0x1e
+static const uint8_t freefini[] = {
+	/* freebuf: */
+	0x8b, 0x19,				/* movl (%ecx),%ebx */
+	0x01, 0xfb,				/* addl %edi,%ebx */
+	0x81, 0xfb, 0x00, 0x00, 0x00, 0x00,	/* cmpl maxsize, %ebx */
+	0x73, 0x0d,				/* jae +0xd <tofree> */
+	0x01, 0x39,				/* addl %edi,(%ecx) */
+	0x8b, 0x3a,				/* movl (%edx),%edi */
+	0x89, 0x38,				/* movl %edi,(%eax) */
+	0x89, 0x02,				/* movl %eax,(%edx) */
+	/* done: */
+	0x5b,					/* popl %ebx */
+	0x5e,					/* popl %esi */
+	0x5f,					/* popl %edi */
+	0xc9,					/* leave */
+	0xc3,					/* ret */
+	/* realfree: */
+	0x5b,					/* popl %ebx */
+	0x5e,					/* popl %esi */
+	0x5f,					/* popl %edi */
+	0xc9,					/* leave */
+	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp free */
+};
+
+/*
+ * Construct the initial part of malloc. off contains the offset from
+ * curthread to the root of the tmem structure. ep is the offset of the error
+ * label, which bails out to the original malloc. csize is the size of the
+ * largest umem_cache in ptcumem.
+ */
+static int
+genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize)
+{
+	uint32_t addr;
+
+	bcopy(malinit, bp, sizeof (malinit));
+	addr = PTC_JMPADDR(ep, PTC_MALINIT_JOUT);
+	bcopy(&addr, bp + PTC_MALINIT_JOUT, sizeof (addr));
+	bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize));
+	addr = PTC_JMPADDR(ep, PTC_MALINIT_JOV);
+	bcopy(&addr, bp + PTC_MALINIT_JOV, sizeof (addr));
+	bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off));
+
+	return (sizeof (malinit));
+}
+
+static int
+genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mc)
+{
+	uint32_t addr;
+
+	bcopy(freeinit, bp, sizeof (freeinit));
+	addr = PTC_JMPADDR(dp, PTC_FRINI_JDONE);
+	bcopy(&addr, bp + PTC_FRINI_JDONE, sizeof (addr));
+	addr = PTC_JMPADDR(ep, PTC_FRINI_JFREE);
+	bcopy(&addr, bp + PTC_FRINI_JFREE, sizeof (addr));
+	bcopy(&mc, bp + PTC_FRINI_MCS, sizeof (mc));
+	addr = PTC_JMPADDR(ep, PTC_FRINI_JOV);
+	bcopy(&addr, bp + PTC_FRINI_JOV, sizeof (addr));
+	bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off));
+	return (sizeof (freeinit));
+}
+
+/*
+ * Create the initial cache entry of the specified size. The value of ap
+ * tells us the address of the label at which we try to allocate a buffer; it
+ * is expressed as an offset from the current base.
+ */
+static int
+genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap)
+{
+	uint32_t addr;
+
+	bcopy(inicache, bp, sizeof (inicache));
+	bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize));
+	bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize));
+	addr = PTC_JMPADDR(ap, PTC_INICACHE_JMP);
+	ASSERT(addr != 0);
+	bcopy(&addr, bp + PTC_INICACHE_JMP, sizeof (addr));
+
+	return (sizeof (inicache));
+}
+
+static int
+genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap)
+{
+	uint32_t addr;
+	uint8_t coff;
+
+	ASSERT(256 / PTC_ROOT_SIZE > num);
+	ASSERT(num != 0);
+	bcopy(gencache, bp, sizeof (gencache));
+	bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize));
+	bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize));
+	coff = num * PTC_ROOT_SIZE;
+	bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff));
+	addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP);
+	bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr));
+
+	return (sizeof (gencache));
+}
+
+static int
+genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep)
+{
+	uint8_t addr;
+
+	ASSERT(ep <= 0xff && ep > 7);
+	ASSERT(256 / PTC_ROOT_SIZE > num);
+	bcopy(fincache, bp, sizeof (fincache));
+	bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize));
+	bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize));
+	addr = num * PTC_ROOT_SIZE;
+	bcopy(&addr, bp + PTC_FINCACHE_NUM, sizeof (addr));
+	addr = ep - PTC_FINCACHE_JMP - 1;
+	bcopy(&addr, bp + PTC_FINCACHE_JMP, sizeof (addr));
+
+	return (sizeof (fincache));
+}
+
+static int
+genasm_malfini(uint8_t *bp, uintptr_t mptr)
+{
+	uint32_t addr;
+
+	bcopy(malfini, bp, sizeof (malfini));
+	addr = PTC_JMPADDR(mptr, ((uintptr_t)bp + PTC_MALFINI_JMADDR));
+	bcopy(&addr, bp + PTC_MALFINI_JMADDR, sizeof (addr));
+
+	return (sizeof (malfini));
+}
+
+static int
+genasm_frfini(uint8_t *bp, uint32_t maxthr, uintptr_t fptr)
+{
+	uint32_t addr;
+
+	bcopy(freefini, bp, sizeof (freefini));
+	bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr));
+	addr = PTC_JMPADDR(fptr, ((uintptr_t)bp + PTC_FRFINI_JFADDR));
+	bcopy(&addr, bp + PTC_FRFINI_JFADDR, sizeof (addr));
+
+	return (sizeof (freefini));
+}
+
+/*
+ * The malloc inline assembly is constructed as follows:
+ *
+ *	o Malloc prologue assembly
+ *	o Generic first-cache check
+ *	o n Generic cache checks (where n = _tmem_get_nentries() - 2)
+ *	o Generic last-cache check
+ *	o Malloc epilogue assembly
+ *
+ * Generally there are at least three caches. When there is only one cache,
+ * we use only the generic last-cache. When there are two caches, we simply
+ * leave out the middle checks.
+ */
+static int
+genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes)
+{
+	int ii, off;
+	uint8_t *bp;
+	size_t total;
+	uint32_t allocoff, erroff;
+
+	total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache);
+
+	if (nents >= 2)
+		total += sizeof (inicache) + sizeof (gencache) * (nents - 2);
+
+	if (total > len)
+		return (1);
+
+	erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL;
+	allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL;
+
+	bp = base;
+
+	off = genasm_malinit(bp, umem_tmem_off, erroff,
+	    umem_alloc_sizes[nents-1]);
+	bp += off;
+	allocoff -= off;
+	erroff -= off;
+
+	if (nents > 1) {
+		off = genasm_firstcache(bp, umem_alloc_sizes[0], allocoff);
+		bp += off;
+		allocoff -= off;
+		erroff -= off;
+	}
+
+	for (ii = 1; ii < nents - 1; ii++) {
+		off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], allocoff);
+		bp += off;
+		allocoff -= off;
+		erroff -= off;
+	}
+
+	bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1],
+	    erroff);
+	bp += genasm_malfini(bp, umem_genasm_omptr);
+	ASSERT(((uintptr_t)bp - total) == (uintptr_t)base);
+
+	return (0);
+}
+
+static int
+genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes)
+{
+	uint8_t *bp;
+	int ii, off;
+	size_t total;
+	uint32_t rbufoff, retoff, erroff;
+
+	/* Assume that nents has already been audited for us */
+	total = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache);
+	if (nents >= 2)
+		total += sizeof (inicache) + sizeof (gencache) * (nents - 2);
+
+	if (total > len)
+		return (1);
+
+	erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL);
+	rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL);
+	retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL);
+
+	bp = base;
+
+	off = genasm_frinit(bp, umem_tmem_off, retoff, erroff,
+	    umem_alloc_sizes[nents - 1]);
+	bp += off;
+	erroff -= off;
+	rbufoff -= off;
+
+	if (nents > 1) {
+		off = genasm_firstcache(bp, umem_alloc_sizes[0], rbufoff);
+		bp += off;
+		erroff -= off;
+		rbufoff -= off;
+	}
+
+	for (ii = 1; ii < nents - 1; ii++) {
+		off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], rbufoff);
+		bp += off;
+		rbufoff -= off;
+		erroff -= off;
+	}
+
+	bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1],
+	    erroff);
+	bp += genasm_frfini(bp, umem_ptc_size, umem_genasm_ofptr);
+	ASSERT(((uintptr_t)bp - total) == (uintptr_t)base);
+
+	return (0);
+}
+
+int
+umem_genasm(int *alloc_sizes, umem_cache_t **caches, int ncaches)
+{
+	int nents, i;
+	uint8_t *mptr;
+	uint8_t *fptr;
+	uint64_t v, *vptr;
+
+	mptr = (void *)((uintptr_t)umem_genasm_mptr + 5);
+	fptr = (void *)((uintptr_t)umem_genasm_fptr + 5);
+	if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 ||
+	    umem_genasm_fptr == 0 || umem_genasm_fsize == 0)
+		return (1);
+
+	/*
+	 * The total number of caches that we can service is the minimum of:
+	 *  o the amount supported by libc
+	 *  o the total number of umem caches
+	 *  o the number we can encode with a single-byte addl: 255 /
+	 *    sizeof (uintptr_t), which is 63 for 32-bit.
+	 */
+	nents = _tmem_get_nentries();
+
+	if (UMEM_GENASM_MAX32 < nents)
+		nents = UMEM_GENASM_MAX32;
+
+	if (ncaches < nents)
+		nents = ncaches;
+
+	/* Based on our constraints, this is not an error */
+	if (nents == 0 || umem_ptc_size == 0)
+		return (0);
+
+	/* Take into account the jump */
+	if (genasm_malloc(mptr, umem_genasm_msize, nents,
+	    alloc_sizes) != 0)
+		return (1);
+
+	if (genasm_free(fptr, umem_genasm_fsize, nents,
+	    alloc_sizes) != 0)
+		return (1);
+
+	/* nop out the jump with a multibyte nop */
+	vptr = (void *)umem_genasm_mptr;
+	v = MULTINOP;
+	v |= *vptr & (0xffffffULL << 40);
+	(void) atomic_swap_64(vptr, v);
+	vptr = (void *)umem_genasm_fptr;
+	v = MULTINOP;
+	v |= *vptr & (0xffffffULL << 40);
+	(void) atomic_swap_64(vptr, v);
+
+	for (i = 0; i < nents; i++)
+		caches[i]->cache_flags |= UMF_PTC;
+
+	return (0);
+}
diff --git a/usr/src/lib/libumem/sparc/umem_genasm.c b/usr/src/lib/libumem/sparc/umem_genasm.c
new file mode 100644
index 0000000000..4bdea8122d
--- /dev/null
+++ b/usr/src/lib/libumem/sparc/umem_genasm.c
@@ -0,0 +1,38 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2012 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Don't Panic! If you wonder why this seemingly empty file exists, it's
+ * because there is no sparc implementation for ptcumem. Go read libumem's
+ * big theory statement in lib/libumem/common/umem.c, particularly section
+ * eight.
+ */
+
+#include <umem_impl.h>
+#include "umem_base.h"
+
+const int umem_genasm_supported = 0;
+
+/*ARGSUSED*/
+int
+umem_genasm(int *alloc_sizes, umem_cache_t **caches, int ncaches)
+{
+	return (1);
+}
diff --git a/usr/src/man/man3malloc/umem_alloc.3malloc b/usr/src/man/man3malloc/umem_alloc.3malloc
index cc8e3df369..d8680ca083 100644
--- a/usr/src/man/man3malloc/umem_alloc.3malloc
+++ b/usr/src/man/man3malloc/umem_alloc.3malloc
@@ -174,6 +174,19 @@ Set the underlying function used to allocate memory. This option can be set to
 \fBmmap\fR(2)-based source. If set to a value that is not supported, \fBsbrk\fR
 will be used.
 .RE
+.sp
+.ne 2
+.na
+\fB\fBperthread_cache\fR=\fBsize\fR\fR
+.ad
+.RS 16n
+libumem allows each thread to cache recently freed small allocations for
+future allocations. The size argument, which accepts the k, m, g, and t
+suffixes, denotes the maximum amount of memory each thread can use for this
+purpose. The default amount used is 1 MB. Any buffers in the per-thread cache
+are freed when the thread exits. The efficacy of the per-thread cache can be
+determined with the \fB::umastat\fR dcmd of the \fBmdb\fR(1) debugger.
+.RE
 .ne 2
 .na