diff options
37 files changed, 2635 insertions, 52 deletions
@@ -1,3 +1,28 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 2012, Joyent, Inc. All rights reserved. +# +# This lists all the files that illumos-joyent delivers as part of a build. +# s bin=usr/bin d boot 0755 root sys d boot/acpi 0755 root sys @@ -1205,6 +1230,8 @@ f lib/amd64/libtsol.so.2 0755 root bin s lib/amd64/libtsol.so=libtsol.so.2 f lib/amd64/libumem.so.1 0755 root bin s lib/amd64/libumem.so=libumem.so.1 +f lib/amd64/libumem_trampoline.so.1 0755 root bin +s lib/amd64/libumem_trampoline.so=libumem_trampoline.so.1 f lib/amd64/libuuid.so.1 0755 root bin s lib/amd64/libuuid.so=libuuid.so.1 f lib/amd64/libuutil.so.1 0755 root bin @@ -1396,6 +1423,8 @@ f lib/libtsol.so.2 0755 root bin s lib/libtsol.so=libtsol.so.2 f lib/libumem.so.1 0755 root bin s lib/libumem.so=libumem.so.1 +f lib/libumem_trampoline.so.1 0755 root bin +s lib/libumem_trampoline.so=libumem_trampoline.so.1 f lib/libuuid.so.1 0755 root bin s lib/libuuid.so=libuuid.so.1 f lib/libuutil.so.1 0755 root bin @@ -4946,6 +4975,8 @@ s usr/lib/amd64/libtsol.so=../../../lib/amd64/libtsol.so.2 s usr/lib/amd64/libtsol.so.2=../../../lib/amd64/libtsol.so.2 s usr/lib/amd64/libumem.so.1=../../../lib/amd64/libumem.so.1 s 
usr/lib/amd64/libumem.so=../../../lib/amd64/libumem.so.1 +s usr/lib/amd64/libumem_trampoline.so.1=../../../lib/amd64/libumem_trampoline.so.1 +s usr/lib/amd64/libumem_trampoline.so=../../../lib/amd64/libumem_trampoline.so.1 s usr/lib/amd64/libuuid.so.1=../../../lib/amd64/libuuid.so.1 s usr/lib/amd64/libuuid.so=../../../lib/amd64/libuuid.so.1 s usr/lib/amd64/libuutil.so.1=../../../lib/amd64/libuutil.so.1 @@ -6367,6 +6398,8 @@ s usr/lib/libtsol.so=../../lib/libtsol.so.2 s usr/lib/libtsol.so.2=../../lib/libtsol.so.2 s usr/lib/libumem.so.1=../../lib/libumem.so.1 s usr/lib/libumem.so=../../lib/libumem.so.1 +s usr/lib/libumem_trampoline.so.1=../../lib/libumem_trampoline.so.1 +s usr/lib/libumem_trampoline.so=../../lib/libumem_trampoline.so.1 f usr/lib/libunistat.so.1 0755 root bin s usr/lib/libunistat.so=libunistat.so.1 s usr/lib/libuuid.so.1=../../lib/libuuid.so.1 diff --git a/usr/src/cmd/mdb/common/modules/libc/libc.c b/usr/src/cmd/mdb/common/modules/libc/libc.c index da8bbc6828..e3c9b9198f 100644 --- a/usr/src/cmd/mdb/common/modules/libc/libc.c +++ b/usr/src/cmd/mdb/common/modules/libc/libc.c @@ -22,6 +22,9 @@ /* * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
+ */ #include <sys/mdb_modapi.h> #include <mdb/mdb_whatis.h> @@ -679,6 +682,12 @@ d_ulwp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) prt_addr((void *)(addr + OFFSET(ul_spinlock)), 1), prt_addr((void *)(addr + OFFSET(ul_fpuenv)), 0)); + HD("tmem.size &tmem.roots"); + mdb_printf(OFFSTR "%-21H %s\n", + OFFSET(ul_tmem), + ulwp.ul_tmem.tm_size, + prt_addr((void *)(addr + OFFSET(ul_tmem) + sizeof (size_t)), 0)); + return (DCMD_OK); } diff --git a/usr/src/cmd/mdb/common/modules/libumem/libumem.c b/usr/src/cmd/mdb/common/modules/libumem/libumem.c index 4a77c5aa82..0984edbdf0 100644 --- a/usr/src/cmd/mdb/common/modules/libumem/libumem.c +++ b/usr/src/cmd/mdb/common/modules/libumem/libumem.c @@ -23,6 +23,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ + #include "umem.h" #include <libproc.h> #include <mdb/mdb_modapi.h> @@ -34,6 +38,8 @@ #include <umem_impl.h> #include <sys/vmem_impl_user.h> +#include <thr_uberdata.h> +#include <stdio.h> #include "umem_pagesize.h" @@ -44,24 +50,33 @@ typedef struct datafmt { char *fmt; } datafmt_t; +static datafmt_t ptcfmt[] = { + { " ", "tid", "---", "%3u " }, + { " memory", " cached", "-------", "%7lH " }, + { " %", "cap", "---", "%3u " }, + { " %", NULL, "---", "%3u " }, + { NULL, NULL, NULL, NULL } +}; + static datafmt_t umemfmt[] = { { "cache ", "name ", "-------------------------", "%-25s " }, { " buf", " size", "------", "%6u " }, - { " buf", "in use", "------", "%6u " }, - { " buf", " total", "------", "%6u " }, - { " memory", " in use", "---------", "%9u " }, + { " buf", " in use", "-------", "%7u " }, + { " buf", " in ptc", "-------", "%7s " }, + { " buf", " total", "-------", "%7u " }, + { " memory", " in use", "-------", "%7H " }, { " alloc", " succeed", "---------", "%9u " }, - { "alloc", " fail", "-----", "%5llu " }, + { "alloc", " fail", "-----", "%5llu" }, { NULL, NULL, NULL, NULL } }; static datafmt_t vmemfmt[] = { { "vmem ", "name ", 
"-------------------------", "%-*s " }, - { " memory", " in use", "---------", "%9llu " }, - { " memory", " total", "----------", "%10llu " }, - { " memory", " import", "---------", "%9llu " }, + { " memory", " in use", "---------", "%9H " }, + { " memory", " total", "----------", "%10H " }, + { " memory", " import", "---------", "%9H " }, { " alloc", " succeed", "---------", "%9llu " }, { "alloc", " fail", "-----", "%5llu " }, { NULL, NULL, NULL, NULL } @@ -105,14 +120,105 @@ typedef struct umastat_vmem { int kv_fail; } umastat_vmem_t; +/*ARGSUSED*/ +static int +umastat_cache_nptc(uintptr_t addr, const umem_cache_t *cp, int *nptc) +{ + if (!(cp->cache_flags & UMF_PTC)) + return (WALK_NEXT); + + (*nptc)++; + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_cache_hdr(uintptr_t addr, const umem_cache_t *cp, void *ignored) +{ + if (!(cp->cache_flags & UMF_PTC)) + return (WALK_NEXT); + + mdb_printf("%3d ", cp->cache_bufsize); + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_lwp_ptc(uintptr_t addr, void *buf, int *nbufs) +{ + (*nbufs)++; + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_lwp_cache(uintptr_t addr, const umem_cache_t *cp, ulwp_t *ulwp) +{ + char walk[60]; + int nbufs = 0; + + if (!(cp->cache_flags & UMF_PTC)) + return (WALK_NEXT); + + (void) snprintf(walk, sizeof (walk), "umem_ptc_%d", cp->cache_bufsize); + + if (mdb_pwalk(walk, (mdb_walk_cb_t)umastat_lwp_ptc, + &nbufs, (uintptr_t)ulwp->ul_self) == -1) { + mdb_warn("unable to walk '%s'", walk); + return (WALK_ERR); + } + + mdb_printf("%3d ", ulwp->ul_tmem.tm_size ? 
+ (nbufs * cp->cache_bufsize * 100) / ulwp->ul_tmem.tm_size : 0); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_lwp(uintptr_t addr, const ulwp_t *ulwp, void *ignored) +{ + size_t size; + datafmt_t *dfp = ptcfmt; + + mdb_printf((dfp++)->fmt, ulwp->ul_lwpid); + mdb_printf((dfp++)->fmt, ulwp->ul_tmem.tm_size); + + if (umem_readvar(&size, "umem_ptc_size") == -1) { + mdb_warn("unable to read 'umem_ptc_size'"); + return (WALK_ERR); + } + + mdb_printf((dfp++)->fmt, (ulwp->ul_tmem.tm_size * 100) / size); + + if (mdb_walk("umem_cache", + (mdb_walk_cb_t)umastat_lwp_cache, (void *)ulwp) == -1) { + mdb_warn("can't walk 'umem_cache'"); + return (WALK_ERR); + } + + mdb_printf("\n"); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_cache_ptc(uintptr_t addr, const void *ignored, int *nptc) +{ + (*nptc)++; + return (WALK_NEXT); +} + static int umastat_cache(uintptr_t addr, const umem_cache_t *cp, umastat_vmem_t **kvp) { umastat_vmem_t *kv; datafmt_t *dfp = umemfmt; + char buf[10]; int magsize; - int avail, alloc, total; + int avail, alloc, total, nptc = 0; size_t meminuse = (cp->cache_slab_create - cp->cache_slab_destroy) * cp->cache_slabsize; @@ -130,6 +236,21 @@ umastat_cache(uintptr_t addr, const umem_cache_t *cp, umastat_vmem_t **kvp) (void) mdb_pwalk("umem_cpu_cache", cpu_avail, &avail, addr); (void) mdb_pwalk("umem_slab_partial", slab_avail, &avail, addr); + if (cp->cache_flags & UMF_PTC) { + char walk[60]; + + (void) snprintf(walk, sizeof (walk), + "umem_ptc_%d", cp->cache_bufsize); + + if (mdb_walk(walk, + (mdb_walk_cb_t)umastat_cache_ptc, &nptc) == -1) { + mdb_warn("unable to walk '%s'", walk); + return (WALK_ERR); + } + + (void) snprintf(buf, sizeof (buf), "%d", nptc); + } + for (kv = *kvp; kv != NULL; kv = kv->kv_next) { if (kv->kv_addr == (uintptr_t)cp->cache_arena) goto out; @@ -147,6 +268,7 @@ out: mdb_printf((dfp++)->fmt, cp->cache_name); mdb_printf((dfp++)->fmt, cp->cache_bufsize); mdb_printf((dfp++)->fmt, total - avail); + 
mdb_printf((dfp++)->fmt, cp->cache_flags & UMF_PTC ? buf : "-"); mdb_printf((dfp++)->fmt, total); mdb_printf((dfp++)->fmt, meminuse); mdb_printf((dfp++)->fmt, alloc); @@ -165,8 +287,8 @@ umastat_vmem_totals(uintptr_t addr, const vmem_t *v, umastat_vmem_t *kv) if (kv == NULL || kv->kv_alloc == 0) return (WALK_NEXT); - mdb_printf("Total [%s]%*s %6s %6s %6s %9u %9u %5u\n", v->vm_name, - 17 - strlen(v->vm_name), "", "", "", "", + mdb_printf("Total [%s]%*s %6s %7s %7s %7s %7H %9u %5u\n", v->vm_name, + 17 - strlen(v->vm_name), "", "", "", "", "", kv->kv_meminuse, kv->kv_alloc, kv->kv_fail); return (WALK_NEXT); @@ -209,20 +331,67 @@ umastat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { umastat_vmem_t *kv = NULL; datafmt_t *dfp; + int nptc = 0, i; if (argc != 0) return (DCMD_USAGE); + /* + * We need to determine if we have any caches that have per-thread + * caching enabled. + */ + if (mdb_walk("umem_cache", + (mdb_walk_cb_t)umastat_cache_nptc, &nptc) == -1) { + mdb_warn("can't walk 'umem_cache'"); + return (DCMD_ERR); + } + + if (nptc) { + for (dfp = ptcfmt; dfp->hdr2 != NULL; dfp++) + mdb_printf("%s ", dfp->hdr1); + + for (i = 0; i < nptc; i++) + mdb_printf("%s ", dfp->hdr1); + + mdb_printf("\n"); + + for (dfp = ptcfmt; dfp->hdr2 != NULL; dfp++) + mdb_printf("%s ", dfp->hdr2); + + if (mdb_walk("umem_cache", + (mdb_walk_cb_t)umastat_cache_hdr, NULL) == -1) { + mdb_warn("can't walk 'umem_cache'"); + return (DCMD_ERR); + } + + mdb_printf("\n"); + + for (dfp = ptcfmt; dfp->hdr2 != NULL; dfp++) + mdb_printf("%s ", dfp->dashes); + + for (i = 0; i < nptc; i++) + mdb_printf("%s ", dfp->dashes); + + mdb_printf("\n"); + + if (mdb_walk("ulwp", (mdb_walk_cb_t)umastat_lwp, NULL) == -1) { + mdb_warn("can't walk 'ulwp'"); + return (DCMD_ERR); + } + + mdb_printf("\n"); + } + for (dfp = umemfmt; dfp->hdr1 != NULL; dfp++) - mdb_printf("%s ", dfp->hdr1); + mdb_printf("%s%s", dfp == umemfmt ? 
"" : " ", dfp->hdr1); mdb_printf("\n"); for (dfp = umemfmt; dfp->hdr1 != NULL; dfp++) - mdb_printf("%s ", dfp->hdr2); + mdb_printf("%s%s", dfp == umemfmt ? "" : " ", dfp->hdr2); mdb_printf("\n"); for (dfp = umemfmt; dfp->hdr1 != NULL; dfp++) - mdb_printf("%s ", dfp->dashes); + mdb_printf("%s%s", dfp == umemfmt ? "" : " ", dfp->dashes); mdb_printf("\n"); if (mdb_walk("umem_cache", (mdb_walk_cb_t)umastat_cache, &kv) == -1) { @@ -231,7 +400,7 @@ umastat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) } for (dfp = umemfmt; dfp->hdr1 != NULL; dfp++) - mdb_printf("%s ", dfp->dashes); + mdb_printf("%s%s", dfp == umemfmt ? "" : " ", dfp->dashes); mdb_printf("\n"); if (mdb_walk("vmem", (mdb_walk_cb_t)umastat_vmem_totals, kv) == -1) { diff --git a/usr/src/cmd/mdb/common/modules/libumem/umem.c b/usr/src/cmd/mdb/common/modules/libumem/umem.c index 0bd52baf1e..e1b855dc4d 100644 --- a/usr/src/cmd/mdb/common/modules/libumem/umem.c +++ b/usr/src/cmd/mdb/common/modules/libumem/umem.c @@ -24,7 +24,7 @@ */ /* - * Copyright 2011 Joyent, Inc. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
*/ #include "umem.h" @@ -35,6 +35,8 @@ #include <alloca.h> #include <limits.h> #include <mdb/mdb_whatis.h> +#include <thr_uberdata.h> +#include <stdio.h> #include "misc.h" #include "leaky.h" @@ -103,12 +105,56 @@ umem_update_variables(void) return (0); } +static int +umem_ptc_walk_init(mdb_walk_state_t *wsp) +{ + if (wsp->walk_addr == NULL) { + if (mdb_layered_walk("ulwp", wsp) == -1) { + mdb_warn("couldn't walk 'ulwp'"); + return (WALK_ERR); + } + } + + return (WALK_NEXT); +} + +static int +umem_ptc_walk_step(mdb_walk_state_t *wsp) +{ + uintptr_t this; + int rval; + + if (wsp->walk_layer != NULL) { + this = (uintptr_t)((ulwp_t *)wsp->walk_layer)->ul_self + + (uintptr_t)wsp->walk_arg; + } else { + this = wsp->walk_addr + (uintptr_t)wsp->walk_arg; + } + + for (;;) { + if (mdb_vread(&this, sizeof (void *), this) == -1) + mdb_warn("couldn't read ptc buffer at %p", this); + + if (this == NULL) + break; + + rval = wsp->walk_callback(this, &this, wsp->walk_cbdata); + + if (rval != WALK_NEXT) + return (rval); + } + + return (wsp->walk_layer != NULL ? WALK_NEXT : WALK_DONE); +} + /*ARGSUSED*/ static int -umem_init_walkers(uintptr_t addr, const umem_cache_t *c, void *ignored) +umem_init_walkers(uintptr_t addr, const umem_cache_t *c, int *sizes) { mdb_walker_t w; char descr[64]; + char name[64]; + int i; (void) mdb_snprintf(descr, sizeof (descr), "walk the %s cache", c->cache_name); @@ -123,6 +169,45 @@ umem_init_walkers(uintptr_t addr, const umem_cache_t *c, void *ignored) if (mdb_add_walker(&w) == -1) mdb_warn("failed to add %s walker", c->cache_name); + if (!(c->cache_flags & UMF_PTC)) + return (WALK_NEXT); + + /* + * For the per-thread cache walker, the address is the offset in the + * tm_roots[] array of the ulwp_t. 
+ */ + for (i = 0; sizes[i] != 0; i++) { + if (sizes[i] == c->cache_bufsize) + break; + } + + if (sizes[i] == 0) { + mdb_warn("cache %s is cached per-thread, but could not find " + "size in umem_alloc_sizes\n", c->cache_name); + return (WALK_NEXT); + } + + if (i >= NTMEMBASE) { + mdb_warn("index for %s (%d) exceeds root slots (%d)\n", + c->cache_name, i, NTMEMBASE); + return (WALK_NEXT); + } + + (void) mdb_snprintf(name, sizeof (name), + "umem_ptc_%d", c->cache_bufsize); + (void) mdb_snprintf(descr, sizeof (descr), + "walk the per-thread cache for %s", c->cache_name); + + w.walk_name = name; + w.walk_descr = descr; + w.walk_init = umem_ptc_walk_init; + w.walk_step = umem_ptc_walk_step; + w.walk_fini = NULL; + w.walk_init_arg = (void *)offsetof(ulwp_t, ul_tmem.tm_roots[i]); + + if (mdb_add_walker(&w) == -1) + mdb_warn("failed to add %s walker", w.walk_name); + return (WALK_NEXT); } @@ -131,6 +216,8 @@ static void umem_statechange_cb(void *arg) { static int been_ready = 0; + GElf_Sym sym; + int *sizes; #ifndef _KMDB leaky_cleanup(1); /* state changes invalidate leaky state */ @@ -146,7 +233,25 @@ umem_statechange_cb(void *arg) return; been_ready = 1; - (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, NULL); + + /* + * In order to determine the tm_roots offset of any cache that is + * cached per-thread, we need to have the umem_alloc_sizes array. + * Read this, assuring that it is zero-terminated. 
+ */ + if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) { + mdb_warn("unable to lookup 'umem_alloc_sizes'"); + return; + } + + sizes = mdb_zalloc(sym.st_size + sizeof (int), UM_SLEEP | UM_GC); + + if (mdb_vread(sizes, sym.st_size, (uintptr_t)sym.st_value) == -1) { + mdb_warn("couldn't read 'umem_alloc_sizes'"); + return; + } + + (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, sizes); } int @@ -787,9 +892,9 @@ umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp) } \ } -int +static int umem_read_magazines(umem_cache_t *cp, uintptr_t addr, - void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags) + void ***maglistp, size_t *magcntp, size_t *magmaxp) { umem_magazine_t *ump, *mp; void **maglist = NULL; @@ -806,7 +911,7 @@ umem_read_magazines(umem_cache_t *cp, uintptr_t addr, *maglistp = NULL; *magcntp = 0; *magmaxp = 0; - return (WALK_NEXT); + return (0); } /* @@ -827,11 +932,11 @@ umem_read_magazines(umem_cache_t *cp, uintptr_t addr, if (magbsize >= PAGESIZE / 2) { mdb_warn("magazine size for cache %p unreasonable (%x)\n", addr, magbsize); - return (WALK_ERR); + return (-1); } - maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags); - mp = mdb_alloc(magbsize, alloc_flags); + maglist = mdb_alloc(magmax * sizeof (void *), UM_SLEEP); + mp = mdb_alloc(magbsize, UM_SLEEP); if (mp == NULL || maglist == NULL) goto fail; @@ -874,23 +979,80 @@ umem_read_magazines(umem_cache_t *cp, uintptr_t addr, dprintf(("magazine layer: %d buffers\n", magcnt)); - if (!(alloc_flags & UM_GC)) - mdb_free(mp, magbsize); + mdb_free(mp, magbsize); *maglistp = maglist; *magcntp = magcnt; *magmaxp = magmax; - return (WALK_NEXT); + return (0); fail: - if (!(alloc_flags & UM_GC)) { - if (mp) - mdb_free(mp, magbsize); - if (maglist) - mdb_free(maglist, magmax * sizeof (void *)); + if (mp) + mdb_free(mp, magbsize); + if (maglist) + mdb_free(maglist, magmax * sizeof (void *)); + + return (-1); +} + +typedef struct umem_read_ptc_walk { + void **urpw_buf; 
+ size_t urpw_cnt; + size_t urpw_max; +} umem_read_ptc_walk_t; + +/*ARGSUSED*/ +static int +umem_read_ptc_walk_buf(uintptr_t addr, + const void *ignored, umem_read_ptc_walk_t *urpw) +{ + if (urpw->urpw_cnt == urpw->urpw_max) { + size_t nmax = urpw->urpw_max ? (urpw->urpw_max << 1) : 1; + void **new = mdb_zalloc(nmax * sizeof (void *), UM_SLEEP); + + if (nmax > 1) { + size_t osize = urpw->urpw_max * sizeof (void *); + bcopy(urpw->urpw_buf, new, osize); + mdb_free(urpw->urpw_buf, osize); + } + + urpw->urpw_buf = new; + urpw->urpw_max = nmax; } - return (WALK_ERR); + + urpw->urpw_buf[urpw->urpw_cnt++] = (void *)addr; + + return (WALK_NEXT); +} + +static int +umem_read_ptc(umem_cache_t *cp, uintptr_t addr, + void ***buflistp, size_t *bufcntp, size_t *bufmaxp) +{ + umem_read_ptc_walk_t urpw; + char walk[60]; + int rval; + + if (!(cp->cache_flags & UMF_PTC)) + return (0); + + (void) snprintf(walk, sizeof (walk), "umem_ptc_%d", cp->cache_bufsize); + + urpw.urpw_buf = *buflistp; + urpw.urpw_cnt = *bufcntp; + urpw.urpw_max = *bufmaxp; + + if ((rval = mdb_pwalk(walk, + (mdb_walk_cb_t)umem_read_ptc_walk_buf, &urpw, addr)) == -1) { + mdb_warn("couldn't walk %s", walk); + } + + *buflistp = urpw.urpw_buf; + *bufcntp = urpw.urpw_cnt; + *bufmaxp = urpw.urpw_max; + + return (rval); } static int @@ -1021,13 +1183,19 @@ umem_walk_init_common(mdb_walk_state_t *wsp, int type) /* * Read in the contents of the magazine layer */ - if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax, - UM_SLEEP) == WALK_ERR) + if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax) != 0) + goto out2; + + /* + * Read in the contents of the per-thread caches, if any + */ + if (umem_read_ptc(cp, addr, &maglist, &magcnt, &magmax) != 0) goto out2; /* - * We have all of the buffers from the magazines; if we are walking - * allocated buffers, sort them so we can bsearch them later. 
+ * We have all of the buffers from the magazines and from the + * per-thread cache (if any); if we are walking allocated buffers, + * sort them so we can bsearch them later. */ if (type & UM_ALLOCATED) qsort(maglist, magcnt, sizeof (void *), addrcmp); @@ -2102,7 +2270,7 @@ whatis_run_umem(mdb_whatis_t *w, void *ignored) wi.wi_w = w; /* umem's metadata is allocated from the umem_internal_arena */ - if (mdb_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1) + if (umem_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1) mdb_warn("unable to readvar \"umem_internal_arena\""); /* diff --git a/usr/src/cmd/mdb/intel/amd64/libumem/Makefile b/usr/src/cmd/mdb/intel/amd64/libumem/Makefile index 944ee5a4c6..b1b38c5a2a 100644 --- a/usr/src/cmd/mdb/intel/amd64/libumem/Makefile +++ b/usr/src/cmd/mdb/intel/amd64/libumem/Makefile @@ -22,7 +22,7 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" +# Copyright (c) 2012 Joyent, Inc. All rights reserved. MODULE = libumem.so MDBTGT = proc @@ -43,6 +43,7 @@ include ../../../../Makefile.cmd include ../../../../Makefile.cmd.64 CPPFLAGS += -I$(SRC)/lib/libumem/common +CPPFLAGS += -I$(SRC)/lib/libc/inc CPPFLAGS += -I$(MODSRCS_DIR) include ../../Makefile.amd64 diff --git a/usr/src/cmd/mdb/intel/ia32/libumem/Makefile b/usr/src/cmd/mdb/intel/ia32/libumem/Makefile index 12c6f70fb2..1a1f23446c 100644 --- a/usr/src/cmd/mdb/intel/ia32/libumem/Makefile +++ b/usr/src/cmd/mdb/intel/ia32/libumem/Makefile @@ -22,7 +22,7 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" +# Copyright (c) 2012 Joyent, Inc. All rights reserved. 
MODULE = libumem.so MDBTGT = proc @@ -41,6 +41,7 @@ MODSRCS = \ include ../../../../Makefile.cmd +CPPFLAGS += -I$(SRC)/lib/libc/inc CPPFLAGS += -I$(SRC)/lib/libumem/common CPPFLAGS += -I$(MODSRCS_DIR) diff --git a/usr/src/cmd/mdb/sparc/v7/libumem/Makefile b/usr/src/cmd/mdb/sparc/v7/libumem/Makefile index e080ddf325..47072b4380 100644 --- a/usr/src/cmd/mdb/sparc/v7/libumem/Makefile +++ b/usr/src/cmd/mdb/sparc/v7/libumem/Makefile @@ -22,7 +22,7 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" +# Copyright (c) 2012 Joyent, Inc. All rights reserved. MODULE = libumem.so MDBTGT = proc @@ -42,6 +42,7 @@ MODSRCS = \ include ../../../../Makefile.cmd CPPFLAGS += -I$(SRC)/lib/libumem/common +CPPFLAGS += -I$(SRC)/lib/libc/inc CPPFLAGS += -I$(MODSRCS_DIR) include ../../Makefile.sparcv7 diff --git a/usr/src/cmd/mdb/sparc/v9/libumem/Makefile b/usr/src/cmd/mdb/sparc/v9/libumem/Makefile index 9b1bc1210b..16d8d80221 100644 --- a/usr/src/cmd/mdb/sparc/v9/libumem/Makefile +++ b/usr/src/cmd/mdb/sparc/v9/libumem/Makefile @@ -22,7 +22,7 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" +# Copyright (c) 2012 Joyent, Inc. All rights reserved. 
MODULE = libumem.so MDBTGT = proc @@ -55,6 +55,7 @@ KMOD_SOURCES_DIFFERENT=$(POUND_SIGN) include ../../../../Makefile.cmd CPPFLAGS += -I$(SRC)/lib/libumem/common +CPPFLAGS += -I$(SRC)/lib/libc/inc CPPFLAGS += -I$(MODSRCS_DIR) include ../../../../Makefile.cmd.64 diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile index b2558d9f5c..5b2de48852 100644 --- a/usr/src/lib/Makefile +++ b/usr/src/lib/Makefile @@ -161,7 +161,8 @@ SUBDIRS += \ librdc \ libinstzones \ libpkg \ - libpcidb + libpcidb \ + libumem_trampoline SUBDIRS += \ passwdutil \ diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile index be27c779db..1f71b06463 100644 --- a/usr/src/lib/libc/amd64/Makefile +++ b/usr/src/lib/libc/amd64/Makefile @@ -20,6 +20,7 @@ # # # Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2012, Joyent, Inc. All rights reserved. # # Copyright 2011 Nexenta Systems, Inc. All rights reserved. # Use is subject to license terms. @@ -796,6 +797,7 @@ THREADSOBJS= \ assfail.o \ cancel.o \ door_calls.o \ + tmem.o \ pthr_attr.o \ pthr_barrier.o \ pthr_cond.o \ diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com index 21c6d70877..e01da08091 100644 --- a/usr/src/lib/libc/i386/Makefile.com +++ b/usr/src/lib/libc/i386/Makefile.com @@ -20,6 +20,7 @@ # # # Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2012, Joyent, Inc. All rights reserved. # # Copyright 2011 Nexenta Systems, Inc. All rights reserved. # Use is subject to license terms. @@ -838,6 +839,7 @@ THREADSOBJS= \ assfail.o \ cancel.o \ door_calls.o \ + tmem.o \ pthr_attr.o \ pthr_barrier.o \ pthr_cond.o \ diff --git a/usr/src/lib/libc/inc/thr_uberdata.h b/usr/src/lib/libc/inc/thr_uberdata.h index af3083138d..2b8d000b29 100644 --- a/usr/src/lib/libc/inc/thr_uberdata.h +++ b/usr/src/lib/libc/inc/thr_uberdata.h @@ -22,6 +22,9 @@ /* * Copyright (c) 1999, 2010, Oracle and/or its affiliates. 
All rights reserved. */ +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ #ifndef _THR_UBERDATA_H #define _THR_UBERDATA_H @@ -488,6 +491,28 @@ typedef struct { #endif /* _SYSCALL32 */ /* + * As part of per-thread caching libumem (ptcumem), we add a small amount to the + * thread's uberdata to facilitate it. The tm_roots are the roots of linked + * lists which is used by libumem to chain together allocations. tm_size is used + * to track the total amount of data stored across those linked lists. + */ +#define NTMEMBASE 16 + +typedef struct { + size_t tm_size; + void *tm_roots[NTMEMBASE]; +} tumem_t; + +#ifdef _SYSCALL32 +typedef struct { + uint32_t tm_size; + caddr32_t tm_roots[NTMEMBASE]; +} tumem32_t; +#endif + +typedef void (*tmem_func_t)(void *, int); + +/* * Maximum number of read locks allowed for one thread on one rwlock. * This could be as large as INT_MAX, but the SUSV3 test suite would * take an inordinately long time to complete. This is big enough. @@ -653,6 +678,7 @@ typedef struct ulwp { #if defined(sparc) void *ul_unwind_ret; /* used only by _ex_clnup_handler() */ #endif + tumem_t ul_tmem; /* used only by umem */ } ulwp_t; #define ul_cursig ul_cp.s.cursig /* deferred signal number */ @@ -1074,6 +1100,7 @@ typedef struct ulwp32 { #if defined(sparc) caddr32_t ul_unwind_ret; /* used only by _ex_clnup_handler() */ #endif + tumem32_t ul_tmem; /* used only by umem */ } ulwp32_t; #define REPLACEMENT_SIZE32 ((size_t)&((ulwp32_t *)NULL)->ul_sigmask) @@ -1196,6 +1223,7 @@ extern ulwp_t *find_lwp(thread_t); extern void finish_init(void); extern void update_sched(ulwp_t *); extern void queue_alloc(void); +extern void tmem_exit(void); extern void tsd_exit(void); extern void tsd_free(ulwp_t *); extern void tls_setup(void); diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers index c2313fc09e..81199991b7 100644 --- a/usr/src/lib/libc/port/mapfile-vers +++ b/usr/src/lib/libc/port/mapfile-vers @@ -25,6 +25,7 @@ # Use is 
subject to license terms. # # Copyright (c) 2011 by Delphix. All rights reserved. +# Copyright (c) 2012, Joyent, Inc. All rights reserved. # # @@ -2864,6 +2865,9 @@ $endif thr_wait_mutator; _thr_wait_mutator; __tls_get_addr; + _tmem_get_base; + _tmem_get_nentries; + _tmem_set_cleanup; tpool_create; tpool_dispatch; tpool_destroy; diff --git a/usr/src/lib/libc/port/threads/thr.c b/usr/src/lib/libc/port/threads/thr.c index ae55fbddf5..b5d848449d 100644 --- a/usr/src/lib/libc/port/threads/thr.c +++ b/usr/src/lib/libc/port/threads/thr.c @@ -22,6 +22,9 @@ /* * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ #include "lint.h" #include "thr_uberdata.h" @@ -771,6 +774,7 @@ _thrp_exit() } lmutex_unlock(&udp->link_lock); + tmem_exit(); /* deallocate tmem allocations */ tsd_exit(); /* deallocate thread-specific data */ tls_exit(); /* deallocate thread-local storage */ heldlock_exit(); /* deal with left-over held locks */ diff --git a/usr/src/lib/libc/port/threads/tmem.c b/usr/src/lib/libc/port/threads/tmem.c new file mode 100644 index 0000000000..00203de593 --- /dev/null +++ b/usr/src/lib/libc/port/threads/tmem.c @@ -0,0 +1,85 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ + +#include "lint.h" +#include "thr_uberdata.h" + +/* + * This file implements the private interface with libumem for per-thread + * caching umem (ptcumem). For the full details on how ptcumem works and how + * these functions work, see section 8.4 of the big theory statement in + * lib/libumem/common/umem.c. + */ +static tmem_func_t tmem_cleanup = NULL; + +uintptr_t +_tmem_get_base(void) +{ + return ((uintptr_t)&curthread->ul_tmem - (uintptr_t)curthread); +} + +int +_tmem_get_nentries(void) +{ + return (NTMEMBASE); +} + +void +_tmem_set_cleanup(tmem_func_t f) +{ + tmem_cleanup = f; +} + +/* + * This is called by _thrp_exit() to clean up any per-thread allocations that + * are still hanging around and haven't been cleaned up. + */ +void +tmem_exit(void) +{ + int ii; + void *buf, *next; + tumem_t *tp = &curthread->ul_tmem; + + + if (tp->tm_size == 0) + return; + + /* + * Since we have something stored here, we need to ensure we declared a + * clean up handler. If we haven't that's broken and our single private + * consumer should be shot. + */ + if (tmem_cleanup == NULL) + abort(); + for (ii = 0; ii < NTMEMBASE; ii++) { + buf = tp->tm_roots[ii]; + while (buf != NULL) { + next = *(void **)buf; + tmem_cleanup(buf, ii); + buf = next; + } + } +} diff --git a/usr/src/lib/libc/sparc/Makefile.com b/usr/src/lib/libc/sparc/Makefile.com index 97aa14a7d6..4267e4b6c1 100644 --- a/usr/src/lib/libc/sparc/Makefile.com +++ b/usr/src/lib/libc/sparc/Makefile.com @@ -20,6 +20,7 @@ # # # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2012, Joyent, Inc. All rights reserved. # # Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
# Use is subject to license terms. @@ -873,6 +874,7 @@ THREADSOBJS= \ assfail.o \ cancel.o \ door_calls.o \ + tmem.o \ pthr_attr.o \ pthr_barrier.o \ pthr_cond.o \ diff --git a/usr/src/lib/libc/sparcv9/Makefile.com b/usr/src/lib/libc/sparcv9/Makefile.com index 462d07e8a2..518e255e36 100644 --- a/usr/src/lib/libc/sparcv9/Makefile.com +++ b/usr/src/lib/libc/sparcv9/Makefile.com @@ -20,6 +20,7 @@ # # # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2012, Joyent, Inc. All rights reserved. # # Copyright 2011 Nexenta Systems, Inc. All rights reserved. # Use is subject to license terms. @@ -820,6 +821,7 @@ THREADSOBJS= \ assfail.o \ cancel.o \ door_calls.o \ + tmem.o \ pthr_attr.o \ pthr_barrier.o \ pthr_cond.o \ diff --git a/usr/src/lib/libumem/Makefile.com b/usr/src/lib/libumem/Makefile.com index 6306d194e0..1507bb78c3 100644 --- a/usr/src/lib/libumem/Makefile.com +++ b/usr/src/lib/libumem/Makefile.com @@ -22,7 +22,7 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# ident "%Z%%M% %I% %E% SMI" +# Copyright (c) 2012, Joyent, Inc. All rights reserved. 
# # @@ -67,10 +67,12 @@ SRCS_standalone = $(OBJECTS_standalone:%.o=../common/%.c) # Architecture-dependent files common to both versions of libumem OBJECTS_common_isadep = \ - asm_subr.o + asm_subr.o \ + umem_genasm.o SRCS_common_isadep = \ - $(ISASRCDIR)/asm_subr.s + $(ISASRCDIR)/asm_subr.s \ + $(ISASRCDIR)/umem_genasm.c # Architecture-independent files common to both versions of libumem OBJECTS_common_common = \ @@ -117,6 +119,8 @@ MAPFILE_SUPPLEMENTAL = $(MAPFILE_SUPPLEMENTAL_$(CURTYPE)) LDLIBS += -lc +DYNFLAGS += -Wl,-Plibumem_trampoline.so.1 + LDFLAGS_standalone = $(ZNOVERSION) $(BREDUCE) -M../common/mapfile-vers \ -M$(MAPFILE_SUPPLEMENTAL) -dy -r LDFLAGS = $(LDFLAGS_$(CURTYPE)) diff --git a/usr/src/lib/libumem/amd64/umem_genasm.c b/usr/src/lib/libumem/amd64/umem_genasm.c new file mode 100644 index 0000000000..7dad57505b --- /dev/null +++ b/usr/src/lib/libumem/amd64/umem_genasm.c @@ -0,0 +1,609 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2012 Joyent, Inc. All rights reserved. + */ + +/* + * Don't Panic! 
If you find the blocks of assembly that follow confusing and + * you're questioning why they exist, please go read section 8 of the umem.c big + * theory statement. Next familiarize yourself with the malloc and free + * implementations in libumem's malloc.c. + * + * What follows is the amd64 implementation of the thread caching automatic + * assembly generation. The amd64 calling conventions are documented in the + * 64-bit System V ABI. For our purposes what matters is that our first argument + * will come in rdi. Our functions have to preserve rbp, rbx, and r12->r15. We + * are free to do whatever we want with rax, rcx, rdx, rsi, rdi, and r8->r11. + * + * For both our implementation of malloc and free we only use the registers we + * don't have to preserve. + * + * Malloc register usage: + * o. rdi: Original size to malloc. This never changes and is preserved. + * o. rsi: Adjusted malloc size for malloc_data_tag(s). + * o. rcx: Pointer to the tmem_t in the ulwp_t. + * o. rdx: Pointer to the tmem_t array of roots + * o. r8: Size of the cache + * o. r9: Scratch register + * + * Free register usage: + * o. rdi: Original buffer to free. This never changes and is preserved. + * o. rax: The actual buffer, adjusted for the hidden malloc_data_t(s). + * o. rcx: Pointer to the tmem_t in the ulwp_t. + * o. rdx: Pointer to the tmem_t array of roots + * o. r8: Size of the cache + * o. r9: Scratch register + * + * Once we determine what cache we are using, we increment %rdx to the + * appropriate offset and set %r8 with the size of the cache. This means that + * when we break out to the normal buffer allocation point %rdx contains the + * head of the linked list and %r8 is the amount that we have to adjust the + * thread's cached amount by. + * + * Each block of assembly has pseudocode that describes its purpose. 
+ */ + +#include <atomic.h> +#include <inttypes.h> +#include <sys/types.h> +#include <strings.h> +#include <umem_impl.h> +#include "umem_base.h" + +int umem_genasm_supported = 1; +uintptr_t umem_genasm_mptr; +uintptr_t umem_genasm_msize; +uintptr_t umem_genasm_fptr; +uintptr_t umem_genasm_fsize; +static uintptr_t umem_genasm_omptr; +static uintptr_t umem_genasm_ofptr; + +#define UMEM_GENASM_MAX64 (UINT32_MAX / sizeof (uintptr_t)) +#define PTC_JMPADDR(dest, src) (dest - (src + 4)) +#define PTC_ROOT_SIZE sizeof (uintptr_t) +#define MULTINOP 0x0000441f0f /* five-byte nop; in memory: 0f 1f 44 00 00 */ + +/* + * void *ptcmalloc(size_t orig_size); + * + * size_t size = orig_size + 8; + * if (size > UMEM_SECOND_ALIGN) + * size += 8; + * + * if (size < orig_size) + * goto tomalloc; ! This is overflow + * + * if (size > cache_max) + * goto tomalloc; + * + * tmem_t *t = (uintptr_t)curthread() + umem_tmem_off; + * void **roots = t->tm_roots; + */ +#define PTC_MALINIT_JOUT 0x13 +#define PTC_MALINIT_MCS 0x1a +#define PTC_MALINIT_JOV 0x20 +#define PTC_MALINIT_SOFF 0x30 +static const uint8_t malinit[] = { + 0x48, 0x8d, 0x77, 0x08, /* leaq 0x8(%rdi),%rsi */ + 0x48, 0x83, 0xfe, 0x10, /* cmpq $0x10, %rsi */ + 0x76, 0x04, /* jbe +0x4 */ + 0x48, 0x8d, 0x77, 0x10, /* leaq 0x10(%rdi),%rsi */ + 0x48, 0x39, 0xfe, /* cmpq %rdi,%rsi */ + 0x0f, 0x82, 0x00, 0x00, 0x00, 0x00, /* jb +errout (size overflowed) */ + 0x48, 0x81, 0xfe, + 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ + 0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +errout (size > cache_max) */ + 0x64, 0x48, 0x8b, 0x0c, 0x25, + 0x00, 0x00, 0x00, 0x00, /* movq %fs:0x0,%rcx */ + 0x48, 0x81, 0xc1, + 0x00, 0x00, 0x00, 0x00, /* addq $SOFF, %rcx */ + 0x48, 0x8d, 0x51, 0x08, /* leaq 0x8(%rcx),%rdx */ +}; + +/* + * void ptcfree(void *buf); + * + * if (buf == NULL) + * return; + * + * malloc_data_t *tag = buf; + * tag--; + * int size = tag->malloc_size; + * int tagval = UMEM_MALLOC_DECODE(tag->malloc_tag, size); + * if (tagval == MALLOC_SECOND_MAGIC) { + * tag--; + * } else if (tagval != MALLOC_MAGIC) { + * goto 
tofree; + * } + * + * if (size > cache_max) + * goto tofree; + * + * tmem_t *t = (uintptr_t)curthread() + umem_tmem_off; + * void **roots = t->tm_roots; + */ +#define PTC_FRINI_JDONE 0x05 +#define PTC_FRINI_JFREE 0x25 +#define PTC_FRINI_MCS 0x30 +#define PTC_FRINI_JOV 0x36 +#define PTC_FRINI_SOFF 0x46 +static const uint8_t freeinit[] = { + 0x48, 0x85, 0xff, /* testq %rdi,%rdi */ + 0x0f, 0x84, 0x00, 0x00, 0x00, 0x00, /* je $JDONE (done) */ + 0x8b, 0x77, 0xf8, /* movl -0x8(%rdi),%esi */ + 0x8b, 0x47, 0xfc, /* movl -0x4(%rdi),%eax */ + 0x01, 0xf0, /* addl %esi,%eax */ + 0x3d, 0x00, 0x70, 0xba, 0x16, /* cmpl $MALLOC_2_MAGIC, %eax */ + 0x75, 0x06, /* jne +0x6 (checkover) */ + 0x48, 0x8d, 0x47, 0xf0, /* leaq -0x10(%rdi),%rax */ + 0xeb, 0x0f, /* jmp +0xf (freebuf) */ + 0x3d, 0x00, 0xc0, 0x10, 0x3a, /* cmpl $MALLOC_MAGIC, %eax */ + 0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, /* jne +JFREE (goto torfree) */ + 0x48, 0x8d, 0x47, 0xf8, /* leaq -0x8(%rdi),%rax */ + 0x48, 0x81, 0xfe, + 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ + 0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +errout */ + 0x64, 0x48, 0x8b, 0x0c, 0x25, + 0x00, 0x00, 0x00, 0x00, /* movq %fs:0x0,%rcx */ + 0x48, 0x81, 0xc1, + 0x00, 0x00, 0x00, 0x00, /* addq $SOFF, %rcx */ + 0x48, 0x8d, 0x51, 0x08, /* leaq 0x8(%rcx),%rdx */ +}; + +/* + * if (size <= $CACHE_SIZE) { + * csize = $CACHE_SIZE; + * } else ... ! goto next cache + */ +#define PTC_INICACHE_CMP 0x03 +#define PTC_INICACHE_SIZE 0x0c +#define PTC_INICACHE_JMP 0x11 +static const uint8_t inicache[] = { + 0x48, 0x81, 0xfe, + 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ + 0x77, 0x0c, /* ja +0xc (next cache) */ + 0x49, 0xc7, 0xc0, + 0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */ + 0xe9, 0x00, 0x00, 0x00, 0x00, /* jmp $JMP (allocbuf) */ +}; + +/* + * if (size <= $CACHE_SIZE) { + * csize = $CACHE_SIZE; + * roots += $CACHE_NUM; + * } else ... ! 
goto next cache + */ +#define PTC_GENCACHE_CMP 0x03 +#define PTC_GENCACHE_SIZE 0x0c +#define PTC_GENCACHE_NUM 0x13 +#define PTC_GENCACHE_JMP 0x18 +static const uint8_t gencache[] = { + 0x48, 0x81, 0xfe, + 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ + 0x77, 0x13, /* ja +0x13 (next cache) */ + 0x49, 0xc7, 0xc0, + 0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */ + 0x48, 0x81, 0xc2, + 0x00, 0x00, 0x00, 0x00, /* addq $8*ii, %rdx */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp +$JMP (allocbuf ) */ +}; + +/* + * else if (size <= $CACHE_SIZE) { + * csize = $CACHE_SIZE; + * roots += $CACHE_NUM; + * } else { + * goto tofunc; ! goto tomalloc if ptcmalloc. + * } ! goto tofree if ptcfree. + */ +#define PTC_FINCACHE_CMP 0x03 +#define PTC_FINCACHE_JMP 0x08 +#define PTC_FINCACHE_SIZE 0x0c +#define PTC_FINCACHE_NUM 0x13 +static const uint8_t fincache[] = { + 0x48, 0x81, 0xfe, + 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ + 0x77, 0x00, /* ja +JMP (to real malloc/free) */ + 0x49, 0xc7, 0xc0, + 0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */ + 0x48, 0x81, 0xc2, + 0x00, 0x00, 0x00, 0x00, /* addq $8*ii, %rdx */ + +}; + +/* + * if (*root == NULL) + * goto tomalloc; + * + * malloc_data_t *ret = *root; + * *root = *(void **)ret; + * t->tm_size -= csize; + * ret->malloc_size = size; + * + * if (size > UMEM_SECOND_ALIGN) { + * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size); + * ret += 2; + * } else { + * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_MAGIC, size); + * ret += 1; + * } + * + * return ((void *)ret); + * tomalloc: + * return (malloc(orig_size)); + */ +#define PTC_MALFINI_ALLABEL 0x00 +#define PTC_MALFINI_JMLABEL 0x40 +#define PTC_MALFINI_JMADDR 0x41 +static const uint8_t malfini[] = { + 0x48, 0x8b, 0x02, /* movq (%rdx),%rax */ + 0x48, 0x85, 0xc0, /* testq %rax,%rax */ + 0x74, 0x38, /* je +0x38 (errout) */ + 0x4c, 0x8b, 0x08, /* movq (%rax),%r9 */ + 0x4c, 0x89, 0x0a, /* movq %r9,(%rdx) */ + 0x4c, 0x29, 0x01, /* subq %r8,(%rcx) 
*/ + 0x48, 0x83, 0xfe, 0x10, /* cmpq $0x10,%rsi */ + 0x76, 0x15, /* jbe +0x15 */ + 0x41, 0xb9, 0x00, 0x70, 0xba, 0x16, /* movl $MALLOC_MAGIC_2, %r9d */ + 0x89, 0x70, 0x08, /* movl %esi,0x8(%rax) */ + 0x41, 0x29, 0xf1, /* subl %esi, %r9d */ + 0x44, 0x89, 0x48, 0x0c, /* movl %r9d, 0xc(%rax) */ + 0x48, 0x83, 0xc0, 0x10, /* addq $0x10, %rax */ + 0xc3, /* ret */ + 0x41, 0xb9, 0x00, 0xc0, 0x10, 0x3a, /* movl $MALLOC_MAGIC, %r9d */ + 0x89, 0x30, /* movl %esi,(%rax) */ + 0x41, 0x29, 0xf1, /* subl %esi,%r9d */ + 0x44, 0x89, 0x48, 0x04, /* movl %r9d,0x4(%rax) */ + 0x48, 0x83, 0xc0, 0x08, /* addq $0x8,%rax */ + 0xc3, /* ret */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp $MALLOC */ +}; + +/* + * if (t->tm_size + csize > umem_ptc_size) + * goto tofree; + * + * t->tm_size += csize; + * *(void **)tag = *root; + * *root = tag; + * return; + * tofree: + * free(buf); + * return; + */ +#define PTC_FRFINI_RBUFLABEL 0x00 +#define PTC_FRFINI_CACHEMAX 0x09 +#define PTC_FRFINI_DONELABEL 0x1b +#define PTC_FRFINI_JFLABEL 0x1c +#define PTC_FRFINI_JFADDR 0x1d +static const uint8_t freefini[] = { + 0x4c, 0x8b, 0x09, /* movq (%rcx),%r9 */ + 0x4d, 0x01, 0xc1, /* addq %r8, %r9 */ + 0x49, 0x81, 0xf9, + 0x00, 0x00, 0x00, 0x00, /* cmpq $THR_CACHE_MAX, %r9 */ + 0x77, 0x0d, /* ja +0xd (torfree) */ + 0x4c, 0x01, 0x01, /* addq %r8,(%rcx) */ + 0x4c, 0x8b, 0x0a, /* movq (%rdx),%r9 */ + 0x4c, 0x89, 0x08, /* movq %r9,(%rax) */ + 0x48, 0x89, 0x02, /* movq %rax,(%rdx) */ + 0xc3, /* ret */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp free */ +}; + +/* + * Construct the initial part of malloc. off contains the offset from curthread + * to the root of the tmem structure. ep is the address of the label to error + * and jump to malloc. csize is the size of the largest umem_cache in ptcumem. 
+ */ +static int +genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize) +{ + uint32_t addr; + + bcopy(malinit, bp, sizeof (malinit)); + addr = PTC_JMPADDR(ep, PTC_MALINIT_JOUT); + bcopy(&addr, bp + PTC_MALINIT_JOUT, sizeof (addr)); + bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize)); + addr = PTC_JMPADDR(ep, PTC_MALINIT_JOV); + bcopy(&addr, bp + PTC_MALINIT_JOV, sizeof (addr)); + bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off)); + + return (sizeof (malinit)); +} + +static int +genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mcs) +{ + uint32_t addr; + + bcopy(freeinit, bp, sizeof (freeinit)); + addr = PTC_JMPADDR(dp, PTC_FRINI_JDONE); + bcopy(&addr, bp + PTC_FRINI_JDONE, sizeof (addr)); + addr = PTC_JMPADDR(ep, PTC_FRINI_JFREE); + bcopy(&addr, bp + PTC_FRINI_JFREE, sizeof (addr)); + bcopy(&mcs, bp + PTC_FRINI_MCS, sizeof (mcs)); + addr = PTC_JMPADDR(ep, PTC_FRINI_JOV); + bcopy(&addr, bp + PTC_FRINI_JOV, sizeof (addr)); + bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off)); + return (sizeof (freeinit)); +} + + +/* + * Create the initial cache entry of the specified size. The value of ap is + * the location of the label at which we try to allocate a buffer. It is + * expressed as an offset from the start of this block (bp). 
+ */ +static int +genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap) +{ + uint32_t addr; + + bcopy(inicache, bp, sizeof (inicache)); + bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize)); + bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize)); + addr = PTC_JMPADDR(ap, PTC_INICACHE_JMP); + ASSERT(addr != 0); + bcopy(&addr, bp + PTC_INICACHE_JMP, sizeof (addr)); + + return (sizeof (inicache)); +} + +static int +genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap) +{ + uint32_t addr; + uint32_t coff; + + ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num); + ASSERT(num != 0); + bcopy(gencache, bp, sizeof (gencache)); + bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize)); + bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize)); + coff = num * PTC_ROOT_SIZE; + bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff)); + addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP); + bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr)); + + return (sizeof (gencache)); +} + +static int +genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep) +{ + uint8_t eap; + uint32_t coff; + + ASSERT(ep <= 0xff && ep > 7); + ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num); + bcopy(fincache, bp, sizeof (fincache)); + bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize)); + bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize)); + coff = num * PTC_ROOT_SIZE; + bcopy(&coff, bp + PTC_FINCACHE_NUM, sizeof (coff)); + eap = ep - PTC_FINCACHE_JMP - 1; + bcopy(&eap, bp + PTC_FINCACHE_JMP, sizeof (eap)); + + return (sizeof (fincache)); +} + +static int +genasm_malfini(uint8_t *bp, uintptr_t mptr) +{ + uint32_t addr; + + bcopy(malfini, bp, sizeof (malfini)); + addr = PTC_JMPADDR(mptr, ((uintptr_t)bp + PTC_MALFINI_JMADDR)); + bcopy(&addr, bp + PTC_MALFINI_JMADDR, sizeof (addr)); + + return (sizeof (malfini)); +} + +static int +genasm_frfini(uint8_t *bp, uint32_t maxthr, uintptr_t fptr) +{ + uint32_t addr; + + bcopy(freefini, bp, sizeof (freefini)); + bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr)); 
+ addr = PTC_JMPADDR(fptr, ((uintptr_t)bp + PTC_FRFINI_JFADDR)); + bcopy(&addr, bp + PTC_FRFINI_JFADDR, sizeof (addr)); + + return (sizeof (freefini)); +} + +/* + * The malloc inline assembly is constructed as follows: + * + * o Malloc prologue assembly + * o Generic first-cache check + * o n Generic cache checks (where n = nents - 2) + * o Generic last-cache check + * o Malloc epilogue assembly + * + * Generally there are at least three caches. When there is only one cache we + * only use the generic last-cache. In the case where there are two caches, we + * just leave out the middle ones. + */ +static int +genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes) +{ + int ii, off; + uint8_t *bp; + size_t total; + uint32_t allocoff, erroff; + + total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache); + + if (nents >= 2) + total += sizeof (inicache) + sizeof (gencache) * (nents - 2); + + if (total > len) + return (1); + + erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL; + allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL; + + bp = base; + + off = genasm_malinit(bp, umem_tmem_off, erroff, + umem_alloc_sizes[nents-1]); + bp += off; + allocoff -= off; + erroff -= off; + + if (nents > 1) { + off = genasm_firstcache(bp, umem_alloc_sizes[0], allocoff); + bp += off; + allocoff -= off; + erroff -= off; + } + + for (ii = 1; ii < nents - 1; ii++) { + off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], allocoff); + bp += off; + allocoff -= off; + erroff -= off; + } + + bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1], + erroff); + bp += genasm_malfini(bp, umem_genasm_omptr); + ASSERT(((uintptr_t)bp - total) == (uintptr_t)base); + + return (0); +} + +static int +genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes) +{ + uint8_t *bp; + int ii, off; + size_t total; + uint32_t rbufoff, retoff, erroff; + + /* Assume that nents has already been audited for us */ + total = sizeof (freeinit) + sizeof 
(freefini) + sizeof (fincache); + if (nents >= 2) + total += sizeof (inicache) + sizeof (gencache) * (nents - 2); + + if (total > len) + return (1); + + erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL); + rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL); + retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL); + + bp = base; + + off = genasm_frinit(bp, umem_tmem_off, retoff, erroff, + umem_alloc_sizes[nents - 1]); + bp += off; + erroff -= off; + rbufoff -= off; + + if (nents > 1) { + off = genasm_firstcache(bp, umem_alloc_sizes[0], rbufoff); + bp += off; + erroff -= off; + rbufoff -= off; + } + + for (ii = 1; ii < nents - 1; ii++) { + off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], rbufoff); + bp += off; + rbufoff -= off; + erroff -= off; + } + + bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1], + erroff); + bp += genasm_frfini(bp, umem_ptc_size, umem_genasm_ofptr); + ASSERT(((uintptr_t)bp - total) == (uintptr_t)base); + + return (0); +} + +/*ARGSUSED*/ +int +umem_genasm(int *cp, umem_cache_t **caches, int nc) +{ + int nents, i; + uint8_t *mptr; + uint8_t *fptr; + uint32_t *ptr; + uint64_t v, *vptr; + + mptr = (void *)((uintptr_t)&umem_genasm_mptr + 5); + fptr = (void *)((uintptr_t)&umem_genasm_fptr + 5); + if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 || + umem_genasm_fptr == 0 || umem_genasm_fsize == 0) + return (1); + + /* + * The total number of caches that we can service is the minimum of: + * o the amount supported by libc + * o the total number of umem caches + * o the root offset is a four-byte addq immediate, so UINT32_MAX / sizeof (uintptr_t). + * For 64-bit, this is UINT32_MAX >> 3, a lot. 
+ */ + nents = _tmem_get_nentries(); + + if (UMEM_GENASM_MAX64 < nents) + nents = UMEM_GENASM_MAX64; + + if (nc < nents) + nents = nc; + + /* Based on our constraints, this is not an error */ + if (nents == 0 || umem_ptc_size == 0) + return (0); + + /* Grab the original malloc and free locations */ + ptr = (void *)(mptr - 4); + umem_genasm_omptr = *ptr + (uintptr_t)mptr; + ptr = (void *)(fptr - 4); + umem_genasm_ofptr = *ptr + (uintptr_t)fptr; + + /* Each buffer loses five bytes to its leading jump; bound each by its own size */ + if (genasm_malloc(mptr, umem_genasm_msize - 5, nents, cp) != 0) + return (1); + + if (genasm_free(fptr, umem_genasm_fsize - 5, nents, cp) != 0) + return (1); + + /* nop out the jump with a multibyte nop */ + vptr = (void *)&umem_genasm_mptr; + v = MULTINOP; + v |= *vptr & (0xffffffULL << 40); + (void) atomic_swap_64(vptr, v); + vptr = (void *)&umem_genasm_fptr; + v = MULTINOP; + v |= *vptr & (0xffffffULL << 40); + (void) atomic_swap_64(vptr, v); + + + for (i = 0; i < nents; i++) + caches[i]->cache_flags |= UMF_PTC; + + return (0); +} diff --git a/usr/src/lib/libumem/common/envvar.c b/usr/src/lib/libumem/common/envvar.c index fc3d490a01..0c4d872814 100644 --- a/usr/src/lib/libumem/common/envvar.c +++ b/usr/src/lib/libumem/common/envvar.c @@ -22,7 +22,10 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2012 Joyent, Inc. All rights reserved. + */ + +/* + * Copyright (c) 2012 Joyent, Inc. All rights reserved. 
*/ #include <ctype.h> @@ -151,7 +154,10 @@ static umem_env_item_t umem_options_items[] = { NULL, 0, NULL, &vmem_sbrk_pagesize }, #endif - + { "perthread_cache", "Evolving", ITEM_SIZE, + "Size (in bytes) of per-thread allocation cache", + NULL, 0, NULL, &umem_ptc_size + }, { NULL, "-- end of UMEM_OPTIONS --", ITEM_INVALID } }; diff --git a/usr/src/lib/libumem/common/mapfile-vers b/usr/src/lib/libumem/common/mapfile-vers index 102bd989f7..6a05f0cfaa 100644 --- a/usr/src/lib/libumem/common/mapfile-vers +++ b/usr/src/lib/libumem/common/mapfile-vers @@ -20,6 +20,7 @@ # # # Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2012, Joyent, Inc. All rights reserved. # # @@ -72,6 +73,10 @@ SYMBOL_VERSION SUNWprivate_1.1 { vmem_walk; vmem_xalloc; vmem_xfree; + umem_genasm_mptr; + umem_genasm_fptr; + umem_genasm_fsize; + umem_genasm_msize; local: *; }; diff --git a/usr/src/lib/libumem/common/stub_stand.c b/usr/src/lib/libumem/common/stub_stand.c index 54635558c3..1a90c6be75 100644 --- a/usr/src/lib/libumem/common/stub_stand.c +++ b/usr/src/lib/libumem/common/stub_stand.c @@ -23,6 +23,9 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ /* * Stubs for the standalone to reduce the dependence on external libraries @@ -125,3 +128,29 @@ issetugid(void) { return (1); } + +int +_tmem_get_nentries(void) +{ + return (0); +} + +uintptr_t +_tmem_get_base(void) +{ + return (0); +} + +/*ARGSUSED*/ +void +_tmem_set_cleanup(void (*f)(int, void *)) +{ +} + +uint64_t +atomic_swap_64(volatile uint64_t *t, uint64_t v) +{ + uint64_t old = *t; + *t = v; + return (old); +} diff --git a/usr/src/lib/libumem/common/umem.c b/usr/src/lib/libumem/common/umem.c index 9ee030dd47..e22106e979 100644 --- a/usr/src/lib/libumem/common/umem.c +++ b/usr/src/lib/libumem/common/umem.c @@ -21,11 +21,14 @@ /* * Copyright 2008 Sun Microsystems, Inc. 
All rights reserved. - * Copyright 2012 Joyent, Inc. All rights reserved. * Use is subject to license terms. */ /* + * Copyright (c) 2012 Joyent, Inc. All rights reserved. + */ + +/* * based on usr/src/uts/common/os/kmem.c r1.64 from 2001/12/18 * * The slab allocator, as described in the following two papers: @@ -43,7 +46,7 @@ * * 1. Overview * ----------- - * umem is very close to kmem in implementation. There are four major + * umem is very close to kmem in implementation. There are seven major * areas of divergence: * * * Initialization @@ -56,6 +59,10 @@ * * * lock ordering * + * * changing UMEM_MAXBUF + * + * * Per-thread caching for malloc/free + * * 2. Initialization * ----------------- * kmem is initialized early on in boot, and knows that no one will call @@ -365,6 +372,237 @@ * * The second place to update, which is not required, is the umem_alloc_sizes. * These determine the default cache sizes that we're going to support. + * + * 8. Per-thread caching for malloc/free + * ------------------------------------- + * + * "Time is an illusion. Lunchtime doubly so." -- Douglas Adams + * + * Time may be an illusion, but CPU cycles aren't. While libumem is designed + * to be a highly scalable allocator, that scalability comes with a fixed cycle + * penalty even in the absence of contention: libumem must acquire (and release + * a per-CPU lock for each allocation. When contention is low and malloc(3C) + * frequency is high, this overhead can dominate execution time. 
To alleviate + * this, we allow for per-thread caching, a lock-free means of caching recent + * deallocations on a per-thread basis for use in satisfying subsequent calls + * to malloc(3C). + * + * In addition to improving performance, we also want to: + * * Minimize fragmentation + * * Not add additional memory overhead (no larger malloc tags) + * + * In the ulwp_t of each thread there is a private data structure called a + * tmem_t that looks like: + * + * typedef struct { + * size_t tm_size; + * void *tm_roots[NTMEMBASE]; (Currently 16) + * } tmem_t; + * + * Each of the roots is treated as the head of a linked list. Each entry in the + * list can be thought of as a void ** which points to the next entry, until one + * of them points to NULL. If the head points to NULL, the list is empty. + * + * Each head corresponds to a umem_cache. Currently there is a linear mapping + * where the first root corresponds to the first cache, second root to the + * second cache, etc. This works because every allocation that malloc makes to + * umem_alloc that can be satisfied by a umem_cache will actually return a + * number of bytes equal to the size of that cache. Because of this property and + * a one to one mapping between caches and roots we can guarantee that every + * entry in a given root's list will be able to satisfy the same requests as the + * corresponding cache. + * + * The maximum amount of memory that can be cached in each thread is determined + * by the perthread_cache UMEM_OPTION. It corresponds to the umem_ptc_size + * value. The default value for this is currently 1 MB. Once umem_init() has + * finished this cannot be directly tuned without directly modifying the + * instruction text. If, upon calling free(3C), the amount cached would exceed + * this maximum, we return the buffer to the umem_cache instead + * of holding onto it in the thread. + * + * When a thread calls malloc(3C) it first determines which umem_cache it + * would be serviced by. 
If the allocation is not covered by ptcumem it goes to + * the normal malloc instead. Next, it checks if the tmem_root's list is empty + * or not. If it is empty, we instead go and allocate the memory from + * umem_alloc. If it is not empty, we remove the head of the list, set the + * appropriate malloc tags, and return that buffer. + * + * When a thread calls free(3C) it first looks at the malloc tag and if it is + * invalid or the allocation exceeds the largest cache in ptcumem and sends it + * off to the original free() to handle and clean up appropriately. Next, it + * checks if the allocation size is covered by one of the per-thread roots and + * if it isn't, it passes it off to the original free() to be released. Finally, + * before it inserts this buffer as the head, it checks if adding this buffer + * would put the thread over its maximum cache size. If it would, it frees the + * buffer back to the umem_cache. Otherwise it increments the threads total + * cached amount and makes the buffer the new head of the appropriate tm_root. + * + * When a thread exits, all of the buffers that it has in its per-thread cache + * will be passed to umem_free() and returned to the appropriate umem_cache. + * + * 8.1 Handling addition and removal of umem_caches + * ------------------------------------------------ + * + * The set of umem_caches that are used to back calls to umem_alloc() and + * ultimately malloc() are determined at program execution time. The default set + * of caches is defined below in umem_alloc_sizes[]. Various umem_options exist + * that modify the set of caches: size_add, size_clear, and size_remove. Because + * the set of caches can only be determined once umem_init() has been called and + * we have the additional goals of minimizing additional fragmentation and + * metadata space overhead in the malloc tags, this forces our hand to go down a + * slightly different path: the one tread by fasttrap and trapstat. 
+ * + * During umem_init we're going to dynamically construct a new version of + * malloc(3C) and free(3C) that utilizes the known cache sizes and then ensure + * that ptcmalloc and ptcfree replace malloc and free as entries in the PLT. If + * ptcmalloc and ptcfree cannot handle a request, they simply jump to the + * original libumem implementations. + * + * After creating all of the umem_caches, but before making them visible, + * umem_cache_init checks that umem_genasm_supported is non-zero. This value is + * set by each architecture in $ARCH/umem_genasm.c to indicate whether or not + * they support this. If the value is zero, then this process is skipped. + * Similarly, if the cache size has been tuned to zero by UMEM_OPTIONS, then + * this is also skipped. + * + * In umem_genasm.c, each architecture's implementation implements a single + * function called umem_genasm() that is responsible for generating the + * appropriate versions of ptcmalloc() and ptcfree(), placing them in the + * appropriate memory location, and finally doing the switch from malloc() and + * free() to ptcmalloc() and ptcfree(). Once the change has been made, there is + * no way to switch back, short of restarting the program or modifying program + * text with mdb. + * + * 8.2 Modifying the Procedure Linkage Table (PLT) + * ----------------------------------------------- + * + * The last piece of this puzzle is how we actually jam ptcmalloc() into the + * PLT. The dynamic linker has support for global and local audit libraries. + * For the full explanation of audit libraries consult the Linkers and Libraries + * guide or the linker source. A local auditor can attach to a single library + * and interpose on all of the relocations that come in from and leave to that + * same library. To facilitate our work, we have created a local audit library + * for libumem that is called libumem_trampoline and is located in + * lib/libumem_trampoline/. 
+ * + * When any resolution is done to malloc(), the audit library allows us to + * replace the address with an address that it specifies. There are two 4k + * sections in the libumem_trampoline's bss which we use as the stomping grounds + * for ptcmalloc and ptcfree. When the audit library audits the malloc and free + * functions from libumem, it encodes their address and sets its buffers to + * contain a simple trampoline which consists of a jmp instruction and a four + * byte offset to the original malloc and free. libumem_trampoline's mapfile + * explicitly makes its bss rwx instead of rw to support this. + * + * When umem_genasm() is called, it uses a similar mechanism to get the address + * and size of the trampoline library's malloc (mbuf) and free (fbuf) buffers. + * After validating that the size will be able to contain all of the + * instructions, it starts laying out ptcmalloc and ptcfree at mbuf[5] and + * fbuf[5], just past the five byte jump. Once both have been successfully generated, umem_genasm() stores a + * single five byte nop over the original jump. + * + * 8.3 umem_genasm() + * ----------------- + * + * umem_genasm() is currently implemented for i386 and amd64. This section + * describes the theory behind the construction. For specific byte code to + * assembly instructions and niceish C and asm versions of ptcmalloc and + * ptcfree, see the individual umem_genasm.c files. The layout consists of the + * following sections: + * + * o. function-specific prologue + * o. function-generic cache-selecting elements + * o. function-specific epilogue + * + * There are three different generic cache elements that exist: + * + * o. the last or only cache + * o. the intermediary caches if more than two + * o. the first one if more than one cache + * + * The malloc and free prologues and epilogues mimic the necessary portions of + * libumem's malloc and free. This includes things like checking for size + * overflow, and setting and verifying the malloc tags. 
+ * + * It is an important constraint that these functions do not make use of the + * call instruction. The only jmp outside of the individual functions is to the + * original libumem malloc and free respectively. Because doing things like + * setting errno or raising an internal umem error on improper malloc tags would + * require using calls into the PLT, whenever we encounter one of those cases we + * just jump to the original malloc and free functions reusing the same stack + * frame. + * + * Each of the above sections, the three caches, and the malloc and free + * prologue and epilogue are implemented as blocks of machine code with the + * corresponding assembly in comments. There are known offsets into each block + * that corresponds to locations of data and addresses that we only know at run + * time. These blocks are copied as necessary and the blanks filled in + * appropriately. + * + * As mentioned in section 8.2, the trampoline library uses specifically named + * variables to communicate the buffers and size to use. These variables are: + * + * o. umem_genasm_mptr: The buffer for ptcmalloc + * o. umem_genasm_msize: The size in bytes of the above buffer + * o. umem_genasm_fptr: The buffer for ptcfree + * o. umem_genasm_fsize: The size in bytes of the above buffer + * + * Finally, to enable the generated assembly we need to remove the previous jump + * to the actual malloc that exists at the start of these buffers. This is a + * five byte region. We could zero out the jump offset to be a jmp +0, but + * using nops can be faster. We specifically use a single five byte nop which is + * faster. The opcode for the five byte nop is 0x 0f 1f 44 00 00. On x86, + * remember integers are little endian, so it will be written the other way + * around. + * + * 8.4 Interface with libc.so + * -------------------------- + * + * The tmem_t structure as described in the beginning of section 8, is part of a + * private interface with libc. 
There are three functions that exist to cover + * this. They are not documented in man pages or header files. They are in the + * SUNWprivate part of libc's makefile. + * + * o. _tmem_get_base(void) + * + * Returns the offset from the ulwp_t (curthread) to the tmem_t structure. + * This is a constant for all threads and is effectively a way to do + * ::offsetof ulwp_t ul_tmem without having to know the specifics of the + * structure outside of libc. + * + * o. _tmem_get_nentries(void) + * + * Returns the number of roots that exist in the tmem_t. This is one part + * of the cap on the number of umem_caches that we can back with tmem. + * + * o. _tmem_set_cleanup(void (*)(void *, int)) + * + * This sets a clean up handler that gets called back when a thread exits. + * There is one call per buffer: the void * is a pointer to the buffer on + * the list, and the int is the index into the roots array for this buffer. + * + * 8.5 Tuning and disabling per-thread caching + * ------------------------------------------- + * + * There is only one tunable for per-thread caching: the amount of memory each + * thread should be able to cache. This is specified via the perthread_cache + * UMEM_OPTION option. No attempt is made to sanity check the specified + * value; the limit is simply the maximum value of a size_t. + * + * If the perthread_cache UMEM_OPTION is set to zero, nomagazines was requested, + * or UMEM_DEBUG has been turned on then we will never call into umem_genasm; + * however, the trampoline audit library and jump will still be in place. + * + * 8.6 Observing efficacy of per-thread caching + * -------------------------------------------- + * + * To understand the efficacy of per-thread caching, use the ::umastat dcmd + * to see the percentage of capacity consumed on a per-thread basis, the + * degree to which each umem cache contributes to per-thread cache consumption, + * and the number of buffers in per-thread caches on a per-umem cache basis. 
+ * If more detail is required, the specific buffers in a per-thread cache can + * be iterated over with the umem_ptc_* walkers. (These walkers allow an + * optional ulwp_t to be specified to iterate only over a particular thread's + * cache.) */ #include <umem_impl.h> @@ -473,8 +711,10 @@ size_t umem_lite_minsize = 0; /* minimum buffer size for UMF_LITE */ size_t umem_lite_maxalign = 1024; /* maximum buffer alignment for UMF_LITE */ size_t umem_maxverify; /* maximum bytes to inspect in debug routines */ size_t umem_minfirewall; /* hardware-enforced redzone threshold */ +size_t umem_ptc_size = 1048576; /* size of per-thread cache (in bytes) */ uint_t umem_flags = 0; +uintptr_t umem_tmem_off; mutex_t umem_init_lock; /* locks initialization */ cond_t umem_init_cv; /* initialization CV */ @@ -482,6 +722,8 @@ thread_t umem_init_thr; /* thread initializing */ int umem_init_env_ready; /* environ pre-initted */ int umem_ready = UMEM_READY_STARTUP; +int umem_ptc_enabled; /* per-thread caching enabled */ + static umem_nofail_callback_t *nofail_callback; static mutex_t umem_nofail_exit_lock; static thread_t umem_nofail_exit_thr; @@ -2838,6 +3080,24 @@ umem_alloc_sizes_remove(size_t size) umem_alloc_sizes[i] = 0; } +/* + * We've been called back from libc to indicate that thread is terminating and + * that it needs to release the per-thread memory that it has. We get to know + * which entry in the thread's tmem array the allocation came from. Currently + * this refers to first n umem_caches which makes this a pretty simple indexing + * job. 
+ */ +static void +umem_cache_tmem_cleanup(void *buf, int entry) +{ + size_t size; + umem_cache_t *cp; + + size = umem_alloc_sizes[entry]; + cp = umem_alloc_table[(size - 1) >> UMEM_ALIGN_SHIFT]; + _umem_cache_free(cp, buf); +} + static int umem_cache_init(void) { @@ -2953,6 +3213,16 @@ umem_cache_init(void) umem_alloc_caches[i] = cp; } + umem_tmem_off = _tmem_get_base(); + _tmem_set_cleanup(umem_cache_tmem_cleanup); + + if (umem_genasm_supported && !(umem_flags & UMF_DEBUG) && + !(umem_flags & UMF_NOMAGAZINE) && + umem_ptc_size > 0) { + umem_ptc_enabled = umem_genasm(umem_alloc_sizes, + umem_alloc_caches, i) == 0 ? 1 : 0; + } + /* * Initialization cannot fail at this point. Make the caches * visible to umem_alloc() and friends. diff --git a/usr/src/lib/libumem/common/umem_base.h b/usr/src/lib/libumem/common/umem_base.h index e78bebfb58..26e00bc282 100644 --- a/usr/src/lib/libumem/common/umem_base.h +++ b/usr/src/lib/libumem/common/umem_base.h @@ -22,12 +22,13 @@ * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
+ */ #ifndef _UMEM_BASE_H #define _UMEM_BASE_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <umem_impl.h> #ifdef __cplusplus @@ -75,6 +76,8 @@ extern volatile uint32_t umem_reaping; #define UMEM_REAP_ADDING 0x00000001 /* umem_reap() is active */ #define UMEM_REAP_ACTIVE 0x00000002 /* update thread is reaping */ +extern uintptr_t umem_tmem_off; + /* * umem.c: tunables */ @@ -97,6 +100,7 @@ extern size_t umem_lite_minsize; extern size_t umem_lite_maxalign; extern size_t umem_maxverify; extern size_t umem_minfirewall; +extern size_t umem_ptc_size; extern uint32_t umem_flags; @@ -139,6 +143,12 @@ extern int umem_create_update_thread(void); void umem_setup_envvars(int); void umem_process_envvars(void); +/* + * umem_genasm.c: private interfaces + */ +extern int umem_genasm_supported; +extern int umem_genasm(int *, umem_cache_t **, int); + #ifdef __cplusplus } #endif diff --git a/usr/src/lib/libumem/common/umem_impl.h b/usr/src/lib/libumem/common/umem_impl.h index 84313c32ed..f63246e166 100644 --- a/usr/src/lib/libumem/common/umem_impl.h +++ b/usr/src/lib/libumem/common/umem_impl.h @@ -21,10 +21,13 @@ */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Copyright 2012 Joyent, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 Joyent, Inc. All rights reserved. + */ + #ifndef _UMEM_IMPL_H #define _UMEM_IMPL_H @@ -63,6 +66,7 @@ extern "C" { #define UMF_HASH 0x00000200 /* cache has hash table */ #define UMF_RANDOMIZE 0x00000400 /* randomize other umem_flags */ +#define UMF_PTC 0x00000800 /* cache has per-thread caching */ #define UMF_BUFTAG (UMF_DEADBEEF | UMF_REDZONE) #define UMF_TOUCH (UMF_BUFTAG | UMF_LITE | UMF_CONTENTS) @@ -395,6 +399,13 @@ extern void umem_startup(caddr_t, size_t, size_t, caddr_t, caddr_t); extern int umem_add(caddr_t, size_t); #endif +/* + * Private interface with libc for tcumem. 
+ */ +extern uintptr_t _tmem_get_base(void); +extern int _tmem_get_nentries(void); +extern void _tmem_set_cleanup(void(*)(void *, int)); + #ifdef __cplusplus } #endif diff --git a/usr/src/lib/libumem/i386/umem_genasm.c b/usr/src/lib/libumem/i386/umem_genasm.c new file mode 100644 index 0000000000..0bfa338e2b --- /dev/null +++ b/usr/src/lib/libumem/i386/umem_genasm.c @@ -0,0 +1,603 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2012 Joyent, Inc. All rights reserved. + */ + +/* + * Don't Panic! If you find the blocks of assembly that follow confusing and + * you're questioning why they exist, please go read section 8 of the umem.c big + * theory statement. Next familiarize yourself with the malloc and free + * implementations in libumem's malloc.c. + * + * What follows is the i386 implementation of the thread caching automatic + * assembly generation. With i386 a function only has three registers its + * allowed to change without restoring them: eax, ecx, and edx. All others have + * to be preserved. Since the set of registers we have available is so small, we + * have to make use of esi, ebx, and edi and save their original values to the + * stack. 
+ * + * Malloc register usage: + * o. esi: Size of the malloc (passed into us and modified) + * o. edi: Size of the cache + * o. eax: Buffer to return + * o. ebx: Scratch space and temporary values + * o. ecx: Pointer to the tmem_t in the ulwp_t. + * o. edx: Pointer to the tmem_t array of roots + * + * Free register usage: + * o. esi: Size of the malloc (passed into us and modified) + * o. edi: Size of the cache + * o. eax: Buffer to free + * o. ebx: Scratch space and temporary values + * o. ecx: Pointer to the tmem_t in the ulwp_t. + * o. edx: Pointer to the tmem_t array of roots + * + * Once we determine what cache we are using, we increment %edx to the + * appropriate offset and set %edi with the size of the cache. This means that + * when we break out to the normal buffer allocation point %edx contains the + * head of the linked list and %edi is the amount that we have to adjust the + * total amount cached by the thread. + * + * Each block of assembly has pseudocode that describes its purpose. + */ + +#include <inttypes.h> +#include <strings.h> +#include <umem_impl.h> +#include "umem_base.h" + +#include <atomic.h> + +int umem_genasm_supported = 1; +uintptr_t umem_genasm_mptr; +size_t umem_genasm_msize; +uintptr_t umem_genasm_fptr; +size_t umem_genasm_fsize; +static uintptr_t umem_genasm_omptr; +static uintptr_t umem_genasm_ofptr; + +/* + * The maximum number of caches we can support. We use a single byte addl so + * this is 255 (UINT8_MAX) / sizeof (uintptr_t). In this case 63 + */ +#define UMEM_GENASM_MAX32 63 + +#define PTC_JMPADDR(dest, src) (dest - (src + 4)) +#define PTC_ROOT_SIZE sizeof (uintptr_t) +#define MULTINOP 0x0000441f0f + +/* + * void *ptcmalloc(size_t orig_size); + * + * size_t size = orig_size + 8; + * + * if (size < orig_size) + * goto tomalloc; ! 
This is overflow + * + * if (size > cache_size) + * goto tomalloc; + * + * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset; + * void **roots = t->tm_roots; + */ +#define PTC_MALINIT_JOUT 0x0e +#define PTC_MALINIT_MCS 0x14 +#define PTC_MALINIT_JOV 0x1a +#define PTC_MALINIT_SOFF 0x27 +static const uint8_t malinit[] = { + 0x55, /* pushl %ebp */ + 0x89, 0xe5, /* movl %esp, %ebp */ + 0x57, /* pushl %edi */ + 0x56, /* pushl %esi */ + 0x53, /* pushl %ebx */ + 0x8b, 0x75, 0x08, /* movl 0x8(%ebp), %esi */ + 0x83, 0xc6, 0x08, /* addl $0x8,%esi */ + 0x0f, 0x82, 0x00, 0x00, 0x00, 0x00, /* jc +$JMP (errout) */ + 0x81, 0xfe, 0x00, 0x00, 0x00, 0x00, /* cmpl sizeof ($C0), %esi */ + 0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +$JMP (errout) */ + 0x65, 0x8b, 0x0d, 0x00, 0x0, 0x00, 0x00, /* movl %gs:0x0,%ecx */ + 0x81, 0xc1, 0x00, 0x00, 0x00, 0x00, /* addl $OFF, %ecx */ + 0x8d, 0x51, 0x04 /* leal 0x4(%ecx), %edx */ +}; + +/* + * void ptcfree(void *buf); + * + * if (buf == NULL) + * return; + * + * malloc_data_t *tag = buf; + * tag--; + * int size = tag->malloc_size; + * int tagtval = UMEM_MALLOC_DECODE(tag->malloc_tag, size); + * + * if (tagval != MALLOC_MAGIC) + * goto tofree; + * + * if (size > cache_max) + * goto tofree; + * + * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset; + * void **roots = t->tm_roots; + */ +#define PTC_FRINI_JDONE 0x0d +#define PTC_FRINI_JFREE 0x23 +#define PTC_FRINI_MCS 0x29 +#define PTC_FRINI_JOV 0x2f +#define PTC_FRINI_SOFF 0x3c +static const uint8_t freeinit[] = { + 0x55, /* pushl %ebp */ + 0x89, 0xe5, /* movl %esp, %ebp */ + 0x57, /* pushl %edi */ + 0x56, /* pushl %esi */ + 0x53, /* pushl %ebx */ + 0x8b, 0x45, 0x08, /* movl 0x8(%ebp), %eax */ + 0x85, 0xc0, /* testl %eax, %eax */ + 0x0f, 0x84, 0x00, 0x00, 0x00, 0x00, /* je $JDONE (done) */ + 0x83, 0xe8, 0x08, /* subl $0x8,%eax */ + 0x8b, 0x30, /* movl (%eax),%esi */ + 0x8b, 0x50, 0x04, /* movl 0x4(%eax),%edx */ + 0x01, 0xf2, /* addl %esi,%edx */ + 0x81, 0xfa, 0x00, 0xc0, 0x10, 0x3a, /* cmpl 
MAGIC32, %edx */ + 0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, /* jne +JFREE (goto freebuf) */ + + 0x81, 0xfe, 0x00, 0x00, 0x00, 0x00, /* cmpl sizeof ($C0), %esi */ + 0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +$JMP (errout) */ + 0x65, 0x8b, 0x0d, 0x00, 0x0, 0x00, 0x00, /* movl %gs:0x0,%ecx */ + 0x81, 0xc1, 0x00, 0x00, 0x00, 0x00, /* addl $0xOFF, %ecx */ + 0x8d, 0x51, 0x04 /* leal 0x4(%ecx),%edx */ +}; + +/* + * if (size <= $CACHE_SIZE) { + * csize = $CACHE_SIZE; + * } else ... ! goto next cache + */ +#define PTC_INICACHE_CMP 0x02 +#define PTC_INICACHE_SIZE 0x09 +#define PTC_INICACHE_JMP 0x0e +static const uint8_t inicache[] = { + 0x81, 0xfe, 0xff, 0x00, 0x00, 0x00, /* cmpl sizeof ($C0), %esi */ + 0x77, 0x0a, /* ja +0xa */ + 0xbf, 0xff, 0x00, 0x00, 0x00, /* movl sizeof ($C0), %edi */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp +$JMP (allocbuf) */ +}; + +/* + * if (size <= $CACHE_SIZE) { + * csize = $CACHE_SIZE; + * roots += $CACHE_NUM; + * } else ... ! goto next cache + */ +#define PTC_GENCACHE_CMP 0x02 +#define PTC_GENCACHE_NUM 0x0a +#define PTC_GENCACHE_SIZE 0x0c +#define PTC_GENCACHE_JMP 0x11 +static const uint8_t gencache[] = { + 0x81, 0xfe, 0x00, 0x00, 0x00, 0x00, /* cmpl sizeof ($CACHE), %esi */ + 0x77, 0x0d, /* ja +0xd (next cache) */ + 0x83, 0xc2, 0x00, /* addl $4*$ii, %edx */ + 0xbf, 0x00, 0x00, 0x00, 0x00, /* movl sizeof ($CACHE), %edi */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp +$JMP (allocbuf) */ +}; + +/* + * else if (size <= $CACHE_SIZE) { + * csize = $CACHE_SIZE; + * roots += $CACHE_NUM; + * } else { + * goto tofunc; ! goto tomalloc if ptcmalloc. + * } ! goto tofree if ptcfree. 
+ */ +#define PTC_FINCACHE_CMP 0x02 +#define PTC_FINCACHE_JMP 0x07 +#define PTC_FINCACHE_NUM 0x0a +#define PTC_FINCACHE_SIZE 0x0c +static const uint8_t fincache[] = { + 0x81, 0xfe, 0xff, 0x00, 0x00, 0x00, /* cmpl sizeof ($CLAST), %esi */ + 0x77, 0x00, /* ja +$JMP (to errout) */ + 0x83, 0xc2, 0x00, /* addl $4*($NCACHES-1), %edx */ + 0xbf, 0x00, 0x00, 0x00, 0x00, /* movl sizeof ($CLAST), %edi */ +}; + +/* + * if (*root == NULL) + * goto tomalloc; + * + * malloc_data_t *ret = *root; + * *root = *(void **)ret; + * t->tm_size -= csize; + * ret->malloc_size = size; + * + * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size); + * ret++; + * + * return ((void *)ret); + * tomalloc: + * return (malloc(orig_size)); + */ +#define PTC_MALFINI_ALLABEL 0x00 +#define PTC_MALFINI_JMLABEL 0x20 +#define PTC_MALFINI_JMADDR 0x25 +static const uint8_t malfini[] = { + /* allocbuf: */ + 0x8b, 0x02, /* movl (%edx), %eax */ + 0x85, 0xc0, /* testl %eax, %eax */ + 0x74, 0x1a, /* je +0x1a (errout) */ + 0x8b, 0x18, /* movl (%eax), %ebx */ + 0x89, 0x1a, /* movl %ebx, (%edx) */ + 0x29, 0x39, /* subl %edi, (%ecx) */ + 0x89, 0x30, /* movl %esi, (%eax) */ + 0xba, 0x00, 0xc0, 0x10, 0x3a, /* movl $0x3a10c000,%edx */ + 0x29, 0xf2, /* subl %esi, %edx */ + 0x89, 0x50, 0x04, /* movl %edx, 0x4(%eax) */ + 0x83, 0xc0, 0x08, /* addl $0x8, %eax */ + 0x5b, /* popl %ebx */ + 0x5e, /* popl %esi */ + 0x5f, /* popl %edi */ + 0xc9, /* leave */ + 0xc3, /* ret */ + /* errout: */ + 0x5b, /* popl %ebx */ + 0x5e, /* popl %esi */ + 0x5f, /* popl %edi */ + 0xc9, /* leave */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp $malloc */ +}; + +/* + * if (t->tm_size + csize >= umem_ptc_size) + * goto tofree; + * + * t->tm_size += csize; + * *(void **)tag = *root; + * *root = tag; + * return; + * tofree: + * free(buf); + * return; + */ +#define PTC_FRFINI_RBUFLABEL 0x00 +#define PTC_FRFINI_CACHEMAX 0x06 +#define PTC_FRFINI_DONELABEL 0x14 +#define PTC_FRFINI_JFLABEL 0x19 +#define PTC_FRFINI_JFADDR 0x1e +static const uint8_t 
freefini[] = { + /* freebuf: */ + 0x8b, 0x19, /* movl (%ecx),%ebx */ + 0x01, 0xfb, /* addl %edi,%ebx */ + 0x81, 0xfb, 0x00, 0x00, 0x00, 0x00, /* cmpl maxsize, %ebx */ + 0x73, 0x0d, /* jae +0xd <tofree> */ + 0x01, 0x39, /* addl %edi,(%ecx) */ + 0x8b, 0x3a, /* movl (%edx),%edi */ + 0x89, 0x38, /* movl %edi,(%eax) */ + 0x89, 0x02, /* movl %eax,(%edx) */ + /* done: */ + 0x5b, /* popl %ebx */ + 0x5e, /* popl %esi */ + 0x5f, /* popl %edi */ + 0xc9, /* leave */ + 0xc3, /* ret */ + /* realfree: */ + 0x5b, /* popl %ebx */ + 0x5e, /* popl %esi */ + 0x5f, /* popl %edi */ + 0xc9, /* leave */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp free */ +}; + +/* + * Construct the initial part of malloc. off contains the offset from curthread + * to the root of the tmem structure. ep is the offset of the label to error + * and jump to malloc. csize is the size of the largest umem_cache in ptcumem. + */ +static int +genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize) +{ + uint32_t addr; + + bcopy(malinit, bp, sizeof (malinit)); + addr = PTC_JMPADDR(ep, PTC_MALINIT_JOUT); + bcopy(&addr, bp + PTC_MALINIT_JOUT, sizeof (addr)); + bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize)); + addr = PTC_JMPADDR(ep, PTC_MALINIT_JOV); + bcopy(&addr, bp + PTC_MALINIT_JOV, sizeof (addr)); + bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off)); + + return (sizeof (malinit)); +} + +static int +genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mc) +{ + uint32_t addr; + + bcopy(freeinit, bp, sizeof (freeinit)); + addr = PTC_JMPADDR(dp, PTC_FRINI_JDONE); + bcopy(&addr, bp + PTC_FRINI_JDONE, sizeof (addr)); + addr = PTC_JMPADDR(ep, PTC_FRINI_JFREE); + bcopy(&addr, bp + PTC_FRINI_JFREE, sizeof (addr)); + bcopy(&mc, bp + PTC_FRINI_MCS, sizeof (mc)); + addr = PTC_JMPADDR(ep, PTC_FRINI_JOV); + bcopy(&addr, bp + PTC_FRINI_JOV, sizeof (addr)); + bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off)); + return (sizeof (freeinit)); +} + +/* + * Create the initial cache entry of the specified 
size. The value of ap tells + * us what the address of the label to try and allocate a buffer. This value is + * an offset from the current base to that value. + */ +static int +genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap) +{ + uint32_t addr; + + bcopy(inicache, bp, sizeof (inicache)); + bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize)); + bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize)); + addr = PTC_JMPADDR(ap, PTC_INICACHE_JMP); + ASSERT(addr != 0); + bcopy(&addr, bp + PTC_INICACHE_JMP, sizeof (addr)); + + return (sizeof (inicache)); +} + +static int +genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap) +{ + uint32_t addr; + uint8_t coff; + + ASSERT(256 / PTC_ROOT_SIZE > num); + ASSERT(num != 0); + bcopy(gencache, bp, sizeof (gencache)); + bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize)); + bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize)); + coff = num * PTC_ROOT_SIZE; + bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff)); + addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP); + bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr)); + + return (sizeof (gencache)); +} + +static int +genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep) +{ + uint8_t addr; + + ASSERT(ep <= 0xff && ep > 7); + ASSERT(256 / PTC_ROOT_SIZE > num); + bcopy(fincache, bp, sizeof (fincache)); + bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize)); + bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize)); + addr = num * PTC_ROOT_SIZE; + bcopy(&addr, bp + PTC_FINCACHE_NUM, sizeof (addr)); + addr = ep - PTC_FINCACHE_JMP - 1; + bcopy(&addr, bp + PTC_FINCACHE_JMP, sizeof (addr)); + + return (sizeof (fincache)); +} + +static int +genasm_malfini(uint8_t *bp, uintptr_t mptr) +{ + uint32_t addr; + + bcopy(malfini, bp, sizeof (malfini)); + addr = PTC_JMPADDR(mptr, ((uintptr_t)bp + PTC_MALFINI_JMADDR)); + bcopy(&addr, bp + PTC_MALFINI_JMADDR, sizeof (addr)); + + return (sizeof (malfini)); +} + +static int +genasm_frfini(uint8_t *bp, uint32_t maxthr, 
uintptr_t fptr) +{ + uint32_t addr; + + bcopy(freefini, bp, sizeof (freefini)); + bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr)); + addr = PTC_JMPADDR(fptr, ((uintptr_t)bp + PTC_FRFINI_JFADDR)); + bcopy(&addr, bp + PTC_FRFINI_JFADDR, sizeof (addr)); + + return (sizeof (freefini)); +} + +/* + * The malloc inline assembly is constructed as follows: + * + * o Malloc prologue assembly + * o Generic first-cache check + * o n Generic cache checks (where n = _tmem_get_nentries() - 2) + * o Generic last-cache check + * o Malloc epilogue assembly + * + * Generally there are at least three caches. When there is only one cache we + * only use the generic last-cache. In the case where there are two caches, we + * just leave out the middle ones. + */ +static int +genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes) +{ + int ii, off; + uint8_t *bp; + size_t total; + uint32_t allocoff, erroff; + + total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache); + + if (nents >= 2) + total += sizeof (inicache) + sizeof (gencache) * (nents - 2); + + if (total > len) + return (1); + + erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL; + allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL; + + bp = base; + + off = genasm_malinit(bp, umem_tmem_off, erroff, + umem_alloc_sizes[nents-1]); + bp += off; + allocoff -= off; + erroff -= off; + + if (nents > 1) { + off = genasm_firstcache(bp, umem_alloc_sizes[0], allocoff); + bp += off; + allocoff -= off; + erroff -= off; + } + + for (ii = 1; ii < nents - 1; ii++) { + off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], allocoff); + bp += off; + allocoff -= off; + erroff -= off; + } + + bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1], + erroff); + bp += genasm_malfini(bp, umem_genasm_omptr); + ASSERT(((uintptr_t)bp - total) == (uintptr_t)base); + + return (0); +} + +static int +genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes) +{ + uint8_t *bp; + int ii, off; + 
size_t total; + uint32_t rbufoff, retoff, erroff; + + /* Assume that nents has already been audited for us */ + total = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache); + if (nents >= 2) + total += sizeof (inicache) + sizeof (gencache) * (nents - 2); + + if (total > len) + return (1); + + erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL); + rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL); + retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL); + + bp = base; + + off = genasm_frinit(bp, umem_tmem_off, retoff, erroff, + umem_alloc_sizes[nents - 1]); + bp += off; + erroff -= off; + rbufoff -= off; + + if (nents > 1) { + off = genasm_firstcache(bp, umem_alloc_sizes[0], rbufoff); + bp += off; + erroff -= off; + rbufoff -= off; + } + + for (ii = 1; ii < nents - 1; ii++) { + off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], rbufoff); + bp += off; + rbufoff -= off; + erroff -= off; + } + + bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1], + erroff); + bp += genasm_frfini(bp, umem_ptc_size, umem_genasm_ofptr); + ASSERT(((uintptr_t)bp - total) == (uintptr_t)base); + + return (0); +} + +int +umem_genasm(int *alloc_sizes, umem_cache_t **caches, int ncaches) +{ + int nents, i; + uint8_t *mptr; + uint8_t *fptr; + uint32_t *ptr; + uint64_t v, *vptr; + + mptr = (void *)((uintptr_t)&umem_genasm_mptr + 5); + fptr = (void *)((uintptr_t)&umem_genasm_fptr + 5); + if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 || + umem_genasm_fptr == 0 || umem_genasm_fsize == 0) + return (1); + + /* + * The total number of caches that we can service is the minimum of: + * o the amount supported by libc + * o the total number of umem caches + * o we use a single byte addl, so its 255 / sizeof (uintptr_t). For + * 32-bit, this is 63. 
+ */ + nents = _tmem_get_nentries(); + + if (UMEM_GENASM_MAX32 < nents) + nents = UMEM_GENASM_MAX32; + + if (ncaches < nents) + nents = ncaches; + + /* Based on our constraints, this is not an error */ + if (nents == 0 || umem_ptc_size == 0) + return (0); + + /* Grab the original malloc and free locations */ + ptr = (void *)(mptr - 4); + umem_genasm_omptr = *ptr + (uintptr_t)mptr; + ptr = (void *)(fptr - 4); + umem_genasm_ofptr = *ptr + (uintptr_t)fptr; + + /* Take into account the jump */ + if (genasm_malloc(mptr, umem_genasm_fsize - 5, nents, + alloc_sizes) != 0) + return (1); + + if (genasm_free(fptr, umem_genasm_fsize - 5, nents, + alloc_sizes) != 0) + return (1); + + /* nop out the jump with a multibyte jump */ + vptr = (void *)&umem_genasm_mptr; + v = MULTINOP; + v |= *vptr & (0xffffffULL << 40); + (void) atomic_swap_64(vptr, v); + vptr = (void *)&umem_genasm_fptr; + v = MULTINOP; + v |= *vptr & (0xffffffULL << 40); + (void) atomic_swap_64(vptr, v); + + for (i = 0; i < nents; i++) + caches[i]->cache_flags |= UMF_PTC; + + return (0); +} diff --git a/usr/src/lib/libumem/sparc/umem_genasm.c b/usr/src/lib/libumem/sparc/umem_genasm.c new file mode 100644 index 0000000000..77dcc4a6a5 --- /dev/null +++ b/usr/src/lib/libumem/sparc/umem_genasm.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2012 Joyent, Inc. All rights reserved. + */ + +/* + * Don't Panic! If you wonder why this seemingly empty file exists, it's because + * there is no sparc implementation for ptcumem. Go read libumem's big theory + * statement in lib/libumem/common/umem.c, particularly section eight. + */ + +int umem_genasm_supported = 0; + +/*ARGSUSED*/ +int +umem_genasm(int *cp, int nc) +{ + return (1); +} diff --git a/usr/src/lib/libumem_trampoline/Makefile b/usr/src/lib/libumem_trampoline/Makefile new file mode 100644 index 0000000000..e1140efa8c --- /dev/null +++ b/usr/src/lib/libumem_trampoline/Makefile @@ -0,0 +1,50 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 2012 Joyent, Inc. All rights reserved. +# Use is subject to license terms. 
+# + +include ../Makefile.lib + +SUBDIRS= $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET= all +clean := TARGET= clean +clobber := TARGET= clobber +install := TARGET= install +lint := TARGET= lint + +.KEEP_STATE: + +all clean clobber install lint: $(SUBDIRS) + +install_h: $(ROOTHDRS) + +check: $(CHECKHDRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include ../Makefile.targ diff --git a/usr/src/lib/libumem_trampoline/Makefile.com b/usr/src/lib/libumem_trampoline/Makefile.com new file mode 100644 index 0000000000..a43fad0fd5 --- /dev/null +++ b/usr/src/lib/libumem_trampoline/Makefile.com @@ -0,0 +1,49 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 2012 Joyent, Inc. All rights reserved. +# Use is subject to license terms. 
+# + +LIBRARY = libumem_trampoline.a +VERS = .1 +OBJECTS = trampoline.o + +include ../../Makefile.lib + +# install this library in the root filesystem +include ../../Makefile.rootfs + +LIBS = $(DYNLIB) $(LINTLIB) + +SRCDIR = ../common + +$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC) + +CFLAGS += $(CCVERBOSE) + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +include ../../Makefile.targ diff --git a/usr/src/lib/libumem_trampoline/amd64/Makefile b/usr/src/lib/libumem_trampoline/amd64/Makefile new file mode 100644 index 0000000000..132720332b --- /dev/null +++ b/usr/src/lib/libumem_trampoline/amd64/Makefile @@ -0,0 +1,29 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2012 (c) Joyent, Inc. All rights reserved. +# Use is subject to license terms. 
+# + +include ../Makefile.com +include ../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) diff --git a/usr/src/lib/libumem_trampoline/common/llib-lumem_trampoline b/usr/src/lib/libumem_trampoline/common/llib-lumem_trampoline new file mode 100644 index 0000000000..8fda64fc80 --- /dev/null +++ b/usr/src/lib/libumem_trampoline/common/llib-lumem_trampoline @@ -0,0 +1,28 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2012 Joyent, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* LINTLIBRARY */ +/* PROTOLIB1 */ diff --git a/usr/src/lib/libumem_trampoline/common/mapfile-vers b/usr/src/lib/libumem_trampoline/common/mapfile-vers new file mode 100644 index 0000000000..c52eeda977 --- /dev/null +++ b/usr/src/lib/libumem_trampoline/common/mapfile-vers @@ -0,0 +1,62 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 2012, Joyent, Inc. All rights reserved. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +# +# The BSS must be executable for ptcumem to work properly. If it is not, +# programs will get a SEGV. For more information read section 8 of libumem's big +# theory statement in lib/libumem/common/umem.c. +# +LOAD_SEGMENT bss { + FLAGS = READ WRITE EXECUTE; +}; + +SYMBOL_VERSION SUNWprivate_1.1 { + global: + la_version; + la_objopen; +$if _ELF32 + la_symbind32; +$endif +$if _ELF64 + la_symbind64; +$endif + local: + *; +}; diff --git a/usr/src/lib/libumem_trampoline/common/trampoline.c b/usr/src/lib/libumem_trampoline/common/trampoline.c new file mode 100644 index 0000000000..200f32aabc --- /dev/null +++ b/usr/src/lib/libumem_trampoline/common/trampoline.c @@ -0,0 +1,166 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2012 Joyent, Inc. All rights reserved. + */ + +/* + * This is a local link auditing library for libumem(3LIB). It provides a means + * for us to implement the per-thread caching component of libumem. When any + * binary or library attempts to bind to libumem's malloc and free symbols we + * instead point them to a private buffer in our own BSS. Our mapfile ensures + * that this BSS is readable, writeable, and executable. By default these + * private buffers contain a jmp instruction to the original libumem malloc and + * free. + * + * When libumem tries to generate its assembly, we key off of private symbol + * names and replace their values with pointers to our values. For more + * information on this process, see section 8 of the big theory statement for + * libumem in lib/libumem/common/umem.c. + * + * Note that this is very x86 specific currently. This includes x86 instructions + * and making assumptions about alignment of variables, see the lint warnings. + * By the current construction, SPARC is basically a no-op. + */ +#include <dlfcn.h> +#include <unistd.h> +#include <sys/types.h> +#include <libelf.h> +#include <link.h> + +#if defined(__i386) || defined(__amd64) +#define LIBUMEM_TRAMPOLINE_JMP32 0xe9 +#endif /* defined(__i386) || defined(__amd64) */ + +/* + * This is our malloc trampoline. We give it the name "malloc" to make it + * appear somewhat like malloc. 
+ */ +static uint8_t malloc[4096]; +static uint8_t free[4096]; +static size_t msize = sizeof (malloc); +static size_t fsize = sizeof (free); + +/* + * We don't want to link against libc, so we define our own versions of the + * string functions as necessary. + */ +static int +la_strcmp(const char *s1, const char *s2) +{ + if (s1 == s2) + return (0); + while (*s1 == *s2++) + if (*s1++ == '\0') + return (0); + + return (*(unsigned char *)s1 - *(unsigned char *)--s2); +} + +static char * +la_strrchr(char *str, char c) +{ + char *r; + + r = NULL; + do { + if (*str == c) + r = str; + } while (*str++); + return (r); +} + +/*ARGSUSED*/ +uint_t +la_version(uint_t version) +{ + return (LAV_CURRENT); +} + +/*ARGSUSED*/ +uint_t +la_objopen(Link_map *lmp, Lmid_t lmid, uintptr_t *cookie) +{ +#if defined(__i386) || defined(__amd64) + char *objname; + + if ((objname = la_strrchr(lmp->l_name, '/')) == NULL || + *(++objname) == '\0') + objname = lmp->l_name; + + if (la_strcmp(objname, "libumem.so.1") == 0 || + la_strcmp(objname, "libumem.so") == 0) + return (LA_FLG_BINDFROM | LA_FLG_BINDTO); +#endif /* defined(__i386) || defined(__amd64) */ + + return (0); +} + +#if defined(_LP64) +/*ARGSUSED*/ +uintptr_t +la_symbind64(Elf64_Sym *symp, uint_t symndx, uintptr_t *refcook, + uintptr_t *defcook, uint_t *sb_flags, char const *sym_name) +#else +/*ARGSUSED*/ +uintptr_t +la_symbind32(Elf32_Sym *symp, uint_t symndx, uintptr_t *refcook, + uintptr_t *defcook, uint_t *sb_flags) +#endif +{ +#if defined(__i386) || defined(__amd64) + int i = 0; + +#if !defined(_LP64) + char const *sym_name = (char const *) symp->st_name; +#endif + + if (la_strcmp(sym_name, "malloc") == 0) { + if (malloc[i] == '\0') { + malloc[i++] = LIBUMEM_TRAMPOLINE_JMP32; + /*LINTED E_BAD_PTR_CAST_ALIGN*/ + *(uint32_t *)&malloc[i] = (uint32_t)(symp->st_value - + (uintptr_t)&malloc[i + sizeof (uint32_t)]); + } + + return ((uintptr_t)malloc); + } else if (la_strcmp(sym_name, "free") == 0) { + if (free[i] == '\0') { + free[i++] = 
LIBUMEM_TRAMPOLINE_JMP32; + /*LINTED E_BAD_PTR_CAST_ALIGN*/ + *(uint32_t *)&free[i] = (uint32_t)(symp->st_value - + (uintptr_t)&free[i + sizeof (uint32_t)]); + } + + return ((uintptr_t)free); + } else if (la_strcmp(sym_name, "umem_genasm_mptr") == 0) { + return ((uintptr_t)malloc); + } else if (la_strcmp(sym_name, "umem_genasm_msize") == 0) { + return ((uintptr_t)&msize); + } else if (la_strcmp(sym_name, "umem_genasm_fptr") == 0) { + return ((uintptr_t)free); + } else if (la_strcmp(sym_name, "umem_genasm_fsize") == 0) { + return ((uintptr_t)&fsize); + } else { + return (symp->st_value); + } +#endif /* defined(__i386) || defined(__amd64) */ +} diff --git a/usr/src/lib/libumem_trampoline/i386/Makefile b/usr/src/lib/libumem_trampoline/i386/Makefile new file mode 100644 index 0000000000..76c3ccc672 --- /dev/null +++ b/usr/src/lib/libumem_trampoline/i386/Makefile @@ -0,0 +1,29 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2012 (c) Joyent, Inc. All rights reserved. +# Use is subject to license terms. 
+# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/libumem_trampoline/sparc/Makefile b/usr/src/lib/libumem_trampoline/sparc/Makefile new file mode 100644 index 0000000000..76c3ccc672 --- /dev/null +++ b/usr/src/lib/libumem_trampoline/sparc/Makefile @@ -0,0 +1,29 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2012 (c) Joyent, Inc. All rights reserved. +# Use is subject to license terms. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/libumem_trampoline/sparcv9/Makefile b/usr/src/lib/libumem_trampoline/sparcv9/Makefile new file mode 100644 index 0000000000..132720332b --- /dev/null +++ b/usr/src/lib/libumem_trampoline/sparcv9/Makefile @@ -0,0 +1,29 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2012 (c) Joyent, Inc. All rights reserved. +# Use is subject to license terms. +# + +include ../Makefile.com +include ../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) diff --git a/usr/src/man/man3malloc/umem_alloc.3malloc b/usr/src/man/man3malloc/umem_alloc.3malloc index 65bf79e5f3..eb6b270398 100644 --- a/usr/src/man/man3malloc/umem_alloc.3malloc +++ b/usr/src/man/man3malloc/umem_alloc.3malloc @@ -1,5 +1,6 @@ '\" te .\" Copyright (c) 2008 Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright (c) 2012 Joyent, Inc. All Rights Reserved. .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] @@ -173,6 +174,19 @@ Set the underlying function used to allocate memory. This option can be set to \fBmmap\fR(2)-based source. If set to a value that is not supported, \fBsbrk\fR will be used. 
.RE +.sp +.ne 2 +.na +\fB\fBperthread_cache\fR=\fBsize\fR\fR +.ad +.RS 16n +libumem allows for each thread to cache recently freed small allocations for +future allocations. The size argument, which accepts the suffixes k, m, g, and t, +denotes the maximum amount of memory each thread can use for this purpose. The +default amount used is 1 MB. Any buffers in the per-thread cache are freed when +the thread exits. The efficacy of the per-thread cache can be determined with +the \fB::umastat\fR \fBmdb\fR(1) \fIdcmd\fR debugger command. +.RE .RE |
