/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * Copyright (c) 2016 by Delphix. All rights reserved. */ /* * Multidata, as described in the following papers: * * Adi Masputra, * Multidata V.2: VA-Disjoint Packet Extents Framework Interface * Design Specification. August 2004. * Available as http://sac.sfbay/PSARC/2004/594/materials/mmd2.pdf. * * Adi Masputra, * Multidata Interface Design Specification. Sep 2002. * Available as http://sac.sfbay/PSARC/2002/276/materials/mmd.pdf. * * Adi Masputra, Frank DiMambro, Kacheong Poon, * An Efficient Networking Transmit Mechanism for Solaris: * Multidata Transmit (MDT). May 2002. * Available as http://sac.sfbay/PSARC/2002/276/materials/mdt.pdf. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int mmd_constructor(void *, void *, int); static void mmd_destructor(void *, void *); static int pdslab_constructor(void *, void *, int); static void pdslab_destructor(void *, void *); static int pattbl_constructor(void *, void *, int); static void pattbl_destructor(void *, void *); static void mmd_esballoc_free(caddr_t); static int mmd_copy_pattbl(patbkt_t *, multidata_t *, pdesc_t *, int); static boolean_t pbuf_ref_valid(multidata_t *, pdescinfo_t *); #pragma inline(pbuf_ref_valid) static boolean_t pdi_in_range(pdescinfo_t *, pdescinfo_t *); #pragma inline(pdi_in_range) static pdesc_t *mmd_addpdesc_int(multidata_t *, pdescinfo_t *, int *, int); #pragma inline(mmd_addpdesc_int) static void mmd_destroy_pattbl(patbkt_t **); #pragma inline(mmd_destroy_pattbl) static pattr_t *mmd_find_pattr(patbkt_t *, uint_t); #pragma inline(mmd_find_pattr) static pdesc_t *mmd_destroy_pdesc(multidata_t *, pdesc_t *); #pragma inline(mmd_destroy_pdesc) static pdesc_t *mmd_getpdesc(multidata_t *, pdesc_t *, pdescinfo_t *, uint_t, boolean_t); #pragma inline(mmd_getpdesc) static struct kmem_cache *mmd_cache; static struct kmem_cache *pd_slab_cache; static struct kmem_cache *pattbl_cache; int mmd_debug = 1; #define MMD_DEBUG(s) if (mmd_debug > 0) cmn_err s /* * Set to this to true to bypass pdesc bounds checking. */ boolean_t mmd_speed_over_safety = B_FALSE; /* * Patchable kmem_cache flags. */ int mmd_kmem_flags = 0; int pdslab_kmem_flags = 0; int pattbl_kmem_flags = 0; /* * Alignment (in bytes) of our kmem caches. */ #define MULTIDATA_CACHE_ALIGN 64 /* * Default number of packet descriptors per descriptor slab. Making * this too small will trigger more descriptor slab allocation; making * it too large will create too many unclaimed descriptors. */ #define PDSLAB_SZ 15 uint_t pdslab_sz = PDSLAB_SZ; /* * Default attribute hash table size. 
It's okay to set this to a small * value (even to 1) because there aren't that many attributes currently * defined, and because we assume there won't be many attributes associated * with a Multidata at a given time. Increasing the size will reduce * attribute search time (given a large number of attributes in a Multidata), * and decreasing it will reduce the memory footprints and the overhead * associated with managing the table. */ #define PATTBL_SZ 1 uint_t pattbl_sz = PATTBL_SZ; /* * Attribute hash key. */ #define PATTBL_HASH(x, sz) ((x) % (sz)) /* * Structure that precedes each Multidata metadata. */ struct mmd_buf_info { frtn_t frp; /* free routine */ uint_t buf_len; /* length of kmem buffer */ }; /* * The size of each metadata buffer. */ #define MMD_CACHE_SIZE \ (sizeof (struct mmd_buf_info) + sizeof (multidata_t)) /* * Called during startup in order to create the Multidata kmem caches. */ void mmd_init(void) { pdslab_sz = MAX(1, pdslab_sz); /* at least 1 descriptor */ pattbl_sz = MAX(1, pattbl_sz); /* at least 1 bucket */ mmd_cache = kmem_cache_create("multidata", MMD_CACHE_SIZE, MULTIDATA_CACHE_ALIGN, mmd_constructor, mmd_destructor, NULL, NULL, NULL, mmd_kmem_flags); pd_slab_cache = kmem_cache_create("multidata_pdslab", PDESC_SLAB_SIZE(pdslab_sz), MULTIDATA_CACHE_ALIGN, pdslab_constructor, pdslab_destructor, NULL, (void *)(uintptr_t)pdslab_sz, NULL, pdslab_kmem_flags); pattbl_cache = kmem_cache_create("multidata_pattbl", sizeof (patbkt_t) * pattbl_sz, MULTIDATA_CACHE_ALIGN, pattbl_constructor, pattbl_destructor, NULL, (void *)(uintptr_t)pattbl_sz, NULL, pattbl_kmem_flags); } /* * Create a Multidata message block. */ multidata_t * mmd_alloc(mblk_t *hdr_mp, mblk_t **mmd_mp, int kmflags) { uchar_t *buf; multidata_t *mmd; uint_t mmd_mplen; struct mmd_buf_info *buf_info; ASSERT(hdr_mp != NULL); ASSERT(mmd_mp != NULL); /* * Caller should never pass in a chain of mblks since we * only care about the first one, hence the assertions. 
*/ ASSERT(hdr_mp->b_cont == NULL); if ((buf = kmem_cache_alloc(mmd_cache, kmflags)) == NULL) return (NULL); buf_info = (struct mmd_buf_info *)buf; buf_info->frp.free_arg = (caddr_t)buf; mmd = (multidata_t *)(buf_info + 1); mmd_mplen = sizeof (*mmd); if ((*mmd_mp = desballoc((uchar_t *)mmd, mmd_mplen, BPRI_HI, &(buf_info->frp))) == NULL) { kmem_cache_free(mmd_cache, buf); return (NULL); } DB_TYPE(*mmd_mp) = M_MULTIDATA; (*mmd_mp)->b_wptr += mmd_mplen; mmd->mmd_dp = (*mmd_mp)->b_datap; mmd->mmd_hbuf = hdr_mp; return (mmd); } /* * Associate additional payload buffer to the Multidata. */ int mmd_addpldbuf(multidata_t *mmd, mblk_t *pld_mp) { int i; ASSERT(mmd != NULL); ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); ASSERT(pld_mp != NULL); mutex_enter(&mmd->mmd_pd_slab_lock); for (i = 0; i < MULTIDATA_MAX_PBUFS && mmd->mmd_pbuf_cnt < MULTIDATA_MAX_PBUFS; i++) { if (mmd->mmd_pbuf[i] == pld_mp) { /* duplicate entry */ MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding " "pld 0x%p to mmd 0x%p since it has been " "previously added into slot %d (total %d)\n", (void *)pld_mp, (void *)mmd, i, mmd->mmd_pbuf_cnt)); mutex_exit(&mmd->mmd_pd_slab_lock); return (-1); } else if (mmd->mmd_pbuf[i] == NULL) { mmd->mmd_pbuf[i] = pld_mp; mmd->mmd_pbuf_cnt++; mutex_exit(&mmd->mmd_pd_slab_lock); return (i); } } /* all slots are taken */ MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding pld 0x%p to mmd 0x%p " "since no slot space is left (total %d max %d)\n", (void *)pld_mp, (void *)mmd, mmd->mmd_pbuf_cnt, MULTIDATA_MAX_PBUFS)); mutex_exit(&mmd->mmd_pd_slab_lock); return (-1); } /* * Multidata metadata kmem cache constructor routine. 
*/ /* ARGSUSED */ static int mmd_constructor(void *buf, void *cdrarg, int kmflags) { struct mmd_buf_info *buf_info; multidata_t *mmd; bzero((void *)buf, MMD_CACHE_SIZE); buf_info = (struct mmd_buf_info *)buf; buf_info->frp.free_func = mmd_esballoc_free; buf_info->buf_len = MMD_CACHE_SIZE; mmd = (multidata_t *)(buf_info + 1); mmd->mmd_magic = MULTIDATA_MAGIC; mutex_init(&(mmd->mmd_pd_slab_lock), NULL, MUTEX_DRIVER, NULL); QL_INIT(&(mmd->mmd_pd_slab_q)); QL_INIT(&(mmd->mmd_pd_q)); return (0); } /* * Multidata metadata kmem cache destructor routine. */ /* ARGSUSED */ static void mmd_destructor(void *buf, void *cdrarg) { multidata_t *mmd; #ifdef DEBUG int i; #endif mmd = (multidata_t *)((uchar_t *)buf + sizeof (struct mmd_buf_info)); ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); ASSERT(mmd->mmd_dp == NULL); ASSERT(mmd->mmd_hbuf == NULL); ASSERT(mmd->mmd_pbuf_cnt == 0); #ifdef DEBUG for (i = 0; i < MULTIDATA_MAX_PBUFS; i++) ASSERT(mmd->mmd_pbuf[i] == NULL); #endif ASSERT(mmd->mmd_pattbl == NULL); mutex_destroy(&(mmd->mmd_pd_slab_lock)); ASSERT(mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q)); ASSERT(mmd->mmd_slab_cnt == 0); ASSERT(mmd->mmd_pd_q.ql_next == &(mmd->mmd_pd_q)); ASSERT(mmd->mmd_pd_cnt == 0); ASSERT(mmd->mmd_hbuf_ref == 0); ASSERT(mmd->mmd_pbuf_ref == 0); } /* * Multidata message block free callback routine. 
*/
static void
mmd_esballoc_free(caddr_t buf)
{
	multidata_t *mmd;
	pdesc_t *pd, *pd_head;
	pdesc_slab_t *slab, *slab_head, *slab_next;
	int slot;

	ASSERT(buf != NULL);
	ASSERT(((struct mmd_buf_info *)buf)->buf_len == MMD_CACHE_SIZE);

	/* buf is the cache buffer; the metadata sits behind its header */
	mmd = (multidata_t *)(buf + sizeof (struct mmd_buf_info));
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);

	ASSERT(mmd->mmd_dp != NULL);
	ASSERT(mmd->mmd_dp->db_ref == 1);

	/* remove all packet descriptors and private attributes */
	pd_head = Q2PD(&(mmd->mmd_pd_q));
	for (pd = Q2PD(mmd->mmd_pd_q.ql_next); pd != pd_head; )
		pd = mmd_destroy_pdesc(mmd, pd);

	ASSERT(mmd->mmd_pd_q.ql_next == &(mmd->mmd_pd_q));
	ASSERT(mmd->mmd_pd_cnt == 0);
	ASSERT(mmd->mmd_hbuf_ref == 0);
	ASSERT(mmd->mmd_pbuf_ref == 0);

	/* remove all global attributes */
	if (mmd->mmd_pattbl != NULL)
		mmd_destroy_pattbl(&(mmd->mmd_pattbl));

	/*
	 * Remove all descriptor slabs, putting each one back into its
	 * constructed state before returning it to the cache.
	 */
	slab_head = Q2PDSLAB(&(mmd->mmd_pd_slab_q));
	for (slab = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_next); slab != slab_head;
	    slab = slab_next) {
		/* grab the successor before the slab is unlinked */
		slab_next = Q2PDSLAB(slab->pds_next);

		remque(&(slab->pds_next));
		slab->pds_next = NULL;
		slab->pds_prev = NULL;
		slab->pds_mmd = NULL;
		slab->pds_used = 0;
		kmem_cache_free(pd_slab_cache, slab);

		ASSERT(mmd->mmd_slab_cnt > 0);
		mmd->mmd_slab_cnt--;
	}
	ASSERT(mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q));
	ASSERT(mmd->mmd_slab_cnt == 0);

	mmd->mmd_dp = NULL;

	/* finally, free all associated message blocks */
	if (mmd->mmd_hbuf != NULL) {
		freeb(mmd->mmd_hbuf);
		mmd->mmd_hbuf = NULL;
	}

	for (slot = 0; slot < MULTIDATA_MAX_PBUFS; slot++) {
		if (mmd->mmd_pbuf[slot] == NULL)
			continue;

		freeb(mmd->mmd_pbuf[slot]);
		mmd->mmd_pbuf[slot] = NULL;
		ASSERT(mmd->mmd_pbuf_cnt > 0);
		mmd->mmd_pbuf_cnt--;
	}
	ASSERT(mmd->mmd_pbuf_cnt == 0);
	ASSERT(MUTEX_NOT_HELD(&(mmd->mmd_pd_slab_lock)));

	/* the buffer is back in its constructed state; recycle it */
	kmem_cache_free(mmd_cache, buf);
}

/*
 * Multidata message block copy routine, called by copyb() when it
 * encounters a M_MULTIDATA data block type.
This routine should * not be called by anyone other than copyb(), since it may go away * (read: become static to this module) once some sort of copy callback * routine is made available. */ mblk_t * mmd_copy(mblk_t *bp, int kmflags) { multidata_t *mmd, *n_mmd; mblk_t *n_hbuf = NULL, *n_pbuf[MULTIDATA_MAX_PBUFS]; mblk_t **pmp_last = &n_pbuf[MULTIDATA_MAX_PBUFS - 1]; mblk_t **pmp; mblk_t *n_bp = NULL; pdesc_t *pd; uint_t n_pbuf_cnt = 0; int idx, i; #define FREE_PBUFS() { \ for (pmp = &n_pbuf[0]; pmp <= pmp_last; pmp++) \ if (*pmp != NULL) freeb(*pmp); \ } #define REL_OFF(p, base, n_base) \ ((uchar_t *)(n_base) + ((uchar_t *)(p) - (uchar_t *)base)) ASSERT(bp != NULL && DB_TYPE(bp) == M_MULTIDATA); mmd = mmd_getmultidata(bp); /* copy the header buffer */ if (mmd->mmd_hbuf != NULL && (n_hbuf = copyb(mmd->mmd_hbuf)) == NULL) return (NULL); /* copy the payload buffer(s) */ mutex_enter(&mmd->mmd_pd_slab_lock); bzero((void *)&n_pbuf[0], sizeof (mblk_t *) * MULTIDATA_MAX_PBUFS); n_pbuf_cnt = mmd->mmd_pbuf_cnt; for (i = 0; i < n_pbuf_cnt; i++) { ASSERT(mmd->mmd_pbuf[i] != NULL); n_pbuf[i] = copyb(mmd->mmd_pbuf[i]); if (n_pbuf[i] == NULL) { FREE_PBUFS(); mutex_exit(&mmd->mmd_pd_slab_lock); return (NULL); } } /* allocate new Multidata */ n_mmd = mmd_alloc(n_hbuf, &n_bp, kmflags); if (n_mmd == NULL) { if (n_hbuf != NULL) freeb(n_hbuf); if (n_pbuf_cnt != 0) FREE_PBUFS(); mutex_exit(&mmd->mmd_pd_slab_lock); return (NULL); } /* * Add payload buffer(s); upon success, leave n_pbuf array * alone, as the newly-created Multidata had already contained * the mblk pointers stored in the array. These will be freed * along with the Multidata itself. 
*/ for (i = 0, pmp = &n_pbuf[0]; i < n_pbuf_cnt; i++, pmp++) { idx = mmd_addpldbuf(n_mmd, *pmp); if (idx < 0) { FREE_PBUFS(); freeb(n_bp); mutex_exit(&mmd->mmd_pd_slab_lock); return (NULL); } } /* copy over global attributes */ if (mmd->mmd_pattbl != NULL && mmd_copy_pattbl(mmd->mmd_pattbl, n_mmd, NULL, kmflags) < 0) { freeb(n_bp); mutex_exit(&mmd->mmd_pd_slab_lock); return (NULL); } /* copy over packet descriptors and their atttributes */ pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE); /* first pdesc */ while (pd != NULL) { pdesc_t *n_pd; pdescinfo_t *pdi, n_pdi; uchar_t *n_base, *base; pdesc_t *pd_next; /* next pdesc */ pd_next = mmd_getpdesc(pd->pd_slab->pds_mmd, pd, NULL, 1, B_TRUE); /* skip if already removed */ if (pd->pd_flags & PDESC_REM_DEFER) { pd = pd_next; continue; } pdi = &(pd->pd_pdi); bzero(&n_pdi, sizeof (n_pdi)); /* * Calculate new descriptor values based on the offset of * each pointer relative to the associated buffer(s). */ ASSERT(pdi->flags & PDESC_HAS_REF); if (pdi->flags & PDESC_HBUF_REF) { n_base = n_mmd->mmd_hbuf->b_rptr; base = mmd->mmd_hbuf->b_rptr; n_pdi.flags |= PDESC_HBUF_REF; n_pdi.hdr_base = REL_OFF(pdi->hdr_base, base, n_base); n_pdi.hdr_rptr = REL_OFF(pdi->hdr_rptr, base, n_base); n_pdi.hdr_wptr = REL_OFF(pdi->hdr_wptr, base, n_base); n_pdi.hdr_lim = REL_OFF(pdi->hdr_lim, base, n_base); } if (pdi->flags & PDESC_PBUF_REF) { n_pdi.flags |= PDESC_PBUF_REF; n_pdi.pld_cnt = pdi->pld_cnt; for (i = 0; i < pdi->pld_cnt; i++) { idx = pdi->pld_ary[i].pld_pbuf_idx; ASSERT(idx < MULTIDATA_MAX_PBUFS); ASSERT(n_mmd->mmd_pbuf[idx] != NULL); ASSERT(mmd->mmd_pbuf[idx] != NULL); n_base = n_mmd->mmd_pbuf[idx]->b_rptr; base = mmd->mmd_pbuf[idx]->b_rptr; n_pdi.pld_ary[i].pld_pbuf_idx = idx; /* * We can't copy the pointers just like that, * so calculate the relative offset. 
*/ n_pdi.pld_ary[i].pld_rptr = REL_OFF(pdi->pld_ary[i].pld_rptr, base, n_base); n_pdi.pld_ary[i].pld_wptr = REL_OFF(pdi->pld_ary[i].pld_wptr, base, n_base); } } /* add the new descriptor to the new Multidata */ n_pd = mmd_addpdesc_int(n_mmd, &n_pdi, NULL, kmflags); if (n_pd == NULL || (pd->pd_pattbl != NULL && mmd_copy_pattbl(pd->pd_pattbl, n_mmd, n_pd, kmflags) < 0)) { freeb(n_bp); mutex_exit(&mmd->mmd_pd_slab_lock); return (NULL); } pd = pd_next; } #undef REL_OFF #undef FREE_PBUFS mutex_exit(&mmd->mmd_pd_slab_lock); return (n_bp); } /* * Given a Multidata message block, return the Multidata metadata handle. */ multidata_t * mmd_getmultidata(mblk_t *mp) { multidata_t *mmd; ASSERT(mp != NULL); if (DB_TYPE(mp) != M_MULTIDATA) return (NULL); mmd = (multidata_t *)mp->b_rptr; ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); return (mmd); } /* * Return the start and end addresses of the associated buffer(s). */ void mmd_getregions(multidata_t *mmd, mbufinfo_t *mbi) { int i; ASSERT(mmd != NULL); ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); ASSERT(mbi != NULL); bzero((void *)mbi, sizeof (mbufinfo_t)); if (mmd->mmd_hbuf != NULL) { mbi->hbuf_rptr = mmd->mmd_hbuf->b_rptr; mbi->hbuf_wptr = mmd->mmd_hbuf->b_wptr; } mutex_enter(&mmd->mmd_pd_slab_lock); for (i = 0; i < mmd->mmd_pbuf_cnt; i++) { ASSERT(mmd->mmd_pbuf[i] != NULL); mbi->pbuf_ary[i].pbuf_rptr = mmd->mmd_pbuf[i]->b_rptr; mbi->pbuf_ary[i].pbuf_wptr = mmd->mmd_pbuf[i]->b_wptr; } mbi->pbuf_cnt = mmd->mmd_pbuf_cnt; mutex_exit(&mmd->mmd_pd_slab_lock); } /* * Return the Multidata statistics. 
*/ uint_t mmd_getcnt(multidata_t *mmd, uint_t *hbuf_ref, uint_t *pbuf_ref) { uint_t pd_cnt; ASSERT(mmd != NULL); ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); mutex_enter(&(mmd->mmd_pd_slab_lock)); if (hbuf_ref != NULL) *hbuf_ref = mmd->mmd_hbuf_ref; if (pbuf_ref != NULL) *pbuf_ref = mmd->mmd_pbuf_ref; pd_cnt = mmd->mmd_pd_cnt; mutex_exit(&(mmd->mmd_pd_slab_lock)); return (pd_cnt); } #define HBUF_REF_VALID(mmd, pdi) \ ((mmd)->mmd_hbuf != NULL && (pdi)->hdr_rptr != NULL && \ (pdi)->hdr_wptr != NULL && (pdi)->hdr_base != NULL && \ (pdi)->hdr_lim != NULL && (pdi)->hdr_lim >= (pdi)->hdr_base && \ (pdi)->hdr_wptr >= (pdi)->hdr_rptr && \ (pdi)->hdr_base <= (pdi)->hdr_rptr && \ (pdi)->hdr_lim >= (pdi)->hdr_wptr && \ (pdi)->hdr_base >= (mmd)->mmd_hbuf->b_rptr && \ MBLKIN((mmd)->mmd_hbuf, \ (pdi->hdr_base - (mmd)->mmd_hbuf->b_rptr), \ PDESC_HDRSIZE(pdi))) /* * Bounds check payload area(s). */ static boolean_t pbuf_ref_valid(multidata_t *mmd, pdescinfo_t *pdi) { int i = 0, idx; boolean_t valid = B_TRUE; struct pld_ary_s *pa; mutex_enter(&mmd->mmd_pd_slab_lock); if (pdi->pld_cnt == 0 || pdi->pld_cnt > mmd->mmd_pbuf_cnt) { mutex_exit(&mmd->mmd_pd_slab_lock); return (B_FALSE); } pa = &pdi->pld_ary[0]; while (valid && i < pdi->pld_cnt) { valid = (((idx = pa->pld_pbuf_idx) < mmd->mmd_pbuf_cnt) && pa->pld_rptr != NULL && pa->pld_wptr != NULL && pa->pld_wptr >= pa->pld_rptr && pa->pld_rptr >= mmd->mmd_pbuf[idx]->b_rptr && MBLKIN(mmd->mmd_pbuf[idx], (pa->pld_rptr - mmd->mmd_pbuf[idx]->b_rptr), PDESC_PLD_SPAN_SIZE(pdi, i))); if (!valid) { MMD_DEBUG((CE_WARN, "pbuf_ref_valid: pdi 0x%p pld out of bound; " "index %d has pld_cnt %d pbuf_idx %d " "(mmd_pbuf_cnt %d), " "pld_rptr 0x%p pld_wptr 0x%p len %d " "(valid 0x%p-0x%p len %d)\n", (void *)pdi, i, pdi->pld_cnt, idx, mmd->mmd_pbuf_cnt, (void *)pa->pld_rptr, (void *)pa->pld_wptr, (int)PDESC_PLD_SPAN_SIZE(pdi, i), (void *)mmd->mmd_pbuf[idx]->b_rptr, (void *)mmd->mmd_pbuf[idx]->b_wptr, (int)MBLKL(mmd->mmd_pbuf[idx]))); } /* advance to next 
entry */ i++; pa++; } mutex_exit(&mmd->mmd_pd_slab_lock); return (valid); } /* * Add a packet descriptor to the Multidata. */ pdesc_t * mmd_addpdesc(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags) { ASSERT(mmd != NULL); ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); ASSERT(pdi != NULL); ASSERT(pdi->flags & PDESC_HAS_REF); /* do the references refer to invalid memory regions? */ if (!mmd_speed_over_safety && (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) || ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi)))) { if (err != NULL) *err = EINVAL; return (NULL); } return (mmd_addpdesc_int(mmd, pdi, err, kmflags)); } /* * Internal routine to add a packet descriptor, called when mmd_addpdesc * or mmd_copy tries to allocate and add a descriptor to a Multidata. */ static pdesc_t * mmd_addpdesc_int(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags) { pdesc_slab_t *slab, *slab_last; pdesc_t *pd; ASSERT(pdi->flags & PDESC_HAS_REF); ASSERT(!(pdi->flags & PDESC_HBUF_REF) || HBUF_REF_VALID(mmd, pdi)); ASSERT(!(pdi->flags & PDESC_PBUF_REF) || pbuf_ref_valid(mmd, pdi)); if (err != NULL) *err = 0; mutex_enter(&(mmd->mmd_pd_slab_lock)); /* * Is slab list empty or the last-added slab is full? If so, * allocate new slab for the descriptor; otherwise, use the * last-added slab instead. 
*/ slab_last = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_prev); if (mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q) || slab_last->pds_used == slab_last->pds_sz) { slab = kmem_cache_alloc(pd_slab_cache, kmflags); if (slab == NULL) { if (err != NULL) *err = ENOMEM; mutex_exit(&(mmd->mmd_pd_slab_lock)); return (NULL); } slab->pds_mmd = mmd; ASSERT(slab->pds_used == 0); ASSERT(slab->pds_next == NULL && slab->pds_prev == NULL); /* insert slab at end of list */ insque(&(slab->pds_next), mmd->mmd_pd_slab_q.ql_prev); mmd->mmd_slab_cnt++; } else { slab = slab_last; } ASSERT(slab->pds_used < slab->pds_sz); pd = &(slab->pds_free_desc[slab->pds_used++]); ASSERT(pd->pd_magic == PDESC_MAGIC); pd->pd_next = NULL; pd->pd_prev = NULL; pd->pd_slab = slab; pd->pd_pattbl = NULL; /* copy over the descriptor info from caller */ PDI_COPY(pdi, &(pd->pd_pdi)); if (pd->pd_flags & PDESC_HBUF_REF) mmd->mmd_hbuf_ref++; if (pd->pd_flags & PDESC_PBUF_REF) mmd->mmd_pbuf_ref += pd->pd_pdi.pld_cnt; mmd->mmd_pd_cnt++; /* insert descriptor at end of list */ insque(&(pd->pd_next), mmd->mmd_pd_q.ql_prev); mutex_exit(&(mmd->mmd_pd_slab_lock)); return (pd); } /* * Packet descriptor slab kmem cache constructor routine. */ /* ARGSUSED */ static int pdslab_constructor(void *buf, void *cdrarg, int kmflags) { pdesc_slab_t *slab; uint_t cnt = (uint_t)(uintptr_t)cdrarg; int i; ASSERT(cnt > 0); /* slab size can't be zero */ slab = (pdesc_slab_t *)buf; slab->pds_next = NULL; slab->pds_prev = NULL; slab->pds_mmd = NULL; slab->pds_used = 0; slab->pds_sz = cnt; for (i = 0; i < cnt; i++) { pdesc_t *pd = &(slab->pds_free_desc[i]); pd->pd_magic = PDESC_MAGIC; } return (0); } /* * Packet descriptor slab kmem cache destructor routine. 
*/ /* ARGSUSED */ static void pdslab_destructor(void *buf, void *cdrarg) { pdesc_slab_t *slab; slab = (pdesc_slab_t *)buf; ASSERT(slab->pds_next == NULL); ASSERT(slab->pds_prev == NULL); ASSERT(slab->pds_mmd == NULL); ASSERT(slab->pds_used == 0); ASSERT(slab->pds_sz > 0); } /* * Remove a packet descriptor from the in-use descriptor list, * called by mmd_rempdesc or during free. */ static pdesc_t * mmd_destroy_pdesc(multidata_t *mmd, pdesc_t *pd) { pdesc_t *pd_next; pd_next = Q2PD(pd->pd_next); remque(&(pd->pd_next)); /* remove all local attributes */ if (pd->pd_pattbl != NULL) mmd_destroy_pattbl(&(pd->pd_pattbl)); /* don't decrease counts for a removed descriptor */ if (!(pd->pd_flags & PDESC_REM_DEFER)) { if (pd->pd_flags & PDESC_HBUF_REF) { ASSERT(mmd->mmd_hbuf_ref > 0); mmd->mmd_hbuf_ref--; } if (pd->pd_flags & PDESC_PBUF_REF) { ASSERT(mmd->mmd_pbuf_ref > 0); mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt; } ASSERT(mmd->mmd_pd_cnt > 0); mmd->mmd_pd_cnt--; } return (pd_next); } /* * Remove a packet descriptor from the Multidata. */ void mmd_rempdesc(pdesc_t *pd) { multidata_t *mmd; ASSERT(pd->pd_magic == PDESC_MAGIC); ASSERT(pd->pd_slab != NULL); mmd = pd->pd_slab->pds_mmd; ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); mutex_enter(&(mmd->mmd_pd_slab_lock)); /* * We can't deallocate the associated resources if the Multidata * is shared with other threads, because it's possible that the * descriptor handle value is held by those threads. That's why * we simply mark the entry as "removed" and decrement the counts. * If there are no other threads, then we free the descriptor. 
*/ if (mmd->mmd_dp->db_ref > 1) { pd->pd_flags |= PDESC_REM_DEFER; if (pd->pd_flags & PDESC_HBUF_REF) { ASSERT(mmd->mmd_hbuf_ref > 0); mmd->mmd_hbuf_ref--; } if (pd->pd_flags & PDESC_PBUF_REF) { ASSERT(mmd->mmd_pbuf_ref > 0); mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt; } ASSERT(mmd->mmd_pd_cnt > 0); mmd->mmd_pd_cnt--; } else { (void) mmd_destroy_pdesc(mmd, pd); } mutex_exit(&(mmd->mmd_pd_slab_lock)); } /* * A generic routine to traverse the packet descriptor in-use list. */ static pdesc_t * mmd_getpdesc(multidata_t *mmd, pdesc_t *pd, pdescinfo_t *pdi, uint_t forw, boolean_t mutex_held) { pdesc_t *pd_head; ASSERT(pd == NULL || pd->pd_slab->pds_mmd == mmd); ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); ASSERT(!mutex_held || MUTEX_HELD(&(mmd->mmd_pd_slab_lock))); if (!mutex_held) mutex_enter(&(mmd->mmd_pd_slab_lock)); pd_head = Q2PD(&(mmd->mmd_pd_q)); if (pd == NULL) { /* * We're called by mmd_get{first,last}pdesc, and so * return either the first or last list element. */ pd = forw ? Q2PD(mmd->mmd_pd_q.ql_next) : Q2PD(mmd->mmd_pd_q.ql_prev); } else { /* * We're called by mmd_get{next,prev}pdesc, and so * return either the next or previous list element. */ pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev); } while (pd != pd_head) { /* skip element if it has been removed */ if (!(pd->pd_flags & PDESC_REM_DEFER)) break; pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev); } if (!mutex_held) mutex_exit(&(mmd->mmd_pd_slab_lock)); /* return NULL if we're back at the beginning */ if (pd == pd_head) pd = NULL; /* got an entry; copy descriptor info to caller */ if (pd != NULL && pdi != NULL) PDI_COPY(&(pd->pd_pdi), pdi); ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC); return (pd); } /* * Return the first packet descriptor in the in-use list. */ pdesc_t * mmd_getfirstpdesc(multidata_t *mmd, pdescinfo_t *pdi) { return (mmd_getpdesc(mmd, NULL, pdi, 1, B_FALSE)); } /* * Return the last packet descriptor in the in-use list. 
*/ pdesc_t * mmd_getlastpdesc(multidata_t *mmd, pdescinfo_t *pdi) { return (mmd_getpdesc(mmd, NULL, pdi, 0, B_FALSE)); } /* * Return the next packet descriptor in the in-use list. */ pdesc_t * mmd_getnextpdesc(pdesc_t *pd, pdescinfo_t *pdi) { return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 1, B_FALSE)); } /* * Return the previous packet descriptor in the in-use list. */ pdesc_t * mmd_getprevpdesc(pdesc_t *pd, pdescinfo_t *pdi) { return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 0, B_FALSE)); } /* * Check to see if pdi stretches over c_pdi; used to ensure that a packet * descriptor's header and payload span may not be extended beyond the * current boundaries. */ static boolean_t pdi_in_range(pdescinfo_t *pdi, pdescinfo_t *c_pdi) { int i; struct pld_ary_s *pa = &pdi->pld_ary[0]; struct pld_ary_s *c_pa = &c_pdi->pld_ary[0]; if (pdi->hdr_base < c_pdi->hdr_base || pdi->hdr_lim > c_pdi->hdr_lim) return (B_FALSE); /* * We don't allow the number of span to be reduced, for the sake * of simplicity. Instead, we provide PDESC_PLD_SPAN_CLEAR() to * clear a packet descriptor. Note that we allow the span count to * be increased, and the bounds check for the new one happens * in pbuf_ref_valid. */ if (pdi->pld_cnt < c_pdi->pld_cnt) return (B_FALSE); /* compare only those which are currently defined */ for (i = 0; i < c_pdi->pld_cnt; i++, pa++, c_pa++) { if (pa->pld_pbuf_idx != c_pa->pld_pbuf_idx || pa->pld_rptr < c_pa->pld_rptr || pa->pld_wptr > c_pa->pld_wptr) return (B_FALSE); } return (B_TRUE); } /* * Modify the layout of a packet descriptor. */ pdesc_t * mmd_adjpdesc(pdesc_t *pd, pdescinfo_t *pdi) { multidata_t *mmd; pdescinfo_t *c_pdi; ASSERT(pd != NULL); ASSERT(pdi != NULL); ASSERT(pd->pd_magic == PDESC_MAGIC); mmd = pd->pd_slab->pds_mmd; ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); /* entry has been removed */ if (pd->pd_flags & PDESC_REM_DEFER) return (NULL); /* caller doesn't intend to specify any buffer reference? 
*/ if (!(pdi->flags & PDESC_HAS_REF)) return (NULL); /* do the references refer to invalid memory regions? */ if (!mmd_speed_over_safety && (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) || ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi)))) return (NULL); /* they're not subsets of current references? */ c_pdi = &(pd->pd_pdi); if (!pdi_in_range(pdi, c_pdi)) return (NULL); /* copy over the descriptor info from caller */ PDI_COPY(pdi, c_pdi); return (pd); } /* * Copy the contents of a packet descriptor into a new buffer. If the * descriptor points to more than one buffer fragments, the contents * of both fragments will be joined, with the header buffer fragment * preceding the payload buffer fragment(s). */ mblk_t * mmd_transform(pdesc_t *pd) { multidata_t *mmd; pdescinfo_t *pdi; mblk_t *mp; int h_size = 0, p_size = 0; int i, len; ASSERT(pd != NULL); ASSERT(pd->pd_magic == PDESC_MAGIC); mmd = pd->pd_slab->pds_mmd; ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); /* entry has been removed */ if (pd->pd_flags & PDESC_REM_DEFER) return (NULL); mutex_enter(&mmd->mmd_pd_slab_lock); pdi = &(pd->pd_pdi); if (pdi->flags & PDESC_HBUF_REF) h_size = PDESC_HDRL(pdi); if (pdi->flags & PDESC_PBUF_REF) { for (i = 0; i < pdi->pld_cnt; i++) p_size += PDESC_PLD_SPAN_SIZE(pdi, i); } /* allocate space large enough to hold the fragment(s) */ ASSERT(h_size + p_size >= 0); if ((mp = allocb(h_size + p_size, BPRI_HI)) == NULL) { mutex_exit(&mmd->mmd_pd_slab_lock); return (NULL); } /* copy over the header fragment */ if ((pdi->flags & PDESC_HBUF_REF) && h_size > 0) { bcopy(pdi->hdr_rptr, mp->b_wptr, h_size); mp->b_wptr += h_size; } /* copy over the payload fragment */ if ((pdi->flags & PDESC_PBUF_REF) && p_size > 0) { for (i = 0; i < pdi->pld_cnt; i++) { len = PDESC_PLD_SPAN_SIZE(pdi, i); if (len > 0) { bcopy(pdi->pld_ary[i].pld_rptr, mp->b_wptr, len); mp->b_wptr += len; } } } mutex_exit(&mmd->mmd_pd_slab_lock); return (mp); } /* * Return a chain of mblks representing the 
Multidata packet. */ mblk_t * mmd_transform_link(pdesc_t *pd) { multidata_t *mmd; pdescinfo_t *pdi; mblk_t *nmp = NULL; ASSERT(pd != NULL); ASSERT(pd->pd_magic == PDESC_MAGIC); mmd = pd->pd_slab->pds_mmd; ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); /* entry has been removed */ if (pd->pd_flags & PDESC_REM_DEFER) return (NULL); pdi = &(pd->pd_pdi); /* duplicate header buffer */ if ((pdi->flags & PDESC_HBUF_REF)) { if ((nmp = dupb(mmd->mmd_hbuf)) == NULL) return (NULL); nmp->b_rptr = pdi->hdr_rptr; nmp->b_wptr = pdi->hdr_wptr; } /* duplicate payload buffer(s) */ if (pdi->flags & PDESC_PBUF_REF) { int i; mblk_t *mp; struct pld_ary_s *pa = &pdi->pld_ary[0]; mutex_enter(&mmd->mmd_pd_slab_lock); for (i = 0; i < pdi->pld_cnt; i++, pa++) { ASSERT(mmd->mmd_pbuf[pa->pld_pbuf_idx] != NULL); /* skip empty ones */ if (PDESC_PLD_SPAN_SIZE(pdi, i) == 0) continue; mp = dupb(mmd->mmd_pbuf[pa->pld_pbuf_idx]); if (mp == NULL) { if (nmp != NULL) freemsg(nmp); mutex_exit(&mmd->mmd_pd_slab_lock); return (NULL); } mp->b_rptr = pa->pld_rptr; mp->b_wptr = pa->pld_wptr; if (nmp == NULL) nmp = mp; else linkb(nmp, mp); } mutex_exit(&mmd->mmd_pd_slab_lock); } return (nmp); } /* * Return duplicate message block(s) of the associated buffer(s). */ int mmd_dupbufs(multidata_t *mmd, mblk_t **hmp, mblk_t **pmp) { ASSERT(mmd != NULL); ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); if (hmp != NULL) { *hmp = NULL; if (mmd->mmd_hbuf != NULL && (*hmp = dupb(mmd->mmd_hbuf)) == NULL) return (-1); } if (pmp != NULL) { int i; mblk_t *mp; mutex_enter(&mmd->mmd_pd_slab_lock); *pmp = NULL; for (i = 0; i < mmd->mmd_pbuf_cnt; i++) { ASSERT(mmd->mmd_pbuf[i] != NULL); mp = dupb(mmd->mmd_pbuf[i]); if (mp == NULL) { if (hmp != NULL && *hmp != NULL) freeb(*hmp); if (*pmp != NULL) freemsg(*pmp); mutex_exit(&mmd->mmd_pd_slab_lock); return (-1); } if (*pmp == NULL) *pmp = mp; else linkb(*pmp, mp); } mutex_exit(&mmd->mmd_pd_slab_lock); } return (0); } /* * Return the layout of a packet descriptor. 
*/ int mmd_getpdescinfo(pdesc_t *pd, pdescinfo_t *pdi) { ASSERT(pd != NULL); ASSERT(pd->pd_magic == PDESC_MAGIC); ASSERT(pd->pd_slab != NULL); ASSERT(pd->pd_slab->pds_mmd->mmd_magic == MULTIDATA_MAGIC); ASSERT(pdi != NULL); /* entry has been removed */ if (pd->pd_flags & PDESC_REM_DEFER) return (-1); /* copy descriptor info to caller */ PDI_COPY(&(pd->pd_pdi), pdi); return (0); } /* * Add a global or local attribute to a Multidata. Global attribute * association is specified by a NULL packet descriptor. */ pattr_t * mmd_addpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai, boolean_t persistent, int kmflags) { patbkt_t **tbl_p; patbkt_t *tbl, *o_tbl; patbkt_t *bkt; pattr_t *pa; uint_t size; ASSERT(mmd != NULL); ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC); ASSERT(pai != NULL); /* pointer to the attribute hash table (local or global) */ tbl_p = pd != NULL ? &(pd->pd_pattbl) : &(mmd->mmd_pattbl); /* * See if the hash table has not yet been created; if so, * we create the table and store its address atomically. */ if ((tbl = *tbl_p) == NULL) { tbl = kmem_cache_alloc(pattbl_cache, kmflags); if (tbl == NULL) return (NULL); /* if someone got there first, use their table instead */ if ((o_tbl = atomic_cas_ptr(tbl_p, NULL, tbl)) != NULL) { kmem_cache_free(pattbl_cache, tbl); tbl = o_tbl; } } ASSERT(tbl->pbkt_tbl_sz > 0); bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]); /* attribute of the same type already exists? */ if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL) return (NULL); size = sizeof (*pa) + pai->len; if ((pa = kmem_zalloc(size, kmflags)) == NULL) return (NULL); pa->pat_magic = PATTR_MAGIC; pa->pat_lock = &(bkt->pbkt_lock); pa->pat_mmd = mmd; pa->pat_buflen = size; pa->pat_type = pai->type; pai->buf = pai->len > 0 ? 
((uchar_t *)(pa + 1)) : NULL; if (persistent) pa->pat_flags = PATTR_PERSIST; /* insert attribute at end of hash chain */ mutex_enter(&(bkt->pbkt_lock)); insque(&(pa->pat_next), bkt->pbkt_pattr_q.ql_prev); mutex_exit(&(bkt->pbkt_lock)); return (pa); } /* * Attribute hash table kmem cache constructor routine. */ /* ARGSUSED */ static int pattbl_constructor(void *buf, void *cdrarg, int kmflags) { patbkt_t *bkt; uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg; uint_t i; ASSERT(tbl_sz > 0); /* table size can't be zero */ for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) { mutex_init(&(bkt->pbkt_lock), NULL, MUTEX_DRIVER, NULL); QL_INIT(&(bkt->pbkt_pattr_q)); /* first bucket contains the table size */ bkt->pbkt_tbl_sz = i == 0 ? tbl_sz : 0; } return (0); } /* * Attribute hash table kmem cache destructor routine. */ /* ARGSUSED */ static void pattbl_destructor(void *buf, void *cdrarg) { patbkt_t *bkt; uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg; uint_t i; ASSERT(tbl_sz > 0); /* table size can't be zero */ for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) { mutex_destroy(&(bkt->pbkt_lock)); ASSERT(bkt->pbkt_pattr_q.ql_next == &(bkt->pbkt_pattr_q)); ASSERT(i > 0 || bkt->pbkt_tbl_sz == tbl_sz); } } /* * Destroy an attribute hash table, called by mmd_rempdesc or during free. 
*/ static void mmd_destroy_pattbl(patbkt_t **tbl) { patbkt_t *bkt; pattr_t *pa, *pa_next; uint_t i, tbl_sz; ASSERT(tbl != NULL); bkt = *tbl; tbl_sz = bkt->pbkt_tbl_sz; /* make sure caller passes in the first bucket */ ASSERT(tbl_sz > 0); /* destroy the contents of each bucket */ for (i = 0; i < tbl_sz; i++, bkt++) { /* we ought to be exclusive at this point */ ASSERT(MUTEX_NOT_HELD(&(bkt->pbkt_lock))); pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next); while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) { ASSERT(pa->pat_magic == PATTR_MAGIC); pa_next = Q2PATTR(pa->pat_next); remque(&(pa->pat_next)); kmem_free(pa, pa->pat_buflen); pa = pa_next; } } kmem_cache_free(pattbl_cache, *tbl); *tbl = NULL; /* commit all previous stores */ membar_producer(); } /* * Copy the contents of an attribute hash table, called by mmd_copy. */ static int mmd_copy_pattbl(patbkt_t *src_tbl, multidata_t *n_mmd, pdesc_t *n_pd, int kmflags) { patbkt_t *bkt; pattr_t *pa; pattrinfo_t pai; uint_t i, tbl_sz; ASSERT(src_tbl != NULL); bkt = src_tbl; tbl_sz = bkt->pbkt_tbl_sz; /* make sure caller passes in the first bucket */ ASSERT(tbl_sz > 0); for (i = 0; i < tbl_sz; i++, bkt++) { mutex_enter(&(bkt->pbkt_lock)); pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next); while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) { pattr_t *pa_next = Q2PATTR(pa->pat_next); /* skip if it's removed */ if (pa->pat_flags & PATTR_REM_DEFER) { pa = pa_next; continue; } pai.type = pa->pat_type; pai.len = pa->pat_buflen - sizeof (*pa); if (mmd_addpattr(n_mmd, n_pd, &pai, (pa->pat_flags & PATTR_PERSIST) != 0, kmflags) == NULL) { mutex_exit(&(bkt->pbkt_lock)); return (-1); } /* copy over the contents */ if (pai.buf != NULL) bcopy(pa + 1, pai.buf, pai.len); pa = pa_next; } mutex_exit(&(bkt->pbkt_lock)); } return (0); } /* * Search for an attribute type within an attribute hash bucket. 
*/ static pattr_t * mmd_find_pattr(patbkt_t *bkt, uint_t type) { pattr_t *pa_head, *pa; mutex_enter(&(bkt->pbkt_lock)); pa_head = Q2PATTR(&(bkt->pbkt_pattr_q)); pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next); while (pa != pa_head) { ASSERT(pa->pat_magic == PATTR_MAGIC); /* return a match; we treat removed entry as non-existent */ if (pa->pat_type == type && !(pa->pat_flags & PATTR_REM_DEFER)) break; pa = Q2PATTR(pa->pat_next); } mutex_exit(&(bkt->pbkt_lock)); return (pa == pa_head ? NULL : pa); } /* * Remove an attribute from a Multidata. */ void mmd_rempattr(pattr_t *pa) { kmutex_t *pat_lock = pa->pat_lock; ASSERT(pa->pat_magic == PATTR_MAGIC); /* ignore if attribute was marked as persistent */ if ((pa->pat_flags & PATTR_PERSIST) != 0) return; mutex_enter(pat_lock); /* * We can't deallocate the associated resources if the Multidata * is shared with other threads, because it's possible that the * attribute handle value is held by those threads. That's why * we simply mark the entry as "removed". If there are no other * threads, then we free the attribute. */ if (pa->pat_mmd->mmd_dp->db_ref > 1) { pa->pat_flags |= PATTR_REM_DEFER; } else { remque(&(pa->pat_next)); kmem_free(pa, pa->pat_buflen); } mutex_exit(pat_lock); } /* * Find an attribute (according to its type) and return its handle. */ pattr_t * mmd_getpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai) { patbkt_t *tbl, *bkt; pattr_t *pa; ASSERT(mmd != NULL); ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); ASSERT(pai != NULL); /* get the right attribute hash table (local or global) */ tbl = pd != NULL ? pd->pd_pattbl : mmd->mmd_pattbl; /* attribute hash table doesn't exist? */ if (tbl == NULL) return (NULL); ASSERT(tbl->pbkt_tbl_sz > 0); bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]); if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL) { ASSERT(pa->pat_buflen >= sizeof (*pa)); pai->len = pa->pat_buflen - sizeof (*pa); pai->buf = pai->len > 0 ? 
(uchar_t *)pa + sizeof (pattr_t) : NULL; } ASSERT(pa == NULL || pa->pat_magic == PATTR_MAGIC); return (pa); } /* * Return total size of buffers and total size of areas referenced * by all in-use (unremoved) packet descriptors. */ void mmd_getsize(multidata_t *mmd, uint_t *ptotal, uint_t *pinuse) { pdesc_t *pd; pdescinfo_t *pdi; int i; ASSERT(mmd != NULL); ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); mutex_enter(&mmd->mmd_pd_slab_lock); if (ptotal != NULL) { *ptotal = 0; if (mmd->mmd_hbuf != NULL) *ptotal += MBLKL(mmd->mmd_hbuf); for (i = 0; i < mmd->mmd_pbuf_cnt; i++) { ASSERT(mmd->mmd_pbuf[i] != NULL); *ptotal += MBLKL(mmd->mmd_pbuf[i]); } } if (pinuse != NULL) { *pinuse = 0; /* first pdesc */ pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE); while (pd != NULL) { pdi = &pd->pd_pdi; /* next pdesc */ pd = mmd_getpdesc(mmd, pd, NULL, 1, B_TRUE); /* skip over removed descriptor */ if (pdi->flags & PDESC_REM_DEFER) continue; if (pdi->flags & PDESC_HBUF_REF) *pinuse += PDESC_HDRL(pdi); if (pdi->flags & PDESC_PBUF_REF) { for (i = 0; i < pdi->pld_cnt; i++) *pinuse += PDESC_PLDL(pdi, i); } } } mutex_exit(&mmd->mmd_pd_slab_lock); }