Diffstat (limited to 'usr/src/uts/common/disp/cmt.c')
-rw-r--r-- | usr/src/uts/common/disp/cmt.c | 804 |
1 files changed, 804 insertions, 0 deletions
diff --git a/usr/src/uts/common/disp/cmt.c b/usr/src/uts/common/disp/cmt.c
new file mode 100644
index 0000000000..1bf0704346
--- /dev/null
+++ b/usr/src/uts/common/disp/cmt.c
@@ -0,0 +1,804 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/thread.h>
+#include <sys/cpuvar.h>
+#include <sys/cpupart.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/kstat.h>
+#include <sys/processor.h>
+#include <sys/disp.h>
+#include <sys/group.h>
+#include <sys/pghw.h>
+#include <sys/bitset.h>
+#include <sys/lgrp.h>
+#include <sys/cmt.h>
+
+/*
+ * CMT scheduler / dispatcher support
+ *
+ * This file implements CMT scheduler support using Processor Groups.
+ * The CMT processor group class creates and maintains the CMT class
+ * specific processor group pg_cmt_t.
+ *
+ * ---------------------------- <-- pg_cmt_t *
+ * | pghw_t                   |
+ * ----------------------------
+ * | CMT class specific data  |
+ * | - hierarchy linkage      |
+ * | - CMT load balancing data|
+ * | - active CPU group/bitset|
+ * ----------------------------
+ *
+ * The scheduler/dispatcher leverages knowledge of the performance
+ * relevant CMT sharing relationships existing between CPUs to implement
+ * optimized affinity and load balancing policies.
+ *
+ * Load balancing policy seeks to improve performance by minimizing
+ * contention over shared processor resources / facilities, while the
+ * affinity policies seek to improve cache and TLB utilization.
+ *
+ * The CMT PGs created by this class are already arranged into a
+ * hierarchy (which is done in the pghw layer). To implement the top-down
+ * CMT load balancing algorithm, the CMT PGs additionally maintain
+ * parent, child and sibling hierarchy relationships.
+ * Parent PGs always contain a superset of their children's resources,
+ * each PG can have at most one parent, and siblings are the group of PGs
+ * sharing the same parent.
+ *
+ * On NUMA systems, the CMT load balancing algorithm balances across the
+ * CMT PGs within their respective lgroups. On UMA based systems, there
+ * exists a top level group of PGs to balance across. On NUMA systems multiple
+ * top level groups are instantiated, where the top level balancing begins by
+ * balancing across the CMT PGs within their respective (per lgroup) top level
+ * groups.
+ */
+
+typedef struct cmt_lgrp {
+	group_t		cl_pgs;		/* Top level group of active CMT PGs */
+	int		cl_npgs;	/* # of top level PGs in the lgroup */
+	lgrp_handle_t	cl_hand;	/* lgroup's platform handle */
+	struct cmt_lgrp	*cl_next;	/* next cmt_lgrp */
+} cmt_lgrp_t;
+
+static cmt_lgrp_t	*cmt_lgrps = NULL;
+
+static int		is_cpu0 = 1;
+static int		cmt_sched_disabled = 0;
+
+static pg_cid_t		pg_cmt_class_id;	/* PG class id */
+
+static pg_t		*pg_cmt_alloc();
+static void		pg_cmt_free(pg_t *);
+static void		pg_cmt_cpu_init(cpu_t *);
+static void		pg_cmt_cpu_fini(cpu_t *);
+static void		pg_cmt_cpu_active(cpu_t *);
+static void		pg_cmt_cpu_inactive(cpu_t *);
+static void		pg_cmt_cpupart_in(cpu_t *, cpupart_t *);
+static void		pg_cmt_cpupart_move(cpu_t *, cpupart_t *, cpupart_t *);
+static void		pg_cmt_hier_pack(pg_cmt_t **, int);
+static int		pg_cmt_cpu_belongs(pg_t *, cpu_t *);
+static int		pg_cmt_hw(pghw_type_t);
+static cmt_lgrp_t	*pg_cmt_find_lgrp(lgrp_handle_t);
+
+/*
+ * Macro to test if PG is managed by the CMT PG class
+ */
+#define	IS_CMT_PG(pg)	(((pg_t *)(pg))->pg_class->pgc_id == pg_cmt_class_id)
+
+/*
+ * CMT PG ops
+ */
+struct pg_ops pg_ops_cmt = {
+	pg_cmt_alloc,
+	pg_cmt_free,
+	pg_cmt_cpu_init,
+	pg_cmt_cpu_fini,
+	pg_cmt_cpu_active,
+	pg_cmt_cpu_inactive,
+	pg_cmt_cpupart_in,
+	NULL,			/* cpupart_out */
+	pg_cmt_cpupart_move,
+	pg_cmt_cpu_belongs,
+};
+
+/*
+ * Initialize the CMT PG class
+ */
+void
+pg_cmt_class_init(void)
+{
+	if (cmt_sched_disabled)
+		return;
+
+	pg_cmt_class_id = pg_class_register("cmt", &pg_ops_cmt, PGR_PHYSICAL);
+}
+
+/*
+ * Called to indicate a new CPU has started up so
+ * that either t0 or the slave startup thread can
+ * be accounted for.
+ */
+void
+pg_cmt_cpu_startup(cpu_t *cp)
+{
+	PG_NRUN_UPDATE(cp, 1);
+}
+
+/*
+ * Adjust the CMT load in the CMT PGs in which the CPU belongs
+ * Note that "n" can be positive in the case of increasing
+ * load, or negative in the case of decreasing load.
+ */
+void
+pg_cmt_load(cpu_t *cp, int n)
+{
+	pg_cmt_t	*pg;
+
+	pg = (pg_cmt_t *)cp->cpu_pg->cmt_lineage;
+	while (pg != NULL) {
+		ASSERT(IS_CMT_PG(pg));
+		atomic_add_32(&pg->cmt_nrunning, n);
+		pg = pg->cmt_parent;
+	}
+}
+
+/*
+ * Return non-zero if thread can migrate between "from" and "to"
+ * without a performance penalty
+ */
+int
+pg_cmt_can_migrate(cpu_t *from, cpu_t *to)
+{
+	if (from->cpu_physid->cpu_cacheid ==
+	    to->cpu_physid->cpu_cacheid)
+		return (1);
+	return (0);
+}
+
+/*
+ * CMT class specific PG allocation
+ */
+static pg_t *
+pg_cmt_alloc(void)
+{
+	return (kmem_zalloc(sizeof (pg_cmt_t), KM_NOSLEEP));
+}
+
+/*
+ * Class specific PG de-allocation
+ */
+static void
+pg_cmt_free(pg_t *pg)
+{
+	ASSERT(pg != NULL);
+	ASSERT(IS_CMT_PG(pg));
+
+	kmem_free((pg_cmt_t *)pg, sizeof (pg_cmt_t));
+}
+
+/*
+ * Return 1 if CMT load balancing policies should be
+ * implemented across instances of the specified hardware
+ * sharing relationship.
+ */
+static int
+pg_cmt_load_bal_hw(pghw_type_t hw)
+{
+	if (hw == PGHW_IPIPE ||
+	    hw == PGHW_FPU ||
+	    hw == PGHW_CHIP)
+		return (1);
+	else
+		return (0);
+}
+
+/*
+ * Return 1 if thread affinity policies should be implemented
+ * for instances of the specified hardware sharing relationship.
+ */
+static int
+pg_cmt_affinity_hw(pghw_type_t hw)
+{
+	if (hw == PGHW_CACHE)
+		return (1);
+	else
+		return (0);
+}
+
+/*
+ * Return 1 if CMT scheduling policies should be implemented
+ * for the specified hardware sharing relationship.
+ */
+static int
+pg_cmt_hw(pghw_type_t hw)
+{
+	return (pg_cmt_load_bal_hw(hw) ||
+	    pg_cmt_affinity_hw(hw));
+}
+
+/*
+ * CMT class callback for a new CPU entering the system
+ */
+static void
+pg_cmt_cpu_init(cpu_t *cp)
+{
+	pg_cmt_t	*pg;
+	group_t		*cmt_pgs;
+	int		level, max_level, nlevels;
+	pghw_type_t	hw;
+	pg_t		*pg_cache = NULL;
+	pg_cmt_t	*cpu_cmt_hier[PGHW_NUM_COMPONENTS];
+	lgrp_handle_t	lgrp_handle;
+	cmt_lgrp_t	*lgrp;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	/*
+	 * A new CPU is coming into the system.
+	 * Interrogate the platform to see if the CPU
+	 * has any performance relevant CMT sharing
+	 * relationships
+	 */
+	cmt_pgs = &cp->cpu_pg->cmt_pgs;
+	cp->cpu_pg->cmt_lineage = NULL;
+
+	bzero(cpu_cmt_hier, sizeof (cpu_cmt_hier));
+	max_level = nlevels = 0;
+	for (hw = PGHW_START; hw < PGHW_NUM_COMPONENTS; hw++) {
+
+		/*
+		 * We're only interested in CMT hw sharing relationships
+		 */
+		if (pg_cmt_hw(hw) == 0 || pg_plat_hw_shared(cp, hw) == 0)
+			continue;
+
+		/*
+		 * Find (or create) the PG associated with
+		 * the hw sharing relationship in which cp
+		 * belongs.
+		 *
+		 * Determine if a suitable PG already
+		 * exists, or if one needs to be created.
+		 */
+		pg = (pg_cmt_t *)pghw_place_cpu(cp, hw);
+		if (pg == NULL) {
+			/*
+			 * Create a new one.
+			 * Initialize the common...
+			 */
+			pg = (pg_cmt_t *)pg_create(pg_cmt_class_id);
+
+			/* ... physical ... */
+			pghw_init((pghw_t *)pg, cp, hw);
+
+			/*
+			 * ... and CMT specific portions of the
+			 * structure.
+			 */
+			bitset_init(&pg->cmt_cpus_actv_set);
+			group_create(&pg->cmt_cpus_actv);
+		} else {
+			ASSERT(IS_CMT_PG(pg));
+		}
+
+		/* Add the CPU to the PG */
+		pg_cpu_add((pg_t *)pg, cp);
+
+		/*
+		 * Ensure capacity of the active CPUs group/bitset
+		 */
+		group_expand(&pg->cmt_cpus_actv,
+		    GROUP_SIZE(&((pg_t *)pg)->pg_cpus));
+
+		if (cp->cpu_seqid >=
+		    bitset_capacity(&pg->cmt_cpus_actv_set)) {
+			bitset_resize(&pg->cmt_cpus_actv_set,
+			    cp->cpu_seqid + 1);
+		}
+
+		/*
+		 * Build a lineage of CMT PGs for load balancing
+		 */
+		if (pg_cmt_load_bal_hw(hw)) {
+			level = pghw_level(hw);
+			cpu_cmt_hier[level] = pg;
+			if (level > max_level)
+				max_level = level;
+			nlevels++;
+		}
+
+		/* Cache this for later */
+		if (hw == PGHW_CACHE)
+			pg_cache = (pg_t *)pg;
+	}
+
+	/*
+	 * Pack out any gaps in the constructed lineage.
+	 * Gaps may exist where the architecture knows
+	 * about a hardware sharing relationship, but such a
+	 * relationship either isn't relevant for load
+	 * balancing or doesn't exist between CPUs on the system.
+	 */
+	pg_cmt_hier_pack(cpu_cmt_hier, max_level + 1);
+
+	/*
+	 * For each of the PGs in the CPU's lineage:
+	 *	- Add an entry in the CPU sorted CMT PG group
+	 *	  which is used for top down CMT load balancing
+	 *	- Tie the PG into the CMT hierarchy by connecting
+	 *	  it to its parent and siblings.
+	 */
+	group_expand(cmt_pgs, nlevels);
+
+	/*
+	 * Find the lgrp that encapsulates this CPU's CMT hierarchy
+	 */
+	lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id);
+	lgrp = pg_cmt_find_lgrp(lgrp_handle);
+
+	for (level = 0; level < nlevels; level++) {
+		uint_t		children;
+		int		err;
+
+		pg = cpu_cmt_hier[level];
+		err = group_add_at(cmt_pgs, pg, nlevels - level - 1);
+		ASSERT(err == 0);
+
+		if (level == 0)
+			cp->cpu_pg->cmt_lineage = (pg_t *)pg;
+
+		if (pg->cmt_siblings != NULL) {
+			/* Already initialized */
+			ASSERT(pg->cmt_parent == NULL ||
+			    pg->cmt_parent == cpu_cmt_hier[level + 1]);
+			ASSERT(pg->cmt_siblings == &lgrp->cl_pgs ||
+			    pg->cmt_siblings == pg->cmt_parent->cmt_children);
+			continue;
+		}
+
+		if ((level + 1) == nlevels) {
+			pg->cmt_parent = NULL;
+			pg->cmt_siblings = &lgrp->cl_pgs;
+			children = ++lgrp->cl_npgs;
+		} else {
+			pg->cmt_parent = cpu_cmt_hier[level + 1];
+
+			/*
+			 * A good parent keeps track of their children.
+			 * The parent's children group is also the PG's
+			 * siblings.
+			 */
+			if (pg->cmt_parent->cmt_children == NULL) {
+				pg->cmt_parent->cmt_children =
+				    kmem_zalloc(sizeof (group_t), KM_SLEEP);
+				group_create(pg->cmt_parent->cmt_children);
+			}
+			pg->cmt_siblings = pg->cmt_parent->cmt_children;
+			children = ++pg->cmt_parent->cmt_nchildren;
+		}
+		pg->cmt_hint = 0;
+		group_expand(pg->cmt_siblings, children);
+	}
+
+	/*
+	 * Cache the chip and core IDs in the cpu_t->cpu_physid structure
+	 * for fast lookups later.
+	 */
+	if (cp->cpu_physid) {
+		cp->cpu_physid->cpu_chipid =
+		    pg_plat_hw_instance_id(cp, PGHW_CHIP);
+		cp->cpu_physid->cpu_coreid = pg_plat_get_core_id(cp);
+
+		/*
+		 * If this cpu has a PG representing shared cache, then set
+		 * cpu_cacheid to that PG's logical id
+		 */
+		if (pg_cache)
+			cp->cpu_physid->cpu_cacheid = pg_cache->pg_id;
+	}
+
+	/* CPU0 only initialization */
+	if (is_cpu0) {
+		pg_cmt_cpu_startup(cp);
+		is_cpu0 = 0;
+	}
+
+}
+
+/*
+ * Class callback when a CPU is leaving the system (deletion)
+ */
+static void
+pg_cmt_cpu_fini(cpu_t *cp)
+{
+	group_iter_t	i;
+	pg_cmt_t	*pg;
+	group_t		*pgs, *cmt_pgs;
+	lgrp_handle_t	lgrp_handle;
+	cmt_lgrp_t	*lgrp;
+
+	pgs = &cp->cpu_pg->pgs;
+	cmt_pgs = &cp->cpu_pg->cmt_pgs;
+
+	/*
+	 * Find the lgroup that encapsulates this CPU's CMT hierarchy
+	 */
+	lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id);
+	lgrp = pg_cmt_find_lgrp(lgrp_handle);
+
+	/*
+	 * First, clean up anything load balancing specific for each of
+	 * the CPU's PGs that participated in CMT load balancing
+	 */
+	pg = (pg_cmt_t *)cp->cpu_pg->cmt_lineage;
+	while (pg != NULL) {
+
+		/*
+		 * Remove the PG from the CPU's load balancing lineage
+		 */
+		(void) group_remove(cmt_pgs, pg, GRP_RESIZE);
+
+		/*
+		 * If it's about to become empty, destroy its children
+		 * group, and remove its reference from its siblings.
+		 * This is done here (rather than below) to avoid removing
+		 * our reference from a PG that we just eliminated.
+		 */
+		if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 1) {
+			if (pg->cmt_children != NULL)
+				group_destroy(pg->cmt_children);
+			if (pg->cmt_siblings != NULL) {
+				if (pg->cmt_siblings == &lgrp->cl_pgs)
+					lgrp->cl_npgs--;
+				else
+					pg->cmt_parent->cmt_nchildren--;
+			}
+		}
+		pg = pg->cmt_parent;
+	}
+
+	ASSERT(GROUP_SIZE(cmt_pgs) == 0);
+
+	/*
+	 * Now that the load balancing lineage updates have happened,
+	 * remove the CPU from all its PGs (destroying any that become
+	 * empty).
+	 */
+	group_iter_init(&i);
+	while ((pg = group_iterate(pgs, &i)) != NULL) {
+		if (IS_CMT_PG(pg) == 0)
+			continue;
+
+		pg_cpu_delete((pg_t *)pg, cp);
+		/*
+		 * Deleting the CPU from the PG changes the CPU's
+		 * PG group over which we are actively iterating.
+		 * Re-initialize the iteration
+		 */
+		group_iter_init(&i);
+
+		if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 0) {
+
+			/*
+			 * The PG has become zero sized, so destroy it.
+			 */
+			group_destroy(&pg->cmt_cpus_actv);
+			bitset_fini(&pg->cmt_cpus_actv_set);
+			pghw_fini((pghw_t *)pg);
+
+			pg_destroy((pg_t *)pg);
+		}
+	}
+}
+
+/*
+ * Class callback when a CPU is entering a cpu partition
+ */
+static void
+pg_cmt_cpupart_in(cpu_t *cp, cpupart_t *pp)
+{
+	group_t		*pgs;
+	pg_t		*pg;
+	group_iter_t	i;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	pgs = &cp->cpu_pg->pgs;
+
+	/*
+	 * Ensure that the new partition's PG bitset
+	 * is large enough for all CMT PGs to which cp
+	 * belongs
+	 */
+	group_iter_init(&i);
+	while ((pg = group_iterate(pgs, &i)) != NULL) {
+		if (IS_CMT_PG(pg) == 0)
+			continue;
+
+		if (bitset_capacity(&pp->cp_cmt_pgs) <= pg->pg_id)
+			bitset_resize(&pp->cp_cmt_pgs, pg->pg_id + 1);
+	}
+}
+
+/*
+ * Class callback when a CPU is actually moving partitions
+ */
+static void
+pg_cmt_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
+{
+	cpu_t		*cpp;
+	group_t		*pgs;
+	pg_t		*pg;
+	group_iter_t	pg_iter;
+	pg_cpu_itr_t	cpu_iter;
+	boolean_t	found;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	pgs = &cp->cpu_pg->pgs;
+	group_iter_init(&pg_iter);
+
+	/*
+	 * Iterate over the CPU's CMT PGs
+	 */
+	while ((pg = group_iterate(pgs, &pg_iter)) != NULL) {
+
+		if (IS_CMT_PG(pg) == 0)
+			continue;
+
+		/*
+		 * Add the PG to the bitset in the new partition.
+		 */
+		bitset_add(&newpp->cp_cmt_pgs, pg->pg_id);
+
+		/*
+		 * Remove the PG from the bitset in the old partition
+		 * if the last of the PG's CPUs have left.
+		 */
+		found = B_FALSE;
+		PG_CPU_ITR_INIT(pg, cpu_iter);
+		while ((cpp = pg_cpu_next(&cpu_iter)) != NULL) {
+			if (cpp == cp)
+				continue;
+			if (cpp->cpu_part->cp_id == oldpp->cp_id) {
+				found = B_TRUE;
+				break;
+			}
+		}
+		if (!found)
+			bitset_del(&cp->cpu_part->cp_cmt_pgs, pg->pg_id);
+	}
+}
+
+/*
+ * Class callback when a CPU becomes active (online)
+ *
+ * This is called in a context where CPUs are paused
+ */
+static void
+pg_cmt_cpu_active(cpu_t *cp)
+{
+	int		err;
+	group_iter_t	i;
+	pg_cmt_t	*pg;
+	group_t		*pgs;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	pgs = &cp->cpu_pg->pgs;
+	group_iter_init(&i);
+
+	/*
+	 * Iterate over the CPU's PGs
+	 */
+	while ((pg = group_iterate(pgs, &i)) != NULL) {
+
+		if (IS_CMT_PG(pg) == 0)
+			continue;
+
+		err = group_add(&pg->cmt_cpus_actv, cp, GRP_NORESIZE);
+		ASSERT(err == 0);
+
+		/*
+		 * If this is the first active CPU in the PG, and it
+		 * represents a hardware sharing relationship over which
+		 * CMT load balancing is performed, add it as a candidate
+		 * for balancing with its siblings.
+		 */
+		if (GROUP_SIZE(&pg->cmt_cpus_actv) == 1 &&
+		    pg_cmt_load_bal_hw(((pghw_t *)pg)->pghw_hw)) {
+			err = group_add(pg->cmt_siblings, pg, GRP_NORESIZE);
+			ASSERT(err == 0);
+		}
+
+		/*
+		 * Notate the CPU in the PG's active CPU bitset.
+		 * Also notate the PG as being active in its associated
+		 * partition
+		 */
+		bitset_add(&pg->cmt_cpus_actv_set, cp->cpu_seqid);
+		bitset_add(&cp->cpu_part->cp_cmt_pgs, ((pg_t *)pg)->pg_id);
+	}
+}
+
+/*
+ * Class callback when a CPU goes inactive (offline)
+ *
+ * This is called in a context where CPUs are paused
+ */
+static void
+pg_cmt_cpu_inactive(cpu_t *cp)
+{
+	int		err;
+	group_t		*pgs;
+	pg_cmt_t	*pg;
+	cpu_t		*cpp;
+	group_iter_t	i;
+	pg_cpu_itr_t	cpu_itr;
+	boolean_t	found;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	pgs = &cp->cpu_pg->pgs;
+	group_iter_init(&i);
+
+	while ((pg = group_iterate(pgs, &i)) != NULL) {
+
+		if (IS_CMT_PG(pg) == 0)
+			continue;
+
+		/*
+		 * Remove the CPU from the CMT PG's active CPU group
+		 * bitmap
+		 */
+		err = group_remove(&pg->cmt_cpus_actv, cp, GRP_NORESIZE);
+		ASSERT(err == 0);
+
+		bitset_del(&pg->cmt_cpus_actv_set, cp->cpu_seqid);
+
+		/*
+		 * If there are no more active CPUs in this PG over which
+		 * load was balanced, remove it as a balancing candidate.
+		 */
+		if (GROUP_SIZE(&pg->cmt_cpus_actv) == 0 &&
+		    pg_cmt_load_bal_hw(((pghw_t *)pg)->pghw_hw)) {
+			err = group_remove(pg->cmt_siblings, pg, GRP_NORESIZE);
+			ASSERT(err == 0);
+		}
+
+		/*
+		 * Assert the number of active CPUs does not exceed
+		 * the total number of CPUs in the PG
+		 */
+		ASSERT(GROUP_SIZE(&pg->cmt_cpus_actv) <=
+		    GROUP_SIZE(&((pg_t *)pg)->pg_cpus));
+
+		/*
+		 * Update the PG bitset in the CPU's old partition
+		 */
+		found = B_FALSE;
+		PG_CPU_ITR_INIT(pg, cpu_itr);
+		while ((cpp = pg_cpu_next(&cpu_itr)) != NULL) {
+			if (cpp == cp)
+				continue;
+			if (cpp->cpu_part->cp_id == cp->cpu_part->cp_id) {
+				found = B_TRUE;
+				break;
+			}
+		}
+		if (!found) {
+			bitset_del(&cp->cpu_part->cp_cmt_pgs,
+			    ((pg_t *)pg)->pg_id);
+		}
+	}
+}
+
+/*
+ * Return non-zero if the CPU belongs in the given PG
+ */
+static int
+pg_cmt_cpu_belongs(pg_t *pg, cpu_t *cp)
+{
+	cpu_t	*pg_cpu;
+
+	pg_cpu = GROUP_ACCESS(&pg->pg_cpus, 0);
+
+	ASSERT(pg_cpu != NULL);
+
+	/*
+	 * The CPU belongs if, given the nature of the hardware sharing
+	 * relationship represented by the PG, the CPU has that
+	 * relationship with some other CPU already in the PG
+	 */
+	if (pg_plat_cpus_share(cp, pg_cpu, ((pghw_t *)pg)->pghw_hw))
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Pack the CPU's CMT hierarchy
+ * The hierarchy order is preserved
+ */
+static void
+pg_cmt_hier_pack(pg_cmt_t *hier[], int sz)
+{
+	int	i, j;
+
+	for (i = 0; i < sz; i++) {
+		if (hier[i] != NULL)
+			continue;
+
+		for (j = i; j < sz; j++) {
+			if (hier[j] != NULL) {
+				hier[i] = hier[j];
+				hier[j] = NULL;
+				break;
+			}
+		}
+		if (j == sz)
+			break;
+	}
+}
+
+/*
+ * Return a cmt_lgrp_t * given an lgroup handle.
+ * If the right one doesn't yet exist, create one
+ * by growing the cmt_lgrps list
+ */
+static cmt_lgrp_t *
+pg_cmt_find_lgrp(lgrp_handle_t hand)
+{
+	cmt_lgrp_t	*lgrp;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	lgrp = cmt_lgrps;
+	while (lgrp != NULL) {
+		if (lgrp->cl_hand == hand)
+			return (lgrp);
+		lgrp = lgrp->cl_next;
+	}
+
+	/*
+	 * Haven't seen this lgrp yet
+	 */
+	lgrp = kmem_zalloc(sizeof (cmt_lgrp_t), KM_SLEEP);
+
+	lgrp->cl_hand = hand;
+	lgrp->cl_npgs = 0;
+	lgrp->cl_next = cmt_lgrps;
+	cmt_lgrps = lgrp;
+	group_create(&lgrp->cl_pgs);
+
+	return (lgrp);
+}
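
The load accounting above is a simple lineage walk: pg_cmt_load() starts at the leaf PG recorded in cp->cpu_pg->cmt_lineage and follows cmt_parent pointers toward the root, adjusting each PG's cmt_nrunning counter atomically. Because every level is updated on each transition, the dispatcher can compare sibling PGs at any level without re-walking the CPUs below them. A minimal user-level sketch of that walk, using simplified stand-in types rather than the kernel's pg_cmt_t, might look like this:

#include <stdio.h>

/*
 * Simplified stand-in for a CMT PG: only the parent linkage and the
 * running-thread counter from the lineage walk are modeled here.
 */
struct cmt_pg {
	const char	*name;		/* e.g. "core", "chip" */
	int		nrunning;	/* software load on this PG */
	struct cmt_pg	*parent;	/* next level up the hierarchy */
};

/* Adjust the load on every PG from the leaf up to the root. */
static void
cmt_load(struct cmt_pg *leaf, int n)
{
	struct cmt_pg *pg;

	for (pg = leaf; pg != NULL; pg = pg->parent)
		pg->nrunning += n;	/* kernel code uses atomic_add_32() */
}

int
main(void)
{
	struct cmt_pg chip = { "chip", 0, NULL };
	struct cmt_pg core = { "core", 0, &chip };

	cmt_load(&core, 1);		/* a thread starts running */
	cmt_load(&core, -1);		/* ... and later stops */
	printf("%s=%d %s=%d\n", core.name, core.nrunning,
	    chip.name, chip.nrunning);
	return (0);
}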
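pg_cmt_hier_pack() compacts the per-CPU lineage array in place: NULL gaps left by hardware levels that are not load-balancing relevant are squeezed out while the relative order of the remaining PGs is preserved. A standalone sketch of the same compaction (with void * standing in for pg_cmt_t *):

#include <stdio.h>

/*
 * Stable compaction of a sparse array: NULL gaps are squeezed out and
 * the relative order of the remaining entries is preserved.
 */
static void
hier_pack(void *hier[], int sz)
{
	int i, j;

	for (i = 0; i < sz; i++) {
		if (hier[i] != NULL)
			continue;
		for (j = i; j < sz; j++) {
			if (hier[j] != NULL) {
				hier[i] = hier[j];
				hier[j] = NULL;
				break;
			}
		}
		if (j == sz)
			break;		/* nothing left to move down */
	}
}

int
main(void)
{
	int a = 1, b = 2;
	void *hier[4] = { NULL, &a, NULL, &b };

	hier_pack(hier, 4);
	/* hier is now { &a, &b, NULL, NULL } */
	printf("%d %d\n", *(int *)hier[0], *(int *)hier[1]);
	return (0);
}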
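pg_cmt_find_lgrp() follows a find-or-create pattern over a singly linked list keyed by the lgroup handle, with cpu_lock serializing both the lookup and the insertion. The same shape, sketched in user-level C with an illustrative long handle and calloc() in place of kmem_zalloc(..., KM_SLEEP):

#include <stdlib.h>

/* Illustrative per-lgroup entry; not the kernel's cmt_lgrp_t. */
struct lgrp_ent {
	long		hand;		/* lookup key */
	int		npgs;		/* payload */
	struct lgrp_ent	*next;		/* singly linked list */
};

static struct lgrp_ent *lgrp_list = NULL;

static struct lgrp_ent *
find_lgrp(long hand)
{
	struct lgrp_ent *e;

	for (e = lgrp_list; e != NULL; e = e->next) {
		if (e->hand == hand)
			return (e);	/* already known */
	}

	/* Not seen yet: allocate a zeroed entry and push it on the list. */
	e = calloc(1, sizeof (*e));
	if (e == NULL)
		return (NULL);		/* kernel code sleeps instead of failing */
	e->hand = hand;
	e->next = lgrp_list;
	lgrp_list = e;
	return (e);
}

int
main(void)
{
	/* The second lookup returns the entry created by the first. */
	return (find_lgrp(7) == find_lgrp(7) ? 0 : 1);
}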