Diffstat (limited to 'usr/src/uts/common/os')
-rw-r--r-- | usr/src/uts/common/os/bitset.c | 168
-rw-r--r-- | usr/src/uts/common/os/chip.c | 576
-rw-r--r-- | usr/src/uts/common/os/clock.c | 37
-rw-r--r-- | usr/src/uts/common/os/cpu.c | 40
-rw-r--r-- | usr/src/uts/common/os/group.c | 322
-rw-r--r-- | usr/src/uts/common/os/lgrp.c | 77
-rw-r--r-- | usr/src/uts/common/os/pg.c | 624
-rw-r--r-- | usr/src/uts/common/os/pghw.c | 420
8 files changed, 1568 insertions, 696 deletions
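This change replaces the chip_t based CMT scheduler support with processor groups (PGs), built on two new generic collection primitives: bitset_t (bitset.c) and group_t (group.c). As orientation for the diff below, here is a minimal, hypothetical consumer-side sketch of the two new interfaces; example_track_cpus() is illustrative only (not part of the change), and it assumes kernel context and that cpu_seqid values fall below the bitset's sized capacity.

#include <sys/types.h>
#include <sys/cpuvar.h>
#include <sys/bitset.h>
#include <sys/group.h>
#include <sys/debug.h>

/* Hypothetical consumer of the new bitset_t/group_t primitives */
void
example_track_cpus(cpu_t **cps, uint_t n)
{
	bitset_t	seen;
	group_t		grp;
	group_iter_t	iter;
	cpu_t		*cp;
	uint_t		i;

	bitset_init(&seen);		/* zero sized until resized */
	bitset_resize(&seen, n);	/* must size before bitset_add() */
	group_create(&grp);

	for (i = 0; i < n; i++) {
		/* assumes cpu_seqid < n for this illustration */
		bitset_add(&seen, cps[i]->cpu_seqid);
		(void) group_add(&grp, cps[i], GRP_RESIZE);
	}

	ASSERT(!bitset_is_null(&seen));
	ASSERT(bitset_find(&seen) != (uint_t)-1);	/* lowest set bit */

	/*
	 * Drain the group before destroying it; group_destroy()
	 * asserts that the group is empty, and group_remove()
	 * repacks the backing array, so restart the iterator
	 * after each removal.
	 */
	group_iter_init(&iter);
	while ((cp = group_iterate(&grp, &iter)) != NULL) {
		(void) group_remove(&grp, cp, GRP_RESIZE);
		group_iter_init(&iter);
	}

	group_destroy(&grp);
	bitset_fini(&seen);
}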
diff --git a/usr/src/uts/common/os/bitset.c b/usr/src/uts/common/os/bitset.c new file mode 100644 index 0000000000..8222fd9faa --- /dev/null +++ b/usr/src/uts/common/os/bitset.c @@ -0,0 +1,168 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/bitset.h> +#include <sys/kmem.h> +#include <sys/systm.h> +#include <sys/cmn_err.h> +#include <sys/sysmacros.h> + +/* + * Initialize a bitset_t. + * After bitset_init(), the bitset will be zero sized. + */ +void +bitset_init(bitset_t *b) +{ + bzero(b, sizeof (bitset_t)); +} + +/* + * Uninitialize a bitset_t. + * This will free the bitset's data, leaving it zero sized. + */ +void +bitset_fini(bitset_t *b) +{ + if (b->bs_words > 0) + kmem_free(b->bs_set, b->bs_words * sizeof (ulong_t)); +} + +/* + * Resize a bitset to where it can hold sz number of bits. + * This can either grow or shrink the bitset holding capacity. + * In the case of shrinkage, elements that reside outside the new + * holding capacity of the bitset are lost. + */ +void +bitset_resize(bitset_t *b, uint_t sz) +{ + uint_t nwords; + ulong_t *bset_new, *bset_tmp; + + nwords = BT_BITOUL(sz); + if (b->bs_words == nwords) + return; /* already properly sized */ + + /* + * Allocate the new ulong_t array, and copy the old one. + */ + if (nwords > 0) { + bset_new = kmem_zalloc(nwords * sizeof (ulong_t), KM_SLEEP); + bcopy(b->bs_set, bset_new, + MIN(b->bs_words, nwords) * sizeof (ulong_t)); + } else { + bset_new = NULL; + } + + /* swap out the old ulong_t array for new one */ + bset_tmp = b->bs_set; + b->bs_set = bset_new; + + /* free up the old array */ + kmem_free(bset_tmp, b->bs_words * sizeof (ulong_t)); + b->bs_words = nwords; +} + +/* + * Returns the current holding capacity of the bitset + */ +uint_t +bitset_capacity(bitset_t *b) +{ + return (b->bs_words * BT_NBIPUL); +} + +/* + * Add and delete bits in the bitset. + * + * Adding a bit that is already set, and clearing a bit that's already clear + * is legal. + * + * Adding or deleting an element that falls outside the bitset's current + * holding capacity is illegal. 
+ */ +void +bitset_add(bitset_t *b, uint_t elt) +{ + ASSERT(b->bs_words * BT_NBIPUL > elt); + + BT_SET(b->bs_set, elt); +} + +void +bitset_del(bitset_t *b, uint_t elt) +{ + ASSERT(b->bs_words * BT_NBIPUL > elt); + + BT_CLEAR(b->bs_set, elt); +} + +/* + * Return non-zero if the bit is present in the set + */ +int +bitset_in_set(bitset_t *b, uint_t elt) +{ + ASSERT(b->bs_words * BT_NBIPUL > elt); + + return (BT_TEST(b->bs_set, elt)); +} + +/* + * Return non-zero if the bitset is empty + */ +int +bitset_is_null(bitset_t *b) +{ + int i; + + for (i = 0; i < b->bs_words; i++) + if (b->bs_set[i] != 0) + return (0); + return (1); +} + +/* + * Find the first set bit in the bitset + * Return -1 if no bit was found + */ +uint_t +bitset_find(bitset_t *b) +{ + uint_t i; + uint_t elt = (uint_t)-1; + + for (i = 0; i < b->bs_words; i++) { + elt = (uint_t)(lowbit(b->bs_set[i]) - 1); + if (elt != (uint_t)-1) { + elt += i * BT_NBIPUL; + break; + } + } + return (elt); +} diff --git a/usr/src/uts/common/os/chip.c b/usr/src/uts/common/os/chip.c deleted file mode 100644 index ad11827b0f..0000000000 --- a/usr/src/uts/common/os/chip.c +++ /dev/null @@ -1,576 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/thread.h> -#include <sys/cpuvar.h> -#include <sys/cpupart.h> -#include <sys/kmem.h> -#include <sys/cmn_err.h> -#include <sys/kstat.h> -#include <sys/processor.h> -#include <sys/disp.h> -#include <sys/chip.h> - -/* - * CMT aware scheduler/dispatcher support - * - * With the introduction of Chip Multi-Threaded (CMT) processor architectures, - * it is no longer necessarily true that a given physical processor - * module (chip) will present itself as a single schedulable entity (cpu_t). - * Rather, each chip may present itself as one or more "logical" CPUs. - * - * The logical CPUs presented may share physical components on the chip - * such as caches, data pipes, FPUs, etc. It is advantageous to have the - * kernel know which logical CPUs are presented by a given chip, - * and what facilities on the chip are shared, since the kernel can then use - * this information to employ scheduling policies that help improve the - * availability of per chip resources, and increase utilization of a thread's - * cache investment. - * - * The "chip_t" structure represents a physical processor. - * It is used to keep track of which logical CPUs are presented by a given - * chip, and to provide a parameterized representation of a chip's - * properties. 
A count of the number of running threads is also - * maintained, and is used by the dispatcher to balance load across the - * system's chips to improve performance through increased chip resource - * availability. - * - * Locking: - * - * Safely traversing the per lgroup lists requires the same protections - * as traversing the cpu lists. One must either: - * - hold cpu_lock - * - have disabled kernel preemption - * - be at high SPL - * - have cpu's paused - * - * Safely traversing the global "chip_list" requires holding cpu_lock. - * - * A chip's nrunning count should only be modified using the - * CHIP_NRUNNING() macro, through which updates of the count are done - * atomically. - */ - -chip_t cpu0_chip; /* chip structure for first CPU */ -cpu_physid_t cpu0_physid; /* boot CPU's physical id structure */ - -/* - * chip_bootstrap is used on platforms where it is possible to enter the - * dispatcher before a new CPU's chip initialization has happened. - */ -static chip_t chip_bootstrap; - -#define CPU_HAS_NO_CHIP(cp) \ - ((cp)->cpu_chip == NULL || (cp)->cpu_chip == &chip_bootstrap) - -static chip_t *chip_list; /* protected by CPU lock */ -static chip_set_t chip_set; /* bitmap of chips in existence */ - /* indexed by chip_seqid */ -static chipid_t chip_seqid_next = 0; /* next sequential chip id */ -static int nchips = 0; /* num chips in existence */ - -static chip_t *chip_find(chipid_t); -static int chip_kstat_extract(kstat_t *, int); - -/* - * Declare static kstat names (defined in chip.h) - */ -CHIP_KSTAT_NAMES; - -/* - * Find the chip_t with the given chip_id. - */ -static chip_t * -chip_find(chipid_t chipid) -{ - chip_t *chp, *chip_start; - - ASSERT(chip_list == NULL || chip_list->chip_next == chip_list || - MUTEX_HELD(&cpu_lock)); - - if ((chp = chip_start = chip_list) != NULL) { - do { - if (chp->chip_id == chipid) { - return (chp); - } - } while ((chp = chp->chip_next) != chip_start); - } - return (NULL); -} - -chip_t * -chip_lookup(chipid_t chipid) -{ - chip_t *chp; - - mutex_enter(&cpu_lock); - chp = chip_find(chipid); - mutex_exit(&cpu_lock); - - return (chp); -} - -#ifndef sun4v -/* - * Setup the kstats for this chip, if needed - */ -void -chip_kstat_create(chip_t *chp) -{ - chip_stat_t stat; - kstat_t *chip_kstat; - - ASSERT(MUTEX_HELD(&cpu_lock)); - - if (chp->chip_kstat != NULL) - return; /* already initialized */ - - chip_kstat = kstat_create("chip", chp->chip_id, NULL, "misc", - KSTAT_TYPE_NAMED, CHIP_NUM_STATS, - KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE); - - if (chip_kstat != NULL) { - chip_kstat->ks_lock = &chp->chip_kstat_mutex; - mutex_init(chip_kstat->ks_lock, NULL, MUTEX_DEFAULT, NULL); - chip_kstat->ks_private = chp; - chip_kstat->ks_data = chp->chip_kstat_data; - for (stat = 0; stat < CHIP_NUM_STATS; stat++) - kstat_named_init(&chp->chip_kstat_data[stat], - chip_kstat_names[stat], KSTAT_DATA_INT64); - chip_kstat->ks_update = chip_kstat_extract; - chp->chip_kstat = chip_kstat; - kstat_install(chip_kstat); - } -} -#else -/* - * Note: On sun4v systems, chip kstats don't currently - * exist, since "chip" structures and policies are being - * leveraged to implement core level balancing, and exporting - * chip kstats in light of this would be both misleading - * and confusing. 
- */ -/* ARGSUSED */ -void -chip_kstat_create(chip_t *chp) -{ -} -#endif /* !sun4v */ - -static int -chip_kstat_extract(kstat_t *ksp, int rw) -{ - struct kstat_named *ksd; - chip_t *chp; - - chp = (chip_t *)ksp->ks_private; - - ksd = (struct kstat_named *)ksp->ks_data; - ASSERT(ksd == chp->chip_kstat_data); - - /* - * The chip kstats are read only - */ - if (rw == KSTAT_WRITE) - return (EACCES); - - ksd[CHIP_ID].value.i64 = chp->chip_id; - ksd[CHIP_NCPUS].value.i64 = chp->chip_ncpu; - ksd[CHIP_NRUNNING].value.i64 = chp->chip_nrunning; - ksd[CHIP_RECHOOSE].value.i64 = - rechoose_interval + chp->chip_rechoose_adj; - - return (0); -} - -/* - * If necessary, instantiate a chip_t for this CPU. - * Called when a CPU is being added to the system either in startup, - * or because of DR. The cpu will be assigned to the chip's active - * CPU list later in chip_cpu_assign() - */ -void -chip_cpu_init(cpu_t *cp) -{ - chipid_t cid; - int rechoose; - chip_t *chp; - chip_def_t chp_def; - - ASSERT((chip_list == NULL) || (MUTEX_HELD(&cpu_lock))); - - if (chip_list == NULL) - cp->cpu_physid = &cpu0_physid; - else - cp->cpu_physid = kmem_zalloc(sizeof (cpu_physid_t), KM_SLEEP); - - /* - * Call into the platform to fetch this cpu's chip and core ids. - * The ids are cached in the CPU's physical id structure. - * - * On sun4v platforms, the chip infrastructure is currently being - * leveraged to implement core level load balancing. - */ -#ifdef DO_CORELEVEL_LOADBAL - cid = chip_plat_get_coreid(cp); - cp->cpu_physid->cpu_coreid = cid; - cp->cpu_physid->cpu_chipid = chip_plat_get_chipid(cp); -#else - cid = chip_plat_get_chipid(cp); - cp->cpu_physid->cpu_chipid = cid; - cp->cpu_physid->cpu_coreid = chip_plat_get_coreid(cp); -#endif /* DO_CORELEVEL_LOADBAL */ - - chp = chip_find(cid); - if (chp == NULL) { - - /* - * Create a new chip - */ - if (chip_list == NULL) - chp = &cpu0_chip; - else - chp = kmem_zalloc(sizeof (*chp), KM_SLEEP); - - chp->chip_id = cid; - chp->chip_nrunning = 0; - - /* - * If we're booting, take this moment to perform - * some additional initialization - */ - if (chip_list == NULL) { - CHIP_SET_ZERO(chip_set); - CHIP_SET_ZERO(cp->cpu_part->cp_mach->mc_chipset); - chp->chip_nrunning++; /* for t0 */ - } - - /* - * Find the next free sequential chip id. - * A chip's sequential id exists in the range - * 0 .. CHIP_MAX_CHIPS, and is suitable for use with - * chip sets. - */ - while (CHIP_SET_TEST(chip_set, chip_seqid_next)) - chip_seqid_next++; - chp->chip_seqid = chip_seqid_next++; - CHIP_SET_ADD(chip_set, chp->chip_seqid); - - ASSERT(chip_seqid_next <= CHIP_MAX_CHIPS); - - - /* - * Query the platform specific parameters - * for this chip - */ - chip_plat_define_chip(cp, &chp_def); - chp->chip_rechoose_adj = chp_def.chipd_rechoose_adj; - chp->chip_type = chp_def.chipd_type; - chp->chip_nosteal = chp_def.chipd_nosteal; - - ASSERT((chp->chip_type < CHIP_NUM_TYPES) && - (chp->chip_type >= CHIP_DEFAULT)); - - /* - * Insert this chip in chip_list - */ - if (chip_list == NULL) { - chip_list = chp; - chp->chip_next = chp->chip_prev = chp; - } else { - chip_t *chptr; - - chptr = chip_list; - chp->chip_next = chptr; - chp->chip_prev = chptr->chip_prev; - chptr->chip_prev->chip_next = chp; - chptr->chip_prev = chp; - } - - nchips++; - ASSERT(nchips <= CHIP_MAX_CHIPS); - - /* - * The boot cpu will create the first chip's kstats - * later in cpu_kstat_init() - */ - if (chp != &cpu0_chip) - chip_kstat_create(chp); - } - - /* - * Initialize the effective rechoose interval cached - * in this cpu structure. 
- */ - rechoose = rechoose_interval + chp->chip_rechoose_adj; - cp->cpu_rechoose = (rechoose < 0) ? 0 : rechoose; - - cp->cpu_chip = chp; - chp->chip_ref++; -} - -/* - * This cpu is being deleted. It has already been removed from - * the chip's active cpu list back in chip_cpu_unassign(). Here - * we remove the cpu's reference to the chip, and cleanup/destroy - * the chip if needed. - */ -void -chip_cpu_fini(cpu_t *cp) -{ - chip_t *chp; - chip_t *prev, *next; - - ASSERT(MUTEX_HELD(&cpu_lock)); - - /* - * This can happen if the CPU failed to power on - */ - if (CPU_HAS_NO_CHIP(cp)) - return; - - chp = cp->cpu_chip; - cp->cpu_chip = NULL; - - /* - * Clear out and free the CPU's physical id structure - */ - cp->cpu_physid->cpu_chipid = -1; - cp->cpu_physid->cpu_coreid = -1; - - if (cp->cpu_physid != &cpu0_physid) { - ASSERT(cp->cpu_physid != NULL); - kmem_free(cp->cpu_physid, sizeof (cpu_physid_t)); - } - cp->cpu_physid = NULL; - - /* - * Delete the chip if its last CPU is being deleted - */ - if (--chp->chip_ref == 0) { - - ASSERT(chp->chip_ncpu == 0); - ASSERT(chp->chip_cpus == NULL); - ASSERT(chp->chip_nrunning == 0); - ASSERT(chp->chip_lgrp == NULL); - ASSERT((chp->chip_next_lgrp == NULL) && - (chp->chip_prev_lgrp == NULL)); - - if (chip_seqid_next > chp->chip_seqid) - chip_seqid_next = chp->chip_seqid; - CHIP_SET_REMOVE(chip_set, chp->chip_seqid); - - chp->chip_id = -1; - chp->chip_seqid = -1; - - /* - * remove the chip from the system's chip list - */ - if (chip_list == chp) - chip_list = chp->chip_next; - - prev = chp->chip_prev; - next = chp->chip_next; - - prev->chip_next = next; - next->chip_prev = prev; - - chp->chip_next = chp->chip_prev = NULL; - - nchips--; - - /* - * clean up any chip kstats - */ - if (chp->chip_kstat) { - kstat_delete(chp->chip_kstat); - chp->chip_kstat = NULL; - } - /* - * If the chip_t structure was dynamically - * allocated, free it. - */ - if (chp != &cpu0_chip) - kmem_free(chp, sizeof (*chp)); - } -} - -/* - * This cpu is becoming active (online). - * Perform all the necessary bookkeeping in it's chip_t - */ -void -chip_cpu_assign(cpu_t *cp) -{ - chip_t *chp; - cpu_t *cptr; - - ASSERT(chip_list == NULL || chip_list->chip_next == chip_list || - MUTEX_HELD(&cpu_lock)); - - chp = cp->cpu_chip; - - /* - * Add this cpu to the chip's cpu list - */ - if (chp->chip_ncpu == 0) { - chp->chip_cpus = cp; - cp->cpu_next_chip = cp->cpu_prev_chip = cp; - } else { - cptr = chp->chip_cpus; - cp->cpu_next_chip = cptr; - cp->cpu_prev_chip = cptr->cpu_prev_chip; - cp->cpu_prev_chip->cpu_next_chip = cp; - cptr->cpu_prev_chip = cp; - } - - chp->chip_ncpu++; - - /* - * Notate this chip's seqid in the cpu partition's chipset - */ - chip_cpu_move_part(cp, NULL, cp->cpu_part); -} - -/* - * This cpu is being offlined, so do the reverse - * of cpu_chip_assign() - */ -void -chip_cpu_unassign(cpu_t *cp) -{ - chip_t *chp; - struct cpu *prev; - struct cpu *next; - - ASSERT(MUTEX_HELD(&cpu_lock)); - - chp = cp->cpu_chip; - - chip_cpu_move_part(cp, cp->cpu_part, NULL); - - /* - * remove this cpu from the chip's cpu list - */ - prev = cp->cpu_prev_chip; - next = cp->cpu_next_chip; - - prev->cpu_next_chip = next; - next->cpu_prev_chip = prev; - - cp->cpu_next_chip = cp->cpu_prev_chip = NULL; - - chp->chip_ncpu--; - - if (chp->chip_ncpu == 0) { - chp->chip_cpus = NULL; - } else if (chp->chip_cpus == cp) { - chp->chip_cpus = next; - } -} - -/* - * A cpu on the chip is moving into and/or out of a cpu partition. - * Maintain the cpuparts' chip membership set. 
- * oldpp is NULL when a cpu is being offlined. - * newpp is NULL when a cpu is being onlined. - */ -void -chip_cpu_move_part(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp) -{ - cpu_t *cpp; - chip_t *chp; - - ASSERT(chip_list->chip_next == chip_list || MUTEX_HELD(&cpu_lock)); - - chp = cp->cpu_chip; - - if (newpp != NULL) { - /* - * Add the chip's seqid to the cpupart's chip set - */ - CHIP_SET_ADD(newpp->cp_mach->mc_chipset, chp->chip_seqid); - } - - if (oldpp != NULL) { - cpp = cp; - while ((cpp = cpp->cpu_next_chip) != cp) { - if (cpp->cpu_part->cp_id == oldpp->cp_id) { - /* - * Another cpu on the chip is in the old - * cpu partition, so we're done - */ - return; - } - } - - /* - * No other cpu on the chip is in the old partition - * so remove the chip's seqid from it's set - */ - CHIP_SET_REMOVE(oldpp->cp_mach->mc_chipset, chp->chip_seqid); - } -} - -/* - * Called to indicate a slave CPU has started up. - */ -void -chip_cpu_startup(cpu_t *cp) -{ - /* - * Indicate that the chip has a new running thread - * (slave startup) - */ - CHIP_NRUNNING(cp->cpu_chip, 1); -} - -/* - * Provide the specified CPU a bootstrap chip - */ -void -chip_bootstrap_cpu(cpu_t *cp) -{ - cp->cpu_chip = &chip_bootstrap; -} - -/* - * Given a chip set, return 1 if it is empty. - */ -int -chip_set_isnull(chip_set_t *set) -{ - int i; - - for (i = 0; i < CHIP_SET_WORDS; i++) { - if (set->csb[i] != 0) - return (0); - } - return (1); -} diff --git a/usr/src/uts/common/os/clock.c b/usr/src/uts/common/os/clock.c index 0152c2e958..a1040f1270 100644 --- a/usr/src/uts/common/os/clock.c +++ b/usr/src/uts/common/os/clock.c @@ -23,7 +23,7 @@ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -66,7 +66,6 @@ #include <sys/cpupart.h> #include <sys/rctl.h> #include <sys/task.h> -#include <sys/chip.h> #include <sys/sdt.h> #ifdef __sparc @@ -260,13 +259,6 @@ cyclic_id_t deadman_cyclic; /* deadman()'s cyclic_id */ static int lgrp_ticks; /* counter to schedule lgrp load calcs */ /* - * rechoose_interval_history is used to detect when rechoose_interval's - * value has changed (via hotpatching for example), so that the - * cached values in the cpu structures may be updated. - */ -static int rechoose_interval_history = RECHOOSE_INTERVAL; - -/* * for tod fault detection */ #define TOD_REF_FREQ ((longlong_t)(NANOSEC)) @@ -345,8 +337,6 @@ clock(void) int64_t lltemp; int s; int do_lgrp_load; - int rechoose_update = 0; - int rechoose; int i; if (panicstr) @@ -430,21 +420,9 @@ clock(void) do_lgrp_load = 1; } - /* - * The dispatcher tunable rechoose_interval may be hot-patched. - * Note if it has a new value. If so, the effective rechoose_interval - * cached in the cpu structures needs to be updated. - * If needed we'll do this during the walk of the cpu_list below. - */ - if (rechoose_interval != rechoose_interval_history) { - rechoose_interval_history = rechoose_interval; - rechoose_update = 1; - } - if (one_sec) loadavg_update(); - /* * First count the threads waiting on kpreempt queues in each * CPU partition. @@ -522,19 +500,6 @@ clock(void) lgrp_loadavg(cp->cpu_lpl, cpu_nrunnable * LGRP_LOADAVG_IN_THREAD_MAX, 1); } - /* - * The platform may define a per physical processor - * adjustment of rechoose_interval. The effective - * (base + adjustment) rechoose_interval is cached - * in the cpu structures for efficiency. Above we detect - * if the cached values need updating, and here is where - * the update happens. 
- */ - if (rechoose_update) { - rechoose = rechoose_interval + - cp->cpu_chip->chip_rechoose_adj; - cp->cpu_rechoose = (rechoose < 0) ? 0 : rechoose; - } } while ((cp = cp->cpu_next) != cpu_list); /* diff --git a/usr/src/uts/common/os/cpu.c b/usr/src/uts/common/os/cpu.c index 5ca51ec3da..9237517a69 100644 --- a/usr/src/uts/common/os/cpu.c +++ b/usr/src/uts/common/os/cpu.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -45,7 +45,7 @@ #include <sys/cpupart.h> #include <sys/lgrp.h> #include <sys/pset.h> -#include <sys/chip.h> +#include <sys/pghw.h> #include <sys/kmem.h> #include <sys/kmem_impl.h> /* to set per-cpu kmem_cache offset */ #include <sys/atomic.h> @@ -1266,6 +1266,11 @@ cpu_offline(cpu_t *cp, int flags) cpu_state_change_notify(cp->cpu_id, CPU_OFF); /* + * Tell the PG subsystem that the CPU is leaving the partition + */ + pg_cpupart_out(cp, pp); + + /* * Take the CPU out of interrupt participation so we won't find * bound kernel threads. If the architecture cannot completely * shut off interrupts on the CPU, don't quiesce it, but don't @@ -1512,6 +1517,11 @@ out: cyclic_online(cp); /* + * If we failed, tell the PG subsystem that the CPU is back + */ + pg_cpupart_in(cp, pp); + + /* * If we failed, we need to notify everyone that this CPU is back on. */ if (error != 0) @@ -1732,7 +1742,12 @@ cpu_del_unit(int cpuid) ASSERT(cp->cpu_next_part == cp); ASSERT(cp->cpu_prev_part == cp); - chip_cpu_fini(cp); + /* + * Tear down the CPU's physical ID cache, and update any + * processor groups + */ + pg_cpu_fini(cp); + pghw_physid_destroy(cp); /* * Destroy kstat stuff. @@ -1816,8 +1831,7 @@ cpu_add_active_internal(cpu_t *cp) ASSERT(cp_numparts_nonempty != 0); } - chip_cpu_assign(cp); - + pg_cpu_active(cp); lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)cp, 0); bzero(&cp->cpu_loadavg, sizeof (cp->cpu_loadavg)); @@ -1830,9 +1844,12 @@ cpu_add_active_internal(cpu_t *cp) void cpu_add_active(cpu_t *cp) { + pg_cpupart_in(cp, cp->cpu_part); + pause_cpus(NULL); cpu_add_active_internal(cp); start_cpus(); + cpu_stats_kstat_create(cp); cpu_create_intrstat(cp); lgrp_kstat_create(cp); @@ -1854,7 +1871,7 @@ cpu_remove_active(cpu_t *cp) ASSERT(cp->cpu_next_onln != cp); /* not the last one */ ASSERT(cp->cpu_prev_onln != cp); /* not the last one */ - chip_cpu_unassign(cp); + pg_cpu_inactive(cp); lgrp_config(LGRP_CONFIG_CPU_OFFLINE, (uintptr_t)cp, 0); @@ -2146,11 +2163,12 @@ cpu_info_kstat_update(kstat_t *ksp, int rw) (void) strncpy(cpu_info_template.ci_fpu_type.value.c, cp->cpu_type_info.pi_fputypes, 15); cpu_info_template.ci_clock_MHz.value.l = cp->cpu_type_info.pi_clock; - cpu_info_template.ci_chip_id.value.l = chip_plat_get_chipid(cp); + cpu_info_template.ci_chip_id.value.l = + pg_plat_hw_instance_id(cp, PGHW_CHIP); kstat_named_setstr(&cpu_info_template.ci_implementation, cp->cpu_idstr); kstat_named_setstr(&cpu_info_template.ci_brandstr, cp->cpu_brandstr); - cpu_info_template.ci_core_id.value.l = chip_plat_get_coreid(cp); + cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp); #if defined(__sparcv9) cpu_info_template.ci_device_ID.value.ui64 = @@ -2163,7 +2181,7 @@ cpu_info_kstat_update(kstat_t *ksp, int rw) cpu_info_template.ci_family.value.l = cpuid_getfamily(cp); cpu_info_template.ci_model.value.l = cpuid_getmodel(cp); cpu_info_template.ci_step.value.l = cpuid_getstep(cp); - cpu_info_template.ci_clogid.value.l = chip_plat_get_clogid(cp); 
+ cpu_info_template.ci_clogid.value.l = cpuid_get_clogid(cp); #endif return (0); @@ -2215,11 +2233,13 @@ cpu_info_kstat_destroy(cpu_t *cp) void cpu_kstat_init(cpu_t *cp) { + /* + * XXX need pg kstats for boot CPU + */ mutex_enter(&cpu_lock); cpu_info_kstat_create(cp); cpu_stats_kstat_create(cp); cpu_create_intrstat(cp); - chip_kstat_create(cp->cpu_chip); cpu_set_state(cp); mutex_exit(&cpu_lock); } diff --git a/usr/src/uts/common/os/group.c b/usr/src/uts/common/os/group.c new file mode 100644 index 0000000000..b15dff181f --- /dev/null +++ b/usr/src/uts/common/os/group.c @@ -0,0 +1,322 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/systm.h> +#include <sys/param.h> +#include <sys/debug.h> +#include <sys/kmem.h> +#include <sys/group.h> + + +#define GRP_SET_SIZE_DEFAULT 2 + +static void group_grow_set(group_t *); +static void group_shrink_set(group_t *); +static void group_pack_set(void **, uint_t); + +/* + * Initialize a group_t + */ +void +group_create(group_t *g) +{ + bzero(g, sizeof (group_t)); +} + +/* + * Destroy a group_t + * The group must already be empty + */ +void +group_destroy(group_t *g) +{ + ASSERT(g->grp_size == 0); + + if (g->grp_capacity > 0) { + kmem_free(g->grp_set, g->grp_capacity * sizeof (void *)); + g->grp_capacity = 0; + } + g->grp_set = NULL; +} + +/* + * Add element "e" to group "g" + * + * Returns -1 if addition would result in overcapacity, and + * resize operations aren't allowed, and 0 otherwise + */ +int +group_add(group_t *g, void *e, int gflag) +{ + int entry; + + if ((gflag & GRP_NORESIZE) && + g->grp_size == g->grp_capacity) + return (-1); + + ASSERT(g->grp_size != g->grp_capacity || (gflag & GRP_RESIZE)); + + entry = g->grp_size++; + if (g->grp_size > g->grp_capacity) + group_grow_set(g); + + ASSERT(g->grp_set[entry] == NULL); + g->grp_set[entry] = e; + + return (0); +} + +/* + * Remove element "e" from group "g" + * + * Returns -1 if "e" was not present in "g" and 0 otherwise + */ +int +group_remove(group_t *g, void *e, int gflag) +{ + int i; + + /* + * Find the element in the group's set + */ + for (i = 0; i < g->grp_size; i++) + if (g->grp_set[i] == e) + break; + if (g->grp_set[i] != e) + return (-1); + + g->grp_set[i] = NULL; + group_pack_set(g->grp_set, g->grp_size); + g->grp_size--; + + if ((gflag & GRP_RESIZE) && + g->grp_size > GRP_SET_SIZE_DEFAULT && + ((g->grp_size - 1) & g->grp_size) == 0) + group_shrink_set(g); + + return (0); +} + +/* + * Expand the capacity of group "g" so that it may + * contain at least "n" elements + */ +void +group_expand(group_t *g, uint_t n) +{ + while (g->grp_capacity < n) + 
group_grow_set(g); +} + +/* + * Upsize a group's holding capacity + */ +static void +group_grow_set(group_t *g) +{ + uint_t cap_old, cap_new; + void **set_old, **set_new; + + cap_old = g->grp_capacity; + set_old = g->grp_set; + + /* + * The array size grows in powers of two + */ + if ((cap_new = (cap_old << 1)) == 0) { + /* + * The set is unallocated. + * Allocate a default sized set. + */ + cap_new = GRP_SET_SIZE_DEFAULT; + g->grp_set = kmem_zalloc(cap_new * sizeof (void *), KM_SLEEP); + g->grp_capacity = cap_new; + } else { + /* + * Allocate a newly sized array, + * copy the data, and free the old array. + */ + set_new = kmem_zalloc(cap_new * sizeof (void *), KM_SLEEP); + (void) kcopy(set_old, set_new, cap_old * sizeof (void *)); + g->grp_set = set_new; + g->grp_capacity = cap_new; + kmem_free(set_old, cap_old * sizeof (void *)); + } + /* + * The new array size should be a power of two + */ + ASSERT(((cap_new - 1) & cap_new) == 0); +} + +/* + * Downsize a group's holding capacity + */ +static void +group_shrink_set(group_t *g) +{ + uint_t cap_old, cap_new; + void **set_old, **set_new; + + cap_old = g->grp_capacity; + set_old = g->grp_set; + + /* + * The group's existing array size must already + * be a power of two + */ + ASSERT(((cap_old - 1) & cap_old) == 0); + cap_new = cap_old >> 1; + + /* + * GRP_SET_SIZE_DEFAULT is the minimum set size. + */ + if (cap_new < GRP_SET_SIZE_DEFAULT) + return; + + set_new = kmem_zalloc(cap_new * sizeof (void *), KM_SLEEP); + (void) kcopy(set_old, set_new, cap_new * sizeof (void *)); + g->grp_capacity = cap_new; + g->grp_set = set_new; + + ASSERT(((cap_new - 1) & cap_new) == 0); + kmem_free(set_old, cap_old * sizeof (void *)); +} + +/* + * Pack a group's set + * Element order is not preserved + */ +static void +group_pack_set(void **set, uint_t sz) +{ + uint_t i, j, free; + + free = (uint_t)-1; + + for (i = 0; i < sz; i++) { + if (set[i] == NULL && free == (uint_t)-1) { + /* + * Found a new free slot. + * Start packing from here. + */ + free = i; + } else if (set[i] != NULL && free != (uint_t)-1) { + /* + * Found a slot to pack into + * an earlier free slot. + */ + ASSERT(set[free] == NULL); + set[free] = set[i]; + set[i] = NULL; + + /* + * Find the next free slot + */ + for (j = free + 1; set[j] != NULL; j++) { + ASSERT(j <= i); + if (j == i) + break; + } + if (set[j] == NULL) + free = j; + else + free = (uint_t)-1; + } + } +} + +/* + * Initialize a group iterator cookie + */ +void +group_iter_init(group_iter_t *iter) +{ + *iter = 0; +} + +/* + * Iterate over the elements in a group + */ +void * +group_iterate(group_t *g, group_iter_t *iter) +{ + uint_t idx = *iter; + void *data = NULL; + + while (idx < g->grp_size) { + data = g->grp_set[idx++]; + if (data != NULL) + break; + } + *iter = idx; + + return (data); +} + +/* + * Indexed access to a group's elements + */ +void * +group_access_at(group_t *g, uint_t idx) +{ + if (idx >= g->grp_capacity) + return (NULL); + + return (g->grp_set[idx]); +} + +/* + * Add a new ordered group element at specified + * index. The group must already be of sufficient + * capacity to hold an element at the specified index. 
+ * + * Returns 0 if addition was successful, and -1 if the + * addition failed because the table was too small + */ +int +group_add_at(group_t *g, void *e, uint_t idx) +{ + if (idx >= g->grp_capacity) + return (-1); + + if (idx >= g->grp_size) + g->grp_size = idx + 1; + + ASSERT(g->grp_set[idx] == NULL); + g->grp_set[idx] = e; + return (0); +} + +/* + * Remove the entry at the specified index + */ +void +group_remove_at(group_t *g, uint_t idx) +{ + ASSERT(idx < g->grp_capacity); + g->grp_set[idx] = NULL; +} diff --git a/usr/src/uts/common/os/lgrp.c b/usr/src/uts/common/os/lgrp.c index 83f67e1088..2007f7b158 100644 --- a/usr/src/uts/common/os/lgrp.c +++ b/usr/src/uts/common/os/lgrp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -89,7 +89,7 @@ #include <sys/cmn_err.h> #include <sys/kstat.h> #include <sys/sysmacros.h> -#include <sys/chip.h> +#include <sys/pg.h> #include <sys/promif.h> #include <sys/sdt.h> @@ -314,8 +314,6 @@ lgrp_root_init(void) klgrpset_clear(lgrp_root->lgrp_children); klgrpset_clear(lgrp_root->lgrp_leaves); lgrp_root->lgrp_parent = NULL; - lgrp_root->lgrp_chips = NULL; - lgrp_root->lgrp_chipcnt = 0; lgrp_root->lgrp_latency = lgrp_plat_latency(hand, hand); for (i = 0; i < LGRP_RSRC_COUNT; i++) @@ -679,7 +677,6 @@ lgrp_cpu_init(struct cpu *cp) lgrp_t *my_lgrp; lgrp_id_t lgrpid; struct cpu *cptr; - struct chip *chp; /* * This is the first time through if the resource set @@ -795,33 +792,6 @@ lgrp_cpu_init(struct cpu *cp) cptr->cpu_prev_lgrp = cp; } my_lgrp->lgrp_cpucnt++; - - /* - * Add this cpu's chip to the per lgroup list - * if necessary - */ - if (cp->cpu_chip->chip_lgrp == NULL) { - struct chip *lcpr; - - chp = cp->cpu_chip; - - if (my_lgrp->lgrp_chipcnt == 0) { - my_lgrp->lgrp_chips = chp; - chp->chip_next_lgrp = - chp->chip_prev_lgrp = chp; - } else { - lcpr = my_lgrp->lgrp_chips; - chp->chip_next_lgrp = lcpr; - chp->chip_prev_lgrp = - lcpr->chip_prev_lgrp; - lcpr->chip_prev_lgrp->chip_next_lgrp = - chp; - lcpr->chip_prev_lgrp = chp; - } - chp->chip_lgrp = my_lgrp; - chp->chip_balance = chp->chip_next_lgrp; - my_lgrp->lgrp_chipcnt++; - } } lgrp_t * @@ -890,8 +860,6 @@ lgrp_create(void) my_lgrp->lgrp_cpu = NULL; my_lgrp->lgrp_cpucnt = 0; - my_lgrp->lgrp_chips = NULL; - my_lgrp->lgrp_chipcnt = 0; if (my_lgrp->lgrp_kstat != NULL) lgrp_kstat_reset(lgrpid); @@ -945,8 +913,6 @@ lgrp_destroy(lgrp_t *lgrp) lgrp->lgrp_cpu = NULL; lgrp->lgrp_cpucnt = 0; - lgrp->lgrp_chipcnt = 0; - lgrp->lgrp_chips = NULL; nlgrps--; } @@ -1022,7 +988,6 @@ lgrp_cpu_fini(struct cpu *cp, lgrp_id_t lgrpid) lgrp_t *my_lgrp; struct cpu *prev; struct cpu *next; - chip_t *chp; ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized); @@ -1042,42 +1007,6 @@ lgrp_cpu_fini(struct cpu *cp, lgrp_id_t lgrpid) my_lgrp->lgrp_cpucnt--; /* - * If the last CPU on it's chip is being offlined - * then remove this chip from the per lgroup list. - * - * This is also done for the boot CPU when it needs - * to move between lgroups as a consequence of - * null proc lpa. 
- */ - chp = cp->cpu_chip; - if (chp->chip_ncpu == 0 || !lgrp_initialized) { - - chip_t *chpp; - - if (--my_lgrp->lgrp_chipcnt == 0) - my_lgrp->lgrp_chips = NULL; - else if (my_lgrp->lgrp_chips == chp) - my_lgrp->lgrp_chips = chp->chip_next_lgrp; - - /* - * Walk this lgroup's chip list looking for chips that - * may try to balance against the one that's leaving - */ - for (chpp = chp->chip_next_lgrp; chpp != chp; - chpp = chpp->chip_next_lgrp) { - if (chpp->chip_balance == chp) - chpp->chip_balance = chp->chip_next_lgrp; - } - - chp->chip_prev_lgrp->chip_next_lgrp = chp->chip_next_lgrp; - chp->chip_next_lgrp->chip_prev_lgrp = chp->chip_prev_lgrp; - - chp->chip_next_lgrp = chp->chip_prev_lgrp = NULL; - chp->chip_lgrp = NULL; - chp->chip_balance = NULL; - } - - /* * Removing last CPU in lgroup, so update lgroup topology */ if (my_lgrp->lgrp_cpucnt == 0) { @@ -1661,7 +1590,7 @@ lgrp_phys_to_lgrp(u_longlong_t physaddr) * Return the leaf lgroup containing the given CPU * * The caller needs to take precautions necessary to prevent - * "cpu" from going away across a call to this function. + * "cpu" and its lpl from going away across a call to this function. * hint: kpreempt_disable()/kpreempt_enable() */ static lgrp_t * diff --git a/usr/src/uts/common/os/pg.c b/usr/src/uts/common/os/pg.c new file mode 100644 index 0000000000..cb8295b38e --- /dev/null +++ b/usr/src/uts/common/os/pg.c @@ -0,0 +1,624 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/thread.h> +#include <sys/cpuvar.h> +#include <sys/cpupart.h> +#include <sys/kmem.h> +#include <sys/cmn_err.h> +#include <sys/kstat.h> +#include <sys/processor.h> +#include <sys/disp.h> +#include <sys/group.h> +#include <sys/pg.h> + +/* + * Processor groups + * + * With the introduction of Chip Multi-Threaded (CMT) processor architectures, + * it is no longer necessarily true that a given physical processor module + * will present itself as a single schedulable entity (cpu_t). Rather, each + * chip and/or processor core may present itself as one or more "logical" CPUs. + * + * The logical CPUs presented may share physical components such as caches, + * data pipes, execution pipelines, FPUs, etc. It is advantageous to have the + * kernel be aware of the relationships existing between logical CPUs so that + * the appropriate optimizations may be employed. + * + * The processor group abstraction represents a set of logical CPUs that + * generally share some sort of physical or characteristic relationship. 
+ * + * In the case of a physical sharing relationship, the CPUs in the group may + * share a pipeline, cache or floating point unit. In the case of a logical + * relationship, a PG may represent the set of CPUs in a processor set, or the + * set of CPUs running at a particular clock speed. + * + * The generic processor group structure, pg_t, contains the elements generic + * to a group of CPUs. Depending on the nature of the CPU relationship + * (LOGICAL or PHYSICAL), a pointer to a pg may be recast to a "view" of that + * PG where more specific data is represented. + * + * As an example, a PG representing a PHYSICAL relationship may be recast to + * a pghw_t, where data further describing the hardware sharing relationship + * is maintained. See pghw.c and pghw.h for details on physical PGs. + * + * At this time a more specialized casting of a PG representing a LOGICAL + * relationship has not been implemented, but the architecture allows for this + * in the future. + * + * Processor Group Classes + * + * Processor group consumers may wish to maintain and associate specific + * data with the PGs they create. For this reason, a mechanism for creating + * class specific PGs exists. Classes may overload the default functions for + * creating, destroying, and associating CPUs with PGs, and may also register + * class specific callbacks to be invoked when the CPU related system + * configuration changes. Class specific data is stored/associated with + * PGs by incorporating the pg_t (or pghw_t, as appropriate), as the first + * element of a class specific PG object. In memory, such a structure may look + * like: + * + * ----------------------- - - - + * | common | | | | <--(pg_t *) + * ----------------------- | | - + * | HW specific | | | <-----(pghw_t *) + * ----------------------- | - + * | class specific | | <-------(pg_cmt_t *) + * ----------------------- - + * + * Access to the PG class specific data can be had by casting a pointer to + * its class specific view. + */ + +static pg_t *pg_alloc_default(pg_class_t); +static void pg_free_default(pg_t *); + +/* + * Bootstrap CPU specific PG data + * See pg_cpu_bootstrap() + */ +static cpu_pg_t bootstrap_pg_data; + +/* + * Bitset of allocated PG ids (they are sequential) + * and the next free id in the set. + */ +static bitset_t pg_id_set; +static pgid_t pg_id_next = 0; + +/* + * Default and externed PG ops vectors + */ +static struct pg_ops pg_ops_default = { + pg_alloc_default, /* alloc */ + pg_free_default, /* free */ + NULL, /* cpu_init */ + NULL, /* cpu_fini */ + NULL, /* cpu_active */ + NULL, /* cpu_inactive */ + NULL, /* cpupart_in */ + NULL, /* cpupart_out */ + NULL, /* cpupart_move */ + NULL, /* cpu_belongs */ +}; + +/* + * Class specific PG allocation callbacks + */ +#define PG_ALLOC(class) \ + (pg_classes[class].pgc_ops->alloc ? \ + pg_classes[class].pgc_ops->alloc() : \ + pg_classes[pg_default_cid].pgc_ops->alloc()) + +#define PG_FREE(pg) \ + ((pg)->pg_class->pgc_ops->free ? \ + (pg)->pg_class->pgc_ops->free(pg) : \ + pg_classes[pg_default_cid].pgc_ops->free(pg)) \ + + +/* + * Class specific membership test callback + */ +#define PG_CPU_BELONGS(pg, cp) \ + ((pg)->pg_class->pgc_ops->cpu_belongs ? 
\ + (pg)->pg_class->pgc_ops->cpu_belongs(pg, cp) : 0) \ + +/* + * CPU configuration callbacks + */ +#define PG_CPU_INIT(class, cp) \ +{ \ + if (pg_classes[class].pgc_ops->cpu_init) \ + pg_classes[class].pgc_ops->cpu_init(cp); \ +} + +#define PG_CPU_FINI(class, cp) \ +{ \ + if (pg_classes[class].pgc_ops->cpu_fini) \ + pg_classes[class].pgc_ops->cpu_fini(cp); \ +} + +#define PG_CPU_ACTIVE(class, cp) \ +{ \ + if (pg_classes[class].pgc_ops->cpu_active) \ + pg_classes[class].pgc_ops->cpu_active(cp); \ +} + +#define PG_CPU_INACTIVE(class, cp) \ +{ \ + if (pg_classes[class].pgc_ops->cpu_inactive) \ + pg_classes[class].pgc_ops->cpu_inactive(cp); \ +} + +/* + * CPU / cpupart configuration callbacks + */ +#define PG_CPUPART_IN(class, cp, pp) \ +{ \ + if (pg_classes[class].pgc_ops->cpupart_in) \ + pg_classes[class].pgc_ops->cpupart_in(cp, pp); \ +} + +#define PG_CPUPART_OUT(class, cp, pp) \ +{ \ + if (pg_classes[class].pgc_ops->cpupart_out) \ + pg_classes[class].pgc_ops->cpupart_out(cp, pp); \ +} + +#define PG_CPUPART_MOVE(class, cp, old, new) \ +{ \ + if (pg_classes[class].pgc_ops->cpupart_move) \ + pg_classes[class].pgc_ops->cpupart_move(cp, old, new); \ +} + + + +static pg_class_t *pg_classes; +static int pg_nclasses; + +static pg_cid_t pg_default_cid; + +/* + * Initialize common PG subsystem. Perform CPU 0 initialization + */ +void +pg_init(void) +{ + pg_default_cid = + pg_class_register("default", &pg_ops_default, PGR_LOGICAL); +} + +/* + * Perform CPU 0 initialization + */ +void +pg_cpu0_init(void) +{ + extern void pghw_physid_create(); + + /* + * Create the physical ID cache for the boot CPU + */ + pghw_physid_create(CPU); + + /* + * pg_cpu_* require that cpu_lock be held + */ + mutex_enter(&cpu_lock); + + pg_cpu_init(CPU); + pg_cpupart_in(CPU, &cp_default); + pg_cpu_active(CPU); + + mutex_exit(&cpu_lock); +} + +/* + * Register a new PG class + */ +pg_cid_t +pg_class_register(char *name, struct pg_ops *ops, pg_relation_t relation) +{ + pg_class_t *newclass; + pg_class_t *classes_old; + id_t cid; + + mutex_enter(&cpu_lock); + + /* + * Allocate a new pg_class_t in the pg_classes array + */ + if (pg_nclasses == 0) { + pg_classes = kmem_zalloc(sizeof (pg_class_t), KM_SLEEP); + } else { + classes_old = pg_classes; + pg_classes = + kmem_zalloc(sizeof (pg_class_t) * (pg_nclasses + 1), + KM_SLEEP); + (void) kcopy(classes_old, pg_classes, + sizeof (pg_class_t) * pg_nclasses); + kmem_free(classes_old, sizeof (pg_class_t) * pg_nclasses); + } + + cid = pg_nclasses++; + newclass = &pg_classes[cid]; + + (void) strncpy(newclass->pgc_name, name, PG_CLASS_NAME_MAX); + newclass->pgc_id = cid; + newclass->pgc_ops = ops; + newclass->pgc_relation = relation; + + mutex_exit(&cpu_lock); + + return (cid); } + +/* + * Try to find an existing pg in set in which to place cp. + * Returns the pg if found, and NULL otherwise. + * In the event that the CPU could belong to multiple + * PGs in the set, the first matching PG will be returned. + */ +pg_t * +pg_cpu_find_pg(cpu_t *cp, group_t *set) +{ + pg_t *pg; + group_iter_t i; + + group_iter_init(&i); + while ((pg = group_iterate(set, &i)) != NULL) { + /* + * Ask the class if the CPU belongs here + */ + if (PG_CPU_BELONGS(pg, cp)) + return (pg); + } + return (NULL); +} + +/* + * Iterate over the CPUs in a PG after initializing + * the iterator with PG_CPU_ITR_INIT() + */ +cpu_t * +pg_cpu_next(pg_cpu_itr_t *itr) +{ + cpu_t *cpu; + pg_t *pg = itr->pg; + + cpu = group_iterate(&pg->pg_cpus, &itr->position); + return (cpu); } + +/* + * Create a PG of a given class. 
+ * This routine may block. + */ +pg_t * +pg_create(pg_cid_t cid) +{ + pg_t *pg; + pgid_t id; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Call the class specific PG allocation routine + */ + pg = PG_ALLOC(cid); + pg->pg_class = &pg_classes[cid]; + pg->pg_relation = pg->pg_class->pgc_relation; + + /* + * Find the next free sequential pg id + */ + do { + if (pg_id_next >= bitset_capacity(&pg_id_set)) + bitset_resize(&pg_id_set, pg_id_next + 1); + id = pg_id_next++; + } while (bitset_in_set(&pg_id_set, id)); + + pg->pg_id = id; + bitset_add(&pg_id_set, pg->pg_id); + + /* + * Create the PG's CPU group + */ + group_create(&pg->pg_cpus); + + return (pg); } + +/* + * Destroy a PG. + * This routine may block. + */ +void +pg_destroy(pg_t *pg) +{ + ASSERT(MUTEX_HELD(&cpu_lock)); + + group_destroy(&pg->pg_cpus); + + /* + * Unassign the pg_id + */ + if (pg_id_next > pg->pg_id) + pg_id_next = pg->pg_id; + bitset_del(&pg_id_set, pg->pg_id); + + /* + * Invoke the class specific de-allocation routine + */ + PG_FREE(pg); } + +/* + * Add the CPU "cp" to processor group "pg" + * This routine may block. + */ +void +pg_cpu_add(pg_t *pg, cpu_t *cp) +{ + int err; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* This adds the CPU to the PG's CPU group */ + err = group_add(&pg->pg_cpus, cp, GRP_RESIZE); + ASSERT(err == 0); + + /* This adds the PG to the CPU's PG group */ + ASSERT(cp->cpu_pg != &bootstrap_pg_data); + err = group_add(&cp->cpu_pg->pgs, pg, GRP_RESIZE); + ASSERT(err == 0); } + +/* + * Remove "cp" from "pg". + * This routine may block. + */ +void +pg_cpu_delete(pg_t *pg, cpu_t *cp) +{ + int err; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* Remove the CPU from the PG */ + err = group_remove(&pg->pg_cpus, cp, GRP_RESIZE); + ASSERT(err == 0); + + /* Remove the PG from the CPU's PG group */ + ASSERT(cp->cpu_pg != &bootstrap_pg_data); + err = group_remove(&cp->cpu_pg->pgs, pg, GRP_RESIZE); + ASSERT(err == 0); } + +/* + * Allocate a CPU's PG data. This hangs off struct cpu at cpu_pg + */ +static cpu_pg_t * +pg_cpu_data_alloc(void) +{ + cpu_pg_t *pgd; + + pgd = kmem_zalloc(sizeof (cpu_pg_t), KM_SLEEP); + group_create(&pgd->pgs); + group_create(&pgd->cmt_pgs); + + return (pgd); } + +/* + * Free the CPU's PG data. + */ +static void +pg_cpu_data_free(cpu_pg_t *pgd) +{ + group_destroy(&pgd->pgs); + group_destroy(&pgd->cmt_pgs); + kmem_free(pgd, sizeof (cpu_pg_t)); } + +/* + * A new CPU is coming into the system, either via booting or DR. + * Allocate its PG data, and notify all registered classes about + * the new CPU. + * + * This routine may block. + */ +void +pg_cpu_init(cpu_t *cp) +{ + pg_cid_t i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Allocate and size the per CPU pg data + */ + cp->cpu_pg = pg_cpu_data_alloc(); + + /* + * Notify all registered classes about the new CPU + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPU_INIT(i, cp); } + +/* + * This CPU is being deleted from the system. Notify the classes + * and free up the CPU's PG data. + */ +void +pg_cpu_fini(cpu_t *cp) +{ + pg_cid_t i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * This can happen if the CPU coming into the system + * failed to power on. + */ + if (cp->cpu_pg == NULL || + cp->cpu_pg == &bootstrap_pg_data) + return; + + for (i = 0; i < pg_nclasses; i++) + PG_CPU_FINI(i, cp); + + pg_cpu_data_free(cp->cpu_pg); + cp->cpu_pg = NULL; } + +/* + * This CPU is becoming active (online) + * This routine may not block as it is called from paused CPUs + * context. 
+ */ +void +pg_cpu_active(cpu_t *cp) +{ + pg_cid_t i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Notify all registered classes about the new CPU + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPU_ACTIVE(i, cp); +} + +/* + * This CPU is going inactive (offline) + * This routine may not block, as it is called from paused + * CPUs context. + */ +void +pg_cpu_inactive(cpu_t *cp) +{ + pg_cid_t i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Notify all registered classes about the new CPU + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPU_INACTIVE(i, cp); +} + +/* + * Invoked when the CPU is about to move into the partition + * This routine may block. + */ +void +pg_cpupart_in(cpu_t *cp, cpupart_t *pp) +{ + int i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Notify all registered classes that the + * CPU is about to enter the CPU partition + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPUPART_IN(i, cp, pp); +} + +/* + * Invoked when the CPU is about to move out of the partition + * This routine may block. + */ +/*ARGSUSED*/ +void +pg_cpupart_out(cpu_t *cp, cpupart_t *pp) +{ + int i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Notify all registered classes that the + * CPU is about to leave the CPU partition + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPUPART_OUT(i, cp, pp); +} + +/* + * Invoked when the CPU is *moving* partitions. + * + * This routine may not block, as it is called from paused CPUs + * context. + */ +void +pg_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp) +{ + int i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Notify all registered classes that the + * CPU is about to leave the CPU partition + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPUPART_MOVE(i, cp, oldpp, newpp); +} + +/* + * Provide the specified CPU a bootstrap pg + * This is needed to allow sane behaviour if any PG consuming + * code needs to deal with a partially initialized CPU + */ +void +pg_cpu_bootstrap(cpu_t *cp) +{ + cp->cpu_pg = &bootstrap_pg_data; +} + +/*ARGSUSED*/ +static pg_t * +pg_alloc_default(pg_class_t class) +{ + return (kmem_zalloc(sizeof (pg_t), KM_SLEEP)); +} + +/*ARGSUSED*/ +static void +pg_free_default(struct pg *pg) +{ + kmem_free(pg, sizeof (pg_t)); +} diff --git a/usr/src/uts/common/os/pghw.c b/usr/src/uts/common/os/pghw.c new file mode 100644 index 0000000000..e2dc2a38f2 --- /dev/null +++ b/usr/src/uts/common/os/pghw.c @@ -0,0 +1,420 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/thread.h> +#include <sys/cpuvar.h> +#include <sys/kmem.h> +#include <sys/cmn_err.h> +#include <sys/group.h> +#include <sys/pg.h> +#include <sys/pghw.h> + +/* + * Processor Groups: Hardware sharing relationship layer + * + * This file implements an extension to Processor Groups to capture + * hardware sharing relationships existing between logical CPUs. Examples of + * hardware sharing relationships include shared caches on some CMT + * processor architectures, or shared local memory controllers on NUMA + * based system architectures. + * + * The pghw_t structure represents the extended PG. The first member + * of the structure is the generic pg_t with the pghw specific members + * following. The generic pg_t *must* remain the first member of the + * structure as the code uses casting of structure references to access + * the generic pg_t structure elements. + * + * In addition to the generic CPU grouping, physical PGs have a hardware + * sharing relationship enumerated "type", and an instance id. The enumerated + * type is defined by the pghw_type_t enumeration, while the instance id + * uniquely identifies the sharing instance from among others of the same + * hardware sharing type. + * + * The physical PGs are organized into an overall hierarchy, and are tracked + * in a number of different per CPU, and per pghw_type_t type groups. + * As an example: + * + * ------------- + * | pg_hw | + * | (group_t) | + * ------------- + * || ============================ + * ||\\-----------------------// \\ \\ + * || | hwset (PGC_HW_CHIP) | ------------- ------------- + * || | (group_t) | | pghw_t | | pghw_t | + * || ----------------------- | chip 0 | | chip 1 | + * || ------------- ------------- + * || \\ \\ \\ \\ \\ \\ \\ \\ + * || cpu cpu cpu cpu cpu cpu cpu cpu + * || + * || ============================ + * ||\\-----------------------// \\ \\ + * || | hwset (PGC_HW_IPIPE)| ------------- ------------- + * || | (group_t) | | pghw_t | | pghw_t | + * || ----------------------- | ipipe 0 | | ipipe 1 | + * || ------------- ------------- + * || \\ \\ \\ \\ + * || cpu cpu cpu cpu + * ... + * + * + * The top level pg_hw is a group of "hwset" groups. Each hwset holds a group + * of physical PGs of the same hardware sharing type. Within each hwset, the + * PG's instance id uniquely identifies the grouping relationship among other + * groupings of the same sharing type. The instance id for a grouping is + * platform defined, and in some cases may be used by platform code as a handle + * to search for a particular relationship instance. + * + * Each physical PG (by virtue of the embedded pg_t) contains a group of CPUs + * that participate in the sharing relationship. Each CPU also has associated + * with it a grouping tracking the PGs in which the CPU belongs. This can be + * used to iterate over the various relationships in which the CPU participates + * (the CPU's chip, cache, lgroup, etc.). + * + * The hwsets are created dynamically as new hardware sharing relationship types + * are instantiated. They are never destroyed, as once a given relationship + * type appears in the system, it is quite likely that at least one instance of + * that relationship will always persist as long as the system is running. 
+ */ + +static group_t *pg_hw; /* top level pg hw group */ + +/* + * Lookup table mapping hardware sharing relationships with hierarchy levels + */ +static int pghw_level_table[PGHW_NUM_COMPONENTS]; + +/* + * Physical PG kstats + */ +struct pghw_kstat { + kstat_named_t pg_id; + kstat_named_t pg_class; + kstat_named_t pg_ncpus; + kstat_named_t pg_instance_id; + kstat_named_t pg_hw; +} pghw_kstat = { + { "id", KSTAT_DATA_UINT64 }, + { "pg_class", KSTAT_DATA_STRING }, + { "ncpus", KSTAT_DATA_UINT64 }, + { "instance_id", KSTAT_DATA_UINT64 }, + { "hardware", KSTAT_DATA_STRING }, +}; + +kmutex_t pghw_kstat_lock; + +/* + * hwset operations + */ +static group_t *pghw_set_create(pghw_type_t); +static void pghw_set_add(group_t *, pghw_t *); +static void pghw_set_remove(group_t *, pghw_t *); + +/* + * Initialize the physical portion of a physical PG + */ +void +pghw_init(pghw_t *pg, cpu_t *cp, pghw_type_t hw) +{ + group_t *hwset; + + if ((hwset = pghw_set_lookup(hw)) == NULL) { + /* + * Haven't seen this hardware type yet + */ + hwset = pghw_set_create(hw); + } + + pghw_set_add(hwset, pg); + pg->pghw_hw = hw; + pg->pghw_instance = + pg_plat_hw_instance_id(cp, hw); + pghw_kstat_create(pg); +} + +/* + * Teardown the physical portion of a physical PG + */ +void +pghw_fini(pghw_t *pg) +{ + group_t *hwset; + + hwset = pghw_set_lookup(pg->pghw_hw); + ASSERT(hwset != NULL); + + pghw_set_remove(hwset, pg); + pg->pghw_instance = (id_t)PGHW_INSTANCE_ANON; + pg->pghw_hw = (pghw_type_t)-1; + + if (pg->pghw_kstat) + kstat_delete(pg->pghw_kstat); +} + +/* + * Find an existing physical PG in which to place + * the given CPU for the specified hardware sharing + * relationship + */ +pghw_t * +pghw_place_cpu(cpu_t *cp, pghw_type_t hw) +{ + group_t *hwset; + + if ((hwset = pghw_set_lookup(hw)) == NULL) { + return (NULL); + } + + return ((pghw_t *)pg_cpu_find_pg(cp, hwset)); +} + +/* + * Find the pg representing the hw sharing relationship in which + * cp belongs + */ +pghw_t * +pghw_find_pg(cpu_t *cp, pghw_type_t hw) +{ + group_iter_t i; + pghw_t *pg; + + group_iter_init(&i); + while ((pg = group_iterate(&cp->cpu_pg->pgs, &i)) != NULL) { + if (pg->pghw_hw == hw) + return (pg); + } + return (NULL); +} + +/* + * Find the PG of the given hardware sharing relationship + * type with the given instance id + */ +pghw_t * +pghw_find_by_instance(id_t id, pghw_type_t hw) +{ + group_iter_t i; + group_t *set; + pghw_t *pg; + + set = pghw_set_lookup(hw); + if (!set) + return (NULL); + + group_iter_init(&i); + while ((pg = group_iterate(set, &i)) != NULL) { + if (pg->pghw_instance == id) + return (pg); + } + return (NULL); +} + +/* + * CPUs physical ID cache creation / destruction + * The cache's elements are initialized to the CPU's id + */ +void +pghw_physid_create(cpu_t *cp) +{ + int i; + + cp->cpu_physid = kmem_alloc(sizeof (cpu_physid_t), KM_SLEEP); + + for (i = 0; i < (sizeof (cpu_physid_t) / sizeof (id_t)); i++) { + ((id_t *)cp->cpu_physid)[i] = cp->cpu_id; + } +} + +void +pghw_physid_destroy(cpu_t *cp) +{ + if (cp->cpu_physid) { + kmem_free(cp->cpu_physid, sizeof (cpu_physid_t)); + cp->cpu_physid = NULL; + } +} + +/* + * Return a sequential level identifier for the specified + * hardware sharing relationship + */ +int +pghw_level(pghw_type_t hw) +{ + return (pg_plat_hw_level(hw)); +} + +/* + * Create a new, empty hwset. + * This routine may block, and must not be called from any + * paused CPU context. 
+ */ +static group_t * +pghw_set_create(pghw_type_t hw) +{ + group_t *g; + int ret; + + /* + * Create the top level PG hw group if it doesn't already exist + * This is a "set" of hardware sets, that is ordered (and indexed) + * by the pghw_type_t enum. + */ + if (pg_hw == NULL) { + pg_hw = kmem_alloc(sizeof (group_t), KM_SLEEP); + group_create(pg_hw); + group_expand(pg_hw, (uint_t)PGHW_NUM_COMPONENTS); + } + + /* + * Create the new hwset + * Add it to the top level pg_hw group. + */ + g = kmem_alloc(sizeof (group_t), KM_SLEEP); + group_create(g); + + ret = group_add_at(pg_hw, g, (uint_t)hw); + ASSERT(ret == 0); + + /* + * Update the table that maps hardware sharing relationships + * to hierarchy levels + */ + ASSERT(pghw_level_table[hw] == NULL); + pghw_level_table[hw] = pg_plat_hw_level(hw); + + return (g); +} + +/* + * Find the hwset associated with the given hardware sharing type + */ +group_t * +pghw_set_lookup(pghw_type_t hw) +{ + group_t *hwset; + + if (pg_hw == NULL) + return (NULL); + + hwset = GROUP_ACCESS(pg_hw, (uint_t)hw); + return (hwset); +} + +/* + * Add a PG to a hwset + */ +static void +pghw_set_add(group_t *hwset, pghw_t *pg) +{ + (void) group_add(hwset, pg, GRP_RESIZE); +} + +/* + * Remove a PG from a hwset + */ +static void +pghw_set_remove(group_t *hwset, pghw_t *pg) +{ + int result; + + result = group_remove(hwset, pg, GRP_RESIZE); + ASSERT(result == 0); +} + + +/* + * Return a string name given a pg_hw sharing type + */ +#define PGHW_TYPE_NAME_MAX 8 + +static char * +pghw_type_string(pghw_type_t hw) +{ + switch (hw) { + case PGHW_IPIPE: + return ("ipipe"); + case PGHW_CACHE: + return ("cache"); + case PGHW_FPU: + return ("fpu"); + case PGHW_CHIP: + return ("chip"); + case PGHW_MEMORY: + return ("memory"); + default: + return ("unknown"); + } +} + +/* + * Create / Update routines for PG hw kstats + * + * It is the intention of these kstats to provide some level + * of informational / debugging observability into the types + * and nature of the system's detected hardware sharing relationships + */ +void +pghw_kstat_create(pghw_t *pg) +{ + /* + * Create a physical pg kstat + */ + if ((pg->pghw_kstat = kstat_create("pg", ((pg_t *)pg)->pg_id, + "pg", "pg", KSTAT_TYPE_NAMED, + sizeof (pghw_kstat) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL)) != NULL) { + pg->pghw_kstat->ks_data_size += PG_CLASS_NAME_MAX; + pg->pghw_kstat->ks_data_size += PGHW_TYPE_NAME_MAX; + pg->pghw_kstat->ks_lock = &pghw_kstat_lock; + pg->pghw_kstat->ks_data = &pghw_kstat; + pg->pghw_kstat->ks_update = pghw_kstat_update; + pg->pghw_kstat->ks_private = pg; + kstat_install(pg->pghw_kstat); + } +} + +int +pghw_kstat_update(kstat_t *ksp, int rw) +{ + struct pghw_kstat *pgsp = &pghw_kstat; + pghw_t *pg = ksp->ks_private; + + if (rw == KSTAT_WRITE) + return (EACCES); + + pgsp->pg_id.value.ui64 = ((pg_t *)pg)->pg_id; + pgsp->pg_ncpus.value.ui64 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus); + pgsp->pg_instance_id.value.ui64 = (uint64_t)pg->pghw_instance; + kstat_named_setstr(&pgsp->pg_class, ((pg_t *)pg)->pg_class->pgc_name); + kstat_named_setstr(&pgsp->pg_hw, pghw_type_string(pg->pghw_hw)); + + return (0); +} |
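The class mechanism described in pg.c's block comment is exercised by registering a struct pg_ops vector with pg_class_register(). Below is a minimal, hypothetical sketch of an "xpg" class, not code from this change: the xpg names are illustrative, the initializer follows the field order of pg_ops_default above, callbacks left NULL are skipped (the CPU and cpupart callbacks) or fall back to the default class (alloc and free).

#include <sys/pg.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>

/* Hypothetical class specific PG; the generic pg_t must remain first */
typedef struct xpg {
	pg_t	xpg_pg;		/* generic PG (common view) */
	int	xpg_data;	/* class specific data */
} xpg_t;

static pg_t *
xpg_alloc(void)
{
	return (kmem_zalloc(sizeof (xpg_t), KM_SLEEP));
}

static void
xpg_free(pg_t *pg)
{
	kmem_free(pg, sizeof (xpg_t));
}

/* ARGSUSED */
static void
xpg_cpu_init(cpu_t *cp)
{
	/* Invoked (with cpu_lock held) as each new CPU enters the system */
}

static struct pg_ops xpg_ops = {
	xpg_alloc,	/* alloc */
	xpg_free,	/* free */
	xpg_cpu_init,	/* cpu_init */
	NULL,		/* cpu_fini */
	NULL,		/* cpu_active */
	NULL,		/* cpu_inactive */
	NULL,		/* cpupart_in */
	NULL,		/* cpupart_out */
	NULL,		/* cpupart_move */
	NULL,		/* cpu_belongs */
};

static pg_cid_t xpg_cid;

void
xpg_init(void)
{
	xpg_cid = pg_class_register("xpg", &xpg_ops, PGR_LOGICAL);
}

A PG of this class would then be created with pg_create(xpg_cid); because the generic pg_t is the first member, the result can be recast between pg_t * and xpg_t *, mirroring how pghw_t extends pg_t for hardware sharing relationships.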