Diffstat (limited to 'usr/src/uts/common/os')
-rw-r--r--  usr/src/uts/common/os/bitset.c  168
-rw-r--r--  usr/src/uts/common/os/chip.c    576
-rw-r--r--  usr/src/uts/common/os/clock.c    37
-rw-r--r--  usr/src/uts/common/os/cpu.c      40
-rw-r--r--  usr/src/uts/common/os/group.c   322
-rw-r--r--  usr/src/uts/common/os/lgrp.c     77
-rw-r--r--  usr/src/uts/common/os/pg.c      624
-rw-r--r--  usr/src/uts/common/os/pghw.c    420
8 files changed, 1568 insertions, 696 deletions
diff --git a/usr/src/uts/common/os/bitset.c b/usr/src/uts/common/os/bitset.c
new file mode 100644
index 0000000000..8222fd9faa
--- /dev/null
+++ b/usr/src/uts/common/os/bitset.c
@@ -0,0 +1,168 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/bitset.h>
+#include <sys/kmem.h>
+#include <sys/systm.h>
+#include <sys/cmn_err.h>
+#include <sys/sysmacros.h>
+
+/*
+ * Initialize a bitset_t.
+ * After bitset_init(), the bitset will be zero sized.
+ */
+void
+bitset_init(bitset_t *b)
+{
+ bzero(b, sizeof (bitset_t));
+}
+
+/*
+ * Uninitialize a bitset_t.
+ * This will free the bitset's data, leaving it zero sized.
+ */
+void
+bitset_fini(bitset_t *b)
+{
+ if (b->bs_words > 0)
+ kmem_free(b->bs_set, b->bs_words * sizeof (ulong_t));
+}
+
+/*
+ * Resize a bitset so that it can hold sz bits.
+ * This can either grow or shrink the bitset holding capacity.
+ * In the case of shrinkage, elements that reside outside the new
+ * holding capacity of the bitset are lost.
+ */
+void
+bitset_resize(bitset_t *b, uint_t sz)
+{
+ uint_t nwords;
+ ulong_t *bset_new, *bset_tmp;
+
+ nwords = BT_BITOUL(sz);
+ if (b->bs_words == nwords)
+ return; /* already properly sized */
+
+ /*
+ * Allocate the new ulong_t array, and copy the old one.
+ */
+ if (nwords > 0) {
+ bset_new = kmem_zalloc(nwords * sizeof (ulong_t), KM_SLEEP);
+ bcopy(b->bs_set, bset_new,
+ MIN(b->bs_words, nwords) * sizeof (ulong_t));
+ } else {
+ bset_new = NULL;
+ }
+
+ /* swap out the old ulong_t array for new one */
+ bset_tmp = b->bs_set;
+ b->bs_set = bset_new;
+
+ /* free up the old array, if there was one */
+ if (bset_tmp != NULL)
+ kmem_free(bset_tmp, b->bs_words * sizeof (ulong_t));
+ b->bs_words = nwords;
+}
+
+/*
+ * Returns the current holding capacity of the bitset
+ */
+uint_t
+bitset_capacity(bitset_t *b)
+{
+ return (b->bs_words * BT_NBIPUL);
+}
+
+/*
+ * Add and delete bits in the bitset.
+ *
+ * Adding a bit that is already set, and clearing a bit that's already clear
+ * is legal.
+ *
+ * Adding or deleting an element that falls outside the bitset's current
+ * holding capacity is illegal.
+ */
+void
+bitset_add(bitset_t *b, uint_t elt)
+{
+ ASSERT(b->bs_words * BT_NBIPUL > elt);
+
+ BT_SET(b->bs_set, elt);
+}
+
+void
+bitset_del(bitset_t *b, uint_t elt)
+{
+ ASSERT(b->bs_words * BT_NBIPUL > elt);
+
+ BT_CLEAR(b->bs_set, elt);
+}
+
+/*
+ * Return non-zero if the bit is present in the set
+ */
+int
+bitset_in_set(bitset_t *b, uint_t elt)
+{
+ ASSERT(b->bs_words * BT_NBIPUL > elt);
+
+ return (BT_TEST(b->bs_set, elt));
+}
+
+/*
+ * Return non-zero if the bitset is empty
+ */
+int
+bitset_is_null(bitset_t *b)
+{
+ int i;
+
+ for (i = 0; i < b->bs_words; i++)
+ if (b->bs_set[i] != 0)
+ return (0);
+ return (1);
+}
+
+/*
+ * Find the first set bit in the bitset
+ * Return (uint_t)-1 if no bit was found
+ */
+uint_t
+bitset_find(bitset_t *b)
+{
+ uint_t i;
+ uint_t elt = (uint_t)-1;
+
+ for (i = 0; i < b->bs_words; i++) {
+ elt = (uint_t)(lowbit(b->bs_set[i]) - 1);
+ if (elt != (uint_t)-1) {
+ elt += i * BT_NBIPUL;
+ break;
+ }
+ }
+ return (elt);
+}
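
For readers coming to this file fresh, here is a minimal user-space model of the word/bit mapping that the BT_SET/BT_TEST macros perform on the bs_set array; the model_* names are hypothetical and stand in for the kernel macros:

#include <assert.h>
#include <limits.h>
#include <stdlib.h>

#define MODEL_NBIPUL (sizeof (unsigned long) * CHAR_BIT) /* bits per ulong_t */

typedef struct {
	unsigned long *set;	/* mirrors bs_set */
	unsigned words;		/* mirrors bs_words */
} model_bitset_t;

/* Element elt lives at bit (elt % MODEL_NBIPUL) of word (elt / MODEL_NBIPUL) */
static void
model_add(model_bitset_t *b, unsigned elt)
{
	assert(elt < b->words * MODEL_NBIPUL); /* precondition, as in bitset_add() */
	b->set[elt / MODEL_NBIPUL] |= 1UL << (elt % MODEL_NBIPUL);
}

static int
model_in_set(model_bitset_t *b, unsigned elt)
{
	return (((b->set[elt / MODEL_NBIPUL] >> (elt % MODEL_NBIPUL)) & 1UL) != 0);
}

int
main(void)
{
	model_bitset_t b;

	b.words = 2;
	b.set = calloc(b.words, sizeof (unsigned long));
	model_add(&b, 1);
	model_add(&b, MODEL_NBIPUL + 3); /* lands in the second word */
	assert(model_in_set(&b, 1) && model_in_set(&b, MODEL_NBIPUL + 3));
	assert(!model_in_set(&b, 0));
	free(b.set);
	return (0);
}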
diff --git a/usr/src/uts/common/os/chip.c b/usr/src/uts/common/os/chip.c
deleted file mode 100644
index ad11827b0f..0000000000
--- a/usr/src/uts/common/os/chip.c
+++ /dev/null
@@ -1,576 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/thread.h>
-#include <sys/cpuvar.h>
-#include <sys/cpupart.h>
-#include <sys/kmem.h>
-#include <sys/cmn_err.h>
-#include <sys/kstat.h>
-#include <sys/processor.h>
-#include <sys/disp.h>
-#include <sys/chip.h>
-
-/*
- * CMT aware scheduler/dispatcher support
- *
- * With the introduction of Chip Multi-Threaded (CMT) processor architectures,
- * it is no longer necessarily true that a given physical processor
- * module (chip) will present itself as a single schedulable entity (cpu_t).
- * Rather, each chip may present itself as one or more "logical" CPUs.
- *
- * The logical CPUs presented may share physical components on the chip
- * such as caches, data pipes, FPUs, etc. It is advantageous to have the
- * kernel know which logical CPUs are presented by a given chip,
- * and what facilities on the chip are shared, since the kernel can then use
- * this information to employ scheduling policies that help improve the
- * availability of per chip resources, and increase utilization of a thread's
- * cache investment.
- *
- * The "chip_t" structure represents a physical processor.
- * It is used to keep track of which logical CPUs are presented by a given
- * chip, and to provide a parameterized representation of a chip's
- * properties. A count of the number of running threads is also
- * maintained, and is used by the dispatcher to balance load across the
- * system's chips to improve performance through increased chip resource
- * availability.
- *
- * Locking:
- *
- * Safely traversing the per lgroup lists requires the same protections
- * as traversing the cpu lists. One must either:
- * - hold cpu_lock
- * - have disabled kernel preemption
- * - be at high SPL
- * - have cpu's paused
- *
- * Safely traversing the global "chip_list" requires holding cpu_lock.
- *
- * A chip's nrunning count should only be modified using the
- * CHIP_NRUNNING() macro, through which updates of the count are done
- * atomically.
- */
-
-chip_t cpu0_chip; /* chip structure for first CPU */
-cpu_physid_t cpu0_physid; /* boot CPU's physical id structure */
-
-/*
- * chip_bootstrap is used on platforms where it is possible to enter the
- * dispatcher before a new CPU's chip initialization has happened.
- */
-static chip_t chip_bootstrap;
-
-#define CPU_HAS_NO_CHIP(cp) \
- ((cp)->cpu_chip == NULL || (cp)->cpu_chip == &chip_bootstrap)
-
-static chip_t *chip_list; /* protected by CPU lock */
-static chip_set_t chip_set; /* bitmap of chips in existence */
- /* indexed by chip_seqid */
-static chipid_t chip_seqid_next = 0; /* next sequential chip id */
-static int nchips = 0; /* num chips in existence */
-
-static chip_t *chip_find(chipid_t);
-static int chip_kstat_extract(kstat_t *, int);
-
-/*
- * Declare static kstat names (defined in chip.h)
- */
-CHIP_KSTAT_NAMES;
-
-/*
- * Find the chip_t with the given chip_id.
- */
-static chip_t *
-chip_find(chipid_t chipid)
-{
- chip_t *chp, *chip_start;
-
- ASSERT(chip_list == NULL || chip_list->chip_next == chip_list ||
- MUTEX_HELD(&cpu_lock));
-
- if ((chp = chip_start = chip_list) != NULL) {
- do {
- if (chp->chip_id == chipid) {
- return (chp);
- }
- } while ((chp = chp->chip_next) != chip_start);
- }
- return (NULL);
-}
-
-chip_t *
-chip_lookup(chipid_t chipid)
-{
- chip_t *chp;
-
- mutex_enter(&cpu_lock);
- chp = chip_find(chipid);
- mutex_exit(&cpu_lock);
-
- return (chp);
-}
-
-#ifndef sun4v
-/*
- * Setup the kstats for this chip, if needed
- */
-void
-chip_kstat_create(chip_t *chp)
-{
- chip_stat_t stat;
- kstat_t *chip_kstat;
-
- ASSERT(MUTEX_HELD(&cpu_lock));
-
- if (chp->chip_kstat != NULL)
- return; /* already initialized */
-
- chip_kstat = kstat_create("chip", chp->chip_id, NULL, "misc",
- KSTAT_TYPE_NAMED, CHIP_NUM_STATS,
- KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
-
- if (chip_kstat != NULL) {
- chip_kstat->ks_lock = &chp->chip_kstat_mutex;
- mutex_init(chip_kstat->ks_lock, NULL, MUTEX_DEFAULT, NULL);
- chip_kstat->ks_private = chp;
- chip_kstat->ks_data = chp->chip_kstat_data;
- for (stat = 0; stat < CHIP_NUM_STATS; stat++)
- kstat_named_init(&chp->chip_kstat_data[stat],
- chip_kstat_names[stat], KSTAT_DATA_INT64);
- chip_kstat->ks_update = chip_kstat_extract;
- chp->chip_kstat = chip_kstat;
- kstat_install(chip_kstat);
- }
-}
-#else
-/*
- * Note: On sun4v systems, chip kstats don't currently
- * exist, since "chip" structures and policies are being
- * leveraged to implement core level balancing, and exporting
- * chip kstats in light of this would be both misleading
- * and confusing.
- */
-/* ARGSUSED */
-void
-chip_kstat_create(chip_t *chp)
-{
-}
-#endif /* !sun4v */
-
-static int
-chip_kstat_extract(kstat_t *ksp, int rw)
-{
- struct kstat_named *ksd;
- chip_t *chp;
-
- chp = (chip_t *)ksp->ks_private;
-
- ksd = (struct kstat_named *)ksp->ks_data;
- ASSERT(ksd == chp->chip_kstat_data);
-
- /*
- * The chip kstats are read only
- */
- if (rw == KSTAT_WRITE)
- return (EACCES);
-
- ksd[CHIP_ID].value.i64 = chp->chip_id;
- ksd[CHIP_NCPUS].value.i64 = chp->chip_ncpu;
- ksd[CHIP_NRUNNING].value.i64 = chp->chip_nrunning;
- ksd[CHIP_RECHOOSE].value.i64 =
- rechoose_interval + chp->chip_rechoose_adj;
-
- return (0);
-}
-
-/*
- * If necessary, instantiate a chip_t for this CPU.
- * Called when a CPU is being added to the system either in startup,
- * or because of DR. The cpu will be assigned to the chip's active
- * CPU list later in chip_cpu_assign()
- */
-void
-chip_cpu_init(cpu_t *cp)
-{
- chipid_t cid;
- int rechoose;
- chip_t *chp;
- chip_def_t chp_def;
-
- ASSERT((chip_list == NULL) || (MUTEX_HELD(&cpu_lock)));
-
- if (chip_list == NULL)
- cp->cpu_physid = &cpu0_physid;
- else
- cp->cpu_physid = kmem_zalloc(sizeof (cpu_physid_t), KM_SLEEP);
-
- /*
- * Call into the platform to fetch this cpu's chip and core ids.
- * The ids are cached in the CPU's physical id structure.
- *
- * On sun4v platforms, the chip infrastructure is currently being
- * leveraged to implement core level load balancing.
- */
-#ifdef DO_CORELEVEL_LOADBAL
- cid = chip_plat_get_coreid(cp);
- cp->cpu_physid->cpu_coreid = cid;
- cp->cpu_physid->cpu_chipid = chip_plat_get_chipid(cp);
-#else
- cid = chip_plat_get_chipid(cp);
- cp->cpu_physid->cpu_chipid = cid;
- cp->cpu_physid->cpu_coreid = chip_plat_get_coreid(cp);
-#endif /* DO_CORELEVEL_LOADBAL */
-
- chp = chip_find(cid);
- if (chp == NULL) {
-
- /*
- * Create a new chip
- */
- if (chip_list == NULL)
- chp = &cpu0_chip;
- else
- chp = kmem_zalloc(sizeof (*chp), KM_SLEEP);
-
- chp->chip_id = cid;
- chp->chip_nrunning = 0;
-
- /*
- * If we're booting, take this moment to perform
- * some additional initialization
- */
- if (chip_list == NULL) {
- CHIP_SET_ZERO(chip_set);
- CHIP_SET_ZERO(cp->cpu_part->cp_mach->mc_chipset);
- chp->chip_nrunning++; /* for t0 */
- }
-
- /*
- * Find the next free sequential chip id.
- * A chip's sequential id exists in the range
- * 0 .. CHIP_MAX_CHIPS, and is suitable for use with
- * chip sets.
- */
- while (CHIP_SET_TEST(chip_set, chip_seqid_next))
- chip_seqid_next++;
- chp->chip_seqid = chip_seqid_next++;
- CHIP_SET_ADD(chip_set, chp->chip_seqid);
-
- ASSERT(chip_seqid_next <= CHIP_MAX_CHIPS);
-
-
- /*
- * Query the platform specific parameters
- * for this chip
- */
- chip_plat_define_chip(cp, &chp_def);
- chp->chip_rechoose_adj = chp_def.chipd_rechoose_adj;
- chp->chip_type = chp_def.chipd_type;
- chp->chip_nosteal = chp_def.chipd_nosteal;
-
- ASSERT((chp->chip_type < CHIP_NUM_TYPES) &&
- (chp->chip_type >= CHIP_DEFAULT));
-
- /*
- * Insert this chip in chip_list
- */
- if (chip_list == NULL) {
- chip_list = chp;
- chp->chip_next = chp->chip_prev = chp;
- } else {
- chip_t *chptr;
-
- chptr = chip_list;
- chp->chip_next = chptr;
- chp->chip_prev = chptr->chip_prev;
- chptr->chip_prev->chip_next = chp;
- chptr->chip_prev = chp;
- }
-
- nchips++;
- ASSERT(nchips <= CHIP_MAX_CHIPS);
-
- /*
- * The boot cpu will create the first chip's kstats
- * later in cpu_kstat_init()
- */
- if (chp != &cpu0_chip)
- chip_kstat_create(chp);
- }
-
- /*
- * Initialize the effective rechoose interval cached
- * in this cpu structure.
- */
- rechoose = rechoose_interval + chp->chip_rechoose_adj;
- cp->cpu_rechoose = (rechoose < 0) ? 0 : rechoose;
-
- cp->cpu_chip = chp;
- chp->chip_ref++;
-}
-
-/*
- * This cpu is being deleted. It has already been removed from
- * the chip's active cpu list back in chip_cpu_unassign(). Here
- * we remove the cpu's reference to the chip, and cleanup/destroy
- * the chip if needed.
- */
-void
-chip_cpu_fini(cpu_t *cp)
-{
- chip_t *chp;
- chip_t *prev, *next;
-
- ASSERT(MUTEX_HELD(&cpu_lock));
-
- /*
- * This can happen if the CPU failed to power on
- */
- if (CPU_HAS_NO_CHIP(cp))
- return;
-
- chp = cp->cpu_chip;
- cp->cpu_chip = NULL;
-
- /*
- * Clear out and free the CPU's physical id structure
- */
- cp->cpu_physid->cpu_chipid = -1;
- cp->cpu_physid->cpu_coreid = -1;
-
- if (cp->cpu_physid != &cpu0_physid) {
- ASSERT(cp->cpu_physid != NULL);
- kmem_free(cp->cpu_physid, sizeof (cpu_physid_t));
- }
- cp->cpu_physid = NULL;
-
- /*
- * Delete the chip if its last CPU is being deleted
- */
- if (--chp->chip_ref == 0) {
-
- ASSERT(chp->chip_ncpu == 0);
- ASSERT(chp->chip_cpus == NULL);
- ASSERT(chp->chip_nrunning == 0);
- ASSERT(chp->chip_lgrp == NULL);
- ASSERT((chp->chip_next_lgrp == NULL) &&
- (chp->chip_prev_lgrp == NULL));
-
- if (chip_seqid_next > chp->chip_seqid)
- chip_seqid_next = chp->chip_seqid;
- CHIP_SET_REMOVE(chip_set, chp->chip_seqid);
-
- chp->chip_id = -1;
- chp->chip_seqid = -1;
-
- /*
- * remove the chip from the system's chip list
- */
- if (chip_list == chp)
- chip_list = chp->chip_next;
-
- prev = chp->chip_prev;
- next = chp->chip_next;
-
- prev->chip_next = next;
- next->chip_prev = prev;
-
- chp->chip_next = chp->chip_prev = NULL;
-
- nchips--;
-
- /*
- * clean up any chip kstats
- */
- if (chp->chip_kstat) {
- kstat_delete(chp->chip_kstat);
- chp->chip_kstat = NULL;
- }
- /*
- * If the chip_t structure was dynamically
- * allocated, free it.
- */
- if (chp != &cpu0_chip)
- kmem_free(chp, sizeof (*chp));
- }
-}
-
-/*
- * This cpu is becoming active (online).
- * Perform all the necessary bookkeeping in it's chip_t
- */
-void
-chip_cpu_assign(cpu_t *cp)
-{
- chip_t *chp;
- cpu_t *cptr;
-
- ASSERT(chip_list == NULL || chip_list->chip_next == chip_list ||
- MUTEX_HELD(&cpu_lock));
-
- chp = cp->cpu_chip;
-
- /*
- * Add this cpu to the chip's cpu list
- */
- if (chp->chip_ncpu == 0) {
- chp->chip_cpus = cp;
- cp->cpu_next_chip = cp->cpu_prev_chip = cp;
- } else {
- cptr = chp->chip_cpus;
- cp->cpu_next_chip = cptr;
- cp->cpu_prev_chip = cptr->cpu_prev_chip;
- cp->cpu_prev_chip->cpu_next_chip = cp;
- cptr->cpu_prev_chip = cp;
- }
-
- chp->chip_ncpu++;
-
- /*
- * Notate this chip's seqid in the cpu partition's chipset
- */
- chip_cpu_move_part(cp, NULL, cp->cpu_part);
-}
-
-/*
- * This cpu is being offlined, so do the reverse
- * of cpu_chip_assign()
- */
-void
-chip_cpu_unassign(cpu_t *cp)
-{
- chip_t *chp;
- struct cpu *prev;
- struct cpu *next;
-
- ASSERT(MUTEX_HELD(&cpu_lock));
-
- chp = cp->cpu_chip;
-
- chip_cpu_move_part(cp, cp->cpu_part, NULL);
-
- /*
- * remove this cpu from the chip's cpu list
- */
- prev = cp->cpu_prev_chip;
- next = cp->cpu_next_chip;
-
- prev->cpu_next_chip = next;
- next->cpu_prev_chip = prev;
-
- cp->cpu_next_chip = cp->cpu_prev_chip = NULL;
-
- chp->chip_ncpu--;
-
- if (chp->chip_ncpu == 0) {
- chp->chip_cpus = NULL;
- } else if (chp->chip_cpus == cp) {
- chp->chip_cpus = next;
- }
-}
-
-/*
- * A cpu on the chip is moving into and/or out of a cpu partition.
- * Maintain the cpuparts' chip membership set.
- * oldpp is NULL when a cpu is being offlined.
- * newpp is NULL when a cpu is being onlined.
- */
-void
-chip_cpu_move_part(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
-{
- cpu_t *cpp;
- chip_t *chp;
-
- ASSERT(chip_list->chip_next == chip_list || MUTEX_HELD(&cpu_lock));
-
- chp = cp->cpu_chip;
-
- if (newpp != NULL) {
- /*
- * Add the chip's seqid to the cpupart's chip set
- */
- CHIP_SET_ADD(newpp->cp_mach->mc_chipset, chp->chip_seqid);
- }
-
- if (oldpp != NULL) {
- cpp = cp;
- while ((cpp = cpp->cpu_next_chip) != cp) {
- if (cpp->cpu_part->cp_id == oldpp->cp_id) {
- /*
- * Another cpu on the chip is in the old
- * cpu partition, so we're done
- */
- return;
- }
- }
-
- /*
- * No other cpu on the chip is in the old partition
- * so remove the chip's seqid from it's set
- */
- CHIP_SET_REMOVE(oldpp->cp_mach->mc_chipset, chp->chip_seqid);
- }
-}
-
-/*
- * Called to indicate a slave CPU has started up.
- */
-void
-chip_cpu_startup(cpu_t *cp)
-{
- /*
- * Indicate that the chip has a new running thread
- * (slave startup)
- */
- CHIP_NRUNNING(cp->cpu_chip, 1);
-}
-
-/*
- * Provide the specified CPU a bootstrap chip
- */
-void
-chip_bootstrap_cpu(cpu_t *cp)
-{
- cp->cpu_chip = &chip_bootstrap;
-}
-
-/*
- * Given a chip set, return 1 if it is empty.
- */
-int
-chip_set_isnull(chip_set_t *set)
-{
- int i;
-
- for (i = 0; i < CHIP_SET_WORDS; i++) {
- if (set->csb[i] != 0)
- return (0);
- }
- return (1);
-}
diff --git a/usr/src/uts/common/os/clock.c b/usr/src/uts/common/os/clock.c
index 0152c2e958..a1040f1270 100644
--- a/usr/src/uts/common/os/clock.c
+++ b/usr/src/uts/common/os/clock.c
@@ -23,7 +23,7 @@
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -66,7 +66,6 @@
#include <sys/cpupart.h>
#include <sys/rctl.h>
#include <sys/task.h>
-#include <sys/chip.h>
#include <sys/sdt.h>
#ifdef __sparc
@@ -260,13 +259,6 @@ cyclic_id_t deadman_cyclic; /* deadman()'s cyclic_id */
static int lgrp_ticks; /* counter to schedule lgrp load calcs */
/*
- * rechoose_interval_history is used to detect when rechoose_interval's
- * value has changed (via hotpatching for example), so that the
- * cached values in the cpu structures may be updated.
- */
-static int rechoose_interval_history = RECHOOSE_INTERVAL;
-
-/*
* for tod fault detection
*/
#define TOD_REF_FREQ ((longlong_t)(NANOSEC))
@@ -345,8 +337,6 @@ clock(void)
int64_t lltemp;
int s;
int do_lgrp_load;
- int rechoose_update = 0;
- int rechoose;
int i;
if (panicstr)
@@ -430,21 +420,9 @@ clock(void)
do_lgrp_load = 1;
}
- /*
- * The dispatcher tunable rechoose_interval may be hot-patched.
- * Note if it has a new value. If so, the effective rechoose_interval
- * cached in the cpu structures needs to be updated.
- * If needed we'll do this during the walk of the cpu_list below.
- */
- if (rechoose_interval != rechoose_interval_history) {
- rechoose_interval_history = rechoose_interval;
- rechoose_update = 1;
- }
-
if (one_sec)
loadavg_update();
-
/*
* First count the threads waiting on kpreempt queues in each
* CPU partition.
@@ -522,19 +500,6 @@ clock(void)
lgrp_loadavg(cp->cpu_lpl,
cpu_nrunnable * LGRP_LOADAVG_IN_THREAD_MAX, 1);
}
- /*
- * The platform may define a per physical processor
- * adjustment of rechoose_interval. The effective
- * (base + adjustment) rechoose_interval is cached
- * in the cpu structures for efficiency. Above we detect
- * if the cached values need updating, and here is where
- * the update happens.
- */
- if (rechoose_update) {
- rechoose = rechoose_interval +
- cp->cpu_chip->chip_rechoose_adj;
- cp->cpu_rechoose = (rechoose < 0) ? 0 : rechoose;
- }
} while ((cp = cp->cpu_next) != cpu_list);
/*
diff --git a/usr/src/uts/common/os/cpu.c b/usr/src/uts/common/os/cpu.c
index 5ca51ec3da..9237517a69 100644
--- a/usr/src/uts/common/os/cpu.c
+++ b/usr/src/uts/common/os/cpu.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -45,7 +45,7 @@
#include <sys/cpupart.h>
#include <sys/lgrp.h>
#include <sys/pset.h>
-#include <sys/chip.h>
+#include <sys/pghw.h>
#include <sys/kmem.h>
#include <sys/kmem_impl.h> /* to set per-cpu kmem_cache offset */
#include <sys/atomic.h>
@@ -1266,6 +1266,11 @@ cpu_offline(cpu_t *cp, int flags)
cpu_state_change_notify(cp->cpu_id, CPU_OFF);
/*
+ * Tell the PG subsystem that the CPU is leaving the partition
+ */
+ pg_cpupart_out(cp, pp);
+
+ /*
* Take the CPU out of interrupt participation so we won't find
* bound kernel threads. If the architecture cannot completely
* shut off interrupts on the CPU, don't quiesce it, but don't
@@ -1512,6 +1517,11 @@ out:
cyclic_online(cp);
/*
+ * If we failed, tell the PG subsystem that the CPU is back
+ */
+ pg_cpupart_in(cp, pp);
+
+ /*
* If we failed, we need to notify everyone that this CPU is back on.
*/
if (error != 0)
@@ -1732,7 +1742,12 @@ cpu_del_unit(int cpuid)
ASSERT(cp->cpu_next_part == cp);
ASSERT(cp->cpu_prev_part == cp);
- chip_cpu_fini(cp);
+ /*
+ * Tear down the CPU's physical ID cache, and update any
+ * processor groups
+ */
+ pg_cpu_fini(cp);
+ pghw_physid_destroy(cp);
/*
* Destroy kstat stuff.
@@ -1816,8 +1831,7 @@ cpu_add_active_internal(cpu_t *cp)
ASSERT(cp_numparts_nonempty != 0);
}
- chip_cpu_assign(cp);
-
+ pg_cpu_active(cp);
lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)cp, 0);
bzero(&cp->cpu_loadavg, sizeof (cp->cpu_loadavg));
@@ -1830,9 +1844,12 @@ cpu_add_active_internal(cpu_t *cp)
void
cpu_add_active(cpu_t *cp)
{
+ pg_cpupart_in(cp, cp->cpu_part);
+
pause_cpus(NULL);
cpu_add_active_internal(cp);
start_cpus();
+
cpu_stats_kstat_create(cp);
cpu_create_intrstat(cp);
lgrp_kstat_create(cp);
@@ -1854,7 +1871,7 @@ cpu_remove_active(cpu_t *cp)
ASSERT(cp->cpu_next_onln != cp); /* not the last one */
ASSERT(cp->cpu_prev_onln != cp); /* not the last one */
- chip_cpu_unassign(cp);
+ pg_cpu_inactive(cp);
lgrp_config(LGRP_CONFIG_CPU_OFFLINE, (uintptr_t)cp, 0);
@@ -2146,11 +2163,12 @@ cpu_info_kstat_update(kstat_t *ksp, int rw)
(void) strncpy(cpu_info_template.ci_fpu_type.value.c,
cp->cpu_type_info.pi_fputypes, 15);
cpu_info_template.ci_clock_MHz.value.l = cp->cpu_type_info.pi_clock;
- cpu_info_template.ci_chip_id.value.l = chip_plat_get_chipid(cp);
+ cpu_info_template.ci_chip_id.value.l =
+ pg_plat_hw_instance_id(cp, PGHW_CHIP);
kstat_named_setstr(&cpu_info_template.ci_implementation,
cp->cpu_idstr);
kstat_named_setstr(&cpu_info_template.ci_brandstr, cp->cpu_brandstr);
- cpu_info_template.ci_core_id.value.l = chip_plat_get_coreid(cp);
+ cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp);
#if defined(__sparcv9)
cpu_info_template.ci_device_ID.value.ui64 =
@@ -2163,7 +2181,7 @@ cpu_info_kstat_update(kstat_t *ksp, int rw)
cpu_info_template.ci_family.value.l = cpuid_getfamily(cp);
cpu_info_template.ci_model.value.l = cpuid_getmodel(cp);
cpu_info_template.ci_step.value.l = cpuid_getstep(cp);
- cpu_info_template.ci_clogid.value.l = chip_plat_get_clogid(cp);
+ cpu_info_template.ci_clogid.value.l = cpuid_get_clogid(cp);
#endif
return (0);
@@ -2215,11 +2233,13 @@ cpu_info_kstat_destroy(cpu_t *cp)
void
cpu_kstat_init(cpu_t *cp)
{
+ /*
+ * XXX need pg kstats for boot CPU
+ */
mutex_enter(&cpu_lock);
cpu_info_kstat_create(cp);
cpu_stats_kstat_create(cp);
cpu_create_intrstat(cp);
- chip_kstat_create(cp->cpu_chip);
cpu_set_state(cp);
mutex_exit(&cpu_lock);
}
diff --git a/usr/src/uts/common/os/group.c b/usr/src/uts/common/os/group.c
new file mode 100644
index 0000000000..b15dff181f
--- /dev/null
+++ b/usr/src/uts/common/os/group.c
@@ -0,0 +1,322 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/systm.h>
+#include <sys/param.h>
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#include <sys/group.h>
+
+
+#define GRP_SET_SIZE_DEFAULT 2
+
+static void group_grow_set(group_t *);
+static void group_shrink_set(group_t *);
+static void group_pack_set(void **, uint_t);
+
+/*
+ * Initialize a group_t
+ */
+void
+group_create(group_t *g)
+{
+ bzero(g, sizeof (group_t));
+}
+
+/*
+ * Destroy a group_t
+ * The group must already be empty
+ */
+void
+group_destroy(group_t *g)
+{
+ ASSERT(g->grp_size == 0);
+
+ if (g->grp_capacity > 0) {
+ kmem_free(g->grp_set, g->grp_capacity * sizeof (void *));
+ g->grp_capacity = 0;
+ }
+ g->grp_set = NULL;
+}
+
+/*
+ * Add element "e" to group "g"
+ *
+ * Returns -1 if the addition would exceed the group's capacity
+ * and resizing isn't allowed; returns 0 otherwise
+ */
+int
+group_add(group_t *g, void *e, int gflag)
+{
+ int entry;
+
+ if ((gflag & GRP_NORESIZE) &&
+ g->grp_size == g->grp_capacity)
+ return (-1);
+
+ ASSERT(g->grp_size != g->grp_capacity || (gflag & GRP_RESIZE));
+
+ entry = g->grp_size++;
+ if (g->grp_size > g->grp_capacity)
+ group_grow_set(g);
+
+ ASSERT(g->grp_set[entry] == NULL);
+ g->grp_set[entry] = e;
+
+ return (0);
+}
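
A sketch of the two calling modes: GRP_RESIZE lets the set grow on demand, which allocates with KM_SLEEP in group_grow_set() and may therefore block, while GRP_NORESIZE suits callers that must not block and have reserved capacity beforehand with group_expand(). The wrapper below is hypothetical:

/* Hypothetical wrapper illustrating flag selection; not part of the API */
static int
my_group_add(group_t *g, void *elem, int can_block)
{
	if (can_block)
		return (group_add(g, elem, GRP_RESIZE));

	/*
	 * Non-blocking path: capacity must have been reserved earlier,
	 * from a blocking context, via group_expand(g, n).
	 */
	return (group_add(g, elem, GRP_NORESIZE));
}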
+
+/*
+ * Remove element "e" from group "g"
+ *
+ * Returns -1 if "e" was not present in "g" and 0 otherwise
+ */
+int
+group_remove(group_t *g, void *e, int gflag)
+{
+ int i;
+
+ /*
+ * Find the element in the group's set
+ */
+ for (i = 0; i < g->grp_size; i++)
+ if (g->grp_set[i] == e)
+ break;
+ if (i == g->grp_size)
+ return (-1);
+
+ g->grp_set[i] = NULL;
+ group_pack_set(g->grp_set, g->grp_size);
+ g->grp_size--;
+
+ if ((gflag & GRP_RESIZE) &&
+ g->grp_size > GRP_SET_SIZE_DEFAULT &&
+ ((g->grp_size - 1) & g->grp_size) == 0)
+ group_shrink_set(g);
+
+ return (0);
+}
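
The ((grp_size - 1) & grp_size) == 0 test above (and the matching ASSERTs in the resize routines below) uses a standard bit trick: subtracting one from x flips x's lowest set bit and every bit below it, so the AND is zero exactly when x has at most one bit set. A stand-alone illustration:

#include <assert.h>

/* true for zero and for exact powers of two */
static int
at_most_one_bit(unsigned x)
{
	return (((x - 1) & x) == 0);
}

int
main(void)
{
	assert(at_most_one_bit(0) && at_most_one_bit(1));
	assert(at_most_one_bit(4) && at_most_one_bit(1024));
	assert(!at_most_one_bit(6) && !at_most_one_bit(12));
	return (0);
}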
+
+/*
+ * Expand the capacity of group "g" so that it may
+ * contain at least "n" elements
+ */
+void
+group_expand(group_t *g, uint_t n)
+{
+ while (g->grp_capacity < n)
+ group_grow_set(g);
+}
+
+/*
+ * Upsize a group's holding capacity
+ */
+static void
+group_grow_set(group_t *g)
+{
+ uint_t cap_old, cap_new;
+ void **set_old, **set_new;
+
+ cap_old = g->grp_capacity;
+ set_old = g->grp_set;
+
+ /*
+ * The array size grows in powers of two
+ */
+ if ((cap_new = (cap_old << 1)) == 0) {
+ /*
+ * The set is unallocated.
+ * Allocate a default sized set.
+ */
+ cap_new = GRP_SET_SIZE_DEFAULT;
+ g->grp_set = kmem_zalloc(cap_new * sizeof (void *), KM_SLEEP);
+ g->grp_capacity = cap_new;
+ } else {
+ /*
+ * Allocate a newly sized array,
+ * copy the data, and free the old array.
+ */
+ set_new = kmem_zalloc(cap_new * sizeof (void *), KM_SLEEP);
+ (void) kcopy(set_old, set_new, cap_old * sizeof (void *));
+ g->grp_set = set_new;
+ g->grp_capacity = cap_new;
+ kmem_free(set_old, cap_old * sizeof (void *));
+ }
+ /*
+ * The new array size should be a power of two
+ */
+ ASSERT(((cap_new - 1) & cap_new) == 0);
+}
+
+/*
+ * Downsize a group's holding capacity
+ */
+static void
+group_shrink_set(group_t *g)
+{
+ uint_t cap_old, cap_new;
+ void **set_old, **set_new;
+
+ cap_old = g->grp_capacity;
+ set_old = g->grp_set;
+
+ /*
+ * The group's existing array size must already
+ * be a power of two
+ */
+ ASSERT(((cap_old - 1) & cap_old) == 0);
+ cap_new = cap_old >> 1;
+
+ /*
+ * GRP_SET_SIZE_DEFAULT is the minimum set size.
+ */
+ if (cap_new < GRP_SET_SIZE_DEFAULT)
+ return;
+
+ set_new = kmem_zalloc(cap_new * sizeof (void *), KM_SLEEP);
+ (void) kcopy(set_old, set_new, cap_new * sizeof (void *));
+ g->grp_capacity = cap_new;
+ g->grp_set = set_new;
+
+ ASSERT(((cap_new - 1) & cap_new) == 0);
+ kmem_free(set_old, cap_old * sizeof (void *));
+}
+
+/*
+ * Pack a group's set
+ * Element order is not preserved
+ */
+static void
+group_pack_set(void **set, uint_t sz)
+{
+ uint_t i, j, free;
+
+ free = (uint_t)-1;
+
+ for (i = 0; i < sz; i++) {
+ if (set[i] == NULL && free == (uint_t)-1) {
+ /*
+ * Found a new free slot.
+ * Start packing from here.
+ */
+ free = i;
+ } else if (set[i] != NULL && free != (uint_t)-1) {
+ /*
+ * Found a slot to pack into
+ * an earlier free slot.
+ */
+ ASSERT(set[free] == NULL);
+ set[free] = set[i];
+ set[i] = NULL;
+
+ /*
+ * Find the next free slot
+ */
+ for (j = free + 1; set[j] != NULL; j++) {
+ ASSERT(j <= i);
+ if (j == i)
+ break;
+ }
+ if (set[j] == NULL)
+ free = j;
+ else
+ free = (uint_t)-1;
+ }
+ }
+}
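
To make the packing pass concrete, here is a stand-alone user-space model that follows the same algorithm (model_pack is a hypothetical name):

#include <assert.h>
#include <stddef.h>

static void
model_pack(void **set, unsigned sz)
{
	unsigned i, j, free = (unsigned)-1;

	for (i = 0; i < sz; i++) {
		if (set[i] == NULL && free == (unsigned)-1) {
			free = i;		/* first hole found */
		} else if (set[i] != NULL && free != (unsigned)-1) {
			set[free] = set[i];	/* fill the hole */
			set[i] = NULL;
			for (j = free + 1; set[j] != NULL; j++) {
				if (j == i)
					break;
			}
			free = (set[j] == NULL) ? j : (unsigned)-1;
		}
	}
}

int
main(void)
{
	int a, b, c;
	void *set[5] = { &a, NULL, &b, NULL, &c };

	model_pack(set, 5);
	/* survivors occupy the low slots; holes collect at the end */
	assert(set[0] != NULL && set[1] != NULL && set[2] != NULL);
	assert(set[3] == NULL && set[4] == NULL);
	return (0);
}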
+
+/*
+ * Initialize a group iterator cookie
+ */
+void
+group_iter_init(group_iter_t *iter)
+{
+ *iter = 0;
+}
+
+/*
+ * Iterate over the elements in a group
+ */
+void *
+group_iterate(group_t *g, group_iter_t *iter)
+{
+ uint_t idx = *iter;
+ void *data = NULL;
+
+ while (idx < g->grp_size) {
+ data = g->grp_set[idx++];
+ if (data != NULL)
+ break;
+ }
+ *iter = idx;
+
+ return (data);
+}
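
A minimal sketch of the intended calling pattern, assuming the declarations from <sys/group.h>; count_members is a hypothetical consumer:

static uint_t
count_members(group_t *g)
{
	group_iter_t iter;
	uint_t n = 0;

	group_iter_init(&iter);
	while (group_iterate(g, &iter) != NULL)
		n++;
	return (n);
}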
+
+/*
+ * Indexed access to a group's elements
+ */
+void *
+group_access_at(group_t *g, uint_t idx)
+{
+ if (idx >= g->grp_capacity)
+ return (NULL);
+
+ return (g->grp_set[idx]);
+}
+
+/*
+ * Add a new ordered group element at specified
+ * index. The group must already be of sufficient
+ * capacity to hold an element at the specified index.
+ *
+ * Returns 0 if the addition was successful, and -1 if the
+ * addition failed because the table was too small
+ */
+int
+group_add_at(group_t *g, void *e, uint_t idx)
+{
+ if (idx >= g->grp_capacity)
+ return (-1);
+
+ if (idx >= g->grp_size)
+ g->grp_size = idx + 1;
+
+ ASSERT(g->grp_set[idx] == NULL);
+ g->grp_set[idx] = e;
+ return (0);
+}
+
+/*
+ * Remove the entry at the specified index
+ */
+void
+group_remove_at(group_t *g, uint_t idx)
+{
+ ASSERT(idx < g->grp_capacity);
+ g->grp_set[idx] = NULL;
+}
diff --git a/usr/src/uts/common/os/lgrp.c b/usr/src/uts/common/os/lgrp.c
index 83f67e1088..2007f7b158 100644
--- a/usr/src/uts/common/os/lgrp.c
+++ b/usr/src/uts/common/os/lgrp.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -89,7 +89,7 @@
#include <sys/cmn_err.h>
#include <sys/kstat.h>
#include <sys/sysmacros.h>
-#include <sys/chip.h>
+#include <sys/pg.h>
#include <sys/promif.h>
#include <sys/sdt.h>
@@ -314,8 +314,6 @@ lgrp_root_init(void)
klgrpset_clear(lgrp_root->lgrp_children);
klgrpset_clear(lgrp_root->lgrp_leaves);
lgrp_root->lgrp_parent = NULL;
- lgrp_root->lgrp_chips = NULL;
- lgrp_root->lgrp_chipcnt = 0;
lgrp_root->lgrp_latency = lgrp_plat_latency(hand, hand);
for (i = 0; i < LGRP_RSRC_COUNT; i++)
@@ -679,7 +677,6 @@ lgrp_cpu_init(struct cpu *cp)
lgrp_t *my_lgrp;
lgrp_id_t lgrpid;
struct cpu *cptr;
- struct chip *chp;
/*
* This is the first time through if the resource set
@@ -795,33 +792,6 @@ lgrp_cpu_init(struct cpu *cp)
cptr->cpu_prev_lgrp = cp;
}
my_lgrp->lgrp_cpucnt++;
-
- /*
- * Add this cpu's chip to the per lgroup list
- * if necessary
- */
- if (cp->cpu_chip->chip_lgrp == NULL) {
- struct chip *lcpr;
-
- chp = cp->cpu_chip;
-
- if (my_lgrp->lgrp_chipcnt == 0) {
- my_lgrp->lgrp_chips = chp;
- chp->chip_next_lgrp =
- chp->chip_prev_lgrp = chp;
- } else {
- lcpr = my_lgrp->lgrp_chips;
- chp->chip_next_lgrp = lcpr;
- chp->chip_prev_lgrp =
- lcpr->chip_prev_lgrp;
- lcpr->chip_prev_lgrp->chip_next_lgrp =
- chp;
- lcpr->chip_prev_lgrp = chp;
- }
- chp->chip_lgrp = my_lgrp;
- chp->chip_balance = chp->chip_next_lgrp;
- my_lgrp->lgrp_chipcnt++;
- }
}
lgrp_t *
@@ -890,8 +860,6 @@ lgrp_create(void)
my_lgrp->lgrp_cpu = NULL;
my_lgrp->lgrp_cpucnt = 0;
- my_lgrp->lgrp_chips = NULL;
- my_lgrp->lgrp_chipcnt = 0;
if (my_lgrp->lgrp_kstat != NULL)
lgrp_kstat_reset(lgrpid);
@@ -945,8 +913,6 @@ lgrp_destroy(lgrp_t *lgrp)
lgrp->lgrp_cpu = NULL;
lgrp->lgrp_cpucnt = 0;
- lgrp->lgrp_chipcnt = 0;
- lgrp->lgrp_chips = NULL;
nlgrps--;
}
@@ -1022,7 +988,6 @@ lgrp_cpu_fini(struct cpu *cp, lgrp_id_t lgrpid)
lgrp_t *my_lgrp;
struct cpu *prev;
struct cpu *next;
- chip_t *chp;
ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);
@@ -1042,42 +1007,6 @@ lgrp_cpu_fini(struct cpu *cp, lgrp_id_t lgrpid)
my_lgrp->lgrp_cpucnt--;
/*
- * If the last CPU on it's chip is being offlined
- * then remove this chip from the per lgroup list.
- *
- * This is also done for the boot CPU when it needs
- * to move between lgroups as a consequence of
- * null proc lpa.
- */
- chp = cp->cpu_chip;
- if (chp->chip_ncpu == 0 || !lgrp_initialized) {
-
- chip_t *chpp;
-
- if (--my_lgrp->lgrp_chipcnt == 0)
- my_lgrp->lgrp_chips = NULL;
- else if (my_lgrp->lgrp_chips == chp)
- my_lgrp->lgrp_chips = chp->chip_next_lgrp;
-
- /*
- * Walk this lgroup's chip list looking for chips that
- * may try to balance against the one that's leaving
- */
- for (chpp = chp->chip_next_lgrp; chpp != chp;
- chpp = chpp->chip_next_lgrp) {
- if (chpp->chip_balance == chp)
- chpp->chip_balance = chp->chip_next_lgrp;
- }
-
- chp->chip_prev_lgrp->chip_next_lgrp = chp->chip_next_lgrp;
- chp->chip_next_lgrp->chip_prev_lgrp = chp->chip_prev_lgrp;
-
- chp->chip_next_lgrp = chp->chip_prev_lgrp = NULL;
- chp->chip_lgrp = NULL;
- chp->chip_balance = NULL;
- }
-
- /*
* Removing last CPU in lgroup, so update lgroup topology
*/
if (my_lgrp->lgrp_cpucnt == 0) {
@@ -1661,7 +1590,7 @@ lgrp_phys_to_lgrp(u_longlong_t physaddr)
* Return the leaf lgroup containing the given CPU
*
* The caller needs to take precautions necessary to prevent
- * "cpu" from going away across a call to this function.
+ * "cpu" and its lpl from going away across a call to this function.
* hint: kpreempt_disable()/kpreempt_enable()
*/
static lgrp_t *
diff --git a/usr/src/uts/common/os/pg.c b/usr/src/uts/common/os/pg.c
new file mode 100644
index 0000000000..cb8295b38e
--- /dev/null
+++ b/usr/src/uts/common/os/pg.c
@@ -0,0 +1,624 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/thread.h>
+#include <sys/cpuvar.h>
+#include <sys/cpupart.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/kstat.h>
+#include <sys/processor.h>
+#include <sys/disp.h>
+#include <sys/group.h>
+#include <sys/pg.h>
+
+/*
+ * Processor groups
+ *
+ * With the introduction of Chip Multi-Threaded (CMT) processor architectures,
+ * it is no longer necessarily true that a given physical processor module
+ * will present itself as a single schedulable entity (cpu_t). Rather, each
+ * chip and/or processor core may present itself as one or more "logical" CPUs.
+ *
+ * The logical CPUs presented may share physical components such as caches,
+ * data pipes, execution pipelines, FPUs, etc. It is advantageous to have the
+ * kernel be aware of the relationships existing between logical CPUs so that
+ * the appropriate optimizations may be employed.
+ *
+ * The processor group abstraction represents a set of logical CPUs that
+ * generally share some sort of physical or characteristic relationship.
+ *
+ * In the case of a physical sharing relationship, the CPUs in the group may
+ * share a pipeline, cache or floating point unit. In the case of a logical
+ * relationship, a PG may represent the set of CPUs in a processor set, or the
+ * set of CPUs running at a particular clock speed.
+ *
+ * The generic processor group structure, pg_t, contains the elements generic
+ * to a group of CPUs. Depending on the nature of the CPU relationship
+ * (LOGICAL or PHYSICAL), a pointer to a pg may be recast to a "view" of that
+ * PG where more specific data is represented.
+ *
+ * As an example, a PG representing a PHYSICAL relationship, may be recast to
+ * a pghw_t, where data further describing the hardware sharing relationship
+ * is maintained. See pghw.c and pghw.h for details on physical PGs.
+ *
+ * At this time a more specialized casting of a PG representing a LOGICAL
+ * relationship has not been implemented, but the architecture allows for this
+ * in the future.
+ *
+ * Processor Group Classes
+ *
+ * Processor group consumers may wish to maintain and associate specific
+ * data with the PGs they create. For this reason, a mechanism for creating
+ * class specific PGs exists. Classes may overload the default functions for
+ * creating, destroying, and associating CPUs with PGs, and may also register
+ * class specific callbacks to be invoked when the CPU related system
+ * configuration changes. Class specific data is stored/associated with
+ * PGs by incorporating the pg_t (or pghw_t, as appropriate), as the first
+ * element of a class specific PG object. In memory, such a structure may look
+ * like:
+ *
+ * ----------------------- - - -
+ * | common | | | | <--(pg_t *)
+ * ----------------------- | | -
+ * | HW specific | | | <-----(pghw_t *)
+ * ----------------------- | -
+ * | class specific | | <-------(pg_cmt_t *)
+ * ----------------------- -
+ *
+ * Access to the PG class specific data can be had by casting a pointer to
+ * its class specific view.
+ */
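
As a concrete sketch of this layout, consider a hypothetical class object; the my_cmt_pg type and its field are illustrative, and only the embed-the-generic-structure-first rule comes from the design above:

typedef struct my_cmt_pg {
	pghw_t	cmt_pghw;	/* generic pg_t plus hardware view; must be first */
	int	cmt_private;	/* class specific data follows */
} my_cmt_pg_t;

/*
 * Because the structures share a starting address, each view of a
 * pg_t * known to belong to this class is recovered with a cast:
 *
 *	pg_t *pg = ...;
 *	pghw_t *hw = (pghw_t *)pg;
 *	my_cmt_pg_t *cmt = (my_cmt_pg_t *)pg;
 */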
+
+static pg_t *pg_alloc_default(pg_class_t);
+static void pg_free_default(pg_t *);
+
+/*
+ * Bootstrap CPU specific PG data
+ * See pg_cpu_bootstrap()
+ */
+static cpu_pg_t bootstrap_pg_data;
+
+/*
+ * Bitset of allocated PG ids (they are sequential)
+ * and the next free id in the set.
+ */
+static bitset_t pg_id_set;
+static pgid_t pg_id_next = 0;
+
+/*
+ * Default and externed PG ops vectors
+ */
+static struct pg_ops pg_ops_default = {
+ pg_alloc_default, /* alloc */
+ pg_free_default, /* free */
+ NULL, /* cpu_init */
+ NULL, /* cpu_fini */
+ NULL, /* cpu_active */
+ NULL, /* cpu_inactive */
+ NULL, /* cpupart_in */
+ NULL, /* cpupart_out */
+ NULL, /* cpupart_move */
+ NULL, /* cpu_belongs */
+};
+
+/*
+ * Class specific PG allocation callbacks
+ */
+#define PG_ALLOC(class) \
+ (pg_classes[class].pgc_ops->alloc ? \
+ pg_classes[class].pgc_ops->alloc() : \
+ pg_classes[pg_default_cid].pgc_ops->alloc())
+
+#define PG_FREE(pg) \
+ ((pg)->pg_class->pgc_ops->free ? \
+ (pg)->pg_class->pgc_ops->free(pg) : \
+ pg_classes[pg_default_cid].pgc_ops->free(pg)) \
+
+
+/*
+ * Class specific membership test callback
+ */
+#define PG_CPU_BELONGS(pg, cp) \
+ ((pg)->pg_class->pgc_ops->cpu_belongs ? \
+ (pg)->pg_class->pgc_ops->cpu_belongs(pg, cp) : 0) \
+
+/*
+ * CPU configuration callbacks
+ */
+#define PG_CPU_INIT(class, cp) \
+{ \
+ if (pg_classes[class].pgc_ops->cpu_init) \
+ pg_classes[class].pgc_ops->cpu_init(cp); \
+}
+
+#define PG_CPU_FINI(class, cp) \
+{ \
+ if (pg_classes[class].pgc_ops->cpu_fini) \
+ pg_classes[class].pgc_ops->cpu_fini(cp); \
+}
+
+#define PG_CPU_ACTIVE(class, cp) \
+{ \
+ if (pg_classes[class].pgc_ops->cpu_active) \
+ pg_classes[class].pgc_ops->cpu_active(cp); \
+}
+
+#define PG_CPU_INACTIVE(class, cp) \
+{ \
+ if (pg_classes[class].pgc_ops->cpu_inactive) \
+ pg_classes[class].pgc_ops->cpu_inactive(cp); \
+}
+
+/*
+ * CPU / cpupart configuration callbacks
+ */
+#define PG_CPUPART_IN(class, cp, pp) \
+{ \
+ if (pg_classes[class].pgc_ops->cpupart_in) \
+ pg_classes[class].pgc_ops->cpupart_in(cp, pp); \
+}
+
+#define PG_CPUPART_OUT(class, cp, pp) \
+{ \
+ if (pg_classes[class].pgc_ops->cpupart_out) \
+ pg_classes[class].pgc_ops->cpupart_out(cp, pp); \
+}
+
+#define PG_CPUPART_MOVE(class, cp, old, new) \
+{ \
+ if (pg_classes[class].pgc_ops->cpupart_move) \
+ pg_classes[class].pgc_ops->cpupart_move(cp, old, new); \
+}
+
+
+
+static pg_class_t *pg_classes;
+static int pg_nclasses;
+
+static pg_cid_t pg_default_cid;
+
+/*
+ * Initialize the common PG subsystem; CPU 0 is initialized in pg_cpu0_init() below
+ */
+void
+pg_init(void)
+{
+ pg_default_cid =
+ pg_class_register("default", &pg_ops_default, PGR_LOGICAL);
+}
+
+/*
+ * Perform CPU 0 initialization
+ */
+void
+pg_cpu0_init(void)
+{
+ extern void pghw_physid_create();
+
+ /*
+ * Create the physical ID cache for the boot CPU
+ */
+ pghw_physid_create(CPU);
+
+ /*
+ * pg_cpu_* require that cpu_lock be held
+ */
+ mutex_enter(&cpu_lock);
+
+ pg_cpu_init(CPU);
+ pg_cpupart_in(CPU, &cp_default);
+ pg_cpu_active(CPU);
+
+ mutex_exit(&cpu_lock);
+}
+
+/*
+ * Register a new PG class
+ */
+pg_cid_t
+pg_class_register(char *name, struct pg_ops *ops, pg_relation_t relation)
+{
+ pg_class_t *newclass;
+ pg_class_t *classes_old;
+ id_t cid;
+
+ mutex_enter(&cpu_lock);
+
+ /*
+ * Allocate a new pg_class_t in the pg_classes array
+ */
+ if (pg_nclasses == 0) {
+ pg_classes = kmem_zalloc(sizeof (pg_class_t), KM_SLEEP);
+ } else {
+ classes_old = pg_classes;
+ pg_classes =
+ kmem_zalloc(sizeof (pg_class_t) * (pg_nclasses + 1),
+ KM_SLEEP);
+ (void) kcopy(classes_old, pg_classes,
+ sizeof (pg_class_t) * pg_nclasses);
+ kmem_free(classes_old, sizeof (pg_class_t) * pg_nclasses);
+ }
+
+ cid = pg_nclasses++;
+ newclass = &pg_classes[cid];
+
+ (void) strncpy(newclass->pgc_name, name, PG_CLASS_NAME_MAX);
+ newclass->pgc_id = cid;
+ newclass->pgc_ops = ops;
+ newclass->pgc_relation = relation;
+
+ mutex_exit(&cpu_lock);
+
+ return (cid);
+}
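
A sketch of how a class might register itself; the class below is hypothetical, and NULL entries either fall back to the default class (alloc/free) or are skipped by the PG_* dispatch macros above:

static void my_cpu_init(cpu_t *);

static struct pg_ops pg_ops_my = {
	NULL,		/* alloc: fall back to the default class */
	NULL,		/* free: fall back to the default class */
	my_cpu_init,	/* cpu_init */
	NULL,		/* cpu_fini */
	NULL,		/* cpu_active */
	NULL,		/* cpu_inactive */
	NULL,		/* cpupart_in */
	NULL,		/* cpupart_out */
	NULL,		/* cpupart_move */
	NULL,		/* cpu_belongs */
};

static pg_cid_t my_cid;

void
my_class_init(void)
{
	my_cid = pg_class_register("my_class", &pg_ops_my, PGR_LOGICAL);
}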
+
+/*
+ * Try to find an existing pg in set in which to place cp.
+ * Returns the pg if found, and NULL otherwise.
+ * In the event that the CPU could belong to multiple
+ * PGs in the set, the first matching PG will be returned.
+ */
+pg_t *
+pg_cpu_find_pg(cpu_t *cp, group_t *set)
+{
+ pg_t *pg;
+ group_iter_t i;
+
+ group_iter_init(&i);
+ while ((pg = group_iterate(set, &i)) != NULL) {
+ /*
+ * Ask the class if the CPU belongs here
+ */
+ if (PG_CPU_BELONGS(pg, cp))
+ return (pg);
+ }
+ return (NULL);
+}
+
+/*
+ * Iterate over the CPUs in a PG after initializing
+ * the iterator with PG_CPU_ITR_INIT()
+ */
+cpu_t *
+pg_cpu_next(pg_cpu_itr_t *itr)
+{
+ cpu_t *cpu;
+ pg_t *pg = itr->pg;
+
+ cpu = group_iterate(&pg->pg_cpus, &itr->position);
+ return (cpu);
+}
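
A sketch of that iteration pattern, assuming the PG_CPU_ITR_INIT() initializer from <sys/pg.h>; count_pg_cpus is a hypothetical consumer:

static uint_t
count_pg_cpus(pg_t *pg)
{
	pg_cpu_itr_t itr;
	uint_t n = 0;

	PG_CPU_ITR_INIT(pg, itr);
	while (pg_cpu_next(&itr) != NULL)
		n++;
	return (n);
}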
+
+/*
+ * Create a PG of a given class.
+ * This routine may block.
+ */
+pg_t *
+pg_create(pg_cid_t cid)
+{
+ pg_t *pg;
+ pgid_t id;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * Call the class specific PG allocation routine
+ */
+ pg = PG_ALLOC(cid);
+ pg->pg_class = &pg_classes[cid];
+ pg->pg_relation = pg->pg_class->pgc_relation;
+
+ /*
+ * Find the next free sequential pg id
+ */
+ do {
+ if (pg_id_next >= bitset_capacity(&pg_id_set))
+ bitset_resize(&pg_id_set, pg_id_next + 1);
+ id = pg_id_next++;
+ } while (bitset_in_set(&pg_id_set, id));
+
+ pg->pg_id = id;
+ bitset_add(&pg_id_set, pg->pg_id);
+
+ /*
+ * Create the PG's CPU group
+ */
+ group_create(&pg->pg_cpus);
+
+ return (pg);
+}
+
+/*
+ * Destroy a PG.
+ * This routine may block.
+ */
+void
+pg_destroy(pg_t *pg)
+{
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ group_destroy(&pg->pg_cpus);
+
+ /*
+ * Unassign the pg_id
+ */
+ if (pg_id_next > pg->pg_id)
+ pg_id_next = pg->pg_id;
+ bitset_del(&pg_id_set, pg->pg_id);
+
+ /*
+ * Invoke the class specific de-allocation routine
+ */
+ PG_FREE(pg);
+}
+
+/*
+ * Add the CPU "cp" to processor group "pg"
+ * This routine may block.
+ */
+void
+pg_cpu_add(pg_t *pg, cpu_t *cp)
+{
+ int err;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /* This adds the CPU to the PG's CPU group */
+ err = group_add(&pg->pg_cpus, cp, GRP_RESIZE);
+ ASSERT(err == 0);
+
+ /* This adds the PG to the CPUs PG group */
+ ASSERT(cp->cpu_pg != &bootstrap_pg_data);
+ err = group_add(&cp->cpu_pg->pgs, pg, GRP_RESIZE);
+ ASSERT(err == 0);
+}
+
+/*
+ * Remove "cp" from "pg".
+ * This routine may block.
+ */
+void
+pg_cpu_delete(pg_t *pg, cpu_t *cp)
+{
+ int err;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /* Remove the CPU from the PG */
+ err = group_remove(&pg->pg_cpus, cp, GRP_RESIZE);
+ ASSERT(err == 0);
+
+ /* Remove the PG from the CPU's PG group */
+ ASSERT(cp->cpu_pg != &bootstrap_pg_data);
+ err = group_remove(&cp->cpu_pg->pgs, pg, GRP_RESIZE);
+ ASSERT(err == 0);
+}
+
+/*
+ * Allocate a CPU's PG data. This hangs off struct cpu at cpu_pg
+ */
+static cpu_pg_t *
+pg_cpu_data_alloc(void)
+{
+ cpu_pg_t *pgd;
+
+ pgd = kmem_zalloc(sizeof (cpu_pg_t), KM_SLEEP);
+ group_create(&pgd->pgs);
+ group_create(&pgd->cmt_pgs);
+
+ return (pgd);
+}
+
+/*
+ * Free the CPU's PG data.
+ */
+static void
+pg_cpu_data_free(cpu_pg_t *pgd)
+{
+ group_destroy(&pgd->pgs);
+ group_destroy(&pgd->cmt_pgs);
+ kmem_free(pgd, sizeof (cpu_pg_t));
+}
+
+/*
+ * A new CPU is coming into the system, either via booting or DR.
+ * Allocate its PG data, and notify all registered classes about
+ * the new CPU.
+ *
+ * This routine may block.
+ */
+void
+pg_cpu_init(cpu_t *cp)
+{
+ pg_cid_t i;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * Allocate and size the per CPU pg data
+ */
+ cp->cpu_pg = pg_cpu_data_alloc();
+
+ /*
+ * Notify all registered classes about the new CPU
+ */
+ for (i = 0; i < pg_nclasses; i++)
+ PG_CPU_INIT(i, cp);
+}
+
+/*
+ * This CPU is being deleted from the system. Notify the classes
+ * and free up the CPU's PG data.
+ */
+void
+pg_cpu_fini(cpu_t *cp)
+{
+ pg_cid_t i;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * This can happen if the CPU coming into the system
+ * failed to power on.
+ */
+ if (cp->cpu_pg == NULL ||
+ cp->cpu_pg == &bootstrap_pg_data)
+ return;
+
+ for (i = 0; i < pg_nclasses; i++)
+ PG_CPU_FINI(i, cp);
+
+ pg_cpu_data_free(cp->cpu_pg);
+ cp->cpu_pg = NULL;
+}
+
+/*
+ * This CPU is becoming active (online)
+ * This routine may not block as it is called from paused CPUs
+ * context.
+ */
+void
+pg_cpu_active(cpu_t *cp)
+{
+ pg_cid_t i;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * Notify all registered classes that the CPU is becoming active
+ */
+ for (i = 0; i < pg_nclasses; i++)
+ PG_CPU_ACTIVE(i, cp);
+}
+
+/*
+ * This CPU is going inactive (offline)
+ * This routine may not block, as it is called from paused
+ * CPUs context.
+ */
+void
+pg_cpu_inactive(cpu_t *cp)
+{
+ pg_cid_t i;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * Notify all registered classes that the CPU is going inactive
+ */
+ for (i = 0; i < pg_nclasses; i++)
+ PG_CPU_INACTIVE(i, cp);
+}
+
+/*
+ * Invoked when the CPU is about to move into the partition
+ * This routine may block.
+ */
+void
+pg_cpupart_in(cpu_t *cp, cpupart_t *pp)
+{
+ int i;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * Notify all registered classes that the
+ * CPU is about to enter the CPU partition
+ */
+ for (i = 0; i < pg_nclasses; i++)
+ PG_CPUPART_IN(i, cp, pp);
+}
+
+/*
+ * Invoked when the CPU is about to move out of the partition
+ * This routine may block.
+ */
+/*ARGSUSED*/
+void
+pg_cpupart_out(cpu_t *cp, cpupart_t *pp)
+{
+ int i;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * Notify all registered classes that the
+ * CPU is about to leave the CPU partition
+ */
+ for (i = 0; i < pg_nclasses; i++)
+ PG_CPUPART_OUT(i, cp, pp);
+}
+
+/*
+ * Invoked when the CPU is *moving* partitions.
+ *
+ * This routine may not block, as it is called from paused CPUs
+ * context.
+ */
+void
+pg_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
+{
+ int i;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * Notify all registered classes that the
+ * CPU is moving between CPU partitions
+ */
+ for (i = 0; i < pg_nclasses; i++)
+ PG_CPUPART_MOVE(i, cp, oldpp, newpp);
+}
+
+/*
+ * Provide the specified CPU a bootstrap pg
+ * This is needed to allow sane behaviour if any PG consuming
+ * code needs to deal with a partially initialized CPU
+ */
+void
+pg_cpu_bootstrap(cpu_t *cp)
+{
+ cp->cpu_pg = &bootstrap_pg_data;
+}
+
+/*ARGSUSED*/
+static pg_t *
+pg_alloc_default(pg_class_t class)
+{
+ return (kmem_zalloc(sizeof (pg_t), KM_SLEEP));
+}
+
+/*ARGSUSED*/
+static void
+pg_free_default(struct pg *pg)
+{
+ kmem_free(pg, sizeof (pg_t));
+}
diff --git a/usr/src/uts/common/os/pghw.c b/usr/src/uts/common/os/pghw.c
new file mode 100644
index 0000000000..e2dc2a38f2
--- /dev/null
+++ b/usr/src/uts/common/os/pghw.c
@@ -0,0 +1,420 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/thread.h>
+#include <sys/cpuvar.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/group.h>
+#include <sys/pg.h>
+#include <sys/pghw.h>
+
+/*
+ * Processor Groups: Hardware sharing relationship layer
+ *
+ * This file implements an extension to Processor Groups to capture
+ * hardware sharing relationships existing between logical CPUs. Examples of
+ * hardware sharing relationships include shared caches on some CMT
+ * processor architectures, or shared local memory controllers on NUMA
+ * based system architectures.
+ *
+ * The pghw_t structure represents the extended PG. The first member
+ * of the structure is the generic pg_t with the pghw specific members
+ * following. The generic pg_t *must* remain the first member of the
+ * structure as the code uses casting of structure references to access
+ * the generic pg_t structure elements.
+ *
+ * In addition to the generic CPU grouping, physical PGs have a hardware
+ * sharing relationship enumerated "type", and an instance id. The enumerated
+ * type is defined by the pghw_type_t enumeration, while the instance id
+ * uniquely identifies the sharing instance from among others of the same
+ * hardware sharing type.
+ *
+ * The physical PGs are organized into an overall hierarchy, and are tracked
+ * in a number of different per CPU, and per pghw_type_t type groups.
+ * As an example:
+ *
+ * -------------
+ * | pg_hw |
+ * | (group_t) |
+ * -------------
+ * || ============================
+ * ||\\-----------------------// \\ \\
+ * || | hwset (PGC_HW_CHIP) | ------------- -------------
+ * || | (group_t) | | pghw_t | | pghw_t |
+ * || ----------------------- | chip 0 | | chip 1 |
+ * || ------------- -------------
+ * || \\ \\ \\ \\ \\ \\ \\ \\
+ * || cpu cpu cpu cpu cpu cpu cpu cpu
+ * ||
+ * || ============================
+ * ||\\-----------------------// \\ \\
+ * || | hwset (PGC_HW_IPIPE)| ------------- -------------
+ * || | (group_t) | | pghw_t | | pghw_t |
+ * || ----------------------- | ipipe 0 | | ipipe 1 |
+ * || ------------- -------------
+ * || \\ \\ \\ \\
+ * || cpu cpu cpu cpu
+ * ...
+ *
+ *
+ * The top level pg_hw is a group of "hwset" groups. Each hwset holds a group
+ * of physical PGs of the same hardware sharing type. Within each hwset, the
+ * PG's instance id uniquely identifies the grouping relationship among other
+ * groupings of the same sharing type. The instance id for a grouping is
+ * platform defined, and in some cases may be used by platform code as a handle
+ * to search for a particular relationship instance.
+ *
+ * Each physical PG (by virtue of the embedded pg_t) contains a group of CPUs
+ * that participate in the sharing relationship. Each CPU also has associated
+ * with it a grouping tracking the PGs in which the CPU belongs. This can be
+ * used to iterate over the various relationships in which the CPU participates
+ * (the CPU's chip, cache, lgroup, etc.).
+ *
+ * The hwsets are created dynamically as new hardware sharing relationship types
+ * are instantiated. They are never destroyed, as once a given relationship
+ * type appears in the system, it is quite likely that at least one instance of
+ * that relationship will always persist as long as the system is running.
+ */
+
+static group_t *pg_hw; /* top level pg hw group */
+
+/*
+ * Lookup table mapping hardware sharing relationships with hierarchy levels
+ */
+static int pghw_level_table[PGHW_NUM_COMPONENTS];
+
+/*
+ * Physical PG kstats
+ */
+struct pghw_kstat {
+ kstat_named_t pg_id;
+ kstat_named_t pg_class;
+ kstat_named_t pg_ncpus;
+ kstat_named_t pg_instance_id;
+ kstat_named_t pg_hw;
+} pghw_kstat = {
+ { "id", KSTAT_DATA_UINT64 },
+ { "pg_class", KSTAT_DATA_STRING },
+ { "ncpus", KSTAT_DATA_UINT64 },
+ { "instance_id", KSTAT_DATA_UINT64 },
+ { "hardware", KSTAT_DATA_STRING },
+};
+
+kmutex_t pghw_kstat_lock;
+
+/*
+ * hwset operations
+ */
+static group_t *pghw_set_create(pghw_type_t);
+static void pghw_set_add(group_t *, pghw_t *);
+static void pghw_set_remove(group_t *, pghw_t *);
+
+/*
+ * Initialize the physical portion of a physical PG
+ */
+void
+pghw_init(pghw_t *pg, cpu_t *cp, pghw_type_t hw)
+{
+ group_t *hwset;
+
+ if ((hwset = pghw_set_lookup(hw)) == NULL) {
+ /*
+ * Haven't seen this hardware type yet
+ */
+ hwset = pghw_set_create(hw);
+ }
+
+ pghw_set_add(hwset, pg);
+ pg->pghw_hw = hw;
+ pg->pghw_instance =
+ pg_plat_hw_instance_id(cp, hw);
+ pghw_kstat_create(pg);
+}
+
+/*
+ * Teardown the physical portion of a physical PG
+ */
+void
+pghw_fini(pghw_t *pg)
+{
+ group_t *hwset;
+
+ hwset = pghw_set_lookup(pg->pghw_hw);
+ ASSERT(hwset != NULL);
+
+ pghw_set_remove(hwset, pg);
+ pg->pghw_instance = (id_t)PGHW_INSTANCE_ANON;
+ pg->pghw_hw = (pghw_type_t)-1;
+
+ if (pg->pghw_kstat)
+ kstat_delete(pg->pghw_kstat);
+}
+
+/*
+ * Find an existing physical PG in which to place
+ * the given CPU for the specified hardware sharing
+ * relationship
+ */
+pghw_t *
+pghw_place_cpu(cpu_t *cp, pghw_type_t hw)
+{
+ group_t *hwset;
+
+ if ((hwset = pghw_set_lookup(hw)) == NULL) {
+ return (NULL);
+ }
+
+ return ((pghw_t *)pg_cpu_find_pg(cp, hwset));
+}
+
+/*
+ * Find the pg representing the hw sharing relationship in which
+ * cp belongs
+ */
+pghw_t *
+pghw_find_pg(cpu_t *cp, pghw_type_t hw)
+{
+ group_iter_t i;
+ pghw_t *pg;
+
+ group_iter_init(&i);
+ while ((pg = group_iterate(&cp->cpu_pg->pgs, &i)) != NULL) {
+ if (pg->pghw_hw == hw)
+ return (pg);
+ }
+ return (NULL);
+}
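
As a usage sketch, here is a hypothetical predicate built on pghw_find_pg(); the caller is assumed to hold cpu_lock (or otherwise keep both CPUs' cpu_pg groups stable):

static int
cpus_share_chip(cpu_t *a, cpu_t *b)
{
	pghw_t *pg_a = pghw_find_pg(a, PGHW_CHIP);
	pghw_t *pg_b = pghw_find_pg(b, PGHW_CHIP);

	return (pg_a != NULL && pg_a == pg_b);
}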
+
+/*
+ * Find the PG of the given hardware sharing relationship
+ * type with the given instance id
+ */
+pghw_t *
+pghw_find_by_instance(id_t id, pghw_type_t hw)
+{
+ group_iter_t i;
+ group_t *set;
+ pghw_t *pg;
+
+ set = pghw_set_lookup(hw);
+ if (!set)
+ return (NULL);
+
+ group_iter_init(&i);
+ while ((pg = group_iterate(set, &i)) != NULL) {
+ if (pg->pghw_instance == id)
+ return (pg);
+ }
+ return (NULL);
+}
+
+/*
+ * CPU physical ID cache creation / destruction
+ * The cache's elements are initialized to the CPU's id
+ */
+void
+pghw_physid_create(cpu_t *cp)
+{
+ int i;
+
+ cp->cpu_physid = kmem_alloc(sizeof (cpu_physid_t), KM_SLEEP);
+
+ for (i = 0; i < (sizeof (cpu_physid_t) / sizeof (id_t)); i++) {
+ ((id_t *)cp->cpu_physid)[i] = cp->cpu_id;
+ }
+}
+
+void
+pghw_physid_destroy(cpu_t *cp)
+{
+ if (cp->cpu_physid) {
+ kmem_free(cp->cpu_physid, sizeof (cpu_physid_t));
+ cp->cpu_physid = NULL;
+ }
+}
+
+/*
+ * Return a sequential level identifier for the specified
+ * hardware sharing relationship
+ */
+int
+pghw_level(pghw_type_t hw)
+{
+ return (pg_plat_hw_level(hw));
+}
+
+/*
+ * Create a new, empty hwset.
+ * This routine may block, and must not be called from any
+ * paused CPU context.
+ */
+static group_t *
+pghw_set_create(pghw_type_t hw)
+{
+ group_t *g;
+ int ret;
+
+ /*
+ * Create the top level PG hw group if it doesn't already exist
+ * This is a "set" of hardware sets, that is ordered (and indexed)
+ * by the pghw_type_t enum.
+ */
+ if (pg_hw == NULL) {
+ pg_hw = kmem_alloc(sizeof (group_t), KM_SLEEP);
+ group_create(pg_hw);
+ group_expand(pg_hw, (uint_t)PGHW_NUM_COMPONENTS);
+ }
+
+ /*
+ * Create the new hwset
+ * Add it to the top level pg_hw group.
+ */
+ g = kmem_alloc(sizeof (group_t), KM_SLEEP);
+ group_create(g);
+
+ ret = group_add_at(pg_hw, g, (uint_t)hw);
+ ASSERT(ret == 0);
+
+ /*
+ * Update the table that maps hardware sharing relationships
+ * to hierarchy levels
+ */
+ ASSERT(pghw_level_table[hw] == 0);
+ pghw_level_table[hw] = pg_plat_hw_level(hw);
+
+ return (g);
+}
+
+/*
+ * Find the hwset associated with the given hardware sharing type
+ */
+group_t *
+pghw_set_lookup(pghw_type_t hw)
+{
+ group_t *hwset;
+
+ if (pg_hw == NULL)
+ return (NULL);
+
+ hwset = GROUP_ACCESS(pg_hw, (uint_t)hw);
+ return (hwset);
+}
+
+/*
+ * Add a PG to a hwset
+ */
+static void
+pghw_set_add(group_t *hwset, pghw_t *pg)
+{
+ (void) group_add(hwset, pg, GRP_RESIZE);
+}
+
+/*
+ * Remove a PG from a hwset
+ */
+static void
+pghw_set_remove(group_t *hwset, pghw_t *pg)
+{
+ int result;
+
+ result = group_remove(hwset, pg, GRP_RESIZE);
+ ASSERT(result == 0);
+}
+
+
+/*
+ * Return a string name given a pg_hw sharing type
+ */
+#define PGHW_TYPE_NAME_MAX 8
+
+static char *
+pghw_type_string(pghw_type_t hw)
+{
+ switch (hw) {
+ case PGHW_IPIPE:
+ return ("ipipe");
+ case PGHW_CACHE:
+ return ("cache");
+ case PGHW_FPU:
+ return ("fpu");
+ case PGHW_CHIP:
+ return ("chip");
+ case PGHW_MEMORY:
+ return ("memory");
+ default:
+ return ("unknown");
+ }
+}
+
+/*
+ * Create / Update routines for PG hw kstats
+ *
+ * It is the intention of these kstats to provide some level
+ * of informational / debugging observability into the types
+ * and nature of the system's detected hardware sharing relationships
+ */
+void
+pghw_kstat_create(pghw_t *pg)
+{
+ /*
+ * Create a physical pg kstat
+ */
+ if ((pg->pghw_kstat = kstat_create("pg", ((pg_t *)pg)->pg_id,
+ "pg", "pg", KSTAT_TYPE_NAMED,
+ sizeof (pghw_kstat) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL)) != NULL) {
+ pg->pghw_kstat->ks_data_size += PG_CLASS_NAME_MAX;
+ pg->pghw_kstat->ks_data_size += PGHW_TYPE_NAME_MAX;
+ pg->pghw_kstat->ks_lock = &pghw_kstat_lock;
+ pg->pghw_kstat->ks_data = &pghw_kstat;
+ pg->pghw_kstat->ks_update = pghw_kstat_update;
+ pg->pghw_kstat->ks_private = pg;
+ kstat_install(pg->pghw_kstat);
+ }
+}
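
Once installed, these kstats are visible from user land, e.g. with kstat -m pg, which reports the id, pg_class, ncpus, instance_id, and hardware fields filled in by pghw_kstat_update() below.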
+
+int
+pghw_kstat_update(kstat_t *ksp, int rw)
+{
+ struct pghw_kstat *pgsp = &pghw_kstat;
+ pghw_t *pg = ksp->ks_private;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ pgsp->pg_id.value.ui64 = ((pg_t *)pg)->pg_id;
+ pgsp->pg_ncpus.value.ui64 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus);
+ pgsp->pg_instance_id.value.ui64 = (uint64_t)pg->pghw_instance;
+ kstat_named_setstr(&pgsp->pg_class, ((pg_t *)pg)->pg_class->pgc_name);
+ kstat_named_setstr(&pgsp->pg_hw, pghw_type_string(pg->pghw_hw));
+
+ return (0);
+}