Diffstat (limited to 'usr/src/uts/common/os')
-rw-r--r-- | usr/src/uts/common/os/bitset.c | 168
-rw-r--r-- | usr/src/uts/common/os/chip.c | 576
-rw-r--r-- | usr/src/uts/common/os/clock.c | 37
-rw-r--r-- | usr/src/uts/common/os/cpu.c | 40
-rw-r--r-- | usr/src/uts/common/os/group.c | 322
-rw-r--r-- | usr/src/uts/common/os/lgrp.c | 77
-rw-r--r-- | usr/src/uts/common/os/pg.c | 624
-rw-r--r-- | usr/src/uts/common/os/pghw.c | 420
8 files changed, 1568 insertions, 696 deletions
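This change replaces the chip_t based CMT scheduler support with processor groups (PGs), built on two new generic collection primitives: bitset_t (bitset.c) and group_t (group.c). As orientation for the diff below, here is a minimal, hypothetical consumer-side sketch of the two new interfaces; example_track_cpus() is illustrative only (not part of the change), and it assumes kernel context and that cpu_seqid values fall below the bitset's sized capacity.

#include <sys/types.h>
#include <sys/cpuvar.h>
#include <sys/bitset.h>
#include <sys/group.h>
#include <sys/debug.h>

/* Hypothetical consumer of the new bitset_t/group_t primitives */
void
example_track_cpus(cpu_t **cps, uint_t n)
{
	bitset_t	seen;
	group_t		grp;
	group_iter_t	iter;
	cpu_t		*cp;
	uint_t		i;

	bitset_init(&seen);		/* zero sized until resized */
	bitset_resize(&seen, n);	/* must size before bitset_add() */
	group_create(&grp);

	for (i = 0; i < n; i++) {
		/* assumes cpu_seqid < n for this illustration */
		bitset_add(&seen, cps[i]->cpu_seqid);
		(void) group_add(&grp, cps[i], GRP_RESIZE);
	}

	ASSERT(!bitset_is_null(&seen));
	ASSERT(bitset_find(&seen) != (uint_t)-1);	/* lowest set bit */

	/*
	 * Drain the group before destroying it; group_destroy()
	 * asserts that the group is empty, and group_remove()
	 * repacks the backing array, so restart the iterator
	 * after each removal.
	 */
	group_iter_init(&iter);
	while ((cp = group_iterate(&grp, &iter)) != NULL) {
		(void) group_remove(&grp, cp, GRP_RESIZE);
		group_iter_init(&iter);
	}

	group_destroy(&grp);
	bitset_fini(&seen);
}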
diff --git a/usr/src/uts/common/os/bitset.c b/usr/src/uts/common/os/bitset.c new file mode 100644 index 0000000000..8222fd9faa --- /dev/null +++ b/usr/src/uts/common/os/bitset.c @@ -0,0 +1,168 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/bitset.h> +#include <sys/kmem.h> +#include <sys/systm.h> +#include <sys/cmn_err.h> +#include <sys/sysmacros.h> + +/* + * Initialize a bitset_t. + * After bitset_init(), the bitset will be zero sized. + */ +void +bitset_init(bitset_t *b) +{ + bzero(b, sizeof (bitset_t)); +} + +/* + * Uninitialize a bitset_t. + * This will free the bitset's data, leaving it zero sized. + */ +void +bitset_fini(bitset_t *b) +{ + if (b->bs_words > 0) + kmem_free(b->bs_set, b->bs_words * sizeof (ulong_t)); +} + +/* + * Resize a bitset to where it can hold sz number of bits. + * This can either grow or shrink the bitset holding capacity. + * In the case of shrinkage, elements that reside outside the new + * holding capacity of the bitset are lost. + */ +void +bitset_resize(bitset_t *b, uint_t sz) +{ + uint_t nwords; + ulong_t *bset_new, *bset_tmp; + + nwords = BT_BITOUL(sz); + if (b->bs_words == nwords) + return; /* already properly sized */ + + /* + * Allocate the new ulong_t array, and copy the old one. + */ + if (nwords > 0) { + bset_new = kmem_zalloc(nwords * sizeof (ulong_t), KM_SLEEP); + bcopy(b->bs_set, bset_new, + MIN(b->bs_words, nwords) * sizeof (ulong_t)); + } else { + bset_new = NULL; + } + + /* swap out the old ulong_t array for new one */ + bset_tmp = b->bs_set; + b->bs_set = bset_new; + + /* free up the old array */ + kmem_free(bset_tmp, b->bs_words * sizeof (ulong_t)); + b->bs_words = nwords; +} + +/* + * Returns the current holding capacity of the bitset + */ +uint_t +bitset_capacity(bitset_t *b) +{ + return (b->bs_words * BT_NBIPUL); +} + +/* + * Add and delete bits in the bitset. + * + * Adding a bit that is already set, and clearing a bit that's already clear + * is legal. + * + * Adding or deleting an element that falls outside the bitset's current + * holding capacity is illegal. 
+ */ +void +bitset_add(bitset_t *b, uint_t elt) +{ + ASSERT(b->bs_words * BT_NBIPUL > elt); + + BT_SET(b->bs_set, elt); +} + +void +bitset_del(bitset_t *b, uint_t elt) +{ + ASSERT(b->bs_words * BT_NBIPUL > elt); + + BT_CLEAR(b->bs_set, elt); +} + +/* + * Return non-zero if the bit is present in the set + */ +int +bitset_in_set(bitset_t *b, uint_t elt) +{ + ASSERT(b->bs_words * BT_NBIPUL > elt); + + return (BT_TEST(b->bs_set, elt)); +} + +/* + * Return non-zero if the bitset is empty + */ +int +bitset_is_null(bitset_t *b) +{ + int i; + + for (i = 0; i < b->bs_words; i++) + if (b->bs_set[i] != 0) + return (0); + return (1); +} + +/* + * Find the first set bit in the bitset + * Return -1 if no bit was found + */ +uint_t +bitset_find(bitset_t *b) +{ + uint_t i; + uint_t elt = (uint_t)-1; + + for (i = 0; i < b->bs_words; i++) { + elt = (uint_t)(lowbit(b->bs_set[i]) - 1); + if (elt != (uint_t)-1) { + elt += i * BT_NBIPUL; + break; + } + } + return (elt); +} diff --git a/usr/src/uts/common/os/chip.c b/usr/src/uts/common/os/chip.c deleted file mode 100644 index ad11827b0f..0000000000 --- a/usr/src/uts/common/os/chip.c +++ /dev/null @@ -1,576 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/thread.h> -#include <sys/cpuvar.h> -#include <sys/cpupart.h> -#include <sys/kmem.h> -#include <sys/cmn_err.h> -#include <sys/kstat.h> -#include <sys/processor.h> -#include <sys/disp.h> -#include <sys/chip.h> - -/* - * CMT aware scheduler/dispatcher support - * - * With the introduction of Chip Multi-Threaded (CMT) processor architectures, - * it is no longer necessarily true that a given physical processor - * module (chip) will present itself as a single schedulable entity (cpu_t). - * Rather, each chip may present itself as one or more "logical" CPUs. - * - * The logical CPUs presented may share physical components on the chip - * such as caches, data pipes, FPUs, etc. It is advantageous to have the - * kernel know which logical CPUs are presented by a given chip, - * and what facilities on the chip are shared, since the kernel can then use - * this information to employ scheduling policies that help improve the - * availability of per chip resources, and increase utilization of a thread's - * cache investment. - * - * The "chip_t" structure represents a physical processor. - * It is used to keep track of which logical CPUs are presented by a given - * chip, and to provide a parameterized representation of a chip's - * properties. 
A count of the number of running threads is also - * maintained, and is used by the dispatcher to balance load across the - * system's chips to improve performance through increased chip resource - * availability. - * - * Locking: - * - * Safely traversing the per lgroup lists requires the same protections - * as traversing the cpu lists. One must either: - * - hold cpu_lock - * - have disabled kernel preemption - * - be at high SPL - * - have cpu's paused - * - * Safely traversing the global "chip_list" requires holding cpu_lock. - * - * A chip's nrunning count should only be modified using the - * CHIP_NRUNNING() macro, through which updates of the count are done - * atomically. - */ - -chip_t cpu0_chip; /* chip structure for first CPU */ -cpu_physid_t cpu0_physid; /* boot CPU's physical id structure */ - -/* - * chip_bootstrap is used on platforms where it is possible to enter the - * dispatcher before a new CPU's chip initialization has happened. - */ -static chip_t chip_bootstrap; - -#define CPU_HAS_NO_CHIP(cp) \ - ((cp)->cpu_chip == NULL || (cp)->cpu_chip == &chip_bootstrap) - -static chip_t *chip_list; /* protected by CPU lock */ -static chip_set_t chip_set; /* bitmap of chips in existence */ - /* indexed by chip_seqid */ -static chipid_t chip_seqid_next = 0; /* next sequential chip id */ -static int nchips = 0; /* num chips in existence */ - -static chip_t *chip_find(chipid_t); -static int chip_kstat_extract(kstat_t *, int); - -/* - * Declare static kstat names (defined in chip.h) - */ -CHIP_KSTAT_NAMES; - -/* - * Find the chip_t with the given chip_id. - */ -static chip_t * -chip_find(chipid_t chipid) -{ - chip_t *chp, *chip_start; - - ASSERT(chip_list == NULL || chip_list->chip_next == chip_list || - MUTEX_HELD(&cpu_lock)); - - if ((chp = chip_start = chip_list) != NULL) { - do { - if (chp->chip_id == chipid) { - return (chp); - } - } while ((chp = chp->chip_next) != chip_start); - } - return (NULL); -} - -chip_t * -chip_lookup(chipid_t chipid) -{ - chip_t *chp; - - mutex_enter(&cpu_lock); - chp = chip_find(chipid); - mutex_exit(&cpu_lock); - - return (chp); -} - -#ifndef sun4v -/* - * Setup the kstats for this chip, if needed - */ -void -chip_kstat_create(chip_t *chp) -{ - chip_stat_t stat; - kstat_t *chip_kstat; - - ASSERT(MUTEX_HELD(&cpu_lock)); - - if (chp->chip_kstat != NULL) - return; /* already initialized */ - - chip_kstat = kstat_create("chip", chp->chip_id, NULL, "misc", - KSTAT_TYPE_NAMED, CHIP_NUM_STATS, - KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE); - - if (chip_kstat != NULL) { - chip_kstat->ks_lock = &chp->chip_kstat_mutex; - mutex_init(chip_kstat->ks_lock, NULL, MUTEX_DEFAULT, NULL); - chip_kstat->ks_private = chp; - chip_kstat->ks_data = chp->chip_kstat_data; - for (stat = 0; stat < CHIP_NUM_STATS; stat++) - kstat_named_init(&chp->chip_kstat_data[stat], - chip_kstat_names[stat], KSTAT_DATA_INT64); - chip_kstat->ks_update = chip_kstat_extract; - chp->chip_kstat = chip_kstat; - kstat_install(chip_kstat); - } -} -#else -/* - * Note: On sun4v systems, chip kstats don't currently - * exist, since "chip" structures and policies are being - * leveraged to implement core level balancing, and exporting - * chip kstats in light of this would be both misleading - * and confusing. 
- */ -/* ARGSUSED */ -void -chip_kstat_create(chip_t *chp) -{ -} -#endif /* !sun4v */ - -static int -chip_kstat_extract(kstat_t *ksp, int rw) -{ - struct kstat_named *ksd; - chip_t *chp; - - chp = (chip_t *)ksp->ks_private; - - ksd = (struct kstat_named *)ksp->ks_data; - ASSERT(ksd == chp->chip_kstat_data); - - /* - * The chip kstats are read only - */ - if (rw == KSTAT_WRITE) - return (EACCES); - - ksd[CHIP_ID].value.i64 = chp->chip_id; - ksd[CHIP_NCPUS].value.i64 = chp->chip_ncpu; - ksd[CHIP_NRUNNING].value.i64 = chp->chip_nrunning; - ksd[CHIP_RECHOOSE].value.i64 = - rechoose_interval + chp->chip_rechoose_adj; - - return (0); -} - -/* - * If necessary, instantiate a chip_t for this CPU. - * Called when a CPU is being added to the system either in startup, - * or because of DR. The cpu will be assigned to the chip's active - * CPU list later in chip_cpu_assign() - */ -void -chip_cpu_init(cpu_t *cp) -{ - chipid_t cid; - int rechoose; - chip_t *chp; - chip_def_t chp_def; - - ASSERT((chip_list == NULL) || (MUTEX_HELD(&cpu_lock))); - - if (chip_list == NULL) - cp->cpu_physid = &cpu0_physid; - else - cp->cpu_physid = kmem_zalloc(sizeof (cpu_physid_t), KM_SLEEP); - - /* - * Call into the platform to fetch this cpu's chip and core ids. - * The ids are cached in the CPU's physical id structure. - * - * On sun4v platforms, the chip infrastructure is currently being - * leveraged to implement core level load balancing. - */ -#ifdef DO_CORELEVEL_LOADBAL - cid = chip_plat_get_coreid(cp); - cp->cpu_physid->cpu_coreid = cid; - cp->cpu_physid->cpu_chipid = chip_plat_get_chipid(cp); -#else - cid = chip_plat_get_chipid(cp); - cp->cpu_physid->cpu_chipid = cid; - cp->cpu_physid->cpu_coreid = chip_plat_get_coreid(cp); -#endif /* DO_CORELEVEL_LOADBAL */ - - chp = chip_find(cid); - if (chp == NULL) { - - /* - * Create a new chip - */ - if (chip_list == NULL) - chp = &cpu0_chip; - else - chp = kmem_zalloc(sizeof (*chp), KM_SLEEP); - - chp->chip_id = cid; - chp->chip_nrunning = 0; - - /* - * If we're booting, take this moment to perform - * some additional initialization - */ - if (chip_list == NULL) { - CHIP_SET_ZERO(chip_set); - CHIP_SET_ZERO(cp->cpu_part->cp_mach->mc_chipset); - chp->chip_nrunning++; /* for t0 */ - } - - /* - * Find the next free sequential chip id. - * A chip's sequential id exists in the range - * 0 .. CHIP_MAX_CHIPS, and is suitable for use with - * chip sets. - */ - while (CHIP_SET_TEST(chip_set, chip_seqid_next)) - chip_seqid_next++; - chp->chip_seqid = chip_seqid_next++; - CHIP_SET_ADD(chip_set, chp->chip_seqid); - - ASSERT(chip_seqid_next <= CHIP_MAX_CHIPS); - - - /* - * Query the platform specific parameters - * for this chip - */ - chip_plat_define_chip(cp, &chp_def); - chp->chip_rechoose_adj = chp_def.chipd_rechoose_adj; - chp->chip_type = chp_def.chipd_type; - chp->chip_nosteal = chp_def.chipd_nosteal; - - ASSERT((chp->chip_type < CHIP_NUM_TYPES) && - (chp->chip_type >= CHIP_DEFAULT)); - - /* - * Insert this chip in chip_list - */ - if (chip_list == NULL) { - chip_list = chp; - chp->chip_next = chp->chip_prev = chp; - } else { - chip_t *chptr; - - chptr = chip_list; - chp->chip_next = chptr; - chp->chip_prev = chptr->chip_prev; - chptr->chip_prev->chip_next = chp; - chptr->chip_prev = chp; - } - - nchips++; - ASSERT(nchips <= CHIP_MAX_CHIPS); - - /* - * The boot cpu will create the first chip's kstats - * later in cpu_kstat_init() - */ - if (chp != &cpu0_chip) - chip_kstat_create(chp); - } - - /* - * Initialize the effective rechoose interval cached - * in this cpu structure. 
- */ - rechoose = rechoose_interval + chp->chip_rechoose_adj; - cp->cpu_rechoose = (rechoose < 0) ? 0 : rechoose; - - cp->cpu_chip = chp; - chp->chip_ref++; -} - -/* - * This cpu is being deleted. It has already been removed from - * the chip's active cpu list back in chip_cpu_unassign(). Here - * we remove the cpu's reference to the chip, and cleanup/destroy - * the chip if needed. - */ -void -chip_cpu_fini(cpu_t *cp) -{ - chip_t *chp; - chip_t *prev, *next; - - ASSERT(MUTEX_HELD(&cpu_lock)); - - /* - * This can happen if the CPU failed to power on - */ - if (CPU_HAS_NO_CHIP(cp)) - return; - - chp = cp->cpu_chip; - cp->cpu_chip = NULL; - - /* - * Clear out and free the CPU's physical id structure - */ - cp->cpu_physid->cpu_chipid = -1; - cp->cpu_physid->cpu_coreid = -1; - - if (cp->cpu_physid != &cpu0_physid) { - ASSERT(cp->cpu_physid != NULL); - kmem_free(cp->cpu_physid, sizeof (cpu_physid_t)); - } - cp->cpu_physid = NULL; - - /* - * Delete the chip if its last CPU is being deleted - */ - if (--chp->chip_ref == 0) { - - ASSERT(chp->chip_ncpu == 0); - ASSERT(chp->chip_cpus == NULL); - ASSERT(chp->chip_nrunning == 0); - ASSERT(chp->chip_lgrp == NULL); - ASSERT((chp->chip_next_lgrp == NULL) && - (chp->chip_prev_lgrp == NULL)); - - if (chip_seqid_next > chp->chip_seqid) - chip_seqid_next = chp->chip_seqid; - CHIP_SET_REMOVE(chip_set, chp->chip_seqid); - - chp->chip_id = -1; - chp->chip_seqid = -1; - - /* - * remove the chip from the system's chip list - */ - if (chip_list == chp) - chip_list = chp->chip_next; - - prev = chp->chip_prev; - next = chp->chip_next; - - prev->chip_next = next; - next->chip_prev = prev; - - chp->chip_next = chp->chip_prev = NULL; - - nchips--; - - /* - * clean up any chip kstats - */ - if (chp->chip_kstat) { - kstat_delete(chp->chip_kstat); - chp->chip_kstat = NULL; - } - /* - * If the chip_t structure was dynamically - * allocated, free it. - */ - if (chp != &cpu0_chip) - kmem_free(chp, sizeof (*chp)); - } -} - -/* - * This cpu is becoming active (online). - * Perform all the necessary bookkeeping in it's chip_t - */ -void -chip_cpu_assign(cpu_t *cp) -{ - chip_t *chp; - cpu_t *cptr; - - ASSERT(chip_list == NULL || chip_list->chip_next == chip_list || - MUTEX_HELD(&cpu_lock)); - - chp = cp->cpu_chip; - - /* - * Add this cpu to the chip's cpu list - */ - if (chp->chip_ncpu == 0) { - chp->chip_cpus = cp; - cp->cpu_next_chip = cp->cpu_prev_chip = cp; - } else { - cptr = chp->chip_cpus; - cp->cpu_next_chip = cptr; - cp->cpu_prev_chip = cptr->cpu_prev_chip; - cp->cpu_prev_chip->cpu_next_chip = cp; - cptr->cpu_prev_chip = cp; - } - - chp->chip_ncpu++; - - /* - * Notate this chip's seqid in the cpu partition's chipset - */ - chip_cpu_move_part(cp, NULL, cp->cpu_part); -} - -/* - * This cpu is being offlined, so do the reverse - * of cpu_chip_assign() - */ -void -chip_cpu_unassign(cpu_t *cp) -{ - chip_t *chp; - struct cpu *prev; - struct cpu *next; - - ASSERT(MUTEX_HELD(&cpu_lock)); - - chp = cp->cpu_chip; - - chip_cpu_move_part(cp, cp->cpu_part, NULL); - - /* - * remove this cpu from the chip's cpu list - */ - prev = cp->cpu_prev_chip; - next = cp->cpu_next_chip; - - prev->cpu_next_chip = next; - next->cpu_prev_chip = prev; - - cp->cpu_next_chip = cp->cpu_prev_chip = NULL; - - chp->chip_ncpu--; - - if (chp->chip_ncpu == 0) { - chp->chip_cpus = NULL; - } else if (chp->chip_cpus == cp) { - chp->chip_cpus = next; - } -} - -/* - * A cpu on the chip is moving into and/or out of a cpu partition. - * Maintain the cpuparts' chip membership set. 
- * oldpp is NULL when a cpu is being offlined. - * newpp is NULL when a cpu is being onlined. - */ -void -chip_cpu_move_part(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp) -{ - cpu_t *cpp; - chip_t *chp; - - ASSERT(chip_list->chip_next == chip_list || MUTEX_HELD(&cpu_lock)); - - chp = cp->cpu_chip; - - if (newpp != NULL) { - /* - * Add the chip's seqid to the cpupart's chip set - */ - CHIP_SET_ADD(newpp->cp_mach->mc_chipset, chp->chip_seqid); - } - - if (oldpp != NULL) { - cpp = cp; - while ((cpp = cpp->cpu_next_chip) != cp) { - if (cpp->cpu_part->cp_id == oldpp->cp_id) { - /* - * Another cpu on the chip is in the old - * cpu partition, so we're done - */ - return; - } - } - - /* - * No other cpu on the chip is in the old partition - * so remove the chip's seqid from it's set - */ - CHIP_SET_REMOVE(oldpp->cp_mach->mc_chipset, chp->chip_seqid); - } -} - -/* - * Called to indicate a slave CPU has started up. - */ -void -chip_cpu_startup(cpu_t *cp) -{ - /* - * Indicate that the chip has a new running thread - * (slave startup) - */ - CHIP_NRUNNING(cp->cpu_chip, 1); -} - -/* - * Provide the specified CPU a bootstrap chip - */ -void -chip_bootstrap_cpu(cpu_t *cp) -{ - cp->cpu_chip = &chip_bootstrap; -} - -/* - * Given a chip set, return 1 if it is empty. - */ -int -chip_set_isnull(chip_set_t *set) -{ - int i; - - for (i = 0; i < CHIP_SET_WORDS; i++) { - if (set->csb[i] != 0) - return (0); - } - return (1); -} diff --git a/usr/src/uts/common/os/clock.c b/usr/src/uts/common/os/clock.c index 0152c2e958..a1040f1270 100644 --- a/usr/src/uts/common/os/clock.c +++ b/usr/src/uts/common/os/clock.c @@ -23,7 +23,7 @@ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -66,7 +66,6 @@ #include <sys/cpupart.h> #include <sys/rctl.h> #include <sys/task.h> -#include <sys/chip.h> #include <sys/sdt.h> #ifdef __sparc @@ -260,13 +259,6 @@ cyclic_id_t deadman_cyclic; /* deadman()'s cyclic_id */ static int lgrp_ticks; /* counter to schedule lgrp load calcs */ /* - * rechoose_interval_history is used to detect when rechoose_interval's - * value has changed (via hotpatching for example), so that the - * cached values in the cpu structures may be updated. - */ -static int rechoose_interval_history = RECHOOSE_INTERVAL; - -/* * for tod fault detection */ #define TOD_REF_FREQ ((longlong_t)(NANOSEC)) @@ -345,8 +337,6 @@ clock(void) int64_t lltemp; int s; int do_lgrp_load; - int rechoose_update = 0; - int rechoose; int i; if (panicstr) @@ -430,21 +420,9 @@ clock(void) do_lgrp_load = 1; } - /* - * The dispatcher tunable rechoose_interval may be hot-patched. - * Note if it has a new value. If so, the effective rechoose_interval - * cached in the cpu structures needs to be updated. - * If needed we'll do this during the walk of the cpu_list below. - */ - if (rechoose_interval != rechoose_interval_history) { - rechoose_interval_history = rechoose_interval; - rechoose_update = 1; - } - if (one_sec) loadavg_update(); - /* * First count the threads waiting on kpreempt queues in each * CPU partition. @@ -522,19 +500,6 @@ clock(void) lgrp_loadavg(cp->cpu_lpl, cpu_nrunnable * LGRP_LOADAVG_IN_THREAD_MAX, 1); } - /* - * The platform may define a per physical processor - * adjustment of rechoose_interval. The effective - * (base + adjustment) rechoose_interval is cached - * in the cpu structures for efficiency. Above we detect - * if the cached values need updating, and here is where - * the update happens. 
- */ - if (rechoose_update) { - rechoose = rechoose_interval + - cp->cpu_chip->chip_rechoose_adj; - cp->cpu_rechoose = (rechoose < 0) ? 0 : rechoose; - } } while ((cp = cp->cpu_next) != cpu_list); /* diff --git a/usr/src/uts/common/os/cpu.c b/usr/src/uts/common/os/cpu.c index 5ca51ec3da..9237517a69 100644 --- a/usr/src/uts/common/os/cpu.c +++ b/usr/src/uts/common/os/cpu.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -45,7 +45,7 @@ #include <sys/cpupart.h> #include <sys/lgrp.h> #include <sys/pset.h> -#include <sys/chip.h> +#include <sys/pghw.h> #include <sys/kmem.h> #include <sys/kmem_impl.h> /* to set per-cpu kmem_cache offset */ #include <sys/atomic.h> @@ -1266,6 +1266,11 @@ cpu_offline(cpu_t *cp, int flags) cpu_state_change_notify(cp->cpu_id, CPU_OFF); /* + * Tell the PG subsystem that the CPU is leaving the partition + */ + pg_cpupart_out(cp, pp); + + /* * Take the CPU out of interrupt participation so we won't find * bound kernel threads. If the architecture cannot completely * shut off interrupts on the CPU, don't quiesce it, but don't @@ -1512,6 +1517,11 @@ out: cyclic_online(cp); /* + * If we failed, tell the PG subsystem that the CPU is back + */ + pg_cpupart_in(cp, pp); + + /* * If we failed, we need to notify everyone that this CPU is back on. */ if (error != 0) @@ -1732,7 +1742,12 @@ cpu_del_unit(int cpuid) ASSERT(cp->cpu_next_part == cp); ASSERT(cp->cpu_prev_part == cp); - chip_cpu_fini(cp); + /* + * Tear down the CPU's physical ID cache, and update any + * processor groups + */ + pg_cpu_fini(cp); + pghw_physid_destroy(cp); /* * Destroy kstat stuff. @@ -1816,8 +1831,7 @@ cpu_add_active_internal(cpu_t *cp) ASSERT(cp_numparts_nonempty != 0); } - chip_cpu_assign(cp); - + pg_cpu_active(cp); lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)cp, 0); bzero(&cp->cpu_loadavg, sizeof (cp->cpu_loadavg)); @@ -1830,9 +1844,12 @@ cpu_add_active_internal(cpu_t *cp) void cpu_add_active(cpu_t *cp) { + pg_cpupart_in(cp, cp->cpu_part); + pause_cpus(NULL); cpu_add_active_internal(cp); start_cpus(); + cpu_stats_kstat_create(cp); cpu_create_intrstat(cp); lgrp_kstat_create(cp); @@ -1854,7 +1871,7 @@ cpu_remove_active(cpu_t *cp) ASSERT(cp->cpu_next_onln != cp); /* not the last one */ ASSERT(cp->cpu_prev_onln != cp); /* not the last one */ - chip_cpu_unassign(cp); + pg_cpu_inactive(cp); lgrp_config(LGRP_CONFIG_CPU_OFFLINE, (uintptr_t)cp, 0); @@ -2146,11 +2163,12 @@ cpu_info_kstat_update(kstat_t *ksp, int rw) (void) strncpy(cpu_info_template.ci_fpu_type.value.c, cp->cpu_type_info.pi_fputypes, 15); cpu_info_template.ci_clock_MHz.value.l = cp->cpu_type_info.pi_clock; - cpu_info_template.ci_chip_id.value.l = chip_plat_get_chipid(cp); + cpu_info_template.ci_chip_id.value.l = + pg_plat_hw_instance_id(cp, PGHW_CHIP); kstat_named_setstr(&cpu_info_template.ci_implementation, cp->cpu_idstr); kstat_named_setstr(&cpu_info_template.ci_brandstr, cp->cpu_brandstr); - cpu_info_template.ci_core_id.value.l = chip_plat_get_coreid(cp); + cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp); #if defined(__sparcv9) cpu_info_template.ci_device_ID.value.ui64 = @@ -2163,7 +2181,7 @@ cpu_info_kstat_update(kstat_t *ksp, int rw) cpu_info_template.ci_family.value.l = cpuid_getfamily(cp); cpu_info_template.ci_model.value.l = cpuid_getmodel(cp); cpu_info_template.ci_step.value.l = cpuid_getstep(cp); - cpu_info_template.ci_clogid.value.l = chip_plat_get_clogid(cp); 
+ cpu_info_template.ci_clogid.value.l = cpuid_get_clogid(cp); #endif return (0); @@ -2215,11 +2233,13 @@ cpu_info_kstat_destroy(cpu_t *cp) void cpu_kstat_init(cpu_t *cp) { + /* + * XXX need pg kstats for boot CPU + */ mutex_enter(&cpu_lock); cpu_info_kstat_create(cp); cpu_stats_kstat_create(cp); cpu_create_intrstat(cp); - chip_kstat_create(cp->cpu_chip); cpu_set_state(cp); mutex_exit(&cpu_lock); } diff --git a/usr/src/uts/common/os/group.c b/usr/src/uts/common/os/group.c new file mode 100644 index 0000000000..b15dff181f --- /dev/null +++ b/usr/src/uts/common/os/group.c @@ -0,0 +1,322 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/systm.h> +#include <sys/param.h> +#include <sys/debug.h> +#include <sys/kmem.h> +#include <sys/group.h> + + +#define GRP_SET_SIZE_DEFAULT 2 + +static void group_grow_set(group_t *); +static void group_shrink_set(group_t *); +static void group_pack_set(void **, uint_t); + +/* + * Initialize a group_t + */ +void +group_create(group_t *g) +{ + bzero(g, sizeof (group_t)); +} + +/* + * Destroy a group_t + * The group must already be empty + */ +void +group_destroy(group_t *g) +{ + ASSERT(g->grp_size == 0); + + if (g->grp_capacity > 0) { + kmem_free(g->grp_set, g->grp_capacity * sizeof (void *)); + g->grp_capacity = 0; + } + g->grp_set = NULL; +} + +/* + * Add element "e" to group "g" + * + * Returns -1 if addition would result in overcapacity, and + * resize operations aren't allowed, and 0 otherwise + */ +int +group_add(group_t *g, void *e, int gflag) +{ + int entry; + + if ((gflag & GRP_NORESIZE) && + g->grp_size == g->grp_capacity) + return (-1); + + ASSERT(g->grp_size != g->grp_capacity || (gflag & GRP_RESIZE)); + + entry = g->grp_size++; + if (g->grp_size > g->grp_capacity) + group_grow_set(g); + + ASSERT(g->grp_set[entry] == NULL); + g->grp_set[entry] = e; + + return (0); +} + +/* + * Remove element "e" from group "g" + * + * Returns -1 if "e" was not present in "g" and 0 otherwise + */ +int +group_remove(group_t *g, void *e, int gflag) +{ + int i; + + /* + * Find the element in the group's set + */ + for (i = 0; i < g->grp_size; i++) + if (g->grp_set[i] == e) + break; + if (g->grp_set[i] != e) + return (-1); + + g->grp_set[i] = NULL; + group_pack_set(g->grp_set, g->grp_size); + g->grp_size--; + + if ((gflag & GRP_RESIZE) && + g->grp_size > GRP_SET_SIZE_DEFAULT && + ((g->grp_size - 1) & g->grp_size) == 0) + group_shrink_set(g); + + return (0); +} + +/* + * Expand the capacity of group "g" so that it may + * contain at least "n" elements + */ +void +group_expand(group_t *g, uint_t n) +{ + while (g->grp_capacity < n) + 
group_grow_set(g); +} + +/* + * Upsize a group's holding capacity + */ +static void +group_grow_set(group_t *g) +{ + uint_t cap_old, cap_new; + void **set_old, **set_new; + + cap_old = g->grp_capacity; + set_old = g->grp_set; + + /* + * The array size grows in powers of two + */ + if ((cap_new = (cap_old << 1)) == 0) { + /* + * The set is unallocated. + * Allocate a default sized set. + */ + cap_new = GRP_SET_SIZE_DEFAULT; + g->grp_set = kmem_zalloc(cap_new * sizeof (void *), KM_SLEEP); + g->grp_capacity = cap_new; + } else { + /* + * Allocate a newly sized array, + * copy the data, and free the old array. + */ + set_new = kmem_zalloc(cap_new * sizeof (void *), KM_SLEEP); + (void) kcopy(set_old, set_new, cap_old * sizeof (void *)); + g->grp_set = set_new; + g->grp_capacity = cap_new; + kmem_free(set_old, cap_old * sizeof (void *)); + } + /* + * The new array size should be a power of two + */ + ASSERT(((cap_new - 1) & cap_new) == 0); +} + +/* + * Downsize a group's holding capacity + */ +static void +group_shrink_set(group_t *g) +{ + uint_t cap_old, cap_new; + void **set_old, **set_new; + + cap_old = g->grp_capacity; + set_old = g->grp_set; + + /* + * The group's existing array size must already + * be a power of two + */ + ASSERT(((cap_old - 1) & cap_old) == 0); + cap_new = cap_old >> 1; + + /* + * GRP_SET_SIZE_DEFAULT is the minimum set size. + */ + if (cap_new < GRP_SET_SIZE_DEFAULT) + return; + + set_new = kmem_zalloc(cap_new * sizeof (void *), KM_SLEEP); + (void) kcopy(set_old, set_new, cap_new * sizeof (void *)); + g->grp_capacity = cap_new; + g->grp_set = set_new; + + ASSERT(((cap_new - 1) & cap_new) == 0); + kmem_free(set_old, cap_old * sizeof (void *)); +} + +/* + * Pack a group's set + * Element order is not preserved + */ +static void +group_pack_set(void **set, uint_t sz) +{ + uint_t i, j, free; + + free = (uint_t)-1; + + for (i = 0; i < sz; i++) { + if (set[i] == NULL && free == (uint_t)-1) { + /* + * Found a new free slot. + * Start packing from here. + */ + free = i; + } else if (set[i] != NULL && free != (uint_t)-1) { + /* + * Found a slot to pack into + * an earlier free slot. + */ + ASSERT(set[free] == NULL); + set[free] = set[i]; + set[i] = NULL; + + /* + * Find the next free slot + */ + for (j = free + 1; set[j] != NULL; j++) { + ASSERT(j <= i); + if (j == i) + break; + } + if (set[j] == NULL) + free = j; + else + free = (uint_t)-1; + } + } +} + +/* + * Initialize a group iterator cookie + */ +void +group_iter_init(group_iter_t *iter) +{ + *iter = 0; +} + +/* + * Iterate over the elements in a group + */ +void * +group_iterate(group_t *g, group_iter_t *iter) +{ + uint_t idx = *iter; + void *data = NULL; + + while (idx < g->grp_size) { + data = g->grp_set[idx++]; + if (data != NULL) + break; + } + *iter = idx; + + return (data); +} + +/* + * Indexed access to a group's elements + */ +void * +group_access_at(group_t *g, uint_t idx) +{ + if (idx >= g->grp_capacity) + return (NULL); + + return (g->grp_set[idx]); +} + +/* + * Add a new ordered group element at specified + * index. The group must already be of sufficient + * capacity to hold an element at the specified index. 
+ * + * Returns 0 if addition was successful, and -1 if the + * addition failed because the table was too small + */ +int +group_add_at(group_t *g, void *e, uint_t idx) +{ + if (idx >= g->grp_capacity) + return (-1); + + if (idx >= g->grp_size) + g->grp_size = idx + 1; + + ASSERT(g->grp_set[idx] == NULL); + g->grp_set[idx] = e; + return (0); +} + +/* + * Remove the entry at the specified index + */ +void +group_remove_at(group_t *g, uint_t idx) +{ + ASSERT(idx < g->grp_capacity); + g->grp_set[idx] = NULL; +} diff --git a/usr/src/uts/common/os/lgrp.c b/usr/src/uts/common/os/lgrp.c index 83f67e1088..2007f7b158 100644 --- a/usr/src/uts/common/os/lgrp.c +++ b/usr/src/uts/common/os/lgrp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -89,7 +89,7 @@ #include <sys/cmn_err.h> #include <sys/kstat.h> #include <sys/sysmacros.h> -#include <sys/chip.h> +#include <sys/pg.h> #include <sys/promif.h> #include <sys/sdt.h> @@ -314,8 +314,6 @@ lgrp_root_init(void) klgrpset_clear(lgrp_root->lgrp_children); klgrpset_clear(lgrp_root->lgrp_leaves); lgrp_root->lgrp_parent = NULL; - lgrp_root->lgrp_chips = NULL; - lgrp_root->lgrp_chipcnt = 0; lgrp_root->lgrp_latency = lgrp_plat_latency(hand, hand); for (i = 0; i < LGRP_RSRC_COUNT; i++) @@ -679,7 +677,6 @@ lgrp_cpu_init(struct cpu *cp) lgrp_t *my_lgrp; lgrp_id_t lgrpid; struct cpu *cptr; - struct chip *chp; /* * This is the first time through if the resource set @@ -795,33 +792,6 @@ lgrp_cpu_init(struct cpu *cp) cptr->cpu_prev_lgrp = cp; } my_lgrp->lgrp_cpucnt++; - - /* - * Add this cpu's chip to the per lgroup list - * if necessary - */ - if (cp->cpu_chip->chip_lgrp == NULL) { - struct chip *lcpr; - - chp = cp->cpu_chip; - - if (my_lgrp->lgrp_chipcnt == 0) { - my_lgrp->lgrp_chips = chp; - chp->chip_next_lgrp = - chp->chip_prev_lgrp = chp; - } else { - lcpr = my_lgrp->lgrp_chips; - chp->chip_next_lgrp = lcpr; - chp->chip_prev_lgrp = - lcpr->chip_prev_lgrp; - lcpr->chip_prev_lgrp->chip_next_lgrp = - chp; - lcpr->chip_prev_lgrp = chp; - } - chp->chip_lgrp = my_lgrp; - chp->chip_balance = chp->chip_next_lgrp; - my_lgrp->lgrp_chipcnt++; - } } lgrp_t * @@ -890,8 +860,6 @@ lgrp_create(void) my_lgrp->lgrp_cpu = NULL; my_lgrp->lgrp_cpucnt = 0; - my_lgrp->lgrp_chips = NULL; - my_lgrp->lgrp_chipcnt = 0; if (my_lgrp->lgrp_kstat != NULL) lgrp_kstat_reset(lgrpid); @@ -945,8 +913,6 @@ lgrp_destroy(lgrp_t *lgrp) lgrp->lgrp_cpu = NULL; lgrp->lgrp_cpucnt = 0; - lgrp->lgrp_chipcnt = 0; - lgrp->lgrp_chips = NULL; nlgrps--; } @@ -1022,7 +988,6 @@ lgrp_cpu_fini(struct cpu *cp, lgrp_id_t lgrpid) lgrp_t *my_lgrp; struct cpu *prev; struct cpu *next; - chip_t *chp; ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized); @@ -1042,42 +1007,6 @@ lgrp_cpu_fini(struct cpu *cp, lgrp_id_t lgrpid) my_lgrp->lgrp_cpucnt--; /* - * If the last CPU on it's chip is being offlined - * then remove this chip from the per lgroup list. - * - * This is also done for the boot CPU when it needs - * to move between lgroups as a consequence of - * null proc lpa. 
- */ - chp = cp->cpu_chip; - if (chp->chip_ncpu == 0 || !lgrp_initialized) { - - chip_t *chpp; - - if (--my_lgrp->lgrp_chipcnt == 0) - my_lgrp->lgrp_chips = NULL; - else if (my_lgrp->lgrp_chips == chp) - my_lgrp->lgrp_chips = chp->chip_next_lgrp; - - /* - * Walk this lgroup's chip list looking for chips that - * may try to balance against the one that's leaving - */ - for (chpp = chp->chip_next_lgrp; chpp != chp; - chpp = chpp->chip_next_lgrp) { - if (chpp->chip_balance == chp) - chpp->chip_balance = chp->chip_next_lgrp; - } - - chp->chip_prev_lgrp->chip_next_lgrp = chp->chip_next_lgrp; - chp->chip_next_lgrp->chip_prev_lgrp = chp->chip_prev_lgrp; - - chp->chip_next_lgrp = chp->chip_prev_lgrp = NULL; - chp->chip_lgrp = NULL; - chp->chip_balance = NULL; - } - - /* * Removing last CPU in lgroup, so update lgroup topology */ if (my_lgrp->lgrp_cpucnt == 0) { @@ -1661,7 +1590,7 @@ lgrp_phys_to_lgrp(u_longlong_t physaddr) * Return the leaf lgroup containing the given CPU * * The caller needs to take precautions necessary to prevent - * "cpu" from going away across a call to this function. + * "cpu" and its lpl from going away across a call to this function. * hint: kpreempt_disable()/kpreempt_enable() */ static lgrp_t * diff --git a/usr/src/uts/common/os/pg.c b/usr/src/uts/common/os/pg.c new file mode 100644 index 0000000000..cb8295b38e --- /dev/null +++ b/usr/src/uts/common/os/pg.c @@ -0,0 +1,624 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/thread.h> +#include <sys/cpuvar.h> +#include <sys/cpupart.h> +#include <sys/kmem.h> +#include <sys/cmn_err.h> +#include <sys/kstat.h> +#include <sys/processor.h> +#include <sys/disp.h> +#include <sys/group.h> +#include <sys/pg.h> + +/* + * Processor groups + * + * With the introduction of Chip Multi-Threaded (CMT) processor architectures, + * it is no longer necessarily true that a given physical processor module + * will present itself as a single schedulable entity (cpu_t). Rather, each + * chip and/or processor core may present itself as one or more "logical" CPUs. + * + * The logical CPUs presented may share physical components such as caches, + * data pipes, execution pipelines, FPUs, etc. It is advantageous to have the + * kernel be aware of the relationships existing between logical CPUs so that + * the appropriate optimizations may be employed. + * + * The processor group abstraction represents a set of logical CPUs that + * generally share some sort of physical or characteristic relationship. 
+ * + * In the case of a physical sharing relationship, the CPUs in the group may + * share a pipeline, cache or floating point unit. In the case of a logical + * relationship, a PG may represent the set of CPUs in a processor set, or the + * set of CPUs running at a particular clock speed. + * + * The generic processor group structure, pg_t, contains the elements generic + * to a group of CPUs. Depending on the nature of the CPU relationship + * (LOGICAL or PHYSICAL), a pointer to a pg may be recast to a "view" of that + * PG where more specific data is represented. + * + * As an example, a PG representing a PHYSICAL relationship may be recast to + * a pghw_t, where data further describing the hardware sharing relationship + * is maintained. See pghw.c and pghw.h for details on physical PGs. + * + * At this time a more specialized casting of a PG representing a LOGICAL + * relationship has not been implemented, but the architecture allows for this + * in the future. + * + * Processor Group Classes + * + * Processor group consumers may wish to maintain and associate specific + * data with the PGs they create. For this reason, a mechanism for creating + * class specific PGs exists. Classes may overload the default functions for + * creating, destroying, and associating CPUs with PGs, and may also register + * class specific callbacks to be invoked when the CPU related system + * configuration changes. Class specific data is stored/associated with + * PGs by incorporating the pg_t (or pghw_t, as appropriate), as the first + * element of a class specific PG object. In memory, such a structure may look + * like: + * + * ----------------------- - - - + * | common | | | | <--(pg_t *) + * ----------------------- | | - + * | HW specific | | | <-----(pghw_t *) + * ----------------------- | - + * | class specific | | <-------(pg_cmt_t *) + * ----------------------- - + * + * Access to the PG class specific data can be had by casting a pointer to + * its class specific view. + */ + +static pg_t *pg_alloc_default(pg_class_t); +static void pg_free_default(pg_t *); + +/* + * Bootstrap CPU specific PG data + * See pg_cpu_bootstrap() + */ +static cpu_pg_t bootstrap_pg_data; + +/* + * Bitset of allocated PG ids (they are sequential) + * and the next free id in the set. + */ +static bitset_t pg_id_set; +static pgid_t pg_id_next = 0; + +/* + * Default and externed PG ops vectors + */ +static struct pg_ops pg_ops_default = { + pg_alloc_default, /* alloc */ + pg_free_default, /* free */ + NULL, /* cpu_init */ + NULL, /* cpu_fini */ + NULL, /* cpu_active */ + NULL, /* cpu_inactive */ + NULL, /* cpupart_in */ + NULL, /* cpupart_out */ + NULL, /* cpupart_move */ + NULL, /* cpu_belongs */ +}; + +/* + * Class specific PG allocation callbacks + */ +#define PG_ALLOC(class) \ + (pg_classes[class].pgc_ops->alloc ? \ + pg_classes[class].pgc_ops->alloc() : \ + pg_classes[pg_default_cid].pgc_ops->alloc()) + +#define PG_FREE(pg) \ + ((pg)->pg_class->pgc_ops->free ? \ + (pg)->pg_class->pgc_ops->free(pg) : \ + pg_classes[pg_default_cid].pgc_ops->free(pg)) \ + + +/* + * Class specific membership test callback + */ +#define PG_CPU_BELONGS(pg, cp) \ + ((pg)->pg_class->pgc_ops->cpu_belongs ? 
\ + (pg)->pg_class->pgc_ops->cpu_belongs(pg, cp) : 0) \ + +/* + * CPU configuration callbacks + */ +#define PG_CPU_INIT(class, cp) \ +{ \ + if (pg_classes[class].pgc_ops->cpu_init) \ + pg_classes[class].pgc_ops->cpu_init(cp); \ +} + +#define PG_CPU_FINI(class, cp) \ +{ \ + if (pg_classes[class].pgc_ops->cpu_fini) \ + pg_classes[class].pgc_ops->cpu_fini(cp); \ +} + +#define PG_CPU_ACTIVE(class, cp) \ +{ \ + if (pg_classes[class].pgc_ops->cpu_active) \ + pg_classes[class].pgc_ops->cpu_active(cp); \ +} + +#define PG_CPU_INACTIVE(class, cp) \ +{ \ + if (pg_classes[class].pgc_ops->cpu_inactive) \ + pg_classes[class].pgc_ops->cpu_inactive(cp); \ +} + +/* + * CPU / cpupart configuration callbacks + */ +#define PG_CPUPART_IN(class, cp, pp) \ +{ \ + if (pg_classes[class].pgc_ops->cpupart_in) \ + pg_classes[class].pgc_ops->cpupart_in(cp, pp); \ +} + +#define PG_CPUPART_OUT(class, cp, pp) \ +{ \ + if (pg_classes[class].pgc_ops->cpupart_out) \ + pg_classes[class].pgc_ops->cpupart_out(cp, pp); \ +} + +#define PG_CPUPART_MOVE(class, cp, old, new) \ +{ \ + if (pg_classes[class].pgc_ops->cpupart_move) \ + pg_classes[class].pgc_ops->cpupart_move(cp, old, new); \ +} + + + +static pg_class_t *pg_classes; +static int pg_nclasses; + +static pg_cid_t pg_default_cid; + +/* + * Initialize common PG subsystem. Perform CPU 0 initialization + */ +void +pg_init(void) +{ + pg_default_cid = + pg_class_register("default", &pg_ops_default, PGR_LOGICAL); +} + +/* + * Perform CPU 0 initialization + */ +void +pg_cpu0_init(void) +{ + extern void pghw_physid_create(); + + /* + * Create the physical ID cache for the boot CPU + */ + pghw_physid_create(CPU); + + /* + * pg_cpu_* require that cpu_lock be held + */ + mutex_enter(&cpu_lock); + + pg_cpu_init(CPU); + pg_cpupart_in(CPU, &cp_default); + pg_cpu_active(CPU); + + mutex_exit(&cpu_lock); +} + +/* + * Register a new PG class + */ +pg_cid_t +pg_class_register(char *name, struct pg_ops *ops, pg_relation_t relation) +{ + pg_class_t *newclass; + pg_class_t *classes_old; + id_t cid; + + mutex_enter(&cpu_lock); + + /* + * Allocate a new pg_class_t in the pg_classes array + */ + if (pg_nclasses == 0) { + pg_classes = kmem_zalloc(sizeof (pg_class_t), KM_SLEEP); + } else { + classes_old = pg_classes; + pg_classes = + kmem_zalloc(sizeof (pg_class_t) * (pg_nclasses + 1), + KM_SLEEP); + (void) kcopy(classes_old, pg_classes, + sizeof (pg_class_t) * pg_nclasses); + kmem_free(classes_old, sizeof (pg_class_t) * pg_nclasses); + } + + cid = pg_nclasses++; + newclass = &pg_classes[cid]; + + (void) strncpy(newclass->pgc_name, name, PG_CLASS_NAME_MAX); + newclass->pgc_id = cid; + newclass->pgc_ops = ops; + newclass->pgc_relation = relation; + + mutex_exit(&cpu_lock); + + return (cid); } + +/* + * Try to find an existing pg in set in which to place cp. + * Returns the pg if found, and NULL otherwise. + * In the event that the CPU could belong to multiple + * PGs in the set, the first matching PG will be returned. + */ +pg_t * +pg_cpu_find_pg(cpu_t *cp, group_t *set) +{ + pg_t *pg; + group_iter_t i; + + group_iter_init(&i); + while ((pg = group_iterate(set, &i)) != NULL) { + /* + * Ask the class if the CPU belongs here + */ + if (PG_CPU_BELONGS(pg, cp)) + return (pg); + } + return (NULL); +} + +/* + * Iterate over the CPUs in a PG after initializing + * the iterator with PG_CPU_ITR_INIT() + */ +cpu_t * +pg_cpu_next(pg_cpu_itr_t *itr) +{ + cpu_t *cpu; + pg_t *pg = itr->pg; + + cpu = group_iterate(&pg->pg_cpus, &itr->position); + return (cpu); } + +/* + * Create a PG of a given class. 
+ * This routine may block. + */ +pg_t * +pg_create(pg_cid_t cid) +{ + pg_t *pg; + pgid_t id; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Call the class specific PG allocation routine + */ + pg = PG_ALLOC(cid); + pg->pg_class = &pg_classes[cid]; + pg->pg_relation = pg->pg_class->pgc_relation; + + /* + * Find the next free sequential pg id + */ + do { + if (pg_id_next >= bitset_capacity(&pg_id_set)) + bitset_resize(&pg_id_set, pg_id_next + 1); + id = pg_id_next++; + } while (bitset_in_set(&pg_id_set, id)); + + pg->pg_id = id; + bitset_add(&pg_id_set, pg->pg_id); + + /* + * Create the PG's CPU group + */ + group_create(&pg->pg_cpus); + + return (pg); } + +/* + * Destroy a PG. + * This routine may block. + */ +void +pg_destroy(pg_t *pg) +{ + ASSERT(MUTEX_HELD(&cpu_lock)); + + group_destroy(&pg->pg_cpus); + + /* + * Unassign the pg_id + */ + if (pg_id_next > pg->pg_id) + pg_id_next = pg->pg_id; + bitset_del(&pg_id_set, pg->pg_id); + + /* + * Invoke the class specific de-allocation routine + */ + PG_FREE(pg); } + +/* + * Add the CPU "cp" to processor group "pg" + * This routine may block. + */ +void +pg_cpu_add(pg_t *pg, cpu_t *cp) +{ + int err; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* This adds the CPU to the PG's CPU group */ + err = group_add(&pg->pg_cpus, cp, GRP_RESIZE); + ASSERT(err == 0); + + /* This adds the PG to the CPU's PG group */ + ASSERT(cp->cpu_pg != &bootstrap_pg_data); + err = group_add(&cp->cpu_pg->pgs, pg, GRP_RESIZE); + ASSERT(err == 0); } + +/* + * Remove "cp" from "pg". + * This routine may block. + */ +void +pg_cpu_delete(pg_t *pg, cpu_t *cp) +{ + int err; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* Remove the CPU from the PG */ + err = group_remove(&pg->pg_cpus, cp, GRP_RESIZE); + ASSERT(err == 0); + + /* Remove the PG from the CPU's PG group */ + ASSERT(cp->cpu_pg != &bootstrap_pg_data); + err = group_remove(&cp->cpu_pg->pgs, pg, GRP_RESIZE); + ASSERT(err == 0); } + +/* + * Allocate a CPU's PG data. This hangs off struct cpu at cpu_pg + */ +static cpu_pg_t * +pg_cpu_data_alloc(void) +{ + cpu_pg_t *pgd; + + pgd = kmem_zalloc(sizeof (cpu_pg_t), KM_SLEEP); + group_create(&pgd->pgs); + group_create(&pgd->cmt_pgs); + + return (pgd); } + +/* + * Free the CPU's PG data. + */ +static void +pg_cpu_data_free(cpu_pg_t *pgd) +{ + group_destroy(&pgd->pgs); + group_destroy(&pgd->cmt_pgs); + kmem_free(pgd, sizeof (cpu_pg_t)); } + +/* + * A new CPU is coming into the system, either via booting or DR. + * Allocate its PG data, and notify all registered classes about + * the new CPU. + * + * This routine may block. + */ +void +pg_cpu_init(cpu_t *cp) +{ + pg_cid_t i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Allocate and size the per CPU pg data + */ + cp->cpu_pg = pg_cpu_data_alloc(); + + /* + * Notify all registered classes about the new CPU + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPU_INIT(i, cp); } + +/* + * This CPU is being deleted from the system. Notify the classes + * and free up the CPU's PG data. + */ +void +pg_cpu_fini(cpu_t *cp) +{ + pg_cid_t i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * This can happen if the CPU coming into the system + * failed to power on. + */ + if (cp->cpu_pg == NULL || + cp->cpu_pg == &bootstrap_pg_data) + return; + + for (i = 0; i < pg_nclasses; i++) + PG_CPU_FINI(i, cp); + + pg_cpu_data_free(cp->cpu_pg); + cp->cpu_pg = NULL; } + +/* + * This CPU is becoming active (online) + * This routine may not block as it is called from paused CPUs + * context. 
+ */ +void +pg_cpu_active(cpu_t *cp) +{ + pg_cid_t i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Notify all registered classes about the new CPU + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPU_ACTIVE(i, cp); +} + +/* + * This CPU is going inactive (offline) + * This routine may not block, as it is called from paused + * CPUs context. + */ +void +pg_cpu_inactive(cpu_t *cp) +{ + pg_cid_t i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Notify all registered classes about the new CPU + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPU_INACTIVE(i, cp); +} + +/* + * Invoked when the CPU is about to move into the partition + * This routine may block. + */ +void +pg_cpupart_in(cpu_t *cp, cpupart_t *pp) +{ + int i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Notify all registered classes that the + * CPU is about to enter the CPU partition + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPUPART_IN(i, cp, pp); +} + +/* + * Invoked when the CPU is about to move out of the partition + * This routine may block. + */ +/*ARGSUSED*/ +void +pg_cpupart_out(cpu_t *cp, cpupart_t *pp) +{ + int i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Notify all registered classes that the + * CPU is about to leave the CPU partition + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPUPART_OUT(i, cp, pp); +} + +/* + * Invoked when the CPU is *moving* partitions. + * + * This routine may not block, as it is called from paused CPUs + * context. + */ +void +pg_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp) +{ + int i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Notify all registered classes that the + * CPU is about to leave the CPU partition + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPUPART_MOVE(i, cp, oldpp, newpp); +} + +/* + * Provide the specified CPU a bootstrap pg + * This is needed to allow sane behaviour if any PG consuming + * code needs to deal with a partially initialized CPU + */ +void +pg_cpu_bootstrap(cpu_t *cp) +{ + cp->cpu_pg = &bootstrap_pg_data; +} + +/*ARGSUSED*/ +static pg_t * +pg_alloc_default(pg_class_t class) +{ + return (kmem_zalloc(sizeof (pg_t), KM_SLEEP)); +} + +/*ARGSUSED*/ +static void +pg_free_default(struct pg *pg) +{ + kmem_free(pg, sizeof (pg_t)); +} diff --git a/usr/src/uts/common/os/pghw.c b/usr/src/uts/common/os/pghw.c new file mode 100644 index 0000000000..e2dc2a38f2 --- /dev/null +++ b/usr/src/uts/common/os/pghw.c @@ -0,0 +1,420 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/thread.h> +#include <sys/cpuvar.h> +#include <sys/kmem.h> +#include <sys/cmn_err.h> +#include <sys/group.h> +#include <sys/pg.h> +#include <sys/pghw.h> + +/* + * Processor Groups: Hardware sharing relationship layer + * + * This file implements an extension to Processor Groups to capture + * hardware sharing relationships existing between logical CPUs. Examples of + * hardware sharing relationships include shared caches on some CMT + * processor architectures, or shared local memory controllers on NUMA + * based system architectures. + * + * The pghw_t structure represents the extended PG. The first member + * of the structure is the generic pg_t with the pghw specific members + * following. The generic pg_t *must* remain the first member of the + * structure as the code uses casting of structure references to access + * the generic pg_t structure elements. + * + * In addition to the generic CPU grouping, physical PGs have a hardware + * sharing relationship enumerated "type", and an instance id. The enumerated + * type is defined by the pghw_type_t enumeration, while the instance id + * uniquely identifies the sharing instance from among others of the same + * hardware sharing type. + * + * The physical PGs are organized into an overall hierarchy, and are tracked + * in a number of different per CPU, and per pghw_type_t type groups. + * As an example: + * + * ------------- + * | pg_hw | + * | (group_t) | + * ------------- + * || ============================ + * ||\\-----------------------// \\ \\ + * || | hwset (PGC_HW_CHIP) | ------------- ------------- + * || | (group_t) | | pghw_t | | pghw_t | + * || ----------------------- | chip 0 | | chip 1 | + * || ------------- ------------- + * || \\ \\ \\ \\ \\ \\ \\ \\ + * || cpu cpu cpu cpu cpu cpu cpu cpu + * || + * || ============================ + * ||\\-----------------------// \\ \\ + * || | hwset (PGC_HW_IPIPE)| ------------- ------------- + * || | (group_t) | | pghw_t | | pghw_t | + * || ----------------------- | ipipe 0 | | ipipe 1 | + * || ------------- ------------- + * || \\ \\ \\ \\ + * || cpu cpu cpu cpu + * ... + * + * + * The top level pg_hw is a group of "hwset" groups. Each hwset holds a group + * of physical PGs of the same hardware sharing type. Within each hwset, the + * PG's instance id uniquely identifies the grouping relationship among other + * groupings of the same sharing type. The instance id for a grouping is + * platform defined, and in some cases may be used by platform code as a handle + * to search for a particular relationship instance. + * + * Each physical PG (by virtue of the embedded pg_t) contains a group of CPUs + * that participate in the sharing relationship. Each CPU also has associated + * with it a grouping tracking the PGs in which the CPU belongs. This can be + * used to iterate over the various relationships in which the CPU participates + * (the CPU's chip, cache, lgroup, etc.). + * + * The hwsets are created dynamically as new hardware sharing relationship types + * are instantiated. They are never destroyed, as once a given relationship + * type appears in the system, it is quite likely that at least one instance of + * that relationship will always persist as long as the system is running. 
+ */ + +static group_t *pg_hw; /* top level pg hw group */ + +/* + * Lookup table mapping hardware sharing relationships with hierarchy levels + */ +static int pghw_level_table[PGHW_NUM_COMPONENTS]; + +/* + * Physical PG kstats + */ +struct pghw_kstat { + kstat_named_t pg_id; + kstat_named_t pg_class; + kstat_named_t pg_ncpus; + kstat_named_t pg_instance_id; + kstat_named_t pg_hw; +} pghw_kstat = { + { "id", KSTAT_DATA_UINT64 }, + { "pg_class", KSTAT_DATA_STRING }, + { "ncpus", KSTAT_DATA_UINT64 }, + { "instance_id", KSTAT_DATA_UINT64 }, + { "hardware", KSTAT_DATA_STRING }, +}; + +kmutex_t pghw_kstat_lock; + +/* + * hwset operations + */ +static group_t *pghw_set_create(pghw_type_t); +static void pghw_set_add(group_t *, pghw_t *); +static void pghw_set_remove(group_t *, pghw_t *); + +/* + * Initialize the physical portion of a physical PG + */ +void +pghw_init(pghw_t *pg, cpu_t *cp, pghw_type_t hw) +{ + group_t *hwset; + + if ((hwset = pghw_set_lookup(hw)) == NULL) { + /* + * Haven't seen this hardware type yet + */ + hwset = pghw_set_create(hw); + } + + pghw_set_add(hwset, pg); + pg->pghw_hw = hw; + pg->pghw_instance = + pg_plat_hw_instance_id(cp, hw); + pghw_kstat_create(pg); +} + +/* + * Teardown the physical portion of a physical PG + */ +void +pghw_fini(pghw_t *pg) +{ + group_t *hwset; + + hwset = pghw_set_lookup(pg->pghw_hw); + ASSERT(hwset != NULL); + + pghw_set_remove(hwset, pg); + pg->pghw_instance = (id_t)PGHW_INSTANCE_ANON; + pg->pghw_hw = (pghw_type_t)-1; + + if (pg->pghw_kstat) + kstat_delete(pg->pghw_kstat); +} + +/* + * Find an existing physical PG in which to place + * the given CPU for the specified hardware sharing + * relationship + */ +pghw_t * +pghw_place_cpu(cpu_t *cp, pghw_type_t hw) +{ + group_t *hwset; + + if ((hwset = pghw_set_lookup(hw)) == NULL) { + return (NULL); + } + + return ((pghw_t *)pg_cpu_find_pg(cp, hwset)); +} + +/* + * Find the pg representing the hw sharing relationship in which + * cp belongs + */ +pghw_t * +pghw_find_pg(cpu_t *cp, pghw_type_t hw) +{ + group_iter_t i; + pghw_t *pg; + + group_iter_init(&i); + while ((pg = group_iterate(&cp->cpu_pg->pgs, &i)) != NULL) { + if (pg->pghw_hw == hw) + return (pg); + } + return (NULL); +} + +/* + * Find the PG of the given hardware sharing relationship + * type with the given instance id + */ +pghw_t * +pghw_find_by_instance(id_t id, pghw_type_t hw) +{ + group_iter_t i; + group_t *set; + pghw_t *pg; + + set = pghw_set_lookup(hw); + if (!set) + return (NULL); + + group_iter_init(&i); + while ((pg = group_iterate(set, &i)) != NULL) { + if (pg->pghw_instance == id) + return (pg); + } + return (NULL); +} + +/* + * CPUs physical ID cache creation / destruction + * The cache's elements are initialized to the CPU's id + */ +void +pghw_physid_create(cpu_t *cp) +{ + int i; + + cp->cpu_physid = kmem_alloc(sizeof (cpu_physid_t), KM_SLEEP); + + for (i = 0; i < (sizeof (cpu_physid_t) / sizeof (id_t)); i++) { + ((id_t *)cp->cpu_physid)[i] = cp->cpu_id; + } +} + +void +pghw_physid_destroy(cpu_t *cp) +{ + if (cp->cpu_physid) { + kmem_free(cp->cpu_physid, sizeof (cpu_physid_t)); + cp->cpu_physid = NULL; + } +} + +/* + * Return a sequential level identifier for the specified + * hardware sharing relationship + */ +int +pghw_level(pghw_type_t hw) +{ + return (pg_plat_hw_level(hw)); +} + +/* + * Create a new, empty hwset. + * This routine may block, and must not be called from any + * paused CPU context. 
+ */ +static group_t * +pghw_set_create(pghw_type_t hw) +{ + group_t *g; + int ret; + + /* + * Create the top level PG hw group if it doesn't already exist + * This is a "set" of hardware sets, that is ordered (and indexed) + * by the pghw_type_t enum. + */ + if (pg_hw == NULL) { + pg_hw = kmem_alloc(sizeof (group_t), KM_SLEEP); + group_create(pg_hw); + group_expand(pg_hw, (uint_t)PGHW_NUM_COMPONENTS); + } + + /* + * Create the new hwset + * Add it to the top level pg_hw group. + */ + g = kmem_alloc(sizeof (group_t), KM_SLEEP); + group_create(g); + + ret = group_add_at(pg_hw, g, (uint_t)hw); + ASSERT(ret == 0); + + /* + * Update the table that maps hardware sharing relationships + * to hierarchy levels + */ + ASSERT(pghw_level_table[hw] == NULL); + pghw_level_table[hw] = pg_plat_hw_level(hw); + + return (g); +} + +/* + * Find the hwset associated with the given hardware sharing type + */ +group_t * +pghw_set_lookup(pghw_type_t hw) +{ + group_t *hwset; + + if (pg_hw == NULL) + return (NULL); + + hwset = GROUP_ACCESS(pg_hw, (uint_t)hw); + return (hwset); +} + +/* + * Add a PG to a hwset + */ +static void +pghw_set_add(group_t *hwset, pghw_t *pg) +{ + (void) group_add(hwset, pg, GRP_RESIZE); +} + +/* + * Remove a PG from a hwset + */ +static void +pghw_set_remove(group_t *hwset, pghw_t *pg) +{ + int result; + + result = group_remove(hwset, pg, GRP_RESIZE); + ASSERT(result == 0); +} + + +/* + * Return a string name given a pg_hw sharing type + */ +#define PGHW_TYPE_NAME_MAX 8 + +static char * +pghw_type_string(pghw_type_t hw) +{ + switch (hw) { + case PGHW_IPIPE: + return ("ipipe"); + case PGHW_CACHE: + return ("cache"); + case PGHW_FPU: + return ("fpu"); + case PGHW_CHIP: + return ("chip"); + case PGHW_MEMORY: + return ("memory"); + default: + return ("unknown"); + } +} + +/* + * Create / Update routines for PG hw kstats + * + * It is the intention of these kstats to provide some level + * of informational / debugging observability into the types + * and nature of the system's detected hardware sharing relationships + */ +void +pghw_kstat_create(pghw_t *pg) +{ + /* + * Create a physical pg kstat + */ + if ((pg->pghw_kstat = kstat_create("pg", ((pg_t *)pg)->pg_id, + "pg", "pg", KSTAT_TYPE_NAMED, + sizeof (pghw_kstat) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL)) != NULL) { + pg->pghw_kstat->ks_data_size += PG_CLASS_NAME_MAX; + pg->pghw_kstat->ks_data_size += PGHW_TYPE_NAME_MAX; + pg->pghw_kstat->ks_lock = &pghw_kstat_lock; + pg->pghw_kstat->ks_data = &pghw_kstat; + pg->pghw_kstat->ks_update = pghw_kstat_update; + pg->pghw_kstat->ks_private = pg; + kstat_install(pg->pghw_kstat); + } +} + +int +pghw_kstat_update(kstat_t *ksp, int rw) +{ + struct pghw_kstat *pgsp = &pghw_kstat; + pghw_t *pg = ksp->ks_private; + + if (rw == KSTAT_WRITE) + return (EACCES); + + pgsp->pg_id.value.ui64 = ((pg_t *)pg)->pg_id; + pgsp->pg_ncpus.value.ui64 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus); + pgsp->pg_instance_id.value.ui64 = (uint64_t)pg->pghw_instance; + kstat_named_setstr(&pgsp->pg_class, ((pg_t *)pg)->pg_class->pgc_name); + kstat_named_setstr(&pgsp->pg_hw, pghw_type_string(pg->pghw_hw)); + + return (0); +} |
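The class mechanism described in pg.c's block comment is exercised by registering a struct pg_ops vector with pg_class_register(). Below is a minimal, hypothetical sketch of an "xpg" class, not code from this change: the xpg names are illustrative, the initializer follows the field order of pg_ops_default above, callbacks left NULL are skipped (the CPU and cpupart callbacks) or fall back to the default class (alloc and free).

#include <sys/pg.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>

/* Hypothetical class specific PG; the generic pg_t must remain first */
typedef struct xpg {
	pg_t	xpg_pg;		/* generic PG (common view) */
	int	xpg_data;	/* class specific data */
} xpg_t;

static pg_t *
xpg_alloc(void)
{
	return (kmem_zalloc(sizeof (xpg_t), KM_SLEEP));
}

static void
xpg_free(pg_t *pg)
{
	kmem_free(pg, sizeof (xpg_t));
}

/* ARGSUSED */
static void
xpg_cpu_init(cpu_t *cp)
{
	/* Invoked (with cpu_lock held) as each new CPU enters the system */
}

static struct pg_ops xpg_ops = {
	xpg_alloc,	/* alloc */
	xpg_free,	/* free */
	xpg_cpu_init,	/* cpu_init */
	NULL,		/* cpu_fini */
	NULL,		/* cpu_active */
	NULL,		/* cpu_inactive */
	NULL,		/* cpupart_in */
	NULL,		/* cpupart_out */
	NULL,		/* cpupart_move */
	NULL,		/* cpu_belongs */
};

static pg_cid_t xpg_cid;

void
xpg_init(void)
{
	xpg_cid = pg_class_register("xpg", &xpg_ops, PGR_LOGICAL);
}

A PG of this class would then be created with pg_create(xpg_cid); because the generic pg_t is the first member, the result can be recast between pg_t * and xpg_t *, mirroring how pghw_t extends pg_t for hardware sharing relationships.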