summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authoresaxe <none@none>2007-01-17 18:01:29 -0800
committeresaxe <none@none>2007-01-17 18:01:29 -0800
commitfb2f18f820d90b001aea4fb27dd654bc1263c440 (patch)
tree4b88b69e1244f360a85d70294a4498ecf57ca283 /usr/src
parent9a7670889e9c36ec355371e6b02f2d9084f040dc (diff)
downloadillumos-joyent-fb2f18f820d90b001aea4fb27dd654bc1263c440.tar.gz
6461311 multi-level CMT scheduling optimizations
6509639 cpu0 is not in the right chip_t if its chipid is not zero --HG-- rename : usr/src/uts/common/os/chip.c => deleted_files/usr/src/uts/common/os/chip.c rename : usr/src/uts/common/sys/chip.h => deleted_files/usr/src/uts/common/sys/chip.h
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/genunix.c14
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/group.c179
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/group.h50
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/pg.c141
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/pg.h48
-rw-r--r--usr/src/cmd/mdb/intel/amd64/genunix/Makefile4
-rw-r--r--usr/src/cmd/mdb/intel/ia32/Makefile2
-rw-r--r--usr/src/cmd/mdb/intel/ia32/genunix/Makefile4
-rw-r--r--usr/src/cmd/mdb/sparc/v9/genunix/Makefile4
-rw-r--r--usr/src/pkgdefs/SUNWhea/prototype_com5
-rw-r--r--usr/src/uts/common/Makefile.files6
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_prvnops.c6
-rw-r--r--usr/src/uts/common/conf/param.c8
-rw-r--r--usr/src/uts/common/disp/cmt.c804
-rw-r--r--usr/src/uts/common/disp/cpupart.c34
-rw-r--r--usr/src/uts/common/disp/disp.c199
-rw-r--r--usr/src/uts/common/os/bitset.c168
-rw-r--r--usr/src/uts/common/os/chip.c576
-rw-r--r--usr/src/uts/common/os/clock.c37
-rw-r--r--usr/src/uts/common/os/cpu.c40
-rw-r--r--usr/src/uts/common/os/group.c322
-rw-r--r--usr/src/uts/common/os/lgrp.c77
-rw-r--r--usr/src/uts/common/os/pg.c624
-rw-r--r--usr/src/uts/common/os/pghw.c420
-rw-r--r--usr/src/uts/common/sys/Makefile7
-rw-r--r--usr/src/uts/common/sys/bitset.h76
-rw-r--r--usr/src/uts/common/sys/chip.h207
-rw-r--r--usr/src/uts/common/sys/cmt.h75
-rw-r--r--usr/src/uts/common/sys/cpupart.h7
-rw-r--r--usr/src/uts/common/sys/cpuvar.h9
-rw-r--r--usr/src/uts/common/sys/disp.h14
-rw-r--r--usr/src/uts/common/sys/group.h104
-rw-r--r--usr/src/uts/common/sys/lgrp.h4
-rw-r--r--usr/src/uts/common/sys/pg.h173
-rw-r--r--usr/src/uts/common/sys/pghw.h135
-rw-r--r--usr/src/uts/i86pc/cpu/amd_opteron/ao_cpu.c10
-rw-r--r--usr/src/uts/i86pc/cpu/amd_opteron/ao_main.c7
-rw-r--r--usr/src/uts/i86pc/cpu/amd_opteron/ao_mca.c8
-rw-r--r--usr/src/uts/i86pc/io/mc/mcamd.h6
-rw-r--r--usr/src/uts/i86pc/io/mc/mcamd_drv.c46
-rw-r--r--usr/src/uts/i86pc/io/mc/mcamd_pcicfg.c10
-rw-r--r--usr/src/uts/i86pc/io/mc/mcamd_subr.c4
-rw-r--r--usr/src/uts/i86pc/os/cpuid.c12
-rw-r--r--usr/src/uts/i86pc/os/lgrpplat.c6
-rw-r--r--usr/src/uts/i86pc/os/mlsetup.c15
-rw-r--r--usr/src/uts/i86pc/os/mp_machdep.c117
-rw-r--r--usr/src/uts/i86pc/os/mp_startup.c25
-rw-r--r--usr/src/uts/i86pc/os/startup.c14
-rw-r--r--usr/src/uts/intel/ia32/os/cpc_subr.c55
-rw-r--r--usr/src/uts/intel/pcbe/p4_pcbe.c11
-rw-r--r--usr/src/uts/intel/sys/x86_archext.h5
-rw-r--r--usr/src/uts/sun4/io/trapstat.c6
-rw-r--r--usr/src/uts/sun4/os/machdep.c3
-rw-r--r--usr/src/uts/sun4/os/mlsetup.c13
-rw-r--r--usr/src/uts/sun4/os/mp_startup.c20
-rw-r--r--usr/src/uts/sun4u/cpu/us3_cheetahplus.c23
-rw-r--r--usr/src/uts/sun4u/cpu/us3_common.c10
-rw-r--r--usr/src/uts/sun4u/os/cmp.c141
-rw-r--r--usr/src/uts/sun4v/cpu/generic.c15
-rw-r--r--usr/src/uts/sun4v/cpu/niagara.c17
-rw-r--r--usr/src/uts/sun4v/cpu/niagara2.c17
-rw-r--r--usr/src/uts/sun4v/os/cmp.c99
-rw-r--r--usr/src/uts/sun4v/os/fillsysinfo.c25
-rw-r--r--usr/src/uts/sun4v/sys/machcpuvar.h7
64 files changed, 4027 insertions, 1303 deletions
diff --git a/usr/src/cmd/mdb/common/modules/genunix/genunix.c b/usr/src/cmd/mdb/common/modules/genunix/genunix.c
index 9863d8a241..66acba369f 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/genunix.c
+++ b/usr/src/cmd/mdb/common/modules/genunix/genunix.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -74,6 +74,8 @@
#include "devinfo.h"
#include "leaky.h"
#include "lgrp.h"
+#include "pg.h"
+#include "group.h"
#include "list.h"
#include "log.h"
#include "kgrep.h"
@@ -3422,6 +3424,12 @@ static const mdb_dcmd_t dcmds[] = {
{ NVLIST_DCMD_NAME, NVLIST_DCMD_USAGE, NVLIST_DCMD_DESCR,
print_nvlist },
+ /* from pg.c */
+ { "pg", "?[-q]", "display a pg", pg},
+ /* from group.c */
+ { "group", "?[-q]", "display a group", group},
+
+ /* from log.c */
/* from rctl.c */
{ "rctl_dict", "?", "print systemwide default rctl definitions",
rctl_dict },
@@ -3714,6 +3722,10 @@ static const mdb_walker_t walkers[] = {
{ "lgrp_rsrc_cpu", "walk lgroup CPU resources of given lgroup",
lgrp_rsrc_cpu_walk_init, lgrp_set_walk_step, NULL },
+ /* from group.c */
+ { "group", "walk all elements of a group",
+ group_walk_init, group_walk_step, NULL },
+
/* from list.c */
{ LIST_WALK_NAME, LIST_WALK_DESC,
list_walk_init, list_walk_step, list_walk_fini },
diff --git a/usr/src/cmd/mdb/common/modules/genunix/group.c b/usr/src/cmd/mdb/common/modules/genunix/group.c
new file mode 100644
index 0000000000..6040ba4939
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/genunix/group.c
@@ -0,0 +1,179 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Display group information and walk all elements of a group
+ */
+
+#include "group.h"
+
+#include <mdb/mdb_modapi.h>
+#include <sys/group.h>
+
+/*
+ * ::group dcmd.
+ *
+ * Prints one summary line (address, size, capacity, set pointer) for the
+ * group_t found at addr. An explicit address is required. With -q, or
+ * when the output feeds a pipeline, only the address is printed. The
+ * group is read from the target even in quiet mode, so an unreadable
+ * address is always reported.
+ *
+ * Returns DCMD_USAGE on bad invocation, DCMD_ERR if the group cannot be
+ * read, DCMD_OK otherwise.
+ */
+/* ARGSUSED */
+int
+group(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	group_t	grp;
+	int	quiet = 0;	/* print the address only */
+
+	/* The dcmd makes no sense without an address. */
+	if ((flags & DCMD_ADDRSPEC) == 0)
+		return (DCMD_USAGE);
+
+	if (mdb_getopts(argc, argv,
+	    'q', MDB_OPT_SETBITS, TRUE, &quiet, NULL) != argc)
+		return (DCMD_USAGE);
+
+	/* Pipeline consumers only want addresses. */
+	if ((flags & DCMD_PIPE_OUT) != 0)
+		quiet = B_TRUE;
+
+	if (!quiet && DCMD_HDRSPEC(flags)) {
+		mdb_printf("%?s %6s %9s %?s\n",
+		    "ADDR", "SIZE", "CAPACITY", "SET");
+	}
+
+	if (mdb_vread(&grp, sizeof (struct group), addr) == -1) {
+		mdb_warn("unable to read 'group' at %p", addr);
+		return (DCMD_ERR);
+	}
+
+	if (quiet) {
+		mdb_printf("%0?p\n", addr);
+	} else {
+		mdb_printf("%?p %6d %9d %?p\n",
+		    addr, grp.grp_size, grp.grp_capacity, grp.grp_set);
+	}
+
+	return (DCMD_OK);
+}
+
+/*
+ * Walk all elements in the group set.
+ */
+
+/*
+ * Private state of the "group" walker, created by group_walk_init() and
+ * consumed by group_walk_step().
+ */
+typedef struct group_walk {
+ uintptr_t *gw_set; /* debugger-local copy of the group's set entries */
+ int gw_size; /* entries still to visit; decremented by each step */
+ int gw_pos; /* index in gw_set of the entry in walk_addr */
+ int gw_initialized; /* zeroed at init; not read anywhere in this file */
+} group_walk_t;
+
+
+/*
+ * Walker init: take a snapshot of the set of the group at wsp->walk_addr.
+ *
+ * The group_t is read from the target, its grp_size is validated, and the
+ * first grp_size set entries are copied into a local array. On success
+ * walk_data holds the walker state and walk_addr the first set entry.
+ *
+ * Returns WALK_ERR if either read fails or grp_size is negative,
+ * WALK_DONE for an empty group, and WALK_NEXT otherwise.
+ */
+int
+group_walk_init(mdb_walk_state_t *wsp)
+{
+	group_walk_t	*state;
+	group_t		grp;
+
+	state = mdb_alloc(sizeof (group_walk_t), UM_SLEEP | UM_GC);
+
+	if (mdb_vread(&grp, sizeof (struct group), wsp->walk_addr) == -1) {
+		mdb_warn("couldn't read 'group' at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	state->gw_initialized = 0;
+	state->gw_pos = 0;
+	state->gw_size = grp.grp_size;
+
+	if (state->gw_size < 0) {
+		mdb_warn("invalid group at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	/* Nothing to walk in an empty group. */
+	if (state->gw_size == 0)
+		return (WALK_DONE);
+
+	/*
+	 * Copy the set entries so the step function can iterate locally.
+	 */
+	state->gw_set = mdb_alloc(grp.grp_size * sizeof (uintptr_t),
+	    UM_SLEEP | UM_GC);
+
+	if (mdb_vread(state->gw_set, grp.grp_size * sizeof (uintptr_t),
+	    (uintptr_t)grp.grp_set) == -1) {
+		mdb_warn("couldn't read 'group set' at %p", grp.grp_set);
+		return (WALK_ERR);
+	}
+
+	wsp->walk_addr = state->gw_set[0];
+	wsp->walk_data = state;
+
+	return (WALK_NEXT);
+}
+
+/*
+ * Walker step: hand the current set entry to the walk callback and move
+ * on to the next one.
+ *
+ * Zero (empty) entries are skipped without invoking the callback, but
+ * still count against the number of entries left to visit. The walk ends
+ * with WALK_DONE once the last copied entry has been processed. A
+ * non-WALK_NEXT status returned by the callback (for example WALK_ERR) is
+ * always passed back to the caller, including for the final entry, so
+ * callback failures are never masked as a normal end of walk.
+ */
+int
+group_walk_step(mdb_walk_state_t *wsp)
+{
+	group_walk_t	*gw = (group_walk_t *)wsp->walk_data;
+	int		status = WALK_NEXT;
+
+	/*
+	 * Already visited all valid elements, nothing else to do.
+	 */
+	if (gw->gw_size <= 0)
+		return (WALK_DONE);
+
+	/*
+	 * Report non-empty elements only. walk_addr is an integer address,
+	 * so compare it with 0 rather than with a pointer constant.
+	 */
+	if (wsp->walk_addr != 0) {
+		status = wsp->walk_callback(wsp->walk_addr, wsp->walk_data,
+		    wsp->walk_cbdata);
+	}
+
+	/*
+	 * Advance to the next element, or finish the walk. Only a
+	 * WALK_NEXT status is turned into WALK_DONE; anything else the
+	 * callback returned is preserved.
+	 */
+	gw->gw_size--;
+
+	if (gw->gw_size > 0)
+		wsp->walk_addr = gw->gw_set[++gw->gw_pos];
+	else if (status == WALK_NEXT)
+		status = WALK_DONE;
+
+	return (status);
+}
diff --git a/usr/src/cmd/mdb/common/modules/genunix/group.h b/usr/src/cmd/mdb/common/modules/genunix/group.h
new file mode 100644
index 0000000000..3d8ca22097
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/genunix/group.h
@@ -0,0 +1,50 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _MDB_GROUP_H
+#define _MDB_GROUP_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Declarations for the ::group dcmd and the "group" walker implemented in group.c.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <mdb/mdb_modapi.h>
+
+/* ::group dcmd: display the group at the given address */
+int group(uintptr_t, uint_t, int, const mdb_arg_t *);
+/* "group" walker: init and step callbacks */
+int group_walk_init(mdb_walk_state_t *);
+int group_walk_step(mdb_walk_state_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MDB_GROUP_H */
diff --git a/usr/src/cmd/mdb/common/modules/genunix/pg.c b/usr/src/cmd/mdb/common/modules/genunix/pg.c
new file mode 100644
index 0000000000..4d23c08bc5
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/genunix/pg.c
@@ -0,0 +1,141 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Display processor group information
+ */
+
+#include "pg.h"
+
+#include <mdb/mdb_modapi.h>
+#include <sys/pghw.h>
+
+/*
+ * PG hardware types indexed by hardware ID
+ */
+char *pg_hw_names[] = {
+	"hw",
+	"ipipe",
+	"cache",
+	"fpu",
+	"mpipe/chip",
+	"memory",
+};
+
+#define	A_CNT(arr)	(sizeof (arr) / sizeof (arr[0]))
+
+#define	NHW	A_CNT(pg_hw_names)
+
+/*
+ * Convert HW id to symbolic name.
+ *
+ * Valid ids are 0 through NHW - 1. Any other value, including NHW itself,
+ * yields "UNKNOWN" instead of indexing past the end of pg_hw_names[].
+ */
+static char *
+pg_hw_name(int hw)
+{
+	return ((hw < 0 || hw >= NHW) ? "UNKNOWN" : pg_hw_names[hw]);
+}
+
+/*
+ * ::pg dcmd.
+ *
+ * Prints one summary line for the processor group at addr. An explicit
+ * address is required. With -q, or when the output feeds a pipeline, only
+ * the address is printed (the pg is still read first, so a bad address is
+ * reported). For PGs whose relation is PGR_PHYSICAL the structure is
+ * re-read as a pghw_t to show the hardware instance and type; other PGs
+ * get the shorter basic line.
+ *
+ * Returns DCMD_USAGE on bad invocation, DCMD_ERR if any structure cannot
+ * be read, DCMD_OK otherwise.
+ */
+/* ARGSUSED */
+int
+pg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	pg_t		pgbuf;
+	pghw_t		hwbuf;
+	pg_class_t	classbuf;
+	int		quiet = 0;	/* print the address only */
+
+	/* The dcmd makes no sense without an address. */
+	if ((flags & DCMD_ADDRSPEC) == 0)
+		return (DCMD_USAGE);
+
+	if (mdb_getopts(argc, argv,
+	    'q', MDB_OPT_SETBITS, TRUE, &quiet, NULL) != argc)
+		return (DCMD_USAGE);
+
+	/* Pipeline consumers only want addresses. */
+	if ((flags & DCMD_PIPE_OUT) != 0)
+		quiet = B_TRUE;
+
+	if (!quiet && DCMD_HDRSPEC(flags)) {
+		mdb_printf("%6s %?s %6s %7s %9s %5s\n",
+		    "PGID", "ADDR", "PHYSID", "CLASS", "HARDWARE", "#CPUs");
+	}
+
+	/* Fetch the generic part of the PG. */
+	if (mdb_vread(&pgbuf, sizeof (struct pg), addr) == -1) {
+		mdb_warn("unable to read 'pg' at %p", addr);
+		return (DCMD_ERR);
+	}
+
+	if (quiet) {
+		mdb_printf("%0?p\n", addr);
+		return (DCMD_OK);
+	}
+
+	/* The class record supplies the class name column. */
+	if (mdb_vread(&classbuf, sizeof (struct pg_class),
+	    (uintptr_t)pgbuf.pg_class) == -1) {
+		mdb_warn("unable to read 'pg_class' at %p", pgbuf.pg_class);
+		return (DCMD_ERR);
+	}
+
+	if (pgbuf.pg_relation != PGR_PHYSICAL) {
+		/* Basic PG: no hardware columns. */
+		mdb_printf("%6d %?p %7s %5d\n",
+		    pgbuf.pg_id, addr, classbuf.pgc_name,
+		    pgbuf.pg_cpus.grp_size);
+		return (DCMD_OK);
+	}
+
+	/* Physical PG: read the same address again as a pghw_t. */
+	if (mdb_vread(&hwbuf, sizeof (struct pghw), addr) == -1) {
+		mdb_warn("unable to read 'pghw' at %p", addr);
+		return (DCMD_ERR);
+	}
+
+	mdb_printf("%6d %?p %6d %7s %9s %5d\n",
+	    pgbuf.pg_id, addr, hwbuf.pghw_instance,
+	    classbuf.pgc_name, pg_hw_name(hwbuf.pghw_hw),
+	    pgbuf.pg_cpus.grp_size);
+
+	return (DCMD_OK);
+}
diff --git a/usr/src/cmd/mdb/common/modules/genunix/pg.h b/usr/src/cmd/mdb/common/modules/genunix/pg.h
new file mode 100644
index 0000000000..1c87475cab
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/genunix/pg.h
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _MDB_PG_H
+#define _MDB_PG_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Declaration of the ::pg dcmd implemented in pg.c.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <mdb/mdb_modapi.h>
+
+/* ::pg dcmd: display the processor group at the given address */
+int pg(uintptr_t, uint_t, int, const mdb_arg_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MDB_PG_H */
diff --git a/usr/src/cmd/mdb/intel/amd64/genunix/Makefile b/usr/src/cmd/mdb/intel/amd64/genunix/Makefile
index 04c22008ba..801d16db46 100644
--- a/usr/src/cmd/mdb/intel/amd64/genunix/Makefile
+++ b/usr/src/cmd/mdb/intel/amd64/genunix/Makefile
@@ -21,7 +21,7 @@
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#ident "%Z%%M% %I% %E% SMI"
@@ -40,6 +40,7 @@ COMMONSRCS = \
findstack.c \
fm.c \
genunix.c \
+ group.c \
kgrep.c \
kmem.c \
ldi.c \
@@ -55,6 +56,7 @@ COMMONSRCS = \
ndievents.c \
net.c \
nvpair.c \
+ pg.c \
rctl.c \
sobj.c \
streams.c \
diff --git a/usr/src/cmd/mdb/intel/ia32/Makefile b/usr/src/cmd/mdb/intel/ia32/Makefile
index 4a0e384174..7bf0dc08b2 100644
--- a/usr/src/cmd/mdb/intel/ia32/Makefile
+++ b/usr/src/cmd/mdb/intel/ia32/Makefile
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/mdb/intel/ia32/genunix/Makefile b/usr/src/cmd/mdb/intel/ia32/genunix/Makefile
index e02aea1345..63609d42b2 100644
--- a/usr/src/cmd/mdb/intel/ia32/genunix/Makefile
+++ b/usr/src/cmd/mdb/intel/ia32/genunix/Makefile
@@ -21,7 +21,7 @@
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#ident "%Z%%M% %I% %E% SMI"
@@ -40,6 +40,7 @@ COMMONSRCS = \
findstack.c \
fm.c \
genunix.c \
+ group.c \
kgrep.c \
kmem.c \
ldi.c \
@@ -55,6 +56,7 @@ COMMONSRCS = \
ndievents.c \
net.c \
nvpair.c \
+ pg.c \
rctl.c \
sobj.c \
streams.c \
diff --git a/usr/src/cmd/mdb/sparc/v9/genunix/Makefile b/usr/src/cmd/mdb/sparc/v9/genunix/Makefile
index 4e67aae365..2cf7038d9e 100644
--- a/usr/src/cmd/mdb/sparc/v9/genunix/Makefile
+++ b/usr/src/cmd/mdb/sparc/v9/genunix/Makefile
@@ -21,7 +21,7 @@
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#ident "%Z%%M% %I% %E% SMI"
@@ -40,6 +40,7 @@ COMMONSRCS = \
findstack.c \
fm.c \
genunix.c \
+ group.c \
kgrep.c \
kmem.c \
ldi.c \
@@ -55,6 +56,7 @@ COMMONSRCS = \
ndievents.c \
net.c \
nvpair.c \
+ pg.c \
rctl.c \
sobj.c \
streams.c \
diff --git a/usr/src/pkgdefs/SUNWhea/prototype_com b/usr/src/pkgdefs/SUNWhea/prototype_com
index b10c0e5dd6..fc2a8c7d1e 100644
--- a/usr/src/pkgdefs/SUNWhea/prototype_com
+++ b/usr/src/pkgdefs/SUNWhea/prototype_com
@@ -529,6 +529,7 @@ f none usr/include/sys/avl.h 644 root bin
f none usr/include/sys/avl_impl.h 644 root bin
f none usr/include/sys/battery.h 644 root bin
f none usr/include/sys/bitmap.h 644 root bin
+f none usr/include/sys/bitset.h 644 root bin
f none usr/include/sys/bl.h 644 root bin
f none usr/include/sys/bofi.h 644 root bin
f none usr/include/sys/bofi_impl.h 644 root bin
@@ -544,7 +545,6 @@ f none usr/include/sys/callb.h 644 root bin
f none usr/include/sys/callo.h 644 root bin
f none usr/include/sys/ccompile.h 644 root bin
f none usr/include/sys/cdio.h 644 root bin
-f none usr/include/sys/chip.h 644 root bin
f none usr/include/sys/cis.h 644 root bin
f none usr/include/sys/cis_handlers.h 644 root bin
f none usr/include/sys/cis_protos.h 644 root bin
@@ -756,6 +756,7 @@ f none usr/include/sys/fssnap_if.h 644 root bin
f none usr/include/sys/fstyp.h 644 root bin
f none usr/include/sys/ftrace.h 644 root bin
f none usr/include/sys/gfs.h 644 root bin
+f none usr/include/sys/group.h 644 root bin
f none usr/include/sys/hdio.h 644 root bin
f none usr/include/sys/hook.h 644 root bin
f none usr/include/sys/hook_event.h 644 root bin
@@ -938,6 +939,8 @@ f none usr/include/sys/pem.h 644 root bin
f none usr/include/sys/physmem.h 644 root bin
f none usr/include/sys/serializer.h 644 root bin
f none usr/include/sys/pfmod.h 644 root bin
+f none usr/include/sys/pg.h 644 root bin
+f none usr/include/sys/pghw.h 644 root bin
f none usr/include/sys/pm.h 0644 root bin
f none usr/include/sys/pool.h 644 root bin
f none usr/include/sys/pool_impl.h 644 root bin
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index d16d18b02d..b7f1d6b864 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -40,13 +40,15 @@ sparc_CORE_OBJS +=
COMMON_CORE_OBJS += \
atomic.o \
+ bitset.o \
bp_map.o \
brand.o \
- chip.o \
+ cmt.o \
cpu.o \
cpu_intr.o \
cpupart.o \
disp.o \
+ group.o \
kstat_fr.o \
lgrp.o \
lgrp_topo.o \
@@ -55,6 +57,8 @@ COMMON_CORE_OBJS += \
page_retire.o \
panic.o \
param.o \
+ pg.o \
+ pghw.o \
putnext.o \
rctl_proc.o \
rwlock.o \
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
index 0385139384..815ef1dd46 100644
--- a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -57,6 +57,7 @@
#include <sys/pool_pset.h>
#include <sys/pset.h>
#include <sys/zone.h>
+#include <sys/pghw.h>
/* Dependent on the Solaris procfs */
extern kthread_t *prchoose(proc_t *);
@@ -1861,7 +1862,8 @@ lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
*/
lxpr_uiobuf_printf(uiobuf,
"physical id\t: %lu\n"
- "siblings\t: %u\n", chip_plat_get_chipid(cp),
+ "siblings\t: %u\n",
+ pg_plat_hw_instance_id(cp, PGHW_CHIP),
cpuid_get_ncpu_per_chip(cp));
}
diff --git a/usr/src/uts/common/conf/param.c b/usr/src/uts/common/conf/param.c
index 6b39fd54b2..bdce7ec148 100644
--- a/usr/src/uts/common/conf/param.c
+++ b/usr/src/uts/common/conf/param.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -191,6 +191,9 @@ extern void deadman_init(void);
extern void clock_timer_init(void);
extern void clock_realtime_init(void);
extern void clock_highres_init(void);
+extern void pg_init(void);
+extern void pg_cmt_class_init(void);
+extern void pg_cpu0_init(void);
void (*init_tbl[])(void) = {
system_taskq_init,
@@ -208,6 +211,9 @@ void (*init_tbl[])(void) = {
anon_init,
segvn_init,
flk_init,
+ pg_init,
+ pg_cmt_class_init,
+ pg_cpu0_init,
schedctl_init,
fdb_init,
deadman_init,
diff --git a/usr/src/uts/common/disp/cmt.c b/usr/src/uts/common/disp/cmt.c
new file mode 100644
index 0000000000..1bf0704346
--- /dev/null
+++ b/usr/src/uts/common/disp/cmt.c
@@ -0,0 +1,804 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/thread.h>
+#include <sys/cpuvar.h>
+#include <sys/cpupart.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/kstat.h>
+#include <sys/processor.h>
+#include <sys/disp.h>
+#include <sys/group.h>
+#include <sys/pghw.h>
+#include <sys/bitset.h>
+#include <sys/lgrp.h>
+#include <sys/cmt.h>
+
+/*
+ * CMT scheduler / dispatcher support
+ *
+ * This file implements CMT scheduler support using Processor Groups.
+ * The CMT processor group class creates and maintains the CMT class
+ * specific processor group pg_cmt_t.
+ *
+ * ---------------------------- <-- pg_cmt_t *
+ * | pghw_t |
+ * ----------------------------
+ * | CMT class specific data |
+ * | - hierarchy linkage |
+ * | - CMT load balancing data|
+ * | - active CPU group/bitset|
+ * ----------------------------
+ *
+ * The scheduler/dispatcher leverages knowledge of the performance
+ * relevant CMT sharing relationships existing between cpus to implement
+ * optimized affinity and load balancing policies.
+ *
+ * Load balancing policy seeks to improve performance by minimizing
+ * contention over shared processor resources / facilities, while the
+ * affinity policies seek to improve cache and TLB utilization.
+ *
+ * The CMT PGs created by this class are already arranged into a
+ * hierarchy (which is done in the pghw layer). To implement the top-down
+ * CMT load balancing algorithm, the CMT PGs additionally maintain
+ * parent, child and sibling hierarchy relationships.
+ * Parent PGs always contain a superset of their children(s) resources,
+ * each PG can have at most one parent, and siblings are the group of PGs
+ * sharing the same parent.
+ *
+ * On NUMA systems, the CMT load balancing algorithm balances across the
+ * CMT PGs within their respective lgroups. On UMA based systems, there
+ * exists a top level group of PGs to balance across. On NUMA systems multiple
+ * top level groups are instantiated, where the top level balancing begins by
+ * balancing across the CMT PGs within their respective (per lgroup) top level
+ * groups.
+ */
+
+/*
+ * Per-lgroup CMT bookkeeping. pg_cmt_cpu_init() looks a record up by the
+ * CPU's lgroup platform handle and uses cl_pgs as the sibling group of PGs
+ * that have no parent. Records are presumably chained through cl_next from
+ * the cmt_lgrps head pointer -- the list handling is outside this view.
+ */
+typedef struct cmt_lgrp {
+ group_t cl_pgs; /* Top level group of active CMT PGs */
+ int cl_npgs; /* # of top level PGs in the lgroup */
+ lgrp_handle_t cl_hand; /* lgroup's platform handle */
+ struct cmt_lgrp *cl_next; /* next cmt_lgrp */
+} cmt_lgrp_t;
+
+/* Head pointer for the cmt_lgrp_t records; starts out empty */
+static cmt_lgrp_t *cmt_lgrps = NULL;
+
+/* Non-zero until pg_cmt_cpu_init() has done its one-time CPU0 accounting */
+static int is_cpu0 = 1;
+/* When non-zero, pg_cmt_class_init() does not register the CMT class */
+static int cmt_sched_disabled = 0;
+
+static pg_cid_t pg_cmt_class_id; /* PG class id */
+
+/*
+ * Forward declarations for the CMT class callbacks and helpers defined
+ * later in this file. pg_cmt_alloc is declared with an explicit (void)
+ * parameter list so the declaration is a real prototype that matches its
+ * definition.
+ */
+static pg_t *pg_cmt_alloc(void);
+static void pg_cmt_free(pg_t *);
+static void pg_cmt_cpu_init(cpu_t *);
+static void pg_cmt_cpu_fini(cpu_t *);
+static void pg_cmt_cpu_active(cpu_t *);
+static void pg_cmt_cpu_inactive(cpu_t *);
+static void pg_cmt_cpupart_in(cpu_t *, cpupart_t *);
+static void pg_cmt_cpupart_move(cpu_t *, cpupart_t *, cpupart_t *);
+static void pg_cmt_hier_pack(pg_cmt_t **, int);
+static int pg_cmt_cpu_belongs(pg_t *, cpu_t *);
+static int pg_cmt_hw(pghw_type_t);
+static cmt_lgrp_t *pg_cmt_find_lgrp(lgrp_handle_t);
+
+/*
+ * Macro to test if PG is managed by the CMT PG class
+ *
+ * The argument is cast to pg_t * and its pg_class pointer is dereferenced,
+ * so it must be a non-NULL PG whose pg_class is set. The result is true
+ * when the class id equals the id stored in pg_cmt_class_id.
+ */
+#define IS_CMT_PG(pg) (((pg_t *)(pg))->pg_class->pgc_id == pg_cmt_class_id)
+
+/*
+ * CMT PG ops
+ *
+ * This is a positional initializer, so the entries must stay in the member
+ * order of struct pg_ops (declared outside this file). The single NULL
+ * entry is the cpupart_out slot, for which the CMT class supplies no
+ * callback. The table is handed to pg_class_register() by
+ * pg_cmt_class_init().
+ */
+struct pg_ops pg_ops_cmt = {
+ pg_cmt_alloc,
+ pg_cmt_free,
+ pg_cmt_cpu_init,
+ pg_cmt_cpu_fini,
+ pg_cmt_cpu_active,
+ pg_cmt_cpu_inactive,
+ pg_cmt_cpupart_in,
+ NULL, /* cpupart_out */
+ pg_cmt_cpupart_move,
+ pg_cmt_cpu_belongs,
+};
+
+/*
+ * Register the CMT PG class.
+ *
+ * Unless CMT scheduling has been disabled through cmt_sched_disabled, the
+ * "cmt" class is registered with its ops vector as a physical-relationship
+ * class and the returned class id is remembered in pg_cmt_class_id.
+ */
+void
+pg_cmt_class_init(void)
+{
+	if (!cmt_sched_disabled) {
+		pg_cmt_class_id =
+		    pg_class_register("cmt", &pg_ops_cmt, PGR_PHYSICAL);
+	}
+}
+
+/*
+ * Called to indicate a new CPU has started up so
+ * that either t0 or the slave startup thread can
+ * be accounted for.
+ *
+ * Applies PG_NRUN_UPDATE(cp, 1). The macro is defined outside this file;
+ * presumably it adds one running thread to the load of cp's PGs -- confirm
+ * against its definition. pg_cmt_cpu_init() calls this once, for the first
+ * CPU it initializes.
+ */
+void
+pg_cmt_cpu_startup(cpu_t *cp)
+{
+ PG_NRUN_UPDATE(cp, 1);
+}
+
+/*
+ * Apply a load delta to every CMT PG in the CPU's lineage.
+ *
+ * Starting at cp->cpu_pg->cmt_lineage and following the cmt_parent links
+ * until they run out, "n" is atomically added to each PG's cmt_nrunning
+ * counter. "n" is positive when load increases and negative when it
+ * decreases.
+ */
+void
+pg_cmt_load(cpu_t *cp, int n)
+{
+	pg_cmt_t	*cur;
+
+	for (cur = (pg_cmt_t *)cp->cpu_pg->cmt_lineage; cur != NULL;
+	    cur = cur->cmt_parent) {
+		ASSERT(IS_CMT_PG(cur));
+		atomic_add_32(&cur->cmt_nrunning, n);
+	}
+}
+
+/*
+ * Tell whether a thread can migrate between "from" and "to" without a
+ * performance penalty.
+ *
+ * Returns 1 when both CPUs carry the same cpu_cacheid in their cpu_physid
+ * structure, and 0 otherwise.
+ */
+int
+pg_cmt_can_migrate(cpu_t *from, cpu_t *to)
+{
+	return (from->cpu_physid->cpu_cacheid ==
+	    to->cpu_physid->cpu_cacheid);
+}
+
+/*
+ * CMT class specific PG allocation
+ *
+ * Returns a zeroed pg_cmt_t through a pg_t pointer.
+ *
+ * NOTE(review): the allocation is KM_NOSLEEP, so it can presumably come
+ * back NULL under memory pressure. pg_cmt_cpu_init() uses the PG it gets
+ * from pg_create() without a NULL check, and already does a KM_SLEEP
+ * allocation on the same path -- confirm whether pg_create() handles a
+ * NULL here or whether KM_SLEEP would be the safer flag.
+ */
+static pg_t *
+pg_cmt_alloc(void)
+{
+ return (kmem_zalloc(sizeof (pg_cmt_t), KM_NOSLEEP));
+}
+
+/*
+ * CMT class specific PG de-allocation.
+ *
+ * Releases the pg_cmt_t behind "pg". The PG must be non-NULL and must
+ * belong to the CMT class; both conditions are asserted.
+ */
+static void
+pg_cmt_free(pg_t *pg)
+{
+	ASSERT(pg != NULL);
+	ASSERT(IS_CMT_PG(pg));
+
+	/* The storage was sized as a pg_cmt_t when it was allocated. */
+	kmem_free(pg, sizeof (pg_cmt_t));
+}
+
+/*
+ * Decide whether CMT load balancing policies apply across instances of
+ * the given hardware sharing relationship.
+ *
+ * Returns 1 for PGHW_IPIPE, PGHW_FPU and PGHW_CHIP, and 0 for every other
+ * relationship.
+ */
+static int
+pg_cmt_load_bal_hw(pghw_type_t hw)
+{
+	return (hw == PGHW_IPIPE || hw == PGHW_FPU || hw == PGHW_CHIP);
+}
+
+/*
+ * Decide whether thread affinity policies apply to instances of the
+ * given hardware sharing relationship.
+ *
+ * Returns 1 for PGHW_CACHE and 0 for every other relationship.
+ */
+static int
+pg_cmt_affinity_hw(pghw_type_t hw)
+{
+	return (hw == PGHW_CACHE);
+}
+
+/*
+ * Decide whether any CMT scheduling policy (load balancing or affinity)
+ * is implemented for the given hardware sharing relationship.
+ *
+ * Returns 1 if either policy applies, 0 if neither does. The affinity
+ * check is only consulted when the load balancing check says no.
+ */
+static int
+pg_cmt_hw(pghw_type_t hw)
+{
+	if (pg_cmt_load_bal_hw(hw))
+		return (1);
+
+	return (pg_cmt_affinity_hw(hw) != 0);
+}
+
+/*
+ * CMT class callback for a new CPU entering the system
+ *
+ * For every hardware sharing relationship that is CMT relevant and that the
+ * platform reports as shared for cp, the matching PG is found (or created)
+ * and cp is added to it. The PGs of the load balanced relationships form
+ * the CPU's lineage, which is packed, recorded in cp->cpu_pg->cmt_pgs and
+ * linked to its parents and siblings. Finally the chip, core and cache ids
+ * are cached in cp->cpu_physid when that structure exists.
+ *
+ * The caller must hold cpu_lock.
+ */
+static void
+pg_cmt_cpu_init(cpu_t *cp)
+{
+ pg_cmt_t *pg;
+ group_t *cmt_pgs;
+ int level, max_level, nlevels;
+ pghw_type_t hw;
+ pg_t *pg_cache = NULL;
+ pg_cmt_t *cpu_cmt_hier[PGHW_NUM_COMPONENTS];
+ lgrp_handle_t lgrp_handle;
+ cmt_lgrp_t *lgrp;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * A new CPU is coming into the system.
+ * Interrogate the platform to see if the CPU
+ * has any performance relevant CMT sharing
+ * relationships
+ */
+ cmt_pgs = &cp->cpu_pg->cmt_pgs;
+ cp->cpu_pg->cmt_lineage = NULL;
+
+ /* Start from an empty, level-indexed lineage table */
+ bzero(cpu_cmt_hier, sizeof (cpu_cmt_hier));
+ max_level = nlevels = 0;
+ for (hw = PGHW_START; hw < PGHW_NUM_COMPONENTS; hw++) {
+
+ /*
+ * We're only interested in CMT hw sharing relationships
+ */
+ if (pg_cmt_hw(hw) == 0 || pg_plat_hw_shared(cp, hw) == 0)
+ continue;
+
+ /*
+ * Find (or create) the PG associated with
+ * the hw sharing relationship in which cp
+ * belongs.
+ *
+ * Determine if a suitable PG already
+ * exists, or if one needs to be created.
+ */
+ pg = (pg_cmt_t *)pghw_place_cpu(cp, hw);
+ if (pg == NULL) {
+ /*
+ * Create a new one.
+ * Initialize the common...
+ *
+ * NOTE(review): the result of pg_create() is used
+ * below without a NULL check.
+ */
+ pg = (pg_cmt_t *)pg_create(pg_cmt_class_id);
+
+ /* ... physical ... */
+ pghw_init((pghw_t *)pg, cp, hw);
+
+ /*
+ * ... and CMT specific portions of the
+ * structure.
+ */
+ bitset_init(&pg->cmt_cpus_actv_set);
+ group_create(&pg->cmt_cpus_actv);
+ } else {
+ ASSERT(IS_CMT_PG(pg));
+ }
+
+ /* Add the CPU to the PG */
+ pg_cpu_add((pg_t *)pg, cp);
+
+ /*
+ * Ensure capacity of the active CPUs group/bitset
+ */
+ group_expand(&pg->cmt_cpus_actv,
+ GROUP_SIZE(&((pg_t *)pg)->pg_cpus));
+
+ if (cp->cpu_seqid >=
+ bitset_capacity(&pg->cmt_cpus_actv_set)) {
+ bitset_resize(&pg->cmt_cpus_actv_set,
+ cp->cpu_seqid + 1);
+ }
+
+ /*
+ * Build a lineage of CMT PGs for load balancing
+ */
+ if (pg_cmt_load_bal_hw(hw)) {
+ level = pghw_level(hw);
+ cpu_cmt_hier[level] = pg;
+ if (level > max_level)
+ max_level = level;
+ nlevels++;
+ }
+
+ /* Cache this for later */
+ if (hw == PGHW_CACHE)
+ pg_cache = (pg_t *)pg;
+ }
+
+ /*
+ * Pack out any gaps in the constructed lineage.
+ * Gaps may exist where the architecture knows
+ * about a hardware sharing relationship, but such a
+ * relationship either isn't relevant for load
+ * balancing or doesn't exist between CPUs on the system.
+ */
+ pg_cmt_hier_pack(cpu_cmt_hier, max_level + 1);
+
+ /*
+ * For each of the PGs in the CPU's lineage:
+ * - Add an entry in the CPU sorted CMT PG group
+ * which is used for top down CMT load balancing
+ * - Tie the PG into the CMT hierarchy by connecting
+ * it to its parent and siblings.
+ */
+ group_expand(cmt_pgs, nlevels);
+
+ /*
+ * Find the lgrp that encapsulates this CPU's CMT hierarchy
+ */
+ lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id);
+ lgrp = pg_cmt_find_lgrp(lgrp_handle);
+
+ for (level = 0; level < nlevels; level++) {
+ uint_t children;
+ int err;
+
+ pg = cpu_cmt_hier[level];
+ /* cmt_pgs holds the lineage reversed: level 0 goes last */
+ err = group_add_at(cmt_pgs, pg, nlevels - level - 1);
+ ASSERT(err == 0);
+
+ if (level == 0)
+ cp->cpu_pg->cmt_lineage = (pg_t *)pg;
+
+ if (pg->cmt_siblings != NULL) {
+ /* Already initialized */
+ ASSERT(pg->cmt_parent == NULL ||
+ pg->cmt_parent == cpu_cmt_hier[level + 1]);
+ ASSERT(pg->cmt_siblings == &lgrp->cl_pgs ||
+ pg->cmt_siblings == pg->cmt_parent->cmt_children);
+ continue;
+ }
+
+ if ((level + 1) == nlevels) {
+ /* Last level: no parent, siblings are the lgroup's PGs */
+ pg->cmt_parent = NULL;
+ pg->cmt_siblings = &lgrp->cl_pgs;
+ children = ++lgrp->cl_npgs;
+ } else {
+ pg->cmt_parent = cpu_cmt_hier[level + 1];
+
+ /*
+ * A good parent keeps track of their children.
+ * The parent's children group is also the PG's
+ * siblings.
+ */
+ if (pg->cmt_parent->cmt_children == NULL) {
+ pg->cmt_parent->cmt_children =
+ kmem_zalloc(sizeof (group_t), KM_SLEEP);
+ group_create(pg->cmt_parent->cmt_children);
+ }
+ pg->cmt_siblings = pg->cmt_parent->cmt_children;
+ children = ++pg->cmt_parent->cmt_nchildren;
+ }
+ pg->cmt_hint = 0;
+ /* Make room in the sibling group for the updated count */
+ group_expand(pg->cmt_siblings, children);
+ }
+
+ /*
+ * Cache the chip and core IDs in the cpu_t->cpu_physid structure
+ * for fast lookups later.
+ */
+ if (cp->cpu_physid) {
+ cp->cpu_physid->cpu_chipid =
+ pg_plat_hw_instance_id(cp, PGHW_CHIP);
+ cp->cpu_physid->cpu_coreid = pg_plat_get_core_id(cp);
+
+ /*
+ * If this cpu has a PG representing shared cache, then set
+ * cpu_cacheid to that PG's logical id
+ */
+ if (pg_cache)
+ cp->cpu_physid->cpu_cacheid = pg_cache->pg_id;
+ }
+
+ /* CPU0 only initialization */
+ if (is_cpu0) {
+ pg_cmt_cpu_startup(cp);
+ is_cpu0 = 0;
+ }
+
+}
+
+/*
+ * Class callback when a CPU is leaving the system (deletion)
+ *
+ * The teardown happens in two passes:
+ *
+ * 1. Walk the CPU's load balancing lineage, starting at cmt_lineage and
+ *    following cmt_parent until it is NULL.  Each PG is removed from the
+ *    CPU's cmt_pgs group.  A PG that holds only this CPU is about to
+ *    become empty, so its children group is destroyed and freed, and the
+ *    PG count kept by its parent (or by the lgroup, for a top level PG)
+ *    is decremented.
+ *
+ * 2. Remove the CPU from every CMT PG it belongs to, destroying any PG
+ *    that is left without CPUs.
+ */
+static void
+pg_cmt_cpu_fini(cpu_t *cp)
+{
+	group_iter_t	i;
+	pg_cmt_t	*pg;
+	group_t		*pgs, *cmt_pgs;
+	lgrp_handle_t	lgrp_handle;
+	cmt_lgrp_t	*lgrp;
+
+	pgs = &cp->cpu_pg->pgs;
+	cmt_pgs = &cp->cpu_pg->cmt_pgs;
+
+	/*
+	 * Find the lgroup that encapsulates this CPU's CMT hierarchy
+	 */
+	lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id);
+	lgrp = pg_cmt_find_lgrp(lgrp_handle);
+
+	/*
+	 * First, clean up anything load balancing specific for each of
+	 * the CPU's PGs that participated in CMT load balancing
+	 */
+	pg = (pg_cmt_t *)cp->cpu_pg->cmt_lineage;
+	while (pg != NULL) {
+
+		/*
+		 * Remove the PG from the CPU's load balancing lineage
+		 */
+		(void) group_remove(cmt_pgs, pg, GRP_RESIZE);
+
+		/*
+		 * If it's about to become empty, destroy its children
+		 * group, and remove its reference from its siblings.
+		 * This is done here (rather than below) to avoid removing
+		 * our reference from a PG that we just eliminated.
+		 */
+		if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 1) {
+			if (pg->cmt_children != NULL) {
+				/*
+				 * The children group_t itself was allocated
+				 * with kmem_zalloc() when the first child was
+				 * attached; group_destroy() only tears down
+				 * the group's contents, so the structure must
+				 * be freed here or it is leaked.
+				 */
+				group_destroy(pg->cmt_children);
+				kmem_free(pg->cmt_children, sizeof (group_t));
+				pg->cmt_children = NULL;
+			}
+			if (pg->cmt_siblings != NULL) {
+				if (pg->cmt_siblings == &lgrp->cl_pgs)
+					lgrp->cl_npgs--;
+				else
+					pg->cmt_parent->cmt_nchildren--;
+			}
+		}
+		pg = pg->cmt_parent;
+	}
+
+	ASSERT(GROUP_SIZE(cmt_pgs) == 0);
+
+	/*
+	 * Now that the load balancing lineage updates have happened,
+	 * remove the CPU from all its PGs (destroying any that become
+	 * empty).
+	 */
+	group_iter_init(&i);
+	while ((pg = group_iterate(pgs, &i)) != NULL) {
+		if (IS_CMT_PG(pg) == 0)
+			continue;
+
+		pg_cpu_delete((pg_t *)pg, cp);
+		/*
+		 * Deleting the CPU from the PG changes the CPU's
+		 * PG group over which we are actively iterating.
+		 * Re-initialize the iteration
+		 */
+		group_iter_init(&i);
+
+		if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 0) {
+
+			/*
+			 * The PG has become zero sized, so destroy it.
+			 */
+			group_destroy(&pg->cmt_cpus_actv);
+			bitset_fini(&pg->cmt_cpus_actv_set);
+			pghw_fini((pghw_t *)pg);
+
+			pg_destroy((pg_t *)pg);
+		}
+	}
+}
+
+/*
+ * Class callback when a CPU is entering a cpu partition
+ *
+ * Grows the partition's CMT PG bitset, if needed, so that it can hold the
+ * id of every CMT PG that cp is a member of.  No bits are set here.
+ */
+static void
+pg_cmt_cpupart_in(cpu_t *cp, cpupart_t *pp)
+{
+	group_t		*cpu_pgs;
+	group_iter_t	iter;
+	pg_t		*pg;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	cpu_pgs = &cp->cpu_pg->pgs;
+
+	group_iter_init(&iter);
+	for (pg = group_iterate(cpu_pgs, &iter); pg != NULL;
+	    pg = group_iterate(cpu_pgs, &iter)) {
+		/*
+		 * Only CMT PGs are tracked in the partition's bitset;
+		 * resize when the PG's id would not fit.
+		 */
+		if (IS_CMT_PG(pg) != 0 &&
+		    pg->pg_id >= bitset_capacity(&pp->cp_cmt_pgs))
+			bitset_resize(&pp->cp_cmt_pgs, pg->pg_id + 1);
+	}
+}
+
+/*
+ * Class callback when a CPU is actually moving partitions
+ *
+ * For each CMT PG that cp belongs to:
+ *  - mark the PG as present in the new partition (newpp), and
+ *  - clear the PG from the old partition (oldpp) when no other CPU of
+ *    that PG remains in oldpp.
+ *
+ * The old partition is always addressed through oldpp rather than through
+ * cp->cpu_part, so the result does not depend on whether the caller has
+ * already repointed cp->cpu_part at the new partition.
+ */
+static void
+pg_cmt_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
+{
+	cpu_t		*cpp;
+	group_t		*pgs;
+	pg_t		*pg;
+	group_iter_t	pg_iter;
+	pg_cpu_itr_t	cpu_iter;
+	boolean_t	found;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	pgs = &cp->cpu_pg->pgs;
+	group_iter_init(&pg_iter);
+
+	/*
+	 * Iterate over the CPU's CMT PGs
+	 */
+	while ((pg = group_iterate(pgs, &pg_iter)) != NULL) {
+
+		if (IS_CMT_PG(pg) == 0)
+			continue;
+
+		/*
+		 * Add the PG to the bitset in the new partition.
+		 */
+		bitset_add(&newpp->cp_cmt_pgs, pg->pg_id);
+
+		/*
+		 * Remove the PG from the bitset in the old partition
+		 * if the last of the PG's CPUs have left.
+		 */
+		found = B_FALSE;
+		PG_CPU_ITR_INIT(pg, cpu_iter);
+		while ((cpp = pg_cpu_next(&cpu_iter)) != NULL) {
+			if (cpp == cp)
+				continue;	/* cp itself is leaving */
+			if (cpp->cpu_part->cp_id == oldpp->cp_id) {
+				found = B_TRUE;
+				break;
+			}
+		}
+		if (!found)
+			bitset_del(&oldpp->cp_cmt_pgs, pg->pg_id);
+	}
+}
+
+/*
+ * Class callback when a CPU becomes active (online)
+ *
+ * This is called in a context where CPUs are paused
+ *
+ * For every CMT PG that cp belongs to, the CPU is added to the PG's
+ * active CPU group and active CPU bitset, and the PG is marked present
+ * in the bitset of the CPU's current partition.  A PG that gains its
+ * first active CPU, and that represents a load balanced sharing
+ * relationship, is also added to its siblings group.
+ *
+ * All group additions use GRP_NORESIZE, so the groups are expected to
+ * have been sized beforehand (NOTE(review): presumably via the
+ * group_expand() calls made at CPU init time -- confirm).
+ */
+static void
+pg_cmt_cpu_active(cpu_t *cp)
+{
+ int err;
+ group_iter_t i;
+ pg_cmt_t *pg;
+ group_t *pgs;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ pgs = &cp->cpu_pg->pgs;
+ group_iter_init(&i);
+
+ /*
+ * Iterate over the CPU's PGs
+ */
+ while ((pg = group_iterate(pgs, &i)) != NULL) {
+
+ /* Skip PGs that do not belong to the CMT class */
+ if (IS_CMT_PG(pg) == 0)
+ continue;
+
+ err = group_add(&pg->cmt_cpus_actv, cp, GRP_NORESIZE);
+ ASSERT(err == 0);
+
+ /*
+ * If this is the first active CPU in the PG, and it
+ * represents a hardware sharing relationship over which
+ * CMT load balancing is performed, add it as a candidate
+ * for balancing with its siblings.
+ */
+ if (GROUP_SIZE(&pg->cmt_cpus_actv) == 1 &&
+ pg_cmt_load_bal_hw(((pghw_t *)pg)->pghw_hw)) {
+ err = group_add(pg->cmt_siblings, pg, GRP_NORESIZE);
+ ASSERT(err == 0);
+ }
+
+ /*
+ * Notate the CPU in the PG's active CPU bitset.
+ * Also notate the PG as being active in its associated
+ * partition
+ */
+ bitset_add(&pg->cmt_cpus_actv_set, cp->cpu_seqid);
+ bitset_add(&cp->cpu_part->cp_cmt_pgs, ((pg_t *)pg)->pg_id);
+ }
+}
+
+/*
+ * Class callback when a CPU goes inactive (offline)
+ *
+ * This is called in a context where CPUs are paused
+ *
+ * For every CMT PG that cp belongs to, the CPU is removed from the PG's
+ * active CPU group and active CPU bitset.  A load balanced PG that is
+ * left with no active CPUs is removed from its siblings group.  Finally,
+ * the PG is cleared from the bitset of the CPU's partition when no other
+ * CPU of the PG is in that same partition.
+ */
+static void
+pg_cmt_cpu_inactive(cpu_t *cp)
+{
+ int err;
+ group_t *pgs;
+ pg_cmt_t *pg;
+ cpu_t *cpp;
+ group_iter_t i;
+ pg_cpu_itr_t cpu_itr;
+ boolean_t found;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ pgs = &cp->cpu_pg->pgs;
+ group_iter_init(&i);
+
+ while ((pg = group_iterate(pgs, &i)) != NULL) {
+
+ /* Skip PGs that do not belong to the CMT class */
+ if (IS_CMT_PG(pg) == 0)
+ continue;
+
+ /*
+ * Remove the CPU from the CMT PG's active CPU group
+ * and bitmap
+ */
+ err = group_remove(&pg->cmt_cpus_actv, cp, GRP_NORESIZE);
+ ASSERT(err == 0);
+
+ bitset_del(&pg->cmt_cpus_actv_set, cp->cpu_seqid);
+
+ /*
+ * If there are no more active CPUs in this PG over which
+ * load was balanced, remove it as a balancing candidate.
+ */
+ if (GROUP_SIZE(&pg->cmt_cpus_actv) == 0 &&
+ pg_cmt_load_bal_hw(((pghw_t *)pg)->pghw_hw)) {
+ err = group_remove(pg->cmt_siblings, pg, GRP_NORESIZE);
+ ASSERT(err == 0);
+ }
+
+ /*
+ * Assert the number of active CPUs does not exceed
+ * the total number of CPUs in the PG
+ */
+ ASSERT(GROUP_SIZE(&pg->cmt_cpus_actv) <=
+ GROUP_SIZE(&((pg_t *)pg)->pg_cpus));
+
+ /*
+ * Update the PG bitset in the CPU's old partition:
+ * look for any other CPU of this PG that is in the same
+ * partition as cp, and clear the PG's bit if there is none.
+ */
+ found = B_FALSE;
+ PG_CPU_ITR_INIT(pg, cpu_itr);
+ while ((cpp = pg_cpu_next(&cpu_itr)) != NULL) {
+ if (cpp == cp)
+ continue;
+ if (cpp->cpu_part->cp_id == cp->cpu_part->cp_id) {
+ found = B_TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ bitset_del(&cp->cpu_part->cp_cmt_pgs,
+ ((pg_t *)pg)->pg_id);
+ }
+ }
+}
+
+/*
+ * Return non-zero if the CPU belongs in the given PG
+ *
+ * Membership is decided by comparing cp against the first CPU already in
+ * the PG: cp belongs if the two share the hardware that the PG represents.
+ * The PG must therefore already contain at least one CPU.
+ */
+static int
+pg_cmt_cpu_belongs(pg_t *pg, cpu_t *cp)
+{
+	cpu_t	*member;
+	int	shares;
+
+	member = GROUP_ACCESS(&pg->pg_cpus, 0);
+
+	ASSERT(member != NULL);
+
+	/*
+	 * Ask the platform whether cp has the PG's sharing relationship
+	 * with a CPU that is already a member.
+	 */
+	shares = pg_plat_cpus_share(cp, member, ((pghw_t *)pg)->pghw_hw) ?
+	    1 : 0;
+
+	return (shares);
+}
+
+/*
+ * Pack the CPU's CMT hierarchy
+ *
+ * Slides every non-NULL entry of hier[0 .. sz-1] toward index 0 so that
+ * no NULL gaps remain between them.  The relative order of the entries is
+ * preserved, and the vacated tail slots are set to NULL.
+ */
+static void
+pg_cmt_hier_pack(pg_cmt_t *hier[], int sz)
+{
+	int	src;
+	int	dst = 0;	/* next slot to fill */
+
+	for (src = 0; src < sz; src++) {
+		if (hier[src] == NULL)
+			continue;
+
+		/*
+		 * Every slot in [dst, src) is NULL at this point, so the
+		 * entry can simply be moved down into the first gap.
+		 */
+		if (src != dst) {
+			hier[dst] = hier[src];
+			hier[src] = NULL;
+		}
+		dst++;
+	}
+}
+
+/*
+ * Return a cmt_lgrp_t * given an lgroup handle.
+ *
+ * The cmt_lgrps list is searched for an entry whose handle matches.  If
+ * none exists yet, a new entry is allocated, initialized with an empty PG
+ * group, and linked at the head of the list.
+ */
+static cmt_lgrp_t *
+pg_cmt_find_lgrp(lgrp_handle_t hand)
+{
+	cmt_lgrp_t	*cur;
+	cmt_lgrp_t	*fresh;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	for (cur = cmt_lgrps; cur != NULL; cur = cur->cl_next) {
+		if (cur->cl_hand == hand)
+			return (cur);
+	}
+
+	/*
+	 * Haven't seen this lgrp yet, so create an entry for it
+	 */
+	fresh = kmem_zalloc(sizeof (cmt_lgrp_t), KM_SLEEP);
+
+	fresh->cl_hand = hand;
+	fresh->cl_npgs = 0;
+	fresh->cl_next = cmt_lgrps;
+	cmt_lgrps = fresh;
+	group_create(&fresh->cl_pgs);
+
+	return (fresh);
+}
diff --git a/usr/src/uts/common/disp/cpupart.c b/usr/src/uts/common/disp/cpupart.c
index 02856e4dc3..ecd5aff873 100644
--- a/usr/src/uts/common/disp/cpupart.c
+++ b/usr/src/uts/common/disp/cpupart.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -38,7 +38,7 @@
#include <sys/var.h>
#include <sys/cyclic.h>
#include <sys/lgrp.h>
-#include <sys/chip.h>
+#include <sys/pghw.h>
#include <sys/loadavg.h>
#include <sys/class.h>
#include <sys/fss.h>
@@ -267,6 +267,8 @@ cpupart_initialize_default(void)
* Set t0's home
*/
t0.t_lpl = &cp_default.cp_lgrploads[LGRP_ROOTID];
+
+ bitset_init(&cp_default.cp_cmt_pgs);
}
@@ -322,6 +324,15 @@ cpupart_move_cpu(cpu_t *cp, cpupart_t *newpp, int forced)
cpu_inmotion = cp;
membar_enter();
+ /*
+ * Notify the Processor Groups subsystem that the CPU
+ * will be moving cpu partitions. This is done before
+ * CPUs are paused to provide an opportunity for any
+ * needed memory allocations.
+ */
+ pg_cpupart_out(cp, oldpp);
+ pg_cpupart_in(cp, newpp);
+
again:
if (move_threads) {
int loop_count;
@@ -332,6 +343,8 @@ again:
if (loop_count >= 5) {
cpu_state_change_notify(cp->cpu_id,
CPU_CPUPART_IN);
+ pg_cpupart_out(cp, newpp);
+ pg_cpupart_in(cp, oldpp);
cpu_inmotion = NULL;
return (EBUSY); /* some threads still bound */
}
@@ -350,6 +363,8 @@ again:
* a bound cyclic.
*/
cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
+ pg_cpupart_out(cp, newpp);
+ pg_cpupart_in(cp, oldpp);
cpu_inmotion = NULL;
return (EBUSY);
}
@@ -370,9 +385,10 @@ again:
}
/*
- * Update the set of chip's being spanned
+ * Now that CPUs are paused, let the PG subsystem perform
+ * any necessary data structure updates.
*/
- chip_cpu_move_part(cp, oldpp, newpp);
+ pg_cpupart_move(cp, oldpp, newpp);
/* save this cpu's lgroup -- it'll be the same in the new partition */
lgrpid = cp->cpu_lpl->lpl_lgrpid;
@@ -764,7 +780,7 @@ cpupart_create(psetid_t *psid)
for (i = 0; i < pp->cp_nlgrploads; i++) {
pp->cp_lgrploads[i].lpl_lgrpid = i;
}
- CHIP_SET_ZERO(pp->cp_mach->mc_chipset);
+ bitset_init(&pp->cp_cmt_pgs);
/*
* Pause all CPUs while changing the partition list, to make sure
@@ -859,10 +875,16 @@ again: p = ttoproc(t);
}
}
- ASSERT(CHIP_SET_ISNULL(pp->cp_mach->mc_chipset));
+ ASSERT(bitset_is_null(&pp->cp_cmt_pgs));
ASSERT(CPUSET_ISNULL(pp->cp_mach->mc_haltset));
/*
+ * Teardown the partition's group of active CMT PGs now that
+ * all of the CPUs have left.
+ */
+ bitset_fini(&pp->cp_cmt_pgs);
+
+ /*
* Reset the pointers in any offline processors so they won't
* try to rejoin the destroyed partition when they're turned
* online.
diff --git a/usr/src/uts/common/disp/disp.c b/usr/src/uts/common/disp/disp.c
index dc53b411e3..b2052828b3 100644
--- a/usr/src/uts/common/disp/disp.c
+++ b/usr/src/uts/common/disp/disp.c
@@ -50,7 +50,9 @@
#include <sys/tnf.h>
#include <sys/cpupart.h>
#include <sys/lgrp.h>
-#include <sys/chip.h>
+#include <sys/pg.h>
+#include <sys/cmt.h>
+#include <sys/bitset.h>
#include <sys/schedctl.h>
#include <sys/atomic.h>
#include <sys/dtrace.h>
@@ -117,12 +119,8 @@ static void setkpdq(kthread_t *tp, int borf);
* Parameter that determines how recently a thread must have run
* on the CPU to be considered loosely-bound to that CPU to reduce
* cold cache effects. The interval is in hertz.
- *
- * The platform may define a per physical processor adjustment of
- * this parameter. For efficiency, the effective rechoose interval
- * (rechoose_interval + per chip adjustment) is maintained in the
- * cpu structures. See cpu_choose()
*/
+#define RECHOOSE_INTERVAL 3
int rechoose_interval = RECHOOSE_INTERVAL;
static cpu_t *cpu_choose(kthread_t *, pri_t);
@@ -132,14 +130,9 @@ static cpu_t *cpu_choose(kthread_t *, pri_t);
* to reduce migrations. The interval is in nanoseconds.
*
* The nosteal_nsec should be set by a platform code to an appropriate value.
- *
- */
-hrtime_t nosteal_nsec = 0;
-
-/*
- * Value of nosteal_nsec meaning that nosteal optimization should be disabled
+ * Setting it to 0 effectively disables the nosteal 'protection'
*/
-#define NOSTEAL_DISABLED 1
+hrtime_t nosteal_nsec = -1;
id_t defaultcid; /* system "default" class; see dispadmin(1M) */
@@ -225,6 +218,7 @@ dispinit(void)
mutex_enter(&cpu_lock);
CPU->cpu_disp->disp_maxrunpri = -1;
CPU->cpu_disp->disp_max_unbound_pri = -1;
+
/*
* Initialize the default CPU partition.
*/
@@ -874,9 +868,9 @@ swtch()
if (next != t) {
if (t == cp->cpu_idle_thread) {
- CHIP_NRUNNING(cp->cpu_chip, 1);
+ PG_NRUN_UPDATE(cp, 1);
} else if (next == cp->cpu_idle_thread) {
- CHIP_NRUNNING(cp->cpu_chip, -1);
+ PG_NRUN_UPDATE(cp, -1);
}
/*
@@ -944,7 +938,7 @@ swtch_from_zombie()
TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");
if (next == cpu->cpu_idle_thread)
- CHIP_NRUNNING(cpu->cpu_chip, -1);
+ PG_NRUN_UPDATE(cpu, -1);
restore_mstate(next);
@@ -1024,7 +1018,7 @@ swtch_to(kthread_t *next)
TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");
if (curthread == cp->cpu_idle_thread)
- CHIP_NRUNNING(cp->cpu_chip, 1);
+ PG_NRUN_UPDATE(cp, 1);
/* OK to steal anything left on run queue */
cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL;
@@ -1092,68 +1086,113 @@ cpu_resched(cpu_t *cp, pri_t tpri)
}
/*
- * Routine used by setbackdq() to balance load across the physical
- * processors. Returns a CPU of a lesser loaded chip in the lgroup
- * if balancing is necessary, or the "hint" CPU if it's not.
- *
- * - tp is the thread being enqueued
- * - cp is a hint CPU (chosen by cpu_choose()).
- * - curchip (if not NULL) is the chip on which the current thread
- * is running.
- *
- * The thread lock for "tp" must be held while calling this routine.
+ * Perform multi-level CMT load balancing of running threads.
+ * tp is the thread being enqueued
+ * cp is the hint CPU (chosen by cpu_choose()).
*/
static cpu_t *
-chip_balance(kthread_t *tp, cpu_t *cp, chip_t *curchip)
+cmt_balance(kthread_t *tp, cpu_t *cp)
{
- int chp_nrun, ochp_nrun;
- chip_t *chp, *nchp;
+ int hint, i, cpu;
+ int self = 0;
+ group_t *cmt_pgs, *siblings;
+ pg_cmt_t *pg, *pg_tmp, *tpg = NULL;
+ int pg_nrun, tpg_nrun;
+ int level = 0;
+ cpu_t *newcp;
+
+ ASSERT(THREAD_LOCK_HELD(tp));
- chp = cp->cpu_chip;
- chp_nrun = chp->chip_nrunning;
+ cmt_pgs = &cp->cpu_pg->cmt_pgs;
- if (chp == curchip)
- chp_nrun--; /* Ignore curthread */
+ if (GROUP_SIZE(cmt_pgs) == 0)
+ return (cp); /* nothing to do */
+
+ if (tp == curthread)
+ self = 1;
/*
- * If this chip isn't at all idle, then let
- * run queue balancing do the work.
+ * Balance across siblings in the CPUs CMT lineage
*/
- if (chp_nrun == chp->chip_ncpu)
- return (cp);
-
- nchp = chp->chip_balance;
do {
- if (nchp == chp ||
- !CHIP_IN_CPUPART(nchp, tp->t_cpupart))
- continue;
+ pg = GROUP_ACCESS(cmt_pgs, level);
+
+ pg_nrun = pg->cmt_nrunning;
+ if (self &&
+ bitset_in_set(&pg->cmt_cpus_actv_set, CPU->cpu_seqid))
+ pg_nrun--; /* Ignore curthread's effect */
+
+ siblings = pg->cmt_siblings;
+ hint = pg->cmt_hint;
- ochp_nrun = nchp->chip_nrunning;
+ /*
+ * Check for validity of the hint
+ * It should reference a valid sibling
+ */
+ if (hint >= GROUP_SIZE(siblings))
+ hint = pg->cmt_hint = 0;
+ else
+ pg->cmt_hint++;
/*
- * If the other chip is running less threads,
- * or if it's running the same number of threads, but
- * has more online logical CPUs, then choose to balance.
+ * Find a balancing candidate from among our siblings
+ * "hint" is a hint for where to start looking
*/
- if (chp_nrun > ochp_nrun ||
- (chp_nrun == ochp_nrun &&
- nchp->chip_ncpu > chp->chip_ncpu)) {
- cp = nchp->chip_cpus;
- nchp->chip_cpus = cp->cpu_next_chip;
+ i = hint;
+ do {
+ ASSERT(i < GROUP_SIZE(siblings));
+ pg_tmp = GROUP_ACCESS(siblings, i);
/*
- * Find a CPU on the chip in the correct
- * partition. We know at least one exists
- * because of the CHIP_IN_CPUPART() check above.
+ * The candidate must not be us, and must
+ * have some CPU resources in the thread's
+ * partition
*/
- while (cp->cpu_part != tp->t_cpupart)
- cp = cp->cpu_next_chip;
+ if (pg_tmp != pg &&
+ bitset_in_set(&tp->t_cpupart->cp_cmt_pgs,
+ ((pg_t *)pg_tmp)->pg_id)) {
+ tpg = pg_tmp;
+ break;
+ }
+
+ if (++i >= GROUP_SIZE(siblings))
+ i = 0;
+ } while (i != hint);
+
+ if (!tpg)
+ continue; /* no candidates at this level */
+
+ /*
+ * Check if the balancing target is underloaded
+ * Decide to balance if the target is running fewer
+ * threads, or if it's running the same number of threads
+ * with more online CPUs
+ */
+ tpg_nrun = tpg->cmt_nrunning;
+ if (pg_nrun > tpg_nrun ||
+ (pg_nrun == tpg_nrun &&
+ (GROUP_SIZE(&tpg->cmt_cpus_actv) >
+ GROUP_SIZE(&pg->cmt_cpus_actv)))) {
+ break;
}
- chp->chip_balance = nchp->chip_next_lgrp;
- break;
- } while ((nchp = nchp->chip_next_lgrp) != chp->chip_balance);
+ tpg = NULL;
+ } while (++level < GROUP_SIZE(cmt_pgs));
+
+
+ if (tpg) {
+ /*
+ * Select an idle CPU from the target PG
+ */
+ for (cpu = 0; cpu < GROUP_SIZE(&tpg->cmt_cpus_actv); cpu++) {
+ newcp = GROUP_ACCESS(&tpg->cmt_cpus_actv, cpu);
+ if (newcp->cpu_part == tp->t_cpupart &&
+ newcp->cpu_dispatch_pri == -1) {
+ cp = newcp;
+ break;
+ }
+ }
+ }
- ASSERT(CHIP_IN_CPUPART(cp->cpu_chip, tp->t_cpupart));
return (cp);
}
@@ -1181,7 +1220,6 @@ setbackdq(kthread_t *tp)
{
dispq_t *dq;
disp_t *dp;
- chip_t *curchip = NULL;
cpu_t *cp;
pri_t tpri;
int bound;
@@ -1200,10 +1238,6 @@ setbackdq(kthread_t *tp)
}
tpri = DISP_PRIO(tp);
- if (tp == curthread) {
- curchip = CPU->cpu_chip;
- }
-
if (ncpus == 1)
cp = tp->t_cpu;
else if (!tp->t_bound_cpu && !tp->t_weakbound_cpu) {
@@ -1220,12 +1254,9 @@ setbackdq(kthread_t *tp)
int qlen;
/*
- * Select another CPU if we need
- * to do some load balancing across the
- * physical processors.
+ * Perform any CMT load balancing
*/
- if (CHIP_SHOULD_BALANCE(cp->cpu_chip))
- cp = chip_balance(tp, cp, curchip);
+ cp = cmt_balance(tp, cp);
/*
* Balance across the run queues
@@ -1960,8 +1991,8 @@ disp_getwork(cpu_t *cp)
if (pri > maxpri) {
/*
* Don't steal threads that we attempted
- * to be stolen very recently until
- * they're ready to be stolen again.
+ * to steal recently until they're ready
+ * to be stolen again.
*/
stealtime = ocp->cpu_disp->disp_steal;
if (stealtime == 0 ||
@@ -2158,8 +2189,6 @@ disp_getbest(disp_t *dp)
allbound = B_TRUE;
for (tp = dq->dq_first; tp != NULL; tp = tp->t_link) {
hrtime_t now, nosteal, rqtime;
- chip_type_t chtype;
- chip_t *chip;
/*
* Skip over bound threads which could be here even
@@ -2209,21 +2238,15 @@ disp_getbest(disp_t *dp)
break;
/*
- * Steal immediately if the chip has shared cache and we are
- * sharing the chip with the target thread's CPU.
+ * Steal immediately if, due to CMT processor architecture,
+ * migration between cp and tcp would incur no performance
+ * penalty.
*/
- chip = tcp->cpu_chip;
- chtype = chip->chip_type;
- if ((chtype == CHIP_SMT || chtype == CHIP_CMP_SHARED_CACHE) &&
- chip == cp->cpu_chip)
+ if (pg_cmt_can_migrate(cp, tcp))
break;
- /*
- * Get the value of nosteal interval either from nosteal_nsec
- * global variable or from a value specified by a chip
- */
- nosteal = nosteal_nsec ? nosteal_nsec : chip->chip_nosteal;
- if (nosteal == 0 || nosteal == NOSTEAL_DISABLED)
+ nosteal = nosteal_nsec;
+ if (nosteal == 0)
break;
/*
@@ -2643,7 +2666,7 @@ cpu_choose(kthread_t *t, pri_t tpri)
{
ASSERT(tpri < kpqpri);
- if ((((lbolt - t->t_disp_time) > t->t_cpu->cpu_rechoose) &&
+ if ((((lbolt - t->t_disp_time) > rechoose_interval) &&
t != curthread) || t->t_cpu == cpu_inmotion) {
return (disp_lowpri_cpu(t->t_cpu, t->t_lpl, tpri, NULL));
}
diff --git a/usr/src/uts/common/os/bitset.c b/usr/src/uts/common/os/bitset.c
new file mode 100644
index 0000000000..8222fd9faa
--- /dev/null
+++ b/usr/src/uts/common/os/bitset.c
@@ -0,0 +1,168 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/bitset.h>
+#include <sys/kmem.h>
+#include <sys/systm.h>
+#include <sys/cmn_err.h>
+#include <sys/sysmacros.h>
+
+/*
+ * Initialize a bitset_t.
+ * The whole structure is zeroed, so after bitset_init() the bitset
+ * is zero sized.
+ */
+void
+bitset_init(bitset_t *b)
+{
+	bzero(b, sizeof (*b));
+}
+
+/*
+ * Uninitialize a bitset_t.
+ * This will free the bitset's data, leaving it zero sized.
+ *
+ * The word array pointer and word count are reset after the free so that
+ * the bitset really is zero sized afterwards: a repeated bitset_fini() or
+ * a later bitset_resize() will not free the old array a second time.
+ */
+void
+bitset_fini(bitset_t *b)
+{
+	if (b->bs_words > 0)
+		kmem_free(b->bs_set, b->bs_words * sizeof (ulong_t));
+
+	b->bs_set = NULL;
+	b->bs_words = 0;
+}
+
+/*
+ * Resize a bitset to where it can hold sz number of bits.
+ * This can either grow or shrink the bitset holding capacity.
+ * In the case of shrinkage, elements that reside outside the new
+ * holding capacity of the bitset are lost.
+ *
+ * Capacity is managed in whole ulong_t words, so the request is rounded
+ * to BT_BITOUL(sz) words; nothing happens if the word count is unchanged.
+ * Growing allocates with KM_SLEEP.  Resizing to zero bits releases the
+ * storage entirely.
+ */
+void
+bitset_resize(bitset_t *b, uint_t sz)
+{
+	uint_t	nwords;
+	ulong_t	*bset_new, *bset_tmp;
+
+	nwords = BT_BITOUL(sz);
+	if (b->bs_words == nwords)
+		return;	/* already properly sized */
+
+	/*
+	 * Allocate the new ulong_t array, and copy the old one.
+	 * A zero sized bitset has no array to copy from.
+	 */
+	if (nwords > 0) {
+		bset_new = kmem_zalloc(nwords * sizeof (ulong_t), KM_SLEEP);
+		if (b->bs_words > 0) {
+			bcopy(b->bs_set, bset_new,
+			    MIN(b->bs_words, nwords) * sizeof (ulong_t));
+		}
+	} else {
+		bset_new = NULL;
+	}
+
+	/* swap out the old ulong_t array for new one */
+	bset_tmp = b->bs_set;
+	b->bs_set = bset_new;
+
+	/*
+	 * Free up the old array, if there was one.  bs_words still holds
+	 * the old word count here, which is the size that was allocated.
+	 */
+	if (b->bs_words > 0)
+		kmem_free(bset_tmp, b->bs_words * sizeof (ulong_t));
+	b->bs_words = nwords;
+}
+
+/*
+ * Returns the current holding capacity of the bitset, in bits
+ * (the number of allocated words times the bits per word).
+ */
+uint_t
+bitset_capacity(bitset_t *b)
+{
+	uint_t	nbits = b->bs_words * BT_NBIPUL;
+
+	return (nbits);
+}
+
+/*
+ * Add and delete bits in the bitset.
+ *
+ * Setting a bit that is already set, and clearing a bit that is already
+ * clear, are both legal.
+ *
+ * Adding or deleting an element that falls outside the bitset's current
+ * holding capacity is illegal; this is only checked by ASSERT.
+ */
+void
+bitset_add(bitset_t *b, uint_t elt)
+{
+	ASSERT(elt < b->bs_words * BT_NBIPUL);
+
+	BT_SET(b->bs_set, elt);
+}
+
+/*
+ * Clear bit elt in the bitset.  elt must lie within the bitset's current
+ * holding capacity; this is only checked by ASSERT.
+ */
+void
+bitset_del(bitset_t *b, uint_t elt)
+{
+	ASSERT(elt < b->bs_words * BT_NBIPUL);
+
+	BT_CLEAR(b->bs_set, elt);
+}
+
+/*
+ * Return non-zero if the bit is present in the set.
+ * elt must lie within the bitset's current holding capacity; this is
+ * only checked by ASSERT.
+ */
+int
+bitset_in_set(bitset_t *b, uint_t elt)
+{
+	ulong_t	*words = b->bs_set;
+
+	ASSERT(elt < b->bs_words * BT_NBIPUL);
+
+	return (BT_TEST(words, elt));
+}
+
+/*
+ * Return non-zero if the bitset is empty, i.e. no bit is set in any of
+ * its words.  A zero sized bitset is reported as empty.
+ */
+int
+bitset_is_null(bitset_t *b)
+{
+	uint_t	i;	/* unsigned, to match the word count it is compared to */
+
+	for (i = 0; i < b->bs_words; i++) {
+		if (b->bs_set[i] != 0)
+			return (0);
+	}
+	return (1);
+}
+
+/*
+ * Find the first (lowest numbered) set bit in the bitset.
+ * Returns the bit's index, or (uint_t)-1 if no bit is set.
+ */
+uint_t
+bitset_find(bitset_t *b)
+{
+	uint_t	word;
+
+	for (word = 0; word < b->bs_words; word++) {
+		/*
+		 * lowbit() yields a 1-based bit position, so subtracting
+		 * one gives (uint_t)-1 for a word with no bits set.
+		 */
+		uint_t	bit = (uint_t)(lowbit(b->bs_set[word]) - 1);
+
+		if (bit == (uint_t)-1)
+			continue;
+
+		bit += word * BT_NBIPUL;
+		return (bit);
+	}
+	return ((uint_t)-1);
+}
diff --git a/usr/src/uts/common/os/chip.c b/usr/src/uts/common/os/chip.c
deleted file mode 100644
index ad11827b0f..0000000000
--- a/usr/src/uts/common/os/chip.c
+++ /dev/null
@@ -1,576 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/thread.h>
-#include <sys/cpuvar.h>
-#include <sys/cpupart.h>
-#include <sys/kmem.h>
-#include <sys/cmn_err.h>
-#include <sys/kstat.h>
-#include <sys/processor.h>
-#include <sys/disp.h>
-#include <sys/chip.h>
-
-/*
- * CMT aware scheduler/dispatcher support
- *
- * With the introduction of Chip Multi-Threaded (CMT) processor architectures,
- * it is no longer necessarily true that a given physical processor
- * module (chip) will present itself as a single schedulable entity (cpu_t).
- * Rather, each chip may present itself as one or more "logical" CPUs.
- *
- * The logical CPUs presented may share physical components on the chip
- * such as caches, data pipes, FPUs, etc. It is advantageous to have the
- * kernel know which logical CPUs are presented by a given chip,
- * and what facilities on the chip are shared, since the kernel can then use
- * this information to employ scheduling policies that help improve the
- * availability of per chip resources, and increase utilization of a thread's
- * cache investment.
- *
- * The "chip_t" structure represents a physical processor.
- * It is used to keep track of which logical CPUs are presented by a given
- * chip, and to provide a parameterized representation of a chip's
- * properties. A count of the number of running threads is also
- * maintained, and is used by the dispatcher to balance load across the
- * system's chips to improve performance through increased chip resource
- * availability.
- *
- * Locking:
- *
- * Safely traversing the per lgroup lists requires the same protections
- * as traversing the cpu lists. One must either:
- * - hold cpu_lock
- * - have disabled kernel preemption
- * - be at high SPL
- * - have cpu's paused
- *
- * Safely traversing the global "chip_list" requires holding cpu_lock.
- *
- * A chip's nrunning count should only be modified using the
- * CHIP_NRUNNING() macro, through which updates of the count are done
- * atomically.
- */
-
-chip_t cpu0_chip; /* chip structure for first CPU */
-cpu_physid_t cpu0_physid; /* boot CPU's physical id structure */
-
-/*
- * chip_bootstrap is used on platforms where it is possible to enter the
- * dispatcher before a new CPU's chip initialization has happened.
- */
-static chip_t chip_bootstrap;
-
-#define CPU_HAS_NO_CHIP(cp) \
- ((cp)->cpu_chip == NULL || (cp)->cpu_chip == &chip_bootstrap)
-
-static chip_t *chip_list; /* protected by CPU lock */
-static chip_set_t chip_set; /* bitmap of chips in existence */
- /* indexed by chip_seqid */
-static chipid_t chip_seqid_next = 0; /* next sequential chip id */
-static int nchips = 0; /* num chips in existence */
-
-static chip_t *chip_find(chipid_t);
-static int chip_kstat_extract(kstat_t *, int);
-
-/*
- * Declare static kstat names (defined in chip.h)
- */
-CHIP_KSTAT_NAMES;
-
-/*
- * Find the chip_t with the given chip_id.
- */
-static chip_t *
-chip_find(chipid_t chipid)
-{
- chip_t *chp, *chip_start;
-
- ASSERT(chip_list == NULL || chip_list->chip_next == chip_list ||
- MUTEX_HELD(&cpu_lock));
-
- if ((chp = chip_start = chip_list) != NULL) {
- do {
- if (chp->chip_id == chipid) {
- return (chp);
- }
- } while ((chp = chp->chip_next) != chip_start);
- }
- return (NULL);
-}
-
-chip_t *
-chip_lookup(chipid_t chipid)
-{
- chip_t *chp;
-
- mutex_enter(&cpu_lock);
- chp = chip_find(chipid);
- mutex_exit(&cpu_lock);
-
- return (chp);
-}
-
-#ifndef sun4v
-/*
- * Setup the kstats for this chip, if needed
- */
-void
-chip_kstat_create(chip_t *chp)
-{
- chip_stat_t stat;
- kstat_t *chip_kstat;
-
- ASSERT(MUTEX_HELD(&cpu_lock));
-
- if (chp->chip_kstat != NULL)
- return; /* already initialized */
-
- chip_kstat = kstat_create("chip", chp->chip_id, NULL, "misc",
- KSTAT_TYPE_NAMED, CHIP_NUM_STATS,
- KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
-
- if (chip_kstat != NULL) {
- chip_kstat->ks_lock = &chp->chip_kstat_mutex;
- mutex_init(chip_kstat->ks_lock, NULL, MUTEX_DEFAULT, NULL);
- chip_kstat->ks_private = chp;
- chip_kstat->ks_data = chp->chip_kstat_data;
- for (stat = 0; stat < CHIP_NUM_STATS; stat++)
- kstat_named_init(&chp->chip_kstat_data[stat],
- chip_kstat_names[stat], KSTAT_DATA_INT64);
- chip_kstat->ks_update = chip_kstat_extract;
- chp->chip_kstat = chip_kstat;
- kstat_install(chip_kstat);
- }
-}
-#else
-/*
- * Note: On sun4v systems, chip kstats don't currently
- * exist, since "chip" structures and policies are being
- * leveraged to implement core level balancing, and exporting
- * chip kstats in light of this would be both misleading
- * and confusing.
- */
-/* ARGSUSED */
-void
-chip_kstat_create(chip_t *chp)
-{
-}
-#endif /* !sun4v */
-
-static int
-chip_kstat_extract(kstat_t *ksp, int rw)
-{
- struct kstat_named *ksd;
- chip_t *chp;
-
- chp = (chip_t *)ksp->ks_private;
-
- ksd = (struct kstat_named *)ksp->ks_data;
- ASSERT(ksd == chp->chip_kstat_data);
-
- /*
- * The chip kstats are read only
- */
- if (rw == KSTAT_WRITE)
- return (EACCES);
-
- ksd[CHIP_ID].value.i64 = chp->chip_id;
- ksd[CHIP_NCPUS].value.i64 = chp->chip_ncpu;
- ksd[CHIP_NRUNNING].value.i64 = chp->chip_nrunning;
- ksd[CHIP_RECHOOSE].value.i64 =
- rechoose_interval + chp->chip_rechoose_adj;
-
- return (0);
-}
-
-/*
- * If necessary, instantiate a chip_t for this CPU.
- * Called when a CPU is being added to the system either in startup,
- * or because of DR. The cpu will be assigned to the chip's active
- * CPU list later in chip_cpu_assign()
- */
-void
-chip_cpu_init(cpu_t *cp)
-{
- chipid_t cid;
- int rechoose;
- chip_t *chp;
- chip_def_t chp_def;
-
- ASSERT((chip_list == NULL) || (MUTEX_HELD(&cpu_lock)));
-
- if (chip_list == NULL)
- cp->cpu_physid = &cpu0_physid;
- else
- cp->cpu_physid = kmem_zalloc(sizeof (cpu_physid_t), KM_SLEEP);
-
- /*
- * Call into the platform to fetch this cpu's chip and core ids.
- * The ids are cached in the CPU's physical id structure.
- *
- * On sun4v platforms, the chip infrastructure is currently being
- * leveraged to implement core level load balancing.
- */
-#ifdef DO_CORELEVEL_LOADBAL
- cid = chip_plat_get_coreid(cp);
- cp->cpu_physid->cpu_coreid = cid;
- cp->cpu_physid->cpu_chipid = chip_plat_get_chipid(cp);
-#else
- cid = chip_plat_get_chipid(cp);
- cp->cpu_physid->cpu_chipid = cid;
- cp->cpu_physid->cpu_coreid = chip_plat_get_coreid(cp);
-#endif /* DO_CORELEVEL_LOADBAL */
-
- chp = chip_find(cid);
- if (chp == NULL) {
-
- /*
- * Create a new chip
- */
- if (chip_list == NULL)
- chp = &cpu0_chip;
- else
- chp = kmem_zalloc(sizeof (*chp), KM_SLEEP);
-
- chp->chip_id = cid;
- chp->chip_nrunning = 0;
-
- /*
- * If we're booting, take this moment to perform
- * some additional initialization
- */
- if (chip_list == NULL) {
- CHIP_SET_ZERO(chip_set);
- CHIP_SET_ZERO(cp->cpu_part->cp_mach->mc_chipset);
- chp->chip_nrunning++; /* for t0 */
- }
-
- /*
- * Find the next free sequential chip id.
- * A chip's sequential id exists in the range
- * 0 .. CHIP_MAX_CHIPS, and is suitable for use with
- * chip sets.
- */
- while (CHIP_SET_TEST(chip_set, chip_seqid_next))
- chip_seqid_next++;
- chp->chip_seqid = chip_seqid_next++;
- CHIP_SET_ADD(chip_set, chp->chip_seqid);
-
- ASSERT(chip_seqid_next <= CHIP_MAX_CHIPS);
-
-
- /*
- * Query the platform specific parameters
- * for this chip
- */
- chip_plat_define_chip(cp, &chp_def);
- chp->chip_rechoose_adj = chp_def.chipd_rechoose_adj;
- chp->chip_type = chp_def.chipd_type;
- chp->chip_nosteal = chp_def.chipd_nosteal;
-
- ASSERT((chp->chip_type < CHIP_NUM_TYPES) &&
- (chp->chip_type >= CHIP_DEFAULT));
-
- /*
- * Insert this chip in chip_list
- */
- if (chip_list == NULL) {
- chip_list = chp;
- chp->chip_next = chp->chip_prev = chp;
- } else {
- chip_t *chptr;
-
- chptr = chip_list;
- chp->chip_next = chptr;
- chp->chip_prev = chptr->chip_prev;
- chptr->chip_prev->chip_next = chp;
- chptr->chip_prev = chp;
- }
-
- nchips++;
- ASSERT(nchips <= CHIP_MAX_CHIPS);
-
- /*
- * The boot cpu will create the first chip's kstats
- * later in cpu_kstat_init()
- */
- if (chp != &cpu0_chip)
- chip_kstat_create(chp);
- }
-
- /*
- * Initialize the effective rechoose interval cached
- * in this cpu structure.
- */
- rechoose = rechoose_interval + chp->chip_rechoose_adj;
- cp->cpu_rechoose = (rechoose < 0) ? 0 : rechoose;
-
- cp->cpu_chip = chp;
- chp->chip_ref++;
-}
-
-/*
- * This cpu is being deleted. It has already been removed from
- * the chip's active cpu list back in chip_cpu_unassign(). Here
- * we remove the cpu's reference to the chip, and cleanup/destroy
- * the chip if needed.
- */
-void
-chip_cpu_fini(cpu_t *cp)
-{
- chip_t *chp;
- chip_t *prev, *next;
-
- ASSERT(MUTEX_HELD(&cpu_lock));
-
- /*
- * This can happen if the CPU failed to power on
- */
- if (CPU_HAS_NO_CHIP(cp))
- return;
-
- chp = cp->cpu_chip;
- cp->cpu_chip = NULL;
-
- /*
- * Clear out and free the CPU's physical id structure
- */
- cp->cpu_physid->cpu_chipid = -1;
- cp->cpu_physid->cpu_coreid = -1;
-
- if (cp->cpu_physid != &cpu0_physid) {
- ASSERT(cp->cpu_physid != NULL);
- kmem_free(cp->cpu_physid, sizeof (cpu_physid_t));
- }
- cp->cpu_physid = NULL;
-
- /*
- * Delete the chip if its last CPU is being deleted
- */
- if (--chp->chip_ref == 0) {
-
- ASSERT(chp->chip_ncpu == 0);
- ASSERT(chp->chip_cpus == NULL);
- ASSERT(chp->chip_nrunning == 0);
- ASSERT(chp->chip_lgrp == NULL);
- ASSERT((chp->chip_next_lgrp == NULL) &&
- (chp->chip_prev_lgrp == NULL));
-
- if (chip_seqid_next > chp->chip_seqid)
- chip_seqid_next = chp->chip_seqid;
- CHIP_SET_REMOVE(chip_set, chp->chip_seqid);
-
- chp->chip_id = -1;
- chp->chip_seqid = -1;
-
- /*
- * remove the chip from the system's chip list
- */
- if (chip_list == chp)
- chip_list = chp->chip_next;
-
- prev = chp->chip_prev;
- next = chp->chip_next;
-
- prev->chip_next = next;
- next->chip_prev = prev;
-
- chp->chip_next = chp->chip_prev = NULL;
-
- nchips--;
-
- /*
- * clean up any chip kstats
- */
- if (chp->chip_kstat) {
- kstat_delete(chp->chip_kstat);
- chp->chip_kstat = NULL;
- }
- /*
- * If the chip_t structure was dynamically
- * allocated, free it.
- */
- if (chp != &cpu0_chip)
- kmem_free(chp, sizeof (*chp));
- }
-}
-
-/*
- * This cpu is becoming active (online).
- * Perform all the necessary bookkeeping in it's chip_t
- */
-void
-chip_cpu_assign(cpu_t *cp)
-{
- chip_t *chp;
- cpu_t *cptr;
-
- ASSERT(chip_list == NULL || chip_list->chip_next == chip_list ||
- MUTEX_HELD(&cpu_lock));
-
- chp = cp->cpu_chip;
-
- /*
- * Add this cpu to the chip's cpu list
- */
- if (chp->chip_ncpu == 0) {
- chp->chip_cpus = cp;
- cp->cpu_next_chip = cp->cpu_prev_chip = cp;
- } else {
- cptr = chp->chip_cpus;
- cp->cpu_next_chip = cptr;
- cp->cpu_prev_chip = cptr->cpu_prev_chip;
- cp->cpu_prev_chip->cpu_next_chip = cp;
- cptr->cpu_prev_chip = cp;
- }
-
- chp->chip_ncpu++;
-
- /*
- * Notate this chip's seqid in the cpu partition's chipset
- */
- chip_cpu_move_part(cp, NULL, cp->cpu_part);
-}
-
-/*
- * This cpu is being offlined, so do the reverse
- * of cpu_chip_assign()
- */
-void
-chip_cpu_unassign(cpu_t *cp)
-{
- chip_t *chp;
- struct cpu *prev;
- struct cpu *next;
-
- ASSERT(MUTEX_HELD(&cpu_lock));
-
- chp = cp->cpu_chip;
-
- chip_cpu_move_part(cp, cp->cpu_part, NULL);
-
- /*
- * remove this cpu from the chip's cpu list
- */
- prev = cp->cpu_prev_chip;
- next = cp->cpu_next_chip;
-
- prev->cpu_next_chip = next;
- next->cpu_prev_chip = prev;
-
- cp->cpu_next_chip = cp->cpu_prev_chip = NULL;
-
- chp->chip_ncpu--;
-
- if (chp->chip_ncpu == 0) {
- chp->chip_cpus = NULL;
- } else if (chp->chip_cpus == cp) {
- chp->chip_cpus = next;
- }
-}
-
-/*
- * A cpu on the chip is moving into and/or out of a cpu partition.
- * Maintain the cpuparts' chip membership set.
- * oldpp is NULL when a cpu is being offlined.
- * newpp is NULL when a cpu is being onlined.
- */
-void
-chip_cpu_move_part(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
-{
- cpu_t *cpp;
- chip_t *chp;
-
- ASSERT(chip_list->chip_next == chip_list || MUTEX_HELD(&cpu_lock));
-
- chp = cp->cpu_chip;
-
- if (newpp != NULL) {
- /*
- * Add the chip's seqid to the cpupart's chip set
- */
- CHIP_SET_ADD(newpp->cp_mach->mc_chipset, chp->chip_seqid);
- }
-
- if (oldpp != NULL) {
- cpp = cp;
- while ((cpp = cpp->cpu_next_chip) != cp) {
- if (cpp->cpu_part->cp_id == oldpp->cp_id) {
- /*
- * Another cpu on the chip is in the old
- * cpu partition, so we're done
- */
- return;
- }
- }
-
- /*
- * No other cpu on the chip is in the old partition
- * so remove the chip's seqid from it's set
- */
- CHIP_SET_REMOVE(oldpp->cp_mach->mc_chipset, chp->chip_seqid);
- }
-}
-
-/*
- * Called to indicate a slave CPU has started up.
- */
-void
-chip_cpu_startup(cpu_t *cp)
-{
- /*
- * Indicate that the chip has a new running thread
- * (slave startup)
- */
- CHIP_NRUNNING(cp->cpu_chip, 1);
-}
-
-/*
- * Provide the specified CPU a bootstrap chip
- */
-void
-chip_bootstrap_cpu(cpu_t *cp)
-{
- cp->cpu_chip = &chip_bootstrap;
-}
-
-/*
- * Given a chip set, return 1 if it is empty.
- */
-int
-chip_set_isnull(chip_set_t *set)
-{
- int i;
-
- for (i = 0; i < CHIP_SET_WORDS; i++) {
- if (set->csb[i] != 0)
- return (0);
- }
- return (1);
-}
diff --git a/usr/src/uts/common/os/clock.c b/usr/src/uts/common/os/clock.c
index 0152c2e958..a1040f1270 100644
--- a/usr/src/uts/common/os/clock.c
+++ b/usr/src/uts/common/os/clock.c
@@ -23,7 +23,7 @@
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -66,7 +66,6 @@
#include <sys/cpupart.h>
#include <sys/rctl.h>
#include <sys/task.h>
-#include <sys/chip.h>
#include <sys/sdt.h>
#ifdef __sparc
@@ -260,13 +259,6 @@ cyclic_id_t deadman_cyclic; /* deadman()'s cyclic_id */
static int lgrp_ticks; /* counter to schedule lgrp load calcs */
/*
- * rechoose_interval_history is used to detect when rechoose_interval's
- * value has changed (via hotpatching for example), so that the
- * cached values in the cpu structures may be updated.
- */
-static int rechoose_interval_history = RECHOOSE_INTERVAL;
-
-/*
* for tod fault detection
*/
#define TOD_REF_FREQ ((longlong_t)(NANOSEC))
@@ -345,8 +337,6 @@ clock(void)
int64_t lltemp;
int s;
int do_lgrp_load;
- int rechoose_update = 0;
- int rechoose;
int i;
if (panicstr)
@@ -430,21 +420,9 @@ clock(void)
do_lgrp_load = 1;
}
- /*
- * The dispatcher tunable rechoose_interval may be hot-patched.
- * Note if it has a new value. If so, the effective rechoose_interval
- * cached in the cpu structures needs to be updated.
- * If needed we'll do this during the walk of the cpu_list below.
- */
- if (rechoose_interval != rechoose_interval_history) {
- rechoose_interval_history = rechoose_interval;
- rechoose_update = 1;
- }
-
if (one_sec)
loadavg_update();
-
/*
* First count the threads waiting on kpreempt queues in each
* CPU partition.
@@ -522,19 +500,6 @@ clock(void)
lgrp_loadavg(cp->cpu_lpl,
cpu_nrunnable * LGRP_LOADAVG_IN_THREAD_MAX, 1);
}
- /*
- * The platform may define a per physical processor
- * adjustment of rechoose_interval. The effective
- * (base + adjustment) rechoose_interval is cached
- * in the cpu structures for efficiency. Above we detect
- * if the cached values need updating, and here is where
- * the update happens.
- */
- if (rechoose_update) {
- rechoose = rechoose_interval +
- cp->cpu_chip->chip_rechoose_adj;
- cp->cpu_rechoose = (rechoose < 0) ? 0 : rechoose;
- }
} while ((cp = cp->cpu_next) != cpu_list);
/*
diff --git a/usr/src/uts/common/os/cpu.c b/usr/src/uts/common/os/cpu.c
index 5ca51ec3da..9237517a69 100644
--- a/usr/src/uts/common/os/cpu.c
+++ b/usr/src/uts/common/os/cpu.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -45,7 +45,7 @@
#include <sys/cpupart.h>
#include <sys/lgrp.h>
#include <sys/pset.h>
-#include <sys/chip.h>
+#include <sys/pghw.h>
#include <sys/kmem.h>
#include <sys/kmem_impl.h> /* to set per-cpu kmem_cache offset */
#include <sys/atomic.h>
@@ -1266,6 +1266,11 @@ cpu_offline(cpu_t *cp, int flags)
cpu_state_change_notify(cp->cpu_id, CPU_OFF);
/*
+ * Tell the PG subsystem that the CPU is leaving the partition
+ */
+ pg_cpupart_out(cp, pp);
+
+ /*
* Take the CPU out of interrupt participation so we won't find
* bound kernel threads. If the architecture cannot completely
* shut off interrupts on the CPU, don't quiesce it, but don't
@@ -1512,6 +1517,11 @@ out:
cyclic_online(cp);
/*
+ * If we failed, tell the PG subsystem that the CPU is back
+ */
+ pg_cpupart_in(cp, pp);
+
+ /*
* If we failed, we need to notify everyone that this CPU is back on.
*/
if (error != 0)
@@ -1732,7 +1742,12 @@ cpu_del_unit(int cpuid)
ASSERT(cp->cpu_next_part == cp);
ASSERT(cp->cpu_prev_part == cp);
- chip_cpu_fini(cp);
+ /*
+ * Tear down the CPU's physical ID cache, and update any
+ * processor groups
+ */
+ pg_cpu_fini(cp);
+ pghw_physid_destroy(cp);
/*
* Destroy kstat stuff.
@@ -1816,8 +1831,7 @@ cpu_add_active_internal(cpu_t *cp)
ASSERT(cp_numparts_nonempty != 0);
}
- chip_cpu_assign(cp);
-
+ pg_cpu_active(cp);
lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)cp, 0);
bzero(&cp->cpu_loadavg, sizeof (cp->cpu_loadavg));
@@ -1830,9 +1844,12 @@ cpu_add_active_internal(cpu_t *cp)
void
cpu_add_active(cpu_t *cp)
{
+ pg_cpupart_in(cp, cp->cpu_part);
+
pause_cpus(NULL);
cpu_add_active_internal(cp);
start_cpus();
+
cpu_stats_kstat_create(cp);
cpu_create_intrstat(cp);
lgrp_kstat_create(cp);
@@ -1854,7 +1871,7 @@ cpu_remove_active(cpu_t *cp)
ASSERT(cp->cpu_next_onln != cp); /* not the last one */
ASSERT(cp->cpu_prev_onln != cp); /* not the last one */
- chip_cpu_unassign(cp);
+ pg_cpu_inactive(cp);
lgrp_config(LGRP_CONFIG_CPU_OFFLINE, (uintptr_t)cp, 0);
@@ -2146,11 +2163,12 @@ cpu_info_kstat_update(kstat_t *ksp, int rw)
(void) strncpy(cpu_info_template.ci_fpu_type.value.c,
cp->cpu_type_info.pi_fputypes, 15);
cpu_info_template.ci_clock_MHz.value.l = cp->cpu_type_info.pi_clock;
- cpu_info_template.ci_chip_id.value.l = chip_plat_get_chipid(cp);
+ cpu_info_template.ci_chip_id.value.l =
+ pg_plat_hw_instance_id(cp, PGHW_CHIP);
kstat_named_setstr(&cpu_info_template.ci_implementation,
cp->cpu_idstr);
kstat_named_setstr(&cpu_info_template.ci_brandstr, cp->cpu_brandstr);
- cpu_info_template.ci_core_id.value.l = chip_plat_get_coreid(cp);
+ cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp);
#if defined(__sparcv9)
cpu_info_template.ci_device_ID.value.ui64 =
@@ -2163,7 +2181,7 @@ cpu_info_kstat_update(kstat_t *ksp, int rw)
cpu_info_template.ci_family.value.l = cpuid_getfamily(cp);
cpu_info_template.ci_model.value.l = cpuid_getmodel(cp);
cpu_info_template.ci_step.value.l = cpuid_getstep(cp);
- cpu_info_template.ci_clogid.value.l = chip_plat_get_clogid(cp);
+ cpu_info_template.ci_clogid.value.l = cpuid_get_clogid(cp);
#endif
return (0);
@@ -2215,11 +2233,13 @@ cpu_info_kstat_destroy(cpu_t *cp)
void
cpu_kstat_init(cpu_t *cp)
{
+ /*
+ * XXX need pg kstats for boot CPU
+ */
mutex_enter(&cpu_lock);
cpu_info_kstat_create(cp);
cpu_stats_kstat_create(cp);
cpu_create_intrstat(cp);
- chip_kstat_create(cp->cpu_chip);
cpu_set_state(cp);
mutex_exit(&cpu_lock);
}
diff --git a/usr/src/uts/common/os/group.c b/usr/src/uts/common/os/group.c
new file mode 100644
index 0000000000..b15dff181f
--- /dev/null
+++ b/usr/src/uts/common/os/group.c
@@ -0,0 +1,322 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/systm.h>
+#include <sys/param.h>
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#include <sys/group.h>
+
+
+#define GRP_SET_SIZE_DEFAULT 2
+
+static void group_grow_set(group_t *);
+static void group_shrink_set(group_t *);
+static void group_pack_set(void **, uint_t);
+
+/*
+ * Set up a group_t for first use by zeroing the whole structure.
+ */
+void
+group_create(group_t *g)
+{
+	bzero(g, sizeof (*g));
+}
+
+/*
+ * Tear down a group_t, releasing its element array if one was allocated.
+ * The caller must have emptied the group first.
+ */
+void
+group_destroy(group_t *g)
+{
+	ASSERT(g->grp_size == 0);
+
+	if (g->grp_capacity > 0) {
+		size_t	set_bytes = g->grp_capacity * sizeof (void *);
+
+		kmem_free(g->grp_set, set_bytes);
+		g->grp_capacity = 0;
+	}
+	g->grp_set = NULL;
+}
+
+/*
+ * Append element "e" to group "g"
+ *
+ * The element lands in the slot just past the current last one. If the
+ * set is already full it is grown first, unless the caller passed
+ * GRP_NORESIZE, in which case nothing is changed and -1 is returned.
+ * Returns 0 once the element has been stored.
+ */
+int
+group_add(group_t *g, void *e, int gflag)
+{
+	int	slot;
+
+	if (g->grp_size == g->grp_capacity && (gflag & GRP_NORESIZE))
+		return (-1);
+
+	/* A full set may only be added to if resizing was requested */
+	ASSERT((gflag & GRP_RESIZE) || g->grp_size != g->grp_capacity);
+
+	slot = g->grp_size;
+	g->grp_size++;
+	if (g->grp_size > g->grp_capacity)
+		group_grow_set(g);
+
+	ASSERT(g->grp_set[slot] == NULL);
+	g->grp_set[slot] = e;
+
+	return (0);
+}
+
+/*
+ * Remove element "e" from group "g"
+ *
+ * The first grp_size slots are searched linearly. On a match the slot is
+ * cleared, the set is repacked to close the hole, and the element count
+ * is decremented. If GRP_RESIZE is passed in "gflag" and the remaining
+ * element count is a power of two larger than GRP_SET_SIZE_DEFAULT, the
+ * backing array is shrunk as well.
+ *
+ * Returns -1 if "e" was not present in "g" and 0 otherwise
+ */
+int
+group_remove(group_t *g, void *e, int gflag)
+{
+	int	i;
+
+	/*
+	 * Find the element in the group's set
+	 */
+	for (i = 0; i < g->grp_size; i++)
+		if (g->grp_set[i] == e)
+			break;
+
+	/*
+	 * Running off the end of the populated slots means "e" isn't here.
+	 * Test the index rather than grp_set[i]: after a failed search "i"
+	 * equals grp_size, and reading that slot would run past the end of
+	 * the array when the set is full, dereference a NULL grp_set when
+	 * nothing was ever allocated, or falsely match an unused (NULL)
+	 * slot when "e" is NULL.
+	 */
+	if (i >= g->grp_size)
+		return (-1);
+
+	g->grp_set[i] = NULL;
+	group_pack_set(g->grp_set, g->grp_size);
+	g->grp_size--;
+
+	if ((gflag & GRP_RESIZE) &&
+	    g->grp_size > GRP_SET_SIZE_DEFAULT &&
+	    ((g->grp_size - 1) & g->grp_size) == 0)
+		group_shrink_set(g);
+
+	return (0);
+}
+
+/*
+ * Make sure group "g" has room for at least "n" elements, growing its
+ * set as many times as that takes. A group that is already large enough
+ * is left alone.
+ */
+void
+group_expand(group_t *g, uint_t n)
+{
+	if (g->grp_capacity >= n)
+		return;
+
+	do {
+		group_grow_set(g);
+	} while (g->grp_capacity < n);
+}
+
+/*
+ * Upsize a group's holding capacity
+ *
+ * A set that has no storage yet gets GRP_SET_SIZE_DEFAULT slots; otherwise
+ * the capacity is doubled. The new array is zero filled, existing entries
+ * keep their indices, and the old array is freed. Allocation is done with
+ * KM_SLEEP.
+ */
+static void
+group_grow_set(group_t *g)
+{
+	uint_t		cap_old, cap_new;
+	void		**set_old, **set_new;
+
+	cap_old = g->grp_capacity;
+	set_old = g->grp_set;
+
+	/*
+	 * The array size grows in powers of two
+	 */
+	if ((cap_new = (cap_old << 1)) == 0) {
+		/*
+		 * The set is unallocated.
+		 * Allocate a default sized set.
+		 */
+		cap_new = GRP_SET_SIZE_DEFAULT;
+		g->grp_set = kmem_zalloc(cap_new * sizeof (void *), KM_SLEEP);
+		g->grp_capacity = cap_new;
+	} else {
+		/*
+		 * Allocate a newly sized array,
+		 * copy the data, and free the old array.
+		 *
+		 * Both arrays are ordinary kernel allocations, so a plain
+		 * bcopy() is used; there is no fault to recover from and
+		 * no error return to ignore.
+		 */
+		set_new = kmem_zalloc(cap_new * sizeof (void *), KM_SLEEP);
+		bcopy(set_old, set_new, cap_old * sizeof (void *));
+		g->grp_set = set_new;
+		g->grp_capacity = cap_new;
+		kmem_free(set_old, cap_old * sizeof (void *));
+	}
+	/*
+	 * The new array size should be a power of two
+	 */
+	ASSERT(((cap_new - 1) & cap_new) == 0);
+}
+
+/*
+ * Downsize a group's holding capacity
+ *
+ * The array is halved and the entries in its lower half are carried over;
+ * anything stored in the upper half is dropped along with the old array.
+ * Nothing is done if halving would take the set below
+ * GRP_SET_SIZE_DEFAULT slots.
+ */
+static void
+group_shrink_set(group_t *g)
+{
+	uint_t		cap_old, cap_new;
+	void		**set_old, **set_new;
+
+	cap_old = g->grp_capacity;
+	set_old = g->grp_set;
+
+	/*
+	 * The group's existing array size must already
+	 * be a power of two
+	 */
+	ASSERT(((cap_old - 1) & cap_old) == 0);
+	cap_new = cap_old >> 1;
+
+	/*
+	 * GRP_SET_SIZE_DEFAULT is the minimum set size.
+	 */
+	if (cap_new < GRP_SET_SIZE_DEFAULT)
+		return;
+
+	/*
+	 * Both arrays are ordinary kernel allocations, so a plain bcopy()
+	 * is used; there is no fault to recover from and no error return
+	 * to ignore.
+	 */
+	set_new = kmem_zalloc(cap_new * sizeof (void *), KM_SLEEP);
+	bcopy(set_old, set_new, cap_new * sizeof (void *));
+	g->grp_capacity = cap_new;
+	g->grp_set = set_new;
+
+	ASSERT(((cap_new - 1) & cap_new) == 0);
+	kmem_free(set_old, cap_old * sizeof (void *));
+}
+
+/*
+ * Pack a group's set
+ * Element order is not preserved
+ *
+ * Makes a single pass over the first "sz" slots of "set", moving each
+ * non-NULL entry that follows a hole down into the earliest known hole.
+ * "free" holds the index of that hole, or (uint_t)-1 while no hole is
+ * known.
+ */
+static void
+group_pack_set(void **set, uint_t sz)
+{
+ uint_t i, j, free;
+
+ free = (uint_t)-1;
+
+ for (i = 0; i < sz; i++) {
+ if (set[i] == NULL && free == (uint_t)-1) {
+ /*
+ * Found a new free slot.
+ * Start packing from here.
+ */
+ free = i;
+ } else if (set[i] != NULL && free != (uint_t)-1) {
+ /*
+ * Found a slot to pack into
+ * an earlier free slot.
+ */
+ ASSERT(set[free] == NULL);
+ set[free] = set[i];
+ set[i] = NULL;
+
+ /*
+ * Find the next free slot
+ *
+ * set[i] was cleared just above, so this scan
+ * stops at index i at the latest.
+ */
+ for (j = free + 1; set[j] != NULL; j++) {
+ ASSERT(j <= i);
+ if (j == i)
+ break;
+ }
+ if (set[j] == NULL)
+ free = j;
+ else
+ free = (uint_t)-1;
+ }
+ }
+}
+
+/*
+ * Reset an iterator cookie so that the next group_iterate() call begins
+ * with the first slot of the group.
+ */
+void
+group_iter_init(group_iter_t *iter)
+{
+	(*iter) = 0;
+}
+
+/*
+ * Step through the elements of a group
+ *
+ * Starting at the position recorded in "iter", empty (NULL) slots are
+ * skipped and the next element is returned. The cookie is advanced past
+ * whatever was examined. NULL is returned once the populated part of the
+ * set has been exhausted.
+ */
+void *
+group_iterate(group_t *g, group_iter_t *iter)
+{
+	uint_t	pos;
+	void	*elem = NULL;
+
+	for (pos = *iter; pos < g->grp_size && elem == NULL; pos++)
+		elem = g->grp_set[pos];
+
+	*iter = pos;
+
+	return (elem);
+}
+
+/*
+ * Fetch the element stored at index "idx" of a group.
+ * An index beyond the group's capacity yields NULL.
+ */
+void *
+group_access_at(group_t *g, uint_t idx)
+{
+	return ((idx < g->grp_capacity) ? g->grp_set[idx] : NULL);
+}
+
+/*
+ * Place element "e" at index "idx" of an ordered group. The slot is
+ * expected to be empty, and the group is never resized here: it must
+ * already have the capacity to hold an element at that index.
+ *
+ * Returns 0 if the addition was successful, and -1 if it failed because
+ * the table was too small. The group's size is extended to cover "idx"
+ * if it did not already.
+ */
+int
+group_add_at(group_t *g, void *e, uint_t idx)
+{
+	void	**slot;
+
+	if (g->grp_capacity <= idx)
+		return (-1);
+
+	slot = &g->grp_set[idx];
+	ASSERT(*slot == NULL);
+	*slot = e;
+
+	if (g->grp_size <= idx)
+		g->grp_size = idx + 1;
+
+	return (0);
+}
+
+/*
+ * Clear the slot at index "idx", which must lie within the group's
+ * capacity. Only the slot is emptied; the group's size is left as is.
+ */
+void
+group_remove_at(group_t *g, uint_t idx)
+{
+	ASSERT(g->grp_capacity > idx);
+
+	g->grp_set[idx] = NULL;
+}
diff --git a/usr/src/uts/common/os/lgrp.c b/usr/src/uts/common/os/lgrp.c
index 83f67e1088..2007f7b158 100644
--- a/usr/src/uts/common/os/lgrp.c
+++ b/usr/src/uts/common/os/lgrp.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -89,7 +89,7 @@
#include <sys/cmn_err.h>
#include <sys/kstat.h>
#include <sys/sysmacros.h>
-#include <sys/chip.h>
+#include <sys/pg.h>
#include <sys/promif.h>
#include <sys/sdt.h>
@@ -314,8 +314,6 @@ lgrp_root_init(void)
klgrpset_clear(lgrp_root->lgrp_children);
klgrpset_clear(lgrp_root->lgrp_leaves);
lgrp_root->lgrp_parent = NULL;
- lgrp_root->lgrp_chips = NULL;
- lgrp_root->lgrp_chipcnt = 0;
lgrp_root->lgrp_latency = lgrp_plat_latency(hand, hand);
for (i = 0; i < LGRP_RSRC_COUNT; i++)
@@ -679,7 +677,6 @@ lgrp_cpu_init(struct cpu *cp)
lgrp_t *my_lgrp;
lgrp_id_t lgrpid;
struct cpu *cptr;
- struct chip *chp;
/*
* This is the first time through if the resource set
@@ -795,33 +792,6 @@ lgrp_cpu_init(struct cpu *cp)
cptr->cpu_prev_lgrp = cp;
}
my_lgrp->lgrp_cpucnt++;
-
- /*
- * Add this cpu's chip to the per lgroup list
- * if necessary
- */
- if (cp->cpu_chip->chip_lgrp == NULL) {
- struct chip *lcpr;
-
- chp = cp->cpu_chip;
-
- if (my_lgrp->lgrp_chipcnt == 0) {
- my_lgrp->lgrp_chips = chp;
- chp->chip_next_lgrp =
- chp->chip_prev_lgrp = chp;
- } else {
- lcpr = my_lgrp->lgrp_chips;
- chp->chip_next_lgrp = lcpr;
- chp->chip_prev_lgrp =
- lcpr->chip_prev_lgrp;
- lcpr->chip_prev_lgrp->chip_next_lgrp =
- chp;
- lcpr->chip_prev_lgrp = chp;
- }
- chp->chip_lgrp = my_lgrp;
- chp->chip_balance = chp->chip_next_lgrp;
- my_lgrp->lgrp_chipcnt++;
- }
}
lgrp_t *
@@ -890,8 +860,6 @@ lgrp_create(void)
my_lgrp->lgrp_cpu = NULL;
my_lgrp->lgrp_cpucnt = 0;
- my_lgrp->lgrp_chips = NULL;
- my_lgrp->lgrp_chipcnt = 0;
if (my_lgrp->lgrp_kstat != NULL)
lgrp_kstat_reset(lgrpid);
@@ -945,8 +913,6 @@ lgrp_destroy(lgrp_t *lgrp)
lgrp->lgrp_cpu = NULL;
lgrp->lgrp_cpucnt = 0;
- lgrp->lgrp_chipcnt = 0;
- lgrp->lgrp_chips = NULL;
nlgrps--;
}
@@ -1022,7 +988,6 @@ lgrp_cpu_fini(struct cpu *cp, lgrp_id_t lgrpid)
lgrp_t *my_lgrp;
struct cpu *prev;
struct cpu *next;
- chip_t *chp;
ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);
@@ -1042,42 +1007,6 @@ lgrp_cpu_fini(struct cpu *cp, lgrp_id_t lgrpid)
my_lgrp->lgrp_cpucnt--;
/*
- * If the last CPU on it's chip is being offlined
- * then remove this chip from the per lgroup list.
- *
- * This is also done for the boot CPU when it needs
- * to move between lgroups as a consequence of
- * null proc lpa.
- */
- chp = cp->cpu_chip;
- if (chp->chip_ncpu == 0 || !lgrp_initialized) {
-
- chip_t *chpp;
-
- if (--my_lgrp->lgrp_chipcnt == 0)
- my_lgrp->lgrp_chips = NULL;
- else if (my_lgrp->lgrp_chips == chp)
- my_lgrp->lgrp_chips = chp->chip_next_lgrp;
-
- /*
- * Walk this lgroup's chip list looking for chips that
- * may try to balance against the one that's leaving
- */
- for (chpp = chp->chip_next_lgrp; chpp != chp;
- chpp = chpp->chip_next_lgrp) {
- if (chpp->chip_balance == chp)
- chpp->chip_balance = chp->chip_next_lgrp;
- }
-
- chp->chip_prev_lgrp->chip_next_lgrp = chp->chip_next_lgrp;
- chp->chip_next_lgrp->chip_prev_lgrp = chp->chip_prev_lgrp;
-
- chp->chip_next_lgrp = chp->chip_prev_lgrp = NULL;
- chp->chip_lgrp = NULL;
- chp->chip_balance = NULL;
- }
-
- /*
* Removing last CPU in lgroup, so update lgroup topology
*/
if (my_lgrp->lgrp_cpucnt == 0) {
@@ -1661,7 +1590,7 @@ lgrp_phys_to_lgrp(u_longlong_t physaddr)
* Return the leaf lgroup containing the given CPU
*
* The caller needs to take precautions necessary to prevent
- * "cpu" from going away across a call to this function.
+ * "cpu", and its lpl from going away across a call to this function.
* hint: kpreempt_disable()/kpreempt_enable()
*/
static lgrp_t *
diff --git a/usr/src/uts/common/os/pg.c b/usr/src/uts/common/os/pg.c
new file mode 100644
index 0000000000..cb8295b38e
--- /dev/null
+++ b/usr/src/uts/common/os/pg.c
@@ -0,0 +1,624 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/thread.h>
+#include <sys/cpuvar.h>
+#include <sys/cpupart.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/kstat.h>
+#include <sys/processor.h>
+#include <sys/disp.h>
+#include <sys/group.h>
+#include <sys/pg.h>
+
+/*
+ * Processor groups
+ *
+ * With the introduction of Chip Multi-Threaded (CMT) processor architectures,
+ * it is no longer necessarily true that a given physical processor module
+ * will present itself as a single schedulable entity (cpu_t). Rather, each
+ * chip and/or processor core may present itself as one or more "logical" CPUs.
+ *
+ * The logical CPUs presented may share physical components such as caches,
+ * data pipes, execution pipelines, FPUs, etc. It is advantageous to have the
+ * kernel be aware of the relationships existing between logical CPUs so that
+ * the appropriate optimizations may be employed.
+ *
+ * The processor group abstraction represents a set of logical CPUs that
+ * generally share some sort of physical or characteristic relationship.
+ *
+ * In the case of a physical sharing relationship, the CPUs in the group may
+ * share a pipeline, cache or floating point unit. In the case of a logical
+ * relationship, a PG may represent the set of CPUs in a processor set, or the
+ * set of CPUs running at a particular clock speed.
+ *
+ * The generic processor group structure, pg_t, contains the elements generic
+ * to a group of CPUs. Depending on the nature of the CPU relationship
+ * (LOGICAL or PHYSICAL), a pointer to a pg may be recast to a "view" of that
+ * PG where more specific data is represented.
+ *
+ * As an example, a PG representing a PHYSICAL relationship, may be recast to
+ * a pghw_t, where data further describing the hardware sharing relationship
+ * is maintained. See pghw.c and pghw.h for details on physical PGs.
+ *
+ * At this time a more specialized casting of a PG representing a LOGICAL
+ * relationship has not been implemented, but the architecture allows for this
+ * in the future.
+ *
+ * Processor Group Classes
+ *
+ * Processor group consumers may wish to maintain and associate specific
+ * data with the PGs they create. For this reason, a mechanism for creating
+ * class specific PGs exists. Classes may overload the default functions for
+ * creating, destroying, and associating CPUs with PGs, and may also register
+ * class specific callbacks to be invoked when the CPU related system
+ * configuration changes. Class specific data is stored/associated with
+ * PGs by incorporating the pg_t (or pghw_t, as appropriate), as the first
+ * element of a class specific PG object. In memory, such a structure may look
+ * like:
+ *
+ * ----------------------- - - -
+ * | common | | | | <--(pg_t *)
+ * ----------------------- | | -
+ * | HW specific | | | <-----(pghw_t *)
+ * ----------------------- | -
+ * | class specific | | <-------(pg_cmt_t *)
+ * ----------------------- -
+ *
+ * Access to the PG class specific data can be had by casting a pointer to
+ * its class specific view.
+ */
+
+/*
+ * Allocation / free routines of the default PG class. Both are defined
+ * at the end of this file and installed in pg_ops_default.
+ */
+static pg_t *pg_alloc_default(pg_class_t);
+static void pg_free_default(pg_t *);
+
+/*
+ * Bootstrap CPU specific PG data
+ * See pg_cpu_bootstrap()
+ *
+ * pg_cpu_add() and pg_cpu_delete() assert that the CPU they operate on is
+ * no longer pointing at this object, and pg_cpu_fini() never frees it.
+ */
+static cpu_pg_t bootstrap_pg_data;
+
+/*
+ * Bitset of allocated PG ids (they are sequential)
+ * and the next free id in the set.
+ *
+ * pg_create() starts its search for a free id at pg_id_next, and
+ * pg_destroy() lowers pg_id_next when a smaller id is released.
+ */
+static bitset_t pg_id_set;
+static pgid_t pg_id_next = 0;
+
+/*
+ * Default and externed PG ops vectors
+ *
+ * The default class only supplies alloc and free. A NULL entry means the
+ * class has no callback for that event; the PG_* macros below test each
+ * pointer before calling through it.
+ */
+static struct pg_ops pg_ops_default = {
+ pg_alloc_default, /* alloc */
+ pg_free_default, /* free */
+ NULL, /* cpu_init */
+ NULL, /* cpu_fini */
+ NULL, /* cpu_active */
+ NULL, /* cpu_inactive */
+ NULL, /* cpupart_in */
+ NULL, /* cpupart_out */
+ NULL, /* cpupart_move */
+ NULL, /* cpu_belongs */
+};
+
+/*
+ * Class specific PG allocation callbacks
+ *
+ * PG_ALLOC(class) evaluates to a newly allocated PG for the class id
+ * "class", and PG_FREE(pg) releases "pg" through its own class. Either
+ * one falls back to the default class's routine when the class did not
+ * register the corresponding callback.
+ */
+#define	PG_ALLOC(class)							\
+	(pg_classes[(class)].pgc_ops->alloc ?				\
+	    pg_classes[(class)].pgc_ops->alloc() :			\
+	    pg_classes[pg_default_cid].pgc_ops->alloc())
+
+#define	PG_FREE(pg)							\
+	((pg)->pg_class->pgc_ops->free ?				\
+	    (pg)->pg_class->pgc_ops->free(pg) :				\
+	    pg_classes[pg_default_cid].pgc_ops->free(pg))
+
+
+/*
+ * Class specific membership test callback
+ *
+ * Evaluates to the result of the class's cpu_belongs callback for (pg, cp),
+ * or to 0 when the class did not register one.
+ */
+#define	PG_CPU_BELONGS(pg, cp)						\
+	((pg)->pg_class->pgc_ops->cpu_belongs ?				\
+	    (pg)->pg_class->pgc_ops->cpu_belongs(pg, cp) : 0)
+
+/*
+ * CPU configuration callbacks
+ *
+ * Each macro invokes the named callback of class id "class" on "cp" when
+ * the class registered one, and does nothing otherwise. The bodies are
+ * wrapped in do { } while (0) so that each use, followed by its semicolon,
+ * is exactly one statement (safe in an unbraced if / else).
+ */
+#define	PG_CPU_INIT(class, cp)						\
+do {									\
+	if (pg_classes[(class)].pgc_ops->cpu_init)			\
+		pg_classes[(class)].pgc_ops->cpu_init(cp);		\
+} while (0)
+
+#define	PG_CPU_FINI(class, cp)						\
+do {									\
+	if (pg_classes[(class)].pgc_ops->cpu_fini)			\
+		pg_classes[(class)].pgc_ops->cpu_fini(cp);		\
+} while (0)
+
+#define	PG_CPU_ACTIVE(class, cp)					\
+do {									\
+	if (pg_classes[(class)].pgc_ops->cpu_active)			\
+		pg_classes[(class)].pgc_ops->cpu_active(cp);		\
+} while (0)
+
+#define	PG_CPU_INACTIVE(class, cp)					\
+do {									\
+	if (pg_classes[(class)].pgc_ops->cpu_inactive)			\
+		pg_classes[(class)].pgc_ops->cpu_inactive(cp);		\
+} while (0)
+
+/*
+ * CPU / cpupart configuration callbacks
+ *
+ * Each macro invokes the named callback of class id "class" when the class
+ * registered one, and does nothing otherwise. The bodies are wrapped in
+ * do { } while (0) so that each use, followed by its semicolon, is exactly
+ * one statement (safe in an unbraced if / else).
+ */
+#define	PG_CPUPART_IN(class, cp, pp)					\
+do {									\
+	if (pg_classes[(class)].pgc_ops->cpupart_in)			\
+		pg_classes[(class)].pgc_ops->cpupart_in(cp, pp);	\
+} while (0)
+
+#define	PG_CPUPART_OUT(class, cp, pp)					\
+do {									\
+	if (pg_classes[(class)].pgc_ops->cpupart_out)			\
+		pg_classes[(class)].pgc_ops->cpupart_out(cp, pp);	\
+} while (0)
+
+#define	PG_CPUPART_MOVE(class, cp, old, new)				\
+do {									\
+	if (pg_classes[(class)].pgc_ops->cpupart_move)			\
+		pg_classes[(class)].pgc_ops->cpupart_move(cp, old, new); \
+} while (0)
+
+
+
+/*
+ * Table of registered PG classes, indexed by class id, and the number of
+ * entries in it. pg_class_register() reallocates the table each time a
+ * class is added.
+ */
+static pg_class_t *pg_classes;
+static int pg_nclasses;
+
+/* Class id of the "default" class, registered by pg_init() */
+static pg_cid_t pg_default_cid;
+
+/*
+ * Initialize the common PG subsystem by registering the "default"
+ * (logical relationship) PG class and remembering its class id.
+ */
+void
+pg_init(void)
+{
+	pg_cid_t	cid;
+
+	cid = pg_class_register("default", &pg_ops_default, PGR_LOGICAL);
+	pg_default_cid = cid;
+}
+
+/*
+ * Perform CPU 0 initialization
+ *
+ * Runs on the boot CPU (referenced through CPU): creates its physical ID
+ * cache, then, with cpu_lock held, takes it through pg_cpu_init(),
+ * pg_cpupart_in() for the default partition (cp_default), and
+ * pg_cpu_active().
+ */
+void
+pg_cpu0_init(void)
+{
+ extern void pghw_physid_create();
+
+ /*
+ * Create the physical ID cache for the boot CPU
+ */
+ pghw_physid_create(CPU);
+
+ /*
+ * pg_cpu_* require that cpu_lock be held
+ */
+ mutex_enter(&cpu_lock);
+
+ pg_cpu_init(CPU);
+ pg_cpupart_in(CPU, &cp_default);
+ pg_cpu_active(CPU);
+
+ mutex_exit(&cpu_lock);
+}
+
+/*
+ * Register a new PG class
+ *
+ * Grows the pg_classes table by one entry, fills the new entry in from
+ * "name", "ops" and "relation", and returns the new class id (the entry's
+ * index in pg_classes). cpu_lock is held across the update.
+ *
+ * At most PG_CLASS_NAME_MAX - 1 bytes of "name" are kept, so pgc_name is
+ * always NUL terminated; longer names are silently truncated.
+ *
+ * This routine may block.
+ */
+pg_cid_t
+pg_class_register(char *name, struct pg_ops *ops, pg_relation_t relation)
+{
+	pg_class_t	*newclass;
+	pg_class_t	*classes_old;
+	id_t		cid;
+
+	mutex_enter(&cpu_lock);
+
+	/*
+	 * Allocate a zeroed table with room for one more class, and carry
+	 * over the existing entries (if any) before releasing the old table.
+	 */
+	classes_old = pg_classes;
+	pg_classes = kmem_zalloc(sizeof (pg_class_t) * (pg_nclasses + 1),
+	    KM_SLEEP);
+	if (pg_nclasses != 0) {
+		/*
+		 * Both tables are kernel memory we own, so a plain copy is
+		 * used rather than kcopy() with its result thrown away.
+		 */
+		bcopy(classes_old, pg_classes,
+		    sizeof (pg_class_t) * pg_nclasses);
+		kmem_free(classes_old, sizeof (pg_class_t) * pg_nclasses);
+	}
+
+	cid = pg_nclasses++;
+	newclass = &pg_classes[cid];
+
+	/*
+	 * The entry was zero filled above; copying one byte less than the
+	 * buffer size leaves the terminating NUL in place.
+	 */
+	(void) strncpy(newclass->pgc_name, name, PG_CLASS_NAME_MAX - 1);
+	newclass->pgc_id = cid;
+	newclass->pgc_ops = ops;
+	newclass->pgc_relation = relation;
+
+	mutex_exit(&cpu_lock);
+
+	return (cid);
+}
+
+/*
+ * Look through the PGs in "set" for one that "cp" belongs in, as judged
+ * by each PG's class (PG_CPU_BELONGS). The first PG that accepts the CPU
+ * is returned, even if later PGs in the set would accept it too.
+ * Returns NULL when no PG in the set accepts the CPU.
+ */
+pg_t *
+pg_cpu_find_pg(cpu_t *cp, group_t *set)
+{
+	group_iter_t	iter;
+	pg_t		*candidate;
+
+	group_iter_init(&iter);
+	for (candidate = group_iterate(set, &iter); candidate != NULL;
+	    candidate = group_iterate(set, &iter)) {
+		/* Let the PG's class decide whether the CPU fits */
+		if (PG_CPU_BELONGS(candidate, cp))
+			return (candidate);
+	}
+	return (NULL);
+}
+
+/*
+ * Step the iterator "itr" (set up beforehand with PG_CPU_ITR_INIT())
+ * and return whatever group_iterate() yields next from the CPU group
+ * of the iterator's PG.
+ */
+cpu_t *
+pg_cpu_next(pg_cpu_itr_t *itr)
+{
+	return (group_iterate(&itr->pg->pg_cpus, &itr->position));
+}
+
+/*
+ * Create a PG of class "cid": allocate it through the class, give it the
+ * next free sequential PG id, and create its (empty) CPU group.
+ * The caller must hold cpu_lock.
+ * This routine may block.
+ */
+pg_t *
+pg_create(pg_cid_t cid)
+{
+	pg_t	*newpg;
+	pgid_t	candidate;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	/*
+	 * Class specific allocation, then tie the PG to its class
+	 */
+	newpg = PG_ALLOC(cid);
+	newpg->pg_class = &pg_classes[cid];
+	newpg->pg_relation = newpg->pg_class->pgc_relation;
+
+	/*
+	 * Walk forward from pg_id_next until an id that is not in
+	 * pg_id_set turns up, growing the bitset as needed.
+	 */
+	for (;;) {
+		if (pg_id_next >= bitset_capacity(&pg_id_set))
+			bitset_resize(&pg_id_set, pg_id_next + 1);
+		candidate = pg_id_next;
+		pg_id_next++;
+		if (!bitset_in_set(&pg_id_set, candidate))
+			break;
+	}
+
+	newpg->pg_id = candidate;
+	bitset_add(&pg_id_set, newpg->pg_id);
+
+	/*
+	 * Every PG carries a group of its member CPUs
+	 */
+	group_create(&newpg->pg_cpus);
+
+	return (newpg);
+}
+
+/*
+ * Destroy a PG: tear down its CPU group, return its id to the pool of
+ * free ids, and hand the PG back to its class to be freed.
+ * The caller must hold cpu_lock.
+ * This routine may block.
+ */
+void
+pg_destroy(pg_t *pg)
+{
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	group_destroy(&pg->pg_cpus);
+
+	/*
+	 * Release the id. If it is lower than the current search hint,
+	 * pull the hint back so that pg_create() finds it again.
+	 */
+	if (pg->pg_id < pg_id_next)
+		pg_id_next = pg->pg_id;
+	bitset_del(&pg_id_set, pg->pg_id);
+
+	/*
+	 * Class specific de-allocation
+	 */
+	PG_FREE(pg);
+}
+
+/*
+ * Make CPU "cp" a member of processor group "pg". The link is recorded
+ * in both directions: in the PG's CPU group and in the CPU's PG group.
+ * The caller must hold cpu_lock, and the CPU must already have moved off
+ * the bootstrap PG data.
+ * This routine may block.
+ */
+void
+pg_cpu_add(pg_t *pg, cpu_t *cp)
+{
+	int	rc;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	/* PG -> CPU direction */
+	rc = group_add(&pg->pg_cpus, cp, GRP_RESIZE);
+	ASSERT(rc == 0);
+
+	/* CPU -> PG direction */
+	ASSERT(cp->cpu_pg != &bootstrap_pg_data);
+	rc = group_add(&cp->cpu_pg->pgs, pg, GRP_RESIZE);
+	ASSERT(rc == 0);
+}
+
+/*
+ * Take CPU "cp" out of processor group "pg", undoing both links made by
+ * pg_cpu_add(). The caller must hold cpu_lock, and the CPU must not be
+ * using the bootstrap PG data.
+ * This routine may block.
+ */
+void
+pg_cpu_delete(pg_t *pg, cpu_t *cp)
+{
+	int	rc;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	/* PG -> CPU direction */
+	rc = group_remove(&pg->pg_cpus, cp, GRP_RESIZE);
+	ASSERT(rc == 0);
+
+	/* CPU -> PG direction */
+	ASSERT(cp->cpu_pg != &bootstrap_pg_data);
+	rc = group_remove(&cp->cpu_pg->pgs, pg, GRP_RESIZE);
+	ASSERT(rc == 0);
+}
+
+/*
+ * Allocate and initialize the per CPU PG data that hangs off struct cpu
+ * at cpu_pg: a zeroed cpu_pg_t with its "pgs" and "cmt_pgs" groups created.
+ */
+static cpu_pg_t *
+pg_cpu_data_alloc(void)
+{
+	cpu_pg_t	*data = kmem_zalloc(sizeof (cpu_pg_t), KM_SLEEP);
+
+	group_create(&data->pgs);
+	group_create(&data->cmt_pgs);
+
+	return (data);
+}
+
+/*
+ * Release per CPU PG data obtained from pg_cpu_data_alloc(): destroy
+ * both of its groups, then free the structure itself.
+ */
+static void
+pg_cpu_data_free(cpu_pg_t *pgd)
+{
+	group_t	*all_pgs = &pgd->pgs;
+	group_t	*cmt_pgs = &pgd->cmt_pgs;
+
+	group_destroy(all_pgs);
+	group_destroy(cmt_pgs);
+	kmem_free(pgd, sizeof (cpu_pg_t));
+}
+
+/*
+ * A new CPU is coming into the system, either via booting or DR.
+ * Give it its own PG data, then tell every registered class about it
+ * through the class's cpu_init callback.
+ *
+ * The caller must hold cpu_lock.
+ * This routine may block.
+ */
+void
+pg_cpu_init(cpu_t *cp)
+{
+	pg_cid_t	cid = 0;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	/* Per CPU PG data */
+	cp->cpu_pg = pg_cpu_data_alloc();
+
+	/* Class notifications, in class id order */
+	while (cid < pg_nclasses) {
+		PG_CPU_INIT(cid, cp);
+		cid++;
+	}
+}
+
+/*
+ * This CPU is being deleted from the system. Tell every registered class
+ * through its cpu_fini callback, then free the CPU's PG data.
+ *
+ * Nothing is done for a CPU that has no PG data of its own (cpu_pg is
+ * NULL or still the bootstrap data), which can happen if the CPU coming
+ * into the system failed to power on.
+ *
+ * The caller must hold cpu_lock.
+ */
+void
+pg_cpu_fini(cpu_t *cp)
+{
+	pg_cid_t	cid;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	if (cp->cpu_pg == NULL)
+		return;
+	if (cp->cpu_pg == &bootstrap_pg_data)
+		return;
+
+	cid = 0;
+	while (cid < pg_nclasses) {
+		PG_CPU_FINI(cid, cp);
+		cid++;
+	}
+
+	pg_cpu_data_free(cp->cpu_pg);
+	cp->cpu_pg = NULL;
+}
+
+/*
+ * This CPU is becoming active (online). Every registered class is told
+ * through its cpu_active callback.
+ *
+ * The caller must hold cpu_lock.
+ * This routine may not block as it is called from paused CPUs
+ * context.
+ */
+void
+pg_cpu_active(cpu_t *cp)
+{
+	pg_cid_t	cid = 0;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	while (cid < pg_nclasses) {
+		PG_CPU_ACTIVE(cid, cp);
+		cid++;
+	}
+}
+
+/*
+ * This CPU is going inactive (offline). Every registered class is told
+ * through its cpu_inactive callback.
+ *
+ * The caller must hold cpu_lock.
+ * This routine may not block, as it is called from paused
+ * CPUs context.
+ */
+void
+pg_cpu_inactive(cpu_t *cp)
+{
+	pg_cid_t	cid = 0;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	while (cid < pg_nclasses) {
+		PG_CPU_INACTIVE(cid, cp);
+		cid++;
+	}
+}
+
+/*
+ * Invoked when the CPU is about to move into the partition "pp".
+ * Every registered class is told through its cpupart_in callback.
+ *
+ * The caller must hold cpu_lock.
+ * This routine may block.
+ */
+void
+pg_cpupart_in(cpu_t *cp, cpupart_t *pp)
+{
+	int	cid = 0;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	while (cid < pg_nclasses) {
+		PG_CPUPART_IN(cid, cp, pp);
+		cid++;
+	}
+}
+
+/*
+ * Invoked when the CPU is about to move out of the partition "pp".
+ * Every registered class is told through its cpupart_out callback.
+ *
+ * The caller must hold cpu_lock.
+ * This routine may block.
+ */
+/*ARGSUSED*/
+void
+pg_cpupart_out(cpu_t *cp, cpupart_t *pp)
+{
+	int	cid = 0;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	while (cid < pg_nclasses) {
+		PG_CPUPART_OUT(cid, cp, pp);
+		cid++;
+	}
+}
+
+/*
+ * Invoked when the CPU is *moving* from partition "oldpp" to partition
+ * "newpp". Every registered class is told through its cpupart_move
+ * callback.
+ *
+ * The caller must hold cpu_lock.
+ * This routine may not block, as it is called from paused CPUs
+ * context.
+ */
+void
+pg_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
+{
+	int	cid = 0;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+
+	while (cid < pg_nclasses) {
+		PG_CPUPART_MOVE(cid, cp, oldpp, newpp);
+		cid++;
+	}
+}
+
+/*
+ * Provide the specified CPU a bootstrap pg
+ * This is needed to allow sane behaviour if any PG consuming
+ * code needs to deal with a partially initialized CPU
+ *
+ * Every CPU bootstrapped this way points at the same static
+ * bootstrap_pg_data; pg_cpu_init() later replaces the pointer with a
+ * private allocation.
+ */
+void
+pg_cpu_bootstrap(cpu_t *cp)
+{
+ cp->cpu_pg = &bootstrap_pg_data;
+}
+
+/*
+ * Default class allocation routine: returns a zeroed, generic pg_t.
+ * The "class" argument is ignored.
+ *
+ * NOTE(review): PG_ALLOC() calls the alloc callback with no arguments
+ * while this routine is declared to take a pg_class_t -- confirm the
+ * intended callback signature against pg.h.
+ */
+/*ARGSUSED*/
+static pg_t *
+pg_alloc_default(pg_class_t class)
+{
+ return (kmem_zalloc(sizeof (pg_t), KM_SLEEP));
+}
+
+/*
+ * Default class free routine: releases a generic pg_t. Only sizeof (pg_t)
+ * bytes are freed, so it matches PGs obtained from pg_alloc_default().
+ */
+/*ARGSUSED*/
+static void
+pg_free_default(struct pg *pg)
+{
+ kmem_free(pg, sizeof (pg_t));
+}
diff --git a/usr/src/uts/common/os/pghw.c b/usr/src/uts/common/os/pghw.c
new file mode 100644
index 0000000000..e2dc2a38f2
--- /dev/null
+++ b/usr/src/uts/common/os/pghw.c
@@ -0,0 +1,420 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/thread.h>
+#include <sys/cpuvar.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/group.h>
+#include <sys/pg.h>
+#include <sys/pghw.h>
+
+/*
+ * Processor Groups: Hardware sharing relationship layer
+ *
+ * This file implements an extension to Processor Groups to capture
+ * hardware sharing relationships existing between logical CPUs. Examples of
+ * hardware sharing relationships include shared caches on some CMT
+ * processor architectures, or shared local memory controllers on NUMA
+ * based system architectures.
+ *
+ * The pghw_t structure represents the extended PG. The first member
+ * of the structure is the generic pg_t with the pghw specific members
+ * following. The generic pg_t *must* remain the first member of the
+ * structure as the code uses casting of structure references to access
+ * the generic pg_t structure elements.
+ *
+ * In addition to the generic CPU grouping, physical PGs have a hardware
+ * sharing relationship enumerated "type", and an instance id. The enumerated
+ * type is defined by the pghw_type_t enumeration, while the instance id
+ * uniquely identifies the sharing instance from among others of the same
+ * hardware sharing type.
+ *
+ * The physical PGs are organized into an overall hierarchy, and are tracked
+ * in a number of different per CPU, and per pghw_type_t type groups.
+ * As an example:
+ *
+ * -------------
+ * | pg_hw |
+ * | (group_t) |
+ * -------------
+ * || ============================
+ * ||\\-----------------------// \\ \\
+ * || | hwset (PGC_HW_CHIP) | ------------- -------------
+ * || | (group_t) | | pghw_t | | pghw_t |
+ * || ----------------------- | chip 0 | | chip 1 |
+ * || ------------- -------------
+ * || \\ \\ \\ \\ \\ \\ \\ \\
+ * || cpu cpu cpu cpu cpu cpu cpu cpu
+ * ||
+ * || ============================
+ * ||\\-----------------------// \\ \\
+ * || | hwset (PGC_HW_IPIPE)| ------------- -------------
+ * || | (group_t) | | pghw_t | | pghw_t |
+ * || ----------------------- | ipipe 0 | | ipipe 1 |
+ * || ------------- -------------
+ * || \\ \\ \\ \\
+ * || cpu cpu cpu cpu
+ * ...
+ *
+ *
+ * The top level pg_hw is a group of "hwset" groups. Each hwset holds a group
+ * of physical PGs of the same hardware sharing type. Within each hwset, the
+ * PG's instance id uniquely identifies the grouping relationship among other
+ * groupings of the same sharing type. The instance id for a grouping is
+ * platform defined, and in some cases may be used by platform code as a handle
+ * to search for a particular relationship instance.
+ *
+ * Each physical PG (by virtue of the embedded pg_t) contains a group of CPUs
+ * that participate in the sharing relationship. Each CPU also has associated
+ * with it a grouping tracking the PGs in which the CPU belongs. This can be
+ * used to iterate over the various relationships in which the CPU participates
+ * (the CPU's chip, cache, lgroup, etc.).
+ *
+ * The hwsets are created dynamically as new hardware sharing relationship types
+ * are instantiated. They are never destroyed, as once a given relationship
+ * type appears in the system, it is quite likely that at least one instance of
+ * that relationship will always persist as long as the system is running.
+ */
+
+/*
+ * Allocated on first use by pghw_set_create(); holds one hwset slot per
+ * pghw_type_t value.
+ */
+static group_t *pg_hw; /* top level pg hw group */
+
+/*
+ * Lookup table mapping hardware sharing relationships with hierarchy levels
+ *
+ * An entry is filled in from pg_plat_hw_level() when the hwset for that
+ * hardware type is created.
+ */
+static int pghw_level_table[PGHW_NUM_COMPONENTS];
+
+/*
+ * Physical PG kstats
+ *
+ * A single static template is shared by every physical PG's kstat:
+ * pghw_kstat_create() points each kstat's ks_data at it, and
+ * pghw_kstat_update() refills it from the PG stored in ks_private.
+ */
+struct pghw_kstat {
+ kstat_named_t pg_id;
+ kstat_named_t pg_class;
+ kstat_named_t pg_ncpus;
+ kstat_named_t pg_instance_id;
+ kstat_named_t pg_hw;
+} pghw_kstat = {
+ { "id", KSTAT_DATA_UINT64 },
+ { "pg_class", KSTAT_DATA_STRING },
+ { "ncpus", KSTAT_DATA_UINT64 },
+ { "instance_id", KSTAT_DATA_UINT64 },
+ { "hardware", KSTAT_DATA_STRING },
+};
+
+/* Installed as ks_lock of every physical PG kstat */
+kmutex_t pghw_kstat_lock;
+
+/*
+ * hwset operations
+ */
+static group_t *pghw_set_create(pghw_type_t);
+static void pghw_set_add(group_t *, pghw_t *);
+static void pghw_set_remove(group_t *, pghw_t *);
+
+/*
+ * Initialize the physical portion of a physical PG.
+ *
+ * The PG is added to the hwset for hardware type "hw" (the hwset is
+ * created if this is the first PG of that type), stamped with the type
+ * and the platform's instance id for "cp", and given a kstat.
+ */
+void
+pghw_init(pghw_t *pg, cpu_t *cp, pghw_type_t hw)
+{
+	group_t	*set;
+
+	set = pghw_set_lookup(hw);
+	if (set == NULL) {
+		/* First PG of this hardware type */
+		set = pghw_set_create(hw);
+	}
+	pghw_set_add(set, pg);
+
+	pg->pghw_hw = hw;
+	pg->pghw_instance = pg_plat_hw_instance_id(cp, hw);
+
+	pghw_kstat_create(pg);
+}
+
+/*
+ * Teardown the physical portion of a physical PG
+ *
+ * Removes the PG from its hwset, resets its hardware type and instance id
+ * to "none" values, and deletes its kstat if one was created. The kstat
+ * pointer is cleared afterwards so that it is not left dangling.
+ */
+void
+pghw_fini(pghw_t *pg)
+{
+	group_t	*hwset;
+
+	hwset = pghw_set_lookup(pg->pghw_hw);
+	ASSERT(hwset != NULL);
+
+	pghw_set_remove(hwset, pg);
+	pg->pghw_instance = (id_t)PGHW_INSTANCE_ANON;
+	pg->pghw_hw = (pghw_type_t)-1;
+
+	if (pg->pghw_kstat != NULL) {
+		kstat_delete(pg->pghw_kstat);
+		pg->pghw_kstat = NULL;
+	}
+}
+
+/*
+ * Look for an existing physical PG of hardware sharing type "hw" in
+ * which the given CPU should be placed. Returns NULL when no hwset
+ * exists for that type, or when no PG in the hwset accepts the CPU.
+ */
+pghw_t *
+pghw_place_cpu(cpu_t *cp, pghw_type_t hw)
+{
+	group_t	*set = pghw_set_lookup(hw);
+
+	if (set != NULL)
+		return ((pghw_t *)pg_cpu_find_pg(cp, set));
+
+	return (NULL);
+}
+
+/*
+ * Search the PGs that "cp" belongs to for the one representing hardware
+ * sharing relationship "hw". Returns NULL if the CPU is in no such PG.
+ *
+ * NOTE(review): every entry in the CPU's "pgs" group is treated as a
+ * pghw_t here -- presumably only physical PGs are ever added to it;
+ * confirm against the callers of pg_cpu_add().
+ */
+pghw_t *
+pghw_find_pg(cpu_t *cp, pghw_type_t hw)
+{
+	group_iter_t	iter;
+	pghw_t		*cur;
+
+	group_iter_init(&iter);
+	for (cur = group_iterate(&cp->cpu_pg->pgs, &iter); cur != NULL;
+	    cur = group_iterate(&cp->cpu_pg->pgs, &iter)) {
+		if (cur->pghw_hw == hw)
+			return (cur);
+	}
+	return (NULL);
+}
+
+/*
+ * Search the hwset for hardware sharing type "hw" for the PG whose
+ * instance id is "id". Returns NULL if there is no hwset for that type
+ * or no PG in it carries that instance id.
+ */
+pghw_t *
+pghw_find_by_instance(id_t id, pghw_type_t hw)
+{
+	group_iter_t	iter;
+	group_t		*hwset;
+	pghw_t		*cur;
+
+	hwset = pghw_set_lookup(hw);
+	if (hwset == NULL)
+		return (NULL);
+
+	group_iter_init(&iter);
+	for (cur = group_iterate(hwset, &iter); cur != NULL;
+	    cur = group_iterate(hwset, &iter)) {
+		if (cur->pghw_instance == id)
+			return (cur);
+	}
+	return (NULL);
+}
+
+/*
+ * Create the CPU's physical ID cache.
+ * The cache is treated as an array of id_t, and every element of it is
+ * initialized to the CPU's own id.
+ */
+void
+pghw_physid_create(cpu_t *cp)
+{
+	id_t	*slots;
+	int	n;
+
+	cp->cpu_physid = kmem_alloc(sizeof (cpu_physid_t), KM_SLEEP);
+	slots = (id_t *)cp->cpu_physid;
+
+	n = 0;
+	while (n < (sizeof (cpu_physid_t) / sizeof (id_t))) {
+		slots[n] = cp->cpu_id;
+		n++;
+	}
+}
+
+/*
+ * Free the CPU's physical ID cache, if it has one, and clear the
+ * pointer to it.
+ */
+void
+pghw_physid_destroy(cpu_t *cp)
+{
+	if (!cp->cpu_physid)
+		return;
+
+	kmem_free(cp->cpu_physid, sizeof (cpu_physid_t));
+	cp->cpu_physid = NULL;
+}
+
+/*
+ * Return a sequential level identifier for the specified
+ * hardware sharing relationship
+ *
+ * The value comes straight from the platform's pg_plat_hw_level();
+ * pghw_level_table is not consulted here.
+ */
+int
+pghw_level(pghw_type_t hw)
+{
+ return (pg_plat_hw_level(hw));
+}
+
+/*
+ * Create a new, empty hwset for hardware sharing type "hw", install it
+ * in the top level pg_hw group at index "hw", and return it.
+ * This routine may block, and must not be called from any
+ * paused CPU context.
+ */
+static group_t *
+pghw_set_create(pghw_type_t hw)
+{
+	group_t	*g;
+	int	ret;
+
+	/*
+	 * Create the top level PG hw group if it doesn't already exist
+	 * This is a "set" of hardware sets, that is ordered (and indexed)
+	 * by the pghw_type_t enum.
+	 */
+	if (pg_hw == NULL) {
+		pg_hw = kmem_alloc(sizeof (group_t), KM_SLEEP);
+		group_create(pg_hw);
+		group_expand(pg_hw, (uint_t)PGHW_NUM_COMPONENTS);
+	}
+
+	/*
+	 * Create the new hwset
+	 * Add it to the top level pg_hw group.
+	 */
+	g = kmem_alloc(sizeof (group_t), KM_SLEEP);
+	group_create(g);
+
+	ret = group_add_at(pg_hw, g, (uint_t)hw);
+	ASSERT(ret == 0);
+
+	/*
+	 * Update the table that maps hardware sharing relationships
+	 * to hierarchy levels. The table holds ints, so an unused slot
+	 * is checked against 0 rather than the pointer constant NULL.
+	 */
+	ASSERT(pghw_level_table[hw] == 0);
+	pghw_level_table[hw] = pg_plat_hw_level(hw);
+
+	return (g);
+}
+
+/*
+ * Return the hwset stored in the top level pg_hw group at the index of
+ * the given hardware sharing type, or NULL if pg_hw itself has not been
+ * created yet.
+ */
+group_t *
+pghw_set_lookup(pghw_type_t hw)
+{
+	if (pg_hw != NULL)
+		return (GROUP_ACCESS(pg_hw, (uint_t)hw));
+
+	return (NULL);
+}
+
+/*
+ * Add a PG to a hwset
+ *
+ * The hwset is allowed to grow (GRP_RESIZE). The result of group_add()
+ * is deliberately discarded here, unlike in pghw_set_remove() where the
+ * result is asserted.
+ */
+static void
+pghw_set_add(group_t *hwset, pghw_t *pg)
+{
+ (void) group_add(hwset, pg, GRP_RESIZE);
+}
+
+/*
+ * Take a PG out of a hwset. The removal is expected to succeed; this is
+ * asserted.
+ */
+static void
+pghw_set_remove(group_t *hwset, pghw_t *pg)
+{
+	int	rc = group_remove(hwset, pg, GRP_RESIZE);
+
+	ASSERT(rc == 0);
+}
+
+
+/*
+ * Space reserved in a PG kstat for the hardware type name
+ * (see pghw_kstat_create()).
+ */
+#define	PGHW_TYPE_NAME_MAX	8
+
+/*
+ * Map a pg_hw sharing type to its printable name. Types without a name
+ * of their own map to "unknown".
+ */
+static char *
+pghw_type_string(pghw_type_t hw)
+{
+	if (hw == PGHW_IPIPE)
+		return ("ipipe");
+	if (hw == PGHW_CACHE)
+		return ("cache");
+	if (hw == PGHW_FPU)
+		return ("fpu");
+	if (hw == PGHW_CHIP)
+		return ("chip");
+	if (hw == PGHW_MEMORY)
+		return ("memory");
+
+	return ("unknown");
+}
+
+/*
+ * Create / Update routines for PG hw kstats
+ *
+ * It is the intention of these kstats to provide some level
+ * of informational / debugging observability into the types
+ * and nature of the system's detected hardware sharing relationships
+ *
+ * pghw_kstat_create() creates and installs the virtual, named kstat of
+ * one physical PG, using the PG's id as the kstat instance. If the kstat
+ * cannot be created, pghw_kstat is left NULL and nothing is installed.
+ */
+void
+pghw_kstat_create(pghw_t *pg)
+{
+	kstat_t	*ksp;
+
+	ksp = kstat_create("pg", ((pg_t *)pg)->pg_id,
+	    "pg", "pg", KSTAT_TYPE_NAMED,
+	    sizeof (pghw_kstat) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL);
+	pg->pghw_kstat = ksp;
+	if (ksp == NULL)
+		return;
+
+	/* Room for the two string valued statistics */
+	pg->pghw_kstat->ks_data_size += PG_CLASS_NAME_MAX;
+	pg->pghw_kstat->ks_data_size += PGHW_TYPE_NAME_MAX;
+
+	/* All PG kstats share one data template, guarded by one lock */
+	pg->pghw_kstat->ks_lock = &pghw_kstat_lock;
+	pg->pghw_kstat->ks_data = &pghw_kstat;
+	pg->pghw_kstat->ks_update = pghw_kstat_update;
+	pg->pghw_kstat->ks_private = pg;
+
+	kstat_install(pg->pghw_kstat);
+}
+
+/*
+ * kstat update routine for physical PG kstats. Refills the shared
+ * pghw_kstat template from the PG stored in the kstat's ks_private.
+ * The kstats are read only: a write request fails with EACCES.
+ * Returns 0 on success.
+ */
+int
+pghw_kstat_update(kstat_t *ksp, int rw)
+{
+	struct pghw_kstat	*stats = &pghw_kstat;
+	pghw_t			*hwpg = ksp->ks_private;
+	pg_t			*gen = (pg_t *)hwpg;
+
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+
+	/* Numeric statistics */
+	stats->pg_id.value.ui64 = gen->pg_id;
+	stats->pg_ncpus.value.ui64 = GROUP_SIZE(&gen->pg_cpus);
+	stats->pg_instance_id.value.ui64 = (uint64_t)hwpg->pghw_instance;
+
+	/* String statistics */
+	kstat_named_setstr(&stats->pg_class, gen->pg_class->pgc_name);
+	kstat_named_setstr(&stats->pg_hw, pghw_type_string(hwpg->pghw_hw));
+
+	return (0);
+}
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index aecbb6b281..c0947b12a5 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
@@ -97,6 +97,7 @@ CHKHDRS= \
avl_impl.h \
battery.h \
bitmap.h \
+ bitset.h \
bl.h \
bofi.h \
bofi_impl.h \
@@ -111,7 +112,6 @@ CHKHDRS= \
callo.h \
ccompile.h \
cdio.h \
- chip.h \
cladm.h \
class.h \
clconf.h \
@@ -246,6 +246,7 @@ CHKHDRS= \
gfs.h \
gld.h \
gldpriv.h \
+ group.h \
hdio.h \
hook.h \
hook_event.h \
@@ -370,6 +371,8 @@ CHKHDRS= \
pctypes.h \
pem.h \
pfmod.h \
+ pg.h \
+ pghw.h \
physmem.h \
pm.h \
policy.h \
diff --git a/usr/src/uts/common/sys/bitset.h b/usr/src/uts/common/sys/bitset.h
new file mode 100644
index 0000000000..98e5872892
--- /dev/null
+++ b/usr/src/uts/common/sys/bitset.h
@@ -0,0 +1,76 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _BITSET_H
+#define _BITSET_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if (defined(_KERNEL) || defined(_KMEMUSER))
+#include <sys/bitmap.h>
+#include <sys/types.h>
+
+/*
+ * Bitset object operated on by the bitset_*() routines declared below.
+ * NOTE(review): field meanings are inferred from their names and types;
+ * confirm against bitset.c.
+ */
+typedef struct bitset {
+ /* presumably the array of words holding the bits */
+ ulong_t *bs_set;
+ /* presumably the number of words in bs_set */
+ uint_t bs_words;
+} bitset_t;
+
+/*
+ * Bitset initialization / teardown
+ */
+void bitset_init(bitset_t *);
+void bitset_fini(bitset_t *);
+
+/*
+ * Resize / query a bitset's holding capacity
+ */
+void bitset_resize(bitset_t *, uint_t);
+uint_t bitset_capacity(bitset_t *);
+
+/*
+ * Set / clear a bit in the set
+ */
+void bitset_add(bitset_t *, uint_t);
+void bitset_del(bitset_t *, uint_t);
+
+/*
+ * Bitset queries
+ */
+int bitset_in_set(bitset_t *, uint_t);
+int bitset_is_null(bitset_t *);
+uint_t bitset_find(bitset_t *);
+
+
+#endif /* _KERNEL || _KMEMUSER */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BITSET_H */
diff --git a/usr/src/uts/common/sys/chip.h b/usr/src/uts/common/sys/chip.h
deleted file mode 100644
index 8889756884..0000000000
--- a/usr/src/uts/common/sys/chip.h
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _CHIP_H
-#define _CHIP_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * generic kernel CMT processor support
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if (defined(_KERNEL) || defined(_KMEMUSER))
-#include <sys/cpuvar.h>
-#include <sys/processor.h>
-#include <sys/bitmap.h>
-#include <sys/atomic.h>
-#include <sys/time.h>
-#include <sys/types.h>
-
-/*
- * Chip types
- */
-typedef enum chip_type {
- CHIP_DEFAULT, /* Default, non CMT processor */
- CHIP_SMT, /* SMT, single core */
- CHIP_CMP_SPLIT_CACHE, /* CMP with split caches */
- CHIP_CMP_SHARED_CACHE, /* CMP with shared caches */
- CHIP_CMT, /* CMT w/ multiple cores and threads */
- CHIP_NUM_TYPES
-} chip_type_t;
-
-
-/*
- * Balancing is possible if multiple chips exist in the lgroup
- * but only necessary if the chip has multiple online logical CPUs
- */
-#define CHIP_SHOULD_BALANCE(chp) \
- (((chp)->chip_ncpu > 1) && ((chp)->chip_next_lgrp != (chp)))
-
-/*
- * Platform's definition of a chip's properties
- */
-typedef struct chip_def {
- chip_type_t chipd_type;
- int chipd_rechoose_adj;
- hrtime_t chipd_nosteal;
-} chip_def_t;
-
-/*
- * Per chip kstats
- */
-typedef enum chip_stat_types {
- CHIP_ID, /* chip "id" */
- CHIP_NCPUS, /* number of active cpus */
- CHIP_NRUNNING, /* number of running threads on chip */
- CHIP_RECHOOSE, /* chip's rechoose_interval */
- CHIP_NUM_STATS /* always last */
-} chip_stat_t;
-
-#define CHIP_KSTAT_NAMES \
-static char *chip_kstat_names[] = { \
- \
- "chip_id", \
- "logical_cpus", \
- "chip_nrunning", \
- "chip_rechoose_interval", \
-}
-
-/*
- * Physical processor (chip) structure.
- */
-typedef struct chip {
- chipid_t chip_id; /* chip's "id" */
- chipid_t chip_seqid; /* sequential id */
- struct chip *chip_prev; /* previous chip on list */
- struct chip *chip_next; /* next chip on list */
- struct chip *chip_prev_lgrp; /* prev chip in lgroup */
- struct chip *chip_next_lgrp; /* next chip in lgroup */
- chip_type_t chip_type; /* type of chip */
- uint16_t chip_ncpu; /* number of active cpus */
- uint16_t chip_ref; /* chip's reference count */
- struct cpu *chip_cpus; /* per chip cpu list */
- struct lgrp *chip_lgrp; /* chip lives in this lgroup */
- int chip_rechoose_adj; /* chip specific adjustment */
-
- /*
- * chip kstats
- */
- kstat_t *chip_kstat;
- kmutex_t chip_kstat_mutex;
- struct kstat_named chip_kstat_data[CHIP_NUM_STATS];
-
- struct chip *chip_balance; /* chip to balance against */
- uint32_t chip_nrunning; /* # of running threads */
- hrtime_t chip_nosteal; /* nosteal interval (nsecs) */
-} chip_t;
-
-/*
- * IDs associating a CPU with various physical hardware
- */
-typedef struct cpu_physid {
- chipid_t cpu_chipid; /* CPU's physical processor */
- id_t cpu_coreid; /* CPU's physical core */
-} cpu_physid_t;
-
-/*
- * Change the number of running threads on the chip
- */
-#define CHIP_NRUNNING(chp, n) { \
- atomic_add_32(&((chp)->chip_nrunning), (n)); \
-}
-
-/*
- * True if this CPU is active on the chip
- */
-#define CHIP_CPU_ACTIVE(cp) ((cp)->cpu_next_chip != NULL)
-
-/*
- * Sets of chips
- * The "id" used here should be a chip's sequential id.
- * (chip_seqid)
- */
-#if defined(_MACHDEP)
-
-#define CHIP_MAX_CHIPS NCPU
-#define CHIP_SET_WORDS BT_BITOUL(CHIP_MAX_CHIPS)
-
-typedef struct chip_set {
- ulong_t csb[CHIP_SET_WORDS];
-} chip_set_t;
-
-extern int chip_set_isnull(chip_set_t *);
-
-#define CHIP_SET_ISNULL(set) chip_set_isnull(&(set))
-#define CHIP_SET_TEST(set, id) BT_TEST((set).csb, id)
-#define CHIP_SET_REMOVE(set, id) BT_CLEAR((set).csb, id)
-#define CHIP_SET_ADD(set, id) BT_SET((set).csb, id)
-
-#define CHIP_SET_ZERO(set) { \
- int _i; \
- for (_i = 0; _i < CHIP_SET_WORDS; _i++) \
- (set).csb[_i] = 0; \
-}
-
-#define CHIP_IN_CPUPART(chp, cp) \
- (CHIP_SET_TEST((cp)->cp_mach->mc_chipset, (chp)->chip_seqid))
-
-#endif /* _MACHDEP */
-
-/*
- * Common kernel chip operations
- */
-void chip_cpu_init(cpu_t *);
-void chip_cpu_fini(cpu_t *);
-void chip_cpu_assign(cpu_t *);
-void chip_cpu_unassign(cpu_t *);
-void chip_cpu_startup(cpu_t *);
-chip_t *chip_lookup(chipid_t);
-void chip_bootstrap_cpu(cpu_t *);
-
-void chip_cpu_move_part(cpu_t *, struct cpupart *,
- struct cpupart *);
-
-void chip_kstat_create(chip_t *);
-
-/*
- * Platform chip operations
- */
-chipid_t chip_plat_get_chipid(cpu_t *);
-id_t chip_plat_get_coreid(cpu_t *);
-int chip_plat_get_clogid(cpu_t *);
-void chip_plat_define_chip(cpu_t *, chip_def_t *);
-
-#endif /* !_KERNEL && !_KMEMUSER */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _CHIP_H */
diff --git a/usr/src/uts/common/sys/cmt.h b/usr/src/uts/common/sys/cmt.h
new file mode 100644
index 0000000000..4638438eec
--- /dev/null
+++ b/usr/src/uts/common/sys/cmt.h
@@ -0,0 +1,75 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _CMT_H
+#define _CMT_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * CMT PG class
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if (defined(_KERNEL) || defined(_KMEMUSER))
+#include <sys/group.h>
+#include <sys/pghw.h>
+#include <sys/types.h>
+
+/*
+ * CMT pg structure
+ */
+typedef struct pg_cmt {
+ struct pghw cmt_pg; /* physical grouping */
+ struct group *cmt_siblings; /* CMT PGs to balance with */
+ struct pg_cmt *cmt_parent; /* Parent CMT PG */
+ struct group *cmt_children; /* Active children CMT PGs */
+ int cmt_nchildren; /* # of children CMT PGs */
+ int cmt_hint; /* hint for balancing */
+ uint32_t cmt_nrunning; /* # of running threads */
+ struct group cmt_cpus_actv;
+ bitset_t cmt_cpus_actv_set; /* bitset of active CPUs */
+} pg_cmt_t;
+
+
+/*
+ * Change the number of running threads on the pg
+ */
+#define PG_NRUN_UPDATE(cp, n) (pg_cmt_load((cp), (n)))
+
+void pg_cmt_load(cpu_t *, int);
+void pg_cmt_cpu_startup(cpu_t *);
+int pg_cmt_can_migrate(cpu_t *, cpu_t *);
+
+#endif /* _KERNEL || _KMEMUSER */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CMT_H */
diff --git a/usr/src/uts/common/sys/cpupart.h b/usr/src/uts/common/sys/cpupart.h
index 0a74d17379..b9e0da4e19 100644
--- a/usr/src/uts/common/sys/cpupart.h
+++ b/usr/src/uts/common/sys/cpupart.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -35,7 +35,8 @@
#include <sys/pset.h>
#include <sys/lgrp.h>
#include <sys/lgrp_user.h>
-#include <sys/chip.h>
+#include <sys/pg.h>
+#include <sys/bitset.h>
#include <sys/time.h>
#ifdef __cplusplus
@@ -60,7 +61,6 @@ typedef int cpupartid_t;
#if defined(_MACHDEP)
struct mach_cpupart {
cpuset_t mc_haltset;
- chip_set_t mc_chipset;
};
extern struct mach_cpupart cp_default_mach;
@@ -102,6 +102,7 @@ typedef struct cpupart {
uint_t cp_attr; /* bitmask of attributes */
lgrp_gen_t cp_gen; /* generation number */
lgrp_id_t cp_lgrp_hint; /* last home lgroup chosen */
+ bitset_t cp_cmt_pgs; /* CMT PGs represented */
struct mach_cpupart *cp_mach; /* mach-specific */
} cpupart_t;
diff --git a/usr/src/uts/common/sys/cpuvar.h b/usr/src/uts/common/sys/cpuvar.h
index f413431b0d..1e467e4b64 100644
--- a/usr/src/uts/common/sys/cpuvar.h
+++ b/usr/src/uts/common/sys/cpuvar.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -100,8 +100,6 @@ typedef struct cpu {
klwp_id_t cpu_fpowner; /* currently loaded fpu owner */
struct cpupart *cpu_part; /* partition with this CPU */
struct lgrp_ld *cpu_lpl; /* pointer to this cpu's load */
- struct chip *cpu_chip; /* cpu's chip data */
- int cpu_rechoose; /* cpu's rechoose_interval */
int cpu_cache_offset; /* see kmem.c for details */
/*
@@ -121,10 +119,11 @@ typedef struct cpu {
struct cpu *cpu_prev_part; /* prev CPU in partition */
struct cpu *cpu_next_lgrp; /* next CPU in latency group */
struct cpu *cpu_prev_lgrp; /* prev CPU in latency group */
- struct cpu *cpu_next_chip; /* next CPU on chip */
- struct cpu *cpu_prev_chip; /* prev CPU on chip */
struct cpu *cpu_next_lpl; /* next CPU in lgrp partition */
struct cpu *cpu_prev_lpl;
+
+ struct cpu_pg *cpu_pg; /* cpu's processor groups */
+
void *cpu_reserved[4]; /* reserved for future use */
/*
diff --git a/usr/src/uts/common/sys/disp.h b/usr/src/uts/common/sys/disp.h
index 50464b6ddf..534feb4bd0 100644
--- a/usr/src/uts/common/sys/disp.h
+++ b/usr/src/uts/common/sys/disp.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -98,13 +97,6 @@ extern pri_t maxclsyspri; /* maximum level of any system class */
extern pri_t intr_pri; /* interrupt thread priority base level */
/*
- * Amount of time that may elapse before a thread is considered to have
- * lost it's cache investment.
- */
-#define RECHOOSE_INTERVAL 3
-extern int rechoose_interval;
-
-/*
* Minimum amount of time that a thread can remain runnable before it can
* be stolen by another CPU (in nanoseconds).
*/
diff --git a/usr/src/uts/common/sys/group.h b/usr/src/uts/common/sys/group.h
new file mode 100644
index 0000000000..89a5ca1f1a
--- /dev/null
+++ b/usr/src/uts/common/sys/group.h
@@ -0,0 +1,104 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _GROUP_H
+#define _GROUP_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Group Abstraction
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if (defined(_KERNEL) || defined(_KMEMUSER))
+#include <sys/types.h>
+
+#define GRP_RESIZE 0x1 /* Resize group capacity if needed */
+#define GRP_NORESIZE 0x2 /* Do not resize group capacity; may fail */
+
+/*
+ * group structure
+ */
+typedef struct group {
+ uint_t grp_size; /* # of elements */
+ uint_t grp_capacity; /* current group capacity */
+ void **grp_set; /* element vector */
+} group_t;
+
+typedef uint_t group_iter_t;
+
+
+/*
+ * Return the number of elements in the group
+ */
+#define GROUP_SIZE(grp) ((grp)->grp_size)
+
+/*
+ * Access the element at the specified group index
+ */
+#define GROUP_ACCESS(grp, index) ((grp)->grp_set[index])
+
+/*
+ * Group creation / destruction
+ */
+void group_create(group_t *);
+void group_destroy(group_t *);
+
+/*
+ * Expand a group's holding capacity
+ */
+void group_expand(group_t *, uint_t);
+
+/*
+ * Group element iteration
+ */
+void group_iter_init(group_iter_t *);
+void *group_iterate(group_t *, uint_t *);
+
+/*
+ * Add / remove an element from the group
+ */
+int group_add(group_t *, void *, int);
+int group_remove(group_t *, void *, int);
+
+/*
+ * Add / remove / access an element at a specified index.
+ * The group must already have sufficient capacity to hold
+ * an element at the specified index.
+ */
+int group_add_at(group_t *, void *, uint_t);
+void group_remove_at(group_t *, uint_t);
+
+#endif /* _KERNEL || _KMEMUSER */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _GROUP_H */
diff --git a/usr/src/uts/common/sys/lgrp.h b/usr/src/uts/common/sys/lgrp.h
index 3f6e191c98..8f35095adf 100644
--- a/usr/src/uts/common/sys/lgrp.h
+++ b/usr/src/uts/common/sys/lgrp.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -253,8 +253,6 @@ typedef struct lgrp {
struct cpu *lgrp_cpu; /* pointer to a cpu may be null */
uint_t lgrp_cpucnt; /* number of cpus in this lgrp */
- uint_t lgrp_chipcnt;
- struct chip *lgrp_chips; /* pointer to chips in this lgrp */
kstat_t *lgrp_kstat; /* per-lgrp kstats */
} lgrp_t;
diff --git a/usr/src/uts/common/sys/pg.h b/usr/src/uts/common/sys/pg.h
new file mode 100644
index 0000000000..99c51ca09a
--- /dev/null
+++ b/usr/src/uts/common/sys/pg.h
@@ -0,0 +1,173 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _PG_H
+#define _PG_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Processor Groups
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if (defined(_KERNEL) || defined(_KMEMUSER))
+#include <sys/cpuvar.h>
+#include <sys/group.h>
+#include <sys/processor.h>
+#include <sys/bitset.h>
+#include <sys/atomic.h>
+#include <sys/types.h>
+#include <sys/kstat.h>
+
+typedef uint_t pgid_t; /* processor group id */
+typedef uint_t pg_cid_t; /* processor group class id */
+
+/*
+ * Nature of CPU relationships
+ */
+typedef enum pg_relation {
+ PGR_LOGICAL,
+ PGR_PHYSICAL
+} pg_relation_t;
+
+/*
+ * Processor group structure
+ */
+typedef struct pg {
+ pgid_t pg_id; /* seq id */
+ pg_relation_t pg_relation; /* grouping relationship */
+ struct pg_class *pg_class; /* pg class */
+ struct group pg_cpus; /* group of CPUs */
+} pg_t;
+
+/*
+ * PG class callbacks
+ */
+struct pg_ops {
+ struct pg *(*alloc)();
+ void (*free)(struct pg *);
+ void (*cpu_init)(struct cpu *);
+ void (*cpu_fini)(struct cpu *);
+ void (*cpu_active)(struct cpu *);
+ void (*cpu_inactive)(struct cpu *);
+ void (*cpupart_in)(struct cpu *, struct cpupart *);
+ void (*cpupart_out)(struct cpu *, struct cpupart *);
+ void (*cpupart_move)(struct cpu *, struct cpupart *,
+ struct cpupart *);
+ int (*cpu_belongs)(struct pg *, struct cpu *);
+};
+
+#define PG_CLASS_NAME_MAX 32
+
+/*
+ * PG class structure
+ */
+typedef struct pg_class {
+ pg_cid_t pgc_id;
+ char pgc_name[PG_CLASS_NAME_MAX];
+ struct pg_ops *pgc_ops;
+ pg_relation_t pgc_relation;
+} pg_class_t;
+
+/*
+ * Per CPU processor group data
+ */
+typedef struct cpu_pg {
+ struct group pgs; /* All the CPU's PGs */
+ struct group cmt_pgs; /* CMT load balancing lineage */
+ /* (Group hierarchy ordered) */
+ struct pg *cmt_lineage; /* Ascending lineage chain */
+} cpu_pg_t;
+
+/*
+ * PG cpu iterator cookie
+ */
+typedef struct pg_cpu_itr {
+ pg_t *pg;
+ group_iter_t position;
+} pg_cpu_itr_t;
+
+/*
+ * Initialize a PG CPU iterator cookie
+ */
+#define PG_CPU_ITR_INIT(pgrp, itr) \
+{ \
+ group_iter_init(&(itr).position); \
+ (itr).pg = ((pg_t *)pgrp); \
+}
+
+/*
+ * Return the first CPU in a PG
+ */
+#define PG_CPU_GET_FIRST(pgrp) \
+ (GROUP_SIZE(&((pg_t *)pgrp)->pg_cpus) > 0 ? \
+ GROUP_ACCESS(&((pg_t *)pgrp)->pg_cpus, 0) : NULL)
+
+/*
+ * Framework routines
+ */
+void pg_init(void);
+pg_cid_t pg_class_register(char *, struct pg_ops *, pg_relation_t);
+
+/*
+ * PG CPU reconfiguration hooks
+ */
+void pg_cpu0_init(void);
+void pg_cpu_init(cpu_t *);
+void pg_cpu_fini(cpu_t *);
+void pg_cpu_active(cpu_t *);
+void pg_cpu_inactive(cpu_t *);
+void pg_cpu_startup(cpu_t *);
+void pg_cpu_bootstrap(cpu_t *);
+
+/*
+ * PG cpupart service hooks
+ */
+void pg_cpupart_in(cpu_t *, struct cpupart *);
+void pg_cpupart_out(cpu_t *, struct cpupart *);
+void pg_cpupart_move(cpu_t *, struct cpupart *, struct cpupart *);
+
+/*
+ * PG CPU utility routines
+ */
+pg_t *pg_create(pg_cid_t);
+void pg_destroy(pg_t *);
+void pg_cpu_add(pg_t *, cpu_t *);
+void pg_cpu_delete(pg_t *, cpu_t *);
+pg_t *pg_cpu_find_pg(cpu_t *, group_t *);
+cpu_t *pg_cpu_next(pg_cpu_itr_t *);
+
+
+#endif /* _KERNEL || _KMEMUSER */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PG_H */
diff --git a/usr/src/uts/common/sys/pghw.h b/usr/src/uts/common/sys/pghw.h
new file mode 100644
index 0000000000..e78be92032
--- /dev/null
+++ b/usr/src/uts/common/sys/pghw.h
@@ -0,0 +1,135 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _PGHW_H
+#define _PGHW_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if (defined(_KERNEL) || defined(_KMEMUSER))
+#include <sys/cpuvar.h>
+#include <sys/group.h>
+#include <sys/processor.h>
+#include <sys/bitmap.h>
+#include <sys/atomic.h>
+#include <sys/types.h>
+#include <sys/kstat.h>
+#include <sys/pg.h>
+
+/*
+ * Hardware that may be shared by a group of processors
+ */
+typedef enum pghw_type {
+ PGHW_START,
+ PGHW_IPIPE,
+ PGHW_CACHE,
+ PGHW_FPU,
+ PGHW_MPIPE,
+ PGHW_MEMORY,
+ PGHW_NUM_COMPONENTS
+} pghw_type_t;
+
+/*
+ * Consider the physical processor sharing relationship
+ * equivalent to a shared pipe to memory.
+ */
+#define PGHW_CHIP PGHW_MPIPE
+
+/*
+ * Anonymous instance id
+ */
+#define PGHW_INSTANCE_ANON ((id_t)0xdecafbad)
+
+/*
+ * Processor Group (physical sharing relationship)
+ */
+typedef struct pghw {
+ pg_t pghw_pg; /* processor group */
+ pghw_type_t pghw_hw; /* HW sharing relationship */
+ id_t pghw_instance; /* sharing instance identifier */
+ kstat_t *pghw_kstat; /* physical kstats exported */
+} pghw_t;
+
+/*
+ * IDs associating a CPU with various physical hardware
+ */
+typedef struct cpu_physid {
+ id_t cpu_chipid; /* CPU's physical processor */
+ id_t cpu_coreid; /* CPU's physical core */
+ id_t cpu_cacheid; /* CPU's cache id */
+} cpu_physid_t;
+
+/*
+ * Physical PG initialization / CPU service hooks
+ */
+void pghw_init(pghw_t *, cpu_t *, pghw_type_t);
+void pghw_fini(pghw_t *);
+void pghw_cpu_add(pghw_t *, cpu_t *);
+pghw_t *pghw_place_cpu(cpu_t *, pghw_type_t);
+
+/*
+ * Physical ID cache creation / destruction
+ */
+void pghw_physid_create(cpu_t *);
+void pghw_physid_destroy(cpu_t *);
+
+/*
+ * CPU / PG hardware related search operations
+ */
+pghw_t *pghw_find_pg(cpu_t *, pghw_type_t);
+pghw_t *pghw_find_by_instance(id_t, pghw_type_t);
+group_t *pghw_set_lookup(pghw_type_t);
+
+int pghw_level(pghw_type_t);
+
+void pghw_kstat_create(pghw_t *);
+int pghw_kstat_update(kstat_t *, int);
+
+/* Hardware sharing relationship platform interfaces */
+int pg_plat_hw_shared(cpu_t *, pghw_type_t);
+int pg_plat_cpus_share(cpu_t *, cpu_t *, pghw_type_t);
+int pg_plat_hw_level(pghw_type_t);
+id_t pg_plat_hw_instance_id(cpu_t *, pghw_type_t);
+
+/*
+ * What comprises a "core" may vary across processor implementations,
+ * and so the term itself is somewhat unstable. For this reason, there
+ * is no PGHW_CORE type, but we provide an interface here to allow platforms
+ * to express cpu <=> core mappings.
+ */
+id_t pg_plat_get_core_id(cpu_t *);
+
+#endif /* _KERNEL || _KMEMUSER */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PGHW_H */
diff --git a/usr/src/uts/i86pc/cpu/amd_opteron/ao_cpu.c b/usr/src/uts/i86pc/cpu/amd_opteron/ao_cpu.c
index ccbc04fe4c..55084167b8 100644
--- a/usr/src/uts/i86pc/cpu/amd_opteron/ao_cpu.c
+++ b/usr/src/uts/i86pc/cpu/amd_opteron/ao_cpu.c
@@ -20,14 +20,14 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
-#include <sys/chip.h>
+#include <sys/pghw.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/fm/protocol.h>
@@ -72,8 +72,8 @@ ao_fmri_create(ao_data_t *ao, nv_alloc_t *nva)
fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 3,
"motherboard", 0,
- "chip", ao->ao_cpu->cpu_chip->chip_id,
- "cpu", chip_plat_get_clogid(ao->ao_cpu));
+ "chip", pg_plat_hw_instance_id(ao->ao_cpu, PGHW_CHIP),
+ "cpu", cpuid_get_clogid(ao->ao_cpu));
return (nvl);
}
@@ -113,7 +113,7 @@ int
ao_scrubber_enable(void *data, uint64_t base, uint64_t ilen, int csdiscontig)
{
ao_data_t *ao = data;
- chipid_t chipid = chip_plat_get_chipid(ao->ao_cpu);
+ chipid_t chipid = pg_plat_hw_instance_id(ao->ao_cpu, PGHW_CHIP);
uint32_t rev = cpuid_getchiprev(ao->ao_cpu);
uint32_t scrubctl, lo, hi;
int rv = 1;
diff --git a/usr/src/uts/i86pc/cpu/amd_opteron/ao_main.c b/usr/src/uts/i86pc/cpu/amd_opteron/ao_main.c
index 06487043d9..908786702d 100644
--- a/usr/src/uts/i86pc/cpu/amd_opteron/ao_main.c
+++ b/usr/src/uts/i86pc/cpu/amd_opteron/ao_main.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -37,13 +37,14 @@
#include <sys/cpuvar.h>
#include <sys/x86_archext.h>
#include <sys/kmem.h>
+#include <sys/pghw.h>
#include <sys/modctl.h>
#include <sys/mc.h>
#include <sys/mca_x86.h>
#include "ao.h"
-static struct ao_chipshared *ao_shared[CHIP_MAX_CHIPS];
+static struct ao_chipshared *ao_shared[NCPU];
/*
* This cpu module supports AMD family 0xf revisions B/C/D/E/F/G. If
@@ -55,7 +56,7 @@ uint_t ao_model_limit = 0x6f;
static int
ao_init(cpu_t *cp, void **datap)
{
- uint_t chipid = chip_plat_get_chipid(CPU);
+ uint_t chipid = pg_plat_hw_instance_id(CPU, PGHW_CHIP);
struct ao_chipshared *sp, *osp;
ao_data_t *ao;
uint64_t cap;
diff --git a/usr/src/uts/i86pc/cpu/amd_opteron/ao_mca.c b/usr/src/uts/i86pc/cpu/amd_opteron/ao_mca.c
index a4dc3e7660..c9adcde2cf 100644
--- a/usr/src/uts/i86pc/cpu/amd_opteron/ao_mca.c
+++ b/usr/src/uts/i86pc/cpu/amd_opteron/ao_mca.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -35,7 +35,7 @@
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
-#include <sys/chip.h>
+#include <sys/pghw.h>
#include <sys/cyclic.h>
#include <sys/cpu_module_impl.h>
#include <sys/pci_cfgspace_impl.h>
@@ -475,7 +475,7 @@ static void
ao_nb_cfg(ao_data_t *ao, uint32_t rev)
{
const struct ao_nb_cfg *nbcp = &ao_cfg_extra[0];
- uint_t chipid = chip_plat_get_chipid(CPU);
+ uint_t chipid = pg_plat_hw_instance_id(CPU, PGHW_CHIP);
uint32_t val;
/*
@@ -547,7 +547,7 @@ int ao_nb_cfg_sparectl_noseize = 0;
static void
ao_sparectl_cfg(ao_data_t *ao)
{
- uint_t chipid = chip_plat_get_chipid(CPU);
+ uint_t chipid = pg_plat_hw_instance_id(CPU, PGHW_CHIP);
union mcreg_sparectl sparectl;
int chan, cs;
diff --git a/usr/src/uts/i86pc/io/mc/mcamd.h b/usr/src/uts/i86pc/io/mc/mcamd.h
index 489aeb1dde..28dd664b82 100644
--- a/usr/src/uts/i86pc/io/mc/mcamd.h
+++ b/usr/src/uts/i86pc/io/mc/mcamd.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -32,7 +32,7 @@
#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
-#include <sys/chip.h>
+#include <sys/pghw.h>
#include <sys/ksynch.h>
#include <sys/mc_amd.h>
#include <mcamd_api.h>
@@ -234,7 +234,7 @@ struct mc {
uint32_t mc_socket; /* Package type */
uint_t mc_ref; /* reference (attach) count */
mc_func_t mc_funcs[MC_FUNC_NUM]; /* Instance, devinfo, ... */
- chip_t *mc_chip; /* Associated chip */
+ pghw_t *mc_chip; /* MC's associated chip PG */
mc_cs_t *mc_cslist; /* All active chip-selects */
mc_cs_t *mc_cslast; /* End of chip-select list */
mc_dimm_t *mc_dimmlist; /* List of all logical DIMMs, */
diff --git a/usr/src/uts/i86pc/io/mc/mcamd_drv.c b/usr/src/uts/i86pc/io/mc/mcamd_drv.c
index 1c89e0e668..fe8ad94fd9 100644
--- a/usr/src/uts/i86pc/io/mc/mcamd_drv.c
+++ b/usr/src/uts/i86pc/io/mc/mcamd_drv.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -40,7 +40,7 @@
#include <sys/cred.h>
#include <sys/ksynch.h>
#include <sys/rwlock.h>
-#include <sys/chip.h>
+#include <sys/pghw.h>
#include <sys/open.h>
#include <sys/policy.h>
#include <sys/machsystm.h>
@@ -110,7 +110,7 @@ mc_lookup_by_chipid(int chipid)
ASSERT(RW_LOCK_HELD(&mc_lock));
for (mc = mc_list; mc != NULL; mc = mc->mc_next) {
- if (mc->mc_chip->chip_id == chipid)
+ if (mc->mc_chip->pghw_instance == chipid)
return (mc);
}
@@ -595,7 +595,7 @@ mc_report_testfails(mc_t *mc)
for (mccs = mc->mc_cslist; mccs != NULL; mccs = mccs->mccs_next) {
if (mccs->mccs_props.csp_testfail) {
unum.unum_board = 0;
- unum.unum_chip = mc->mc_chip->chip_id;
+ unum.unum_chip = mc->mc_chip->pghw_instance;
unum.unum_mc = 0;
unum.unum_cs = mccs->mccs_props.csp_num;
unum.unum_rank = mccs->mccs_props.csp_dimmrank;
@@ -672,7 +672,7 @@ mc_mkprops_addrmap(mc_pcicfg_hdl_t cfghdl, mc_t *mc)
* base/limit pairs is overkill.
*/
if (MCREG_FIELD_CMN(&lim[i], DstNode) !=
- mc->mc_chip->chip_id)
+ mc->mc_chip->pghw_instance)
continue;
/*
@@ -1272,8 +1272,9 @@ mc_fm_fini(dev_info_t *dip)
static mc_t *
mc_create(chipid_t chipid)
{
- chip_t *chp = chip_lookup(chipid);
+ pghw_t *chp = pghw_find_by_instance((id_t)chipid, PGHW_CHIP);
mc_t *mc;
+ cpu_t *cpu;
ASSERT(RW_WRITE_HELD(&mc_lock));
@@ -1283,17 +1284,18 @@ mc_create(chipid_t chipid)
mc = kmem_zalloc(sizeof (mc_t), KM_SLEEP);
mc->mc_hdr.mch_type = MC_NT_MC;
mc->mc_chip = chp;
- mc->mc_props.mcp_num = mc->mc_chip->chip_id;
+ mc->mc_props.mcp_num = mc->mc_chip->pghw_instance;
mc->mc_props.mcp_sparecs = MC_INVALNUM;
mc->mc_props.mcp_badcs = MC_INVALNUM;
/*
- * We can use the first cpu in the chip_cpus list since all cores
+ * We can use one of the chip's CPUs since all cores
* of a chip share the same revision and socket type.
*/
- mc->mc_props.mcp_rev = cpuid_getchiprev(chp->chip_cpus);
- mc->mc_revname = cpuid_getchiprevstr(chp->chip_cpus);
- mc->mc_socket = cpuid_getsockettype(chp->chip_cpus);
+ cpu = PG_CPU_GET_FIRST(chp);
+ mc->mc_props.mcp_rev = cpuid_getchiprev(cpu);
+ mc->mc_revname = cpuid_getchiprevstr(cpu);
+ mc->mc_socket = cpuid_getsockettype(cpu);
if (mc_list == NULL)
mc_list = mc;
@@ -1362,7 +1364,7 @@ mc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
rw_enter(&mc_lock, RW_WRITER);
for (mc = mc_list; mc != NULL; mc = mc->mc_next) {
- if (mc->mc_chip->chip_id == chipid)
+ if (mc->mc_chip->pghw_instance == chipid)
break;
}
@@ -1405,7 +1407,7 @@ mc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
dip, "model", (char *)bm->bm_model);
(void) ddi_prop_update_int(DDI_DEV_T_NONE,
- dip, "chip-id", mc->mc_chip->chip_id);
+ dip, "chip-id", mc->mc_chip->pghw_instance);
if (bm->bm_mkprops != NULL &&
mc_pcicfg_setup(mc, bm->bm_func, &cfghdl) == DDI_SUCCESS) {
@@ -1421,11 +1423,15 @@ mc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
if (func == MC_FUNC_DEVIMAP) {
mc_props_t *mcp = &mc->mc_props;
int dram_present = 0;
+ pg_cpu_itr_t itr;
+ cpu_t *cpup;
if (ddi_create_minor_node(dip, "mc-amd", S_IFCHR,
- mc->mc_chip->chip_id, "ddi_mem_ctrl", 0) != DDI_SUCCESS) {
+ mc->mc_chip->pghw_instance, "ddi_mem_ctrl",
+ 0) != DDI_SUCCESS) {
cmn_err(CE_WARN, "failed to create minor node for chip "
- "%u memory controller\n", mc->mc_chip->chip_id);
+ "%u memory controller\n",
+ mc->mc_chip->pghw_instance);
}
/*
@@ -1440,13 +1446,11 @@ mc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
*/
kpreempt_disable(); /* prevent cpu list from changing */
- cpu = mc->mc_chip->chip_cpus;
-
+ PG_CPU_ITR_INIT(mc->mc_chip, itr);
+ cpup = cpu = pg_cpu_next(&itr);
do {
- mcamd_mc_register(cpu);
- cpu = cpu->cpu_next_chip;
- } while (cpu != mc->mc_chip->chip_cpus);
-
+ mcamd_mc_register(cpup);
+ } while ((cpup = pg_cpu_next(&itr)) != NULL);
if (mc->mc_props.mcp_lim != mc->mc_props.mcp_base) {
/*
diff --git a/usr/src/uts/i86pc/io/mc/mcamd_pcicfg.c b/usr/src/uts/i86pc/io/mc/mcamd_pcicfg.c
index 534666769a..8c772a8f01 100644
--- a/usr/src/uts/i86pc/io/mc/mcamd_pcicfg.c
+++ b/usr/src/uts/i86pc/io/mc/mcamd_pcicfg.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -86,14 +86,16 @@ mc_pcicfg_get32(mc_pcicfg_hdl_t cookie, off_t offset)
uint32_t
mc_pcicfg_get32_nohdl(mc_t *mc, enum mc_funcnum func, off_t offset)
{
- return (pci_mech1_getl(0, MC_AMD_DEV_OFFSET + mc->mc_chip->chip_id,
- func, offset));
+ return (pci_mech1_getl(0,
+ MC_AMD_DEV_OFFSET + mc->mc_chip->pghw_instance,
+ func, offset));
}
void
mc_pcicfg_put32_nohdl(mc_t *mc, enum mc_funcnum func, off_t offset,
uint32_t val)
{
- pci_mech1_putl(0, MC_AMD_DEV_OFFSET + mc->mc_chip->chip_id,
+ pci_mech1_putl(0,
+ MC_AMD_DEV_OFFSET + mc->mc_chip->pghw_instance,
func, offset, val);
}
diff --git a/usr/src/uts/i86pc/io/mc/mcamd_subr.c b/usr/src/uts/i86pc/io/mc/mcamd_subr.c
index 7ce0633a56..9f9786b10b 100644
--- a/usr/src/uts/i86pc/io/mc/mcamd_subr.c
+++ b/usr/src/uts/i86pc/io/mc/mcamd_subr.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -617,7 +617,7 @@ mc_fmri_create(mc_t *mc)
fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 3,
"motherboard", 0,
- "chip", mc->mc_chip->chip_id,
+ "chip", mc->mc_chip->pghw_instance,
"memory-controller", 0);
return (nvl);
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index 45fb59ff73..094092ed0b 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -40,7 +40,7 @@
#include <sys/sunndi.h>
#include <sys/cpuvar.h>
#include <sys/processor.h>
-#include <sys/chip.h>
+#include <sys/pg.h>
#include <sys/fp.h>
#include <sys/controlregs.h>
#include <sys/auxv_386.h>
@@ -1886,8 +1886,8 @@ cpuid_getsockettype(struct cpu *cpu)
return (cpu->cpu_m.mcpu_cpi->cpi_socket);
}
-chipid_t
-chip_plat_get_chipid(cpu_t *cpu)
+int
+cpuid_get_chipid(cpu_t *cpu)
{
ASSERT(cpuid_checkpass(cpu, 1));
@@ -1897,14 +1897,14 @@ chip_plat_get_chipid(cpu_t *cpu)
}
id_t
-chip_plat_get_coreid(cpu_t *cpu)
+cpuid_get_coreid(cpu_t *cpu)
{
ASSERT(cpuid_checkpass(cpu, 1));
return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
}
int
-chip_plat_get_clogid(cpu_t *cpu)
+cpuid_get_clogid(cpu_t *cpu)
{
ASSERT(cpuid_checkpass(cpu, 1));
return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
diff --git a/usr/src/uts/i86pc/os/lgrpplat.c b/usr/src/uts/i86pc/os/lgrpplat.c
index 5397d0303d..79ea557578 100644
--- a/usr/src/uts/i86pc/os/lgrpplat.c
+++ b/usr/src/uts/i86pc/os/lgrpplat.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -39,6 +39,7 @@
#include <sys/pci_cfgspace.h>
#include <sys/pci_impl.h>
#include <sys/param.h>
+#include <sys/pghw.h>
#include <sys/promif.h> /* for prom_printf() */
#include <sys/systm.h>
#include <sys/thread.h>
@@ -50,7 +51,6 @@
#include <vm/vm_dep.h>
-
/*
* lgroup platform support for x86 platforms.
*/
@@ -58,7 +58,7 @@
#define MAX_NODES 8
#define NLGRP (MAX_NODES * (MAX_NODES - 1) + 1)
-#define LGRP_PLAT_CPU_TO_NODE(cpu) (chip_plat_get_chipid(cpu))
+#define LGRP_PLAT_CPU_TO_NODE(cpu) (pg_plat_hw_instance_id(cpu, PGHW_CHIP))
#define LGRP_PLAT_PROBE_NROUNDS 64 /* default laps for probing */
#define LGRP_PLAT_PROBE_NSAMPLES 1 /* default samples to take */
diff --git a/usr/src/uts/i86pc/os/mlsetup.c b/usr/src/uts/i86pc/os/mlsetup.c
index 6e97330601..067f417551 100644
--- a/usr/src/uts/i86pc/os/mlsetup.c
+++ b/usr/src/uts/i86pc/os/mlsetup.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -41,7 +41,7 @@
#include <sys/cpupart.h>
#include <sys/pset.h>
#include <sys/copyops.h>
-#include <sys/chip.h>
+#include <sys/pg.h>
#include <sys/disp.h>
#include <sys/debug.h>
#include <sys/sunddi.h>
@@ -122,7 +122,6 @@ void
mlsetup(struct regs *rp)
{
extern struct classfuncs sys_classfuncs;
- extern struct chip cpu0_chip;
extern disp_t cpu0_disp;
extern char t0stack[];
int boot_ncpus;
@@ -328,16 +327,6 @@ mlsetup(struct regs *rp)
*/
lgrp_init();
- /*
- * The lgroup code needs to at least know about a CPU's
- * chip association, but it's too early to fully initialize
- * cpu0_chip, since the device node for the boot CPU doesn't
- * exist yet. Initialize enough of it to get by until formal
- * initialization.
- */
- CPU->cpu_rechoose = rechoose_interval;
- CPU->cpu_chip = &cpu0_chip;
-
rp->r_fp = 0; /* terminate kernel stack traces! */
prom_init("kernel", (void *)NULL);
diff --git a/usr/src/uts/i86pc/os/mp_machdep.c b/usr/src/uts/i86pc/os/mp_machdep.c
index 46e4f5a710..c2e3123a24 100644
--- a/usr/src/uts/i86pc/os/mp_machdep.c
+++ b/usr/src/uts/i86pc/os/mp_machdep.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -38,7 +38,7 @@
#include <sys/x86_archext.h>
#include <sys/cpupart.h>
#include <sys/cpuvar.h>
-#include <sys/chip.h>
+#include <sys/pghw.h>
#include <sys/disp.h>
#include <sys/cpu.h>
#include <sys/archsystm.h>
@@ -86,6 +86,8 @@ extern uint64_t freq_tsc(uint32_t *);
extern uint64_t freq_notsc(uint32_t *);
#endif
extern void pc_gethrestime(timestruc_t *);
+extern int cpuid_get_coreid(cpu_t *);
+extern int cpuid_get_chipid(cpu_t *);
/*
* PSM functions initialization
@@ -164,35 +166,94 @@ int simulator_run = 0; /* patch to non-zero if running under simics */
#endif /* _SIMULATOR_SUPPORT */
-/* ARGSUSED */
-void
-chip_plat_define_chip(cpu_t *cp, chip_def_t *cd)
+
+/*
+ * Report whether CPUs on this platform can share the hardware component
+ * "hw", judged solely from the x86_feature flags:
+ *
+ *	PGHW_IPIPE	1 if X86_HTT is set, otherwise 0
+ *	PGHW_CHIP	1 if X86_CMP or X86_HTT is set, otherwise 0
+ *	anything else	0
+ *
+ * The cp argument is not examined.
+ */
+/*ARGSUSED*/
+int
+pg_plat_hw_shared(cpu_t *cp, pghw_type_t hw)
{
- if ((x86_feature & (X86_HTT|X86_CMP)) == X86_HTT) {
- /*
- * Single-core Pentiums with Hyper-Threading enabled.
- */
- cd->chipd_type = CHIP_SMT;
- } else if ((x86_feature & (X86_HTT|X86_CMP)) == X86_CMP) {
- /*
- * Multi-core Opterons or Multi-core Pentiums with
- * Hyper-Threading disabled.
- */
- cd->chipd_type = CHIP_CMP_SPLIT_CACHE;
- } else if ((x86_feature & (X86_HTT|X86_CMP)) == (X86_HTT|X86_CMP)) {
- /*
- * Multi-core Pentiums with Hyper-Threading enabled.
- */
- cd->chipd_type = CHIP_CMT;
- } else {
- /*
- * Single-core/single-threaded chips.
- */
- cd->chipd_type = CHIP_DEFAULT;
+ switch (hw) {
+ case PGHW_IPIPE:
+ if (x86_feature & (X86_HTT)) {
+ /*
+ * Hyper-threading is SMT
+ */
+ return (1);
+ } else {
+ return (0);
+ }
+ case PGHW_CHIP:
+ if (x86_feature & (X86_CMP|X86_HTT))
+ return (1);
+ else
+ return (0);
+ default:
+ return (0);
+ }
+}
+
+/*
+ * Decide whether cpu_a and cpu_b sit behind the same physical instance of
+ * the hardware component "hw".
+ *
+ * Returns 1 when both CPUs report the same instance id, 0 when the ids
+ * differ, and -1 when pg_plat_hw_instance_id() yields -1 for either CPU
+ * (its answer for hardware types it does not know about).
+ */
+int
+pg_plat_cpus_share(cpu_t *cpu_a, cpu_t *cpu_b, pghw_type_t hw)
+{
+	id_t	inst_a = pg_plat_hw_instance_id(cpu_a, hw);
+	id_t	inst_b = pg_plat_hw_instance_id(cpu_b, hw);
+
+	/* Only compare the ids when both of them are valid. */
+	if (inst_a != -1 && inst_b != -1)
+		return (inst_a == inst_b);
+
+	return (-1);
+}
+
+/*
+ * Return the id of the physical hardware instance of type "hw" that the
+ * given CPU belongs to:
+ *
+ *	PGHW_IPIPE	the value of cpuid_get_coreid()
+ *	PGHW_CHIP	the value of cpuid_get_chipid()
+ *	anything else	-1
+ */
+id_t
+pg_plat_hw_instance_id(cpu_t *cpu, pghw_type_t hw)
+{
+	id_t	inst = -1;
+
+	switch (hw) {
+	case PGHW_IPIPE:
+		inst = cpuid_get_coreid(cpu);
+		break;
+	case PGHW_CHIP:
+		inst = cpuid_get_chipid(cpu);
+		break;
+	default:
+		/* unknown sharing relationship: keep the -1 default */
+		break;
 	}
+
+	return (inst);
+}
+
+/*
+ * Map a hardware sharing type to its index in this platform's hierarchy
+ * table (PGHW_IPIPE first, then PGHW_CHIP).  Types that do not appear
+ * ahead of the PGHW_NUM_COMPONENTS terminator yield -1.
+ */
+int
+pg_plat_hw_level(pghw_type_t hw)
+{
+	static pghw_type_t hw_hier[] = {
+		PGHW_IPIPE,
+		PGHW_CHIP,
+		PGHW_NUM_COMPONENTS	/* terminator */
+	};
+	int	level = 0;
+
+	while (hw_hier[level] != PGHW_NUM_COMPONENTS) {
+		if (hw_hier[level] == hw)
+			return (level);
+		level++;
+	}
+
+	return (-1);
+}
- cd->chipd_rechoose_adj = 0;
- cd->chipd_nosteal = 100000ULL; /* 100 usec */
+/*
+ * Return the core id that cpuid_get_coreid() reports for the given CPU,
+ * converted to an id_t.
+ */
+id_t
+pg_plat_get_core_id(cpu_t *cpu)
+{
+	id_t	core = (id_t)cpuid_get_coreid(cpu);
+
+	return (core);
+}
+
+/*
+ * Platform hook that seeds nosteal_nsec, the interval used by
+ * disp_getbest().  This platform uses 100 microseconds.
+ */
+void
+cmp_set_nosteal_interval(void)
+{
+	nosteal_nsec = 100UL * 1000UL;	/* 100us expressed in nanoseconds */
 }
/*
diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c
index 6c00d2bb20..5fd2325888 100644
--- a/usr/src/uts/i86pc/os/mp_startup.c
+++ b/usr/src/uts/i86pc/os/mp_startup.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -54,7 +54,8 @@
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/cpc_impl.h>
-#include <sys/chip.h>
+#include <sys/pg.h>
+#include <sys/cmt.h>
#include <sys/dtrace.h>
#include <sys/archsystm.h>
#include <sys/fp.h>
@@ -321,11 +322,9 @@ extern void *long_mode_64(void);
tp->t_disp_queue = cp->cpu_disp;
/*
- * Bootstrap the CPU for CMT aware scheduling
- * The rest of the initialization will happen from
- * mp_startup()
+ * Bootstrap the CPU's PG data
*/
- chip_bootstrap_cpu(cp);
+ pg_cpu_bootstrap(cp);
/*
* Perform CPC intialization on the new CPU.
@@ -1171,15 +1170,13 @@ mp_startup(void)
mutex_enter(&cpu_lock);
/*
- * It's unfortunate that chip_cpu_init() has to be called here.
- * It really belongs in cpu_add_unit(), but unfortunately it is
- * dependent on the cpuid probing, which must be done in the
- * context of the current CPU. Care must be taken on x86 to ensure
- * that mp_startup can safely block even though chip_cpu_init() and
- * cpu_add_active() have not yet been called.
+ * Processor group initialization for this CPU is dependent on the
+ * cpuid probing, which must be done in the context of the current
+ * CPU.
*/
- chip_cpu_init(cp);
- chip_cpu_startup(cp);
+ pghw_physid_create(cp);
+ pg_cpu_init(cp);
+ pg_cmt_cpu_startup(cp);
cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_ENABLE | CPU_EXISTS;
cpu_add_active(cp);
diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c
index b131db010c..6d7954db30 100644
--- a/usr/src/uts/i86pc/os/startup.c
+++ b/usr/src/uts/i86pc/os/startup.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -107,7 +107,7 @@
#include <sys/kobj.h>
#include <sys/kobj_lex.h>
#include <sys/cpc_impl.h>
-#include <sys/chip.h>
+#include <sys/pg.h>
#include <sys/x86_archext.h>
#include <sys/cpu_module.h>
#include <sys/smbios.h>
@@ -1984,16 +1984,6 @@ post_startup(void)
maxmem = freemem;
add_cpunode2devtree(CPU->cpu_id, CPU->cpu_m.mcpu_cpi);
-
- /*
- * Perform the formal initialization of the boot chip,
- * and associate the boot cpu with it.
- * This must be done after the cpu node for CPU has been
- * added to the device tree, when the necessary probing to
- * know the chip type and chip "id" is performed.
- */
- chip_cpu_init(CPU);
- chip_cpu_assign(CPU);
}
static int
diff --git a/usr/src/uts/intel/ia32/os/cpc_subr.c b/usr/src/uts/intel/ia32/os/cpc_subr.c
index e09f018242..4ab36ece98 100644
--- a/usr/src/uts/intel/ia32/os/cpc_subr.c
+++ b/usr/src/uts/intel/ia32/os/cpc_subr.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -43,7 +42,7 @@
#include <sys/cpc_impl.h>
#include <sys/x_call.h>
#include <sys/cmn_err.h>
-#include <sys/chip.h>
+#include <sys/cmt.h>
#include <sys/spl.h>
#include <io/pcplusmp/apic.h>
@@ -79,7 +78,8 @@ kcpc_hw_rem_ovf_intr(void)
static int
kcpc_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
{
- chip_t *chp = cpu[cpuid]->cpu_chip;
+ pg_cmt_t *chip_pg;
+ int active_cpus_cnt;
if (what != CPU_ON)
return (0);
@@ -95,8 +95,12 @@ kcpc_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
* If this chip now has more than 1 active cpu, we must invalidate all
* contexts in the system.
*/
- if (chp->chip_ncpu > 1)
- kcpc_invalidate_all();
+ chip_pg = (pg_cmt_t *)pghw_find_pg(cpu[cpuid], PGHW_CHIP);
+ if (chip_pg != NULL) {
+ active_cpus_cnt = GROUP_SIZE(&chip_pg->cmt_cpus_actv);
+ if (active_cpus_cnt > 1)
+ kcpc_invalidate_all();
+ }
return (0);
}
@@ -177,7 +181,9 @@ kcpc_remote_stop(cpu_t *cp)
int
kcpc_hw_cpu_hook(processorid_t cpuid, ulong_t *kcpc_cpumap)
{
- cpu_t *p, *cpu;
+ cpu_t *cpu, *p;
+ pg_t *chip_pg;
+ pg_cpu_itr_t itr;
if ((x86_feature & X86_HTT) == 0)
return (0);
@@ -187,8 +193,7 @@ kcpc_hw_cpu_hook(processorid_t cpuid, ulong_t *kcpc_cpumap)
* once.
*
* This loop is protected by holding cpu_lock, in order to properly
- * access the cpu_t of the desired cpu. This also guarantees that the
- * per chip cpu lists will not change whilst we look at them.
+ * access the cpu_t of the desired cpu.
*/
mutex_enter(&cpu_lock);
if ((cpu = cpu_get(cpuid)) == NULL) {
@@ -196,7 +201,12 @@ kcpc_hw_cpu_hook(processorid_t cpuid, ulong_t *kcpc_cpumap)
return (-1);
}
- for (p = cpu->cpu_next_chip; p != cpu; p = p->cpu_next_chip) {
+ chip_pg = (pg_t *)pghw_find_pg(cpu, PGHW_CHIP);
+
+ PG_CPU_ITR_INIT(chip_pg, itr);
+ while ((p = pg_cpu_next(&itr)) != NULL) {
+ if (p == cpu)
+ continue;
if (BT_TEST(kcpc_cpumap, p->cpu_id)) {
mutex_exit(&cpu_lock);
return (-1);
@@ -213,7 +223,9 @@ kcpc_hw_cpu_hook(processorid_t cpuid, ulong_t *kcpc_cpumap)
int
kcpc_hw_lwp_hook(void)
{
- chip_t *p;
+ pg_cmt_t *chip;
+ group_t *chips;
+ group_iter_t i;
if ((x86_feature & X86_HTT) == 0)
return (0);
@@ -222,14 +234,21 @@ kcpc_hw_lwp_hook(void)
* Only one CPU per chip may be online.
*/
mutex_enter(&cpu_lock);
- p = CPU->cpu_chip;
- do {
- if (p->chip_ncpu > 1) {
+
+ chips = pghw_set_lookup(PGHW_CHIP);
+ if (chips == NULL) {
+ mutex_exit(&cpu_lock);
+ return (0);
+ }
+
+ group_iter_init(&i);
+ while ((chip = group_iterate(chips, &i)) != NULL) {
+ if (GROUP_SIZE(&chip->cmt_cpus_actv) > 1) {
mutex_exit(&cpu_lock);
return (-1);
}
- p = p->chip_next;
- } while (p != CPU->cpu_chip);
+ }
+
mutex_exit(&cpu_lock);
return (0);
}
diff --git a/usr/src/uts/intel/pcbe/p4_pcbe.c b/usr/src/uts/intel/pcbe/p4_pcbe.c
index c7263a099c..bfaa46a0ab 100644
--- a/usr/src/uts/intel/pcbe/p4_pcbe.c
+++ b/usr/src/uts/intel/pcbe/p4_pcbe.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -61,7 +60,7 @@ static void p4_pcbe_allstop(void);
static void p4_pcbe_sample(void *token);
static void p4_pcbe_free(void *config);
-extern int chip_plat_get_clogid(cpu_t *);
+extern int cpuid_get_clogid(cpu_t *);
static pcbe_ops_t p4_pcbe_ops = {
PCBE_VER_1,
@@ -807,7 +806,7 @@ p4_pcbe_program(void *token)
* CCCR (and thus enables the counter).
*/
if (p4_htt) {
- int lid = chip_plat_get_clogid(CPU); /* Logical ID of CPU */
+ int lid = cpuid_get_clogid(CPU); /* Logical ID of CPU */
for (i = 0; i < 18; i++) {
uint64_t escr;
diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h
index cf76d4f237..af06a50a92 100644
--- a/usr/src/uts/intel/sys/x86_archext.h
+++ b/usr/src/uts/intel/sys/x86_archext.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -531,6 +531,9 @@ extern uint_t cpuid_getmodel(struct cpu *);
extern uint_t cpuid_getstep(struct cpu *);
extern uint_t cpuid_get_ncpu_per_chip(struct cpu *);
extern uint_t cpuid_get_ncore_per_chip(struct cpu *);
+extern int cpuid_get_chipid(struct cpu *);
+extern id_t cpuid_get_coreid(struct cpu *);
+extern int cpuid_get_clogid(struct cpu *);
extern int cpuid_is_cmt(struct cpu *);
extern int cpuid_syscall32_insn(struct cpu *);
extern int getl2cacheinfo(struct cpu *, int *, int *, int *);
diff --git a/usr/src/uts/sun4/io/trapstat.c b/usr/src/uts/sun4/io/trapstat.c
index fa7b98350b..6cfed113ae 100644
--- a/usr/src/uts/sun4/io/trapstat.c
+++ b/usr/src/uts/sun4/io/trapstat.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -46,7 +46,7 @@
#include <sys/hypervisor_api.h>
#endif
#ifndef sun4v
-#include <sys/chip.h>
+#include <sys/pghw.h>
#endif
/* BEGIN CSTYLED */
@@ -1439,7 +1439,7 @@ trapstat_setup(processorid_t cpu)
cp = cpu_get(cpu);
ASSERT(cp != NULL);
- if ((strand_idx = cpu ^ chip_plat_get_coreid(cp)) != 0) {
+ if ((strand_idx = cpu ^ pg_plat_hw_instance_id(cp, PGHW_IPIPE)) != 0) {
/*
* On sun4u platforms with multiple CPUs sharing the MMU
* (Olympus-C has 2 strands per core), each CPU uses a
diff --git a/usr/src/uts/sun4/os/machdep.c b/usr/src/uts/sun4/os/machdep.c
index 9661c8b83c..821999a3c0 100644
--- a/usr/src/uts/sun4/os/machdep.c
+++ b/usr/src/uts/sun4/os/machdep.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -47,7 +47,6 @@
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/clock.h>
-#include <sys/chip.h>
#include <sys/cmp.h>
#include <sys/platform_module.h>
#include <sys/bl.h>
diff --git a/usr/src/uts/sun4/os/mlsetup.c b/usr/src/uts/sun4/os/mlsetup.c
index 771822bb37..d66c6c13c1 100644
--- a/usr/src/uts/sun4/os/mlsetup.c
+++ b/usr/src/uts/sun4/os/mlsetup.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -56,7 +56,7 @@
#include <sys/copyops.h>
#include <sys/panic.h>
#include <sys/bootconf.h> /* for bootops */
-#include <sys/chip.h>
+#include <sys/pg.h>
#include <sys/kdi.h>
#include <sys/fpras.h>
@@ -274,15 +274,6 @@ mlsetup(struct regs *rp, void *cif, kfpu_t *fp)
setcputype();
map_wellknown_devices();
setcpudelay();
-
- /*
- * Associate the boot cpu with a physical processor.
- * This needs to be done after devices are mapped, since
- * we need to know what type of physical processor this is.
- * (CMP for example)
- */
- chip_cpu_init(CPU);
- chip_cpu_assign(CPU);
}
/*
diff --git a/usr/src/uts/sun4/os/mp_startup.c b/usr/src/uts/sun4/os/mp_startup.c
index 814d35bf15..b770ebbe2e 100644
--- a/usr/src/uts/sun4/os/mp_startup.c
+++ b/usr/src/uts/sun4/os/mp_startup.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -35,7 +35,8 @@
#include <sys/machsystm.h>
#include <sys/callb.h>
#include <sys/cpu_module.h>
-#include <sys/chip.h>
+#include <sys/pg.h>
+#include <sys/cmt.h>
#include <sys/dtrace.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
@@ -78,6 +79,9 @@ static void slave_startup(void);
*/
#define CPU_WAKEUP_GRACE_MSEC 1000
+extern hrtime_t nosteal_nsec;
+extern void cmp_set_nosteal_interval(void);
+
#ifdef TRAPTRACE
/*
* This function bop allocs traptrace buffers for all cpus
@@ -408,9 +412,13 @@ setup_cpu_common(int cpuid)
cpu_init_private(cp);
/*
- * Associate this CPU with a physical processor
+ * Initialize the CPU's physical ID cache, and processor groups
*/
- chip_cpu_init(cp);
+ pghw_physid_create(cp);
+ pg_cpu_init(cp);
+
+ if (nosteal_nsec == -1)
+ cmp_set_nosteal_interval();
cpu_intrq_setup(cp);
@@ -630,9 +638,9 @@ slave_startup(void)
kcpc_hw_startup_cpu(original_flags);
/*
- * Notify the CMT subsystem that the slave has started
+ * Notify the PG subsystem that the CPU has started
*/
- chip_cpu_startup(CPU);
+ pg_cmt_cpu_startup(CPU);
/*
* Now we are done with the startup thread, so free it up.
diff --git a/usr/src/uts/sun4u/cpu/us3_cheetahplus.c b/usr/src/uts/sun4u/cpu/us3_cheetahplus.c
index cf4ffc353f..c4a66883b3 100644
--- a/usr/src/uts/sun4u/cpu/us3_cheetahplus.c
+++ b/usr/src/uts/sun4u/cpu/us3_cheetahplus.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -63,6 +63,7 @@
#include <sys/fm/protocol.h>
#include <sys/fm/cpu/UltraSPARC-III.h>
#include <sys/fm/util.h>
+#include <sys/pghw.h>
#ifdef CHEETAHPLUS_ERRATUM_25
#include <sys/cyclic.h>
@@ -1331,13 +1332,27 @@ cpu_scrub_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
static cpu_t *
cpu_get_sibling_core(cpu_t *cpup)
{
- cpu_t *nextp;
+ cpu_t *nextp;
+ pg_t *pg;
+ pg_cpu_itr_t i;
if ((cpup == NULL) || (!cmp_cpu_is_cmp(cpup->cpu_id)))
return (NULL);
+ pg = (pg_t *)pghw_find_pg(cpup, PGHW_CHIP);
+ if (pg == NULL)
+ return (NULL);
+
+ /*
+ * Iterate over the CPUs in the chip PG looking
+ * for a CPU that isn't cpup
+ */
+ PG_CPU_ITR_INIT(pg, i);
+ while ((nextp = pg_cpu_next(&i)) != NULL) {
+ if (nextp != cpup)
+ break;
+ }
- nextp = cpup->cpu_next_chip;
- if ((nextp == NULL) || (nextp == cpup))
+ if (nextp == NULL)
return (NULL);
return (nextp);
diff --git a/usr/src/uts/sun4u/cpu/us3_common.c b/usr/src/uts/sun4u/cpu/us3_common.c
index 4904bff814..67839633ea 100644
--- a/usr/src/uts/sun4u/cpu/us3_common.c
+++ b/usr/src/uts/sun4u/cpu/us3_common.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -75,6 +75,7 @@
#include <sys/cyclic.h>
#include <sys/errorq.h>
#include <sys/errclassify.h>
+#include <sys/pghw.h>
#ifdef CHEETAHPLUS_ERRATUM_25
#include <sys/xc_impl.h>
@@ -2991,7 +2992,7 @@ ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
!cpu_flagged_active(sp->cpu_flags) ||
(sp == dtcr && !(flags & PTNR_SELFOK)) ||
- (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id &&
+ (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) &&
!(flags & PTNR_SIBLINGOK))) {
sp = dtcr->cpu_next_part;
} else {
@@ -2999,8 +3000,7 @@ ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
*typep = CE_XDIAG_PTNR_REMOTE;
} else if (sp == dtcr) {
*typep = CE_XDIAG_PTNR_SELF;
- } else if (sp->cpu_chip->chip_id ==
- dtcr->cpu_chip->chip_id) {
+ } else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) {
*typep = CE_XDIAG_PTNR_SIBLING;
} else {
*typep = CE_XDIAG_PTNR_LOCAL;
@@ -3051,7 +3051,7 @@ ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
*typep = CE_XDIAG_PTNR_REMOTE;
return (ptnr);
}
- if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) {
+ if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
if (sibptnr == NULL)
sibptnr = ptnr;
continue;
diff --git a/usr/src/uts/sun4u/os/cmp.c b/usr/src/uts/sun4u/os/cmp.c
index 03aa7621e8..3c2fa2503f 100644
--- a/usr/src/uts/sun4u/os/cmp.c
+++ b/usr/src/uts/sun4u/os/cmp.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -29,8 +29,8 @@
#include <sys/machsystm.h>
#include <sys/x_call.h>
#include <sys/cmp.h>
+#include <sys/pghw.h>
#include <sys/debug.h>
-#include <sys/chip.h>
#include <sys/disp.h>
#include <sys/cheetahregs.h>
@@ -141,64 +141,107 @@ cmp_cpu_to_chip(processorid_t cpuid)
return (cpunodes[cpuid].portid);
}
-/*
- * Return a chip "id" for the given cpu_t
- * cpu_t's residing on the same physical processor
- * should map to the same "id"
- */
-chipid_t
-chip_plat_get_chipid(cpu_t *cp)
-{
- return (cmp_cpu_to_chip(cp->cpu_id));
-}
-
-/*
- * Return the "core id" for the given cpu_t
- * The "core id" space spans uniquely across all
- * cpu chips.
- */
-id_t
-chip_plat_get_coreid(cpu_t *cp)
+/*
+ * Report whether the given CPU's implementation can share the hardware
+ * component "hw" with other CPUs:
+ *
+ *	PGHW_IPIPE	1 on Olympus-C
+ *	PGHW_CHIP	1 on Jaguar or Panther
+ *	PGHW_CACHE	1 on Panther
+ *
+ * Every other combination, and every other hardware type, yields 0.
+ *
+ * NOTE(review): the ARGSUSED annotation below looks stale, since cp is
+ * dereferenced to look up the implementation -- confirm before relying
+ * on it.
+ */
+/* ARGSUSED */
+int
+pg_plat_hw_shared(cpu_t *cp, pghw_type_t hw)
{
int impl;
impl = cpunodes[cp->cpu_id].implementation;
- if (IS_OLYMPUS_C(impl)) {
- /*
- * Currently only Fujitsu Olympus-c processor supports
- * multi-stranded cores. Return the cpu_id with
- * the strand bit masked out.
- */
- return ((id_t)((uint_t)cp->cpu_id & ~(0x1)));
- } else {
- return (cp->cpu_id);
+ switch (hw) {
+ case PGHW_IPIPE:
+ if (IS_OLYMPUS_C(impl))
+ return (1);
+ break;
+ case PGHW_CHIP:
+ if (IS_JAGUAR(impl) || IS_PANTHER(impl))
+ return (1);
+ break;
+ case PGHW_CACHE:
+ if (IS_PANTHER(impl))
+ return (1);
+ break;
}
+ return (0);
}
-void
-chip_plat_define_chip(cpu_t *cp, chip_def_t *cd)
+/*
+ * Decide whether cpu_a and cpu_b share the hardware component "hw".
+ *
+ * For PGHW_IPIPE and PGHW_CHIP the two CPUs share when their
+ * pg_plat_hw_instance_id() values are equal.  For PGHW_CACHE they share
+ * only when both are Panther and they also share a chip.  Any other
+ * hardware type yields 0.
+ */
+int
+pg_plat_cpus_share(cpu_t *cpu_a, cpu_t *cpu_b, pghw_type_t hw)
{
- int impl;
+ int impla, implb;
+
+ impla = cpunodes[cpu_a->cpu_id].implementation;
+ implb = cpunodes[cpu_b->cpu_id].implementation;
+
+ switch (hw) {
+ case PGHW_IPIPE:
+ case PGHW_CHIP:
+ return (pg_plat_hw_instance_id(cpu_a, hw) ==
+ pg_plat_hw_instance_id(cpu_b, hw));
+ case PGHW_CACHE:
+ return (IS_PANTHER(impla) && IS_PANTHER(implb) &&
+ pg_plat_cpus_share(cpu_a, cpu_b, PGHW_CHIP));
+ }
+ return (0);
+}
- /*
- * Define the chip's type
- */
- impl = cpunodes[cp->cpu_id].implementation;
+/*
+ * Return the id of the physical hardware instance of type "hw" that the
+ * given CPU belongs to.  Two CPUs that report the same id for a hardware
+ * type sit behind the same instance of it.  Returns -1 for hardware types
+ * this platform does not describe.
+ */
+id_t
+pg_plat_hw_instance_id(cpu_t *cpu, pghw_type_t hw)
+{
+	int	impl;
+
+	/*
+	 * Look the implementation up before dispatching on hw: both the
+	 * PGHW_IPIPE and the PGHW_CACHE cases depend on it.
+	 */
+	impl = cpunodes[cpu->cpu_id].implementation;
+
+	switch (hw) {
+	case PGHW_IPIPE:
+		if (IS_OLYMPUS_C(impl)) {
+			/*
+			 * Currently only Fujitsu Olympus-c processor supports
+			 * multi-stranded cores. Return the cpu_id with
+			 * the strand bit masked out.
+			 */
+			return ((id_t)((uint_t)cpu->cpu_id & ~(0x1)));
+		} else {
+			return (cpu->cpu_id);
+		}
+	case PGHW_CHIP:
+		return (cmp_cpu_to_chip(cpu->cpu_id));
+	case PGHW_CACHE:
+		/*
+		 * pg_plat_cpus_share() treats the cache as shared only
+		 * between Panther CPUs on the same chip, so the chip id is
+		 * the cache instance there.  It must be returned as an id,
+		 * not folded into a boolean, or every chip with a non-zero
+		 * id would report the same instance.  Other implementations
+		 * get an id unique to the CPU so that no two of them ever
+		 * compare equal.
+		 */
+		if (IS_PANTHER(impl))
+			return (cmp_cpu_to_chip(cpu->cpu_id));
+		else
+			return (cpu->cpu_id);
+	default:
+		return (-1);
+	}
+}
- if (IS_JAGUAR(impl)) {
- cd->chipd_type = CHIP_CMP_SPLIT_CACHE;
- } else if (IS_PANTHER(impl) || IS_OLYMPUS_C(impl)) {
- cd->chipd_type = CHIP_CMP_SHARED_CACHE;
- } else {
- cd->chipd_type = CHIP_DEFAULT;
+/*
+ * Map a hardware sharing type to its index in this platform's hierarchy
+ * table (PGHW_IPIPE, PGHW_CHIP, PGHW_CACHE, in that order).  Types that
+ * do not appear ahead of the PGHW_NUM_COMPONENTS terminator yield -1.
+ */
+int
+pg_plat_hw_level(pghw_type_t hw)
+{
+	static pghw_type_t hw_hier[] = {
+		PGHW_IPIPE,
+		PGHW_CHIP,
+		PGHW_CACHE,
+		PGHW_NUM_COMPONENTS	/* terminator */
+	};
+	int	level = 0;
+
+	while (hw_hier[level] != PGHW_NUM_COMPONENTS) {
+		if (hw_hier[level] == hw)
+			return (level);
+		level++;
 	}
+
+	return (-1);
+}
+
+/*
+ * The core id of a CPU on this platform is its PGHW_IPIPE instance id.
+ */
+id_t
+pg_plat_get_core_id(cpu_t *cp)
+{
+	id_t	core = pg_plat_hw_instance_id(cp, PGHW_IPIPE);
+
+	return (core);
+}
- /*
- * Define any needed adjustment of rechoose_interval
- * For now, all chips use the default. This
- * will change with future processors.
- */
- cd->chipd_rechoose_adj = 0;
- cd->chipd_nosteal = 100000ULL; /* 100 usecs */
+/*
+ * Platform hook that seeds nosteal_nsec, the interval used by
+ * disp_getbest().  This platform uses 100 microseconds.
+ */
+void
+cmp_set_nosteal_interval(void)
+{
+	nosteal_nsec = 100UL * 1000UL;	/* 100us expressed in nanoseconds */
 }
diff --git a/usr/src/uts/sun4v/cpu/generic.c b/usr/src/uts/sun4v/cpu/generic.c
index 0a6d9394f1..6c93042813 100644
--- a/usr/src/uts/sun4v/cpu/generic.c
+++ b/usr/src/uts/sun4v/cpu/generic.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -152,13 +152,20 @@ void
cpu_init_private(struct cpu *cp)
{
/*
- * The cpu_ipipe field is initialized based on the execution
- * unit sharing information from the Machine Description table.
- * It defaults to the CPU id in the absence of such information.
+ * The cpu_ipipe and cpu_fpu fields are initialized based on
+ * the execution unit sharing information from the Machine
+ * Description table. They default to the CPU id in the
+ * absence of such information.
*/
cp->cpu_m.cpu_ipipe = cpunodes[cp->cpu_id].exec_unit_mapping;
if (cp->cpu_m.cpu_ipipe == NO_EU_MAPPING_FOUND)
cp->cpu_m.cpu_ipipe = (id_t)(cp->cpu_id);
+
+ cp->cpu_m.cpu_fpu = cpunodes[cp->cpu_id].fpu_mapping;
+ if (cp->cpu_m.cpu_fpu == NO_EU_MAPPING_FOUND)
+ cp->cpu_m.cpu_fpu = (id_t)(cp->cpu_id);
+
+ cp->cpu_m.cpu_core = (id_t)(cp->cpu_id);
}
void
diff --git a/usr/src/uts/sun4v/cpu/niagara.c b/usr/src/uts/sun4v/cpu/niagara.c
index 125ca8e224..7ed9c3c641 100644
--- a/usr/src/uts/sun4v/cpu/niagara.c
+++ b/usr/src/uts/sun4v/cpu/niagara.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -173,14 +173,23 @@ cpu_init_private(struct cpu *cp)
extern int niagara_kstat_init(void);
/*
- * The cpu_ipipe field is initialized based on the execution
- * unit sharing information from the MD. It defaults to the
- * virtual CPU id in the absence of such information.
+ * The cpu_ipipe and cpu_fpu fields are initialized based on
+ * the execution unit sharing information from the MD. They default
+ * to the virtual CPU id in the absence of such information.
*/
cp->cpu_m.cpu_ipipe = cpunodes[cp->cpu_id].exec_unit_mapping;
if (cp->cpu_m.cpu_ipipe == NO_EU_MAPPING_FOUND)
cp->cpu_m.cpu_ipipe = (id_t)(cp->cpu_id);
+ cp->cpu_m.cpu_fpu = cpunodes[cp->cpu_id].fpu_mapping;
+ if (cp->cpu_m.cpu_fpu == NO_EU_MAPPING_FOUND)
+ cp->cpu_m.cpu_fpu = (id_t)(cp->cpu_id);
+
+ /*
+ * Niagara defines the core to be at the ipipe level
+ */
+ cp->cpu_m.cpu_core = cp->cpu_m.cpu_ipipe;
+
ASSERT(MUTEX_HELD(&cpu_lock));
if (niagara_cpucnt++ == 0 && niagara_hsvc_available == B_TRUE) {
(void) niagara_kstat_init();
diff --git a/usr/src/uts/sun4v/cpu/niagara2.c b/usr/src/uts/sun4v/cpu/niagara2.c
index 83f5e6733b..382352411d 100644
--- a/usr/src/uts/sun4v/cpu/niagara2.c
+++ b/usr/src/uts/sun4v/cpu/niagara2.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -161,14 +161,23 @@ cpu_init_private(struct cpu *cp)
extern int niagara_kstat_init(void);
/*
- * The cpu_ipipe field is initialized based on the execution
- * unit sharing information from the MD. It defaults to the
- * virtual CPU id in the absence of such information.
+ * The cpu_ipipe and cpu_fpu fields are initialized based on
+ * the execution unit sharing information from the MD. They default
+ * to the virtual CPU id in the absence of such information.
*/
cp->cpu_m.cpu_ipipe = cpunodes[cp->cpu_id].exec_unit_mapping;
if (cp->cpu_m.cpu_ipipe == NO_EU_MAPPING_FOUND)
cp->cpu_m.cpu_ipipe = (id_t)(cp->cpu_id);
+ cp->cpu_m.cpu_fpu = cpunodes[cp->cpu_id].fpu_mapping;
+ if (cp->cpu_m.cpu_fpu == NO_EU_MAPPING_FOUND)
+ cp->cpu_m.cpu_fpu = (id_t)(cp->cpu_id);
+
+ /*
+ * Niagara 2 defines the core to be at the FPU level
+ */
+ cp->cpu_m.cpu_core = cp->cpu_m.cpu_fpu;
+
ASSERT(MUTEX_HELD(&cpu_lock));
if ((niagara2_cpucnt++ == 0) && (niagara2_hsvc_available == B_TRUE))
(void) niagara_kstat_init();
diff --git a/usr/src/uts/sun4v/os/cmp.c b/usr/src/uts/sun4v/os/cmp.c
index c82f3e006e..7219b639bf 100644
--- a/usr/src/uts/sun4v/os/cmp.c
+++ b/usr/src/uts/sun4v/os/cmp.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -29,7 +28,7 @@
#include <sys/types.h>
#include <sys/machsystm.h>
#include <sys/cmp.h>
-#include <sys/chip.h>
+#include <sys/pghw.h>
/*
* Note: For now assume the chip ID as 0 for all the cpus until additional
@@ -89,40 +88,80 @@ cmp_cpu_to_chip(processorid_t cpuid)
return (0);
}
-/*
- * Return a chip "id" for the given cpu_t
- * cpu_t's residing on the same physical processor
- * should map to the same "id"
- */
-chipid_t
-chip_plat_get_chipid(cpu_t *cp)
+/*
+ * Report whether CPUs on this platform can share the hardware component
+ * "hw".  PGHW_IPIPE, PGHW_FPU and PGHW_CHIP always yield 1; every other
+ * hardware type yields 0.  The cp argument is not examined.
+ */
+/*ARGSUSED*/
+int
+pg_plat_hw_shared(cpu_t *cp, pghw_type_t hw)
{
- return (cmp_cpu_to_chip(cp->cpu_id));
+ switch (hw) {
+ case PGHW_IPIPE:
+ return (1);
+ case PGHW_FPU:
+ return (1);
+ case PGHW_CHIP:
+ return (1);
+ }
+ return (0);
}
-/*ARGSUSED*/
-void
-chip_plat_define_chip(cpu_t *cp, chip_def_t *cd)
+/*
+ * Decide whether cpu_a and cpu_b share the hardware component "hw".
+ *
+ * Returns 0 straight away when pg_plat_hw_shared() reports that either
+ * CPU cannot share "hw"; otherwise returns non-zero exactly when both
+ * CPUs report the same pg_plat_hw_instance_id() for it.
+ */
+int
+pg_plat_cpus_share(cpu_t *cpu_a, cpu_t *cpu_b, pghw_type_t hw)
{
- cd->chipd_type = CHIP_CMT;
-
- /*
- * Define any needed adjustment of rechoose_interval
- * For now, all chips use the default. This
- * will change with future processors.
- */
- cd->chipd_rechoose_adj = 0;
- cd->chipd_nosteal = 0;
+ if (pg_plat_hw_shared(cpu_a, hw) == 0 ||
+ pg_plat_hw_shared(cpu_b, hw) == 0)
+ return (0);
+
+ return (pg_plat_hw_instance_id(cpu_a, hw) ==
+ pg_plat_hw_instance_id(cpu_b, hw));
+}
+
+/*
+ * Return the id of the physical hardware instance of type "hw" that the
+ * given CPU belongs to:
+ *
+ *	PGHW_IPIPE	the CPU's cpu_m.cpu_ipipe
+ *	PGHW_FPU	the CPU's cpu_m.cpu_fpu
+ *	PGHW_CHIP	cmp_cpu_to_chip() of the CPU's id
+ *	anything else	-1
+ */
+id_t
+pg_plat_hw_instance_id(cpu_t *cpu, pghw_type_t hw)
+{
+	id_t	inst = -1;
+
+	switch (hw) {
+	case PGHW_IPIPE:
+		inst = cpu->cpu_m.cpu_ipipe;
+		break;
+	case PGHW_FPU:
+		inst = cpu->cpu_m.cpu_fpu;
+		break;
+	case PGHW_CHIP:
+		inst = cmp_cpu_to_chip(cpu->cpu_id);
+		break;
+	default:
+		/* unknown sharing relationship: keep the -1 default */
+		break;
+	}
+
+	return (inst);
 }
/*
- * Return a pipeline "id" for the given cpu_t
- * cpu_t's sharing the same instruction pipeline
- * should map to the same "id"
+ * Order the relevant hw sharing relationships
+ * from least, to greatest physical scope.
+ *
+ * The hierarchy *must* be defined for all hw that
+ * pg_plat_hw_shared() returns non-zero.
*/
+int
+pg_plat_hw_level(pghw_type_t hw)
+{
+	/* Hierarchy table, closed by the PGHW_NUM_COMPONENTS terminator. */
+	static pghw_type_t hw_hier[] = {
+		PGHW_IPIPE,
+		PGHW_FPU,
+		PGHW_CHIP,
+		PGHW_NUM_COMPONENTS
+	};
+	int	level = 0;
+
+	/* The level of a type is its index in the table. */
+	while (hw_hier[level] != PGHW_NUM_COMPONENTS) {
+		if (hw_hier[level] == hw)
+			return (level);
+		level++;
+	}
+
+	/* hw is not part of this platform's hierarchy */
+	return (-1);
+}
+/*
+ * Return the core id recorded for the given CPU in cpu_m.cpu_core.
+ */
id_t
-chip_plat_get_coreid(cpu_t *cp)
+pg_plat_get_core_id(cpu_t *cpu)
+{
+ return (cpu->cpu_m.cpu_core);
+}
+
+/*
+ * Platform hook that seeds nosteal_nsec; this platform sets it to 0.
+ */
+void
+cmp_set_nosteal_interval(void)
{
- return (cp->cpu_m.cpu_ipipe);
+ nosteal_nsec = 0;
}
diff --git a/usr/src/uts/sun4v/os/fillsysinfo.c b/usr/src/uts/sun4v/os/fillsysinfo.c
index c909f670fb..a2718cbed1 100644
--- a/usr/src/uts/sun4v/os/fillsysinfo.c
+++ b/usr/src/uts/sun4v/os/fillsysinfo.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -218,6 +218,7 @@ setup_exec_unit_mappings(md_t *mdp)
int idx, i, j;
processorid_t cpuid;
char *eunit_name = broken_md_flag ? "exec_unit" : "exec-unit";
+ enum eu_type { INTEGER, FPU } etype;
/*
* Find the cpu integer exec units - and
@@ -236,7 +237,8 @@ setup_exec_unit_mappings(md_t *mdp)
num_eunits = md_alloc_scan_dag(mdp, cpus_node, eunit_name,
"fwd", &eunit);
if (num_eunits > 0) {
- char *match_type = broken_md_flag ? "int" : "integer";
+ char *int_str = broken_md_flag ? "int" : "integer";
+ char *fpu_str = "fp";
/* Spin through and find all the integer exec units */
for (i = 0; i < num_eunits; i++) {
@@ -245,13 +247,19 @@ setup_exec_unit_mappings(md_t *mdp)
int vallen;
uint64_t lcpuid;
- /* ignore nodes with no type */
+ /* ignore nodes with no type */
if (md_get_prop_data(mdp, eunit[i], "type",
(uint8_t **)&val, &vallen)) continue;
for (p = val; *p != '\0'; p += strlen(p) + 1) {
- if (strcmp(p, match_type) == 0)
+ if (strcmp(p, int_str) == 0) {
+ etype = INTEGER;
goto found;
+ }
+ if (strcmp(p, fpu_str) == 0) {
+ etype = FPU;
+ goto found;
+ }
}
continue;
@@ -275,7 +283,14 @@ found:
if (lcpuid >= NCPU)
continue;
cpuid = (processorid_t)lcpuid;
- cpunodes[cpuid].exec_unit_mapping = idx;
+ switch (etype) {
+ case INTEGER:
+ cpunodes[cpuid].exec_unit_mapping = idx;
+ break;
+ case FPU:
+ cpunodes[cpuid].fpu_mapping = idx;
+ break;
+ }
}
md_free_scan_dag(mdp, &node);
}
diff --git a/usr/src/uts/sun4v/sys/machcpuvar.h b/usr/src/uts/sun4v/sys/machcpuvar.h
index e0f4f2162b..632a5c767b 100644
--- a/usr/src/uts/sun4v/sys/machcpuvar.h
+++ b/usr/src/uts/sun4v/sys/machcpuvar.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -159,7 +159,9 @@ struct machcpu {
uint64_t mondo_data_ra; /* mono data pa */
uint16_t *cpu_list; /* uint16_t [NCPU] */
uint64_t cpu_list_ra; /* cpu list ra */
- id_t cpu_ipipe; /* cpu exec unit id */
+ id_t cpu_ipipe; /* cpu int exec unit id */
+ id_t cpu_fpu; /* cpu fpu unit id */
+ id_t cpu_core; /* cpu core id */
kthread_t *startup_thread;
};
@@ -213,6 +215,7 @@ struct cpu_node {
int ecache_setsize;
uint64_t device_id;
id_t exec_unit_mapping;
+ id_t fpu_mapping;
};
extern struct cpu_node cpunodes[];