diff options
author | esaxe <none@none> | 2007-01-17 18:01:29 -0800 |
---|---|---|
committer | esaxe <none@none> | 2007-01-17 18:01:29 -0800 |
commit | fb2f18f820d90b001aea4fb27dd654bc1263c440 (patch) | |
tree | 4b88b69e1244f360a85d70294a4498ecf57ca283 /usr/src | |
parent | 9a7670889e9c36ec355371e6b02f2d9084f040dc (diff) | |
download | illumos-joyent-fb2f18f820d90b001aea4fb27dd654bc1263c440.tar.gz |
6461311 multi-level CMT scheduling optimizations
6509639 cpu0 is not in the right chip_t if its chipid is not zero
--HG--
rename : usr/src/uts/common/os/chip.c => deleted_files/usr/src/uts/common/os/chip.c
rename : usr/src/uts/common/sys/chip.h => deleted_files/usr/src/uts/common/sys/chip.h
Diffstat (limited to 'usr/src')
64 files changed, 4027 insertions, 1303 deletions
diff --git a/usr/src/cmd/mdb/common/modules/genunix/genunix.c b/usr/src/cmd/mdb/common/modules/genunix/genunix.c index 9863d8a241..66acba369f 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/genunix.c +++ b/usr/src/cmd/mdb/common/modules/genunix/genunix.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -74,6 +74,8 @@ #include "devinfo.h" #include "leaky.h" #include "lgrp.h" +#include "pg.h" +#include "group.h" #include "list.h" #include "log.h" #include "kgrep.h" @@ -3422,6 +3424,12 @@ static const mdb_dcmd_t dcmds[] = { { NVLIST_DCMD_NAME, NVLIST_DCMD_USAGE, NVLIST_DCMD_DESCR, print_nvlist }, + /* from pg.c */ + { "pg", "?[-q]", "display a pg", pg}, + /* from group.c */ + { "group", "?[-q]", "display a group", group}, + + /* from log.c */ /* from rctl.c */ { "rctl_dict", "?", "print systemwide default rctl definitions", rctl_dict }, @@ -3714,6 +3722,10 @@ static const mdb_walker_t walkers[] = { { "lgrp_rsrc_cpu", "walk lgroup CPU resources of given lgroup", lgrp_rsrc_cpu_walk_init, lgrp_set_walk_step, NULL }, + /* from group.c */ + { "group", "walk all elements of a group", + group_walk_init, group_walk_step, NULL }, + /* from list.c */ { LIST_WALK_NAME, LIST_WALK_DESC, list_walk_init, list_walk_step, list_walk_fini }, diff --git a/usr/src/cmd/mdb/common/modules/genunix/group.c b/usr/src/cmd/mdb/common/modules/genunix/group.c new file mode 100644 index 0000000000..6040ba4939 --- /dev/null +++ b/usr/src/cmd/mdb/common/modules/genunix/group.c @@ -0,0 +1,179 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Display group information and walk all elements of a group + */ + +#include "group.h" + +#include <mdb/mdb_modapi.h> +#include <sys/group.h> + +/* + * Display group information + */ + +/* ARGSUSED */ +int +group(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + group_t group; + int opt_q = 0; /* display only address. */ + + /* Should provide an address */ + if (!(flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); + + if (mdb_getopts(argc, argv, + 'q', MDB_OPT_SETBITS, TRUE, &opt_q, + NULL) != argc) + return (DCMD_USAGE); + + if (flags & DCMD_PIPE_OUT) + opt_q = B_TRUE; + + if (DCMD_HDRSPEC(flags) && !opt_q) { + mdb_printf("%?s %6s %9s %?s\n", + "ADDR", + "SIZE", + "CAPACITY", + "SET"); + } + + if (mdb_vread(&group, sizeof (struct group), addr) == -1) { + mdb_warn("unable to read 'group' at %p", addr); + return (DCMD_ERR); + } + + if (opt_q) { + mdb_printf("%0?p\n", addr); + return (DCMD_OK); + } + + mdb_printf("%?p %6d %9d %?p\n", + addr, group.grp_size, group.grp_capacity, group.grp_set); + + return (DCMD_OK); +} + +/* + * Walk all elements in the group set. + */ + +typedef struct group_walk { + uintptr_t *gw_set; + int gw_size; + int gw_pos; + int gw_initialized; +} group_walk_t; + + +/* + * Initialize the walk structure with the copy of a group set, its size and the + * initial pointer position. 
+ */ +int +group_walk_init(mdb_walk_state_t *wsp) +{ + group_walk_t *gw; + group_t group; + + gw = mdb_alloc(sizeof (group_walk_t), UM_SLEEP | UM_GC); + + if (mdb_vread(&group, sizeof (struct group), wsp->walk_addr) == -1) { + mdb_warn("couldn't read 'group' at %p", wsp->walk_addr); + return (WALK_ERR); + } + + gw->gw_size = group.grp_size; + gw->gw_initialized = 0; + gw->gw_pos = 0; + + if (gw->gw_size < 0) { + mdb_warn("invalid group at %p", wsp->walk_addr); + return (WALK_ERR); + } + + if (gw->gw_size == 0) + return (WALK_DONE); + + /* + * Allocate space for the set and copy all set entries. + */ + gw->gw_set = mdb_alloc(group.grp_size * sizeof (uintptr_t), + UM_SLEEP | UM_GC); + + if (mdb_vread(gw->gw_set, group.grp_size * sizeof (uintptr_t), + (uintptr_t)group.grp_set) == -1) { + mdb_warn("couldn't read 'group set' at %p", group.grp_set); + return (WALK_ERR); + } + + wsp->walk_data = gw; + wsp->walk_addr = gw->gw_set[0]; + gw->gw_pos = 0; + + return (WALK_NEXT); +} + +/* + * Print element of the set and advance the pointer. + */ +int +group_walk_step(mdb_walk_state_t *wsp) +{ + group_walk_t *gw = (group_walk_t *)wsp->walk_data; + int status; + + /* + * Already visited all valid elements, nothing else to do. + */ + if (gw->gw_size < 0) + return (WALK_DONE); + + /* + * Print non-NULL elements + */ + status = wsp->walk_addr == NULL ? 
+ WALK_NEXT : + wsp->walk_callback(wsp->walk_addr, wsp->walk_data, + wsp->walk_cbdata); + + /* + * Adjust walk_addr to point to the next element + */ + gw->gw_size--; + + if (gw->gw_size > 0) + wsp->walk_addr = gw->gw_set[++gw->gw_pos]; + else + status = WALK_DONE; + + return (status); +} diff --git a/usr/src/cmd/mdb/common/modules/genunix/group.h b/usr/src/cmd/mdb/common/modules/genunix/group.h new file mode 100644 index 0000000000..3d8ca22097 --- /dev/null +++ b/usr/src/cmd/mdb/common/modules/genunix/group.h @@ -0,0 +1,50 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _MDB_GROUP_H +#define _MDB_GROUP_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Block comment that describes the contents of this file. 
+ */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <mdb/mdb_modapi.h> + +int group(uintptr_t, uint_t, int, const mdb_arg_t *); +int group_walk_init(mdb_walk_state_t *); +int group_walk_step(mdb_walk_state_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _MDB_GROUP_H */ diff --git a/usr/src/cmd/mdb/common/modules/genunix/pg.c b/usr/src/cmd/mdb/common/modules/genunix/pg.c new file mode 100644 index 0000000000..4d23c08bc5 --- /dev/null +++ b/usr/src/cmd/mdb/common/modules/genunix/pg.c @@ -0,0 +1,141 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Display processor group information + */ + +#include "pg.h" + +#include <mdb/mdb_modapi.h> +#include <sys/pghw.h> + +/* + * PG hardware types indexed by hardware ID + */ +char *pg_hw_names[] = { + "hw", + "ipipe", + "cache", + "fpu", + "mpipe/chip", + "memory", +}; + +#define A_CNT(arr) (sizeof (arr) / sizeof (arr[0])) + +#define NHW A_CNT(pg_hw_names) + +/* + * Convert HW id to symbolic name + */ +static char * +pg_hw_name(int hw) +{ + return ((hw < 0 || hw > NHW) ? 
"UNKNOWN" : pg_hw_names[hw]); +} + +/* + * Display processor group. + */ +/* ARGSUSED */ +int +pg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + pg_t pg; + pghw_t pghw; + pg_class_t pg_class; + int opt_q = 0; /* display only address. */ + + /* Should provide an address */ + if (! (flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); + + if (mdb_getopts(argc, argv, + 'q', MDB_OPT_SETBITS, TRUE, &opt_q, + NULL) != argc) + return (DCMD_USAGE); + + if (flags & DCMD_PIPE_OUT) + opt_q = B_TRUE; + + if (DCMD_HDRSPEC(flags) && !opt_q) { + mdb_printf("%6s %?s %6s %7s %9s %5s\n", + "PGID", + "ADDR", + "PHYSID", + "CLASS", + "HARDWARE", + "#CPUs"); + } + + /* + * Read pg at specified address + */ + if (mdb_vread(&pg, sizeof (struct pg), addr) == -1) { + mdb_warn("unable to read 'pg' at %p", addr); + return (DCMD_ERR); + } + + /* + * In quiet mode just print pg address + */ + if (opt_q) { + mdb_printf("%0?p\n", addr); + return (DCMD_OK); + } + + if (mdb_vread(&pg_class, sizeof (struct pg_class), + (uintptr_t)pg.pg_class) == -1) { + mdb_warn("unable to read 'pg_class' at %p", pg.pg_class); + return (DCMD_ERR); + } + + if (pg.pg_relation == PGR_PHYSICAL) { + if (mdb_vread(&pghw, sizeof (struct pghw), addr) == -1) { + mdb_warn("unable to read 'pghw' at %p", addr); + return (DCMD_ERR); + } + /* + * Display the physical PG info. + */ + mdb_printf("%6d %?p %6d %7s %9s %5d\n", + pg.pg_id, addr, pghw.pghw_instance, + pg_class.pgc_name, pg_hw_name(pghw.pghw_hw), + pg.pg_cpus.grp_size); + } else { + /* + * Display the basic PG info. 
+ */ + mdb_printf("%6d %?p %7s %5d\n", + pg.pg_id, addr, pg_class.pgc_name, + pg.pg_cpus.grp_size); + } + + return (DCMD_OK); +} diff --git a/usr/src/cmd/mdb/common/modules/genunix/pg.h b/usr/src/cmd/mdb/common/modules/genunix/pg.h new file mode 100644 index 0000000000..1c87475cab --- /dev/null +++ b/usr/src/cmd/mdb/common/modules/genunix/pg.h @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _MDB_PG_H +#define _MDB_PG_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Block comment that describes the contents of this file. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <mdb/mdb_modapi.h> + +int pg(uintptr_t, uint_t, int, const mdb_arg_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _MDB_PG_H */ diff --git a/usr/src/cmd/mdb/intel/amd64/genunix/Makefile b/usr/src/cmd/mdb/intel/amd64/genunix/Makefile index 04c22008ba..801d16db46 100644 --- a/usr/src/cmd/mdb/intel/amd64/genunix/Makefile +++ b/usr/src/cmd/mdb/intel/amd64/genunix/Makefile @@ -21,7 +21,7 @@ # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -40,6 +40,7 @@ COMMONSRCS = \ findstack.c \ fm.c \ genunix.c \ + group.c \ kgrep.c \ kmem.c \ ldi.c \ @@ -55,6 +56,7 @@ COMMONSRCS = \ ndievents.c \ net.c \ nvpair.c \ + pg.c \ rctl.c \ sobj.c \ streams.c \ diff --git a/usr/src/cmd/mdb/intel/ia32/Makefile b/usr/src/cmd/mdb/intel/ia32/Makefile index 4a0e384174..7bf0dc08b2 100644 --- a/usr/src/cmd/mdb/intel/ia32/Makefile +++ b/usr/src/cmd/mdb/intel/ia32/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" diff --git a/usr/src/cmd/mdb/intel/ia32/genunix/Makefile b/usr/src/cmd/mdb/intel/ia32/genunix/Makefile index e02aea1345..63609d42b2 100644 --- a/usr/src/cmd/mdb/intel/ia32/genunix/Makefile +++ b/usr/src/cmd/mdb/intel/ia32/genunix/Makefile @@ -21,7 +21,7 @@ # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -40,6 +40,7 @@ COMMONSRCS = \ findstack.c \ fm.c \ genunix.c \ + group.c \ kgrep.c \ kmem.c \ ldi.c \ @@ -55,6 +56,7 @@ COMMONSRCS = \ ndievents.c \ net.c \ nvpair.c \ + pg.c \ rctl.c \ sobj.c \ streams.c \ diff --git a/usr/src/cmd/mdb/sparc/v9/genunix/Makefile b/usr/src/cmd/mdb/sparc/v9/genunix/Makefile index 4e67aae365..2cf7038d9e 100644 --- a/usr/src/cmd/mdb/sparc/v9/genunix/Makefile +++ b/usr/src/cmd/mdb/sparc/v9/genunix/Makefile @@ -21,7 +21,7 @@ # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# #ident "%Z%%M% %I% %E% SMI" @@ -40,6 +40,7 @@ COMMONSRCS = \ findstack.c \ fm.c \ genunix.c \ + group.c \ kgrep.c \ kmem.c \ ldi.c \ @@ -55,6 +56,7 @@ COMMONSRCS = \ ndievents.c \ net.c \ nvpair.c \ + pg.c \ rctl.c \ sobj.c \ streams.c \ diff --git a/usr/src/pkgdefs/SUNWhea/prototype_com b/usr/src/pkgdefs/SUNWhea/prototype_com index b10c0e5dd6..fc2a8c7d1e 100644 --- a/usr/src/pkgdefs/SUNWhea/prototype_com +++ b/usr/src/pkgdefs/SUNWhea/prototype_com @@ -529,6 +529,7 @@ f none usr/include/sys/avl.h 644 root bin f none usr/include/sys/avl_impl.h 644 root bin f none usr/include/sys/battery.h 644 root bin f none usr/include/sys/bitmap.h 644 root bin +f none usr/include/sys/bitset.h 644 root bin f none usr/include/sys/bl.h 644 root bin f none usr/include/sys/bofi.h 644 root bin f none usr/include/sys/bofi_impl.h 644 root bin @@ -544,7 +545,6 @@ f none usr/include/sys/callb.h 644 root bin f none usr/include/sys/callo.h 644 root bin f none usr/include/sys/ccompile.h 644 root bin f none usr/include/sys/cdio.h 644 root bin -f none usr/include/sys/chip.h 644 root bin f none usr/include/sys/cis.h 644 root bin f none usr/include/sys/cis_handlers.h 644 root bin f none usr/include/sys/cis_protos.h 644 root bin @@ -756,6 +756,7 @@ f none usr/include/sys/fssnap_if.h 644 root bin f none usr/include/sys/fstyp.h 644 root bin f none usr/include/sys/ftrace.h 644 root bin f none usr/include/sys/gfs.h 644 root bin +f none usr/include/sys/group.h 644 root bin f none usr/include/sys/hdio.h 644 root bin f none usr/include/sys/hook.h 644 root bin f none usr/include/sys/hook_event.h 644 root bin @@ -938,6 +939,8 @@ f none usr/include/sys/pem.h 644 root bin f none usr/include/sys/physmem.h 644 root bin f none usr/include/sys/serializer.h 644 root bin f none usr/include/sys/pfmod.h 644 root bin +f none usr/include/sys/pg.h 644 root bin +f none usr/include/sys/pghw.h 644 root bin f none usr/include/sys/pm.h 0644 root bin f none usr/include/sys/pool.h 644 root bin f none 
usr/include/sys/pool_impl.h 644 root bin diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index d16d18b02d..b7f1d6b864 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -40,13 +40,15 @@ sparc_CORE_OBJS += COMMON_CORE_OBJS += \ atomic.o \ + bitset.o \ bp_map.o \ brand.o \ - chip.o \ + cmt.o \ cpu.o \ cpu_intr.o \ cpupart.o \ disp.o \ + group.o \ kstat_fr.o \ lgrp.o \ lgrp_topo.o \ @@ -55,6 +57,8 @@ COMMON_CORE_OBJS += \ page_retire.o \ panic.o \ param.o \ + pg.o \ + pghw.o \ putnext.o \ rctl_proc.o \ rwlock.o \ diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c index 0385139384..815ef1dd46 100644 --- a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c +++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -57,6 +57,7 @@ #include <sys/pool_pset.h> #include <sys/pset.h> #include <sys/zone.h> +#include <sys/pghw.h> /* Dependent on the Solaris procfs */ extern kthread_t *prchoose(proc_t *); @@ -1861,7 +1862,8 @@ lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) */ lxpr_uiobuf_printf(uiobuf, "physical id\t: %lu\n" - "siblings\t: %u\n", chip_plat_get_chipid(cp), + "siblings\t: %u\n", + pg_plat_hw_instance_id(cp, PGHW_CHIP), cpuid_get_ncpu_per_chip(cp)); } diff --git a/usr/src/uts/common/conf/param.c b/usr/src/uts/common/conf/param.c index 6b39fd54b2..bdce7ec148 100644 --- a/usr/src/uts/common/conf/param.c +++ b/usr/src/uts/common/conf/param.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -191,6 +191,9 @@ extern void deadman_init(void); extern void clock_timer_init(void); extern void clock_realtime_init(void); extern void clock_highres_init(void); +extern void pg_init(void); +extern void pg_cmt_class_init(void); +extern void pg_cpu0_init(void); void (*init_tbl[])(void) = { system_taskq_init, @@ -208,6 +211,9 @@ void (*init_tbl[])(void) = { anon_init, segvn_init, flk_init, + pg_init, + pg_cmt_class_init, + pg_cpu0_init, schedctl_init, fdb_init, deadman_init, diff --git a/usr/src/uts/common/disp/cmt.c b/usr/src/uts/common/disp/cmt.c new file mode 100644 index 0000000000..1bf0704346 --- /dev/null +++ b/usr/src/uts/common/disp/cmt.c @@ -0,0 +1,804 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/thread.h> +#include <sys/cpuvar.h> +#include <sys/cpupart.h> +#include <sys/kmem.h> +#include <sys/cmn_err.h> +#include <sys/kstat.h> +#include <sys/processor.h> +#include <sys/disp.h> +#include <sys/group.h> +#include <sys/pghw.h> +#include <sys/bitset.h> +#include <sys/lgrp.h> +#include <sys/cmt.h> + +/* + * CMT scheduler / dispatcher support + * + * This file implements CMT scheduler support using Processor Groups. + * The CMT processor group class creates and maintains the CMT class + * specific processor group pg_cmt_t. + * + * ---------------------------- <-- pg_cmt_t * + * | pghw_t | + * ---------------------------- + * | CMT class specific data | + * | - hierarchy linkage | + * | - CMT load balancing data| + * | - active CPU group/bitset| + * ---------------------------- + * + * The scheduler/dispatcher leverages knowledge of the performance + * relevant CMT sharing relationships existing between cpus to implement + * optimized affinity and load balancing policies. + * + * Load balancing policy seeks to improve performance by minimizing + * contention over shared processor resources / facilities, while the + * affinity policies seek to improve cache and TLB utilization. + * + * The CMT PGs created by this class are already arranged into a + * hierarchy (which is done in the pghw layer). To implement the top-down + * CMT load balancing algorithm, the CMT PGs additionally maintain + * parent, child and sibling hierarchy relationships. + * Parent PGs always contain a superset of their children(s) resources, + * each PG can have at most one parent, and siblings are the group of PGs + * sharing the same parent. + * + * On NUMA systems, the CMT load balancing algorithm balances across the + * CMT PGs within their respective lgroups. On UMA based system, there + * exists a top level group of PGs to balance across. 
On NUMA systems multiple + * top level groups are instantiated, where the top level balancing begins by + * balancng across the CMT PGs within their respective (per lgroup) top level + * groups. + */ + +typedef struct cmt_lgrp { + group_t cl_pgs; /* Top level group of active CMT PGs */ + int cl_npgs; /* # of top level PGs in the lgroup */ + lgrp_handle_t cl_hand; /* lgroup's platform handle */ + struct cmt_lgrp *cl_next; /* next cmt_lgrp */ +} cmt_lgrp_t; + +static cmt_lgrp_t *cmt_lgrps = NULL; + +static int is_cpu0 = 1; +static int cmt_sched_disabled = 0; + +static pg_cid_t pg_cmt_class_id; /* PG class id */ + +static pg_t *pg_cmt_alloc(); +static void pg_cmt_free(pg_t *); +static void pg_cmt_cpu_init(cpu_t *); +static void pg_cmt_cpu_fini(cpu_t *); +static void pg_cmt_cpu_active(cpu_t *); +static void pg_cmt_cpu_inactive(cpu_t *); +static void pg_cmt_cpupart_in(cpu_t *, cpupart_t *); +static void pg_cmt_cpupart_move(cpu_t *, cpupart_t *, cpupart_t *); +static void pg_cmt_hier_pack(pg_cmt_t **, int); +static int pg_cmt_cpu_belongs(pg_t *, cpu_t *); +static int pg_cmt_hw(pghw_type_t); +static cmt_lgrp_t *pg_cmt_find_lgrp(lgrp_handle_t); + +/* + * Macro to test if PG is managed by the CMT PG class + */ +#define IS_CMT_PG(pg) (((pg_t *)(pg))->pg_class->pgc_id == pg_cmt_class_id) + +/* + * CMT PG ops + */ +struct pg_ops pg_ops_cmt = { + pg_cmt_alloc, + pg_cmt_free, + pg_cmt_cpu_init, + pg_cmt_cpu_fini, + pg_cmt_cpu_active, + pg_cmt_cpu_inactive, + pg_cmt_cpupart_in, + NULL, /* cpupart_out */ + pg_cmt_cpupart_move, + pg_cmt_cpu_belongs, +}; + +/* + * Initialize the CMT PG class + */ +void +pg_cmt_class_init(void) +{ + if (cmt_sched_disabled) + return; + + pg_cmt_class_id = pg_class_register("cmt", &pg_ops_cmt, PGR_PHYSICAL); +} + +/* + * Called to indicate a new CPU has started up so + * that either t0 or the slave startup thread can + * be accounted for. 
+ */ +void +pg_cmt_cpu_startup(cpu_t *cp) +{ + PG_NRUN_UPDATE(cp, 1); +} + +/* + * Adjust the CMT load in the CMT PGs in which the CPU belongs + * Note that "n" can be positive in the case of increasing + * load, or negative in the case of decreasing load. + */ +void +pg_cmt_load(cpu_t *cp, int n) +{ + pg_cmt_t *pg; + + pg = (pg_cmt_t *)cp->cpu_pg->cmt_lineage; + while (pg != NULL) { + ASSERT(IS_CMT_PG(pg)); + atomic_add_32(&pg->cmt_nrunning, n); + pg = pg->cmt_parent; + } +} + +/* + * Return non-zero if thread can migrate between "from" and "to" + * without a performance penalty + */ +int +pg_cmt_can_migrate(cpu_t *from, cpu_t *to) +{ + if (from->cpu_physid->cpu_cacheid == + to->cpu_physid->cpu_cacheid) + return (1); + return (0); +} + +/* + * CMT class specific PG allocation + */ +static pg_t * +pg_cmt_alloc(void) +{ + return (kmem_zalloc(sizeof (pg_cmt_t), KM_NOSLEEP)); +} + +/* + * Class specific PG de-allocation + */ +static void +pg_cmt_free(pg_t *pg) +{ + ASSERT(pg != NULL); + ASSERT(IS_CMT_PG(pg)); + + kmem_free((pg_cmt_t *)pg, sizeof (pg_cmt_t)); +} + +/* + * Return 1 if CMT load balancing policies should be + * implemented across instances of the specified hardware + * sharing relationship. + */ +static int +pg_cmt_load_bal_hw(pghw_type_t hw) +{ + if (hw == PGHW_IPIPE || + hw == PGHW_FPU || + hw == PGHW_CHIP) + return (1); + else + return (0); +} + +/* + * Return 1 if thread affinity polices should be implemented + * for instances of the specifed hardware sharing relationship. + */ +static int +pg_cmt_affinity_hw(pghw_type_t hw) +{ + if (hw == PGHW_CACHE) + return (1); + else + return (0); +} + +/* + * Return 1 if CMT scheduling policies should be impelmented + * for the specified hardware sharing relationship. 
+ */ +static int +pg_cmt_hw(pghw_type_t hw) +{ + return (pg_cmt_load_bal_hw(hw) || + pg_cmt_affinity_hw(hw)); +} + +/* + * CMT class callback for a new CPU entering the system + */ +static void +pg_cmt_cpu_init(cpu_t *cp) +{ + pg_cmt_t *pg; + group_t *cmt_pgs; + int level, max_level, nlevels; + pghw_type_t hw; + pg_t *pg_cache = NULL; + pg_cmt_t *cpu_cmt_hier[PGHW_NUM_COMPONENTS]; + lgrp_handle_t lgrp_handle; + cmt_lgrp_t *lgrp; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * A new CPU is coming into the system. + * Interrogate the platform to see if the CPU + * has any performance relevant CMT sharing + * relationships + */ + cmt_pgs = &cp->cpu_pg->cmt_pgs; + cp->cpu_pg->cmt_lineage = NULL; + + bzero(cpu_cmt_hier, sizeof (cpu_cmt_hier)); + max_level = nlevels = 0; + for (hw = PGHW_START; hw < PGHW_NUM_COMPONENTS; hw++) { + + /* + * We're only interested in CMT hw sharing relationships + */ + if (pg_cmt_hw(hw) == 0 || pg_plat_hw_shared(cp, hw) == 0) + continue; + + /* + * Find (or create) the PG associated with + * the hw sharing relationship in which cp + * belongs. + * + * Determine if a suitable PG already + * exists, or if one needs to be created. + */ + pg = (pg_cmt_t *)pghw_place_cpu(cp, hw); + if (pg == NULL) { + /* + * Create a new one. + * Initialize the common... + */ + pg = (pg_cmt_t *)pg_create(pg_cmt_class_id); + + /* ... physical ... */ + pghw_init((pghw_t *)pg, cp, hw); + + /* + * ... and CMT specific portions of the + * structure. 
+ */ + bitset_init(&pg->cmt_cpus_actv_set); + group_create(&pg->cmt_cpus_actv); + } else { + ASSERT(IS_CMT_PG(pg)); + } + + /* Add the CPU to the PG */ + pg_cpu_add((pg_t *)pg, cp); + + /* + * Ensure capacity of the active CPUs group/bitset + */ + group_expand(&pg->cmt_cpus_actv, + GROUP_SIZE(&((pg_t *)pg)->pg_cpus)); + + if (cp->cpu_seqid >= + bitset_capacity(&pg->cmt_cpus_actv_set)) { + bitset_resize(&pg->cmt_cpus_actv_set, + cp->cpu_seqid + 1); + } + + /* + * Build a lineage of CMT PGs for load balancing + */ + if (pg_cmt_load_bal_hw(hw)) { + level = pghw_level(hw); + cpu_cmt_hier[level] = pg; + if (level > max_level) + max_level = level; + nlevels++; + } + + /* Cache this for later */ + if (hw == PGHW_CACHE) + pg_cache = (pg_t *)pg; + } + + /* + * Pack out any gaps in the constructed lineage. + * Gaps may exist where the architecture knows + * about a hardware sharing relationship, but such a + * relationship either isn't relevant for load + * balancing or doesn't exist between CPUs on the system. + */ + pg_cmt_hier_pack(cpu_cmt_hier, max_level + 1); + + /* + * For each of the PGs int the CPU's lineage: + * - Add an entry in the CPU sorted CMT PG group + * which is used for top down CMT load balancing + * - Tie the PG into the CMT hierarchy by connecting + * it to it's parent and siblings. 
+ */ + group_expand(cmt_pgs, nlevels); + + /* + * Find the lgrp that encapsulates this CPU's CMT hierarchy + */ + lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id); + lgrp = pg_cmt_find_lgrp(lgrp_handle); + + for (level = 0; level < nlevels; level++) { + uint_t children; + int err; + + pg = cpu_cmt_hier[level]; + err = group_add_at(cmt_pgs, pg, nlevels - level - 1); + ASSERT(err == 0); + + if (level == 0) + cp->cpu_pg->cmt_lineage = (pg_t *)pg; + + if (pg->cmt_siblings != NULL) { + /* Already initialized */ + ASSERT(pg->cmt_parent == NULL || + pg->cmt_parent == cpu_cmt_hier[level + 1]); + ASSERT(pg->cmt_siblings == &lgrp->cl_pgs || + pg->cmt_siblings == pg->cmt_parent->cmt_children); + continue; + } + + if ((level + 1) == nlevels) { + pg->cmt_parent = NULL; + pg->cmt_siblings = &lgrp->cl_pgs; + children = ++lgrp->cl_npgs; + } else { + pg->cmt_parent = cpu_cmt_hier[level + 1]; + + /* + * A good parent keeps track of their children. + * The parent's children group is also the PG's + * siblings. + */ + if (pg->cmt_parent->cmt_children == NULL) { + pg->cmt_parent->cmt_children = + kmem_zalloc(sizeof (group_t), KM_SLEEP); + group_create(pg->cmt_parent->cmt_children); + } + pg->cmt_siblings = pg->cmt_parent->cmt_children; + children = ++pg->cmt_parent->cmt_nchildren; + } + pg->cmt_hint = 0; + group_expand(pg->cmt_siblings, children); + } + + /* + * Cache the chip and core IDs in the cpu_t->cpu_physid structure + * for fast lookups later. 
+ */ + if (cp->cpu_physid) { + cp->cpu_physid->cpu_chipid = + pg_plat_hw_instance_id(cp, PGHW_CHIP); + cp->cpu_physid->cpu_coreid = pg_plat_get_core_id(cp); + + /* + * If this cpu has a PG representing shared cache, then set + * cpu_cacheid to that PG's logical id + */ + if (pg_cache) + cp->cpu_physid->cpu_cacheid = pg_cache->pg_id; + } + + /* CPU0 only initialization */ + if (is_cpu0) { + pg_cmt_cpu_startup(cp); + is_cpu0 = 0; + } + +} + +/* + * Class callback when a CPU is leaving the system (deletion) + */ +static void +pg_cmt_cpu_fini(cpu_t *cp) +{ + group_iter_t i; + pg_cmt_t *pg; + group_t *pgs, *cmt_pgs; + lgrp_handle_t lgrp_handle; + cmt_lgrp_t *lgrp; + + pgs = &cp->cpu_pg->pgs; + cmt_pgs = &cp->cpu_pg->cmt_pgs; + + /* + * Find the lgroup that encapsulates this CPU's CMT hierarchy + */ + lgrp_handle = lgrp_plat_cpu_to_hand(cp->cpu_id); + lgrp = pg_cmt_find_lgrp(lgrp_handle); + + /* + * First, clean up anything load balancing specific for each of + * the CPU's PGs that participated in CMT load balancing + */ + pg = (pg_cmt_t *)cp->cpu_pg->cmt_lineage; + while (pg != NULL) { + + /* + * Remove the PG from the CPU's load balancing lineage + */ + (void) group_remove(cmt_pgs, pg, GRP_RESIZE); + + /* + * If it's about to become empty, destroy it's children + * group, and remove it's reference from it's siblings. + * This is done here (rather than below) to avoid removing + * our reference from a PG that we just eliminated. + */ + if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 1) { + if (pg->cmt_children != NULL) + group_destroy(pg->cmt_children); + if (pg->cmt_siblings != NULL) { + if (pg->cmt_siblings == &lgrp->cl_pgs) + lgrp->cl_npgs--; + else + pg->cmt_parent->cmt_nchildren--; + } + } + pg = pg->cmt_parent; + } + + ASSERT(GROUP_SIZE(cmt_pgs) == 0); + + /* + * Now that the load balancing lineage updates have happened, + * remove the CPU from all it's PGs (destroying any that become + * empty). 
+ */ + group_iter_init(&i); + while ((pg = group_iterate(pgs, &i)) != NULL) { + if (IS_CMT_PG(pg) == 0) + continue; + + pg_cpu_delete((pg_t *)pg, cp); + /* + * Deleting the CPU from the PG changes the CPU's + * PG group over which we are actively iterating + * Re-initialize the iteration + */ + group_iter_init(&i); + + if (GROUP_SIZE(&((pg_t *)pg)->pg_cpus) == 0) { + + /* + * The PG has become zero sized, so destroy it. + */ + group_destroy(&pg->cmt_cpus_actv); + bitset_fini(&pg->cmt_cpus_actv_set); + pghw_fini((pghw_t *)pg); + + pg_destroy((pg_t *)pg); + } + } +} + +/* + * Class callback when a CPU is entering a cpu partition + */ +static void +pg_cmt_cpupart_in(cpu_t *cp, cpupart_t *pp) +{ + group_t *pgs; + pg_t *pg; + group_iter_t i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + pgs = &cp->cpu_pg->pgs; + + /* + * Ensure that the new partition's PG bitset + * is large enough for all CMT PG's to which cp + * belongs + */ + group_iter_init(&i); + while ((pg = group_iterate(pgs, &i)) != NULL) { + if (IS_CMT_PG(pg) == 0) + continue; + + if (bitset_capacity(&pp->cp_cmt_pgs) <= pg->pg_id) + bitset_resize(&pp->cp_cmt_pgs, pg->pg_id + 1); + } +} + +/* + * Class callback when a CPU is actually moving partitions + */ +static void +pg_cmt_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp) +{ + cpu_t *cpp; + group_t *pgs; + pg_t *pg; + group_iter_t pg_iter; + pg_cpu_itr_t cpu_iter; + boolean_t found; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + pgs = &cp->cpu_pg->pgs; + group_iter_init(&pg_iter); + + /* + * Iterate over the CPUs CMT PGs + */ + while ((pg = group_iterate(pgs, &pg_iter)) != NULL) { + + if (IS_CMT_PG(pg) == 0) + continue; + + /* + * Add the PG to the bitset in the new partition. + */ + bitset_add(&newpp->cp_cmt_pgs, pg->pg_id); + + /* + * Remove the PG from the bitset in the old partition + * if the last of the PG's CPUs have left. 
+ */ + found = B_FALSE; + PG_CPU_ITR_INIT(pg, cpu_iter); + while ((cpp = pg_cpu_next(&cpu_iter)) != NULL) { + if (cpp == cp) + continue; + if (cpp->cpu_part->cp_id == oldpp->cp_id) { + found = B_TRUE; + break; + } + } + if (!found) + bitset_del(&cp->cpu_part->cp_cmt_pgs, pg->pg_id); + } +} + +/* + * Class callback when a CPU becomes active (online) + * + * This is called in a context where CPUs are paused + */ +static void +pg_cmt_cpu_active(cpu_t *cp) +{ + int err; + group_iter_t i; + pg_cmt_t *pg; + group_t *pgs; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + pgs = &cp->cpu_pg->pgs; + group_iter_init(&i); + + /* + * Iterate over the CPU's PGs + */ + while ((pg = group_iterate(pgs, &i)) != NULL) { + + if (IS_CMT_PG(pg) == 0) + continue; + + err = group_add(&pg->cmt_cpus_actv, cp, GRP_NORESIZE); + ASSERT(err == 0); + + /* + * If this is the first active CPU in the PG, and it + * represents a hardware sharing relationship over which + * CMT load balancing is performed, add it as a candidate + * for balancing with it's siblings. + */ + if (GROUP_SIZE(&pg->cmt_cpus_actv) == 1 && + pg_cmt_load_bal_hw(((pghw_t *)pg)->pghw_hw)) { + err = group_add(pg->cmt_siblings, pg, GRP_NORESIZE); + ASSERT(err == 0); + } + + /* + * Notate the CPU in the PGs active CPU bitset. 
+ * Also notate the PG as being active in it's associated + * partition + */ + bitset_add(&pg->cmt_cpus_actv_set, cp->cpu_seqid); + bitset_add(&cp->cpu_part->cp_cmt_pgs, ((pg_t *)pg)->pg_id); + } +} + +/* + * Class callback when a CPU goes inactive (offline) + * + * This is called in a context where CPUs are paused + */ +static void +pg_cmt_cpu_inactive(cpu_t *cp) +{ + int err; + group_t *pgs; + pg_cmt_t *pg; + cpu_t *cpp; + group_iter_t i; + pg_cpu_itr_t cpu_itr; + boolean_t found; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + pgs = &cp->cpu_pg->pgs; + group_iter_init(&i); + + while ((pg = group_iterate(pgs, &i)) != NULL) { + + if (IS_CMT_PG(pg) == 0) + continue; + + /* + * Remove the CPU from the CMT PGs active CPU group + * bitmap + */ + err = group_remove(&pg->cmt_cpus_actv, cp, GRP_NORESIZE); + ASSERT(err == 0); + + bitset_del(&pg->cmt_cpus_actv_set, cp->cpu_seqid); + + /* + * If there are no more active CPUs in this PG over which + * load was balanced, remove it as a balancing candidate. 
+ */ + if (GROUP_SIZE(&pg->cmt_cpus_actv) == 0 && + pg_cmt_load_bal_hw(((pghw_t *)pg)->pghw_hw)) { + err = group_remove(pg->cmt_siblings, pg, GRP_NORESIZE); + ASSERT(err == 0); + } + + /* + * Assert the number of active CPUs does not exceed + * the total number of CPUs in the PG + */ + ASSERT(GROUP_SIZE(&pg->cmt_cpus_actv) <= + GROUP_SIZE(&((pg_t *)pg)->pg_cpus)); + + /* + * Update the PG bitset in the CPU's old partition + */ + found = B_FALSE; + PG_CPU_ITR_INIT(pg, cpu_itr); + while ((cpp = pg_cpu_next(&cpu_itr)) != NULL) { + if (cpp == cp) + continue; + if (cpp->cpu_part->cp_id == cp->cpu_part->cp_id) { + found = B_TRUE; + break; + } + } + if (!found) { + bitset_del(&cp->cpu_part->cp_cmt_pgs, + ((pg_t *)pg)->pg_id); + } + } +} + +/* + * Return non-zero if the CPU belongs in the given PG + */ +static int +pg_cmt_cpu_belongs(pg_t *pg, cpu_t *cp) +{ + cpu_t *pg_cpu; + + pg_cpu = GROUP_ACCESS(&pg->pg_cpus, 0); + + ASSERT(pg_cpu != NULL); + + /* + * The CPU belongs if, given the nature of the hardware sharing + * relationship represented by the PG, the CPU has that + * relationship with some other CPU already in the PG + */ + if (pg_plat_cpus_share(cp, pg_cpu, ((pghw_t *)pg)->pghw_hw)) + return (1); + + return (0); +} + +/* + * Pack the CPUs CMT hierarchy + * The hierarchy order is preserved + */ +static void +pg_cmt_hier_pack(pg_cmt_t *hier[], int sz) +{ + int i, j; + + for (i = 0; i < sz; i++) { + if (hier[i] != NULL) + continue; + + for (j = i; j < sz; j++) { + if (hier[j] != NULL) { + hier[i] = hier[j]; + hier[j] = NULL; + break; + } + } + if (j == sz) + break; + } +} + +/* + * Return a cmt_lgrp_t * given an lgroup handle. 
+ * If the right one doesn't yet exist, create one + * by growing the cmt_lgrps array + */ +static cmt_lgrp_t * +pg_cmt_find_lgrp(lgrp_handle_t hand) +{ + cmt_lgrp_t *lgrp; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + lgrp = cmt_lgrps; + while (lgrp != NULL) { + if (lgrp->cl_hand == hand) + return (lgrp); + lgrp = lgrp->cl_next; + } + + /* + * Haven't seen this lgrp yet + */ + lgrp = kmem_zalloc(sizeof (cmt_lgrp_t), KM_SLEEP); + + lgrp->cl_hand = hand; + lgrp->cl_npgs = 0; + lgrp->cl_next = cmt_lgrps; + cmt_lgrps = lgrp; + group_create(&lgrp->cl_pgs); + + return (lgrp); +} diff --git a/usr/src/uts/common/disp/cpupart.c b/usr/src/uts/common/disp/cpupart.c index 02856e4dc3..ecd5aff873 100644 --- a/usr/src/uts/common/disp/cpupart.c +++ b/usr/src/uts/common/disp/cpupart.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -38,7 +38,7 @@ #include <sys/var.h> #include <sys/cyclic.h> #include <sys/lgrp.h> -#include <sys/chip.h> +#include <sys/pghw.h> #include <sys/loadavg.h> #include <sys/class.h> #include <sys/fss.h> @@ -267,6 +267,8 @@ cpupart_initialize_default(void) * Set t0's home */ t0.t_lpl = &cp_default.cp_lgrploads[LGRP_ROOTID]; + + bitset_init(&cp_default.cp_cmt_pgs); } @@ -322,6 +324,15 @@ cpupart_move_cpu(cpu_t *cp, cpupart_t *newpp, int forced) cpu_inmotion = cp; membar_enter(); + /* + * Notify the Processor Groups subsystem that the CPU + * will be moving cpu partitions. This is done before + * CPUs are paused to provide an opportunity for any + * needed memory allocations. 
+ */ + pg_cpupart_out(cp, oldpp); + pg_cpupart_in(cp, newpp); + again: if (move_threads) { int loop_count; @@ -332,6 +343,8 @@ again: if (loop_count >= 5) { cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN); + pg_cpupart_out(cp, newpp); + pg_cpupart_in(cp, oldpp); cpu_inmotion = NULL; return (EBUSY); /* some threads still bound */ } @@ -350,6 +363,8 @@ again: * a bound cyclic. */ cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN); + pg_cpupart_out(cp, newpp); + pg_cpupart_in(cp, oldpp); cpu_inmotion = NULL; return (EBUSY); } @@ -370,9 +385,10 @@ again: } /* - * Update the set of chip's being spanned + * Now that CPUs are paused, let the PG subsystem perform + * any necessary data structure updates. */ - chip_cpu_move_part(cp, oldpp, newpp); + pg_cpupart_move(cp, oldpp, newpp); /* save this cpu's lgroup -- it'll be the same in the new partition */ lgrpid = cp->cpu_lpl->lpl_lgrpid; @@ -764,7 +780,7 @@ cpupart_create(psetid_t *psid) for (i = 0; i < pp->cp_nlgrploads; i++) { pp->cp_lgrploads[i].lpl_lgrpid = i; } - CHIP_SET_ZERO(pp->cp_mach->mc_chipset); + bitset_init(&pp->cp_cmt_pgs); /* * Pause all CPUs while changing the partition list, to make sure @@ -859,10 +875,16 @@ again: p = ttoproc(t); } } - ASSERT(CHIP_SET_ISNULL(pp->cp_mach->mc_chipset)); + ASSERT(bitset_is_null(&pp->cp_cmt_pgs)); ASSERT(CPUSET_ISNULL(pp->cp_mach->mc_haltset)); /* + * Teardown the partition's group of active CMT PGs now that + * all of the CPUs have left. + */ + bitset_fini(&pp->cp_cmt_pgs); + + /* * Reset the pointers in any offline processors so they won't * try to rejoin the destroyed partition when they're turned * online. 
diff --git a/usr/src/uts/common/disp/disp.c b/usr/src/uts/common/disp/disp.c index dc53b411e3..b2052828b3 100644 --- a/usr/src/uts/common/disp/disp.c +++ b/usr/src/uts/common/disp/disp.c @@ -50,7 +50,9 @@ #include <sys/tnf.h> #include <sys/cpupart.h> #include <sys/lgrp.h> -#include <sys/chip.h> +#include <sys/pg.h> +#include <sys/cmt.h> +#include <sys/bitset.h> #include <sys/schedctl.h> #include <sys/atomic.h> #include <sys/dtrace.h> @@ -117,12 +119,8 @@ static void setkpdq(kthread_t *tp, int borf); * Parameter that determines how recently a thread must have run * on the CPU to be considered loosely-bound to that CPU to reduce * cold cache effects. The interval is in hertz. - * - * The platform may define a per physical processor adjustment of - * this parameter. For efficiency, the effective rechoose interval - * (rechoose_interval + per chip adjustment) is maintained in the - * cpu structures. See cpu_choose() */ +#define RECHOOSE_INTERVAL 3 int rechoose_interval = RECHOOSE_INTERVAL; static cpu_t *cpu_choose(kthread_t *, pri_t); @@ -132,14 +130,9 @@ static cpu_t *cpu_choose(kthread_t *, pri_t); * to reduce migrations. The interval is in nanoseconds. * * The nosteal_nsec should be set by a platform code to an appropriate value. - * - */ -hrtime_t nosteal_nsec = 0; - -/* - * Value of nosteal_nsec meaning that nosteal optimization should be disabled + * Setting it to 0 effectively disables the nosteal 'protection' */ -#define NOSTEAL_DISABLED 1 +hrtime_t nosteal_nsec = -1; id_t defaultcid; /* system "default" class; see dispadmin(1M) */ @@ -225,6 +218,7 @@ dispinit(void) mutex_enter(&cpu_lock); CPU->cpu_disp->disp_maxrunpri = -1; CPU->cpu_disp->disp_max_unbound_pri = -1; + /* * Initialize the default CPU partition. 
*/ @@ -874,9 +868,9 @@ swtch() if (next != t) { if (t == cp->cpu_idle_thread) { - CHIP_NRUNNING(cp->cpu_chip, 1); + PG_NRUN_UPDATE(cp, 1); } else if (next == cp->cpu_idle_thread) { - CHIP_NRUNNING(cp->cpu_chip, -1); + PG_NRUN_UPDATE(cp, -1); } /* @@ -944,7 +938,7 @@ swtch_from_zombie() TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); if (next == cpu->cpu_idle_thread) - CHIP_NRUNNING(cpu->cpu_chip, -1); + PG_NRUN_UPDATE(cpu, -1); restore_mstate(next); @@ -1024,7 +1018,7 @@ swtch_to(kthread_t *next) TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); if (curthread == cp->cpu_idle_thread) - CHIP_NRUNNING(cp->cpu_chip, 1); + PG_NRUN_UPDATE(cp, 1); /* OK to steal anything left on run queue */ cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL; @@ -1092,68 +1086,113 @@ cpu_resched(cpu_t *cp, pri_t tpri) } /* - * Routine used by setbackdq() to balance load across the physical - * processors. Returns a CPU of a lesser loaded chip in the lgroup - * if balancing is necessary, or the "hint" CPU if it's not. - * - * - tp is the thread being enqueued - * - cp is a hint CPU (chosen by cpu_choose()). - * - curchip (if not NULL) is the chip on which the current thread - * is running. - * - * The thread lock for "tp" must be held while calling this routine. + * Perform multi-level CMT load balancing of running threads. + * tp is the thread being enqueued + * cp is the hint CPU (chosen by cpu_choose()). 
*/ static cpu_t * -chip_balance(kthread_t *tp, cpu_t *cp, chip_t *curchip) +cmt_balance(kthread_t *tp, cpu_t *cp) { - int chp_nrun, ochp_nrun; - chip_t *chp, *nchp; + int hint, i, cpu; + int self = 0; + group_t *cmt_pgs, *siblings; + pg_cmt_t *pg, *pg_tmp, *tpg = NULL; + int pg_nrun, tpg_nrun; + int level = 0; + cpu_t *newcp; + + ASSERT(THREAD_LOCK_HELD(tp)); - chp = cp->cpu_chip; - chp_nrun = chp->chip_nrunning; + cmt_pgs = &cp->cpu_pg->cmt_pgs; - if (chp == curchip) - chp_nrun--; /* Ignore curthread */ + if (GROUP_SIZE(cmt_pgs) == 0) + return (cp); /* nothing to do */ + + if (tp == curthread) + self = 1; /* - * If this chip isn't at all idle, then let - * run queue balancing do the work. + * Balance across siblings in the CPUs CMT lineage */ - if (chp_nrun == chp->chip_ncpu) - return (cp); - - nchp = chp->chip_balance; do { - if (nchp == chp || - !CHIP_IN_CPUPART(nchp, tp->t_cpupart)) - continue; + pg = GROUP_ACCESS(cmt_pgs, level); + + pg_nrun = pg->cmt_nrunning; + if (self && + bitset_in_set(&pg->cmt_cpus_actv_set, CPU->cpu_seqid)) + pg_nrun--; /* Ignore curthread's effect */ + + siblings = pg->cmt_siblings; + hint = pg->cmt_hint; - ochp_nrun = nchp->chip_nrunning; + /* + * Check for validity of the hint + * It should reference a valid sibling + */ + if (hint >= GROUP_SIZE(siblings)) + hint = pg->cmt_hint = 0; + else + pg->cmt_hint++; /* - * If the other chip is running less threads, - * or if it's running the same number of threads, but - * has more online logical CPUs, then choose to balance. + * Find a balancing candidate from among our siblings + * "hint" is a hint for where to start looking */ - if (chp_nrun > ochp_nrun || - (chp_nrun == ochp_nrun && - nchp->chip_ncpu > chp->chip_ncpu)) { - cp = nchp->chip_cpus; - nchp->chip_cpus = cp->cpu_next_chip; + i = hint; + do { + ASSERT(i < GROUP_SIZE(siblings)); + pg_tmp = GROUP_ACCESS(siblings, i); /* - * Find a CPU on the chip in the correct - * partition. 
We know at least one exists - * because of the CHIP_IN_CPUPART() check above. + * The candidate must not be us, and must + * have some CPU resources in the thread's + * partition */ - while (cp->cpu_part != tp->t_cpupart) - cp = cp->cpu_next_chip; + if (pg_tmp != pg && + bitset_in_set(&tp->t_cpupart->cp_cmt_pgs, + ((pg_t *)pg_tmp)->pg_id)) { + tpg = pg_tmp; + break; + } + + if (++i >= GROUP_SIZE(siblings)) + i = 0; + } while (i != hint); + + if (!tpg) + continue; /* no candidates at this level */ + + /* + * Check if the balancing target is underloaded + * Decide to balance if the target is running fewer + * threads, or if it's running the same number of threads + * with more online CPUs + */ + tpg_nrun = tpg->cmt_nrunning; + if (pg_nrun > tpg_nrun || + (pg_nrun == tpg_nrun && + (GROUP_SIZE(&tpg->cmt_cpus_actv) > + GROUP_SIZE(&pg->cmt_cpus_actv)))) { + break; } - chp->chip_balance = nchp->chip_next_lgrp; - break; - } while ((nchp = nchp->chip_next_lgrp) != chp->chip_balance); + tpg = NULL; + } while (++level < GROUP_SIZE(cmt_pgs)); + + + if (tpg) { + /* + * Select an idle CPU from the target PG + */ + for (cpu = 0; cpu < GROUP_SIZE(&tpg->cmt_cpus_actv); cpu++) { + newcp = GROUP_ACCESS(&tpg->cmt_cpus_actv, cpu); + if (newcp->cpu_part == tp->t_cpupart && + newcp->cpu_dispatch_pri == -1) { + cp = newcp; + break; + } + } + } - ASSERT(CHIP_IN_CPUPART(cp->cpu_chip, tp->t_cpupart)); return (cp); } @@ -1181,7 +1220,6 @@ setbackdq(kthread_t *tp) { dispq_t *dq; disp_t *dp; - chip_t *curchip = NULL; cpu_t *cp; pri_t tpri; int bound; @@ -1200,10 +1238,6 @@ setbackdq(kthread_t *tp) } tpri = DISP_PRIO(tp); - if (tp == curthread) { - curchip = CPU->cpu_chip; - } - if (ncpus == 1) cp = tp->t_cpu; else if (!tp->t_bound_cpu && !tp->t_weakbound_cpu) { @@ -1220,12 +1254,9 @@ setbackdq(kthread_t *tp) int qlen; /* - * Select another CPU if we need - * to do some load balancing across the - * physical processors. 
+ * Perform any CMT load balancing */ - if (CHIP_SHOULD_BALANCE(cp->cpu_chip)) - cp = chip_balance(tp, cp, curchip); + cp = cmt_balance(tp, cp); /* * Balance across the run queues @@ -1960,8 +1991,8 @@ disp_getwork(cpu_t *cp) if (pri > maxpri) { /* * Don't steal threads that we attempted - * to be stolen very recently until - * they're ready to be stolen again. + * to steal recently until they're ready + * to be stolen again. */ stealtime = ocp->cpu_disp->disp_steal; if (stealtime == 0 || @@ -2158,8 +2189,6 @@ disp_getbest(disp_t *dp) allbound = B_TRUE; for (tp = dq->dq_first; tp != NULL; tp = tp->t_link) { hrtime_t now, nosteal, rqtime; - chip_type_t chtype; - chip_t *chip; /* * Skip over bound threads which could be here even @@ -2209,21 +2238,15 @@ disp_getbest(disp_t *dp) break; /* - * Steal immediately if the chip has shared cache and we are - * sharing the chip with the target thread's CPU. + * Steal immediately if, due to CMT processor architecture + * migraiton between cp and tcp would incur no performance + * penalty. */ - chip = tcp->cpu_chip; - chtype = chip->chip_type; - if ((chtype == CHIP_SMT || chtype == CHIP_CMP_SHARED_CACHE) && - chip == cp->cpu_chip) + if (pg_cmt_can_migrate(cp, tcp)) break; - /* - * Get the value of nosteal interval either from nosteal_nsec - * global variable or from a value specified by a chip - */ - nosteal = nosteal_nsec ? 
nosteal_nsec : chip->chip_nosteal; - if (nosteal == 0 || nosteal == NOSTEAL_DISABLED) + nosteal = nosteal_nsec; + if (nosteal == 0) break; /* @@ -2643,7 +2666,7 @@ cpu_choose(kthread_t *t, pri_t tpri) { ASSERT(tpri < kpqpri); - if ((((lbolt - t->t_disp_time) > t->t_cpu->cpu_rechoose) && + if ((((lbolt - t->t_disp_time) > rechoose_interval) && t != curthread) || t->t_cpu == cpu_inmotion) { return (disp_lowpri_cpu(t->t_cpu, t->t_lpl, tpri, NULL)); } diff --git a/usr/src/uts/common/os/bitset.c b/usr/src/uts/common/os/bitset.c new file mode 100644 index 0000000000..8222fd9faa --- /dev/null +++ b/usr/src/uts/common/os/bitset.c @@ -0,0 +1,168 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/bitset.h> +#include <sys/kmem.h> +#include <sys/systm.h> +#include <sys/cmn_err.h> +#include <sys/sysmacros.h> + +/* + * Initialize a bitset_t. + * After bitset_init(), the bitset will be zero sized. + */ +void +bitset_init(bitset_t *b) +{ + bzero(b, sizeof (bitset_t)); +} + +/* + * Uninitialize a bitset_t. 
+ * This will free the bitset's data, leaving it zero sized.
+ */
+void
+bitset_fini(bitset_t *b)
+{
+	if (b->bs_words > 0)
+		kmem_free(b->bs_set, b->bs_words * sizeof (ulong_t));
+
+	/*
+	 * Reset the fields so that the bitset really is zero sized once
+	 * we return. Without this, a second bitset_fini() or a later
+	 * bitset_resize() would free the same array again.
+	 */
+	b->bs_set = NULL;
+	b->bs_words = 0;
+}
+
+/*
+ * Resize a bitset to where it can hold sz number of bits.
+ * This can either grow or shrink the bitset holding capacity.
+ * In the case of shrinkage, elements that reside outside the new
+ * holding capacity of the bitset are lost.
+ *
+ * Storage is kept as an array of ulong_t words, so the holding capacity
+ * reported by bitset_capacity() is always a multiple of BT_NBIPUL.
+ * Resizing to a size that needs no words releases the array and leaves
+ * bs_set NULL. The replacement array is allocated with KM_SLEEP.
+ */
+void
+bitset_resize(bitset_t *b, uint_t sz)
+{
+	uint_t	nwords;
+	ulong_t	*bset_new, *bset_tmp;
+
+	nwords = BT_BITOUL(sz);
+	if (b->bs_words == nwords)
+		return;	/* already properly sized */
+
+	/*
+	 * Allocate the new ulong_t array, and copy over as many of the
+	 * old words as will fit. A zero sized bitset (e.g. fresh from
+	 * bitset_init()) has no old array to copy from.
+	 */
+	if (nwords > 0) {
+		bset_new = kmem_zalloc(nwords * sizeof (ulong_t), KM_SLEEP);
+		if (b->bs_words > 0) {
+			bcopy(b->bs_set, bset_new,
+			    MIN(b->bs_words, nwords) * sizeof (ulong_t));
+		}
+	} else {
+		bset_new = NULL;
+	}
+
+	/* swap out the old ulong_t array for new one */
+	bset_tmp = b->bs_set;
+	b->bs_set = bset_new;
+
+	/* free up the old array, if there was one (as in bitset_fini()) */
+	if (b->bs_words > 0)
+		kmem_free(bset_tmp, b->bs_words * sizeof (ulong_t));
+	b->bs_words = nwords;
+}
+
+/*
+ * Returns the current holding capacity of the bitset, in bits.
+ * This is the number of allocated words times BT_NBIPUL, and is zero
+ * for a bitset that has not been resized since bitset_init().
+ */
+uint_t
+bitset_capacity(bitset_t *b)
+{
+	return (b->bs_words * BT_NBIPUL);
+}
+
+/*
+ * Add and delete bits in the bitset.
+ *
+ * Adding a bit that is already set, and clearing a bit that's already clear
+ * is legal.
+ *
+ * Adding or deleting an element that falls outside the bitset's current
+ * holding capacity is illegal.
+ */
+void
+bitset_add(bitset_t *b, uint_t elt)
+{
+	ASSERT(b->bs_words * BT_NBIPUL > elt);
+
+	BT_SET(b->bs_set, elt);
+}
+
+void
+bitset_del(bitset_t *b, uint_t elt)
+{
+	ASSERT(b->bs_words * BT_NBIPUL > elt);
+
+	BT_CLEAR(b->bs_set, elt);
+}
+
+/*
+ * Return non-zero if the bit is present in the set
+ *
+ * Testing an element that falls outside the bitset's current holding
+ * capacity is illegal.
+ *
+ * BT_TEST() evaluates to the masked ulong_t word, not to 0 or 1.
+ * Handing that value back through an int would drop the upper half of
+ * the word wherever ulong_t is wider than int, so elements stored there
+ * would look absent. Normalize the result to 0 or 1 instead.
+ */
+int
+bitset_in_set(bitset_t *b, uint_t elt)
+{
+	ASSERT(b->bs_words * BT_NBIPUL > elt);
+
+	return (BT_TEST(b->bs_set, elt) != 0);
+}
+
+/*
+ * Return non-zero if the bitset is empty
+ * A zero sized bitset has no words to examine and is reported as empty.
+ */
+int
+bitset_is_null(bitset_t *b)
+{
+	uint_t	i;	/* unsigned, like the index in bitset_find() */
+
+	for (i = 0; i < b->bs_words; i++)
+		if (b->bs_set[i] != 0)
+			return (0);
+	return (1);
+}
+
+/*
+ * Find the first set bit in the bitset
+ * Return (uint_t)-1 if no bit was found, which includes the case of a
+ * zero sized bitset.
+ */
+uint_t
+bitset_find(bitset_t *b)
+{
+	uint_t	i;
+	uint_t	elt = (uint_t)-1;
+
+	for (i = 0; i < b->bs_words; i++) {
+		/*
+		 * This relies on lowbit() returning 0 for a word with no
+		 * bits set, so that subtracting one gives the "not found"
+		 * value and the scan moves on to the next word.
+		 */
+		elt = (uint_t)(lowbit(b->bs_set[i]) - 1);
+		if (elt != (uint_t)-1) {
+			/* convert the in-word position to a set-wide one */
+			elt += i * BT_NBIPUL;
+			break;
+		}
+	}
+	return (elt);
+}
diff --git a/usr/src/uts/common/os/chip.c b/usr/src/uts/common/os/chip.c
deleted file mode 100644
index ad11827b0f..0000000000
--- a/usr/src/uts/common/os/chip.c
+++ /dev/null
@@ -1,576 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/thread.h> -#include <sys/cpuvar.h> -#include <sys/cpupart.h> -#include <sys/kmem.h> -#include <sys/cmn_err.h> -#include <sys/kstat.h> -#include <sys/processor.h> -#include <sys/disp.h> -#include <sys/chip.h> - -/* - * CMT aware scheduler/dispatcher support - * - * With the introduction of Chip Multi-Threaded (CMT) processor architectures, - * it is no longer necessarily true that a given physical processor - * module (chip) will present itself as a single schedulable entity (cpu_t). - * Rather, each chip may present itself as one or more "logical" CPUs. - * - * The logical CPUs presented may share physical components on the chip - * such as caches, data pipes, FPUs, etc. It is advantageous to have the - * kernel know which logical CPUs are presented by a given chip, - * and what facilities on the chip are shared, since the kernel can then use - * this information to employ scheduling policies that help improve the - * availability of per chip resources, and increase utilization of a thread's - * cache investment. - * - * The "chip_t" structure represents a physical processor. - * It is used to keep track of which logical CPUs are presented by a given - * chip, and to provide a parameterized representation of a chip's - * properties. A count of the number of running threads is also - * maintained, and is used by the dispatcher to balance load across the - * system's chips to improve performance through increased chip resource - * availability. 
- * - * Locking: - * - * Safely traversing the per lgroup lists requires the same protections - * as traversing the cpu lists. One must either: - * - hold cpu_lock - * - have disabled kernel preemption - * - be at high SPL - * - have cpu's paused - * - * Safely traversing the global "chip_list" requires holding cpu_lock. - * - * A chip's nrunning count should only be modified using the - * CHIP_NRUNNING() macro, through which updates of the count are done - * atomically. - */ - -chip_t cpu0_chip; /* chip structure for first CPU */ -cpu_physid_t cpu0_physid; /* boot CPU's physical id structure */ - -/* - * chip_bootstrap is used on platforms where it is possible to enter the - * dispatcher before a new CPU's chip initialization has happened. - */ -static chip_t chip_bootstrap; - -#define CPU_HAS_NO_CHIP(cp) \ - ((cp)->cpu_chip == NULL || (cp)->cpu_chip == &chip_bootstrap) - -static chip_t *chip_list; /* protected by CPU lock */ -static chip_set_t chip_set; /* bitmap of chips in existence */ - /* indexed by chip_seqid */ -static chipid_t chip_seqid_next = 0; /* next sequential chip id */ -static int nchips = 0; /* num chips in existence */ - -static chip_t *chip_find(chipid_t); -static int chip_kstat_extract(kstat_t *, int); - -/* - * Declare static kstat names (defined in chip.h) - */ -CHIP_KSTAT_NAMES; - -/* - * Find the chip_t with the given chip_id. 
- */ -static chip_t * -chip_find(chipid_t chipid) -{ - chip_t *chp, *chip_start; - - ASSERT(chip_list == NULL || chip_list->chip_next == chip_list || - MUTEX_HELD(&cpu_lock)); - - if ((chp = chip_start = chip_list) != NULL) { - do { - if (chp->chip_id == chipid) { - return (chp); - } - } while ((chp = chp->chip_next) != chip_start); - } - return (NULL); -} - -chip_t * -chip_lookup(chipid_t chipid) -{ - chip_t *chp; - - mutex_enter(&cpu_lock); - chp = chip_find(chipid); - mutex_exit(&cpu_lock); - - return (chp); -} - -#ifndef sun4v -/* - * Setup the kstats for this chip, if needed - */ -void -chip_kstat_create(chip_t *chp) -{ - chip_stat_t stat; - kstat_t *chip_kstat; - - ASSERT(MUTEX_HELD(&cpu_lock)); - - if (chp->chip_kstat != NULL) - return; /* already initialized */ - - chip_kstat = kstat_create("chip", chp->chip_id, NULL, "misc", - KSTAT_TYPE_NAMED, CHIP_NUM_STATS, - KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE); - - if (chip_kstat != NULL) { - chip_kstat->ks_lock = &chp->chip_kstat_mutex; - mutex_init(chip_kstat->ks_lock, NULL, MUTEX_DEFAULT, NULL); - chip_kstat->ks_private = chp; - chip_kstat->ks_data = chp->chip_kstat_data; - for (stat = 0; stat < CHIP_NUM_STATS; stat++) - kstat_named_init(&chp->chip_kstat_data[stat], - chip_kstat_names[stat], KSTAT_DATA_INT64); - chip_kstat->ks_update = chip_kstat_extract; - chp->chip_kstat = chip_kstat; - kstat_install(chip_kstat); - } -} -#else -/* - * Note: On sun4v systems, chip kstats don't currently - * exist, since "chip" structures and policies are being - * leveraged to implement core level balancing, and exporting - * chip kstats in light of this would be both misleading - * and confusing. 
- */ -/* ARGSUSED */ -void -chip_kstat_create(chip_t *chp) -{ -} -#endif /* !sun4v */ - -static int -chip_kstat_extract(kstat_t *ksp, int rw) -{ - struct kstat_named *ksd; - chip_t *chp; - - chp = (chip_t *)ksp->ks_private; - - ksd = (struct kstat_named *)ksp->ks_data; - ASSERT(ksd == chp->chip_kstat_data); - - /* - * The chip kstats are read only - */ - if (rw == KSTAT_WRITE) - return (EACCES); - - ksd[CHIP_ID].value.i64 = chp->chip_id; - ksd[CHIP_NCPUS].value.i64 = chp->chip_ncpu; - ksd[CHIP_NRUNNING].value.i64 = chp->chip_nrunning; - ksd[CHIP_RECHOOSE].value.i64 = - rechoose_interval + chp->chip_rechoose_adj; - - return (0); -} - -/* - * If necessary, instantiate a chip_t for this CPU. - * Called when a CPU is being added to the system either in startup, - * or because of DR. The cpu will be assigned to the chip's active - * CPU list later in chip_cpu_assign() - */ -void -chip_cpu_init(cpu_t *cp) -{ - chipid_t cid; - int rechoose; - chip_t *chp; - chip_def_t chp_def; - - ASSERT((chip_list == NULL) || (MUTEX_HELD(&cpu_lock))); - - if (chip_list == NULL) - cp->cpu_physid = &cpu0_physid; - else - cp->cpu_physid = kmem_zalloc(sizeof (cpu_physid_t), KM_SLEEP); - - /* - * Call into the platform to fetch this cpu's chip and core ids. - * The ids are cached in the CPU's physical id structure. - * - * On sun4v platforms, the chip infrastructure is currently being - * leveraged to implement core level load balancing. 
- */ -#ifdef DO_CORELEVEL_LOADBAL - cid = chip_plat_get_coreid(cp); - cp->cpu_physid->cpu_coreid = cid; - cp->cpu_physid->cpu_chipid = chip_plat_get_chipid(cp); -#else - cid = chip_plat_get_chipid(cp); - cp->cpu_physid->cpu_chipid = cid; - cp->cpu_physid->cpu_coreid = chip_plat_get_coreid(cp); -#endif /* DO_CORELEVEL_LOADBAL */ - - chp = chip_find(cid); - if (chp == NULL) { - - /* - * Create a new chip - */ - if (chip_list == NULL) - chp = &cpu0_chip; - else - chp = kmem_zalloc(sizeof (*chp), KM_SLEEP); - - chp->chip_id = cid; - chp->chip_nrunning = 0; - - /* - * If we're booting, take this moment to perform - * some additional initialization - */ - if (chip_list == NULL) { - CHIP_SET_ZERO(chip_set); - CHIP_SET_ZERO(cp->cpu_part->cp_mach->mc_chipset); - chp->chip_nrunning++; /* for t0 */ - } - - /* - * Find the next free sequential chip id. - * A chip's sequential id exists in the range - * 0 .. CHIP_MAX_CHIPS, and is suitable for use with - * chip sets. - */ - while (CHIP_SET_TEST(chip_set, chip_seqid_next)) - chip_seqid_next++; - chp->chip_seqid = chip_seqid_next++; - CHIP_SET_ADD(chip_set, chp->chip_seqid); - - ASSERT(chip_seqid_next <= CHIP_MAX_CHIPS); - - - /* - * Query the platform specific parameters - * for this chip - */ - chip_plat_define_chip(cp, &chp_def); - chp->chip_rechoose_adj = chp_def.chipd_rechoose_adj; - chp->chip_type = chp_def.chipd_type; - chp->chip_nosteal = chp_def.chipd_nosteal; - - ASSERT((chp->chip_type < CHIP_NUM_TYPES) && - (chp->chip_type >= CHIP_DEFAULT)); - - /* - * Insert this chip in chip_list - */ - if (chip_list == NULL) { - chip_list = chp; - chp->chip_next = chp->chip_prev = chp; - } else { - chip_t *chptr; - - chptr = chip_list; - chp->chip_next = chptr; - chp->chip_prev = chptr->chip_prev; - chptr->chip_prev->chip_next = chp; - chptr->chip_prev = chp; - } - - nchips++; - ASSERT(nchips <= CHIP_MAX_CHIPS); - - /* - * The boot cpu will create the first chip's kstats - * later in cpu_kstat_init() - */ - if (chp != &cpu0_chip) - 
chip_kstat_create(chp); - } - - /* - * Initialize the effective rechoose interval cached - * in this cpu structure. - */ - rechoose = rechoose_interval + chp->chip_rechoose_adj; - cp->cpu_rechoose = (rechoose < 0) ? 0 : rechoose; - - cp->cpu_chip = chp; - chp->chip_ref++; -} - -/* - * This cpu is being deleted. It has already been removed from - * the chip's active cpu list back in chip_cpu_unassign(). Here - * we remove the cpu's reference to the chip, and cleanup/destroy - * the chip if needed. - */ -void -chip_cpu_fini(cpu_t *cp) -{ - chip_t *chp; - chip_t *prev, *next; - - ASSERT(MUTEX_HELD(&cpu_lock)); - - /* - * This can happen if the CPU failed to power on - */ - if (CPU_HAS_NO_CHIP(cp)) - return; - - chp = cp->cpu_chip; - cp->cpu_chip = NULL; - - /* - * Clear out and free the CPU's physical id structure - */ - cp->cpu_physid->cpu_chipid = -1; - cp->cpu_physid->cpu_coreid = -1; - - if (cp->cpu_physid != &cpu0_physid) { - ASSERT(cp->cpu_physid != NULL); - kmem_free(cp->cpu_physid, sizeof (cpu_physid_t)); - } - cp->cpu_physid = NULL; - - /* - * Delete the chip if its last CPU is being deleted - */ - if (--chp->chip_ref == 0) { - - ASSERT(chp->chip_ncpu == 0); - ASSERT(chp->chip_cpus == NULL); - ASSERT(chp->chip_nrunning == 0); - ASSERT(chp->chip_lgrp == NULL); - ASSERT((chp->chip_next_lgrp == NULL) && - (chp->chip_prev_lgrp == NULL)); - - if (chip_seqid_next > chp->chip_seqid) - chip_seqid_next = chp->chip_seqid; - CHIP_SET_REMOVE(chip_set, chp->chip_seqid); - - chp->chip_id = -1; - chp->chip_seqid = -1; - - /* - * remove the chip from the system's chip list - */ - if (chip_list == chp) - chip_list = chp->chip_next; - - prev = chp->chip_prev; - next = chp->chip_next; - - prev->chip_next = next; - next->chip_prev = prev; - - chp->chip_next = chp->chip_prev = NULL; - - nchips--; - - /* - * clean up any chip kstats - */ - if (chp->chip_kstat) { - kstat_delete(chp->chip_kstat); - chp->chip_kstat = NULL; - } - /* - * If the chip_t structure was dynamically - * 
allocated, free it. - */ - if (chp != &cpu0_chip) - kmem_free(chp, sizeof (*chp)); - } -} - -/* - * This cpu is becoming active (online). - * Perform all the necessary bookkeeping in it's chip_t - */ -void -chip_cpu_assign(cpu_t *cp) -{ - chip_t *chp; - cpu_t *cptr; - - ASSERT(chip_list == NULL || chip_list->chip_next == chip_list || - MUTEX_HELD(&cpu_lock)); - - chp = cp->cpu_chip; - - /* - * Add this cpu to the chip's cpu list - */ - if (chp->chip_ncpu == 0) { - chp->chip_cpus = cp; - cp->cpu_next_chip = cp->cpu_prev_chip = cp; - } else { - cptr = chp->chip_cpus; - cp->cpu_next_chip = cptr; - cp->cpu_prev_chip = cptr->cpu_prev_chip; - cp->cpu_prev_chip->cpu_next_chip = cp; - cptr->cpu_prev_chip = cp; - } - - chp->chip_ncpu++; - - /* - * Notate this chip's seqid in the cpu partition's chipset - */ - chip_cpu_move_part(cp, NULL, cp->cpu_part); -} - -/* - * This cpu is being offlined, so do the reverse - * of cpu_chip_assign() - */ -void -chip_cpu_unassign(cpu_t *cp) -{ - chip_t *chp; - struct cpu *prev; - struct cpu *next; - - ASSERT(MUTEX_HELD(&cpu_lock)); - - chp = cp->cpu_chip; - - chip_cpu_move_part(cp, cp->cpu_part, NULL); - - /* - * remove this cpu from the chip's cpu list - */ - prev = cp->cpu_prev_chip; - next = cp->cpu_next_chip; - - prev->cpu_next_chip = next; - next->cpu_prev_chip = prev; - - cp->cpu_next_chip = cp->cpu_prev_chip = NULL; - - chp->chip_ncpu--; - - if (chp->chip_ncpu == 0) { - chp->chip_cpus = NULL; - } else if (chp->chip_cpus == cp) { - chp->chip_cpus = next; - } -} - -/* - * A cpu on the chip is moving into and/or out of a cpu partition. - * Maintain the cpuparts' chip membership set. - * oldpp is NULL when a cpu is being offlined. - * newpp is NULL when a cpu is being onlined. 
- */ -void -chip_cpu_move_part(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp) -{ - cpu_t *cpp; - chip_t *chp; - - ASSERT(chip_list->chip_next == chip_list || MUTEX_HELD(&cpu_lock)); - - chp = cp->cpu_chip; - - if (newpp != NULL) { - /* - * Add the chip's seqid to the cpupart's chip set - */ - CHIP_SET_ADD(newpp->cp_mach->mc_chipset, chp->chip_seqid); - } - - if (oldpp != NULL) { - cpp = cp; - while ((cpp = cpp->cpu_next_chip) != cp) { - if (cpp->cpu_part->cp_id == oldpp->cp_id) { - /* - * Another cpu on the chip is in the old - * cpu partition, so we're done - */ - return; - } - } - - /* - * No other cpu on the chip is in the old partition - * so remove the chip's seqid from it's set - */ - CHIP_SET_REMOVE(oldpp->cp_mach->mc_chipset, chp->chip_seqid); - } -} - -/* - * Called to indicate a slave CPU has started up. - */ -void -chip_cpu_startup(cpu_t *cp) -{ - /* - * Indicate that the chip has a new running thread - * (slave startup) - */ - CHIP_NRUNNING(cp->cpu_chip, 1); -} - -/* - * Provide the specified CPU a bootstrap chip - */ -void -chip_bootstrap_cpu(cpu_t *cp) -{ - cp->cpu_chip = &chip_bootstrap; -} - -/* - * Given a chip set, return 1 if it is empty. - */ -int -chip_set_isnull(chip_set_t *set) -{ - int i; - - for (i = 0; i < CHIP_SET_WORDS; i++) { - if (set->csb[i] != 0) - return (0); - } - return (1); -} diff --git a/usr/src/uts/common/os/clock.c b/usr/src/uts/common/os/clock.c index 0152c2e958..a1040f1270 100644 --- a/usr/src/uts/common/os/clock.c +++ b/usr/src/uts/common/os/clock.c @@ -23,7 +23,7 @@ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -66,7 +66,6 @@ #include <sys/cpupart.h> #include <sys/rctl.h> #include <sys/task.h> -#include <sys/chip.h> #include <sys/sdt.h> #ifdef __sparc @@ -260,13 +259,6 @@ cyclic_id_t deadman_cyclic; /* deadman()'s cyclic_id */ static int lgrp_ticks; /* counter to schedule lgrp load calcs */ /* - * rechoose_interval_history is used to detect when rechoose_interval's - * value has changed (via hotpatching for example), so that the - * cached values in the cpu structures may be updated. - */ -static int rechoose_interval_history = RECHOOSE_INTERVAL; - -/* * for tod fault detection */ #define TOD_REF_FREQ ((longlong_t)(NANOSEC)) @@ -345,8 +337,6 @@ clock(void) int64_t lltemp; int s; int do_lgrp_load; - int rechoose_update = 0; - int rechoose; int i; if (panicstr) @@ -430,21 +420,9 @@ clock(void) do_lgrp_load = 1; } - /* - * The dispatcher tunable rechoose_interval may be hot-patched. - * Note if it has a new value. If so, the effective rechoose_interval - * cached in the cpu structures needs to be updated. - * If needed we'll do this during the walk of the cpu_list below. - */ - if (rechoose_interval != rechoose_interval_history) { - rechoose_interval_history = rechoose_interval; - rechoose_update = 1; - } - if (one_sec) loadavg_update(); - /* * First count the threads waiting on kpreempt queues in each * CPU partition. @@ -522,19 +500,6 @@ clock(void) lgrp_loadavg(cp->cpu_lpl, cpu_nrunnable * LGRP_LOADAVG_IN_THREAD_MAX, 1); } - /* - * The platform may define a per physical processor - * adjustment of rechoose_interval. The effective - * (base + adjustment) rechoose_interval is cached - * in the cpu structures for efficiency. Above we detect - * if the cached values need updating, and here is where - * the update happens. - */ - if (rechoose_update) { - rechoose = rechoose_interval + - cp->cpu_chip->chip_rechoose_adj; - cp->cpu_rechoose = (rechoose < 0) ? 
0 : rechoose; - } } while ((cp = cp->cpu_next) != cpu_list); /* diff --git a/usr/src/uts/common/os/cpu.c b/usr/src/uts/common/os/cpu.c index 5ca51ec3da..9237517a69 100644 --- a/usr/src/uts/common/os/cpu.c +++ b/usr/src/uts/common/os/cpu.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -45,7 +45,7 @@ #include <sys/cpupart.h> #include <sys/lgrp.h> #include <sys/pset.h> -#include <sys/chip.h> +#include <sys/pghw.h> #include <sys/kmem.h> #include <sys/kmem_impl.h> /* to set per-cpu kmem_cache offset */ #include <sys/atomic.h> @@ -1266,6 +1266,11 @@ cpu_offline(cpu_t *cp, int flags) cpu_state_change_notify(cp->cpu_id, CPU_OFF); /* + * Tell the PG subsystem that the CPU is leaving the partition + */ + pg_cpupart_out(cp, pp); + + /* * Take the CPU out of interrupt participation so we won't find * bound kernel threads. If the architecture cannot completely * shut off interrupts on the CPU, don't quiesce it, but don't @@ -1512,6 +1517,11 @@ out: cyclic_online(cp); /* + * If we failed, tell the PG subsystem that the CPU is back + */ + pg_cpupart_in(cp, pp); + + /* * If we failed, we need to notify everyone that this CPU is back on. */ if (error != 0) @@ -1732,7 +1742,12 @@ cpu_del_unit(int cpuid) ASSERT(cp->cpu_next_part == cp); ASSERT(cp->cpu_prev_part == cp); - chip_cpu_fini(cp); + /* + * Tear down the CPU's physical ID cache, and update any + * processor groups + */ + pg_cpu_fini(cp); + pghw_physid_destroy(cp); /* * Destroy kstat stuff. 
@@ -1816,8 +1831,7 @@ cpu_add_active_internal(cpu_t *cp) ASSERT(cp_numparts_nonempty != 0); } - chip_cpu_assign(cp); - + pg_cpu_active(cp); lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)cp, 0); bzero(&cp->cpu_loadavg, sizeof (cp->cpu_loadavg)); @@ -1830,9 +1844,12 @@ cpu_add_active_internal(cpu_t *cp) void cpu_add_active(cpu_t *cp) { + pg_cpupart_in(cp, cp->cpu_part); + pause_cpus(NULL); cpu_add_active_internal(cp); start_cpus(); + cpu_stats_kstat_create(cp); cpu_create_intrstat(cp); lgrp_kstat_create(cp); @@ -1854,7 +1871,7 @@ cpu_remove_active(cpu_t *cp) ASSERT(cp->cpu_next_onln != cp); /* not the last one */ ASSERT(cp->cpu_prev_onln != cp); /* not the last one */ - chip_cpu_unassign(cp); + pg_cpu_inactive(cp); lgrp_config(LGRP_CONFIG_CPU_OFFLINE, (uintptr_t)cp, 0); @@ -2146,11 +2163,12 @@ cpu_info_kstat_update(kstat_t *ksp, int rw) (void) strncpy(cpu_info_template.ci_fpu_type.value.c, cp->cpu_type_info.pi_fputypes, 15); cpu_info_template.ci_clock_MHz.value.l = cp->cpu_type_info.pi_clock; - cpu_info_template.ci_chip_id.value.l = chip_plat_get_chipid(cp); + cpu_info_template.ci_chip_id.value.l = + pg_plat_hw_instance_id(cp, PGHW_CHIP); kstat_named_setstr(&cpu_info_template.ci_implementation, cp->cpu_idstr); kstat_named_setstr(&cpu_info_template.ci_brandstr, cp->cpu_brandstr); - cpu_info_template.ci_core_id.value.l = chip_plat_get_coreid(cp); + cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp); #if defined(__sparcv9) cpu_info_template.ci_device_ID.value.ui64 = @@ -2163,7 +2181,7 @@ cpu_info_kstat_update(kstat_t *ksp, int rw) cpu_info_template.ci_family.value.l = cpuid_getfamily(cp); cpu_info_template.ci_model.value.l = cpuid_getmodel(cp); cpu_info_template.ci_step.value.l = cpuid_getstep(cp); - cpu_info_template.ci_clogid.value.l = chip_plat_get_clogid(cp); + cpu_info_template.ci_clogid.value.l = cpuid_get_clogid(cp); #endif return (0); @@ -2215,11 +2233,13 @@ cpu_info_kstat_destroy(cpu_t *cp) void cpu_kstat_init(cpu_t *cp) { + /* + * XXX need pg 
kstats for boot CPU + */ mutex_enter(&cpu_lock); cpu_info_kstat_create(cp); cpu_stats_kstat_create(cp); cpu_create_intrstat(cp); - chip_kstat_create(cp->cpu_chip); cpu_set_state(cp); mutex_exit(&cpu_lock); } diff --git a/usr/src/uts/common/os/group.c b/usr/src/uts/common/os/group.c new file mode 100644 index 0000000000..b15dff181f --- /dev/null +++ b/usr/src/uts/common/os/group.c @@ -0,0 +1,322 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/systm.h> +#include <sys/param.h> +#include <sys/debug.h> +#include <sys/kmem.h> +#include <sys/group.h> + + +#define GRP_SET_SIZE_DEFAULT 2 + +static void group_grow_set(group_t *); +static void group_shrink_set(group_t *); +static void group_pack_set(void **, uint_t); + +/* + * Initialize a group_t + */ +void +group_create(group_t *g) +{ + bzero(g, sizeof (group_t)); +} + +/* + * Destroy a group_t + * The group must already be empty + */ +void +group_destroy(group_t *g) +{ + ASSERT(g->grp_size == 0); + + if (g->grp_capacity > 0) { + kmem_free(g->grp_set, g->grp_capacity * sizeof (void *)); + g->grp_capacity = 0; + } + g->grp_set = NULL; +} + +/* + * Add element "e" to group "g" + * + * Returns -1 if addition would result in overcapacity, and + * resize operations aren't allowed, and 0 otherwise + */ +int +group_add(group_t *g, void *e, int gflag) +{ + int entry; + + if ((gflag & GRP_NORESIZE) && + g->grp_size == g->grp_capacity) + return (-1); + + ASSERT(g->grp_size != g->grp_capacity || (gflag & GRP_RESIZE)); + + entry = g->grp_size++; + if (g->grp_size > g->grp_capacity) + group_grow_set(g); + + ASSERT(g->grp_set[entry] == NULL); + g->grp_set[entry] = e; + + return (0); +} + +/* + * Remove element "e" from group "g" + * + * Returns -1 if "e" was not present in "g" and 0 otherwise + */ +int +group_remove(group_t *g, void *e, int gflag) +{ + int i; + + /* + * Find the element in the group's set + */ + for (i = 0; i < g->grp_size; i++) + if (g->grp_set[i] == e) + break; + if (g->grp_set[i] != e) + return (-1); + + g->grp_set[i] = NULL; + group_pack_set(g->grp_set, g->grp_size); + g->grp_size--; + + if ((gflag & GRP_RESIZE) && + g->grp_size > GRP_SET_SIZE_DEFAULT && + ((g->grp_size - 1) & g->grp_size) == 0) + group_shrink_set(g); + + return (0); +} + +/* + * Expand the capacity of group "g" so that it may + * contain at least "n" elements + */ +void +group_expand(group_t *g, uint_t n) +{ + while 
(g->grp_capacity < n) + group_grow_set(g); +} + +/* + * Upsize a group's holding capacity + */ +static void +group_grow_set(group_t *g) +{ + uint_t cap_old, cap_new; + void **set_old, **set_new; + + cap_old = g->grp_capacity; + set_old = g->grp_set; + + /* + * The array size grows in powers of two + */ + if ((cap_new = (cap_old << 1)) == 0) { + /* + * The set is unallocated. + * Allocate a default sized set. + */ + cap_new = GRP_SET_SIZE_DEFAULT; + g->grp_set = kmem_zalloc(cap_new * sizeof (void *), KM_SLEEP); + g->grp_capacity = cap_new; + } else { + /* + * Allocate a newly sized array, + * copy the data, and free the old array. + */ + set_new = kmem_zalloc(cap_new * sizeof (void *), KM_SLEEP); + (void) kcopy(set_old, set_new, cap_old * sizeof (void *)); + g->grp_set = set_new; + g->grp_capacity = cap_new; + kmem_free(set_old, cap_old * sizeof (void *)); + } + /* + * The new array size should be a power of two + */ + ASSERT(((cap_new - 1) & cap_new) == 0); +} + +/* + * Downsize a group's holding capacity + */ +static void +group_shrink_set(group_t *g) +{ + uint_t cap_old, cap_new; + void **set_old, **set_new; + + cap_old = g->grp_capacity; + set_old = g->grp_set; + + /* + * The group's existing array size must already + * be a power of two + */ + ASSERT(((cap_old - 1) & cap_old) == 0); + cap_new = cap_old >> 1; + + /* + * GRP_SET_SIZE_DEFAULT is the minumum set size. + */ + if (cap_new < GRP_SET_SIZE_DEFAULT) + return; + + set_new = kmem_zalloc(cap_new * sizeof (void *), KM_SLEEP); + (void) kcopy(set_old, set_new, cap_new * sizeof (void *)); + g->grp_capacity = cap_new; + g->grp_set = set_new; + + ASSERT(((cap_new - 1) & cap_new) == 0); + kmem_free(set_old, cap_old * sizeof (void *)); +} + +/* + * Pack a group's set + * Element order is not preserved + */ +static void +group_pack_set(void **set, uint_t sz) +{ + uint_t i, j, free; + + free = (uint_t)-1; + + for (i = 0; i < sz; i++) { + if (set[i] == NULL && free == (uint_t)-1) { + /* + * Found a new free slot. 
+ * Start packing from here. + */ + free = i; + } else if (set[i] != NULL && free != (uint_t)-1) { + /* + * Found a slot to pack into + * an earlier free slot. + */ + ASSERT(set[free] == NULL); + set[free] = set[i]; + set[i] = NULL; + + /* + * Find the next free slot + */ + for (j = free + 1; set[j] != NULL; j++) { + ASSERT(j <= i); + if (j == i) + break; + } + if (set[j] == NULL) + free = j; + else + free = (uint_t)-1; + } + } +} + +/* + * Initialize a group iterator cookie + */ +void +group_iter_init(group_iter_t *iter) +{ + *iter = 0; +} + +/* + * Iterate over the elements in a group + */ +void * +group_iterate(group_t *g, group_iter_t *iter) +{ + uint_t idx = *iter; + void *data = NULL; + + while (idx < g->grp_size) { + data = g->grp_set[idx++]; + if (data != NULL) + break; + } + *iter = idx; + + return (data); +} + +/* + * Indexed access to a group's elements + */ +void * +group_access_at(group_t *g, uint_t idx) +{ + if (idx >= g->grp_capacity) + return (NULL); + + return (g->grp_set[idx]); +} + +/* + * Add a new ordered group element at specified + * index. The group must already be of sufficient + * capacity to hold an element at the specified index. + * + * Returns 0 if addition was sucessful, and -1 if the + * addition failed because the table was too small + */ +int +group_add_at(group_t *g, void *e, uint_t idx) +{ + if (idx >= g->grp_capacity) + return (-1); + + if (idx >= g->grp_size) + g->grp_size = idx + 1; + + ASSERT(g->grp_set[idx] == NULL); + g->grp_set[idx] = e; + return (0); +} + +/* + * Remove the entry at the specified index + */ +void +group_remove_at(group_t *g, uint_t idx) +{ + ASSERT(idx < g->grp_capacity); + g->grp_set[idx] = NULL; +} diff --git a/usr/src/uts/common/os/lgrp.c b/usr/src/uts/common/os/lgrp.c index 83f67e1088..2007f7b158 100644 --- a/usr/src/uts/common/os/lgrp.c +++ b/usr/src/uts/common/os/lgrp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -89,7 +89,7 @@ #include <sys/cmn_err.h> #include <sys/kstat.h> #include <sys/sysmacros.h> -#include <sys/chip.h> +#include <sys/pg.h> #include <sys/promif.h> #include <sys/sdt.h> @@ -314,8 +314,6 @@ lgrp_root_init(void) klgrpset_clear(lgrp_root->lgrp_children); klgrpset_clear(lgrp_root->lgrp_leaves); lgrp_root->lgrp_parent = NULL; - lgrp_root->lgrp_chips = NULL; - lgrp_root->lgrp_chipcnt = 0; lgrp_root->lgrp_latency = lgrp_plat_latency(hand, hand); for (i = 0; i < LGRP_RSRC_COUNT; i++) @@ -679,7 +677,6 @@ lgrp_cpu_init(struct cpu *cp) lgrp_t *my_lgrp; lgrp_id_t lgrpid; struct cpu *cptr; - struct chip *chp; /* * This is the first time through if the resource set @@ -795,33 +792,6 @@ lgrp_cpu_init(struct cpu *cp) cptr->cpu_prev_lgrp = cp; } my_lgrp->lgrp_cpucnt++; - - /* - * Add this cpu's chip to the per lgroup list - * if necessary - */ - if (cp->cpu_chip->chip_lgrp == NULL) { - struct chip *lcpr; - - chp = cp->cpu_chip; - - if (my_lgrp->lgrp_chipcnt == 0) { - my_lgrp->lgrp_chips = chp; - chp->chip_next_lgrp = - chp->chip_prev_lgrp = chp; - } else { - lcpr = my_lgrp->lgrp_chips; - chp->chip_next_lgrp = lcpr; - chp->chip_prev_lgrp = - lcpr->chip_prev_lgrp; - lcpr->chip_prev_lgrp->chip_next_lgrp = - chp; - lcpr->chip_prev_lgrp = chp; - } - chp->chip_lgrp = my_lgrp; - chp->chip_balance = chp->chip_next_lgrp; - my_lgrp->lgrp_chipcnt++; - } } lgrp_t * @@ -890,8 +860,6 @@ lgrp_create(void) my_lgrp->lgrp_cpu = NULL; my_lgrp->lgrp_cpucnt = 0; - my_lgrp->lgrp_chips = NULL; - my_lgrp->lgrp_chipcnt = 0; if (my_lgrp->lgrp_kstat != NULL) lgrp_kstat_reset(lgrpid); @@ -945,8 +913,6 @@ lgrp_destroy(lgrp_t *lgrp) lgrp->lgrp_cpu = NULL; lgrp->lgrp_cpucnt = 0; - lgrp->lgrp_chipcnt = 0; - lgrp->lgrp_chips = NULL; nlgrps--; } @@ -1022,7 +988,6 @@ lgrp_cpu_fini(struct cpu *cp, lgrp_id_t lgrpid) lgrp_t *my_lgrp; struct cpu *prev; struct cpu *next; - chip_t *chp; 
ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized); @@ -1042,42 +1007,6 @@ lgrp_cpu_fini(struct cpu *cp, lgrp_id_t lgrpid) my_lgrp->lgrp_cpucnt--; /* - * If the last CPU on it's chip is being offlined - * then remove this chip from the per lgroup list. - * - * This is also done for the boot CPU when it needs - * to move between lgroups as a consequence of - * null proc lpa. - */ - chp = cp->cpu_chip; - if (chp->chip_ncpu == 0 || !lgrp_initialized) { - - chip_t *chpp; - - if (--my_lgrp->lgrp_chipcnt == 0) - my_lgrp->lgrp_chips = NULL; - else if (my_lgrp->lgrp_chips == chp) - my_lgrp->lgrp_chips = chp->chip_next_lgrp; - - /* - * Walk this lgroup's chip list looking for chips that - * may try to balance against the one that's leaving - */ - for (chpp = chp->chip_next_lgrp; chpp != chp; - chpp = chpp->chip_next_lgrp) { - if (chpp->chip_balance == chp) - chpp->chip_balance = chp->chip_next_lgrp; - } - - chp->chip_prev_lgrp->chip_next_lgrp = chp->chip_next_lgrp; - chp->chip_next_lgrp->chip_prev_lgrp = chp->chip_prev_lgrp; - - chp->chip_next_lgrp = chp->chip_prev_lgrp = NULL; - chp->chip_lgrp = NULL; - chp->chip_balance = NULL; - } - - /* * Removing last CPU in lgroup, so update lgroup topology */ if (my_lgrp->lgrp_cpucnt == 0) { @@ -1661,7 +1590,7 @@ lgrp_phys_to_lgrp(u_longlong_t physaddr) * Return the leaf lgroup containing the given CPU * * The caller needs to take precautions necessary to prevent - * "cpu" from going away across a call to this function. + * "cpu", and it's lpl from going away across a call to this function. * hint: kpreempt_disable()/kpreempt_enable() */ static lgrp_t * diff --git a/usr/src/uts/common/os/pg.c b/usr/src/uts/common/os/pg.c new file mode 100644 index 0000000000..cb8295b38e --- /dev/null +++ b/usr/src/uts/common/os/pg.c @@ -0,0 +1,624 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/thread.h> +#include <sys/cpuvar.h> +#include <sys/cpupart.h> +#include <sys/kmem.h> +#include <sys/cmn_err.h> +#include <sys/kstat.h> +#include <sys/processor.h> +#include <sys/disp.h> +#include <sys/group.h> +#include <sys/pg.h> + +/* + * Processor groups + * + * With the introduction of Chip Multi-Threaded (CMT) processor architectures, + * it is no longer necessarily true that a given physical processor module + * will present itself as a single schedulable entity (cpu_t). Rather, each + * chip and/or processor core may present itself as one or more "logical" CPUs. + * + * The logical CPUs presented may share physical components such as caches, + * data pipes, execution pipelines, FPUs, etc. It is advantageous to have the + * kernel be aware of the relationships existing between logical CPUs so that + * the appropriate optmizations may be employed. + * + * The processor group abstraction represents a set of logical CPUs that + * generally share some sort of physical or characteristic relationship. 
+ * + * In the case of a physical sharing relationship, the CPUs in the group may + * share a pipeline, cache or floating point unit. In the case of a logical + * relationship, a PG may represent the set of CPUs in a processor set, or the + * set of CPUs running at a particular clock speed. + * + * The generic processor group structure, pg_t, contains the elements generic + * to a group of CPUs. Depending on the nature of the CPU relationship + * (LOGICAL or PHYSICAL), a pointer to a pg may be recast to a "view" of that + * PG where more specific data is represented. + * + * As an example, a PG representing a PHYSICAL relationship, may be recast to + * a pghw_t, where data further describing the hardware sharing relationship + * is maintained. See pghw.c and pghw.h for details on physical PGs. + * + * At this time a more specialized casting of a PG representing a LOGICAL + * relationship has not been implemented, but the architecture allows for this + * in the future. + * + * Processor Group Classes + * + * Processor group consumers may wish to maintain and associate specific + * data with the PGs they create. For this reason, a mechanism for creating + * class specific PGs exists. Classes may overload the default functions for + * creating, destroying, and associating CPUs with PGs, and may also register + * class specific callbacks to be invoked when the CPU related system + * configuration changes. Class specific data is stored/associated with + * PGs by incorporating the pg_t (or pghw_t, as appropriate), as the first + * element of a class specific PG object. 
In memory, such a structure may look + * like: + * + * ----------------------- - - - + * | common | | | | <--(pg_t *) + * ----------------------- | | - + * | HW specific | | | <-----(pghw_t *) + * ----------------------- | - + * | class specific | | <-------(pg_cmt_t *) + * ----------------------- - + * + * Access to the PG class specific data can be had by casting a pointer to + * it's class specific view. + */ + +static pg_t *pg_alloc_default(pg_class_t); +static void pg_free_default(pg_t *); + +/* + * Bootstrap CPU specific PG data + * See pg_cpu_bootstrap() + */ +static cpu_pg_t bootstrap_pg_data; + +/* + * Bitset of allocated PG ids (they are sequential) + * and the next free id in the set. + */ +static bitset_t pg_id_set; +static pgid_t pg_id_next = 0; + +/* + * Default and externed PG ops vectors + */ +static struct pg_ops pg_ops_default = { + pg_alloc_default, /* alloc */ + pg_free_default, /* free */ + NULL, /* cpu_init */ + NULL, /* cpu_fini */ + NULL, /* cpu_active */ + NULL, /* cpu_inactive */ + NULL, /* cpupart_in */ + NULL, /* cpupart_out */ + NULL, /* cpupart_move */ + NULL, /* cpu_belongs */ +}; + +/* + * Class specific PG allocation callbacks + */ +#define PG_ALLOC(class) \ + (pg_classes[class].pgc_ops->alloc ? \ + pg_classes[class].pgc_ops->alloc() : \ + pg_classes[pg_default_cid].pgc_ops->alloc()) + +#define PG_FREE(pg) \ + ((pg)->pg_class->pgc_ops->free ? \ + (pg)->pg_class->pgc_ops->free(pg) : \ + pg_classes[pg_default_cid].pgc_ops->free(pg)) \ + + +/* + * Class specific membership test callback + */ +#define PG_CPU_BELONGS(pg, cp) \ + ((pg)->pg_class->pgc_ops->cpu_belongs ? 
\ + (pg)->pg_class->pgc_ops->cpu_belongs(pg, cp) : 0) \ + +/* + * CPU configuration callbacks + */ +#define PG_CPU_INIT(class, cp) \ +{ \ + if (pg_classes[class].pgc_ops->cpu_init) \ + pg_classes[class].pgc_ops->cpu_init(cp); \ +} + +#define PG_CPU_FINI(class, cp) \ +{ \ + if (pg_classes[class].pgc_ops->cpu_fini) \ + pg_classes[class].pgc_ops->cpu_fini(cp); \ +} + +#define PG_CPU_ACTIVE(class, cp) \ +{ \ + if (pg_classes[class].pgc_ops->cpu_active) \ + pg_classes[class].pgc_ops->cpu_active(cp); \ +} + +#define PG_CPU_INACTIVE(class, cp) \ +{ \ + if (pg_classes[class].pgc_ops->cpu_inactive) \ + pg_classes[class].pgc_ops->cpu_inactive(cp); \ +} + +/* + * CPU / cpupart configuration callbacks + */ +#define PG_CPUPART_IN(class, cp, pp) \ +{ \ + if (pg_classes[class].pgc_ops->cpupart_in) \ + pg_classes[class].pgc_ops->cpupart_in(cp, pp); \ +} + +#define PG_CPUPART_OUT(class, cp, pp) \ +{ \ + if (pg_classes[class].pgc_ops->cpupart_out) \ + pg_classes[class].pgc_ops->cpupart_out(cp, pp); \ +} + +#define PG_CPUPART_MOVE(class, cp, old, new) \ +{ \ + if (pg_classes[class].pgc_ops->cpupart_move) \ + pg_classes[class].pgc_ops->cpupart_move(cp, old, new); \ +} + + + +static pg_class_t *pg_classes; +static int pg_nclasses; + +static pg_cid_t pg_default_cid; + +/* + * Initialze common PG subsystem. 
Perform CPU 0 initialization + */ +void +pg_init(void) +{ + pg_default_cid = + pg_class_register("default", &pg_ops_default, PGR_LOGICAL); +} + +/* + * Perform CPU 0 initialization + */ +void +pg_cpu0_init(void) +{ + extern void pghw_physid_create(); + + /* + * Create the physical ID cache for the boot CPU + */ + pghw_physid_create(CPU); + + /* + * pg_cpu_* require that cpu_lock be held + */ + mutex_enter(&cpu_lock); + + pg_cpu_init(CPU); + pg_cpupart_in(CPU, &cp_default); + pg_cpu_active(CPU); + + mutex_exit(&cpu_lock); +} + +/* + * Register a new PG class + */ +pg_cid_t +pg_class_register(char *name, struct pg_ops *ops, pg_relation_t relation) +{ + pg_class_t *newclass; + pg_class_t *classes_old; + id_t cid; + + mutex_enter(&cpu_lock); + + /* + * Allocate a new pg_class_t in the pg_classes array + */ + if (pg_nclasses == 0) { + pg_classes = kmem_zalloc(sizeof (pg_class_t), KM_SLEEP); + } else { + classes_old = pg_classes; + pg_classes = + kmem_zalloc(sizeof (pg_class_t) * (pg_nclasses + 1), + KM_SLEEP); + (void) kcopy(classes_old, pg_classes, + sizeof (pg_class_t) * pg_nclasses); + kmem_free(classes_old, sizeof (pg_class_t) * pg_nclasses); + } + + cid = pg_nclasses++; + newclass = &pg_classes[cid]; + + (void) strncpy(newclass->pgc_name, name, PG_CLASS_NAME_MAX); + newclass->pgc_id = cid; + newclass->pgc_ops = ops; + newclass->pgc_relation = relation; + + mutex_exit(&cpu_lock); + + return (cid); +} + +/* + * Try to find an existing pg in set in which to place cp. + * Returns the pg if found, and NULL otherwise. + * In the event that the CPU could belong to multiple + * PGs in the set, the first matching PG will be returned. 
+ */ +pg_t * +pg_cpu_find_pg(cpu_t *cp, group_t *set) +{ + pg_t *pg; + group_iter_t i; + + group_iter_init(&i); + while ((pg = group_iterate(set, &i)) != NULL) { + /* + * Ask the class if the CPU belongs here + */ + if (PG_CPU_BELONGS(pg, cp)) + return (pg); + } + return (NULL); +} + +/* + * Iterate over the CPUs in a PG after initializing + * the iterator with PG_CPU_ITR_INIT() + */ +cpu_t * +pg_cpu_next(pg_cpu_itr_t *itr) +{ + cpu_t *cpu; + pg_t *pg = itr->pg; + + cpu = group_iterate(&pg->pg_cpus, &itr->position); + return (cpu); +} + +/* + * Create a PG of a given class. + * This routine may block. + */ +pg_t * +pg_create(pg_cid_t cid) +{ + pg_t *pg; + pgid_t id; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Call the class specific PG allocation routine + */ + pg = PG_ALLOC(cid); + pg->pg_class = &pg_classes[cid]; + pg->pg_relation = pg->pg_class->pgc_relation; + + /* + * Find the next free sequential pg id + */ + do { + if (pg_id_next >= bitset_capacity(&pg_id_set)) + bitset_resize(&pg_id_set, pg_id_next + 1); + id = pg_id_next++; + } while (bitset_in_set(&pg_id_set, id)); + + pg->pg_id = id; + bitset_add(&pg_id_set, pg->pg_id); + + /* + * Create the PG's CPU group + */ + group_create(&pg->pg_cpus); + + return (pg); +} + +/* + * Destroy a PG. + * This routine may block. + */ +void +pg_destroy(pg_t *pg) +{ + ASSERT(MUTEX_HELD(&cpu_lock)); + + group_destroy(&pg->pg_cpus); + + /* + * Unassign the pg_id + */ + if (pg_id_next > pg->pg_id) + pg_id_next = pg->pg_id; + bitset_del(&pg_id_set, pg->pg_id); + + /* + * Invoke the class specific de-allocation routine + */ + PG_FREE(pg); +} + +/* + * Add the CPU "cp" to processor group "pg" + * This routine may block. 
+ */ +void +pg_cpu_add(pg_t *pg, cpu_t *cp) +{ + int err; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* This adds the CPU to the PG's CPU group */ + err = group_add(&pg->pg_cpus, cp, GRP_RESIZE); + ASSERT(err == 0); + + /* This adds the PG to the CPUs PG group */ + ASSERT(cp->cpu_pg != &bootstrap_pg_data); + err = group_add(&cp->cpu_pg->pgs, pg, GRP_RESIZE); + ASSERT(err == 0); +} + +/* + * Remove "cp" from "pg". + * This routine may block. + */ +void +pg_cpu_delete(pg_t *pg, cpu_t *cp) +{ + int err; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* Remove the CPU from the PG */ + err = group_remove(&pg->pg_cpus, cp, GRP_RESIZE); + ASSERT(err == 0); + + /* Remove the PG from the CPU's PG group */ + ASSERT(cp->cpu_pg != &bootstrap_pg_data); + err = group_remove(&cp->cpu_pg->pgs, pg, GRP_RESIZE); + ASSERT(err == 0); +} + +/* + * Allocate a CPU's PG data. This hangs off struct cpu at cpu_pg + */ +static cpu_pg_t * +pg_cpu_data_alloc(void) +{ + cpu_pg_t *pgd; + + pgd = kmem_zalloc(sizeof (cpu_pg_t), KM_SLEEP); + group_create(&pgd->pgs); + group_create(&pgd->cmt_pgs); + + return (pgd); +} + +/* + * Free the CPU's PG data. + */ +static void +pg_cpu_data_free(cpu_pg_t *pgd) +{ + group_destroy(&pgd->pgs); + group_destroy(&pgd->cmt_pgs); + kmem_free(pgd, sizeof (cpu_pg_t)); +} + +/* + * A new CPU is coming into the system, either via booting or DR. + * Allocate it's PG data, and notify all registered classes about + * the new CPU. + * + * This routine may block. + */ +void +pg_cpu_init(cpu_t *cp) +{ + pg_cid_t i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Allocate and size the per CPU pg data + */ + cp->cpu_pg = pg_cpu_data_alloc(); + + /* + * Notify all registered classes about the new CPU + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPU_INIT(i, cp); +} + +/* + * This CPU is being deleted from the system. Notify the classes + * and free up the CPU's PG data. 
+ */ +void +pg_cpu_fini(cpu_t *cp) +{ + pg_cid_t i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * This can happen if the CPU coming into the system + * failed to power on. + */ + if (cp->cpu_pg == NULL || + cp->cpu_pg == &bootstrap_pg_data) + return; + + for (i = 0; i < pg_nclasses; i++) + PG_CPU_FINI(i, cp); + + pg_cpu_data_free(cp->cpu_pg); + cp->cpu_pg = NULL; +} + +/* + * This CPU is becoming active (online) + * This routine may not block as it is called from paused CPUs + * context. + */ +void +pg_cpu_active(cpu_t *cp) +{ + pg_cid_t i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Notify all registered classes about the new CPU + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPU_ACTIVE(i, cp); +} + +/* + * This CPU is going inactive (offline) + * This routine may not block, as it is called from paused + * CPUs context. + */ +void +pg_cpu_inactive(cpu_t *cp) +{ + pg_cid_t i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Notify all registered classes about the new CPU + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPU_INACTIVE(i, cp); +} + +/* + * Invoked when the CPU is about to move into the partition + * This routine may block. + */ +void +pg_cpupart_in(cpu_t *cp, cpupart_t *pp) +{ + int i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Notify all registered classes that the + * CPU is about to enter the CPU partition + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPUPART_IN(i, cp, pp); +} + +/* + * Invoked when the CPU is about to move out of the partition + * This routine may block. + */ +/*ARGSUSED*/ +void +pg_cpupart_out(cpu_t *cp, cpupart_t *pp) +{ + int i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Notify all registered classes that the + * CPU is about to leave the CPU partition + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPUPART_OUT(i, cp, pp); +} + +/* + * Invoked when the CPU is *moving* partitions. + * + * This routine may not block, as it is called from paused CPUs + * context. 
+ */ +void +pg_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp) +{ + int i; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Notify all registered classes that the + * CPU is moving from the old CPU partition to the new one + */ + for (i = 0; i < pg_nclasses; i++) + PG_CPUPART_MOVE(i, cp, oldpp, newpp); +} + +/* + * Provide the specified CPU a bootstrap pg + * This is needed to allow sane behaviour if any PG consuming + * code needs to deal with a partially initialized CPU + */ +void +pg_cpu_bootstrap(cpu_t *cp) +{ + cp->cpu_pg = &bootstrap_pg_data; +} + +/*ARGSUSED*/ +static pg_t * +pg_alloc_default(pg_class_t class) +{ + return (kmem_zalloc(sizeof (pg_t), KM_SLEEP)); +} + +/*ARGSUSED*/ +static void +pg_free_default(struct pg *pg) +{ + kmem_free(pg, sizeof (pg_t)); +} diff --git a/usr/src/uts/common/os/pghw.c b/usr/src/uts/common/os/pghw.c new file mode 100644 index 0000000000..e2dc2a38f2 --- /dev/null +++ b/usr/src/uts/common/os/pghw.c @@ -0,0 +1,420 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/thread.h> +#include <sys/cpuvar.h> +#include <sys/kmem.h> +#include <sys/cmn_err.h> +#include <sys/group.h> +#include <sys/pg.h> +#include <sys/pghw.h> + +/* + * Processor Groups: Hardware sharing relationship layer + * + * This file implements an extension to Processor Groups to capture + * hardware sharing relationships existing between logical CPUs. Examples of + * hardware sharing relationships include shared caches on some CMT + * processor architectures, or shared local memory controllers on NUMA + * based system architectures. + * + * The pghw_t structure represents the extended PG. The first member + * of the structure is the generic pg_t with the pghw specific members + * following. The generic pg_t *must* remain the first member of the + * structure as the code uses casting of structure references to access + * the generic pg_t structure elements. + * + * In addition to the generic CPU grouping, physical PGs have a hardware + * sharing relationship enumerated "type", and an instance id. The enumerated + * type is defined by the pghw_type_t enumeration, while the instance id + * uniquely identifies the sharing instance from among others of the same + * hardware sharing type. + * + * The physical PGs are organized into an overall hierarchy, and are tracked + * in a number of different per CPU, and per pghw_type_t type groups.
+ * As an example: + * + * ------------- + * | pg_hw | + * | (group_t) | + * ------------- + * || ============================ + * ||\\-----------------------// \\ \\ + * || | hwset (PGC_HW_CHIP) | ------------- ------------- + * || | (group_t) | | pghw_t | | pghw_t | + * || ----------------------- | chip 0 | | chip 1 | + * || ------------- ------------- + * || \\ \\ \\ \\ \\ \\ \\ \\ + * || cpu cpu cpu cpu cpu cpu cpu cpu + * || + * || ============================ + * ||\\-----------------------// \\ \\ + * || | hwset (PGC_HW_IPIPE)| ------------- ------------- + * || | (group_t) | | pghw_t | | pghw_t | + * || ----------------------- | ipipe 0 | | ipipe 1 | + * || ------------- ------------- + * || \\ \\ \\ \\ + * || cpu cpu cpu cpu + * ... + * + * + * The top level pg_hw is a group of "hwset" groups. Each hwset holds a group + * of physical PGs of the same hardware sharing type. Within each hwset, the + * PG's instance id uniquely identifies the grouping relationship among other + * groupings of the same sharing type. The instance id for a grouping is + * platform defined, and in some cases may be used by platform code as a handle + * to search for a particular relationship instance. + * + * Each physical PG (by virtue of the embedded pg_t) contains a group of CPUs + * that participate in the sharing relationship. Each CPU also has associated + * with it a grouping tracking the PGs in which the CPU belongs. This can be + * used to iterate over the various relationships in which the CPU participates + * (the CPU's chip, cache, lgroup, etc.). + * + * The hwsets are created dynamically as new hardware sharing relationship types + * are instantiated. They are never destroyed, as once a given relationship + * type appears in the system, it is quite likely that at least one instance of + * that relationship will always persist as long as the system is running.
+ */ + +static group_t *pg_hw; /* top level pg hw group */ + +/* + * Lookup table mapping hardware sharing relationships with hierarchy levels + */ +static int pghw_level_table[PGHW_NUM_COMPONENTS]; + +/* + * Physical PG kstats + */ +struct pghw_kstat { + kstat_named_t pg_id; + kstat_named_t pg_class; + kstat_named_t pg_ncpus; + kstat_named_t pg_instance_id; + kstat_named_t pg_hw; +} pghw_kstat = { + { "id", KSTAT_DATA_UINT64 }, + { "pg_class", KSTAT_DATA_STRING }, + { "ncpus", KSTAT_DATA_UINT64 }, + { "instance_id", KSTAT_DATA_UINT64 }, + { "hardware", KSTAT_DATA_STRING }, +}; + +kmutex_t pghw_kstat_lock; + +/* + * hwset operations + */ +static group_t *pghw_set_create(pghw_type_t); +static void pghw_set_add(group_t *, pghw_t *); +static void pghw_set_remove(group_t *, pghw_t *); + +/* + * Initialize the physical portion of a physical PG + */ +void +pghw_init(pghw_t *pg, cpu_t *cp, pghw_type_t hw) +{ + group_t *hwset; + + if ((hwset = pghw_set_lookup(hw)) == NULL) { + /* + * Haven't seen this hardware type yet + */ + hwset = pghw_set_create(hw); + } + + pghw_set_add(hwset, pg); + pg->pghw_hw = hw; + pg->pghw_instance = + pg_plat_hw_instance_id(cp, hw); + pghw_kstat_create(pg); +} + +/* + * Teardown the physical portion of a physical PG + */ +void +pghw_fini(pghw_t *pg) +{ + group_t *hwset; + + hwset = pghw_set_lookup(pg->pghw_hw); + ASSERT(hwset != NULL); + + pghw_set_remove(hwset, pg); + pg->pghw_instance = (id_t)PGHW_INSTANCE_ANON; + pg->pghw_hw = (pghw_type_t)-1; + + if (pg->pghw_kstat) + kstat_delete(pg->pghw_kstat); +} + +/* + * Find an existing physical PG in which to place + * the given CPU for the specified hardware sharing + * relationship + */ +pghw_t * +pghw_place_cpu(cpu_t *cp, pghw_type_t hw) +{ + group_t *hwset; + + if ((hwset = pghw_set_lookup(hw)) == NULL) { + return (NULL); + } + + return ((pghw_t *)pg_cpu_find_pg(cp, hwset)); +} + +/* + * Find the pg representing the hw sharing relationship in which + * cp belongs + */ +pghw_t * 
+pghw_find_pg(cpu_t *cp, pghw_type_t hw) +{ + group_iter_t i; + pghw_t *pg; + + group_iter_init(&i); + while ((pg = group_iterate(&cp->cpu_pg->pgs, &i)) != NULL) { + if (pg->pghw_hw == hw) + return (pg); + } + return (NULL); +} + +/* + * Find the PG of the given hardware sharing relationship + * type with the given instance id + */ +pghw_t * +pghw_find_by_instance(id_t id, pghw_type_t hw) +{ + group_iter_t i; + group_t *set; + pghw_t *pg; + + set = pghw_set_lookup(hw); + if (!set) + return (NULL); + + group_iter_init(&i); + while ((pg = group_iterate(set, &i)) != NULL) { + if (pg->pghw_instance == id) + return (pg); + } + return (NULL); +} + +/* + * CPUs physical ID cache creation / destruction + * The cache's elements are initialized to the CPU's id + */ +void +pghw_physid_create(cpu_t *cp) +{ + int i; + + cp->cpu_physid = kmem_alloc(sizeof (cpu_physid_t), KM_SLEEP); + + for (i = 0; i < (sizeof (cpu_physid_t) / sizeof (id_t)); i++) { + ((id_t *)cp->cpu_physid)[i] = cp->cpu_id; + } +} + +void +pghw_physid_destroy(cpu_t *cp) +{ + if (cp->cpu_physid) { + kmem_free(cp->cpu_physid, sizeof (cpu_physid_t)); + cp->cpu_physid = NULL; + } +} + +/* + * Return a sequential level identifier for the specified + * hardware sharing relationship + */ +int +pghw_level(pghw_type_t hw) +{ + return (pg_plat_hw_level(hw)); +} + +/* + * Create a new, empty hwset. + * This routine may block, and must not be called from any + * paused CPU context. + */ +static group_t * +pghw_set_create(pghw_type_t hw) +{ + group_t *g; + int ret; + + /* + * Create the top level PG hw group if it doesn't already exist + * This is a "set" of hardware sets, that is ordered (and indexed) + * by the pghw_type_t enum. + */ + if (pg_hw == NULL) { + pg_hw = kmem_alloc(sizeof (group_t), KM_SLEEP); + group_create(pg_hw); + group_expand(pg_hw, (uint_t)PGHW_NUM_COMPONENTS); + } + + /* + * Create the new hwset + * Add it to the top level pg_hw group. 
+ */ + g = kmem_alloc(sizeof (group_t), KM_SLEEP); + group_create(g); + + ret = group_add_at(pg_hw, g, (uint_t)hw); + ASSERT(ret == 0); + + /* + * Update the table that maps hardware sharing relationships + * to hierarchy levels + */ + ASSERT(pghw_level_table[hw] == NULL); + pghw_level_table[hw] = pg_plat_hw_level(hw); + + return (g); +} + +/* + * Find the hwset associated with the given hardware sharing type + */ +group_t * +pghw_set_lookup(pghw_type_t hw) +{ + group_t *hwset; + + if (pg_hw == NULL) + return (NULL); + + hwset = GROUP_ACCESS(pg_hw, (uint_t)hw); + return (hwset); +} + +/* + * Add a PG to a hwset + */ +static void +pghw_set_add(group_t *hwset, pghw_t *pg) +{ + (void) group_add(hwset, pg, GRP_RESIZE); +} + +/* + * Remove a PG from a hwset + */ +static void +pghw_set_remove(group_t *hwset, pghw_t *pg) +{ + int result; + + result = group_remove(hwset, pg, GRP_RESIZE); + ASSERT(result == 0); +} + + +/* + * Return a string name given a pg_hw sharing type + */ +#define PGHW_TYPE_NAME_MAX 8 + +static char * +pghw_type_string(pghw_type_t hw) +{ + switch (hw) { + case PGHW_IPIPE: + return ("ipipe"); + case PGHW_CACHE: + return ("cache"); + case PGHW_FPU: + return ("fpu"); + case PGHW_CHIP: + return ("chip"); + case PGHW_MEMORY: + return ("memory"); + default: + return ("unknown"); + } +} + +/* + * Create / Update routines for PG hw kstats + * + * It is the intention of these kstats to provide some level + * of informational / debugging observability into the types + * and nature of the system's detected hardware sharing relationships + */ +void +pghw_kstat_create(pghw_t *pg) +{ + /* + * Create a physical pg kstat + */ + if ((pg->pghw_kstat = kstat_create("pg", ((pg_t *)pg)->pg_id, + "pg", "pg", KSTAT_TYPE_NAMED, + sizeof (pghw_kstat) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL)) != NULL) { + pg->pghw_kstat->ks_data_size += PG_CLASS_NAME_MAX; + pg->pghw_kstat->ks_data_size += PGHW_TYPE_NAME_MAX; + pg->pghw_kstat->ks_lock = &pghw_kstat_lock; + 
pg->pghw_kstat->ks_data = &pghw_kstat; + pg->pghw_kstat->ks_update = pghw_kstat_update; + pg->pghw_kstat->ks_private = pg; + kstat_install(pg->pghw_kstat); + } +} + +int +pghw_kstat_update(kstat_t *ksp, int rw) +{ + struct pghw_kstat *pgsp = &pghw_kstat; + pghw_t *pg = ksp->ks_private; + + if (rw == KSTAT_WRITE) + return (EACCES); + + pgsp->pg_id.value.ui64 = ((pg_t *)pg)->pg_id; + pgsp->pg_ncpus.value.ui64 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus); + pgsp->pg_instance_id.value.ui64 = (uint64_t)pg->pghw_instance; + kstat_named_setstr(&pgsp->pg_class, ((pg_t *)pg)->pg_class->pgc_name); + kstat_named_setstr(&pgsp->pg_hw, pghw_type_string(pg->pghw_hw)); + + return (0); +} diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index aecbb6b281..c0947b12a5 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -97,6 +97,7 @@ CHKHDRS= \ avl_impl.h \ battery.h \ bitmap.h \ + bitset.h \ bl.h \ bofi.h \ bofi_impl.h \ @@ -111,7 +112,6 @@ CHKHDRS= \ callo.h \ ccompile.h \ cdio.h \ - chip.h \ cladm.h \ class.h \ clconf.h \ @@ -246,6 +246,7 @@ CHKHDRS= \ gfs.h \ gld.h \ gldpriv.h \ + group.h \ hdio.h \ hook.h \ hook_event.h \ @@ -370,6 +371,8 @@ CHKHDRS= \ pctypes.h \ pem.h \ pfmod.h \ + pg.h \ + pghw.h \ physmem.h \ pm.h \ policy.h \ diff --git a/usr/src/uts/common/sys/bitset.h b/usr/src/uts/common/sys/bitset.h new file mode 100644 index 0000000000..98e5872892 --- /dev/null +++ b/usr/src/uts/common/sys/bitset.h @@ -0,0 +1,76 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _BITSET_H +#define _BITSET_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#if (defined(_KERNEL) || defined(_KMEMUSER)) +#include <sys/bitmap.h> +#include <sys/types.h> + +typedef struct bitset { + ulong_t *bs_set; + uint_t bs_words; +} bitset_t; + +/* + * Bitset initialization / teardown + */ +void bitset_init(bitset_t *); +void bitset_fini(bitset_t *); + +/* + * Resize / query a bitset's holding capacity + */ +void bitset_resize(bitset_t *, uint_t); +uint_t bitset_capacity(bitset_t *); + +/* + * Set / clear a bit in the set + */ +void bitset_add(bitset_t *, uint_t); +void bitset_del(bitset_t *, uint_t); + +/* + * Bitset queries + */ +int bitset_in_set(bitset_t *, uint_t); +int bitset_is_null(bitset_t *); +uint_t bitset_find(bitset_t *); + + +#endif /* !_KERNEL && !_KMEMUSER */ + +#ifdef __cplusplus +} +#endif + +#endif /* _BITSET_H */ diff --git a/usr/src/uts/common/sys/chip.h b/usr/src/uts/common/sys/chip.h deleted file mode 100644 index 8889756884..0000000000 --- a/usr/src/uts/common/sys/chip.h +++ /dev/null @@ -1,207 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _CHIP_H -#define _CHIP_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * generic kernel CMT processor support - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#if (defined(_KERNEL) || defined(_KMEMUSER)) -#include <sys/cpuvar.h> -#include <sys/processor.h> -#include <sys/bitmap.h> -#include <sys/atomic.h> -#include <sys/time.h> -#include <sys/types.h> - -/* - * Chip types - */ -typedef enum chip_type { - CHIP_DEFAULT, /* Default, non CMT processor */ - CHIP_SMT, /* SMT, single core */ - CHIP_CMP_SPLIT_CACHE, /* CMP with split caches */ - CHIP_CMP_SHARED_CACHE, /* CMP with shared caches */ - CHIP_CMT, /* CMT w/ multiple cores and threads */ - CHIP_NUM_TYPES -} chip_type_t; - - -/* - * Balancing is possible if multiple chips exist in the lgroup - * but only necessary if the chip has multiple online logical CPUs - */ -#define CHIP_SHOULD_BALANCE(chp) \ - (((chp)->chip_ncpu > 1) && ((chp)->chip_next_lgrp != (chp))) - -/* - * Platform's definition of a chip's properties - */ -typedef struct chip_def { - chip_type_t chipd_type; - int chipd_rechoose_adj; - hrtime_t chipd_nosteal; -} chip_def_t; - -/* - * Per chip kstats - */ -typedef enum chip_stat_types { - CHIP_ID, /* chip "id" 
*/ - CHIP_NCPUS, /* number of active cpus */ - CHIP_NRUNNING, /* number of running threads on chip */ - CHIP_RECHOOSE, /* chip's rechoose_interval */ - CHIP_NUM_STATS /* always last */ -} chip_stat_t; - -#define CHIP_KSTAT_NAMES \ -static char *chip_kstat_names[] = { \ - \ - "chip_id", \ - "logical_cpus", \ - "chip_nrunning", \ - "chip_rechoose_interval", \ -} - -/* - * Physical processor (chip) structure. - */ -typedef struct chip { - chipid_t chip_id; /* chip's "id" */ - chipid_t chip_seqid; /* sequential id */ - struct chip *chip_prev; /* previous chip on list */ - struct chip *chip_next; /* next chip on list */ - struct chip *chip_prev_lgrp; /* prev chip in lgroup */ - struct chip *chip_next_lgrp; /* next chip in lgroup */ - chip_type_t chip_type; /* type of chip */ - uint16_t chip_ncpu; /* number of active cpus */ - uint16_t chip_ref; /* chip's reference count */ - struct cpu *chip_cpus; /* per chip cpu list */ - struct lgrp *chip_lgrp; /* chip lives in this lgroup */ - int chip_rechoose_adj; /* chip specific adjustment */ - - /* - * chip kstats - */ - kstat_t *chip_kstat; - kmutex_t chip_kstat_mutex; - struct kstat_named chip_kstat_data[CHIP_NUM_STATS]; - - struct chip *chip_balance; /* chip to balance against */ - uint32_t chip_nrunning; /* # of running threads */ - hrtime_t chip_nosteal; /* nosteal interval (nsecs) */ -} chip_t; - -/* - * IDs associating a CPU with various physical hardware - */ -typedef struct cpu_physid { - chipid_t cpu_chipid; /* CPU's physical processor */ - id_t cpu_coreid; /* CPU's physical core */ -} cpu_physid_t; - -/* - * Change the number of running threads on the chip - */ -#define CHIP_NRUNNING(chp, n) { \ - atomic_add_32(&((chp)->chip_nrunning), (n)); \ -} - -/* - * True if this CPU is active on the chip - */ -#define CHIP_CPU_ACTIVE(cp) ((cp)->cpu_next_chip != NULL) - -/* - * Sets of chips - * The "id" used here should be a chip's sequential id. 
- * (chip_seqid) - */ -#if defined(_MACHDEP) - -#define CHIP_MAX_CHIPS NCPU -#define CHIP_SET_WORDS BT_BITOUL(CHIP_MAX_CHIPS) - -typedef struct chip_set { - ulong_t csb[CHIP_SET_WORDS]; -} chip_set_t; - -extern int chip_set_isnull(chip_set_t *); - -#define CHIP_SET_ISNULL(set) chip_set_isnull(&(set)) -#define CHIP_SET_TEST(set, id) BT_TEST((set).csb, id) -#define CHIP_SET_REMOVE(set, id) BT_CLEAR((set).csb, id) -#define CHIP_SET_ADD(set, id) BT_SET((set).csb, id) - -#define CHIP_SET_ZERO(set) { \ - int _i; \ - for (_i = 0; _i < CHIP_SET_WORDS; _i++) \ - (set).csb[_i] = 0; \ -} - -#define CHIP_IN_CPUPART(chp, cp) \ - (CHIP_SET_TEST((cp)->cp_mach->mc_chipset, (chp)->chip_seqid)) - -#endif /* _MACHDEP */ - -/* - * Common kernel chip operations - */ -void chip_cpu_init(cpu_t *); -void chip_cpu_fini(cpu_t *); -void chip_cpu_assign(cpu_t *); -void chip_cpu_unassign(cpu_t *); -void chip_cpu_startup(cpu_t *); -chip_t *chip_lookup(chipid_t); -void chip_bootstrap_cpu(cpu_t *); - -void chip_cpu_move_part(cpu_t *, struct cpupart *, - struct cpupart *); - -void chip_kstat_create(chip_t *); - -/* - * Platform chip operations - */ -chipid_t chip_plat_get_chipid(cpu_t *); -id_t chip_plat_get_coreid(cpu_t *); -int chip_plat_get_clogid(cpu_t *); -void chip_plat_define_chip(cpu_t *, chip_def_t *); - -#endif /* !_KERNEL && !_KMEMUSER */ - -#ifdef __cplusplus -} -#endif - -#endif /* _CHIP_H */ diff --git a/usr/src/uts/common/sys/cmt.h b/usr/src/uts/common/sys/cmt.h new file mode 100644 index 0000000000..4638438eec --- /dev/null +++ b/usr/src/uts/common/sys/cmt.h @@ -0,0 +1,75 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _CMT_H +#define _CMT_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * CMT PG class + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#if (defined(_KERNEL) || defined(_KMEMUSER)) +#include <sys/group.h> +#include <sys/pghw.h> +#include <sys/types.h> + +/* + * CMT pg structure + */ +typedef struct pg_cmt { + struct pghw cmt_pg; /* physical grouping */ + struct group *cmt_siblings; /* CMT PGs to balance with */ + struct pg_cmt *cmt_parent; /* Parent CMT PG */ + struct group *cmt_children; /* Active children CMT PGs */ + int cmt_nchildren; /* # of children CMT PGs */ + int cmt_hint; /* hint for balancing */ + uint32_t cmt_nrunning; /* # of running threads */ + struct group cmt_cpus_actv; + bitset_t cmt_cpus_actv_set; /* bitset of active CPUs */ +} pg_cmt_t; + + +/* + * Change the number of running threads on the pg + */ +#define PG_NRUN_UPDATE(cp, n) (pg_cmt_load((cp), (n))) + +void pg_cmt_load(cpu_t *, int); +void pg_cmt_cpu_startup(cpu_t *); +int pg_cmt_can_migrate(cpu_t *, cpu_t *); + +#endif /* !_KERNEL && !_KMEMUSER */ + +#ifdef __cplusplus +} +#endif + +#endif /* _CMT_H */ diff --git a/usr/src/uts/common/sys/cpupart.h b/usr/src/uts/common/sys/cpupart.h index 0a74d17379..b9e0da4e19 100644 --- a/usr/src/uts/common/sys/cpupart.h +++ b/usr/src/uts/common/sys/cpupart.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. 
All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,7 +35,8 @@ #include <sys/pset.h> #include <sys/lgrp.h> #include <sys/lgrp_user.h> -#include <sys/chip.h> +#include <sys/pg.h> +#include <sys/bitset.h> #include <sys/time.h> #ifdef __cplusplus @@ -60,7 +61,6 @@ typedef int cpupartid_t; #if defined(_MACHDEP) struct mach_cpupart { cpuset_t mc_haltset; - chip_set_t mc_chipset; }; extern struct mach_cpupart cp_default_mach; @@ -102,6 +102,7 @@ typedef struct cpupart { uint_t cp_attr; /* bitmask of attributes */ lgrp_gen_t cp_gen; /* generation number */ lgrp_id_t cp_lgrp_hint; /* last home lgroup chosen */ + bitset_t cp_cmt_pgs; /* CMT PGs represented */ struct mach_cpupart *cp_mach; /* mach-specific */ } cpupart_t; diff --git a/usr/src/uts/common/sys/cpuvar.h b/usr/src/uts/common/sys/cpuvar.h index f413431b0d..1e467e4b64 100644 --- a/usr/src/uts/common/sys/cpuvar.h +++ b/usr/src/uts/common/sys/cpuvar.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -100,8 +100,6 @@ typedef struct cpu { klwp_id_t cpu_fpowner; /* currently loaded fpu owner */ struct cpupart *cpu_part; /* partition with this CPU */ struct lgrp_ld *cpu_lpl; /* pointer to this cpu's load */ - struct chip *cpu_chip; /* cpu's chip data */ - int cpu_rechoose; /* cpu's rechoose_interval */ int cpu_cache_offset; /* see kmem.c for details */ /* @@ -121,10 +119,11 @@ typedef struct cpu { struct cpu *cpu_prev_part; /* prev CPU in partition */ struct cpu *cpu_next_lgrp; /* next CPU in latency group */ struct cpu *cpu_prev_lgrp; /* prev CPU in latency group */ - struct cpu *cpu_next_chip; /* next CPU on chip */ - struct cpu *cpu_prev_chip; /* prev CPU on chip */ struct cpu *cpu_next_lpl; /* next CPU in lgrp partition */ struct cpu *cpu_prev_lpl; + + struct cpu_pg *cpu_pg; /* cpu's processor groups */ + void *cpu_reserved[4]; /* reserved for future use */ /* diff --git a/usr/src/uts/common/sys/disp.h b/usr/src/uts/common/sys/disp.h index 50464b6ddf..534feb4bd0 100644 --- a/usr/src/uts/common/sys/disp.h +++ b/usr/src/uts/common/sys/disp.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -98,13 +97,6 @@ extern pri_t maxclsyspri; /* maximum level of any system class */ extern pri_t intr_pri; /* interrupt thread priority base level */ /* - * Amount of time that may elapse before a thread is considered to have - * lost it's cache investment. - */ -#define RECHOOSE_INTERVAL 3 -extern int rechoose_interval; - -/* * Minimum amount of time that a thread can remain runnable before it can * be stolen by another CPU (in nanoseconds). */ diff --git a/usr/src/uts/common/sys/group.h b/usr/src/uts/common/sys/group.h new file mode 100644 index 0000000000..89a5ca1f1a --- /dev/null +++ b/usr/src/uts/common/sys/group.h @@ -0,0 +1,104 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _GROUP_H +#define _GROUP_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Group Abstraction + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#if (defined(_KERNEL) || defined(_KMEMUSER)) +#include <sys/types.h> + +#define GRP_RESIZE 0x1 /* Resize group capacity if needed */ +#define GRP_NORESIZE 0x2 /* Do not resize group capacity; may fail */ + +/* + * group structure + */ +typedef struct group { + uint_t grp_size; /* # of elements */ + uint_t grp_capacity; /* current group capacity */ + void **grp_set; /* element vector */ +} group_t; + +typedef uint_t group_iter_t; + + +/* + * Return the number of elements in the group + */ +#define GROUP_SIZE(grp) ((grp)->grp_size) + +/* + * Access the element at the specified group index + */ +#define GROUP_ACCESS(grp, index) ((grp)->grp_set[index]) + +/* + * Group creation / destruction + */ +void group_create(group_t *); +void group_destroy(group_t *); + +/* + * Expand a group's holding capacity + */ +void group_expand(group_t *, uint_t); + +/* + * Group element iteration + */ +void group_iter_init(group_iter_t *); +void *group_iterate(group_t *, uint_t *); + +/* + * Add / remove an element from the group + */ +int group_add(group_t *, void *, int); +int group_remove(group_t *, void *, int); + +/* + * Add / remove / access an element at a specified index. + * The group must already have sufficient capacity to hold + * an element at the specified index. + */ +int group_add_at(group_t *, void *, uint_t); +void group_remove_at(group_t *, uint_t); + +#endif /* !_KERNEL && !_KMEMUSER */ + +#ifdef __cplusplus +} +#endif + +#endif /* _GROUP_H */ diff --git a/usr/src/uts/common/sys/lgrp.h b/usr/src/uts/common/sys/lgrp.h index 3f6e191c98..8f35095adf 100644 --- a/usr/src/uts/common/sys/lgrp.h +++ b/usr/src/uts/common/sys/lgrp.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -253,8 +253,6 @@ typedef struct lgrp { struct cpu *lgrp_cpu; /* pointer to a cpu may be null */ uint_t lgrp_cpucnt; /* number of cpus in this lgrp */ - uint_t lgrp_chipcnt; - struct chip *lgrp_chips; /* pointer to chips in this lgrp */ kstat_t *lgrp_kstat; /* per-lgrp kstats */ } lgrp_t; diff --git a/usr/src/uts/common/sys/pg.h b/usr/src/uts/common/sys/pg.h new file mode 100644 index 0000000000..99c51ca09a --- /dev/null +++ b/usr/src/uts/common/sys/pg.h @@ -0,0 +1,173 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _PG_H +#define _PG_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Processor Groups + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#if (defined(_KERNEL) || defined(_KMEMUSER)) +#include <sys/cpuvar.h> +#include <sys/group.h> +#include <sys/processor.h> +#include <sys/bitset.h> +#include <sys/atomic.h> +#include <sys/types.h> +#include <sys/kstat.h> + +typedef uint_t pgid_t; /* processor group id */ +typedef uint_t pg_cid_t; /* processor group class id */ + +/* + * Nature of CPU relationships + */ +typedef enum pg_relation { + PGR_LOGICAL, + PGR_PHYSICAL +} pg_relation_t; + +/* + * Processor group structure + */ +typedef struct pg { + pgid_t pg_id; /* seq id */ + pg_relation_t pg_relation; /* grouping relationship */ + struct pg_class *pg_class; /* pg class */ + struct group pg_cpus; /* group of CPUs */ +} pg_t; + +/* + * PG class callbacks + */ +struct pg_ops { + struct pg *(*alloc)(); + void (*free)(struct pg *); + void (*cpu_init)(struct cpu *); + void (*cpu_fini)(struct cpu *); + void (*cpu_active)(struct cpu *); + void (*cpu_inactive)(struct cpu *); + void (*cpupart_in)(struct cpu *, struct cpupart *); + void (*cpupart_out)(struct cpu *, struct cpupart *); + void (*cpupart_move)(struct cpu *, struct cpupart *, + struct cpupart *); + int (*cpu_belongs)(struct pg *, struct cpu *); +}; + +#define PG_CLASS_NAME_MAX 32 + +/* + * PG class structure + */ +typedef struct pg_class { + pg_cid_t pgc_id; + char pgc_name[PG_CLASS_NAME_MAX]; + struct pg_ops *pgc_ops; + pg_relation_t pgc_relation; +} pg_class_t; + +/* + * Per CPU processor group data + */ +typedef struct cpu_pg { + struct group pgs; /* All the CPU's PGs */ + struct group cmt_pgs; /* CMT load balancing lineage */ + /* (Group hierarchy ordered) */ + struct pg *cmt_lineage; /* Ascending lineage chain */ +} cpu_pg_t; + +/* + * PG cpu iterator cookie + */ +typedef struct pg_cpu_itr { + pg_t *pg; + group_iter_t position; +} pg_cpu_itr_t; + +/* + * Initialize a PG CPU iterator cookie + */ 
+#define PG_CPU_ITR_INIT(pgrp, itr) \ +{ \ + group_iter_init(&(itr).position); \ + (itr).pg = ((pg_t *)pgrp); \ +} + +/* + * Return the first CPU in a PG + */ +#define PG_CPU_GET_FIRST(pgrp) \ + (GROUP_SIZE(&((pg_t *)pgrp)->pg_cpus) > 0 ? \ + GROUP_ACCESS(&((pg_t *)pgrp)->pg_cpus, 0) : NULL) + +/* + * Framework routines + */ +void pg_init(void); +pg_cid_t pg_class_register(char *, struct pg_ops *, pg_relation_t); + +/* + * PG CPU reconfiguration hooks + */ +void pg_cpu0_init(void); +void pg_cpu_init(cpu_t *); +void pg_cpu_fini(cpu_t *); +void pg_cpu_active(cpu_t *); +void pg_cpu_inactive(cpu_t *); +void pg_cpu_startup(cpu_t *); +void pg_cpu_bootstrap(cpu_t *); + +/* + * PG cpupart service hooks + */ +void pg_cpupart_in(cpu_t *, struct cpupart *); +void pg_cpupart_out(cpu_t *, struct cpupart *); +void pg_cpupart_move(cpu_t *, struct cpupart *, struct cpupart *); + +/* + * PG CPU utility routines + */ +pg_t *pg_create(pg_cid_t); +void pg_destroy(pg_t *); +void pg_cpu_add(pg_t *, cpu_t *); +void pg_cpu_delete(pg_t *, cpu_t *); +pg_t *pg_cpu_find_pg(cpu_t *, group_t *); +cpu_t *pg_cpu_next(pg_cpu_itr_t *); + + +#endif /* !_KERNEL && !_KMEMUSER */ + +#ifdef __cplusplus +} +#endif + +#endif /* _PG_H */ diff --git a/usr/src/uts/common/sys/pghw.h b/usr/src/uts/common/sys/pghw.h new file mode 100644 index 0000000000..e78be92032 --- /dev/null +++ b/usr/src/uts/common/sys/pghw.h @@ -0,0 +1,135 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _PGHW_H +#define _PGHW_H + +#pragma ident "%Z%%M% %I% %E% SMI" + + +#ifdef __cplusplus +extern "C" { +#endif + +#if (defined(_KERNEL) || defined(_KMEMUSER)) +#include <sys/cpuvar.h> +#include <sys/group.h> +#include <sys/processor.h> +#include <sys/bitmap.h> +#include <sys/atomic.h> +#include <sys/types.h> +#include <sys/kstat.h> +#include <sys/pg.h> + +/* + * Hardware that may be shared by a group of processors + */ +typedef enum pghw_type { + PGHW_START, + PGHW_IPIPE, + PGHW_CACHE, + PGHW_FPU, + PGHW_MPIPE, + PGHW_MEMORY, + PGHW_NUM_COMPONENTS +} pghw_type_t; + +/* + * Consider the physical processor sharing relationship + * equivalant to a shared pipe to memory. 
+ */ +#define PGHW_CHIP PGHW_MPIPE + +/* + * Anonymous instance id + */ +#define PGHW_INSTANCE_ANON ((id_t)0xdecafbad) + +/* + * Processor Group (physical sharing relationship) + */ +typedef struct pghw { + pg_t pghw_pg; /* processor group */ + pghw_type_t pghw_hw; /* HW sharing relationship */ + id_t pghw_instance; /* sharing instance identifier */ + kstat_t *pghw_kstat; /* physical kstats exported */ +} pghw_t; + +/* + * IDs associating a CPU with various physical hardware + */ +typedef struct cpu_physid { + id_t cpu_chipid; /* CPU's physical processor */ + id_t cpu_coreid; /* CPU's physical core */ + id_t cpu_cacheid; /* CPU's cache id */ +} cpu_physid_t; + +/* + * Physical PG initialization / CPU service hooks + */ +void pghw_init(pghw_t *, cpu_t *, pghw_type_t); +void pghw_fini(pghw_t *); +void pghw_cpu_add(pghw_t *, cpu_t *); +pghw_t *pghw_place_cpu(cpu_t *, pghw_type_t); + +/* + * Physical ID cache creation / destruction + */ +void pghw_physid_create(cpu_t *); +void pghw_physid_destroy(cpu_t *); + +/* + * CPU / PG hardware related seach operations + */ +pghw_t *pghw_find_pg(cpu_t *, pghw_type_t); +pghw_t *pghw_find_by_instance(id_t, pghw_type_t); +group_t *pghw_set_lookup(pghw_type_t); + +int pghw_level(pghw_type_t); + +void pghw_kstat_create(pghw_t *); +int pghw_kstat_update(kstat_t *, int); + +/* Hardware sharing relationship platform interfaces */ +int pg_plat_hw_shared(cpu_t *, pghw_type_t); +int pg_plat_cpus_share(cpu_t *, cpu_t *, pghw_type_t); +int pg_plat_hw_level(pghw_type_t); +id_t pg_plat_hw_instance_id(cpu_t *, pghw_type_t); + +/* + * What comprises a "core" may vary across processor implementations, + * and so the term itself is somewhat unstable. For this reason, there + * is no PGHW_CORE type, but we provide an interface here to allow platforms + * to express cpu <=> core mappings. 
+ */ +id_t pg_plat_get_core_id(cpu_t *); + +#endif /* !_KERNEL && !_KMEMUSER */ + +#ifdef __cplusplus +} +#endif + +#endif /* _PGHW_H */ diff --git a/usr/src/uts/i86pc/cpu/amd_opteron/ao_cpu.c b/usr/src/uts/i86pc/cpu/amd_opteron/ao_cpu.c index ccbc04fe4c..55084167b8 100644 --- a/usr/src/uts/i86pc/cpu/amd_opteron/ao_cpu.c +++ b/usr/src/uts/i86pc/cpu/amd_opteron/ao_cpu.c @@ -20,14 +20,14 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #include <sys/types.h> -#include <sys/chip.h> +#include <sys/pghw.h> #include <sys/cmn_err.h> #include <sys/sysmacros.h> #include <sys/fm/protocol.h> @@ -72,8 +72,8 @@ ao_fmri_create(ao_data_t *ao, nv_alloc_t *nva) fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 3, "motherboard", 0, - "chip", ao->ao_cpu->cpu_chip->chip_id, - "cpu", chip_plat_get_clogid(ao->ao_cpu)); + "chip", pg_plat_hw_instance_id(ao->ao_cpu, PGHW_CHIP), + "cpu", cpuid_get_clogid(ao->ao_cpu)); return (nvl); } @@ -113,7 +113,7 @@ int ao_scrubber_enable(void *data, uint64_t base, uint64_t ilen, int csdiscontig) { ao_data_t *ao = data; - chipid_t chipid = chip_plat_get_chipid(ao->ao_cpu); + chipid_t chipid = pg_plat_hw_instance_id(ao->ao_cpu, PGHW_CHIP); uint32_t rev = cpuid_getchiprev(ao->ao_cpu); uint32_t scrubctl, lo, hi; int rv = 1; diff --git a/usr/src/uts/i86pc/cpu/amd_opteron/ao_main.c b/usr/src/uts/i86pc/cpu/amd_opteron/ao_main.c index 06487043d9..908786702d 100644 --- a/usr/src/uts/i86pc/cpu/amd_opteron/ao_main.c +++ b/usr/src/uts/i86pc/cpu/amd_opteron/ao_main.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -37,13 +37,14 @@ #include <sys/cpuvar.h> #include <sys/x86_archext.h> #include <sys/kmem.h> +#include <sys/pghw.h> #include <sys/modctl.h> #include <sys/mc.h> #include <sys/mca_x86.h> #include "ao.h" -static struct ao_chipshared *ao_shared[CHIP_MAX_CHIPS]; +static struct ao_chipshared *ao_shared[NCPU]; /* * This cpu module supports AMD family 0xf revisions B/C/D/E/F/G. If @@ -55,7 +56,7 @@ uint_t ao_model_limit = 0x6f; static int ao_init(cpu_t *cp, void **datap) { - uint_t chipid = chip_plat_get_chipid(CPU); + uint_t chipid = pg_plat_hw_instance_id(CPU, PGHW_CHIP); struct ao_chipshared *sp, *osp; ao_data_t *ao; uint64_t cap; diff --git a/usr/src/uts/i86pc/cpu/amd_opteron/ao_mca.c b/usr/src/uts/i86pc/cpu/amd_opteron/ao_mca.c index a4dc3e7660..c9adcde2cf 100644 --- a/usr/src/uts/i86pc/cpu/amd_opteron/ao_mca.c +++ b/usr/src/uts/i86pc/cpu/amd_opteron/ao_mca.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -35,7 +35,7 @@ #include <sys/cmn_err.h> #include <sys/systm.h> #include <sys/sysmacros.h> -#include <sys/chip.h> +#include <sys/pghw.h> #include <sys/cyclic.h> #include <sys/cpu_module_impl.h> #include <sys/pci_cfgspace_impl.h> @@ -475,7 +475,7 @@ static void ao_nb_cfg(ao_data_t *ao, uint32_t rev) { const struct ao_nb_cfg *nbcp = &ao_cfg_extra[0]; - uint_t chipid = chip_plat_get_chipid(CPU); + uint_t chipid = pg_plat_hw_instance_id(CPU, PGHW_CHIP); uint32_t val; /* @@ -547,7 +547,7 @@ int ao_nb_cfg_sparectl_noseize = 0; static void ao_sparectl_cfg(ao_data_t *ao) { - uint_t chipid = chip_plat_get_chipid(CPU); + uint_t chipid = pg_plat_hw_instance_id(CPU, PGHW_CHIP); union mcreg_sparectl sparectl; int chan, cs; diff --git a/usr/src/uts/i86pc/io/mc/mcamd.h b/usr/src/uts/i86pc/io/mc/mcamd.h index 489aeb1dde..28dd664b82 100644 --- a/usr/src/uts/i86pc/io/mc/mcamd.h +++ b/usr/src/uts/i86pc/io/mc/mcamd.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,7 +32,7 @@ #include <sys/types.h> #include <sys/ddi.h> #include <sys/sunddi.h> -#include <sys/chip.h> +#include <sys/pghw.h> #include <sys/ksynch.h> #include <sys/mc_amd.h> #include <mcamd_api.h> @@ -234,7 +234,7 @@ struct mc { uint32_t mc_socket; /* Package type */ uint_t mc_ref; /* reference (attach) count */ mc_func_t mc_funcs[MC_FUNC_NUM]; /* Instance, devinfo, ... 
*/ - chip_t *mc_chip; /* Associated chip */ + pghw_t *mc_chip; /* MC's associated chip PG */ mc_cs_t *mc_cslist; /* All active chip-selects */ mc_cs_t *mc_cslast; /* End of chip-select list */ mc_dimm_t *mc_dimmlist; /* List of all logical DIMMs, */ diff --git a/usr/src/uts/i86pc/io/mc/mcamd_drv.c b/usr/src/uts/i86pc/io/mc/mcamd_drv.c index 1c89e0e668..fe8ad94fd9 100644 --- a/usr/src/uts/i86pc/io/mc/mcamd_drv.c +++ b/usr/src/uts/i86pc/io/mc/mcamd_drv.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -40,7 +40,7 @@ #include <sys/cred.h> #include <sys/ksynch.h> #include <sys/rwlock.h> -#include <sys/chip.h> +#include <sys/pghw.h> #include <sys/open.h> #include <sys/policy.h> #include <sys/machsystm.h> @@ -110,7 +110,7 @@ mc_lookup_by_chipid(int chipid) ASSERT(RW_LOCK_HELD(&mc_lock)); for (mc = mc_list; mc != NULL; mc = mc->mc_next) { - if (mc->mc_chip->chip_id == chipid) + if (mc->mc_chip->pghw_instance == chipid) return (mc); } @@ -595,7 +595,7 @@ mc_report_testfails(mc_t *mc) for (mccs = mc->mc_cslist; mccs != NULL; mccs = mccs->mccs_next) { if (mccs->mccs_props.csp_testfail) { unum.unum_board = 0; - unum.unum_chip = mc->mc_chip->chip_id; + unum.unum_chip = mc->mc_chip->pghw_instance; unum.unum_mc = 0; unum.unum_cs = mccs->mccs_props.csp_num; unum.unum_rank = mccs->mccs_props.csp_dimmrank; @@ -672,7 +672,7 @@ mc_mkprops_addrmap(mc_pcicfg_hdl_t cfghdl, mc_t *mc) * base/limit pairs is overkill. 
*/ if (MCREG_FIELD_CMN(&lim[i], DstNode) != - mc->mc_chip->chip_id) + mc->mc_chip->pghw_instance) continue; /* @@ -1272,8 +1272,9 @@ mc_fm_fini(dev_info_t *dip) static mc_t * mc_create(chipid_t chipid) { - chip_t *chp = chip_lookup(chipid); + pghw_t *chp = pghw_find_by_instance((id_t)chipid, PGHW_CHIP); mc_t *mc; + cpu_t *cpu; ASSERT(RW_WRITE_HELD(&mc_lock)); @@ -1283,17 +1284,18 @@ mc_create(chipid_t chipid) mc = kmem_zalloc(sizeof (mc_t), KM_SLEEP); mc->mc_hdr.mch_type = MC_NT_MC; mc->mc_chip = chp; - mc->mc_props.mcp_num = mc->mc_chip->chip_id; + mc->mc_props.mcp_num = mc->mc_chip->pghw_instance; mc->mc_props.mcp_sparecs = MC_INVALNUM; mc->mc_props.mcp_badcs = MC_INVALNUM; /* - * We can use the first cpu in the chip_cpus list since all cores + * We can use one of the chip's CPUs since all cores * of a chip share the same revision and socket type. */ - mc->mc_props.mcp_rev = cpuid_getchiprev(chp->chip_cpus); - mc->mc_revname = cpuid_getchiprevstr(chp->chip_cpus); - mc->mc_socket = cpuid_getsockettype(chp->chip_cpus); + cpu = PG_CPU_GET_FIRST(chp); + mc->mc_props.mcp_rev = cpuid_getchiprev(cpu); + mc->mc_revname = cpuid_getchiprevstr(cpu); + mc->mc_socket = cpuid_getsockettype(cpu); if (mc_list == NULL) mc_list = mc; @@ -1362,7 +1364,7 @@ mc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) rw_enter(&mc_lock, RW_WRITER); for (mc = mc_list; mc != NULL; mc = mc->mc_next) { - if (mc->mc_chip->chip_id == chipid) + if (mc->mc_chip->pghw_instance == chipid) break; } @@ -1405,7 +1407,7 @@ mc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) dip, "model", (char *)bm->bm_model); (void) ddi_prop_update_int(DDI_DEV_T_NONE, - dip, "chip-id", mc->mc_chip->chip_id); + dip, "chip-id", mc->mc_chip->pghw_instance); if (bm->bm_mkprops != NULL && mc_pcicfg_setup(mc, bm->bm_func, &cfghdl) == DDI_SUCCESS) { @@ -1421,11 +1423,15 @@ mc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) if (func == MC_FUNC_DEVIMAP) { mc_props_t *mcp = &mc->mc_props; int dram_present = 0; + pg_cpu_itr_t itr; + cpu_t 
*cpup; if (ddi_create_minor_node(dip, "mc-amd", S_IFCHR, - mc->mc_chip->chip_id, "ddi_mem_ctrl", 0) != DDI_SUCCESS) { + mc->mc_chip->pghw_instance, "ddi_mem_ctrl", + 0) != DDI_SUCCESS) { cmn_err(CE_WARN, "failed to create minor node for chip " - "%u memory controller\n", mc->mc_chip->chip_id); + "%u memory controller\n", + mc->mc_chip->pghw_instance); } /* @@ -1440,13 +1446,11 @@ mc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) */ kpreempt_disable(); /* prevent cpu list from changing */ - cpu = mc->mc_chip->chip_cpus; - + PG_CPU_ITR_INIT(mc->mc_chip, itr); + cpup = cpu = pg_cpu_next(&itr); do { - mcamd_mc_register(cpu); - cpu = cpu->cpu_next_chip; - } while (cpu != mc->mc_chip->chip_cpus); - + mcamd_mc_register(cpup); + } while ((cpup = pg_cpu_next(&itr)) != NULL); if (mc->mc_props.mcp_lim != mc->mc_props.mcp_base) { /* diff --git a/usr/src/uts/i86pc/io/mc/mcamd_pcicfg.c b/usr/src/uts/i86pc/io/mc/mcamd_pcicfg.c index 534666769a..8c772a8f01 100644 --- a/usr/src/uts/i86pc/io/mc/mcamd_pcicfg.c +++ b/usr/src/uts/i86pc/io/mc/mcamd_pcicfg.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -86,14 +86,16 @@ mc_pcicfg_get32(mc_pcicfg_hdl_t cookie, off_t offset) uint32_t mc_pcicfg_get32_nohdl(mc_t *mc, enum mc_funcnum func, off_t offset) { - return (pci_mech1_getl(0, MC_AMD_DEV_OFFSET + mc->mc_chip->chip_id, - func, offset)); + return (pci_mech1_getl(0, + MC_AMD_DEV_OFFSET + mc->mc_chip->pghw_instance, + func, offset)); } void mc_pcicfg_put32_nohdl(mc_t *mc, enum mc_funcnum func, off_t offset, uint32_t val) { - pci_mech1_putl(0, MC_AMD_DEV_OFFSET + mc->mc_chip->chip_id, + pci_mech1_putl(0, + MC_AMD_DEV_OFFSET + mc->mc_chip->pghw_instance, func, offset, val); } diff --git a/usr/src/uts/i86pc/io/mc/mcamd_subr.c b/usr/src/uts/i86pc/io/mc/mcamd_subr.c index 7ce0633a56..9f9786b10b 100644 --- a/usr/src/uts/i86pc/io/mc/mcamd_subr.c +++ b/usr/src/uts/i86pc/io/mc/mcamd_subr.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -617,7 +617,7 @@ mc_fmri_create(mc_t *mc) fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 3, "motherboard", 0, - "chip", mc->mc_chip->chip_id, + "chip", mc->mc_chip->pghw_instance, "memory-controller", 0); return (nvl); diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c index 45fb59ff73..094092ed0b 100644 --- a/usr/src/uts/i86pc/os/cpuid.c +++ b/usr/src/uts/i86pc/os/cpuid.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -40,7 +40,7 @@ #include <sys/sunndi.h> #include <sys/cpuvar.h> #include <sys/processor.h> -#include <sys/chip.h> +#include <sys/pg.h> #include <sys/fp.h> #include <sys/controlregs.h> #include <sys/auxv_386.h> @@ -1886,8 +1886,8 @@ cpuid_getsockettype(struct cpu *cpu) return (cpu->cpu_m.mcpu_cpi->cpi_socket); } -chipid_t -chip_plat_get_chipid(cpu_t *cpu) +int +cpuid_get_chipid(cpu_t *cpu) { ASSERT(cpuid_checkpass(cpu, 1)); @@ -1897,14 +1897,14 @@ chip_plat_get_chipid(cpu_t *cpu) } id_t -chip_plat_get_coreid(cpu_t *cpu) +cpuid_get_coreid(cpu_t *cpu) { ASSERT(cpuid_checkpass(cpu, 1)); return (cpu->cpu_m.mcpu_cpi->cpi_coreid); } int -chip_plat_get_clogid(cpu_t *cpu) +cpuid_get_clogid(cpu_t *cpu) { ASSERT(cpuid_checkpass(cpu, 1)); return (cpu->cpu_m.mcpu_cpi->cpi_clogid); diff --git a/usr/src/uts/i86pc/os/lgrpplat.c b/usr/src/uts/i86pc/os/lgrpplat.c index 5397d0303d..79ea557578 100644 --- a/usr/src/uts/i86pc/os/lgrpplat.c +++ b/usr/src/uts/i86pc/os/lgrpplat.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -39,6 +39,7 @@ #include <sys/pci_cfgspace.h> #include <sys/pci_impl.h> #include <sys/param.h> +#include <sys/pghw.h> #include <sys/promif.h> /* for prom_printf() */ #include <sys/systm.h> #include <sys/thread.h> @@ -50,7 +51,6 @@ #include <vm/vm_dep.h> - /* * lgroup platform support for x86 platforms. 
*/ @@ -58,7 +58,7 @@ #define MAX_NODES 8 #define NLGRP (MAX_NODES * (MAX_NODES - 1) + 1) -#define LGRP_PLAT_CPU_TO_NODE(cpu) (chip_plat_get_chipid(cpu)) +#define LGRP_PLAT_CPU_TO_NODE(cpu) (pg_plat_hw_instance_id(cpu, PGHW_CHIP)) #define LGRP_PLAT_PROBE_NROUNDS 64 /* default laps for probing */ #define LGRP_PLAT_PROBE_NSAMPLES 1 /* default samples to take */ diff --git a/usr/src/uts/i86pc/os/mlsetup.c b/usr/src/uts/i86pc/os/mlsetup.c index 6e97330601..067f417551 100644 --- a/usr/src/uts/i86pc/os/mlsetup.c +++ b/usr/src/uts/i86pc/os/mlsetup.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -41,7 +41,7 @@ #include <sys/cpupart.h> #include <sys/pset.h> #include <sys/copyops.h> -#include <sys/chip.h> +#include <sys/pg.h> #include <sys/disp.h> #include <sys/debug.h> #include <sys/sunddi.h> @@ -122,7 +122,6 @@ void mlsetup(struct regs *rp) { extern struct classfuncs sys_classfuncs; - extern struct chip cpu0_chip; extern disp_t cpu0_disp; extern char t0stack[]; int boot_ncpus; @@ -328,16 +327,6 @@ mlsetup(struct regs *rp) */ lgrp_init(); - /* - * The lgroup code needs to at least know about a CPU's - * chip association, but it's too early to fully initialize - * cpu0_chip, since the device node for the boot CPU doesn't - * exist yet. Initialize enough of it to get by until formal - * initialization. - */ - CPU->cpu_rechoose = rechoose_interval; - CPU->cpu_chip = &cpu0_chip; - rp->r_fp = 0; /* terminate kernel stack traces! */ prom_init("kernel", (void *)NULL); diff --git a/usr/src/uts/i86pc/os/mp_machdep.c b/usr/src/uts/i86pc/os/mp_machdep.c index 46e4f5a710..c2e3123a24 100644 --- a/usr/src/uts/i86pc/os/mp_machdep.c +++ b/usr/src/uts/i86pc/os/mp_machdep.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. 
All rights reserved. * Use is subject to license terms. */ @@ -38,7 +38,7 @@ #include <sys/x86_archext.h> #include <sys/cpupart.h> #include <sys/cpuvar.h> -#include <sys/chip.h> +#include <sys/pghw.h> #include <sys/disp.h> #include <sys/cpu.h> #include <sys/archsystm.h> @@ -86,6 +86,8 @@ extern uint64_t freq_tsc(uint32_t *); extern uint64_t freq_notsc(uint32_t *); #endif extern void pc_gethrestime(timestruc_t *); +extern int cpuid_get_coreid(cpu_t *); +extern int cpuid_get_chipid(cpu_t *); /* * PSM functions initialization @@ -164,35 +166,94 @@ int simulator_run = 0; /* patch to non-zero if running under simics */ #endif /* _SIMULATOR_SUPPORT */ -/* ARGSUSED */ -void -chip_plat_define_chip(cpu_t *cp, chip_def_t *cd) + +/*ARGSUSED*/ +int +pg_plat_hw_shared(cpu_t *cp, pghw_type_t hw) { - if ((x86_feature & (X86_HTT|X86_CMP)) == X86_HTT) { - /* - * Single-core Pentiums with Hyper-Threading enabled. - */ - cd->chipd_type = CHIP_SMT; - } else if ((x86_feature & (X86_HTT|X86_CMP)) == X86_CMP) { - /* - * Multi-core Opterons or Multi-core Pentiums with - * Hyper-Threading disabled. - */ - cd->chipd_type = CHIP_CMP_SPLIT_CACHE; - } else if ((x86_feature & (X86_HTT|X86_CMP)) == (X86_HTT|X86_CMP)) { - /* - * Multi-core Pentiums with Hyper-Threading enabled. - */ - cd->chipd_type = CHIP_CMT; - } else { - /* - * Single-core/single-threaded chips. 
- */ - cd->chipd_type = CHIP_DEFAULT; + switch (hw) { + case PGHW_IPIPE: + if (x86_feature & (X86_HTT)) { + /* + * Hyper-threading is SMT + */ + return (1); + } else { + return (0); + } + case PGHW_CHIP: + if (x86_feature & (X86_CMP|X86_HTT)) + return (1); + else + return (0); + default: + return (0); + } +} + +/* + * Compare two CPUs and see if they have a pghw_type_t sharing relationship + * If pghw_type_t is an unsupported hardware type, then return -1 + */ +int +pg_plat_cpus_share(cpu_t *cpu_a, cpu_t *cpu_b, pghw_type_t hw) +{ + id_t pgp_a, pgp_b; + + pgp_a = pg_plat_hw_instance_id(cpu_a, hw); + pgp_b = pg_plat_hw_instance_id(cpu_b, hw); + + if (pgp_a == -1 || pgp_b == -1) + return (-1); + + return (pgp_a == pgp_b); +} + +/* + * Return a physical instance identifier for known hardware sharing + * relationships + */ +id_t +pg_plat_hw_instance_id(cpu_t *cpu, pghw_type_t hw) +{ + switch (hw) { + case PGHW_IPIPE: + return (cpuid_get_coreid(cpu)); + case PGHW_CHIP: + return (cpuid_get_chipid(cpu)); + default: + return (-1); } +} + +int +pg_plat_hw_level(pghw_type_t hw) +{ + int i; + static pghw_type_t hw_hier[] = { + PGHW_IPIPE, + PGHW_CHIP, + PGHW_NUM_COMPONENTS + }; + + for (i = 0; hw_hier[i] != PGHW_NUM_COMPONENTS; i++) { + if (hw_hier[i] == hw) + return (i); + } + return (-1); +} - cd->chipd_rechoose_adj = 0; - cd->chipd_nosteal = 100000ULL; /* 100 usec */ +id_t +pg_plat_get_core_id(cpu_t *cpu) +{ + return ((id_t)cpuid_get_coreid(cpu)); +} + +void +cmp_set_nosteal_interval(void) +{ + /* Set the nosteal interval (used by disp_getbest()) to 100us */ + nosteal_nsec = 100000UL; } /* diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c index 6c00d2bb20..5fd2325888 100644 --- a/usr/src/uts/i86pc/os/mp_startup.c +++ b/usr/src/uts/i86pc/os/mp_startup.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -54,7 +54,8 @@ #include <sys/traptrace.h> #include <sys/clock.h> #include <sys/cpc_impl.h> -#include <sys/chip.h> +#include <sys/pg.h> +#include <sys/cmt.h> #include <sys/dtrace.h> #include <sys/archsystm.h> #include <sys/fp.h> @@ -321,11 +322,9 @@ extern void *long_mode_64(void); tp->t_disp_queue = cp->cpu_disp; /* - * Bootstrap the CPU for CMT aware scheduling - * The rest of the initialization will happen from - * mp_startup() + * Bootstrap the CPU's PG data */ - chip_bootstrap_cpu(cp); + pg_cpu_bootstrap(cp); /* * Perform CPC intialization on the new CPU. @@ -1171,15 +1170,13 @@ mp_startup(void) mutex_enter(&cpu_lock); /* - * It's unfortunate that chip_cpu_init() has to be called here. - * It really belongs in cpu_add_unit(), but unfortunately it is - * dependent on the cpuid probing, which must be done in the - * context of the current CPU. Care must be taken on x86 to ensure - * that mp_startup can safely block even though chip_cpu_init() and - * cpu_add_active() have not yet been called. + * Processor group initialization for this CPU is dependent on the + * cpuid probing, which must be done in the context of the current + * CPU. */ - chip_cpu_init(cp); - chip_cpu_startup(cp); + pghw_physid_create(cp); + pg_cpu_init(cp); + pg_cmt_cpu_startup(cp); cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_ENABLE | CPU_EXISTS; cpu_add_active(cp); diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c index b131db010c..6d7954db30 100644 --- a/usr/src/uts/i86pc/os/startup.c +++ b/usr/src/uts/i86pc/os/startup.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -107,7 +107,7 @@ #include <sys/kobj.h> #include <sys/kobj_lex.h> #include <sys/cpc_impl.h> -#include <sys/chip.h> +#include <sys/pg.h> #include <sys/x86_archext.h> #include <sys/cpu_module.h> #include <sys/smbios.h> @@ -1984,16 +1984,6 @@ post_startup(void) maxmem = freemem; add_cpunode2devtree(CPU->cpu_id, CPU->cpu_m.mcpu_cpi); - - /* - * Perform the formal initialization of the boot chip, - * and associate the boot cpu with it. - * This must be done after the cpu node for CPU has been - * added to the device tree, when the necessary probing to - * know the chip type and chip "id" is performed. - */ - chip_cpu_init(CPU); - chip_cpu_assign(CPU); } static int diff --git a/usr/src/uts/intel/ia32/os/cpc_subr.c b/usr/src/uts/intel/ia32/os/cpc_subr.c index e09f018242..4ab36ece98 100644 --- a/usr/src/uts/intel/ia32/os/cpc_subr.c +++ b/usr/src/uts/intel/ia32/os/cpc_subr.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -43,7 +42,7 @@ #include <sys/cpc_impl.h> #include <sys/x_call.h> #include <sys/cmn_err.h> -#include <sys/chip.h> +#include <sys/cmt.h> #include <sys/spl.h> #include <io/pcplusmp/apic.h> @@ -79,7 +78,8 @@ kcpc_hw_rem_ovf_intr(void) static int kcpc_cpu_setup(cpu_setup_t what, int cpuid, void *arg) { - chip_t *chp = cpu[cpuid]->cpu_chip; + pg_cmt_t *chip_pg; + int active_cpus_cnt; if (what != CPU_ON) return (0); @@ -95,8 +95,12 @@ kcpc_cpu_setup(cpu_setup_t what, int cpuid, void *arg) * If this chip now has more than 1 active cpu, we must invalidate all * contexts in the system. */ - if (chp->chip_ncpu > 1) - kcpc_invalidate_all(); + chip_pg = (pg_cmt_t *)pghw_find_pg(cpu[cpuid], PGHW_CHIP); + if (chip_pg != NULL) { + active_cpus_cnt = GROUP_SIZE(&chip_pg->cmt_cpus_actv); + if (active_cpus_cnt > 1) + kcpc_invalidate_all(); + } return (0); } @@ -177,7 +181,9 @@ kcpc_remote_stop(cpu_t *cp) int kcpc_hw_cpu_hook(processorid_t cpuid, ulong_t *kcpc_cpumap) { - cpu_t *p, *cpu; + cpu_t *cpu, *p; + pg_t *chip_pg; + pg_cpu_itr_t itr; if ((x86_feature & X86_HTT) == 0) return (0); @@ -187,8 +193,7 @@ kcpc_hw_cpu_hook(processorid_t cpuid, ulong_t *kcpc_cpumap) * once. * * This loop is protected by holding cpu_lock, in order to properly - * access the cpu_t of the desired cpu. This also guarantees that the - * per chip cpu lists will not change whilst we look at them. + * access the cpu_t of the desired cpu. 
*/ mutex_enter(&cpu_lock); if ((cpu = cpu_get(cpuid)) == NULL) { @@ -196,7 +201,12 @@ kcpc_hw_cpu_hook(processorid_t cpuid, ulong_t *kcpc_cpumap) return (-1); } - for (p = cpu->cpu_next_chip; p != cpu; p = p->cpu_next_chip) { + chip_pg = (pg_t *)pghw_find_pg(cpu, PGHW_CHIP); + + PG_CPU_ITR_INIT(chip_pg, itr); + while ((p = pg_cpu_next(&itr)) != NULL) { + if (p == cpu) + continue; if (BT_TEST(kcpc_cpumap, p->cpu_id)) { mutex_exit(&cpu_lock); return (-1); @@ -213,7 +223,9 @@ kcpc_hw_cpu_hook(processorid_t cpuid, ulong_t *kcpc_cpumap) int kcpc_hw_lwp_hook(void) { - chip_t *p; + pg_cmt_t *chip; + group_t *chips; + group_iter_t i; if ((x86_feature & X86_HTT) == 0) return (0); @@ -222,14 +234,21 @@ kcpc_hw_lwp_hook(void) * Only one CPU per chip may be online. */ mutex_enter(&cpu_lock); - p = CPU->cpu_chip; - do { - if (p->chip_ncpu > 1) { + + chips = pghw_set_lookup(PGHW_CHIP); + if (chips == NULL) { + mutex_exit(&cpu_lock); + return (0); + } + + group_iter_init(&i); + while ((chip = group_iterate(chips, &i)) != NULL) { + if (GROUP_SIZE(&chip->cmt_cpus_actv) > 1) { mutex_exit(&cpu_lock); return (-1); } - p = p->chip_next; - } while (p != CPU->cpu_chip); + } + mutex_exit(&cpu_lock); return (0); } diff --git a/usr/src/uts/intel/pcbe/p4_pcbe.c b/usr/src/uts/intel/pcbe/p4_pcbe.c index c7263a099c..bfaa46a0ab 100644 --- a/usr/src/uts/intel/pcbe/p4_pcbe.c +++ b/usr/src/uts/intel/pcbe/p4_pcbe.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. 
All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -61,7 +60,7 @@ static void p4_pcbe_allstop(void); static void p4_pcbe_sample(void *token); static void p4_pcbe_free(void *config); -extern int chip_plat_get_clogid(cpu_t *); +extern int cpuid_get_clogid(cpu_t *); static pcbe_ops_t p4_pcbe_ops = { PCBE_VER_1, @@ -807,7 +806,7 @@ p4_pcbe_program(void *token) * CCCR (and thus enables the counter). */ if (p4_htt) { - int lid = chip_plat_get_clogid(CPU); /* Logical ID of CPU */ + int lid = cpuid_get_clogid(CPU); /* Logical ID of CPU */ for (i = 0; i < 18; i++) { uint64_t escr; diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h index cf76d4f237..af06a50a92 100644 --- a/usr/src/uts/intel/sys/x86_archext.h +++ b/usr/src/uts/intel/sys/x86_archext.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -531,6 +531,9 @@ extern uint_t cpuid_getmodel(struct cpu *); extern uint_t cpuid_getstep(struct cpu *); extern uint_t cpuid_get_ncpu_per_chip(struct cpu *); extern uint_t cpuid_get_ncore_per_chip(struct cpu *); +extern int cpuid_get_chipid(struct cpu *); +extern id_t cpuid_get_coreid(struct cpu *); +extern int cpuid_get_clogid(struct cpu *); extern int cpuid_is_cmt(struct cpu *); extern int cpuid_syscall32_insn(struct cpu *); extern int getl2cacheinfo(struct cpu *, int *, int *, int *); diff --git a/usr/src/uts/sun4/io/trapstat.c b/usr/src/uts/sun4/io/trapstat.c index fa7b98350b..6cfed113ae 100644 --- a/usr/src/uts/sun4/io/trapstat.c +++ b/usr/src/uts/sun4/io/trapstat.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -46,7 +46,7 @@ #include <sys/hypervisor_api.h> #endif #ifndef sun4v -#include <sys/chip.h> +#include <sys/pghw.h> #endif /* BEGIN CSTYLED */ @@ -1439,7 +1439,7 @@ trapstat_setup(processorid_t cpu) cp = cpu_get(cpu); ASSERT(cp != NULL); - if ((strand_idx = cpu ^ chip_plat_get_coreid(cp)) != 0) { + if ((strand_idx = cpu ^ pg_plat_hw_instance_id(cp, PGHW_IPIPE)) != 0) { /* * On sun4u platforms with multiple CPUs sharing the MMU * (Olympus-C has 2 strands per core), each CPU uses a diff --git a/usr/src/uts/sun4/os/machdep.c b/usr/src/uts/sun4/os/machdep.c index 9661c8b83c..821999a3c0 100644 --- a/usr/src/uts/sun4/os/machdep.c +++ b/usr/src/uts/sun4/os/machdep.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -47,7 +47,6 @@ #include <sys/cmn_err.h> #include <sys/time.h> #include <sys/clock.h> -#include <sys/chip.h> #include <sys/cmp.h> #include <sys/platform_module.h> #include <sys/bl.h> diff --git a/usr/src/uts/sun4/os/mlsetup.c b/usr/src/uts/sun4/os/mlsetup.c index 771822bb37..d66c6c13c1 100644 --- a/usr/src/uts/sun4/os/mlsetup.c +++ b/usr/src/uts/sun4/os/mlsetup.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -56,7 +56,7 @@ #include <sys/copyops.h> #include <sys/panic.h> #include <sys/bootconf.h> /* for bootops */ -#include <sys/chip.h> +#include <sys/pg.h> #include <sys/kdi.h> #include <sys/fpras.h> @@ -274,15 +274,6 @@ mlsetup(struct regs *rp, void *cif, kfpu_t *fp) setcputype(); map_wellknown_devices(); setcpudelay(); - - /* - * Associate the boot cpu with a physical processor. - * This needs to be done after devices are mapped, since - * we need to know what type of physical processor this is. 
- * (CMP for example) - */ - chip_cpu_init(CPU); - chip_cpu_assign(CPU); } /* diff --git a/usr/src/uts/sun4/os/mp_startup.c b/usr/src/uts/sun4/os/mp_startup.c index 814d35bf15..b770ebbe2e 100644 --- a/usr/src/uts/sun4/os/mp_startup.c +++ b/usr/src/uts/sun4/os/mp_startup.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,7 +35,8 @@ #include <sys/machsystm.h> #include <sys/callb.h> #include <sys/cpu_module.h> -#include <sys/chip.h> +#include <sys/pg.h> +#include <sys/cmt.h> #include <sys/dtrace.h> #include <sys/reboot.h> #include <sys/kdi.h> @@ -78,6 +79,9 @@ static void slave_startup(void); */ #define CPU_WAKEUP_GRACE_MSEC 1000 +extern hrtime_t nosteal_nsec; +extern void cmp_set_nosteal_interval(void); + #ifdef TRAPTRACE /* * This function bop allocs traptrace buffers for all cpus @@ -408,9 +412,13 @@ setup_cpu_common(int cpuid) cpu_init_private(cp); /* - * Associate this CPU with a physical processor + * Initialize the CPUs physical ID cache, and processor groups */ - chip_cpu_init(cp); + pghw_physid_create(cp); + pg_cpu_init(cp); + + if (nosteal_nsec == -1) + cmp_set_nosteal_interval(); cpu_intrq_setup(cp); @@ -630,9 +638,9 @@ slave_startup(void) kcpc_hw_startup_cpu(original_flags); /* - * Notify the CMT subsystem that the slave has started + * Notify the PG subsystem that the CPU has started */ - chip_cpu_startup(CPU); + pg_cmt_cpu_startup(CPU); /* * Now we are done with the startup thread, so free it up. diff --git a/usr/src/uts/sun4u/cpu/us3_cheetahplus.c b/usr/src/uts/sun4u/cpu/us3_cheetahplus.c index cf4ffc353f..c4a66883b3 100644 --- a/usr/src/uts/sun4u/cpu/us3_cheetahplus.c +++ b/usr/src/uts/sun4u/cpu/us3_cheetahplus.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ @@ -63,6 +63,7 @@ #include <sys/fm/protocol.h> #include <sys/fm/cpu/UltraSPARC-III.h> #include <sys/fm/util.h> +#include <sys/pghw.h> #ifdef CHEETAHPLUS_ERRATUM_25 #include <sys/cyclic.h> @@ -1331,13 +1332,27 @@ cpu_scrub_cpu_setup(cpu_setup_t what, int cpuid, void *arg) static cpu_t * cpu_get_sibling_core(cpu_t *cpup) { - cpu_t *nextp; + cpu_t *nextp; + pg_t *pg; + pg_cpu_itr_t i; if ((cpup == NULL) || (!cmp_cpu_is_cmp(cpup->cpu_id))) return (NULL); + pg = (pg_t *)pghw_find_pg(cpup, PGHW_CHIP); + if (pg == NULL) + return (NULL); + + /* + * Iterate over the CPUs in the chip PG looking + * for a CPU that isn't cpup + */ + PG_CPU_ITR_INIT(pg, i); + while ((nextp = pg_cpu_next(&i)) != NULL) { + if (nextp != cpup) + break; + } - nextp = cpup->cpu_next_chip; - if ((nextp == NULL) || (nextp == cpup)) + if (nextp == NULL) return (NULL); return (nextp); diff --git a/usr/src/uts/sun4u/cpu/us3_common.c b/usr/src/uts/sun4u/cpu/us3_common.c index 4904bff814..67839633ea 100644 --- a/usr/src/uts/sun4u/cpu/us3_common.c +++ b/usr/src/uts/sun4u/cpu/us3_common.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -75,6 +75,7 @@ #include <sys/cyclic.h> #include <sys/errorq.h> #include <sys/errclassify.h> +#include <sys/pghw.h> #ifdef CHEETAHPLUS_ERRATUM_25 #include <sys/xc_impl.h> @@ -2991,7 +2992,7 @@ ce_ptnr_select(struct async_flt *aflt, int flags, int *typep) if (sp == NULL || sp->cpu_part != dtcr->cpu_part || !cpu_flagged_active(sp->cpu_flags) || (sp == dtcr && !(flags & PTNR_SELFOK)) || - (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id && + (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) && !(flags & PTNR_SIBLINGOK))) { sp = dtcr->cpu_next_part; } else { @@ -2999,8 +3000,7 @@ ce_ptnr_select(struct async_flt *aflt, int flags, int *typep) *typep = CE_XDIAG_PTNR_REMOTE; } else if (sp == dtcr) { *typep = CE_XDIAG_PTNR_SELF; - } else if (sp->cpu_chip->chip_id == - dtcr->cpu_chip->chip_id) { + } else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) { *typep = CE_XDIAG_PTNR_SIBLING; } else { *typep = CE_XDIAG_PTNR_LOCAL; @@ -3051,7 +3051,7 @@ ce_ptnr_select(struct async_flt *aflt, int flags, int *typep) *typep = CE_XDIAG_PTNR_REMOTE; return (ptnr); } - if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) { + if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) { if (sibptnr == NULL) sibptnr = ptnr; continue; diff --git a/usr/src/uts/sun4u/os/cmp.c b/usr/src/uts/sun4u/os/cmp.c index 03aa7621e8..3c2fa2503f 100644 --- a/usr/src/uts/sun4u/os/cmp.c +++ b/usr/src/uts/sun4u/os/cmp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -29,8 +29,8 @@ #include <sys/machsystm.h> #include <sys/x_call.h> #include <sys/cmp.h> +#include <sys/pghw.h> #include <sys/debug.h> -#include <sys/chip.h> #include <sys/disp.h> #include <sys/cheetahregs.h> @@ -141,64 +141,107 @@ cmp_cpu_to_chip(processorid_t cpuid) return (cpunodes[cpuid].portid); } -/* - * Return a chip "id" for the given cpu_t - * cpu_t's residing on the same physical processor - * should map to the same "id" - */ -chipid_t -chip_plat_get_chipid(cpu_t *cp) -{ - return (cmp_cpu_to_chip(cp->cpu_id)); -} - -/* - * Return the "core id" for the given cpu_t - * The "core id" space spans uniquely across all - * cpu chips. - */ -id_t -chip_plat_get_coreid(cpu_t *cp) +/* ARGSUSED */ +int +pg_plat_hw_shared(cpu_t *cp, pghw_type_t hw) { int impl; impl = cpunodes[cp->cpu_id].implementation; - if (IS_OLYMPUS_C(impl)) { - /* - * Currently only Fujitsu Olympus-c processor supports - * multi-stranded cores. Return the cpu_id with - * the strand bit masked out. - */ - return ((id_t)((uint_t)cp->cpu_id & ~(0x1))); - } else { - return (cp->cpu_id); + switch (hw) { + case PGHW_IPIPE: + if (IS_OLYMPUS_C(impl)) + return (1); + break; + case PGHW_CHIP: + if (IS_JAGUAR(impl) || IS_PANTHER(impl)) + return (1); + break; + case PGHW_CACHE: + if (IS_PANTHER(impl)) + return (1); + break; } + return (0); } -void -chip_plat_define_chip(cpu_t *cp, chip_def_t *cd) +int +pg_plat_cpus_share(cpu_t *cpu_a, cpu_t *cpu_b, pghw_type_t hw) { - int impl; + int impla, implb; + + impla = cpunodes[cpu_a->cpu_id].implementation; + implb = cpunodes[cpu_b->cpu_id].implementation; + + switch (hw) { + case PGHW_IPIPE: + case PGHW_CHIP: + return (pg_plat_hw_instance_id(cpu_a, hw) == + pg_plat_hw_instance_id(cpu_b, hw)); + case PGHW_CACHE: + return (IS_PANTHER(impla) && IS_PANTHER(implb) && + pg_plat_cpus_share(cpu_a, cpu_b, PGHW_CHIP)); + } + return (0); +} - /* - * Define the chip's type - */ - impl = cpunodes[cp->cpu_id].implementation; +id_t +pg_plat_hw_instance_id(cpu_t *cpu, 
pghw_type_t hw) +{ + int impl; + + switch (hw) { + case PGHW_IPIPE: + impl = cpunodes[cpu->cpu_id].implementation; + + if (IS_OLYMPUS_C(impl)) { + /* + * Currently only Fujitsu Olympus-c processor supports + * multi-stranded cores. Return the cpu_id with + * the strand bit masked out. + */ + return ((id_t)((uint_t)cpu->cpu_id & ~(0x1))); + } else { + return (cpu->cpu_id); + } + case PGHW_CHIP: + return (cmp_cpu_to_chip(cpu->cpu_id)); + case PGHW_CACHE: + return (IS_PANTHER(impl) && + pg_plat_hw_instance_id(cpu, PGHW_CHIP)); + default: + return (-1); + } +} - if (IS_JAGUAR(impl)) { - cd->chipd_type = CHIP_CMP_SPLIT_CACHE; - } else if (IS_PANTHER(impl) || IS_OLYMPUS_C(impl)) { - cd->chipd_type = CHIP_CMP_SHARED_CACHE; - } else { - cd->chipd_type = CHIP_DEFAULT; +int +pg_plat_hw_level(pghw_type_t hw) +{ + int i; + static pghw_type_t hw_hier[] = { + PGHW_IPIPE, + PGHW_CHIP, + PGHW_CACHE, + PGHW_NUM_COMPONENTS + }; + + for (i = 0; hw_hier[i] != PGHW_NUM_COMPONENTS; i++) { + if (hw_hier[i] == hw) + return (i); } + return (-1); +} + +id_t +pg_plat_get_core_id(cpu_t *cp) +{ + return (pg_plat_hw_instance_id(cp, PGHW_IPIPE)); +} - /* - * Define any needed adjustment of rechoose_interval - * For now, all chips use the default. This - * will change with future processors. - */ - cd->chipd_rechoose_adj = 0; - cd->chipd_nosteal = 100000ULL; /* 100 usecs */ +void +cmp_set_nosteal_interval(void) +{ + /* Set the nosteal interval (used by disp_getbest()) to 100us */ + nosteal_nsec = 100000UL; } diff --git a/usr/src/uts/sun4v/cpu/generic.c b/usr/src/uts/sun4v/cpu/generic.c index 0a6d9394f1..6c93042813 100644 --- a/usr/src/uts/sun4v/cpu/generic.c +++ b/usr/src/uts/sun4v/cpu/generic.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -152,13 +152,20 @@ void cpu_init_private(struct cpu *cp) { /* - * The cpu_ipipe field is initialized based on the execution - * unit sharing information from the Machine Description table. - * It defaults to the CPU id in the absence of such information. + * The cpu_ipipe and cpu_fpu fields are initialized based on + * the execution unit sharing information from the Machine + * Description table. They default to the CPU id in the + * absence of such information. */ cp->cpu_m.cpu_ipipe = cpunodes[cp->cpu_id].exec_unit_mapping; if (cp->cpu_m.cpu_ipipe == NO_EU_MAPPING_FOUND) cp->cpu_m.cpu_ipipe = (id_t)(cp->cpu_id); + + cp->cpu_m.cpu_fpu = cpunodes[cp->cpu_id].fpu_mapping; + if (cp->cpu_m.cpu_fpu == NO_EU_MAPPING_FOUND) + cp->cpu_m.cpu_fpu = (id_t)(cp->cpu_id); + + cp->cpu_m.cpu_core = (id_t)(cp->cpu_id); } void diff --git a/usr/src/uts/sun4v/cpu/niagara.c b/usr/src/uts/sun4v/cpu/niagara.c index 125ca8e224..7ed9c3c641 100644 --- a/usr/src/uts/sun4v/cpu/niagara.c +++ b/usr/src/uts/sun4v/cpu/niagara.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -173,14 +173,23 @@ cpu_init_private(struct cpu *cp) extern int niagara_kstat_init(void); /* - * The cpu_ipipe field is initialized based on the execution - * unit sharing information from the MD. It defaults to the - * virtual CPU id in the absence of such information. + * The cpu_ipipe and cpu_fpu fields are initialized based on + * the execution unit sharing information from the MD. They default + * to the virtual CPU id in the absence of such information. 
 */ cp->cpu_m.cpu_ipipe = cpunodes[cp->cpu_id].exec_unit_mapping; if (cp->cpu_m.cpu_ipipe == NO_EU_MAPPING_FOUND) cp->cpu_m.cpu_ipipe = (id_t)(cp->cpu_id); + cp->cpu_m.cpu_fpu = cpunodes[cp->cpu_id].fpu_mapping; + if (cp->cpu_m.cpu_fpu == NO_EU_MAPPING_FOUND) + cp->cpu_m.cpu_fpu = (id_t)(cp->cpu_id); + + /* + * Niagara defines the core to be at the ipipe level + */ + cp->cpu_m.cpu_core = cp->cpu_m.cpu_ipipe; + ASSERT(MUTEX_HELD(&cpu_lock)); if (niagara_cpucnt++ == 0 && niagara_hsvc_available == B_TRUE) { (void) niagara_kstat_init(); diff --git a/usr/src/uts/sun4v/cpu/niagara2.c b/usr/src/uts/sun4v/cpu/niagara2.c index 83f5e6733b..382352411d 100644 --- a/usr/src/uts/sun4v/cpu/niagara2.c +++ b/usr/src/uts/sun4v/cpu/niagara2.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -161,14 +161,23 @@ cpu_init_private(struct cpu *cp) extern int niagara_kstat_init(void); /* - * The cpu_ipipe field is initialized based on the execution - * unit sharing information from the MD. It defaults to the - * virtual CPU id in the absence of such information. + * The cpu_ipipe and cpu_fpu fields are initialized based on + * the execution unit sharing information from the MD. They default + * to the virtual CPU id in the absence of such information. 
*/ cp->cpu_m.cpu_ipipe = cpunodes[cp->cpu_id].exec_unit_mapping; if (cp->cpu_m.cpu_ipipe == NO_EU_MAPPING_FOUND) cp->cpu_m.cpu_ipipe = (id_t)(cp->cpu_id); + cp->cpu_m.cpu_fpu = cpunodes[cp->cpu_id].fpu_mapping; + if (cp->cpu_m.cpu_fpu == NO_EU_MAPPING_FOUND) + cp->cpu_m.cpu_fpu = (id_t)(cp->cpu_id); + + /* + * Niagara 2 defines the core to be at the FPU level + */ + cp->cpu_m.cpu_core = cp->cpu_m.cpu_fpu; + ASSERT(MUTEX_HELD(&cpu_lock)); if ((niagara2_cpucnt++ == 0) && (niagara2_hsvc_available == B_TRUE)) (void) niagara_kstat_init(); diff --git a/usr/src/uts/sun4v/os/cmp.c b/usr/src/uts/sun4v/os/cmp.c index c82f3e006e..7219b639bf 100644 --- a/usr/src/uts/sun4v/os/cmp.c +++ b/usr/src/uts/sun4v/os/cmp.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -29,7 +28,7 @@ #include <sys/types.h> #include <sys/machsystm.h> #include <sys/cmp.h> -#include <sys/chip.h> +#include <sys/pghw.h> /* * Note: For now assume the chip ID as 0 for all the cpus until additional @@ -89,40 +88,80 @@ cmp_cpu_to_chip(processorid_t cpuid) return (0); } -/* - * Return a chip "id" for the given cpu_t - * cpu_t's residing on the same physical processor - * should map to the same "id" - */ -chipid_t -chip_plat_get_chipid(cpu_t *cp) +/*ARGSUSED*/ +int +pg_plat_hw_shared(cpu_t *cp, pghw_type_t hw) { - return (cmp_cpu_to_chip(cp->cpu_id)); + switch (hw) { + case PGHW_IPIPE: + return (1); + case PGHW_FPU: + return (1); + case PGHW_CHIP: + return (1); + } + return (0); } -/*ARGSUSED*/ -void -chip_plat_define_chip(cpu_t *cp, chip_def_t *cd) +int +pg_plat_cpus_share(cpu_t *cpu_a, cpu_t *cpu_b, pghw_type_t hw) { - cd->chipd_type = CHIP_CMT; - - /* - * Define any needed adjustment of rechoose_interval - * For now, all chips use the default. This - * will change with future processors. - */ - cd->chipd_rechoose_adj = 0; - cd->chipd_nosteal = 0; + if (pg_plat_hw_shared(cpu_a, hw) == 0 || + pg_plat_hw_shared(cpu_b, hw) == 0) + return (0); + + return (pg_plat_hw_instance_id(cpu_a, hw) == + pg_plat_hw_instance_id(cpu_b, hw)); +} + +id_t +pg_plat_hw_instance_id(cpu_t *cpu, pghw_type_t hw) +{ + switch (hw) { + case PGHW_IPIPE: + return (cpu->cpu_m.cpu_ipipe); + case PGHW_CHIP: + return (cmp_cpu_to_chip(cpu->cpu_id)); + case PGHW_FPU: + return (cpu->cpu_m.cpu_fpu); + default: + return (-1); + } } /* - * Return a pipeline "id" for the given cpu_t - * cpu_t's sharing the same instruction pipeline - * should map to the same "id" + * Order the relevant hw sharing relationships + * from least, to greatest physical scope. + * + * The hierarchy *must* be defined for all hw that + * pg_plat_hw_shared() returns non-zero. 
*/ +int +pg_plat_hw_level(pghw_type_t hw) +{ + int i; + static pghw_type_t hw_hier[] = { + PGHW_IPIPE, + PGHW_FPU, + PGHW_CHIP, + PGHW_NUM_COMPONENTS + }; + + for (i = 0; hw_hier[i] != PGHW_NUM_COMPONENTS; i++) { + if (hw_hier[i] == hw) + return (i); + } + return (-1); +} id_t -chip_plat_get_coreid(cpu_t *cp) +pg_plat_get_core_id(cpu_t *cpu) +{ + return (cpu->cpu_m.cpu_core); +} + +void +cmp_set_nosteal_interval(void) { - return (cp->cpu_m.cpu_ipipe); + nosteal_nsec = 0; } diff --git a/usr/src/uts/sun4v/os/fillsysinfo.c b/usr/src/uts/sun4v/os/fillsysinfo.c index c909f670fb..a2718cbed1 100644 --- a/usr/src/uts/sun4v/os/fillsysinfo.c +++ b/usr/src/uts/sun4v/os/fillsysinfo.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -218,6 +218,7 @@ setup_exec_unit_mappings(md_t *mdp) int idx, i, j; processorid_t cpuid; char *eunit_name = broken_md_flag ? "exec_unit" : "exec-unit"; + enum eu_type { INTEGER, FPU } etype; /* * Find the cpu integer exec units - and @@ -236,7 +237,8 @@ setup_exec_unit_mappings(md_t *mdp) num_eunits = md_alloc_scan_dag(mdp, cpus_node, eunit_name, "fwd", &eunit); if (num_eunits > 0) { - char *match_type = broken_md_flag ? "int" : "integer"; + char *int_str = broken_md_flag ? 
"int" : "integer"; + char *fpu_str = "fp"; /* Spin through and find all the integer exec units */ for (i = 0; i < num_eunits; i++) { @@ -245,13 +247,19 @@ setup_exec_unit_mappings(md_t *mdp) int vallen; uint64_t lcpuid; - /* ignore nodes with no type */ + /* ignore nodes with no type */ if (md_get_prop_data(mdp, eunit[i], "type", (uint8_t **)&val, &vallen)) continue; for (p = val; *p != '\0'; p += strlen(p) + 1) { - if (strcmp(p, match_type) == 0) + if (strcmp(p, int_str) == 0) { + etype = INTEGER; goto found; + } + if (strcmp(p, fpu_str) == 0) { + etype = FPU; + goto found; + } } continue; @@ -275,7 +283,14 @@ found: if (lcpuid >= NCPU) continue; cpuid = (processorid_t)lcpuid; - cpunodes[cpuid].exec_unit_mapping = idx; + switch (etype) { + case INTEGER: + cpunodes[cpuid].exec_unit_mapping = idx; + break; + case FPU: + cpunodes[cpuid].fpu_mapping = idx; + break; + } } md_free_scan_dag(mdp, &node); } diff --git a/usr/src/uts/sun4v/sys/machcpuvar.h b/usr/src/uts/sun4v/sys/machcpuvar.h index e0f4f2162b..632a5c767b 100644 --- a/usr/src/uts/sun4v/sys/machcpuvar.h +++ b/usr/src/uts/sun4v/sys/machcpuvar.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -159,7 +159,9 @@ struct machcpu { uint64_t mondo_data_ra; /* mono data pa */ uint16_t *cpu_list; /* uint16_t [NCPU] */ uint64_t cpu_list_ra; /* cpu list ra */ - id_t cpu_ipipe; /* cpu exec unit id */ + id_t cpu_ipipe; /* cpu int exec unit id */ + id_t cpu_fpu; /* cpu fpu unit id */ + id_t cpu_core; /* cpu core id */ kthread_t *startup_thread; }; @@ -213,6 +215,7 @@ struct cpu_node { int ecache_setsize; uint64_t device_id; id_t exec_unit_mapping; + id_t fpu_mapping; }; extern struct cpu_node cpunodes[]; |