Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/cmd/cpc/common/cputrack.c | 54
-rw-r--r--  usr/src/lib/libcpc/common/libcpc.c | 25
-rw-r--r--  usr/src/lib/libcpc/common/libcpc.h | 4
-rw-r--r--  usr/src/lib/libcpc/common/mapfile-vers | 1
-rw-r--r--  usr/src/lib/libpctx/common/libpctx.c | 29
-rw-r--r--  usr/src/lib/libpctx/common/libpctx.h | 11
-rw-r--r--  usr/src/lib/libpctx/common/mapfile-vers | 1
-rw-r--r--  usr/src/pkgdefs/SUNWhea/prototype_com | 1
-rw-r--r--  usr/src/uts/common/Makefile.files | 1
-rw-r--r--  usr/src/uts/common/conf/param.c | 2
-rw-r--r--  usr/src/uts/common/disp/cmt.c | 17
-rw-r--r--  usr/src/uts/common/dtrace/dcpc.c | 59
-rw-r--r--  usr/src/uts/common/io/cpc.c | 36
-rw-r--r--  usr/src/uts/common/os/cap_util.c | 1652
-rw-r--r--  usr/src/uts/common/os/cpu.c | 16
-rw-r--r--  usr/src/uts/common/os/group.c | 100
-rw-r--r--  usr/src/uts/common/os/kcpc.c | 1009
-rw-r--r--  usr/src/uts/common/os/pg.c | 6
-rw-r--r--  usr/src/uts/common/os/pghw.c | 302
-rw-r--r--  usr/src/uts/common/sys/Makefile | 1
-rw-r--r--  usr/src/uts/common/sys/cap_util.h | 173
-rw-r--r--  usr/src/uts/common/sys/cmt.h | 1
-rw-r--r--  usr/src/uts/common/sys/cpc_impl.h | 10
-rw-r--r--  usr/src/uts/common/sys/cpc_pcbe.h | 11
-rw-r--r--  usr/src/uts/common/sys/cpuvar.h | 70
-rw-r--r--  usr/src/uts/common/sys/group.h | 11
-rw-r--r--  usr/src/uts/common/sys/kcpc.h | 79
-rw-r--r--  usr/src/uts/common/sys/pghw.h | 75
-rw-r--r--  usr/src/uts/common/sys/systm.h | 1
-rw-r--r--  usr/src/uts/i86pc/os/cpuid.c | 7
-rw-r--r--  usr/src/uts/i86pc/os/intr.c | 6
-rw-r--r--  usr/src/uts/i86pc/os/mp_call.c | 37
-rw-r--r--  usr/src/uts/i86pc/os/mp_machdep.c | 36
-rw-r--r--  usr/src/uts/i86pc/sys/xc_levels.h | 1
-rw-r--r--  usr/src/uts/intel/genunix/Makefile | 4
-rw-r--r--  usr/src/uts/intel/ia32/os/cpc_subr.c | 52
-rw-r--r--  usr/src/uts/intel/pcbe/opteron_pcbe.c | 46
-rw-r--r--  usr/src/uts/intel/pcbe/p4_pcbe.c | 2
-rw-r--r--  usr/src/uts/intel/sys/x86_archext.h | 1
-rw-r--r--  usr/src/uts/sun4/os/mp_call.c | 47
-rw-r--r--  usr/src/uts/sun4/os/x_call.c | 13
-rw-r--r--  usr/src/uts/sun4u/genunix/Makefile | 4
-rw-r--r--  usr/src/uts/sun4u/os/cmp.c | 16
-rw-r--r--  usr/src/uts/sun4u/os/cpc_subr.c | 39
-rw-r--r--  usr/src/uts/sun4v/genunix/Makefile | 4
-rw-r--r--  usr/src/uts/sun4v/os/cmp.c | 16
-rw-r--r--  usr/src/uts/sun4v/os/cpc_subr.c | 38
-rw-r--r--  usr/src/uts/sun4v/pcbe/niagara2_pcbe.c | 6
48 files changed, 3759 insertions, 374 deletions
diff --git a/usr/src/cmd/cpc/common/cputrack.c b/usr/src/cmd/cpc/common/cputrack.c
index 22ad2673e2..41034aef6e 100644
--- a/usr/src/cmd/cpc/common/cputrack.c
+++ b/usr/src/cmd/cpc/common/cputrack.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -62,6 +62,12 @@ static const struct options *opts = (const struct options *)&__options;
static cpc_t *cpc;
+/*
+ * Number of signals caught from the terminal.
+ * We bail out as soon as possible once interrupt is set.
+ */
+static int interrupt = 0;
+
/*ARGSUSED*/
static void
cputrack_errfn(const char *fn, int subcode, const char *fmt, va_list ap)
@@ -79,6 +85,8 @@ cputrack_pctx_errfn(const char *fn, const char *fmt, va_list ap)
}
static int cputrack(int argc, char *argv[], int optind);
+static void intr(int);
+
#if defined(__i386)
static void p4_ht_error(void);
#endif
@@ -220,6 +228,19 @@ main(int argc, char *argv[])
exit(2);
}
+ /*
+ * Catch signals from terminal, so they can be handled asynchronously
+ * when we're ready instead of when we're not (;-)
+ */
+ if (sigset(SIGHUP, SIG_IGN) == SIG_DFL)
+ (void) sigset(SIGHUP, intr);
+ if (sigset(SIGINT, SIG_IGN) == SIG_DFL)
+ (void) sigset(SIGINT, intr);
+ if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL)
+ (void) sigset(SIGQUIT, intr);
+ (void) sigset(SIGPIPE, intr);
+ (void) sigset(SIGTERM, intr);
+
cpc_setgrp_reset(opts->master);
(void) setvbuf(opts->log, NULL, _IOLBF, 0);
ret = cputrack(argc, argv, optind);
@@ -310,6 +331,9 @@ pinit_lwp(pctx_t *pctx, pid_t pid, id_t lwpid, void *arg)
char *errstr;
int nreq;
+ if (interrupt)
+ return (0);
+
if (state->maxlwpid < lwpid) {
state->sgrps = realloc(state->sgrps,
lwpid * sizeof (state->sgrps));
@@ -373,6 +397,9 @@ pfini_lwp(pctx_t *pctx, pid_t pid, id_t lwpid, void *arg)
cpc_buf_t **data1, **data2, **scratch;
int nreq;
+ if (interrupt)
+ return (0);
+
set = cpc_setgrp_getset(sgrp);
nreq = cpc_setgrp_getbufs(sgrp, &data1, &data2, &scratch);
if (cpc_set_sample(cpc, set, *scratch) == 0) {
@@ -424,6 +451,9 @@ plwp_create(pctx_t *pctx, pid_t pid, id_t lwpid, void *arg)
cpc_buf_t **data1, **data2, **scratch;
int nreq;
+ if (interrupt)
+ return (0);
+
nreq = cpc_setgrp_getbufs(sgrp, &data1, &data2, &scratch);
print_sample(pid, lwpid, "lwp_create",
@@ -442,6 +472,9 @@ plwp_exit(pctx_t *pctx, pid_t pid, id_t lwpid, void *arg)
int nreq;
cpc_buf_t **data1, **data2, **scratch;
+ if (interrupt)
+ return (0);
+
start = cpc_setgrp_getset(sgrp);
do {
nreq = cpc_setgrp_getbufs(sgrp, &data1, &data2, &scratch);
@@ -465,6 +498,9 @@ pexec(pctx_t *pctx, pid_t pid, id_t lwpid, char *name, void *arg)
cpc_buf_t **data1, **data2, **scratch;
hrtime_t hrt;
+ if (interrupt)
+ return (0);
+
/*
* Print the accumulated results from the previous program image
*/
@@ -505,6 +541,9 @@ pexit(pctx_t *pctx, pid_t pid, id_t lwpid, int status, void *arg)
int nreq;
cpc_buf_t **data1, **data2, **scratch;
+ if (interrupt)
+ return;
+
cpc_setgrp_reset(state->accum);
start = cpc_setgrp_getset(state->accum);
do {
@@ -539,6 +578,9 @@ ptick(pctx_t *pctx, pid_t pid, id_t lwpid, void *arg)
char *errstr;
int nreqs;
+ if (interrupt)
+ return (0);
+
nreqs = cpc_setgrp_getbufs(sgrp, &data1, &data2, &scratch);
if (opts->nsets == 1) {
@@ -704,7 +746,6 @@ cputrack(int argc, char *argv[], int optind)
state->accum = NULL;
}
}
- pctx_release(pctx);
return (err != 0 ? 1 : 0);
}
@@ -834,3 +875,12 @@ p4_ht_error(void)
}
#endif /* defined(__i386) */
+
+/*ARGSUSED*/
+static void
+intr(int sig)
+{
+ interrupt++;
+ if (cpc != NULL)
+ cpc_terminate(cpc);
+}
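
The handler above does only what is async-signal-safe: bump a flag that every event callback polls, and ask libcpc to interrupt pctx_run(). A standalone sketch of the same cooperative-interrupt pattern (hypothetical names; strictly portable code would use volatile sig_atomic_t for the flag, which this change leaves as a plain int):

    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Flag set by the handler, polled by the worker loop. */
    static volatile sig_atomic_t interrupted = 0;

    static void
    on_signal(int sig)
    {
            interrupted = 1;        /* record the signal; do no real work here */
    }

    int
    main(void)
    {
            /*
             * Install the handler only when the signal is not already
             * ignored, mirroring the sigset(..., SIG_IGN) == SIG_DFL
             * checks in main() above.
             */
            if (signal(SIGINT, SIG_IGN) == SIG_DFL)
                    (void) signal(SIGINT, on_signal);

            while (!interrupted) {
                    /* ... sample counters, handle events ... */
                    (void) sleep(1);
            }
            (void) puts("interrupted: bailing out");
            return (0);
    }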
diff --git a/usr/src/lib/libcpc/common/libcpc.c b/usr/src/lib/libcpc/common/libcpc.c
index 5bdba39fda..9f4f6ac848 100644
--- a/usr/src/lib/libcpc/common/libcpc.c
+++ b/usr/src/lib/libcpc/common/libcpc.c
@@ -168,6 +168,23 @@ cpc_close(cpc_t *cpc)
return (0);
}
+/*
+ * Terminate everything that runs in pctx_run
+ */
+void
+cpc_terminate(cpc_t *cpc)
+{
+ cpc_set_t *csp;
+ int sigblocked;
+
+ sigblocked = cpc_lock(cpc);
+ for (csp = cpc->cpc_sets; csp != NULL; csp = csp->cs_next) {
+ if (csp->cs_pctx != NULL)
+ pctx_terminate(csp->cs_pctx);
+ }
+ cpc_unlock(cpc, sigblocked);
+}
+
cpc_set_t *
cpc_set_create(cpc_t *cpc)
{
@@ -224,6 +241,14 @@ cpc_set_destroy(cpc_t *cpc, cpc_set_t *set)
if (csp->cs_state != CS_UNBOUND)
(void) cpc_unbind(cpc, csp);
+ /*
+ * Detach from the process
+ */
+ if (csp->cs_pctx != NULL) {
+ pctx_release(csp->cs_pctx);
+ csp->cs_pctx = NULL;
+ }
+
for (req = csp->cs_request; req != NULL; req = next) {
next = req->cr_next;
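
cpc_terminate() takes the library lock, walks every set, and forwards the request to libpctx for each set bound to a process context; releasing the pctx in cpc_set_destroy() closes the matching teardown path. A hedged usage sketch from the client side (cpc_open(3CPC) and cpc_close(3CPC) are the documented entry points; the handler wiring is illustrative):

    #include <libcpc.h>
    #include <signal.h>
    #include <stddef.h>

    static cpc_t *cpc_hdl;

    /* Signal handler: make any pctx_run() loop inside libcpc bail out. */
    static void
    stop_tracking(int sig)
    {
            if (cpc_hdl != NULL)
                    cpc_terminate(cpc_hdl);
    }

    int
    main(void)
    {
            if ((cpc_hdl = cpc_open(CPC_VER_CURRENT)) == NULL)
                    return (1);
            (void) signal(SIGINT, stop_tracking);

            /* ... create sets, bind to a controlled process, run ... */

            (void) cpc_close(cpc_hdl);
            return (0);
    }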
diff --git a/usr/src/lib/libcpc/common/libcpc.h b/usr/src/lib/libcpc/common/libcpc.h
index 384474a76c..73627345a0 100644
--- a/usr/src/lib/libcpc/common/libcpc.h
+++ b/usr/src/lib/libcpc/common/libcpc.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -163,6 +163,8 @@ extern void cpc_walk_attrs(cpc_t *cpc, void *arg,
extern int cpc_enable(cpc_t *cpc);
extern int cpc_disable(cpc_t *cpc);
+extern void cpc_terminate(cpc_t *);
+
#if defined(__sparc) || defined(__i386)
/*
diff --git a/usr/src/lib/libcpc/common/mapfile-vers b/usr/src/lib/libcpc/common/mapfile-vers
index 91f1689c9f..e577fc7c5e 100644
--- a/usr/src/lib/libcpc/common/mapfile-vers
+++ b/usr/src/lib/libcpc/common/mapfile-vers
@@ -83,6 +83,7 @@ SUNW_1.2 {
SUNWprivate_1.1 {
global:
SUNWprivate_1.1;
+ cpc_terminate;
local:
*;
};
diff --git a/usr/src/lib/libpctx/common/libpctx.c b/usr/src/lib/libpctx/common/libpctx.c
index 9c28fb9b9b..f17e238322 100644
--- a/usr/src/lib/libpctx/common/libpctx.c
+++ b/usr/src/lib/libpctx/common/libpctx.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* This file contains a set of generic routines for periodically
* sampling the state of another process, or tree of processes.
@@ -66,6 +64,7 @@ struct __pctx {
int verbose;
int created;
int sigblocked;
+ int terminate;
sigset_t savedset;
cpc_t *cpc;
};
@@ -108,6 +107,7 @@ pctx_create(
pctx = calloc(1, sizeof (*pctx));
pctx->uarg = arg;
pctx->verbose = verbose;
+ pctx->terminate = 0;
pctx->errfn = errfn ? errfn : pctx_default_errfn;
if ((pctx->Pr = Pcreate(filename, argv, &err, 0, 0)) == NULL) {
@@ -487,6 +487,7 @@ pctx_release(pctx_t *pctx)
Prelease(pctx->Pr, PRELEASE_CLEAR);
pctx->Pr = NULL;
}
+
pctx_free(pctx);
bzero(pctx, sizeof (*pctx));
free(pctx);
@@ -577,7 +578,7 @@ pctx_run(
* exited successfully or the number of time samples has expired.
* Otherwise, if an error has occurred, running becomes -1.
*/
- while (running == 1) {
+ while (running == 1 && !pctx->terminate) {
if (Psetrun(pctx->Pr, 0, 0) != 0) {
if (pctx->verbose)
@@ -609,10 +610,13 @@ pctx_run(
if (nsamples != 1)
nsamples--;
}
- } while (mswait == 0);
+ } while (mswait == 0 && !pctx->terminate);
}
- (void) Pwait(pctx->Pr, mswait);
+ if (pctx->terminate)
+ goto bailout;
+ else
+ (void) Pwait(pctx->Pr, mswait);
checkstate:
switch (pstate = Pstate(pctx->Pr)) {
@@ -854,6 +858,9 @@ checkstate:
bailout:
(void) signal(SIGCHLD, sigsaved);
+ if (pctx->terminate)
+ return (0);
+
switch (running) {
case 0:
return (0);
@@ -885,6 +892,7 @@ __pctx_cpc(pctx_t *pctx, cpc_t *cpc,
* We store the last cpc_t used by libpctx, so that when this pctx is
* destroyed, libpctx can notify libcpc.
*/
+
if (pctx->cpc != NULL && pctx->cpc != cpc && pctx_cpc_callback != NULL)
(*pctx_cpc_callback)(pctx->cpc, pctx);
pctx->cpc = cpc;
@@ -993,3 +1001,12 @@ __pctx_cpc_register_callback(void (*arg)(struct __cpc *, struct __pctx *))
{
pctx_cpc_callback = arg;
}
+
+/*
+ * Tell pctx_run to bail out immediately
+ */
+void
+pctx_terminate(struct __pctx *pctx)
+{
+ pctx->terminate = 1;
+}
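
pctx_terminate() never interrupts the controlled process directly; it raises a one-way flag that pctx_run() checks at the top of its loop and again just before the blocking Pwait(). A minimal sketch of that loop shape (hypothetical names; the Psetrun()/Pwait() work is elided to comments):

    struct run_loop {
            int terminate;          /* set asynchronously, never cleared */
    };

    void
    run_loop_terminate(struct run_loop *lp)
    {
            lp->terminate = 1;
    }

    int
    run_loop(struct run_loop *lp)
    {
            int running = 1;

            while (running == 1 && !lp->terminate) {
                    /* ... resume the target, harvest events ... */

                    if (lp->terminate)
                            break;          /* don't enter the blocking wait */

                    /* ... block waiting for the target to stop ... */
            }

            /* A requested termination is reported as success. */
            if (lp->terminate)
                    return (0);
            return (running == 0 ? 0 : -1);
    }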
diff --git a/usr/src/lib/libpctx/common/libpctx.h b/usr/src/lib/libpctx/common/libpctx.h
index 10d0fb7c7e..7cd9ffff91 100644
--- a/usr/src/lib/libpctx/common/libpctx.h
+++ b/usr/src/lib/libpctx/common/libpctx.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _LIBPCTX_H
#define _LIBPCTX_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <fcntl.h>
#include <stdarg.h>
@@ -67,6 +64,8 @@ typedef int pctx_init_lwpfn_t(pctx_t *, pid_t, id_t, void *);
typedef int pctx_fini_lwpfn_t(pctx_t *, pid_t, id_t, void *);
typedef int pctx_sysc_lwp_exitfn_t(pctx_t *, pid_t, id_t, void *);
+extern void pctx_terminate(pctx_t *);
+
typedef enum {
PCTX_NULL_EVENT = 0,
PCTX_SYSC_EXEC_EVENT,
diff --git a/usr/src/lib/libpctx/common/mapfile-vers b/usr/src/lib/libpctx/common/mapfile-vers
index 1b296817d4..e316020c8b 100644
--- a/usr/src/lib/libpctx/common/mapfile-vers
+++ b/usr/src/lib/libpctx/common/mapfile-vers
@@ -50,6 +50,7 @@ SUNWprivate_1.1 {
global:
__pctx_cpc;
__pctx_cpc_register_callback;
+ pctx_terminate;
local:
*;
};
diff --git a/usr/src/pkgdefs/SUNWhea/prototype_com b/usr/src/pkgdefs/SUNWhea/prototype_com
index 8ad553b07c..88ab8b3f20 100644
--- a/usr/src/pkgdefs/SUNWhea/prototype_com
+++ b/usr/src/pkgdefs/SUNWhea/prototype_com
@@ -682,6 +682,7 @@ f none usr/include/sys/bustypes.h 644 root bin
f none usr/include/sys/byteorder.h 644 root bin
f none usr/include/sys/callb.h 644 root bin
f none usr/include/sys/callo.h 644 root bin
+f none usr/include/sys/cap_util.h 644 root bin
f none usr/include/sys/cpucaps.h 644 root bin
f none usr/include/sys/cpucaps_impl.h 644 root bin
f none usr/include/sys/ccompile.h 644 root bin
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 83b7bf34c6..974cec5d3f 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -47,6 +47,7 @@ COMMON_CORE_OBJS += \
cpu_intr.o \
cpu_pm.o \
cpupart.o \
+ cap_util.o \
disp.o \
group.o \
kstat_fr.o \
diff --git a/usr/src/uts/common/conf/param.c b/usr/src/uts/common/conf/param.c
index e6d77020a6..09e529b934 100644
--- a/usr/src/uts/common/conf/param.c
+++ b/usr/src/uts/common/conf/param.c
@@ -212,6 +212,7 @@ extern void clock_timer_init(void);
extern void clock_realtime_init(void);
extern void clock_highres_init(void);
extern void clock_tick_mp_init(void);
+extern void cu_init(void);
extern void callout_mp_init(void);
extern void cpu_seq_tbl_init(void);
@@ -257,6 +258,7 @@ void (*mp_init_tbl[])(void) = {
siron_mp_init,
#endif
clock_tick_mp_init,
+ cu_init,
callout_mp_init,
0
};
diff --git a/usr/src/uts/common/disp/cmt.c b/usr/src/uts/common/disp/cmt.c
index b2f219472d..a5f1a52e34 100644
--- a/usr/src/uts/common/disp/cmt.c
+++ b/usr/src/uts/common/disp/cmt.c
@@ -159,7 +159,6 @@ static void cmt_ev_thread_remain_pwr(pg_t *, cpu_t *, kthread_t *);
static cmt_lineage_validation_t pg_cmt_lineage_validate(pg_cmt_t **, int *,
cpu_pg_t *);
-
/*
* CMT PG ops
*/
@@ -583,6 +582,8 @@ pg_cmt_cpu_init(cpu_t *cp, cpu_pg_t *pgdata)
ASSERT(IS_CMT_PG(pg));
}
+ ((pghw_t *)pg)->pghw_generation++;
+
/* Add the CPU to the PG */
pg_cpu_add((pg_t *)pg, cp, pgdata);
@@ -762,7 +763,7 @@ pg_cmt_cpu_init(cpu_t *cp, cpu_pg_t *pgdata)
*
* cp->cpu_pg is used by the dispatcher to access the CPU's PG data
* references a "bootstrap" structure across this function's invocation.
- * pg_cmt_cpu_init() and the routines it calls must be careful to operate only
+ * pg_cmt_cpu_fini() and the routines it calls must be careful to operate only
* on the "pgdata" argument, and not cp->cpu_pg.
*/
static void
@@ -818,6 +819,8 @@ pg_cmt_cpu_fini(cpu_t *cp, cpu_pg_t *pgdata)
pg = (pg_cmt_t *)pgdata->cmt_lineage;
while (pg != NULL) {
+ ((pghw_t *)pg)->pghw_generation++;
+
/*
* Remove the PG from the CPU's load balancing lineage
*/
@@ -990,6 +993,11 @@ pg_cmt_cpu_active(cpu_t *cp)
if (IS_CMT_PG(pg) == 0)
continue;
+ /*
+ * Move to the next generation since topology is changing
+ */
+ ((pghw_t *)pg)->pghw_generation++;
+
err = group_add(&pg->cmt_cpus_actv, cp, GRP_NORESIZE);
ASSERT(err == 0);
@@ -1056,6 +1064,11 @@ pg_cmt_cpu_inactive(cpu_t *cp)
continue;
/*
+ * Move to the next generation since topology is changing
+ */
+ ((pghw_t *)pg)->pghw_generation++;
+
+ /*
* Remove the CPU from the CMT PGs active CPU group
* bitmap
*/
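
Every hunk in cmt.c makes the same change: bump pghw_generation whenever a CPU enters or leaves a PG or changes active state. A reader can then detect a topology change without holding a lock by sampling the generation before and after its walk, seqlock-style. A hedged sketch of the consumer-side check (the pghw_generation field is real; the struct and walk here are pared down for illustration):

    #include <stdint.h>

    struct pghw {
            uint64_t pghw_generation;       /* bumped on topology changes */
    };

    /*
     * Returns 0 if the walk saw a consistent topology, -1 if a CPU was
     * added or removed underneath us and the result should be discarded.
     */
    int
    pg_walk_consistent(struct pghw *pg)
    {
            uint64_t gen = pg->pghw_generation;

            /* ... walk the PG's CPUs, accumulate counter data ... */

            return (pg->pghw_generation == gen ? 0 : -1);
    }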
diff --git a/usr/src/uts/common/dtrace/dcpc.c b/usr/src/uts/common/dtrace/dcpc.c
index e780d1e620..c410e65eaa 100644
--- a/usr/src/uts/common/dtrace/dcpc.c
+++ b/usr/src/uts/common/dtrace/dcpc.c
@@ -35,6 +35,7 @@
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/kcpc.h>
+#include <sys/cap_util.h>
#include <sys/cpc_pcbe.h>
#include <sys/cpc_impl.h>
#include <sys/dtrace_impl.h>
@@ -463,8 +464,7 @@ dcpc_program_cpu_event(cpu_t *c)
set = dcpc_create_set(c);
- octx = NULL;
- set->ks_ctx = ctx = kcpc_ctx_alloc();
+ set->ks_ctx = ctx = kcpc_ctx_alloc(KM_SLEEP);
ctx->kc_set = set;
ctx->kc_cpuid = c->cpu_id;
@@ -489,11 +489,9 @@ dcpc_program_cpu_event(cpu_t *c)
* If we already have an active enabling then save the current cpc
* context away.
*/
- if (c->cpu_cpc_ctx != NULL)
- octx = c->cpu_cpc_ctx;
+ octx = c->cpu_cpc_ctx;
- c->cpu_cpc_ctx = ctx;
- kcpc_remote_program(c);
+ kcpc_cpu_program(c, ctx);
if (octx != NULL) {
kcpc_set_t *oset = octx->kc_set;
@@ -528,9 +526,14 @@ dcpc_disable_cpu(cpu_t *c)
if (c->cpu_flags & CPU_OFFLINE)
return;
- kcpc_remote_stop(c);
-
+ /*
+ * Grab CPUs CPC context before kcpc_cpu_stop() stops counters and
+ * changes it.
+ */
ctx = c->cpu_cpc_ctx;
+
+ kcpc_cpu_stop(c, B_FALSE);
+
set = ctx->kc_set;
kcpc_free_configs(set);
@@ -538,7 +541,6 @@ dcpc_disable_cpu(cpu_t *c)
kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
kcpc_free_set(set);
kcpc_ctx_free(ctx);
- c->cpu_cpc_ctx = NULL;
}
/*
@@ -615,8 +617,21 @@ dcpc_program_event(dcpc_probe_t *pp)
if (c->cpu_flags & CPU_OFFLINE)
continue;
+ /*
+ * Stop counters but preserve existing DTrace CPC context
+ * if there is one.
+ *
+ * If we come here when the first event is programmed for a CPU,
+ * there should be no DTrace CPC context installed. In this
+ * case, kcpc_cpu_stop() will ensure that there is no other
+ * context on the CPU.
+ *
+ * If we add new enabling to the original one, the CPU should
+ * have the old DTrace CPC context which we need to keep around
+ * since dcpc_program_event() will add to it.
+ */
if (c->cpu_cpc_ctx != NULL)
- kcpc_remote_stop(c);
+ kcpc_cpu_stop(c, B_TRUE);
} while ((c = c->cpu_next) != cpu_list);
dcpc_release_interrupts();
@@ -708,6 +723,13 @@ dcpc_enable(void *arg, dtrace_id_t id, void *parg)
ASSERT(pp->dcpc_actv_req_idx >= 0);
/*
+ * DTrace is taking over CPC contexts, so stop collecting
+ * capacity/utilization data for all CPUs.
+ */
+ if (dtrace_cpc_in_use == 1)
+ cu_disable();
+
+ /*
* The following must hold true if we are to (attempt to) enable
* this request:
*
@@ -758,7 +780,7 @@ dcpc_enable(void *arg, dtrace_id_t id, void *parg)
if (c->cpu_flags & CPU_OFFLINE)
continue;
- kcpc_remote_program(c);
+ kcpc_cpu_program(c, c->cpu_cpc_ctx);
} while ((c = c->cpu_next) != cpu_list);
}
@@ -766,6 +788,13 @@ dcpc_enable(void *arg, dtrace_id_t id, void *parg)
dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
pp->dcpc_actv_req_idx = pp->dcpc_picno = -1;
+ /*
+ * If all probes are removed, enable capacity/utilization data
+ * collection for every CPU.
+ */
+ if (dtrace_cpc_in_use == 0)
+ cu_enable();
+
return (-1);
}
@@ -841,6 +870,13 @@ dcpc_disable(void *arg, dtrace_id_t id, void *parg)
dtrace_cpc_in_use--;
pp->dcpc_enabled = 0;
pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;
+
+ /*
+ * If all probes are removed, enable capacity/utilization data
+ * collection for every CPU
+ */
+ if (dtrace_cpc_in_use == 0)
+ cu_enable();
}
/*ARGSUSED*/
@@ -891,7 +927,6 @@ dcpc_cpu_setup(cpu_setup_t what, processorid_t cpu, void *arg)
*/
if (dtrace_cpc_in_use) {
c = cpu_get(cpu);
-
(void) dcpc_program_cpu_event(c);
}
break;
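
The cu_disable()/cu_enable() calls bracket DTrace's ownership of the counters: the first enabling (dtrace_cpc_in_use reaching 1) makes the CU framework yield the hardware, and removing the last enabling (the count returning to 0) gives it back. The first-user/last-user gating in isolation (assumed helper names; resource_yield/resource_reclaim stand in for cu_disable/cu_enable):

    static int users;                       /* like dtrace_cpc_in_use */

    static void resource_yield(void) { /* e.g. cu_disable() */ }
    static void resource_reclaim(void) { /* e.g. cu_enable() */ }

    void
    subsystem_enable(void)
    {
            if (++users == 1)
                    resource_yield();       /* first user takes the hardware */
    }

    void
    subsystem_disable(void)
    {
            if (--users == 0)
                    resource_reclaim();     /* last user gives it back */
    }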
diff --git a/usr/src/uts/common/io/cpc.c b/usr/src/uts/common/io/cpc.c
index 6881380251..0b003c3ee1 100644
--- a/usr/src/uts/common/io/cpc.c
+++ b/usr/src/uts/common/io/cpc.c
@@ -942,49 +942,19 @@ static struct modlinkage modl = {
#endif
};
-static void
-kcpc_init(void)
-{
- long hash;
-
- rw_init(&kcpc_cpuctx_lock, NULL, RW_DEFAULT, NULL);
- for (hash = 0; hash < CPC_HASH_BUCKETS; hash++)
- mutex_init(&kcpc_ctx_llock[hash],
- NULL, MUTEX_DRIVER, (void *)(uintptr_t)15);
-}
-
-static void
-kcpc_fini(void)
-{
- long hash;
-
- for (hash = 0; hash < CPC_HASH_BUCKETS; hash++)
- mutex_destroy(&kcpc_ctx_llock[hash]);
- rw_destroy(&kcpc_cpuctx_lock);
-}
-
int
_init(void)
{
- int ret;
-
- if (kcpc_hw_load_pcbe() != 0)
+ if (kcpc_init() != 0)
return (ENOTSUP);
- kcpc_init();
- if ((ret = mod_install(&modl)) != 0)
- kcpc_fini();
- return (ret);
+ return (mod_install(&modl));
}
int
_fini(void)
{
- int ret;
-
- if ((ret = mod_remove(&modl)) == 0)
- kcpc_fini();
- return (ret);
+ return (mod_remove(&modl));
}
int
diff --git a/usr/src/uts/common/os/cap_util.c b/usr/src/uts/common/os/cap_util.c
new file mode 100644
index 0000000000..16ff7f45fd
--- /dev/null
+++ b/usr/src/uts/common/os/cap_util.c
@@ -0,0 +1,1652 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Support for determining capacity and utilization of performance relevant
+ * hardware components in a computer
+ *
+ * THEORY
+ * ------
+ * The capacity and utilization of the performance relevant hardware components
+ * is needed to be able to optimize performance while minimizing the amount of
+ * power used on a system. The idea is to use hardware performance counters
+ * and potentially other means to determine the capacity and utilization of
+ * performance relevant hardware components (eg. execution pipeline, cache,
+ * memory, etc.) and attribute the utilization to the responsible CPU and the
+ * thread running there.
+ *
+ * This will help characterize the utilization of performance relevant
+ * components and how much is used by each CPU and each thread. With
+ * that data, the utilization can be aggregated to all the CPUs sharing each
+ * performance relevant hardware component to calculate the total utilization
+ * of each component and compare that with the component's capacity to
+ * essentially determine the actual hardware load of the component. The
+ * hardware utilization attributed to each running thread can also be
+ * aggregated to determine the total hardware utilization of each component to
+ * a workload.
+ *
+ * Once that is done, one can determine how much of each performance relevant
+ * hardware component is needed by a given thread or set of threads (eg. a
+ * workload) and size up exactly what hardware is needed by the threads and how
+ * much. With this info, we can better place threads among CPUs to match their
+ * exact hardware resource needs and potentially lower or raise the power based
+ * on their utilization or pack threads onto the fewest hardware components
+ * needed and power off any remaining unused components to minimize power
+ * without sacrificing performance.
+ *
+ * IMPLEMENTATION
+ * --------------
+ * The code has been designed and implemented to make (un)programming and
+ * reading the counters for a given CPU as lightweight and fast as possible.
+ * This is very important because we need to read and potentially (un)program
+ * the counters very often and in performance sensitive code. Specifically,
+ * the counters may need to be (un)programmed during context switch and/or a
+ * cyclic handler when there are more counter events to count than existing
+ * counters.
+ *
+ * Consequently, the code has been split up to allow allocating and
+ * initializing everything needed to program and read the counters on a given
+ * CPU once and make (un)programming and reading the counters for a given CPU
+ * not have to allocate/free memory or grab any locks. To do this, all the
+ * state needed to (un)program and read the counters on a CPU is kept per CPU
+ * and is made lock free by forcing any code that reads or manipulates the
+ * counters or the state needed to (un)program or read the counters to run on
+ * the target CPU and disable preemption while running on the target CPU to
+ * protect any critical sections. All counter manipulation on the target CPU is
+ * happening either from a cross-call to the target CPU or at the same PIL as
+ * used by the cross-call subsystem. This guarantees that counter manipulation
+ * is not interrupted by cross-calls from other CPUs.
+ *
+ * The synchronization has been made lock free or as simple as possible for
+ * performance and to avoid getting the locking all tangled up when we interpose
+ * on the CPC routines that (un)program the counters to manage the counters
+ * between the kernel and user on each CPU. When the user starts using the
+ * counters on a given CPU, the kernel will unprogram the counters that it is
+ * using on that CPU just before they are programmed for the user. Then the
+ * kernel will program the counters on a given CPU for its own use when the user
+ * stops using them.
+ *
+ * There is a special interaction with DTrace cpc provider (dcpc). Before dcpc
+ * enables any probe, it requests to disable and unprogram all counters used for
+ * capacity and utilizations. These counters are never re-programmed back until
+ * dcpc completes. When all DTrace cpc probes are removed, dcpc notifies CU
+ * framework and it re-programs the counters.
+ *
+ * When a CPU is going offline, its CU counters are unprogrammed and disabled,
+ * so that they would not be re-programmed again by some other activity on the
+ * CPU that is going offline.
+ *
+ * The counters are programmed during boot. However, a flag is available to
+ * disable this if necessary (see cu_flag below). A handler is provided to
+ * (un)program the counters during CPU on/offline. Basic routines are provided
+ * to initialize and tear down this module, initialize and tear down any state
+ * needed for a given CPU, and (un)program the counters for a given CPU.
+ * Lastly, a handler is provided to read the counters and attribute the
+ * utilization to the responsible CPU.
+ */
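
The access discipline described above reduces to one shape that recurs in every routine below: get onto the target CPU, pin there, assert the caller delivered us correctly, touch only that CPU's state, unpin. A sketch of that shape (kpreempt_disable()/kpreempt_enable(), CPU, and ASSERT() are the real kernel primitives; the function itself is illustrative):

    /* Illustrative only: the shape of every counter-state access below. */
    static void
    cu_state_touch(cpu_t *cp)
    {
            kpreempt_disable();             /* CPU can't change underneath us */
            ASSERT(cp == CPU);              /* caller got us to the target */

            /* ... read or reprogram this CPU's counter state ... */

            kpreempt_enable();
    }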
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/cpuvar.h>
+#include <sys/ddi.h>
+#include <sys/disp.h>
+#include <sys/sdt.h>
+#include <sys/sunddi.h>
+#include <sys/thread.h>
+#include <sys/pghw.h>
+#include <sys/cmt.h>
+#include <sys/x_call.h>
+#include <sys/cap_util.h>
+
+#include <sys/archsystm.h>
+#include <sys/promif.h>
+
+#if defined(__x86)
+#include <sys/xc_levels.h>
+#endif
+
+
+/*
+ * Default CPU hardware performance counter flags to use for measuring capacity
+ * and utilization
+ */
+#define CU_CPC_FLAGS_DEFAULT \
+ (CPC_COUNT_USER|CPC_COUNT_SYSTEM|CPC_OVF_NOTIFY_EMT)
+
+/*
+ * Possible Flags for controlling this module.
+ */
+#define CU_FLAG_ENABLE 1 /* Enable module */
+#define CU_FLAG_READY 2 /* Ready to setup module */
+#define CU_FLAG_ON 4 /* Module is on */
+
+/*
+ * pg_cpu kstats calculate utilization rate and maximum utilization rate for
+ * some CPUs. The rate is calculated based on data from two subsequent
+ * snapshots. When the time between such two snapshots is too small, the
+ * resulting rate may have low accuracy, so we only consider snapshots which
+ * are separated by SAMPLE_INTERVAL nanoseconds from one another. We do not
+ * update the rate if the interval is smaller than that.
+ *
+ * Use one tenth of a second as the minimum interval for utilization rate
+ * calculation.
+ *
+ * NOTE: The CU_SAMPLE_INTERVAL_MIN should be higher than the scaling factor in
+ * the CU_RATE() macro below to guarantee that we never divide by zero.
+ *
+ * Rate is the number of events per second. The rate is the number of events
+ * divided by time and multiplied by the number of nanoseconds in a second. We
+ * do not want time to be too small since it will cause large errors in
+ * division.
+ *
+ * We do not want to multiply two large numbers (the instruction count and
+ * NANOSEC) either since it may cause integer overflow. So we divide both the
+ * numerator and the denominator by the same value.
+ *
+ * NOTE: The scaling factor below should be less than CU_SAMPLE_INTERVAL_MIN
+ * above to guarantee that time divided by this value is always non-zero.
+ */
+#define CU_RATE(val, time) \
+ (((val) * (NANOSEC / CU_SCALE)) / ((time) / CU_SCALE))
+
+#define CU_SAMPLE_INTERVAL_MIN (NANOSEC / 10)
+
+#define CU_SCALE (CU_SAMPLE_INTERVAL_MIN / 10000)
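
With NANOSEC = 10^9, CU_SCALE works out to 10^4, so CU_RATE() computes (val * 10^5) / (time / 10^4); since time is at least CU_SAMPLE_INTERVAL_MIN = 10^8 ns, the denominator is at least 10^4 and never zero. A quick userland check of the same arithmetic (hypothetical sample values):

    #include <stdio.h>
    #include <stdint.h>

    #define NANOSEC                 1000000000LL
    #define CU_SAMPLE_INTERVAL_MIN  (NANOSEC / 10)
    #define CU_SCALE                (CU_SAMPLE_INTERVAL_MIN / 10000)
    #define CU_RATE(val, time) \
            (((val) * (NANOSEC / CU_SCALE)) / ((time) / CU_SCALE))

    int
    main(void)
    {
            int64_t events = 250000000LL;   /* counted during the interval */
            int64_t interval = NANOSEC / 4; /* 0.25 s, in nanoseconds */

            /* 2.5e8 events / 0.25 s = 1e9 events/sec, no 64-bit overflow */
            (void) printf("%lld events/sec\n",
                (long long)CU_RATE(events, interval));
            return (0);
    }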
+
+/*
+ * When the time between two kstat reads for the same CPU is less than
+ * CU_UPDATE_THRESHOLD use the old counter data and skip updating counter values
+ * for the CPU. This helps reduce cross-calls when kstat consumers read data
+ * very often or when they read PG utilization data and then CPU utilization
+ * data quickly after that.
+ */
+#define CU_UPDATE_THRESHOLD (NANOSEC / 10)
+
+/*
+ * The IS_HIPIL() macro verifies that the code is executed either from a
+ * cross-call or from high-PIL interrupt
+ */
+#ifdef DEBUG
+#define IS_HIPIL() (getpil() >= XCALL_PIL)
+#else
+#define IS_HIPIL()
+#endif /* DEBUG */
+
+
+typedef void (*cu_cpu_func_t)(uintptr_t, int *);
+
+
+/*
+ * Flags to use for programming CPU hardware performance counters to measure
+ * capacity and utilization
+ */
+int cu_cpc_flags = CU_CPC_FLAGS_DEFAULT;
+
+/*
+ * Initial value used for programming hardware counters
+ */
+uint64_t cu_cpc_preset_value = 0;
+
+/*
+ * List of CPC event requests for capacity and utilization.
+ */
+static kcpc_request_list_t *cu_cpc_reqs = NULL;
+
+/*
+ * When a CPU is a member of PG with a sharing relationship that is supported
+ * by the capacity/utilization framework, a kstat is created for that CPU and
+ * sharing relationship.
+ *
+ * These kstats are updated one at a time, so we can have a single scratch
+ * space to fill the data.
+ *
+ * CPU counter kstats fields:
+ *
+ * cu_cpu_id CPU ID for this kstat
+ *
+ * cu_generation Generation value that increases whenever any CPU goes
+ * offline or online. Two kstat snapshots for the same
+ * CPU may only be compared if they have the same
+ * generation.
+ *
+ * cu_pg_id PG ID for the relationship described by this kstat
+ *
+ * cu_cpu_util Running value of CPU utilization for the sharing
+ * relationship
+ *
+ * cu_cpu_time_running Total time spent collecting CU data. The time may be
+ * less than wall time if CU counters were stopped for
+ * some time.
+ *
+ * cu_cpu_time_stopped Total time the CU counters were stopped.
+ *
+ * cu_cpu_rate Utilization rate, expressed in operations per second.
+ *
+ * cu_cpu_rate_max Maximum observed value of utilization rate.
+ */
+struct cu_cpu_kstat {
+ kstat_named_t cu_cpu_id;
+ kstat_named_t cu_generation;
+ kstat_named_t cu_pg_id;
+ kstat_named_t cu_cpu_util;
+ kstat_named_t cu_cpu_time_running;
+ kstat_named_t cu_cpu_time_stopped;
+ kstat_named_t cu_cpu_rate;
+ kstat_named_t cu_cpu_rate_max;
+} cu_cpu_kstat = {
+ { "id", KSTAT_DATA_UINT32 },
+ { "generation", KSTAT_DATA_UINT32 },
+ { "pg_id", KSTAT_DATA_LONG },
+ { "hw_util", KSTAT_DATA_UINT64 },
+ { "hw_util_time_running", KSTAT_DATA_UINT64 },
+ { "hw_util_time_stopped", KSTAT_DATA_UINT64 },
+ { "hw_util_rate", KSTAT_DATA_UINT64 },
+ { "hw_util_rate_max", KSTAT_DATA_UINT64 },
+};
+
+/*
+ * Flags for controlling this module
+ */
+uint_t cu_flags = CU_FLAG_ENABLE;
+
+/*
+ * Error return value for cu_init(), which cannot return anything because it
+ * is called from mp_init_tbl[] (:-(
+ */
+static int cu_init_error = 0;
+
+hrtime_t cu_sample_interval_min = CU_SAMPLE_INTERVAL_MIN;
+
+hrtime_t cu_update_threshold = CU_UPDATE_THRESHOLD;
+
+static kmutex_t pg_cpu_kstat_lock;
+
+
+/*
+ * Forward declaration of interface routines
+ */
+void cu_disable(void);
+void cu_enable(void);
+void cu_init(void);
+void cu_cpc_program(cpu_t *cp, int *err);
+void cu_cpc_unprogram(cpu_t *cp, int *err);
+int cu_cpu_update(struct cpu *cp, boolean_t move_to);
+void cu_pg_update(pghw_t *pg);
+
+
+/*
+ * Forward declaration of private routines
+ */
+static int cu_cpc_init(cpu_t *cp, kcpc_request_list_t *reqs, int nreqs);
+static void cu_cpc_program_xcall(uintptr_t arg, int *err);
+static int cu_cpc_req_add(char *event, kcpc_request_list_t *reqs,
+ int nreqs, cu_cntr_stats_t *stats, int kmem_flags, int *nevents);
+static int cu_cpu_callback(cpu_setup_t what, int id, void *arg);
+static void cu_cpu_disable(cpu_t *cp);
+static void cu_cpu_enable(cpu_t *cp);
+static int cu_cpu_init(cpu_t *cp, kcpc_request_list_t *reqs);
+static int cu_cpu_fini(cpu_t *cp);
+static void cu_cpu_kstat_create(pghw_t *pg, cu_cntr_info_t *cntr_info);
+static int cu_cpu_kstat_update(kstat_t *ksp, int rw);
+static int cu_cpu_run(cpu_t *cp, cu_cpu_func_t func, uintptr_t arg);
+static int cu_cpu_update_stats(cu_cntr_stats_t *stats,
+ uint64_t cntr_value);
+static void cu_cpu_info_detach_xcall(void);
+
+/*
+ * Disable or enable Capacity Utilization counters on all CPUs.
+ */
+void
+cu_disable(void)
+{
+ cpu_t *cp;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ cp = cpu_active;
+ do {
+ if (!(cp->cpu_flags & CPU_OFFLINE))
+ cu_cpu_disable(cp);
+ } while ((cp = cp->cpu_next_onln) != cpu_active);
+}
+
+
+void
+cu_enable(void)
+{
+ cpu_t *cp;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ cp = cpu_active;
+ do {
+ if (!(cp->cpu_flags & CPU_OFFLINE))
+ cu_cpu_enable(cp);
+ } while ((cp = cp->cpu_next_onln) != cpu_active);
+}
+
+
+/*
+ * Setup capacity and utilization support
+ */
+void
+cu_init(void)
+{
+ cpu_t *cp;
+
+ cu_init_error = 0;
+ if (!(cu_flags & CU_FLAG_ENABLE) || (cu_flags & CU_FLAG_ON)) {
+ cu_init_error = -1;
+ return;
+ }
+
+ if (kcpc_init() != 0) {
+ cu_init_error = -2;
+ return;
+ }
+
+ /*
+ * Can't measure hardware capacity and utilization without CPU
+ * hardware performance counters
+ */
+ if (cpc_ncounters <= 0) {
+ cu_init_error = -3;
+ return;
+ }
+
+ /*
+ * Setup CPC event request queue
+ */
+ cu_cpc_reqs = kcpc_reqs_init(cpc_ncounters, KM_SLEEP);
+
+ mutex_enter(&cpu_lock);
+
+ /*
+ * Mark flags to say that module is ready to be setup
+ */
+ cu_flags |= CU_FLAG_READY;
+
+ cp = cpu_active;
+ do {
+ /*
+ * Allocate and setup state needed to measure capacity and
+ * utilization
+ */
+ if (cu_cpu_init(cp, cu_cpc_reqs) != 0)
+ cu_init_error = -5;
+
+ /*
+ * Reset list of counter event requests so its space can be
+ * reused for a different set of requests for next CPU
+ */
+ (void) kcpc_reqs_reset(cu_cpc_reqs);
+
+ cp = cp->cpu_next_onln;
+ } while (cp != cpu_active);
+
+ /*
+ * Mark flags to say that module is on now and counters are ready to be
+ * programmed on all active CPUs
+ */
+ cu_flags |= CU_FLAG_ON;
+
+ /*
+ * Program counters on currently active CPUs
+ */
+ cp = cpu_active;
+ do {
+ if (cu_cpu_run(cp, cu_cpc_program_xcall,
+ (uintptr_t)B_FALSE) != 0)
+ cu_init_error = -6;
+
+ cp = cp->cpu_next_onln;
+ } while (cp != cpu_active);
+
+ /*
+ * Register callback for CPU state changes to enable and disable
+ * CPC counters as CPUs come on and offline
+ */
+ register_cpu_setup_func(cu_cpu_callback, NULL);
+
+ mutex_exit(&cpu_lock);
+}
+
+
+/*
+ * Return the number of counter events needed to measure capacity and
+ * utilization for the specified CPU, and fill in the list of CPC requests
+ * with each counter event needed if a request list is given
+ *
+ * NOTE: Use KM_NOSLEEP for kmem_{,z}alloc() since cpu_lock is held and free
+ * everything that has been successfully allocated if any memory
+ * allocation fails
+ */
+static int
+cu_cpc_init(cpu_t *cp, kcpc_request_list_t *reqs, int nreqs)
+{
+ group_t *cmt_pgs;
+ cu_cntr_info_t **cntr_info_array;
+ cpu_pg_t *cpu_pgs;
+ cu_cpu_info_t *cu_cpu_info;
+ pg_cmt_t *pg_cmt;
+ pghw_t *pg_hw;
+ cu_cntr_stats_t *stats;
+ int nevents;
+ pghw_type_t pg_hw_type;
+ group_iter_t iter;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * There has to be a target CPU for this
+ */
+ if (cp == NULL)
+ return (-1);
+
+ /*
+ * Return 0 when CPU doesn't belong to any group
+ */
+ cpu_pgs = cp->cpu_pg;
+ if (cpu_pgs == NULL || GROUP_SIZE(&cpu_pgs->cmt_pgs) < 1)
+ return (0);
+
+ cmt_pgs = &cpu_pgs->cmt_pgs;
+ cu_cpu_info = cp->cpu_cu_info;
+
+ /*
+ * Grab counter statistics and info
+ */
+ if (reqs == NULL) {
+ stats = NULL;
+ cntr_info_array = NULL;
+ } else {
+ if (cu_cpu_info == NULL || cu_cpu_info->cu_cntr_stats == NULL)
+ return (-2);
+
+ stats = cu_cpu_info->cu_cntr_stats;
+ cntr_info_array = cu_cpu_info->cu_cntr_info;
+ }
+
+ /*
+ * See whether platform (or processor) specific code knows which CPC
+ * events to request, etc. are needed to measure hardware capacity and
+ * utilization on this machine
+ */
+ nevents = cu_plat_cpc_init(cp, reqs, nreqs);
+ if (nevents >= 0)
+ return (nevents);
+
+ /*
+ * Let common code decide which CPC events to request, etc. to measure
+ * capacity and utilization since platform (or processor) specific does
+ * not know....
+ *
+ * Walk CPU's PG lineage and do following:
+ *
+ * - Setup CPC request, counter info, and stats needed for each counter
+ * event to measure capacity and utilization for each of the CPU's PG
+ * hardware sharing relationships
+ *
+ * - Create PG CPU kstats to export capacity and utilization for each PG
+ */
+ nevents = 0;
+ group_iter_init(&iter);
+ while ((pg_cmt = group_iterate(cmt_pgs, &iter)) != NULL) {
+ cu_cntr_info_t *cntr_info;
+ int nevents_save;
+ int nstats;
+
+ pg_hw = (pghw_t *)pg_cmt;
+ pg_hw_type = pg_hw->pghw_hw;
+ nevents_save = nevents;
+ nstats = 0;
+
+ switch (pg_hw_type) {
+ case PGHW_IPIPE:
+ if (cu_cpc_req_add("PAPI_tot_ins", reqs, nreqs, stats,
+ KM_NOSLEEP, &nevents) != 0)
+ continue;
+ nstats = 1;
+ break;
+
+ case PGHW_FPU:
+ if (cu_cpc_req_add("PAPI_fp_ins", reqs, nreqs, stats,
+ KM_NOSLEEP, &nevents) != 0)
+ continue;
+ nstats = 1;
+ break;
+
+ default:
+ /*
+ * Don't measure capacity and utilization for this kind
+ * of PG hardware relationship so skip to next PG in
+ * CPU's PG lineage
+ */
+ continue;
+ }
+
+ cntr_info = cntr_info_array[pg_hw_type];
+
+ /*
+ * Nothing to measure for this hardware sharing relationship
+ */
+ if (nevents - nevents_save == 0) {
+ if (cntr_info != NULL)
+ kmem_free(cntr_info, sizeof (cu_cntr_info_t));
+ cntr_info_array[pg_hw_type] = NULL;
+ continue;
+ }
+
+ /*
+ * Fill in counter info for this PG hardware relationship
+ */
+ if (cntr_info == NULL) {
+ cntr_info = kmem_zalloc(sizeof (cu_cntr_info_t),
+ KM_NOSLEEP);
+ if (cntr_info == NULL)
+ continue;
+ cntr_info_array[pg_hw_type] = cntr_info;
+ }
+ cntr_info->ci_cpu = cp;
+ cntr_info->ci_pg = pg_hw;
+ cntr_info->ci_stats = &stats[nevents_save];
+ cntr_info->ci_nstats = nstats;
+
+ /*
+ * Create PG CPU kstats for this hardware relationship
+ */
+ cu_cpu_kstat_create(pg_hw, cntr_info);
+ }
+
+ return (nevents);
+}
+
+
+/*
+ * Program counters for capacity and utilization on given CPU
+ *
+ * If any of the following conditions is true, the counters are not programmed:
+ *
+ * - CU framework is disabled
+ * - The cpu_cu_info field of the cpu structure is NULL
+ * - DTrace is active
+ * - Counters are programmed already
+ * - Counters are disabled (by calls to cu_cpu_disable())
+ */
+void
+cu_cpc_program(cpu_t *cp, int *err)
+{
+ cu_cpc_ctx_t *cpu_ctx;
+ kcpc_ctx_t *ctx;
+ cu_cpu_info_t *cu_cpu_info;
+
+ ASSERT(IS_HIPIL());
+ /*
+ * Should be running on given CPU. We disable preemption to keep CPU
+ * from disappearing and make sure flags and CPC context don't change
+ * from underneath us
+ */
+ kpreempt_disable();
+ ASSERT(cp == CPU);
+
+ /*
+ * Module not ready to program counters
+ */
+ if (!(cu_flags & CU_FLAG_ON)) {
+ *err = -1;
+ kpreempt_enable();
+ return;
+ }
+
+ if (cp == NULL) {
+ *err = -2;
+ kpreempt_enable();
+ return;
+ }
+
+ cu_cpu_info = cp->cpu_cu_info;
+ if (cu_cpu_info == NULL) {
+ *err = -3;
+ kpreempt_enable();
+ return;
+ }
+
+ /*
+ * If DTrace CPC is active or counters turned on already or are
+ * disabled, just return.
+ */
+ if (dtrace_cpc_in_use || (cu_cpu_info->cu_flag & CU_CPU_CNTRS_ON) ||
+ cu_cpu_info->cu_disabled) {
+ *err = 1;
+ kpreempt_enable();
+ return;
+ }
+
+ if ((CPU->cpu_cpc_ctx != NULL) &&
+ !(CPU->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID_STOPPED)) {
+ *err = -4;
+ kpreempt_enable();
+ return;
+ }
+
+ /*
+ * Get CPU's CPC context needed for capacity and utilization
+ */
+ cpu_ctx = &cu_cpu_info->cu_cpc_ctx;
+ ASSERT(cpu_ctx != NULL);
+ ASSERT(cpu_ctx->nctx >= 0);
+
+ ASSERT(cpu_ctx->ctx_ptr_array == NULL || cpu_ctx->ctx_ptr_array_sz > 0);
+ ASSERT(cpu_ctx->nctx <= cpu_ctx->ctx_ptr_array_sz);
+ if (cpu_ctx->nctx <= 0 || cpu_ctx->ctx_ptr_array == NULL ||
+ cpu_ctx->ctx_ptr_array_sz <= 0) {
+ *err = -5;
+ kpreempt_enable();
+ return;
+ }
+
+ /*
+ * Increment index in CPU's CPC context info to point at next context
+ * to program
+ *
+ * NOTE: Do this now instead of after programming counters to ensure
+ * that index will always point at *current* context so we will
+ * always be able to unprogram *current* context if necessary
+ */
+ cpu_ctx->cur_index = (cpu_ctx->cur_index + 1) % cpu_ctx->nctx;
+
+ ctx = cpu_ctx->ctx_ptr_array[cpu_ctx->cur_index];
+
+ /*
+ * Clear KCPC_CTX_INVALID and KCPC_CTX_INVALID_STOPPED from CPU's CPC
+ * context before programming counters
+ *
+ * Context is marked with KCPC_CTX_INVALID_STOPPED when context is
+ * unprogrammed and may be marked with KCPC_CTX_INVALID when
+ * kcpc_invalidate_all() is called by cpustat(1M) and dtrace CPC to
+ * invalidate all CPC contexts before they take over all the counters.
+ *
+ * This isn't necessary since these flags are only used for thread-bound
+ * CPC contexts, not CPU-bound CPC contexts like the ones used for capacity
+ * and utilization.
+ *
+ * There is no need to protect the flag update since no one is using
+ * this context now.
+ */
+ ctx->kc_flags &= ~(KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
+
+ /*
+ * Program counters on this CPU
+ */
+ kcpc_program(ctx, B_FALSE, B_FALSE);
+
+ cp->cpu_cpc_ctx = ctx;
+
+ /*
+ * Set state in CPU structure to say that CPU's counters are programmed
+ * for capacity and utilization now and that they are transitioning from
+ * off to on state. This will cause cu_cpu_update to update stop times
+ * for all programmed counters.
+ */
+ cu_cpu_info->cu_flag |= CU_CPU_CNTRS_ON | CU_CPU_CNTRS_OFF_ON;
+
+ /*
+ * Update counter statistics
+ */
+ (void) cu_cpu_update(cp, B_FALSE);
+
+ cu_cpu_info->cu_flag &= ~CU_CPU_CNTRS_OFF_ON;
+
+ *err = 0;
+ kpreempt_enable();
+}
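
Note the ordering in cu_cpc_program(): cur_index is advanced before the new context is put on the hardware, so ctx_ptr_array[cur_index] always names whatever is currently live, and cu_cpc_unprogram() needs no extra bookkeeping. The invariant in miniature (hypothetical names):

    #define NCTX    4

    struct ctx_ring {
            int cur;                /* always indexes the *live* slot */
            void *slot[NCTX];
    };

    void *
    ctx_ring_next(struct ctx_ring *r)
    {
            /* Advance first, then program: r->cur never goes stale. */
            r->cur = (r->cur + 1) % NCTX;
            return (r->slot[r->cur]);
    }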
+
+
+/*
+ * Cross call wrapper routine for cu_cpc_program()
+ *
+ * Checks to make sure that counters on CPU aren't being used by someone else
+ * before calling cu_cpc_program() since cu_cpc_program() needs to assert that
+ * nobody else is using the counters to catch and prevent any broken code.
+ * Also, this check needs to happen on the target CPU since the CPU's CPC
+ * context can only be changed while running on the CPU.
+ *
+ * If the first argument is TRUE, cu_cpc_program_xcall also checks that there is
+ * no valid thread bound cpc context. This is important to check to prevent
+ * re-programming thread counters with CU counters when CPU is coming on-line.
+ */
+static void
+cu_cpc_program_xcall(uintptr_t arg, int *err)
+{
+ boolean_t avoid_thread_context = (boolean_t)arg;
+
+ kpreempt_disable();
+
+ if (CPU->cpu_cpc_ctx != NULL &&
+ !(CPU->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID_STOPPED)) {
+ *err = -100;
+ kpreempt_enable();
+ return;
+ }
+
+ if (avoid_thread_context && (curthread->t_cpc_ctx != NULL) &&
+ !(curthread->t_cpc_ctx->kc_flags & KCPC_CTX_INVALID_STOPPED)) {
+ *err = -200;
+ kpreempt_enable();
+ return;
+ }
+
+ cu_cpc_program(CPU, err);
+ kpreempt_enable();
+}
+
+
+/*
+ * Unprogram counters for capacity and utilization on given CPU
+ * This function should always be executed on the target CPU at high PIL
+ */
+void
+cu_cpc_unprogram(cpu_t *cp, int *err)
+{
+ cu_cpc_ctx_t *cpu_ctx;
+ kcpc_ctx_t *ctx;
+ cu_cpu_info_t *cu_cpu_info;
+
+ ASSERT(IS_HIPIL());
+ /*
+ * Should be running on given CPU with preemption disabled to keep CPU
+ * from disappearing and make sure flags and CPC context don't change
+ * from underneath us
+ */
+ kpreempt_disable();
+ ASSERT(cp == CPU);
+
+ /*
+ * Module not on
+ */
+ if (!(cu_flags & CU_FLAG_ON)) {
+ *err = -1;
+ kpreempt_enable();
+ return;
+ }
+
+ cu_cpu_info = cp->cpu_cu_info;
+ if (cu_cpu_info == NULL) {
+ *err = -3;
+ kpreempt_enable();
+ return;
+ }
+
+ /*
+ * Counters turned off already
+ */
+ if (!(cu_cpu_info->cu_flag & CU_CPU_CNTRS_ON)) {
+ *err = 1;
+ kpreempt_enable();
+ return;
+ }
+
+ /*
+ * Update counter statistics
+ */
+ (void) cu_cpu_update(cp, B_FALSE);
+
+ /*
+ * Get CPU's CPC context needed for capacity and utilization
+ */
+ cpu_ctx = &cu_cpu_info->cu_cpc_ctx;
+ if (cpu_ctx->nctx <= 0 || cpu_ctx->ctx_ptr_array == NULL ||
+ cpu_ctx->ctx_ptr_array_sz <= 0) {
+ *err = -5;
+ kpreempt_enable();
+ return;
+ }
+ ctx = cpu_ctx->ctx_ptr_array[cpu_ctx->cur_index];
+
+ /*
+ * CPU's CPC context should be current capacity and utilization CPC
+ * context
+ */
+ ASSERT(cp->cpu_cpc_ctx == ctx);
+ if (cp->cpu_cpc_ctx != ctx) {
+ *err = -6;
+ kpreempt_enable();
+ return;
+ }
+
+ /*
+ * Unprogram counters on CPU.
+ */
+ kcpc_unprogram(ctx, B_FALSE);
+
+ ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED);
+
+ /*
+ * Unset state in CPU structure saying that CPU's counters are
+ * programmed
+ */
+ cp->cpu_cpc_ctx = NULL;
+ cu_cpu_info->cu_flag &= ~CU_CPU_CNTRS_ON;
+
+ *err = 0;
+ kpreempt_enable();
+}
+
+
+/*
+ * Add given counter event to list of CPC requests
+ */
+static int
+cu_cpc_req_add(char *event, kcpc_request_list_t *reqs, int nreqs,
+ cu_cntr_stats_t *stats, int kmem_flags, int *nevents)
+{
+ int n;
+ int retval;
+ uint_t flags;
+
+ /*
+ * Return error when no counter event specified, counter event not
+ * supported by CPC's PCBE, or number of events not given
+ */
+ if (event == NULL || kcpc_event_supported(event) == B_FALSE ||
+ nevents == NULL)
+ return (-1);
+
+ n = *nevents;
+
+ /*
+ * Only count the number of counter events needed when no list to add
+ * CPC requests to is given
+ */
+ if (reqs == NULL) {
+ n++;
+ *nevents = n;
+ return (-3);
+ }
+
+ /*
+ * Return error when stats not given or not enough room on list of CPC
+ * requests for more counter events
+ */
+ if (stats == NULL || (nreqs <= 0 && n >= nreqs))
+ return (-4);
+
+ /*
+ * Use flags in cu_cpc_flags to program counters and enable overflow
+ * interrupts/traps (unless PCBE can't handle overflow interrupts) so
+ * PCBE can catch counters before they wrap to hopefully give us an
+ * accurate (64-bit) virtualized counter
+ */
+ flags = cu_cpc_flags;
+ if ((kcpc_pcbe_capabilities() & CPC_CAP_OVERFLOW_INTERRUPT) == 0)
+ flags &= ~CPC_OVF_NOTIFY_EMT;
+
+ /*
+ * Add CPC request to list
+ */
+ retval = kcpc_reqs_add(reqs, event, cu_cpc_preset_value,
+ flags, 0, NULL, &stats[n], kmem_flags);
+
+ if (retval != 0)
+ return (-5);
+
+ n++;
+ *nevents = n;
+ return (0);
+}
+
+static void
+cu_cpu_info_detach_xcall(void)
+{
+ ASSERT(IS_HIPIL());
+
+ CPU->cpu_cu_info = NULL;
+}
+
+
+/*
+ * Enable or disable collection of capacity/utilization data for the current
+ * CPU. Counters are enabled if the 'on' argument is B_TRUE and disabled if
+ * it is B_FALSE. This function should always be executed at high PIL
+ */
+static void
+cu_cpc_trigger(uintptr_t arg1, uintptr_t arg2)
+{
+ cpu_t *cp = (cpu_t *)arg1;
+ boolean_t on = (boolean_t)arg2;
+ int error;
+ cu_cpu_info_t *cu_cpu_info;
+
+ ASSERT(IS_HIPIL());
+ kpreempt_disable();
+ ASSERT(cp == CPU);
+
+ if (!(cu_flags & CU_FLAG_ON)) {
+ kpreempt_enable();
+ return;
+ }
+
+ cu_cpu_info = cp->cpu_cu_info;
+ if (cu_cpu_info == NULL) {
+ kpreempt_enable();
+ return;
+ }
+
+ ASSERT(!cu_cpu_info->cu_disabled ||
+ !(cu_cpu_info->cu_flag & CU_CPU_CNTRS_ON));
+
+ if (on) {
+ /*
+ * Decrement the cu_disabled counter.
+ * Once it drops to zero, call cu_cpc_program.
+ */
+ if (cu_cpu_info->cu_disabled > 0)
+ cu_cpu_info->cu_disabled--;
+ if (cu_cpu_info->cu_disabled == 0)
+ cu_cpc_program(CPU, &error);
+ } else if (cu_cpu_info->cu_disabled++ == 0) {
+ /*
+ * This is the first attempt to disable CU, so turn it off
+ */
+ cu_cpc_unprogram(cp, &error);
+ ASSERT(!(cu_cpu_info->cu_flag & CU_CPU_CNTRS_ON));
+ }
+
+ kpreempt_enable();
+}
+
+
+/*
+ * Callback for changes in CPU states
+ * Used to enable or disable hardware performance counters on CPUs that are
+ * turned on or off
+ *
+ * NOTE: cpc should be programmed/unprogrammed while running on the target CPU.
+ * We have to use thread_affinity_set to hop to the right CPU because these
+ * routines expect cpu_lock held, so we can't cross-call other CPUs while
+ * holding CPU lock.
+ */
+static int
+/* LINTED E_FUNC_ARG_UNUSED */
+cu_cpu_callback(cpu_setup_t what, int id, void *arg)
+{
+ cpu_t *cp;
+ int retval = 0;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ if (!(cu_flags & CU_FLAG_ON))
+ return (-1);
+
+ cp = cpu_get(id);
+ if (cp == NULL)
+ return (-2);
+
+ switch (what) {
+ case CPU_ON:
+ /*
+ * Setup counters on CPU being turned on
+ */
+ retval = cu_cpu_init(cp, cu_cpc_reqs);
+
+ /*
+ * Reset list of counter event requests so its space can be
+ * reused for a different set of requests for next CPU
+ */
+ (void) kcpc_reqs_reset(cu_cpc_reqs);
+ break;
+ case CPU_INTR_ON:
+ /*
+ * Setup counters on CPU being turned on.
+ */
+ retval = cu_cpu_run(cp, cu_cpc_program_xcall,
+ (uintptr_t)B_TRUE);
+ break;
+ case CPU_OFF:
+ /*
+ * Disable counters on CPU being turned off. Counters will not
+ * be re-enabled on this CPU until it comes back online.
+ */
+ cu_cpu_disable(cp);
+ ASSERT(!CU_CPC_ON(cp));
+ retval = cu_cpu_fini(cp);
+ break;
+ default:
+ break;
+ }
+ return (retval);
+}
+
+
+/*
+ * Disable or enable Capacity Utilization counters on a given CPU. This function
+ * can be called from any CPU to disable counters on the given CPU.
+ */
+static void
+cu_cpu_disable(cpu_t *cp)
+{
+ cpu_call(cp, cu_cpc_trigger, (uintptr_t)cp, (uintptr_t)B_FALSE);
+}
+
+
+static void
+cu_cpu_enable(cpu_t *cp)
+{
+ cpu_call(cp, cu_cpc_trigger, (uintptr_t)cp, (uintptr_t)B_TRUE);
+}
+
+
+/*
+ * Setup capacity and utilization support for given CPU
+ *
+ * NOTE: Use KM_NOSLEEP for kmem_{,z}alloc() since cpu_lock is held and free
+ * everything that has been successfully allocated including cpu_cu_info
+ * if any memory allocation fails
+ */
+static int
+cu_cpu_init(cpu_t *cp, kcpc_request_list_t *reqs)
+{
+ kcpc_ctx_t **ctx_ptr_array;
+ size_t ctx_ptr_array_sz;
+ cu_cpc_ctx_t *cpu_ctx;
+ cu_cpu_info_t *cu_cpu_info;
+ int n;
+
+ /*
+ * cpu_lock should be held and protect against CPU going away and races
+ * with cu_{init,fini,cpu_fini}()
+ */
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * Return if not ready to setup counters yet
+ */
+ if (!(cu_flags & CU_FLAG_READY))
+ return (-1);
+
+ if (cp->cpu_cu_info == NULL) {
+ cp->cpu_cu_info = kmem_zalloc(sizeof (cu_cpu_info_t),
+ KM_NOSLEEP);
+ if (cp->cpu_cu_info == NULL)
+ return (-2);
+ }
+
+ /*
+ * Get capacity and utilization CPC context for CPU and check to see
+ * whether it has been setup already
+ */
+ cu_cpu_info = cp->cpu_cu_info;
+ cu_cpu_info->cu_cpu = cp;
+ cu_cpu_info->cu_disabled = dtrace_cpc_in_use ? 1 : 0;
+
+ cpu_ctx = &cu_cpu_info->cu_cpc_ctx;
+ if (cpu_ctx->nctx > 0 && cpu_ctx->ctx_ptr_array != NULL &&
+ cpu_ctx->ctx_ptr_array_sz > 0) {
+ return (1);
+ }
+
+ /*
+ * Should have no contexts since it hasn't been setup already
+ */
+ ASSERT(cpu_ctx->nctx == 0 && cpu_ctx->ctx_ptr_array == NULL &&
+ cpu_ctx->ctx_ptr_array_sz == 0);
+
+ /*
+ * Determine how many CPC events needed to measure capacity and
+ * utilization for this CPU, allocate space for counter statistics for
+ * each event, and fill in list of CPC event requests with corresponding
+ * counter stats for each request to make attributing counter data
+ * easier later....
+ */
+ n = cu_cpc_init(cp, NULL, 0);
+ if (n <= 0) {
+ (void) cu_cpu_fini(cp);
+ return (-3);
+ }
+
+ cu_cpu_info->cu_cntr_stats = kmem_zalloc(n * sizeof (cu_cntr_stats_t),
+ KM_NOSLEEP);
+ if (cu_cpu_info->cu_cntr_stats == NULL) {
+ (void) cu_cpu_fini(cp);
+ return (-4);
+ }
+
+ cu_cpu_info->cu_ncntr_stats = n;
+
+ n = cu_cpc_init(cp, reqs, n);
+ if (n <= 0) {
+ (void) cu_cpu_fini(cp);
+ return (-5);
+ }
+
+ /*
+ * Create CPC context with given requests
+ */
+ ctx_ptr_array = NULL;
+ ctx_ptr_array_sz = 0;
+ n = kcpc_cpu_ctx_create(cp, reqs, KM_NOSLEEP, &ctx_ptr_array,
+ &ctx_ptr_array_sz);
+ if (n <= 0) {
+ (void) cu_cpu_fini(cp);
+ return (-6);
+ }
+
+ /*
+ * Should have contexts
+ */
+ ASSERT(n > 0 && ctx_ptr_array != NULL && ctx_ptr_array_sz > 0);
+ if (ctx_ptr_array == NULL || ctx_ptr_array_sz <= 0) {
+ (void) cu_cpu_fini(cp);
+ return (-7);
+ }
+
+ /*
+ * Fill in CPC context info for CPU needed for capacity and utilization
+ */
+ cpu_ctx->cur_index = 0;
+ cpu_ctx->nctx = n;
+ cpu_ctx->ctx_ptr_array = ctx_ptr_array;
+ cpu_ctx->ctx_ptr_array_sz = ctx_ptr_array_sz;
+ return (0);
+}
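
cu_cpu_init() calls cu_cpc_init() twice: once with a NULL request list purely to learn how many events are needed, then again with real storage once the stats array is sized. The same count-then-fill idiom in miniature (hypothetical names):

    #include <stdlib.h>

    /* Pass items == NULL to count only; pass a buffer to fill it. */
    static int
    collect(int *items)
    {
            int n = 0;
            int v;

            for (v = 1; v <= 3; v++) {      /* stands in for the PG walk */
                    if (items != NULL)
                            items[n] = v;
                    n++;
            }
            return (n);
    }

    int
    main(void)
    {
            int n = collect(NULL);                  /* pass 1: size */
            int *buf = malloc(n * sizeof (int));

            if (buf == NULL)
                    return (1);
            (void) collect(buf);                    /* pass 2: fill */
            free(buf);
            return (0);
    }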
+
+/*
+ * Tear down capacity and utilization support for given CPU
+ */
+static int
+cu_cpu_fini(cpu_t *cp)
+{
+ kcpc_ctx_t *ctx;
+ cu_cpc_ctx_t *cpu_ctx;
+ cu_cpu_info_t *cu_cpu_info;
+ int i;
+ pghw_type_t pg_hw_type;
+
+ /*
+ * cpu_lock should be held and protect against CPU going away and races
+ * with cu_{init,fini,cpu_init}()
+ */
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * Have to at least be ready to setup counters to have allocated
+ * anything that needs to be deallocated now
+ */
+ if (!(cu_flags & CU_FLAG_READY))
+ return (-1);
+
+ /*
+ * Nothing to do if CPU's capacity and utilization info doesn't exist
+ */
+ cu_cpu_info = cp->cpu_cu_info;
+ if (cu_cpu_info == NULL)
+ return (1);
+
+ /*
+ * Tear down any existing kstats and counter info for each hardware
+ * sharing relationship
+ */
+ for (pg_hw_type = PGHW_START; pg_hw_type < PGHW_NUM_COMPONENTS;
+ pg_hw_type++) {
+ cu_cntr_info_t *cntr_info;
+
+ cntr_info = cu_cpu_info->cu_cntr_info[pg_hw_type];
+ if (cntr_info == NULL)
+ continue;
+
+ if (cntr_info->ci_kstat != NULL) {
+ kstat_delete(cntr_info->ci_kstat);
+ cntr_info->ci_kstat = NULL;
+ }
+ kmem_free(cntr_info, sizeof (cu_cntr_info_t));
+ }
+
+ /*
+ * Free counter statistics for CPU
+ */
+ ASSERT(cu_cpu_info->cu_cntr_stats == NULL ||
+ cu_cpu_info->cu_ncntr_stats > 0);
+ if (cu_cpu_info->cu_cntr_stats != NULL &&
+ cu_cpu_info->cu_ncntr_stats > 0) {
+ kmem_free(cu_cpu_info->cu_cntr_stats,
+ cu_cpu_info->cu_ncntr_stats * sizeof (cu_cntr_stats_t));
+ cu_cpu_info->cu_cntr_stats = NULL;
+ cu_cpu_info->cu_ncntr_stats = 0;
+ }
+
+ /*
+ * Get capacity and utilization CPC contexts for given CPU and check to
+ * see whether they have been freed already
+ */
+ cpu_ctx = &cu_cpu_info->cu_cpc_ctx;
+ if (cpu_ctx != NULL && cpu_ctx->ctx_ptr_array != NULL &&
+ cpu_ctx->ctx_ptr_array_sz > 0) {
+ /*
+ * Free CPC contexts for given CPU
+ */
+ for (i = 0; i < cpu_ctx->nctx; i++) {
+ ctx = cpu_ctx->ctx_ptr_array[i];
+ if (ctx == NULL)
+ continue;
+ kcpc_free(ctx, 0);
+ }
+
+ /*
+ * Free CPC context pointer array
+ */
+ kmem_free(cpu_ctx->ctx_ptr_array, cpu_ctx->ctx_ptr_array_sz);
+
+ /*
+ * Zero CPC info for CPU
+ */
+ bzero(cpu_ctx, sizeof (cu_cpc_ctx_t));
+ }
+
+ /*
+ * Set the cp->cpu_cu_info pointer to NULL. Go through a cross-call to
+ * ensure that no one is going to access the cpu_cu_info which we are
+ * going to free.
+ */
+ if (cpu_is_online(cp))
+ cpu_call(cp, (cpu_call_func_t)cu_cpu_info_detach_xcall, 0, 0);
+ else
+ cp->cpu_cu_info = NULL;
+
+ /*
+ * Free CPU's capacity and utilization info
+ */
+ kmem_free(cu_cpu_info, sizeof (cu_cpu_info_t));
+
+ return (0);
+}
+
+/*
+ * Create capacity & utilization kstats for given PG CPU hardware sharing
+ * relationship
+ */
+static void
+cu_cpu_kstat_create(pghw_t *pg, cu_cntr_info_t *cntr_info)
+{
+ char *class, *sh_name;
+ kstat_t *ks;
+
+ /*
+ * Just return when no counter info or CPU
+ */
+ if (cntr_info == NULL || cntr_info->ci_cpu == NULL)
+ return;
+
+ /*
+ * Get the class name from the leaf PG that this CPU belongs to.
+ * If there are no PGs, just use the default class "cpu".
+ */
+ class = pg ? pghw_type_string(pg->pghw_hw) : "cpu";
+ sh_name = pg ? pghw_type_shortstring(pg->pghw_hw) : "cpu";
+
+ if ((ks = kstat_create_zone("pg_cpu", cntr_info->ci_cpu->cpu_id,
+ sh_name, class, KSTAT_TYPE_NAMED,
+ sizeof (cu_cpu_kstat) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID)) == NULL)
+ return;
+
+ ks->ks_lock = &pg_cpu_kstat_lock;
+ ks->ks_data = &cu_cpu_kstat;
+ ks->ks_update = cu_cpu_kstat_update;
+
+ ks->ks_private = cntr_info;
+ cntr_info->ci_kstat = ks;
+ kstat_install(cntr_info->ci_kstat);
+}
+
+
+/*
+ * Propagate values from CPU capacity & utilization stats to kstats
+ */
+static int
+cu_cpu_kstat_update(kstat_t *ksp, int rw)
+{
+ cpu_t *cp;
+ cu_cntr_info_t *cntr_info = ksp->ks_private;
+ struct cu_cpu_kstat *kstat = &cu_cpu_kstat;
+ pghw_t *pg;
+ cu_cntr_stats_t *stats;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ kpreempt_disable();
+
+ /*
+ * Update capacity and utilization statistics needed for CPU's PG (CPU)
+ * kstats
+ */
+ cp = cntr_info->ci_cpu;
+ (void) cu_cpu_update(cp, B_TRUE);
+
+ pg = cntr_info->ci_pg;
+ stats = cntr_info->ci_stats;
+ kstat->cu_cpu_id.value.ui32 = cp->cpu_id;
+ kstat->cu_generation.value.ui32 = cp->cpu_generation;
+ if (pg == NULL)
+ kstat->cu_pg_id.value.l = -1;
+ else
+ kstat->cu_pg_id.value.l = pg->pghw_pg.pg_id;
+
+ kstat->cu_cpu_util.value.ui64 = stats->cs_value_total;
+ kstat->cu_cpu_rate.value.ui64 = stats->cs_rate;
+ kstat->cu_cpu_rate_max.value.ui64 = stats->cs_rate_max;
+ kstat->cu_cpu_time_running.value.ui64 = stats->cs_time_running;
+ kstat->cu_cpu_time_stopped.value.ui64 = stats->cs_time_stopped;
+ /*
+ * If counters are stopped now, cs_time_stopped was last updated at
+ * cs_time_start time. Add the time that has passed since then to the
+ * stopped time.
+ */
+ if (!(cp->cpu_cu_info->cu_flag & CU_CPU_CNTRS_ON))
+ kstat->cu_cpu_time_stopped.value.ui64 +=
+ gethrtime() - stats->cs_time_start;
+
+ kpreempt_enable();
+
+ return (0);
+}
+
+/*
+ * Run specified function with specified argument on a given CPU and return
+ * whatever the function returns
+ */
+static int
+cu_cpu_run(cpu_t *cp, cu_cpu_func_t func, uintptr_t arg)
+{
+ int error = 0;
+
+ /*
+ * cpu_call() will call func on the CPU specified with given argument
+ * and return func's return value in last argument
+ */
+ cpu_call(cp, (cpu_call_func_t)func, arg, (uintptr_t)&error);
+ return (error);
+}
+
+
+/*
+ * Update counter statistics on a given CPU.
+ *
+ * If the move_to argument is true, execute the function on the specified CPU.
+ * Otherwise, assume that it is already running on the right CPU.
+ *
+ * If move_to is specified, the caller should hold cpu_lock or have preemption
+ * disabled. Otherwise it is up to the caller to guarantee that things do not
+ * change in the process.
+ */
+int
+cu_cpu_update(struct cpu *cp, boolean_t move_to)
+{
+ int retval;
+ cu_cpu_info_t *cu_cpu_info = cp->cpu_cu_info;
+ hrtime_t time_snap;
+
+ ASSERT(!move_to || MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0);
+
+ /*
+ * Nothing to do if counters are not programmed
+ */
+ if (!(cu_flags & CU_FLAG_ON) ||
+ (cu_cpu_info == NULL) ||
+ !(cu_cpu_info->cu_flag & CU_CPU_CNTRS_ON))
+ return (0);
+
+ /*
+ * Don't update CPU statistics if it was updated recently
+ * and provide old results instead
+ */
+ time_snap = gethrtime();
+ if ((time_snap - cu_cpu_info->cu_sample_time) < cu_update_threshold) {
+ DTRACE_PROBE1(cu__drop__sample, cpu_t *, cp);
+ return (0);
+ }
+
+ cu_cpu_info->cu_sample_time = time_snap;
+
+ /*
+ * CPC counters should be read on the CPU that is running them. We
+ * either have to move ourselves to the target CPU or ensure that we
+ * already run there.
+ *
+ * We use cross-call to the target CPU to execute kcpc_read() and
+ * cu_cpu_update_stats() there.
+ */
+ retval = 0;
+ if (move_to)
+ (void) cu_cpu_run(cp, (cu_cpu_func_t)kcpc_read,
+ (uintptr_t)cu_cpu_update_stats);
+ else {
+ retval = kcpc_read((kcpc_update_func_t)cu_cpu_update_stats);
+ /*
+ * Offset negative return values from kcpc_read() by -10 so the
+ * caller can distinguish them from this routine's own error
+ * return values
+ */
+ if (retval < 0)
+ retval -= 10;
+ }
+
+ return (retval);
+}
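+
+/*
+ * For example (illustrative arithmetic, not an exhaustive error map): if
+ * kcpc_read() fails with -1 in the non-move_to path above, cu_cpu_update()
+ * returns -11, so callers can tell kcpc_read() failures apart from any
+ * return values in the -1..-10 range.
+ */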
+
+
+/*
+ * Update CPU counter statistics for current CPU.
+ * This function may be called from a cross-call
+ */
+static int
+cu_cpu_update_stats(cu_cntr_stats_t *stats, uint64_t cntr_value)
+{
+ cu_cpu_info_t *cu_cpu_info = CPU->cpu_cu_info;
+ uint_t flags;
+ uint64_t delta;
+ hrtime_t time_delta;
+ hrtime_t time_snap;
+
+ if (stats == NULL)
+ return (-1);
+
+ /*
+ * Nothing to do if counters are not programmed. This should not happen,
+ * but we check just in case.
+ */
+ ASSERT(cu_flags & CU_FLAG_ON);
+ ASSERT(cu_cpu_info != NULL);
+ if (!(cu_flags & CU_FLAG_ON) ||
+ (cu_cpu_info == NULL))
+ return (-2);
+
+ flags = cu_cpu_info->cu_flag;
+ ASSERT(flags & CU_CPU_CNTRS_ON);
+ if (!(flags & CU_CPU_CNTRS_ON))
+ return (-2);
+
+ /*
+ * Take snapshot of high resolution timer
+ */
+ time_snap = gethrtime();
+
+ /*
+ * CU counters have just been programmed. We cannot assume that the new
+ * cntr_value continues from where we left off, so use the cntr_value as
+ * the new initial value.
+ */
+ if (flags & CU_CPU_CNTRS_OFF_ON)
+ stats->cs_value_start = cntr_value;
+
+ /*
+ * Calculate delta in counter values between start of sampling period
+ * and now
+ */
+ delta = cntr_value - stats->cs_value_start;
+
+ /*
+ * Calculate time between start of sampling period and now
+ */
+ time_delta = stats->cs_time_start ?
+ time_snap - stats->cs_time_start :
+ 0;
+ stats->cs_time_start = time_snap;
+ stats->cs_value_start = cntr_value;
+
+ if (time_delta > 0) { /* wrap shouldn't happen */
+ /*
+ * Update either running or stopped time based on the transition
+ * state
+ */
+ if (flags & CU_CPU_CNTRS_OFF_ON)
+ stats->cs_time_stopped += time_delta;
+ else
+ stats->cs_time_running += time_delta;
+ }
+
+ /*
+ * Update rest of counter statistics if counter value didn't wrap
+ */
+ if (delta > 0) {
+ /*
+ * Update utilization rate if the interval between samples is
+ * sufficient.
+ */
+ ASSERT(cu_sample_interval_min > CU_SCALE);
+ if (time_delta > cu_sample_interval_min)
+ stats->cs_rate = CU_RATE(delta, time_delta);
+ if (stats->cs_rate_max < stats->cs_rate)
+ stats->cs_rate_max = stats->cs_rate;
+
+ stats->cs_value_last = delta;
+ stats->cs_value_total += delta;
+ }
+
+ return (0);
+}
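+
+/*
+ * Sketch of the rate computation above (an assumption for illustration;
+ * CU_RATE() and CU_SCALE are presumably defined in sys/cap_util.h, which this
+ * change adds): with a counter delta observed over time_delta nanoseconds,
+ * the utilization rate in events per second is conceptually
+ *
+ * cs_rate = (delta * NANOSEC) / time_delta
+ *
+ * and the time_delta > cu_sample_interval_min (> CU_SCALE) guard keeps the
+ * sampling interval large enough for the scaled division to be meaningful.
+ */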
+
+/*
+ * Update CMT PG utilization data.
+ *
+ * This routine computes the running total utilization and times for the
+ * specified PG by adding up the total utilization and counter running and
+ * stopped times of all CPUs in the PG and calculates the utilization rate and
+ * maximum rate for all CPUs in the PG.
+ */
+void
+cu_pg_update(pghw_t *pg)
+{
+ pg_cpu_itr_t cpu_iter;
+ pghw_type_t pg_hwtype;
+ cpu_t *cpu;
+ pghw_util_t *hw_util = &pg->pghw_stats;
+ uint64_t old_utilization = hw_util->pghw_util;
+ hrtime_t now;
+ hrtime_t time_delta;
+ uint64_t utilization_delta;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ now = gethrtime();
+
+ pg_hwtype = pg->pghw_hw;
+
+ /*
+ * Initialize running total utilization and times for PG to 0
+ */
+ hw_util->pghw_util = 0;
+ hw_util->pghw_time_running = 0;
+ hw_util->pghw_time_stopped = 0;
+
+ /*
+ * Iterate over all CPUs in the PG and aggregate utilization, running
+ * time and stopped time.
+ */
+ PG_CPU_ITR_INIT(pg, cpu_iter);
+ while ((cpu = pg_cpu_next(&cpu_iter)) != NULL) {
+ cu_cpu_info_t *cu_cpu_info = cpu->cpu_cu_info;
+ cu_cntr_info_t *cntr_info;
+ cu_cntr_stats_t *stats;
+
+ if (cu_cpu_info == NULL)
+ continue;
+
+ /*
+ * Update utilization data for the CPU and then
+ * aggregate per CPU running totals for PG
+ */
+ (void) cu_cpu_update(cpu, B_TRUE);
+ cntr_info = cu_cpu_info->cu_cntr_info[pg_hwtype];
+
+ if (cntr_info == NULL || (stats = cntr_info->ci_stats) == NULL)
+ continue;
+
+ hw_util->pghw_util += stats->cs_value_total;
+ hw_util->pghw_time_running += stats->cs_time_running;
+ hw_util->pghw_time_stopped += stats->cs_time_stopped;
+
+ /*
+ * If counters are stopped now, pghw_time_stopped was last
+ * updated at cs_time_start time. Add the time passed since then
+ * to the stopped time.
+ */
+ if (!(cu_cpu_info->cu_flag & CU_CPU_CNTRS_ON))
+ hw_util->pghw_time_stopped +=
+ now - stats->cs_time_start;
+ }
+
+ /*
+ * Compute per PG utilization rate and maximum rate
+ */
+ time_delta = now - hw_util->pghw_time_stamp;
+ hw_util->pghw_time_stamp = now;
+
+ if (old_utilization == 0)
+ return;
+
+ /*
+ * Calculate change in utilization over sampling period and set this to
+ * 0 if the delta would be 0 or negative which may happen if any CPUs go
+ * offline during the sampling period
+ */
+ if (hw_util->pghw_util > old_utilization)
+ utilization_delta = hw_util->pghw_util - old_utilization;
+ else
+ utilization_delta = 0;
+
+ /*
+ * Update utilization rate if the interval between samples is
+ * sufficient.
+ */
+ ASSERT(cu_sample_interval_min > CU_SCALE);
+ if (time_delta > cu_sample_interval_min)
+ hw_util->pghw_rate = CU_RATE(utilization_delta, time_delta);
+
+ /*
+ * Update the maximum observed rate
+ */
+ if (hw_util->pghw_rate_max < hw_util->pghw_rate)
+ hw_util->pghw_rate_max = hw_util->pghw_rate;
+}
diff --git a/usr/src/uts/common/os/cpu.c b/usr/src/uts/common/os/cpu.c
index 009598f03f..62e8eeb2fe 100644
--- a/usr/src/uts/common/os/cpu.c
+++ b/usr/src/uts/common/os/cpu.c
@@ -1203,12 +1203,14 @@ cpu_online(cpu_t *cp)
}
cp->cpu_flags &= ~(CPU_QUIESCED | CPU_OFFLINE | CPU_FROZEN |
CPU_SPARE);
+ CPU_NEW_GENERATION(cp);
start_cpus();
cpu_stats_kstat_create(cp);
cpu_create_intrstat(cp);
lgrp_kstat_create(cp);
cpu_state_change_notify(cp->cpu_id, CPU_ON);
cpu_intr_enable(cp); /* arch-dep hook */
+ cpu_state_change_notify(cp->cpu_id, CPU_INTR_ON);
cpu_set_state(cp);
cyclic_online(cp);
/*
@@ -1284,6 +1286,7 @@ cpu_offline(cpu_t *cp, int flags)
/*
* Tell interested parties that this CPU is going offline.
*/
+ CPU_NEW_GENERATION(cp);
cpu_state_change_notify(cp->cpu_id, CPU_OFF);
/*
@@ -1557,8 +1560,11 @@ out:
/*
* If we failed, we need to notify everyone that this CPU is back on.
*/
- if (error != 0)
+ if (error != 0) {
+ CPU_NEW_GENERATION(cp);
cpu_state_change_notify(cp->cpu_id, CPU_ON);
+ cpu_state_change_notify(cp->cpu_id, CPU_INTR_ON);
+ }
return (error);
}
@@ -2152,6 +2158,7 @@ static struct {
kstat_named_t ci_core_id;
kstat_named_t ci_curr_clock_Hz;
kstat_named_t ci_supp_freq_Hz;
+ kstat_named_t ci_pg_id;
#if defined(__sparcv9)
kstat_named_t ci_device_ID;
kstat_named_t ci_cpu_fru;
@@ -2167,6 +2174,7 @@ static struct {
kstat_named_t ci_ncoreperchip;
kstat_named_t ci_max_cstates;
kstat_named_t ci_curr_cstate;
+ kstat_named_t ci_cacheid;
kstat_named_t ci_sktstr;
#endif
} cpu_info_template = {
@@ -2181,6 +2189,7 @@ static struct {
{ "core_id", KSTAT_DATA_LONG },
{ "current_clock_Hz", KSTAT_DATA_UINT64 },
{ "supported_frequencies_Hz", KSTAT_DATA_STRING },
+ { "pg_id", KSTAT_DATA_LONG },
#if defined(__sparcv9)
{ "device_ID", KSTAT_DATA_UINT64 },
{ "cpu_fru", KSTAT_DATA_STRING },
@@ -2196,6 +2205,7 @@ static struct {
{ "ncore_per_chip", KSTAT_DATA_INT32 },
{ "supported_max_cstates", KSTAT_DATA_INT32 },
{ "current_cstate", KSTAT_DATA_INT32 },
+ { "cache_id", KSTAT_DATA_INT32 },
{ "socket_type", KSTAT_DATA_STRING },
#endif
};
@@ -2253,6 +2263,9 @@ cpu_info_kstat_update(kstat_t *ksp, int rw)
cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp);
cpu_info_template.ci_curr_clock_Hz.value.ui64 =
cp->cpu_curr_clock;
+ cpu_info_template.ci_pg_id.value.l =
+ cp->cpu_pg && cp->cpu_pg->cmt_lineage ?
+ cp->cpu_pg->cmt_lineage->pg_id : -1;
kstat_named_setstr(&cpu_info_template.ci_supp_freq_Hz,
cp->cpu_supp_freqs);
#if defined(__sparcv9)
@@ -2273,6 +2286,7 @@ cpu_info_kstat_update(kstat_t *ksp, int rw)
cpu_info_template.ci_pkg_core_id.value.l = cpuid_get_pkgcoreid(cp);
cpu_info_template.ci_max_cstates.value.l = cp->cpu_m.max_cstates;
cpu_info_template.ci_curr_cstate.value.l = cpu_idle_get_cpu_state(cp);
+ cpu_info_template.ci_cacheid.value.i32 = cpuid_get_cacheid(cp);
kstat_named_setstr(&cpu_info_template.ci_sktstr,
cpuid_getsocketstr(cp));
#endif
diff --git a/usr/src/uts/common/os/group.c b/usr/src/uts/common/os/group.c
index 01e3f1ebdd..e46e7f600c 100644
--- a/usr/src/uts/common/os/group.c
+++ b/usr/src/uts/common/os/group.c
@@ -28,6 +28,7 @@
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/group.h>
+#include <sys/cmn_err.h>
#define GRP_SET_SIZE_DEFAULT 2
@@ -352,3 +353,102 @@ group_find(group_t *g, void *e)
}
return ((uint_t)-1);
}
+
+/*
+ * Return a string in a given buffer with list of integer entries in a group.
+ * The string concatenates consecutive integer ranges as x-y.
+ * The resulting string looks like "1,2-5,8"
+ *
+ * The convert argument is used to map group elements to integer IDs.
+ */
+char *
+group2intlist(group_t *group, char *buffer, size_t len, int (convert)(void*))
+{
+ char *ptr = buffer;
+ void *v;
+ group_iter_t iter;
+ boolean_t first_iteration = B_TRUE;
+ boolean_t first_value = B_TRUE;
+ int start = 0, end = 0;
+
+ /*
+ * Allow for the terminating NUL byte
+ */
+ len = len - 1;
+
+ group_iter_init(&iter);
+ while ((v = group_iterate(group, &iter)) != NULL && len > 0) {
+ int id = convert(v);
+ int nbytes = 0;
+
+ if (first_iteration) {
+ start = end = id;
+ first_iteration = B_FALSE;
+ } else if (end + 1 == id) {
+ /*
+ * Got a consecutive ID, so just extend the end of the
+ * range; it may extend further still
+ */
+ end = id;
+ } else {
+ if (first_value) {
+ first_value = B_FALSE;
+ } else {
+ *ptr++ = ',';
+ len--;
+ }
+
+ if (len == 0)
+ break;
+
+ /*
+ * Next ID is not consecutive, so emit the IDs
+ * collected so far.
+ */
+ if (end > start + 1) /* range */
+ nbytes = snprintf(ptr, len, "%d-%d",
+ start, end);
+ else if (end > start) /* different values */
+ nbytes = snprintf(ptr, len, "%d,%d",
+ start, end);
+ else /* same value */
+ nbytes = snprintf(ptr, len, "%d", start);
+
+ if (nbytes <= 0) {
+ len = 0;
+ break;
+ }
+
+ /*
+ * Advance position in the string
+ */
+ ptr += nbytes;
+ len -= nbytes;
+
+ /*
+ * Try finding consecutive range starting from current
+ * ID.
+ */
+ start = end = id;
+ }
+ }
+
+ if (!first_value) {
+ *ptr++ = ',';
+ len--;
+ }
+ /*
+ * Print last ID(s)
+ */
+ if (len > 0) {
+ if (end > start + 1) {
+ (void) snprintf(ptr, len, "%d-%d", start, end);
+ } else if (end != start) {
+ (void) snprintf(ptr, len, "%d,%d", start, end);
+ } else {
+ (void) snprintf(ptr, len, "%d", start);
+ }
+ }
+
+ return (buffer);
+}
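+
+/*
+ * Usage sketch for group2intlist() (illustrative; the converter shown mirrors
+ * the cpu2id() helper that pghw.c declares below):
+ *
+ * static int
+ * cpu2id(void *v)
+ * {
+ * return (((cpu_t *)v)->cpu_id);
+ * }
+ *
+ * char buf[128];
+ * (void) group2intlist(&((pg_t *)pg)->pg_cpus, buf, sizeof (buf), cpu2id);
+ *
+ * For CPUs 0, 1, 2, 3 and 6 this yields the string "0-3,6".
+ */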
diff --git a/usr/src/uts/common/os/kcpc.c b/usr/src/uts/common/os/kcpc.c
index e5cab151b8..50a999dcc5 100644
--- a/usr/src/uts/common/os/kcpc.c
+++ b/usr/src/uts/common/os/kcpc.c
@@ -39,12 +39,17 @@
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sdt.h>
+#include <sys/archsystm.h>
+#include <sys/promif.h>
+#include <sys/x_call.h>
+#include <sys/cap_util.h>
#if defined(__x86)
#include <asm/clock.h>
+#include <sys/xc_levels.h>
#endif
-kmutex_t kcpc_ctx_llock[CPC_HASH_BUCKETS]; /* protects ctx_list */
-kcpc_ctx_t *kcpc_ctx_list[CPC_HASH_BUCKETS]; /* head of list */
+static kmutex_t kcpc_ctx_llock[CPC_HASH_BUCKETS]; /* protects ctx_list */
+static kcpc_ctx_t *kcpc_ctx_list[CPC_HASH_BUCKETS]; /* head of list */
krwlock_t kcpc_cpuctx_lock; /* lock for 'kcpc_cpuctx' below */
@@ -73,10 +78,75 @@ static int kcpc_nullctx_panic = 0;
static void kcpc_lwp_create(kthread_t *t, kthread_t *ct);
static void kcpc_restore(kcpc_ctx_t *ctx);
static void kcpc_save(kcpc_ctx_t *ctx);
-static void kcpc_free(kcpc_ctx_t *ctx, int isexec);
static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);
+static kcpc_set_t *kcpc_set_create(kcpc_request_t *reqs, int nreqs,
+ int set_flags, int kmem_flags);
+
+/*
+ * Macros to manipulate context flags. All flag updates should use one of these
+ * two macros
+ *
+ * Flags should always be updated atomically since some of the updates are
+ * not protected by locks.
+ */
+#define KCPC_CTX_FLAG_SET(ctx, flag) atomic_or_uint(&(ctx)->kc_flags, (flag))
+#define KCPC_CTX_FLAG_CLR(ctx, flag) atomic_and_uint(&(ctx)->kc_flags, ~(flag))
+
+/*
+ * The IS_HIPIL() macro verifies that the code is executed either from a
+ * cross-call or from a high-PIL interrupt
+ */
+#ifdef DEBUG
+#define IS_HIPIL() (getpil() >= XCALL_PIL)
+#else
+#define IS_HIPIL()
+#endif /* DEBUG */
+
+
+extern int kcpc_hw_load_pcbe(void);
+
+/*
+ * Return value from kcpc_hw_load_pcbe()
+ */
+static int kcpc_pcbe_error = 0;
+
+/*
+ * Perform one-time initialization of kcpc framework.
+ * This function performs the initialization only the first time it is called.
+ * It is safe to call it multiple times.
+ */
+int
+kcpc_init(void)
+{
+ long hash;
+ static uint32_t kcpc_initialized = 0;
+
+ /*
+ * We already tried loading platform pcbe module and failed
+ */
+ if (kcpc_pcbe_error != 0)
+ return (-1);
+
+ /*
+ * The kcpc framework should be initialized at most once
+ */
+ if (atomic_cas_32(&kcpc_initialized, 0, 1) != 0)
+ return (0);
+
+ rw_init(&kcpc_cpuctx_lock, NULL, RW_DEFAULT, NULL);
+ for (hash = 0; hash < CPC_HASH_BUCKETS; hash++)
+ mutex_init(&kcpc_ctx_llock[hash],
+ NULL, MUTEX_DRIVER, (void *)(uintptr_t)15);
+
+ /*
+ * Load platform-specific pcbe module
+ */
+ kcpc_pcbe_error = kcpc_hw_load_pcbe();
+
+ return (kcpc_pcbe_error == 0 ? 0 : -1);
+}
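+
+/*
+ * Because the initialization runs at most once and later calls simply report
+ * the cached result, callers may invoke kcpc_init() defensively from any path
+ * that needs the framework (illustrative usage; the error code is a
+ * placeholder):
+ *
+ * if (kcpc_init() != 0)
+ * return (ENOTSUP);
+ */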
void
kcpc_register_pcbe(pcbe_ops_t *ops)
@@ -103,8 +173,9 @@ kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode)
cpu_t *cp;
kcpc_ctx_t *ctx;
int error;
+ int save_spl;
- ctx = kcpc_ctx_alloc();
+ ctx = kcpc_ctx_alloc(KM_SLEEP);
if (kcpc_assign_reqs(set, ctx) != 0) {
kcpc_ctx_free(ctx);
@@ -141,28 +212,34 @@ kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode)
goto unbound;
mutex_enter(&cp->cpu_cpc_ctxlock);
+ kpreempt_disable();
+ save_spl = spl_xcall();
- if (cp->cpu_cpc_ctx != NULL) {
+ /*
+ * Check to see whether counters for CPU are already being used by
+ * someone other than the kernel for capacity and utilization (since
+ * the kernel will let go of counters for user in kcpc_program() below)
+ */
+ if (cp->cpu_cpc_ctx != NULL && !CU_CPC_ON(cp)) {
/*
* If this CPU already has a bound set, return an error.
*/
+ splx(save_spl);
+ kpreempt_enable();
mutex_exit(&cp->cpu_cpc_ctxlock);
goto unbound;
}
if (curthread->t_bind_cpu != cpuid) {
+ splx(save_spl);
+ kpreempt_enable();
mutex_exit(&cp->cpu_cpc_ctxlock);
goto unbound;
}
- cp->cpu_cpc_ctx = ctx;
- /*
- * Kernel preemption must be disabled while fiddling with the hardware
- * registers to prevent partial updates.
- */
- kpreempt_disable();
- ctx->kc_rawtick = KCPC_GET_TICK();
- pcbe_ops->pcbe_program(ctx);
+ kcpc_program(ctx, B_FALSE, B_TRUE);
+
+ splx(save_spl);
kpreempt_enable();
mutex_exit(&cp->cpu_cpc_ctxlock);
@@ -197,14 +274,14 @@ kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
if (t->t_cpc_ctx != NULL)
return (EEXIST);
- ctx = kcpc_ctx_alloc();
+ ctx = kcpc_ctx_alloc(KM_SLEEP);
/*
* The context must begin life frozen until it has been properly
* programmed onto the hardware. This prevents the context ops from
* worrying about it until we're ready.
*/
- ctx->kc_flags |= KCPC_CTX_FREEZE;
+ KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
ctx->kc_hrtime = gethrtime();
if (kcpc_assign_reqs(set, ctx) != 0) {
@@ -215,13 +292,13 @@ kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
ctx->kc_cpuid = -1;
if (set->ks_flags & CPC_BIND_LWP_INHERIT)
- ctx->kc_flags |= KCPC_CTX_LWPINHERIT;
+ KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_LWPINHERIT);
ctx->kc_thread = t;
t->t_cpc_ctx = ctx;
/*
* Permit threads to look at their own hardware counters from userland.
*/
- ctx->kc_flags |= KCPC_CTX_NONPRIV;
+ KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_NONPRIV);
/*
* Create the data store for this set.
@@ -248,12 +325,14 @@ kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
* Ask the backend to program the hardware.
*/
if (t == curthread) {
+ int save_spl;
+
kpreempt_disable();
- ctx->kc_rawtick = KCPC_GET_TICK();
- atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
- pcbe_ops->pcbe_program(ctx);
+ save_spl = spl_xcall();
+ kcpc_program(ctx, B_TRUE, B_TRUE);
+ splx(save_spl);
kpreempt_enable();
- } else
+ } else {
/*
* Since we are the agent LWP, we know the victim LWP is stopped
* until we're done here; no need to worry about preemption or
@@ -262,7 +341,8 @@ kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
* still be accessed from, for instance, another CPU doing a
* kcpc_invalidate_all().
*/
- atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
+ KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
+ }
mutex_enter(&set->ks_lock);
set->ks_state |= KCPC_SET_BOUND;
@@ -304,7 +384,7 @@ kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode)
* notification, we flag the context as being one that
* cares about overflow.
*/
- ctx->kc_flags |= KCPC_CTX_SIGOVF;
+ KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_SIGOVF);
}
rp->kr_config = NULL;
@@ -349,7 +429,7 @@ int
kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
{
kcpc_ctx_t *ctx = set->ks_ctx;
- uint64_t curtick = KCPC_GET_TICK();
+ int save_spl;
mutex_enter(&set->ks_lock);
if ((set->ks_state & KCPC_SET_BOUND) == 0) {
@@ -358,41 +438,53 @@ kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
}
mutex_exit(&set->ks_lock);
- if (ctx->kc_flags & KCPC_CTX_INVALID)
+ /*
+ * Kernel preemption must be disabled while reading the hardware regs,
+ * and if this is a CPU-bound context, while checking the CPU binding of
+ * the current thread.
+ */
+ kpreempt_disable();
+ save_spl = spl_xcall();
+
+ if (ctx->kc_flags & KCPC_CTX_INVALID) {
+ splx(save_spl);
+ kpreempt_enable();
return (EAGAIN);
+ }
if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) {
- /*
- * Kernel preemption must be disabled while reading the
- * hardware regs, and if this is a CPU-bound context, while
- * checking the CPU binding of the current thread.
- */
- kpreempt_disable();
-
if (ctx->kc_cpuid != -1) {
if (curthread->t_bind_cpu != ctx->kc_cpuid) {
+ splx(save_spl);
kpreempt_enable();
return (EAGAIN);
}
}
if (ctx->kc_thread == curthread) {
- ctx->kc_hrtime = gethrtime();
+ uint64_t curtick = KCPC_GET_TICK();
+
+ ctx->kc_hrtime = gethrtime_waitfree();
pcbe_ops->pcbe_sample(ctx);
ctx->kc_vtick += curtick - ctx->kc_rawtick;
ctx->kc_rawtick = curtick;
}
- kpreempt_enable();
-
/*
* The config may have been invalidated by
* the pcbe_sample op.
*/
- if (ctx->kc_flags & KCPC_CTX_INVALID)
+ if (ctx->kc_flags & KCPC_CTX_INVALID) {
+ splx(save_spl);
+ kpreempt_enable();
return (EAGAIN);
+ }
+
}
+ splx(save_spl);
+ kpreempt_enable();
+
if (copyout(set->ks_data, buf,
set->ks_nreqs * sizeof (uint64_t)) == -1)
return (EFAULT);
@@ -412,20 +504,17 @@ kcpc_stop_hw(kcpc_ctx_t *ctx)
{
cpu_t *cp;
- ASSERT((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED))
- == KCPC_CTX_INVALID);
-
kpreempt_disable();
- cp = cpu_get(ctx->kc_cpuid);
- ASSERT(cp != NULL);
+ if (ctx->kc_cpuid == CPU->cpu_id) {
+ cp = CPU;
+ } else {
+ cp = cpu_get(ctx->kc_cpuid);
+ }
+
+ ASSERT(cp != NULL && cp->cpu_cpc_ctx == ctx);
+ kcpc_cpu_stop(cp, B_FALSE);
- if (cp == CPU) {
- pcbe_ops->pcbe_allstop();
- atomic_or_uint(&ctx->kc_flags,
- KCPC_CTX_INVALID_STOPPED);
- } else
- kcpc_remote_stop(cp);
kpreempt_enable();
}
@@ -451,7 +540,7 @@ kcpc_unbind(kcpc_set_t *set)
* Use kc_lock to synchronize with kcpc_restore().
*/
mutex_enter(&ctx->kc_lock);
- ctx->kc_flags |= KCPC_CTX_INVALID;
+ KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
mutex_exit(&ctx->kc_lock);
if (ctx->kc_cpuid == -1) {
@@ -461,12 +550,14 @@ kcpc_unbind(kcpc_set_t *set)
* context. It will be freed via removectx() calling
* freectx() calling kcpc_free().
*/
- if (t == curthread &&
- (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
+ if (t == curthread) {
+ int save_spl;
+
kpreempt_disable();
- pcbe_ops->pcbe_allstop();
- atomic_or_uint(&ctx->kc_flags,
- KCPC_CTX_INVALID_STOPPED);
+ save_spl = spl_xcall();
+ if (!(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED))
+ kcpc_unprogram(ctx, B_TRUE);
+ splx(save_spl);
kpreempt_enable();
}
#ifdef DEBUG
@@ -503,7 +594,6 @@ kcpc_unbind(kcpc_set_t *set)
if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0)
kcpc_stop_hw(ctx);
ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED);
- cp->cpu_cpc_ctx = NULL;
mutex_exit(&cp->cpu_cpc_ctxlock);
}
mutex_exit(&cpu_lock);
@@ -543,12 +633,20 @@ kcpc_restart(kcpc_set_t *set)
{
kcpc_ctx_t *ctx = set->ks_ctx;
int i;
+ int save_spl;
ASSERT(set->ks_state & KCPC_SET_BOUND);
ASSERT(ctx->kc_thread == curthread);
ASSERT(ctx->kc_cpuid == -1);
+ for (i = 0; i < set->ks_nreqs; i++) {
+ *(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
+ pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset,
+ 0, 0, NULL, &set->ks_req[i].kr_config, NULL);
+ }
+
kpreempt_disable();
+ save_spl = spl_xcall();
/*
* If the user is doing this on a running set, make sure the counters
@@ -557,18 +655,13 @@ kcpc_restart(kcpc_set_t *set)
if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
pcbe_ops->pcbe_allstop();
- for (i = 0; i < set->ks_nreqs; i++) {
- *(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
- pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset,
- 0, 0, NULL, &set->ks_req[i].kr_config, NULL);
- }
-
/*
* Ask the backend to program the hardware.
*/
ctx->kc_rawtick = KCPC_GET_TICK();
- atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
+ KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
pcbe_ops->pcbe_program(ctx);
+ splx(save_spl);
kpreempt_enable();
return (0);
@@ -604,7 +697,7 @@ kcpc_enable(kthread_t *t, int cmd, int enable)
if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
return (EINVAL);
kpreempt_disable();
- atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
+ KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
kcpc_restore(ctx);
kpreempt_enable();
} else if (cmd == CPC_DISABLE) {
@@ -612,7 +705,7 @@ kcpc_enable(kthread_t *t, int cmd, int enable)
return (EINVAL);
kpreempt_disable();
kcpc_save(ctx);
- atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
+ KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
kpreempt_enable();
} else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) {
/*
@@ -624,10 +717,11 @@ kcpc_enable(kthread_t *t, int cmd, int enable)
CPC_COUNT_USER: CPC_COUNT_SYSTEM;
kpreempt_disable();
- atomic_or_uint(&ctx->kc_flags,
+ KCPC_CTX_FLAG_SET(ctx,
KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
pcbe_ops->pcbe_allstop();
kpreempt_enable();
+
for (i = 0; i < set->ks_nreqs; i++) {
set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data);
if (enable)
@@ -715,12 +809,14 @@ kcpc_next_config(void *token, void *current, uint64_t **data)
kcpc_ctx_t *
-kcpc_ctx_alloc(void)
+kcpc_ctx_alloc(int kmem_flags)
{
kcpc_ctx_t *ctx;
long hash;
- ctx = (kcpc_ctx_t *)kmem_zalloc(sizeof (kcpc_ctx_t), KM_SLEEP);
+ ctx = (kcpc_ctx_t *)kmem_zalloc(sizeof (kcpc_ctx_t), kmem_flags);
+ if (ctx == NULL)
+ return (NULL);
hash = CPC_HASH_CTX(ctx);
mutex_enter(&kcpc_ctx_llock[hash]);
@@ -909,9 +1005,10 @@ kcpc_overflow_intr(caddr_t arg, uint64_t bitmap)
*/
if (kcpc_nullctx_panic)
panic("null cpc context, thread %p", (void *)t);
-
- cmn_err(CE_WARN,
+#ifdef DEBUG
+ cmn_err(CE_NOTE,
"null cpc context found in overflow handler!\n");
+#endif
atomic_add_32(&kcpc_nullctx_count, 1);
} else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) {
/*
@@ -935,13 +1032,20 @@ kcpc_overflow_intr(caddr_t arg, uint64_t bitmap)
* so freeze the context. The interrupt handler
* has already stopped the counter hardware.
*/
- atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
+ KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
atomic_or_uint(&ctx->kc_pics[i].kp_flags,
KCPC_PIC_OVERFLOWED);
}
}
aston(t);
+ } else if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) {
+ /*
+ * Thread context is no longer valid, but there may be a valid
+ * CPU context.
+ */
+ return (curthread->t_cpu->cpu_cpc_ctx);
}
+
return (NULL);
}
@@ -956,6 +1060,7 @@ kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
kcpc_ctx_t *ctx;
uint64_t bitmap;
uint8_t *state;
+ int save_spl;
if (pcbe_ops == NULL ||
(bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
@@ -985,6 +1090,13 @@ kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
(*dtrace_cpc_fire)(bitmap);
ctx = curthread->t_cpu->cpu_cpc_ctx;
+ if (ctx == NULL) {
+#ifdef DEBUG
+ cmn_err(CE_NOTE, "null cpc context in"
+ "hardware overflow handler!\n");
+#endif
+ return (DDI_INTR_CLAIMED);
+ }
/* Reset any counters that have overflowed */
for (i = 0; i < ctx->kc_set->ks_nreqs; i++) {
@@ -1025,7 +1137,12 @@ kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
* the middle of updating it, no AST has been posted, and so we
* should sample the counters here, and restart them with no
* further fuss.
+ *
+ * The CPU's CPC context may disappear as a result of a cross-call which
+ * has a higher PIL on x86, so protect the context by raising PIL to the
+ * cross-call level.
*/
+ save_spl = spl_xcall();
if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) {
uint64_t curtick = KCPC_GET_TICK();
@@ -1035,6 +1152,7 @@ kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
pcbe_ops->pcbe_sample(ctx);
pcbe_ops->pcbe_program(ctx);
}
+ splx(save_spl);
return (DDI_INTR_CLAIMED);
}
@@ -1087,7 +1205,7 @@ kcpc_overflow_ast()
* Otherwise, re-enable the counters and continue life as before.
*/
kpreempt_disable();
- atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
+ KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
pcbe_ops->pcbe_program(ctx);
kpreempt_enable();
return (0);
@@ -1099,43 +1217,68 @@ kcpc_overflow_ast()
static void
kcpc_save(kcpc_ctx_t *ctx)
{
+ int err;
+ int save_spl;
+
+ kpreempt_disable();
+ save_spl = spl_xcall();
+
if (ctx->kc_flags & KCPC_CTX_INVALID) {
- if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED)
+ if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) {
+ splx(save_spl);
+ kpreempt_enable();
return;
+ }
/*
* This context has been invalidated but the counters have not
* been stopped. Stop them here and mark the context stopped.
*/
- pcbe_ops->pcbe_allstop();
- atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);
+ kcpc_unprogram(ctx, B_TRUE);
+ splx(save_spl);
+ kpreempt_enable();
return;
}
pcbe_ops->pcbe_allstop();
- if (ctx->kc_flags & KCPC_CTX_FREEZE)
+ if (ctx->kc_flags & KCPC_CTX_FREEZE) {
+ splx(save_spl);
+ kpreempt_enable();
return;
+ }
/*
* Need to sample for all reqs into each req's current mpic.
*/
- ctx->kc_hrtime = gethrtime();
+ ctx->kc_hrtime = gethrtime_waitfree();
ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick;
pcbe_ops->pcbe_sample(ctx);
+
+ /*
+ * Program counters for measuring capacity and utilization, since the
+ * user thread isn't using the counters anymore
+ */
+ ASSERT(ctx->kc_cpuid == -1);
+ cu_cpc_program(CPU, &err);
+ splx(save_spl);
+ kpreempt_enable();
}
static void
kcpc_restore(kcpc_ctx_t *ctx)
{
+ int save_spl;
+
mutex_enter(&ctx->kc_lock);
+
if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) ==
- KCPC_CTX_INVALID)
+ KCPC_CTX_INVALID) {
/*
* The context is invalidated but has not been marked stopped.
* We mark it as such here because we will not start the
* counters during this context switch.
*/
- ctx->kc_flags |= KCPC_CTX_INVALID_STOPPED;
-
+ KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID_STOPPED);
+ }
if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE)) {
mutex_exit(&ctx->kc_lock);
@@ -1151,7 +1294,7 @@ kcpc_restore(kcpc_ctx_t *ctx)
* doing this, we're asking kcpc_free() to cv_wait() until
* kcpc_restore() has completed.
*/
- ctx->kc_flags |= KCPC_CTX_RESTORE;
+ KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_RESTORE);
mutex_exit(&ctx->kc_lock);
/*
@@ -1159,14 +1302,17 @@ kcpc_restore(kcpc_ctx_t *ctx)
* don't do an explicit pcbe_allstop() here because they should have
* been stopped already by the last consumer.
*/
- ctx->kc_rawtick = KCPC_GET_TICK();
- pcbe_ops->pcbe_program(ctx);
+ kpreempt_disable();
+ save_spl = spl_xcall();
+ kcpc_program(ctx, B_TRUE, B_TRUE);
+ splx(save_spl);
+ kpreempt_enable();
/*
* Wake the agent thread if it's waiting in kcpc_free().
*/
mutex_enter(&ctx->kc_lock);
- ctx->kc_flags &= ~KCPC_CTX_RESTORE;
+ KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_RESTORE);
cv_signal(&ctx->kc_condv);
mutex_exit(&ctx->kc_lock);
}
@@ -1177,7 +1323,6 @@ kcpc_restore(kcpc_ctx_t *ctx)
* counters when the idle thread is switched on, and they start them again when
* it is switched off.
*/
-
/*ARGSUSED*/
void
kcpc_idle_save(struct cpu *cp)
@@ -1242,7 +1387,7 @@ kcpc_lwp_create(kthread_t *t, kthread_t *ct)
rw_exit(&kcpc_cpuctx_lock);
return;
}
- cctx = kcpc_ctx_alloc();
+ cctx = kcpc_ctx_alloc(KM_SLEEP);
kcpc_ctx_clone(ctx, cctx);
rw_exit(&kcpc_cpuctx_lock);
@@ -1250,7 +1395,7 @@ kcpc_lwp_create(kthread_t *t, kthread_t *ct)
* Copy the parent context's kc_flags field, but don't overwrite
* the child's in case it was modified during kcpc_ctx_clone.
*/
- cctx->kc_flags |= ctx->kc_flags;
+ KCPC_CTX_FLAG_SET(cctx, ctx->kc_flags);
cctx->kc_thread = ct;
cctx->kc_cpuid = -1;
ct->t_cpc_set = cctx->kc_set;
@@ -1265,13 +1410,14 @@ kcpc_lwp_create(kthread_t *t, kthread_t *ct)
* set to UINT64_MAX, and their pic's overflow flag turned on
* so that our trap() processing knows to send a signal.
*/
- atomic_or_uint(&cctx->kc_flags, KCPC_CTX_FREEZE);
+ KCPC_CTX_FLAG_SET(cctx, KCPC_CTX_FREEZE);
for (i = 0; i < ks->ks_nreqs; i++) {
kcpc_request_t *kr = &ks->ks_req[i];
if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) {
*(kr->kr_data) = UINT64_MAX;
- kr->kr_picp->kp_flags |= KCPC_PIC_OVERFLOWED;
+ atomic_or_uint(&kr->kr_picp->kp_flags,
+ KCPC_PIC_OVERFLOWED);
}
}
ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
@@ -1315,7 +1461,7 @@ kcpc_lwp_create(kthread_t *t, kthread_t *ct)
*/
/*ARGSUSED*/
-static void
+void
kcpc_free(kcpc_ctx_t *ctx, int isexec)
{
int i;
@@ -1329,7 +1475,7 @@ kcpc_free(kcpc_ctx_t *ctx, int isexec)
mutex_enter(&ctx->kc_lock);
while (ctx->kc_flags & KCPC_CTX_RESTORE)
cv_wait(&ctx->kc_condv, &ctx->kc_lock);
- ctx->kc_flags |= KCPC_CTX_INVALID;
+ KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
mutex_exit(&ctx->kc_lock);
if (isexec) {
@@ -1356,21 +1502,22 @@ kcpc_free(kcpc_ctx_t *ctx, int isexec)
if (cp != NULL) {
mutex_enter(&cp->cpu_cpc_ctxlock);
kcpc_stop_hw(ctx);
- cp->cpu_cpc_ctx = NULL;
mutex_exit(&cp->cpu_cpc_ctxlock);
}
mutex_exit(&cpu_lock);
ASSERT(curthread->t_cpc_ctx == NULL);
} else {
+ int save_spl;
+
/*
* Thread-bound context; stop _this_ CPU's counters.
*/
kpreempt_disable();
- pcbe_ops->pcbe_allstop();
- atomic_or_uint(&ctx->kc_flags,
- KCPC_CTX_INVALID_STOPPED);
- kpreempt_enable();
+ save_spl = spl_xcall();
+ kcpc_unprogram(ctx, B_TRUE);
curthread->t_cpc_ctx = NULL;
+ splx(save_spl);
+ kpreempt_enable();
}
/*
@@ -1435,7 +1582,7 @@ kcpc_invalidate_all(void)
for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) {
mutex_enter(&kcpc_ctx_llock[hash]);
for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next)
- atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
+ KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
mutex_exit(&kcpc_ctx_llock[hash]);
}
}
@@ -1451,7 +1598,7 @@ kcpc_invalidate_config(void *token)
ASSERT(ctx != NULL);
- atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
+ KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
}
/*
@@ -1462,18 +1609,11 @@ kcpc_passivate(void)
{
kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
kcpc_set_t *set = curthread->t_cpc_set;
+ int save_spl;
if (set == NULL)
return;
- /*
- * We're cleaning up after this thread; ensure there are no dangling
- * CPC pointers left behind. The context and set will be freed by
- * freectx() in the case of an LWP-bound set, and by kcpc_unbind() in
- * the case of a CPU-bound set.
- */
- curthread->t_cpc_ctx = NULL;
-
if (ctx == NULL) {
/*
* This thread has a set but no context; it must be a CPU-bound
@@ -1491,6 +1631,8 @@ kcpc_passivate(void)
return;
}
+ kpreempt_disable();
+ save_spl = spl_xcall();
curthread->t_cpc_set = NULL;
/*
@@ -1500,13 +1642,20 @@ kcpc_passivate(void)
* INVALID_STOPPED flag here and kcpc_restore() setting the flag during
* a context switch.
*/
-
- kpreempt_disable();
if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
- pcbe_ops->pcbe_allstop();
- atomic_or_uint(&ctx->kc_flags,
+ kcpc_unprogram(ctx, B_TRUE);
+ KCPC_CTX_FLAG_SET(ctx,
KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
}
+
+ /*
+ * We're cleaning up after this thread; ensure there are no dangling
+ * CPC pointers left behind. The context and set will be freed by
+ * freectx().
+ */
+ curthread->t_cpc_ctx = NULL;
+
+ splx(save_spl);
kpreempt_enable();
}
@@ -1667,7 +1816,7 @@ kcpc_invalidate(kthread_t *t)
kcpc_ctx_t *ctx = t->t_cpc_ctx;
if (ctx != NULL)
- atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
+ KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
}
/*
@@ -1691,6 +1840,648 @@ kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third)
"pcbe", prefix, ".", s, 3, NULL) < 0 ? -1 : 0);
}
+/*
+ * Create one or more CPC contexts for given CPU with specified counter event
+ * requests
+ *
+ * If the number of requested counter events is less than or equal to the
+ * number of hardware counters on a CPU and they can all be assigned to the
+ * counters on a CPU at the same time, then make one CPC context.
+ *
+ * Otherwise, multiple CPC contexts are created to allow multiplexing more
+ * counter events than existing counters onto the counters by iterating through
+ * all of the CPC contexts, programming the counters with each CPC context one
+ * at a time and measuring the resulting counter values. Each of the resulting
+ * CPC contexts contains some number of requested counter events less than or
+ * equal to the number of counters on a CPU depending on whether all the counter
+ * events can be programmed on all the counters at the same time or not.
+ *
+ * Flags to kmem_{,z}alloc() are passed in as an argument to allow specifying
+ * whether memory allocation should be non-blocking or not. The code will try
+ * to allocate *whole* CPC contexts if possible. If there is any memory
+ * allocation failure during the allocations needed for a given CPC context, it
+ * will skip allocating that CPC context because it cannot allocate the whole
+ * thing. Thus, the only time that it will end up allocating none (i.e. no CPC
+ * contexts whatsoever) is when it cannot even allocate *one* whole CPC context
+ * without a memory allocation failure occurring.
+ */
+int
+kcpc_cpu_ctx_create(cpu_t *cp, kcpc_request_list_t *req_list, int kmem_flags,
+ kcpc_ctx_t ***ctx_ptr_array, size_t *ctx_ptr_array_sz)
+{
+ kcpc_ctx_t **ctx_ptrs;
+ int nctx;
+ int nctx_ptrs;
+ int nreqs;
+ kcpc_request_t *reqs;
+
+ if (cp == NULL || ctx_ptr_array == NULL || ctx_ptr_array_sz == NULL ||
+ req_list == NULL || req_list->krl_cnt < 1)
+ return (-1);
+
+ /*
+ * Allocate number of sets assuming that each set contains one and only
+ * one counter event request for each counter on a CPU
+ */
+ nreqs = req_list->krl_cnt;
+ nctx_ptrs = (nreqs + cpc_ncounters - 1) / cpc_ncounters;
+ ctx_ptrs = kmem_zalloc(nctx_ptrs * sizeof (kcpc_ctx_t *), kmem_flags);
+ if (ctx_ptrs == NULL)
+ return (-2);
+
+ /*
+ * Fill in sets of requests
+ */
+ nctx = 0;
+ reqs = req_list->krl_list;
+ while (nreqs > 0) {
+ kcpc_ctx_t *ctx;
+ kcpc_set_t *set;
+ int subcode;
+
+ /*
+ * Allocate CPC context and set for requested counter events
+ */
+ ctx = kcpc_ctx_alloc(kmem_flags);
+ set = kcpc_set_create(reqs, nreqs, 0, kmem_flags);
+ if (ctx == NULL || set == NULL) {
+ if (set != NULL)
+ kcpc_free_set(set);
+ if (ctx != NULL)
+ kcpc_ctx_free(ctx);
+ break;
+ }
+
+ /*
+ * Determine assignment of requested counter events to specific
+ * counters
+ */
+ if (kcpc_assign_reqs(set, ctx) != 0) {
+ /*
+ * May not be able to assign requested counter events
+ * to all counters since all counters may not be able
+ * to do all events, so only do one counter event in
+ * set of counter requests when this happens since at
+ * least one of the counters must be able to do the
+ * event.
+ */
+ kcpc_free_set(set);
+ set = kcpc_set_create(reqs, 1, 0, kmem_flags);
+ if (set == NULL) {
+ kcpc_ctx_free(ctx);
+ break;
+ }
+ if (kcpc_assign_reqs(set, ctx) != 0) {
+#ifdef DEBUG
+ cmn_err(CE_NOTE, "!kcpc_cpu_ctx_create: can't "
+ "assign counter event %s!\n",
+ set->ks_req->kr_event);
+#endif
+ kcpc_free_set(set);
+ kcpc_ctx_free(ctx);
+ reqs++;
+ nreqs--;
+ continue;
+ }
+ }
+
+ /*
+ * Allocate memory needed to hold requested counter event data
+ */
+ set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t),
+ kmem_flags);
+ if (set->ks_data == NULL) {
+ kcpc_free_set(set);
+ kcpc_ctx_free(ctx);
+ break;
+ }
+
+ /*
+ * Configure requested counter events
+ */
+ if (kcpc_configure_reqs(ctx, set, &subcode) != 0) {
+#ifdef DEBUG
+ cmn_err(CE_NOTE,
+ "!kcpc_cpu_ctx_create: can't configure "
+ "set of counter event requests!\n");
+#endif
+ reqs += set->ks_nreqs;
+ nreqs -= set->ks_nreqs;
+ kmem_free(set->ks_data,
+ set->ks_nreqs * sizeof (uint64_t));
+ kcpc_free_set(set);
+ kcpc_ctx_free(ctx);
+ continue;
+ }
+
+ /*
+ * Point set of counter event requests at this context and fill
+ * in CPC context
+ */
+ set->ks_ctx = ctx;
+ ctx->kc_set = set;
+ ctx->kc_cpuid = cp->cpu_id;
+ ctx->kc_thread = curthread;
+
+ ctx_ptrs[nctx] = ctx;
+
+ /*
+ * Update requests and how many are left to be assigned to sets
+ */
+ reqs += set->ks_nreqs;
+ nreqs -= set->ks_nreqs;
+
+ /*
+ * Increment number of CPC contexts and allocate bigger array
+ * for context pointers as needed
+ */
+ nctx++;
+ if (nctx >= nctx_ptrs) {
+ kcpc_ctx_t **new;
+ int new_cnt;
+
+ /*
+ * Allocate more CPC contexts based on how many
+ * contexts allocated so far and how many counter
+ * requests left to assign
+ */
+ new_cnt = nctx_ptrs +
+ ((nreqs + cpc_ncounters - 1) / cpc_ncounters);
+ new = kmem_zalloc(new_cnt * sizeof (kcpc_ctx_t *),
+ kmem_flags);
+ if (new == NULL)
+ break;
+
+ /*
+ * Copy contents of old sets into new ones
+ */
+ bcopy(ctx_ptrs, new,
+ nctx_ptrs * sizeof (kcpc_ctx_t *));
+
+ /*
+ * Free old array of context pointers and use newly
+ * allocated one instead now
+ */
+ kmem_free(ctx_ptrs, nctx_ptrs * sizeof (kcpc_ctx_t *));
+ ctx_ptrs = new;
+ nctx_ptrs = new_cnt;
+ }
+ }
+
+ /*
+ * Fail if no CPC contexts were filled in
+ */
+ if (nctx == 0) {
+ kmem_free(ctx_ptrs, nctx_ptrs * sizeof (kcpc_ctx_t *));
+ *ctx_ptr_array = NULL;
+ *ctx_ptr_array_sz = 0;
+ return (-2);
+ }
+
+ *ctx_ptr_array = ctx_ptrs;
+ *ctx_ptr_array_sz = nctx_ptrs * sizeof (kcpc_ctx_t *);
+ return (nctx);
+}
+
+/*
+ * Return whether PCBE supports given counter event
+ */
+boolean_t
+kcpc_event_supported(char *event)
+{
+ if (pcbe_ops == NULL || pcbe_ops->pcbe_event_coverage(event) == 0)
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+/*
+ * Program counters on current CPU with given CPC context
+ *
+ * If kernel is interposing on counters to measure hardware capacity and
+ * utilization, then unprogram counters for kernel *before* programming them
+ * with specified CPC context.
+ *
+ * kcpc_{program,unprogram}() may be called either directly by a thread running
+ * on the target CPU or from a cross-call from another CPU. To protect
+ * programming and unprogramming from being interrupted by cross-calls, callers
+ * who execute kcpc_{program,unprogram} should raise PIL to the level used by
+ * cross-calls.
+ */
+void
+kcpc_program(kcpc_ctx_t *ctx, boolean_t for_thread, boolean_t cu_interpose)
+{
+ int error;
+
+ ASSERT(IS_HIPIL());
+
+ /*
+ * CPC context shouldn't be NULL, its CPU field should specify current
+ * CPU or be -1 to specify any CPU when the context is bound to a
+ * thread, and preemption should be disabled
+ */
+ ASSERT(ctx != NULL && (ctx->kc_cpuid == CPU->cpu_id ||
+ ctx->kc_cpuid == -1) && curthread->t_preempt > 0);
+ if (ctx == NULL || (ctx->kc_cpuid != CPU->cpu_id &&
+ ctx->kc_cpuid != -1) || curthread->t_preempt < 1)
+ return;
+
+ /*
+ * Unprogram counters for kernel measuring hardware capacity and
+ * utilization
+ */
+ if (cu_interpose == B_TRUE) {
+ cu_cpc_unprogram(CPU, &error);
+ } else {
+ kcpc_set_t *set = ctx->kc_set;
+ int i;
+
+ ASSERT(set != NULL);
+
+ /*
+ * Since cu_interpose is false, we are programming CU context.
+ * In general, PCBE can continue from the state saved in the
+ * set, but it is not very reliable, so we start again from the
+ * preset value.
+ */
+ for (i = 0; i < set->ks_nreqs; i++) {
+ /*
+ * Reset the virtual counter value to the preset value.
+ */
+ *(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
+
+ /*
+ * Reset PCBE to the preset value.
+ */
+ pcbe_ops->pcbe_configure(0, NULL,
+ set->ks_req[i].kr_preset,
+ 0, 0, NULL, &set->ks_req[i].kr_config, NULL);
+ }
+ }
+
+ /*
+ * Program counters with specified CPC context
+ */
+ ctx->kc_rawtick = KCPC_GET_TICK();
+ pcbe_ops->pcbe_program(ctx);
+
+ /*
+ * Record that the counters are programmed: clear the freeze flag for a
+ * thread CPC context, or attach the context to the CPU otherwise
+ */
+ if (for_thread == B_TRUE)
+ KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
+ else
+ CPU->cpu_cpc_ctx = ctx;
+}
+
+/*
+ * Unprogram counters with given CPC context on current CPU
+ *
+ * If kernel is interposing on counters to measure hardware capacity and
+ * utilization, then program counters for the kernel capacity and utilization
+ * *after* unprogramming them for given CPC context.
+ *
+ * See the comment for kcpc_program regarding the synchronization with
+ * cross-calls.
+ */
+void
+kcpc_unprogram(kcpc_ctx_t *ctx, boolean_t cu_interpose)
+{
+ int error;
+
+ ASSERT(IS_HIPIL());
+
+ /*
+ * CPC context shouldn't be NULL, its CPU field should specify current
+ * CPU or be -1 to specify any CPU when the context is bound to a
+ * thread, and preemption should be disabled
+ */
+ ASSERT(ctx != NULL && (ctx->kc_cpuid == CPU->cpu_id ||
+ ctx->kc_cpuid == -1) && curthread->t_preempt > 0);
+
+ if (ctx == NULL || (ctx->kc_cpuid != CPU->cpu_id &&
+ ctx->kc_cpuid != -1) || curthread->t_preempt < 1 ||
+ (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) != 0) {
+ return;
+ }
+
+ /*
+ * Specified CPC context to be unprogrammed should be bound to current
+ * CPU or thread
+ */
+ ASSERT(CPU->cpu_cpc_ctx == ctx || curthread->t_cpc_ctx == ctx);
+
+ /*
+ * Stop counters
+ */
+ pcbe_ops->pcbe_allstop();
+ KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID_STOPPED);
+
+ /*
+ * Allow kernel to interpose on counters and program them for its own
+ * use to measure hardware capacity and utilization if cu_interpose
+ * argument is true
+ */
+ if (cu_interpose == B_TRUE)
+ cu_cpc_program(CPU, &error);
+}
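+
+/*
+ * The expected calling pattern for kcpc_program()/kcpc_unprogram() on the
+ * current CPU, as used by kcpc_unbind() and kcpc_restore() above:
+ *
+ * kpreempt_disable();
+ * save_spl = spl_xcall();
+ * kcpc_unprogram(ctx, B_TRUE);
+ * splx(save_spl);
+ * kpreempt_enable();
+ */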
+
+/*
+ * Read CPU Performance Counter (CPC) on current CPU and call specified update
+ * routine with data for each counter event currently programmed on CPU
+ */
+int
+kcpc_read(kcpc_update_func_t update_func)
+{
+ kcpc_ctx_t *ctx;
+ int i;
+ kcpc_request_t *req;
+ int retval;
+ kcpc_set_t *set;
+
+ ASSERT(IS_HIPIL());
+
+ /*
+ * Can't grab locks or block because may be called inside dispatcher
+ */
+ kpreempt_disable();
+
+ ctx = CPU->cpu_cpc_ctx;
+ if (ctx == NULL) {
+ kpreempt_enable();
+ return (0);
+ }
+
+ /*
+ * Read counter data from current CPU
+ */
+ pcbe_ops->pcbe_sample(ctx);
+
+ set = ctx->kc_set;
+ if (set == NULL || set->ks_req == NULL) {
+ kpreempt_enable();
+ return (0);
+ }
+
+ /*
+ * Call update function with preset pointer and data for each CPC event
+ * request currently programmed on current CPU
+ */
+ req = set->ks_req;
+ retval = 0;
+ for (i = 0; i < set->ks_nreqs; i++) {
+ int ret;
+
+ if (req[i].kr_data == NULL)
+ break;
+
+ ret = update_func(req[i].kr_ptr, *req[i].kr_data);
+ if (ret < 0)
+ retval = ret;
+ }
+
+ kpreempt_enable();
+
+ return (retval);
+}
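+
+/*
+ * Sketch of an update routine suitable for kcpc_read() (illustrative; the
+ * callback name and cookie type are hypothetical, but the signature matches
+ * how update_func is invoked above):
+ *
+ * static int
+ * my_cntr_update(void *ptr, uint64_t cntr_value)
+ * {
+ * *(uint64_t *)ptr += cntr_value;
+ * return (0);
+ * }
+ *
+ * kcpc_read(my_cntr_update) then calls it once per programmed request with
+ * the kr_ptr registered via kcpc_reqs_add() and the freshly sampled value.
+ */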
+
+/*
+ * Initialize list of counter event requests
+ */
+kcpc_request_list_t *
+kcpc_reqs_init(int nreqs, int kmem_flags)
+{
+ kcpc_request_list_t *req_list;
+ kcpc_request_t *reqs;
+
+ if (nreqs < 1)
+ return (NULL);
+
+ req_list = kmem_zalloc(sizeof (kcpc_request_list_t), kmem_flags);
+ if (req_list == NULL)
+ return (NULL);
+
+ reqs = kmem_zalloc(nreqs * sizeof (kcpc_request_t), kmem_flags);
+ if (reqs == NULL) {
+ kmem_free(req_list, sizeof (kcpc_request_list_t));
+ return (NULL);
+ }
+
+ req_list->krl_list = reqs;
+ req_list->krl_cnt = 0;
+ req_list->krl_max = nreqs;
+ return (req_list);
+}
+
+
+/*
+ * Add counter event request to given list of counter event requests
+ */
+int
+kcpc_reqs_add(kcpc_request_list_t *req_list, char *event, uint64_t preset,
+ uint_t flags, uint_t nattrs, kcpc_attr_t *attr, void *ptr, int kmem_flags)
+{
+ kcpc_request_t *req;
+
+ if (req_list == NULL || req_list->krl_list == NULL)
+ return (-1);
+ ASSERT(req_list->krl_max != 0);
+
+ /*
+ * Allocate more space (if needed)
+ */
+ if (req_list->krl_cnt >= req_list->krl_max) {
+ kcpc_request_t *new;
+ kcpc_request_t *old;
+
+ old = req_list->krl_list;
+ new = kmem_zalloc((req_list->krl_max +
+ cpc_ncounters) * sizeof (kcpc_request_t), kmem_flags);
+ if (new == NULL)
+ return (-2);
+
+ req_list->krl_list = new;
+ bcopy(old, req_list->krl_list,
+ req_list->krl_cnt * sizeof (kcpc_request_t));
+ kmem_free(old, req_list->krl_max * sizeof (kcpc_request_t));
+ req_list->krl_max += cpc_ncounters;
+ }
+
+ /*
+ * Fill in request as much as possible now, but some fields will need
+ * to be set when request is assigned to a set.
+ */
+ req = &req_list->krl_list[req_list->krl_cnt];
+ req->kr_config = NULL;
+ req->kr_picnum = -1; /* have CPC pick this */
+ req->kr_index = -1; /* set when assigning request to set */
+ req->kr_data = NULL; /* set when configuring request */
+ (void) strcpy(req->kr_event, event);
+ req->kr_preset = preset;
+ req->kr_flags = flags;
+ req->kr_nattrs = nattrs;
+ req->kr_attr = attr;
+ /*
+ * Keep pointer given by caller to give to update function when this
+ * counter event is sampled/read
+ */
+ req->kr_ptr = ptr;
+
+ req_list->krl_cnt++;
+
+ return (0);
+}
+
+/*
+ * Reset list of CPC event requests so its space can be used for another set
+ * of requests
+ */
+int
+kcpc_reqs_reset(kcpc_request_list_t *req_list)
+{
+ /*
+ * Return when pointer to request list structure or request is NULL or
+ * when max requests is less than or equal to 0
+ */
+ if (req_list == NULL || req_list->krl_list == NULL ||
+ req_list->krl_max <= 0)
+ return (-1);
+
+ /*
+ * Zero out requests and number of requests used
+ */
+ bzero(req_list->krl_list, req_list->krl_max * sizeof (kcpc_request_t));
+ req_list->krl_cnt = 0;
+ return (0);
+}
+
+/*
+ * Free given list of counter event requests
+ */
+int
+kcpc_reqs_fini(kcpc_request_list_t *req_list)
+{
+ kmem_free(req_list->krl_list,
+ req_list->krl_max * sizeof (kcpc_request_t));
+ kmem_free(req_list, sizeof (kcpc_request_list_t));
+ return (0);
+}
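+
+/*
+ * Putting the request list API together (illustrative sketch; the event name,
+ * cookie, and error handling are placeholders):
+ *
+ * kcpc_request_list_t *rl;
+ * kcpc_ctx_t **ctxs;
+ * size_t ctxs_sz;
+ *
+ * if ((rl = kcpc_reqs_init(cpc_ncounters, KM_NOSLEEP)) == NULL)
+ * return;
+ * if (kcpc_reqs_add(rl, "PAPI_tot_ins", 0, CPC_COUNT_SYSTEM, 0, NULL,
+ * cookie, KM_NOSLEEP) == 0)
+ * (void) kcpc_cpu_ctx_create(cp, rl, KM_NOSLEEP, &ctxs, &ctxs_sz);
+ * (void) kcpc_reqs_fini(rl);
+ */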
+
+/*
+ * Create set of given counter event requests
+ */
+static kcpc_set_t *
+kcpc_set_create(kcpc_request_t *reqs, int nreqs, int set_flags, int kmem_flags)
+{
+ int i;
+ kcpc_set_t *set;
+
+ /*
+ * Allocate set and assign number of requests in set and flags
+ */
+ set = kmem_zalloc(sizeof (kcpc_set_t), kmem_flags);
+ if (set == NULL)
+ return (NULL);
+
+ if (nreqs < cpc_ncounters)
+ set->ks_nreqs = nreqs;
+ else
+ set->ks_nreqs = cpc_ncounters;
+
+ set->ks_flags = set_flags;
+
+ /*
+ * Allocate requests needed, copy requests into set, and set index into
+ * data for each request (which may change when we assign requested
+ * counter events to counters)
+ */
+ set->ks_req = (kcpc_request_t *)kmem_zalloc(sizeof (kcpc_request_t) *
+ set->ks_nreqs, kmem_flags);
+ if (set->ks_req == NULL) {
+ kmem_free(set, sizeof (kcpc_set_t));
+ return (NULL);
+ }
+
+ bcopy(reqs, set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);
+
+ for (i = 0; i < set->ks_nreqs; i++)
+ set->ks_req[i].kr_index = i;
+
+ return (set);
+}
+
+
+/*
+ * Stop counters on current CPU.
+ *
+ * If preserve_context is true, the caller is interested in the CPU's CPC
+ * context and wants it to be preserved.
+ *
+ * If preserve_context is false, the caller does not need the CPU's CPC context
+ * to be preserved, so it is set to NULL.
+ */
+static void
+kcpc_cpustop_func(boolean_t preserve_context)
+{
+ kpreempt_disable();
+
+ /*
+ * Someone already stopped this context before us, so there is nothing
+ * to do.
+ */
+ if (CPU->cpu_cpc_ctx == NULL) {
+ kpreempt_enable();
+ return;
+ }
+
+ kcpc_unprogram(CPU->cpu_cpc_ctx, B_TRUE);
+ /*
+ * If CU does not use counters, then clear the CPU's CPC context.
+ * If the caller requested to preserve the context, it should have
+ * disabled CU first, so there should be no CU context now.
+ */
+ ASSERT(!preserve_context || !CU_CPC_ON(CPU));
+ if (!preserve_context && CPU->cpu_cpc_ctx != NULL && !CU_CPC_ON(CPU))
+ CPU->cpu_cpc_ctx = NULL;
+
+ kpreempt_enable();
+}
+
+/*
+ * Stop counters on given CPU and set its CPC context to NULL unless
+ * preserve_context is true.
+ */
+void
+kcpc_cpu_stop(cpu_t *cp, boolean_t preserve_context)
+{
+ cpu_call(cp, (cpu_call_func_t)kcpc_cpustop_func,
+ preserve_context, 0);
+}
+
+/*
+ * Program the context on the current CPU
+ */
+static void
+kcpc_remoteprogram_func(kcpc_ctx_t *ctx, uintptr_t arg)
+{
+ boolean_t for_thread = (boolean_t)arg;
+
+ ASSERT(ctx != NULL);
+
+ kpreempt_disable();
+ kcpc_program(ctx, for_thread, B_TRUE);
+ kpreempt_enable();
+}
+
+/*
+ * Program counters on given CPU
+ */
+void
+kcpc_cpu_program(cpu_t *cp, kcpc_ctx_t *ctx)
+{
+ cpu_call(cp, (cpu_call_func_t)kcpc_remoteprogram_func, (uintptr_t)ctx,
+ (uintptr_t)B_FALSE);
+}
+
char *
kcpc_list_attrs(void)
{
diff --git a/usr/src/uts/common/os/pg.c b/usr/src/uts/common/os/pg.c
index 067670dbbb..835ae3d322 100644
--- a/usr/src/uts/common/os/pg.c
+++ b/usr/src/uts/common/os/pg.c
@@ -110,7 +110,11 @@ static cpu_pg_t bootstrap_pg_data;
* and the next free id in the set.
*/
static bitset_t pg_id_set;
-static pgid_t pg_id_next = 0;
+
+/*
+ * ID space starts at 1 because the root PG is assumed to have ID 0
+ */
+static pgid_t pg_id_next = 1;
/*
* Default and externed PG ops vectors
diff --git a/usr/src/uts/common/os/pghw.c b/usr/src/uts/common/os/pghw.c
index ca59db8602..534cb2c540 100644
--- a/usr/src/uts/common/os/pghw.c
+++ b/usr/src/uts/common/os/pghw.c
@@ -34,6 +34,7 @@
#include <sys/pg.h>
#include <sys/pghw.h>
#include <sys/cpu_pm.h>
+#include <sys/cap_util.h>
/*
* Processor Groups: Hardware sharing relationship layer
@@ -116,10 +117,10 @@ struct pghw_kstat {
kstat_named_t pg_hw;
kstat_named_t pg_policy;
} pghw_kstat = {
- { "id", KSTAT_DATA_UINT64 },
+ { "id", KSTAT_DATA_UINT32 },
{ "pg_class", KSTAT_DATA_STRING },
- { "ncpus", KSTAT_DATA_UINT64 },
- { "instance_id", KSTAT_DATA_UINT64 },
+ { "ncpus", KSTAT_DATA_UINT32 },
+ { "instance_id", KSTAT_DATA_UINT32 },
{ "hardware", KSTAT_DATA_STRING },
{ "policy", KSTAT_DATA_STRING },
};
@@ -127,12 +128,92 @@ struct pghw_kstat {
kmutex_t pghw_kstat_lock;
/*
+ * Capacity and Utilization PG kstats
+ *
+ * These kstats are updated one at a time, so we can have a single scratch space
+ * to fill the data.
+ *
+ * kstat fields:
+ *
+ * pgid PG ID for PG described by this kstat
+ *
+ * pg_ncpus Number of CPUs within this PG
+ *
+ * pg_cpus String describing CPUs within this PG
+ *
+ * pg_sharing Name of sharing relationship for this PG
+ *
+ * pg_generation Generation value that increases whenever any CPU leaves
+ * or joins PG. Two kstat snapshots for the same
+ * CPU may only be compared if they have the same
+ * generation
+ *
+ * pg_hw_util Running value of PG utilization for the sharing
+ * relationship
+ *
+ * pg_hw_util_time_running
+ * Total time spent collecting CU data. The time may be
+ * less than wall time if CU counters were stopped for
+ * some time.
+ *
+ * pg_hw_util_time_stopped Total time the CU counters were stopped.
+ *
+ * pg_hw_util_rate Utilization rate, expressed in operations per second.
+ *
+ * pg_hw_util_rate_max Maximum observed value of utilization rate.
+ */
+struct pghw_cu_kstat {
+ kstat_named_t pg_id;
+ kstat_named_t pg_ncpus;
+ kstat_named_t pg_generation;
+ kstat_named_t pg_hw_util;
+ kstat_named_t pg_hw_util_time_running;
+ kstat_named_t pg_hw_util_time_stopped;
+ kstat_named_t pg_hw_util_rate;
+ kstat_named_t pg_hw_util_rate_max;
+ kstat_named_t pg_cpus;
+ kstat_named_t pg_sharing;
+} pghw_cu_kstat = {
+ { "id", KSTAT_DATA_UINT32 },
+ { "ncpus", KSTAT_DATA_UINT32 },
+ { "generation", KSTAT_DATA_UINT32 },
+ { "hw_util", KSTAT_DATA_UINT64 },
+ { "hw_util_time_running", KSTAT_DATA_UINT64 },
+ { "hw_util_time_stopped", KSTAT_DATA_UINT64 },
+ { "hw_util_rate", KSTAT_DATA_UINT64 },
+ { "hw_util_rate_max", KSTAT_DATA_UINT64 },
+ { "cpus", KSTAT_DATA_STRING },
+ { "sharing_relation", KSTAT_DATA_STRING },
+};
+
+/*
+ * Calculate the string size to represent NCPUS. Allow 5 digits for each CPU ID
+ * plus one separator per CPU plus a NUL byte at the end. This is only an estimate,
+ * since we try to compress CPU ranges as x-y. In the worst case the string
+ * representation of CPUs may be truncated.
+ */
+#define CPUSTR_LEN(ncpus) ((ncpus) * 6)
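+
+/*
+ * For example, CPUSTR_LEN(4) reserves 24 bytes: enough for the worst case of
+ * four non-consecutive 5-digit IDs, e.g. "10000,10002,10004,10006"
+ * (23 characters plus the terminating NUL byte).
+ */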
+
+/*
+ * Maximum length of the string that represents list of CPUs
+ */
+static int pg_cpulist_maxlen = 0;
+
+static void pghw_kstat_create(pghw_t *);
+static int pghw_kstat_update(kstat_t *, int);
+static int pghw_cu_kstat_update(kstat_t *, int);
+static int cpu2id(void *);
+
+/*
* hwset operations
*/
static group_t *pghw_set_create(pghw_type_t);
static void pghw_set_add(group_t *, pghw_t *);
static void pghw_set_remove(group_t *, pghw_t *);
+static void pghw_cpulist_alloc(pghw_t *);
+
/*
* Initialize the physical portion of a hardware PG
*/
@@ -150,6 +231,7 @@ pghw_init(pghw_t *pg, cpu_t *cp, pghw_type_t hw)
pghw_set_add(hwset, pg);
pg->pghw_hw = hw;
+ pg->pghw_generation = 0;
pg->pghw_instance =
pg_plat_hw_instance_id(cp, hw);
pghw_kstat_create(pg);
@@ -186,8 +268,20 @@ pghw_fini(pghw_t *pg)
pg->pghw_instance = (id_t)PGHW_INSTANCE_ANON;
pg->pghw_hw = (pghw_type_t)-1;
- if (pg->pghw_kstat)
+ if (pg->pghw_kstat != NULL)
kstat_delete(pg->pghw_kstat);
+
+ /*
+ * Destroy string representation of CPUs
+ */
+ if (pg->pghw_cpulist != NULL) {
+ kmem_free(pg->pghw_cpulist,
+ pg->pghw_cpulist_len);
+ pg->pghw_cpulist = NULL;
+ }
+
+ if (pg->pghw_cu_kstat != NULL)
+ kstat_delete(pg->pghw_cu_kstat);
}
/*
@@ -344,11 +438,10 @@ pghw_set_remove(group_t *hwset, pghw_t *pg)
ASSERT(result == 0);
}
-
/*
* Return a string name given a pg_hw sharing type
*/
-static char *
+char *
pghw_type_string(pghw_type_t hw)
{
switch (hw) {
@@ -374,6 +467,34 @@ pghw_type_string(pghw_type_t hw)
}
/*
+ * Return a short string name given a pg_hw sharing type
+ */
+char *
+pghw_type_shortstring(pghw_type_t hw)
+{
+ switch (hw) {
+ case PGHW_IPIPE:
+ return ("instr_pipeline");
+ case PGHW_CACHE:
+ return ("Cache");
+ case PGHW_FPU:
+ return ("FPU");
+ case PGHW_MPIPE:
+ return ("memory_pipeline");
+ case PGHW_CHIP:
+ return ("Socket");
+ case PGHW_MEMORY:
+ return ("Memory");
+ case PGHW_POW_ACTIVE:
+ return ("CPU_PM_Active");
+ case PGHW_POW_IDLE:
+ return ("CPU_PM_Idle");
+ default:
+ return ("unknown");
+ }
+}
+
+/*
* Create / Update routines for PG hw kstats
*
* It is the intention of these kstats to provide some level
@@ -383,11 +504,14 @@ pghw_type_string(pghw_type_t hw)
void
pghw_kstat_create(pghw_t *pg)
{
+ char *class = pghw_type_string(pg->pghw_hw);
+
/*
* Create a physical pg kstat
*/
if ((pg->pghw_kstat = kstat_create("pg", ((pg_t *)pg)->pg_id,
- "pg", "pg", KSTAT_TYPE_NAMED,
+ "pg", "pg",
+ KSTAT_TYPE_NAMED,
sizeof (pghw_kstat) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL)) != NULL) {
/* Class string, hw string, and policy string */
@@ -400,6 +524,28 @@ pghw_kstat_create(pghw_t *pg)
pg->pghw_kstat->ks_private = pg;
kstat_install(pg->pghw_kstat);
}
+
+ if (pg_cpulist_maxlen == 0)
+ pg_cpulist_maxlen = CPUSTR_LEN(max_ncpus);
+
+ /*
+	 * Create a capacity and utilization kstat
+ */
+ if ((pg->pghw_cu_kstat = kstat_create("pg", ((pg_t *)pg)->pg_id,
+ "hardware", class,
+ KSTAT_TYPE_NAMED,
+ sizeof (pghw_cu_kstat) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL)) != NULL) {
+ pg->pghw_cu_kstat->ks_lock = &pghw_kstat_lock;
+ pg->pghw_cu_kstat->ks_data = &pghw_cu_kstat;
+ pg->pghw_cu_kstat->ks_update = pghw_cu_kstat_update;
+ pg->pghw_cu_kstat->ks_private = pg;
+ pg->pghw_cu_kstat->ks_data_size += strlen(class) + 1;
+ /* Allow space for CPU strings */
+ pg->pghw_cu_kstat->ks_data_size += PGHW_KSTAT_STR_LEN_MAX;
+ pg->pghw_cu_kstat->ks_data_size += pg_cpulist_maxlen;
+ kstat_install(pg->pghw_cu_kstat);
+ }
}
int
@@ -411,11 +557,147 @@ pghw_kstat_update(kstat_t *ksp, int rw)
if (rw == KSTAT_WRITE)
return (EACCES);
- pgsp->pg_id.value.ui64 = ((pg_t *)pg)->pg_id;
- pgsp->pg_ncpus.value.ui64 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus);
- pgsp->pg_instance_id.value.ui64 = (uint64_t)pg->pghw_instance;
+ pgsp->pg_id.value.ui32 = ((pg_t *)pg)->pg_id;
+ pgsp->pg_ncpus.value.ui32 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus);
+ pgsp->pg_instance_id.value.ui32 = pg->pghw_instance;
kstat_named_setstr(&pgsp->pg_class, ((pg_t *)pg)->pg_class->pgc_name);
kstat_named_setstr(&pgsp->pg_hw, pghw_type_string(pg->pghw_hw));
kstat_named_setstr(&pgsp->pg_policy, pg_policy_name((pg_t *)pg));
return (0);
}
+
+int
+pghw_cu_kstat_update(kstat_t *ksp, int rw)
+{
+ struct pghw_cu_kstat *pgsp = &pghw_cu_kstat;
+ pghw_t *pg = ksp->ks_private;
+ pghw_util_t *hw_util = &pg->pghw_stats;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ pgsp->pg_id.value.ui32 = ((pg_t *)pg)->pg_id;
+ pgsp->pg_ncpus.value.ui32 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus);
+
+ /*
+ * Allocate memory for the string representing the list of CPUs in PG.
+ * This memory should persist past the call to pghw_cu_kstat_update()
+ * since the kstat snapshot routine will reference this memory.
+ */
+ pghw_cpulist_alloc(pg);
+
+ if (pg->pghw_kstat_gen != pg->pghw_generation) {
+ /*
+		 * The PG kstat generation number is out of sync with the PG's
+		 * generation number. This means that some CPUs could have
+		 * joined or left the PG and it is not possible to compare the
+		 * numbers obtained before and after the generation change.
+ *
+ * Reset the maximum utilization rate and start computing it
+ * from scratch.
+ */
+ hw_util->pghw_util = 0;
+ hw_util->pghw_rate_max = 0;
+ pg->pghw_kstat_gen = pg->pghw_generation;
+ }
+
+ /*
+	 * We can't block on cpu_lock here: when a PG is destroyed (under
+	 * cpu_lock), its kstat is deleted, and that deletion waits for any
+	 * in-progress update to complete, which would never happen while we
+	 * in turn waited for cpu_lock to drop. Deadlocks are fun!
+ */
+ if (mutex_tryenter(&cpu_lock)) {
+ if (pg->pghw_cpulist != NULL &&
+ *(pg->pghw_cpulist) == '\0') {
+ (void) group2intlist(&(((pg_t *)pg)->pg_cpus),
+ pg->pghw_cpulist, pg->pghw_cpulist_len, cpu2id);
+ }
+ cu_pg_update(pg);
+ mutex_exit(&cpu_lock);
+ }
+
+ pgsp->pg_generation.value.ui32 = pg->pghw_kstat_gen;
+ pgsp->pg_hw_util.value.ui64 = hw_util->pghw_util;
+ pgsp->pg_hw_util_time_running.value.ui64 = hw_util->pghw_time_running;
+ pgsp->pg_hw_util_time_stopped.value.ui64 = hw_util->pghw_time_stopped;
+ pgsp->pg_hw_util_rate.value.ui64 = hw_util->pghw_rate;
+ pgsp->pg_hw_util_rate_max.value.ui64 = hw_util->pghw_rate_max;
+ if (pg->pghw_cpulist != NULL)
+ kstat_named_setstr(&pgsp->pg_cpus, pg->pghw_cpulist);
+ else
+ kstat_named_setstr(&pgsp->pg_cpus, "");
+
+ kstat_named_setstr(&pgsp->pg_sharing, pghw_type_string(pg->pghw_hw));
+
+ return (0);
+}
+
+/*
+ * Update the string representation of CPUs in PG (pg->pghw_cpulist).
+ * The string representation is used for kstats.
+ *
+ * The string is allocated if it has not been already, or reallocated if the
+ * PG has more CPUs now. If the PG has the same or a smaller number of CPUs,
+ * but the actual set of CPUs may have changed, the string is reset to the
+ * empty string, which causes the string representation to be recreated. The
+ * pghw_generation field is used to detect whether the CPUs within the PG may
+ * have changed.
+ */
+static void
+pghw_cpulist_alloc(pghw_t *pg)
+{
+ uint_t ncpus = GROUP_SIZE(&((pg_t *)pg)->pg_cpus);
+ size_t len = CPUSTR_LEN(ncpus);
+
+ /*
+	 * If the pghw_cpulist string is already allocated, we need to make
+	 * sure that it has sufficient length. Also, if the set of CPUs may
+	 * have changed, we need to re-generate the string.
+ */
+ if (pg->pghw_cpulist != NULL &&
+ pg->pghw_kstat_gen != pg->pghw_generation) {
+ if (len <= pg->pghw_cpulist_len) {
+ /*
+			 * There is sufficient space in pghw_cpulist for the
+			 * new set of CPUs. Just clear the string to trigger
+			 * re-generation of the list of CPUs.
+ */
+ *(pg->pghw_cpulist) = '\0';
+ } else {
+ /*
+ * There is, potentially, insufficient space in
+ * pghw_cpulist, so reallocate the string.
+ */
+ ASSERT(strlen(pg->pghw_cpulist) < pg->pghw_cpulist_len);
+ kmem_free(pg->pghw_cpulist, pg->pghw_cpulist_len);
+ pg->pghw_cpulist = NULL;
+ pg->pghw_cpulist_len = 0;
+ }
+ }
+
+ if (pg->pghw_cpulist == NULL) {
+ /*
+ * Allocate space to hold cpulist.
+ *
+		 * The length cannot be bigger than the maximum space we have
+		 * allowed for the kstat buffer.
+ */
+ if (len > pg_cpulist_maxlen)
+ len = pg_cpulist_maxlen;
+ if (len > 0) {
+ pg->pghw_cpulist = kmem_zalloc(len, KM_NOSLEEP);
+ if (pg->pghw_cpulist != NULL)
+ pg->pghw_cpulist_len = len;
+ }
+ }
+}
+
+static int
+cpu2id(void *v)
+{
+ cpu_t *cp = (cpu_t *)v;
+
+ ASSERT(v != NULL);
+
+ return (cp->cpu_id);
+}
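
The pg:<id>:hardware kstat created above is readable from userland with
libkstat(3LIB). Below is a minimal sketch (not part of this patch), assuming
only the kstat layout defined by pghw_cu_kstat above; error handling is
omitted for brevity.

#include <kstat.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	kstat_ctl_t *kc = kstat_open();
	kstat_t *ksp;

	for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
		kstat_named_t *kn;

		/* Capacity/utilization kstats: module "pg", name "hardware" */
		if (strcmp(ksp->ks_module, "pg") != 0 ||
		    strcmp(ksp->ks_name, "hardware") != 0)
			continue;

		(void) kstat_read(kc, ksp, NULL);

		kn = kstat_data_lookup(ksp, "hw_util");
		if (kn != NULL)
			(void) printf("pg %d hw_util %llu\n", ksp->ks_instance,
			    (unsigned long long)kn->value.ui64);

		kn = kstat_data_lookup(ksp, "cpus");
		if (kn != NULL)
			(void) printf("pg %d cpus %s\n", ksp->ks_instance,
			    KSTAT_NAMED_STR_PTR(kn));
	}
	(void) kstat_close(kc);
	return (0);
}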
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index 9006be10f4..5133e80e69 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -98,6 +98,7 @@ CHKHDRS= \
byteorder.h \
callb.h \
callo.h \
+ cap_util.h \
cpucaps.h \
cpucaps_impl.h \
ccompile.h \
diff --git a/usr/src/uts/common/sys/cap_util.h b/usr/src/uts/common/sys/cap_util.h
new file mode 100644
index 0000000000..7e25ba6697
--- /dev/null
+++ b/usr/src/uts/common/sys/cap_util.h
@@ -0,0 +1,173 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_CAP_UTIL_H
+#define _SYS_CAP_UTIL_H
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/kcpc.h>
+#include <sys/cpc_impl.h>
+#include <sys/pghw.h>
+#include <sys/cmt.h>
+
+#ifdef _KERNEL
+
+/*
+ * Capacity and utilization flags for each CPU
+ */
+#define CU_CPU_CNTRS_ON 1 /* CPU performance counters are on */
+#define CU_CPU_CNTRS_OFF_ON 2 /* Off -> on transition */
+
+/*
+ * Macro that returns whether the CPU performance counters are turned on for
+ * a given CPU
+ */
+#define CU_CPC_ON(cp) \
+ ((cp) != NULL && (cp)->cpu_cu_info != NULL && \
+ ((cp)->cpu_cu_info->cu_flag & CU_CPU_CNTRS_ON))
+
+
+/*
+ * Per counter statistics
+ */
+typedef struct cu_cntr_stats {
+ hrtime_t cs_time_running; /* running total of time counting */
+ hrtime_t cs_time_stopped; /* ... time not counting */
+ hrtime_t cs_time_start; /* start time of current sample */
+ uint64_t cs_value_start; /* starting value for next sample */
+ uint64_t cs_value_last; /* last value */
+ uint64_t cs_value_total; /* running total */
+ uint64_t cs_rate; /* observed rate since last */
+ uint64_t cs_rate_max; /* maximum rate */
+ kcpc_request_t *cs_cpc_req; /* corresponding CPC request */
+	struct cpu	*cs_cpu_start;	/* CPU where starting value was read */
+} cu_cntr_stats_t;
+
+
+/*
+ * Counter info for a PG hardware sharing relationship
+ */
+typedef struct cu_cntr_info {
+ cpu_t *ci_cpu; /* CPU being measured */
+ pghw_t *ci_pg; /* hardware PG being measured */
+ kstat_t *ci_kstat; /* kstats being exported */
+ cu_cntr_stats_t *ci_stats; /* counter statistics */
+ uint_t ci_nstats; /* number of statistics */
+} cu_cntr_info_t;
+
+
+/*
+ * Each CPU can have one or more CPC contexts for measuring capacity and
+ * utilization
+ *
+ * One CPC context is needed per CPU if the counter events needed to measure
+ * capacity and utilization on each CPU can be programmed onto all the counters
+ * on a CPU at the same time and there are no more desired counter events
+ * than counters on each CPU. Otherwise, the desired counter
+ * events are assigned across multiple CPC contexts, so the contexts and their
+ * counter events can be multiplexed onto the counters over time to get the
+ * data for all of the counter events.
+ */
+typedef struct cu_cpc_ctx {
+ int cur_index; /* index for current context */
+ int nctx; /* number of CPC contexts */
+ kcpc_ctx_t **ctx_ptr_array; /* array of context pointers */
+ size_t ctx_ptr_array_sz; /* size of array */
+} cu_cpc_ctx_t;
+
+/*
+ * Per CPU capacity and utilization info
+ */
+typedef struct cu_cpu_info {
+ struct cpu *cu_cpu; /* CPU for the statistics */
+ uint_t cu_flag; /* capacity & utilization flag */
+ hrtime_t cu_sample_time; /* when last sample taken */
+ cu_cpc_ctx_t cu_cpc_ctx; /* performance counter contexts */
+ cu_cntr_stats_t *cu_cntr_stats; /* counter statistics array */
+ uint_t cu_ncntr_stats; /* number of counter statistics */
+ uint_t cu_disabled; /* count of disable requests */
+ /*
+ * Per PG hardware sharing relationship counter info
+ */
+ cu_cntr_info_t *cu_cntr_info[PGHW_NUM_COMPONENTS];
+} cu_cpu_info_t;
+
+/*
+ * COMMON INTERFACE ROUTINES
+ */
+
+/*
+ * Set up capacity and utilization support
+ */
+extern void cu_init(void);
+
+/*
+ * Tear down capacity and utilization support
+ */
+extern int cu_fini(void);
+
+/*
+ * Program CPC for capacity and utilization on given CPU
+ */
+extern void cu_cpc_program(struct cpu *, int *);
+
+/*
+ * Unprogram CPC for capacity and utilization on given CPU
+ */
+extern void cu_cpc_unprogram(struct cpu *, int *);
+
+/*
+ * Update counter statistics on a given CPU
+ */
+extern int cu_cpu_update(struct cpu *, boolean_t);
+
+/*
+ * Update utilization and capacity data for CMT PG
+ */
+extern void cu_pg_update(pghw_t *);
+
+/*
+ * Disable or enable capacity and utilization on all CPUs
+ */
+extern void cu_disable(void);
+extern void cu_enable(void);
+
+/*
+ * PLATFORM SPECIFIC INTERFACE ROUTINES
+ */
+extern int cu_plat_cpc_init(cpu_t *, kcpc_request_list_t *, int);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CAP_UTIL_H */
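
The common interface routines above are intended to be driven by CPU state
changes. The following is a hypothetical sketch of the expected call order,
assuming the int * argument of cu_cpc_program()/cu_cpc_unprogram() is an
error return and that the caller holds whatever locks these routines expect:

static void
example_cu_lifecycle(cpu_t *cp)
{
	int error;

	cu_init();			/* once, during boot */

	cu_cpc_program(cp, &error);	/* when cp comes on-line */

	if (CU_CPC_ON(cp))		/* counters running for cp? */
		(void) cu_cpu_update(cp, B_TRUE);	/* force a sample */

	cu_cpc_unprogram(cp, &error);	/* when cp goes off-line */
	(void) cu_fini();		/* on framework teardown */
}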
diff --git a/usr/src/uts/common/sys/cmt.h b/usr/src/uts/common/sys/cmt.h
index 4e7ed28656..afdb6730a6 100644
--- a/usr/src/uts/common/sys/cmt.h
+++ b/usr/src/uts/common/sys/cmt.h
@@ -63,6 +63,7 @@ typedef struct pg_cmt {
int cmt_nchildren; /* # of children CMT PGs */
struct group cmt_cpus_actv;
struct bitset cmt_cpus_actv_set; /* bitset of active CPUs */
+ kstat_t *cmt_kstat; /* cmt kstats exported */
} pg_cmt_t;
/*
diff --git a/usr/src/uts/common/sys/cpc_impl.h b/usr/src/uts/common/sys/cpc_impl.h
index 1b57c76c10..ae89c90508 100644
--- a/usr/src/uts/common/sys/cpc_impl.h
+++ b/usr/src/uts/common/sys/cpc_impl.h
@@ -131,7 +131,7 @@ typedef struct _kcpc_ctx kcpc_ctx_t;
struct _kcpc_ctx {
struct _kcpc_set *kc_set; /* linked list of all bound sets */
- uint32_t kc_flags;
+ volatile uint_t kc_flags;
kcpc_pic_t *kc_pics; /* pointer to array of per-pic data */
hrtime_t kc_hrtime; /* gethrtime() at last sample */
uint64_t kc_vtick; /* virtualized %tick */
@@ -214,20 +214,18 @@ extern hrtime_t tsc_read(void);
struct cpu;
extern uint_t cpc_ncounters;
-extern kmutex_t kcpc_ctx_llock[]; /* protects ctx_list */
-extern kcpc_ctx_t *kcpc_ctx_list[]; /* head of list */
extern krwlock_t kcpc_cpuctx_lock; /* lock for 'kcpc_cpuctx' below */
extern int kcpc_cpuctx; /* number of cpu-specific contexts */
extern void kcpc_invalidate_all(void);
extern void kcpc_passivate(void);
-extern void kcpc_remote_stop(struct cpu *cp);
+extern void kcpc_cpu_stop(struct cpu *, boolean_t);
extern int kcpc_pcbe_tryload(const char *, uint_t, uint_t, uint_t);
-extern void kcpc_remote_program(struct cpu *cp);
+extern void kcpc_cpu_program(struct cpu *, kcpc_ctx_t *);
extern void kcpc_register_dcpc(void (*func)(uint64_t));
extern void kcpc_unregister_dcpc(void);
-extern kcpc_ctx_t *kcpc_ctx_alloc(void);
+extern kcpc_ctx_t *kcpc_ctx_alloc(int);
extern int kcpc_assign_reqs(struct _kcpc_set *, kcpc_ctx_t *);
extern void kcpc_ctx_free(kcpc_ctx_t *);
extern int kcpc_configure_reqs(kcpc_ctx_t *, struct _kcpc_set *, int *);
diff --git a/usr/src/uts/common/sys/cpc_pcbe.h b/usr/src/uts/common/sys/cpc_pcbe.h
index 7522a9bf82..eb168fcf2c 100644
--- a/usr/src/uts/common/sys/cpc_pcbe.h
+++ b/usr/src/uts/common/sys/cpc_pcbe.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -37,8 +36,6 @@
#ifndef _SYS_CPC_PCBE_H
#define _SYS_CPC_PCBE_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/inttypes.h>
#include <sys/cpc_impl.h>
@@ -51,6 +48,8 @@ extern "C" {
*/
#define PCBE_VER_1 1
+#define PCBE_IMPL_NAME_P4HT "Pentium 4 with HyperThreading"
+
typedef struct __pcbe_ops {
uint_t pcbe_ver;
uint_t pcbe_caps;
diff --git a/usr/src/uts/common/sys/cpuvar.h b/usr/src/uts/common/sys/cpuvar.h
index aece259a35..b52192b419 100644
--- a/usr/src/uts/common/sys/cpuvar.h
+++ b/usr/src/uts/common/sys/cpuvar.h
@@ -222,6 +222,16 @@ typedef struct cpu {
uint_t cpu_rotor; /* for cheap pseudo-random numbers */
+ struct cu_cpu_info *cpu_cu_info; /* capacity & util. info */
+
+ /*
+ * cpu_generation is updated whenever CPU goes on-line or off-line.
+ * Updates to cpu_generation are protected by cpu_lock.
+ *
+ * See CPU_NEW_GENERATION() macro below.
+ */
+ volatile uint_t cpu_generation; /* tracking on/off-line */
+
/*
* New members must be added /before/ this member, as the CTF tools
* rely on this being the last field before cpu_m, so they can
@@ -597,6 +607,13 @@ extern struct cpu *curcpup(void);
#define CPU_STATS(cp, stat) \
((cp)->cpu_stats.stat)
+/*
+ * Increment CPU generation value.
+ * This macro should be called whenever CPU goes on-line or off-line.
+ * Updates to cpu_generation should be protected by cpu_lock.
+ */
+#define CPU_NEW_GENERATION(cp) ((cp)->cpu_generation++)
+
#endif /* _KERNEL || _KMEMUSER */
/*
@@ -726,6 +743,49 @@ void cpu_enable_intr(struct cpu *cp); /* start issuing interrupts to cpu */
*/
extern kmutex_t cpu_lock; /* lock protecting CPU data */
+/*
+ * CPU state change events
+ *
+ * Various subsystems need to know when CPUs change their state. They get this
+ * information by registering CPU state change callbacks using
+ * register_cpu_setup_func(). Whenever any CPU changes its state, the callback
+ * function is called. The callback function is passed three arguments:
+ *
+ * Event, described by cpu_setup_t
+ * CPU ID
+ * Transparent pointer passed when registering the callback
+ *
+ * The callback function is called with cpu_lock held. The return value from the
+ * callback function is usually ignored, except for CPU_CONFIG and CPU_UNCONFIG
+ * events. For these two events, a non-zero return value indicates a failure and
+ * prevents successful completion of the operation.
+ *
+ * New events may be added in the future. Callback functions should ignore any
+ * events that they do not understand.
+ *
+ * The following events provide notification callbacks:
+ *
+ *	CPU_INIT	A new CPU is started and added to the list of active
+ *			CPUs. This event is only used during boot.
+ *
+ *	CPU_CONFIG	A newly inserted CPU is prepared to start running code.
+ *			This event is called by DR code.
+ *
+ *	CPU_UNCONFIG	CPU has been powered off and needs cleanup.
+ *			This event is called by DR code.
+ *
+ *	CPU_ON		CPU is enabled but does not run anything yet.
+ *
+ *	CPU_INTR_ON	CPU is enabled and has interrupts enabled.
+ *
+ *	CPU_OFF		CPU is going offline but can still run threads.
+ *
+ *	CPU_CPUPART_OUT	CPU is going to move out of its partition.
+ *
+ *	CPU_CPUPART_IN	CPU is going to move to a new partition.
+ *
+ *	CPU_SETUP	CPU is set up during boot and can run threads.
+ */
typedef enum {
CPU_INIT,
CPU_CONFIG,
@@ -734,7 +794,8 @@ typedef enum {
CPU_OFF,
CPU_CPUPART_IN,
CPU_CPUPART_OUT,
- CPU_SETUP
+ CPU_SETUP,
+ CPU_INTR_ON
} cpu_setup_t;
typedef int cpu_setup_func_t(cpu_setup_t, int, void *);
@@ -748,6 +809,13 @@ extern void unregister_cpu_setup_func(cpu_setup_func_t *, void *);
extern void cpu_state_change_notify(int, cpu_setup_t);
/*
+ * Call specified function on the given CPU
+ */
+typedef void (*cpu_call_func_t)(uintptr_t, uintptr_t);
+extern void cpu_call(cpu_t *, cpu_call_func_t, uintptr_t, uintptr_t);
+
+
+/*
* Create various strings that describe the given CPU for the
* processor_info system call and configuration-related kstats.
*/
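
The following is a sketch of how a subsystem might consume these
notifications; the mysub_* names and the per-CPU resource hooks are
hypothetical, and per the comment above the callback runs with cpu_lock held:

/* Hypothetical per-CPU resource hooks, stubbed for illustration */
static int mysub_alloc_percpu(int id) { return (0); }
static void mysub_free_percpu(int id) { }

static int
mysub_cpu_setup(cpu_setup_t what, int id, void *arg)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	switch (what) {
	case CPU_CONFIG:
		/* A non-zero return here fails the DR configure operation */
		return (mysub_alloc_percpu(id));
	case CPU_UNCONFIG:
		mysub_free_percpu(id);
		break;
	default:
		/* Ignore events this subsystem does not understand */
		break;
	}
	return (0);
}

static void
mysub_init(void)
{
	mutex_enter(&cpu_lock);
	register_cpu_setup_func(mysub_cpu_setup, NULL);
	mutex_exit(&cpu_lock);
}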
diff --git a/usr/src/uts/common/sys/group.h b/usr/src/uts/common/sys/group.h
index bb5613bc35..2db1ac01bb 100644
--- a/usr/src/uts/common/sys/group.h
+++ b/usr/src/uts/common/sys/group.h
@@ -101,6 +101,17 @@ void group_remove_at(group_t *, uint_t);
*/
uint_t group_find(group_t *, void *);
+/*
+ * Convert a group to a string with list of integers.
+ *
+ * The consecutive integer values are represented using x-y notation.
+ * The resulting string looks like "1,2-5,8"
+ *
+ * The convert argument is used to map group elements to integer IDs.
+ * The output buffer and its length are specified in the arguments.
+ */
+extern char *group2intlist(group_t *, char *, size_t, int (convert)(void*));
+
#endif /* !_KERNEL && !_KMEMUSER */
#ifdef __cplusplus
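
To illustrate the x-y notation that group2intlist() produces, here is a
standalone userland analogue (illustration only, not the kernel routine):

#include <stdio.h>

/* Print a sorted ID array, compressing consecutive runs as "x-y" */
static void
intlist_print(const int *ids, int n)
{
	for (int i = 0; i < n; ) {
		int start = i;

		while (i + 1 < n && ids[i + 1] == ids[i] + 1)
			i++;
		if (i > start)
			(void) printf("%d-%d", ids[start], ids[i]);
		else
			(void) printf("%d", ids[start]);
		if (++i < n)
			(void) printf(",");
	}
	(void) printf("\n");
}

int
main(void)
{
	int ids[] = { 1, 3, 4, 5, 6, 9 };

	intlist_print(ids, 6);	/* prints "1,3-6,9" */
	return (0);
}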
diff --git a/usr/src/uts/common/sys/kcpc.h b/usr/src/uts/common/sys/kcpc.h
index f30e093f78..d90b1c1d29 100644
--- a/usr/src/uts/common/sys/kcpc.h
+++ b/usr/src/uts/common/sys/kcpc.h
@@ -28,11 +28,13 @@
#include <sys/cpc_impl.h>
#include <sys/ksynch.h>
+#include <sys/types.h>
#ifdef __cplusplus
extern "C" {
#endif
+
/*
* Kernel clients need this file in order to know what a request is and how to
* program one.
@@ -74,8 +76,33 @@ struct _kcpc_request {
uint_t kr_flags;
uint_t kr_nattrs;
kcpc_attr_t *kr_attr;
+ void *kr_ptr; /* Ptr assigned by requester */
};
+typedef struct _kcpc_request_list {
+ kcpc_request_t *krl_list; /* counter event requests */
+ int krl_cnt; /* how many requests */
+ int krl_max; /* max request entries */
+} kcpc_request_list_t;
+
+/*
+ * Type of update function to be called when reading counters on the current
+ * CPU in kcpc_read()
+ */
+typedef int (*kcpc_update_func_t)(void *, uint64_t);
+
+/*
+ * Type of read function to be called when reading counters on current CPU
+ * (ie. should be same type signature as kcpc_read())
+ */
+typedef int (*kcpc_read_func_t)(kcpc_update_func_t);
+
+
+/*
+ * Initialize the kcpc framework
+ */
+extern int kcpc_init(void);
+
/*
* Bind the set to the indicated thread.
* Returns 0 on success, or an errno in case of error. If EINVAL is returned,
@@ -96,6 +123,56 @@ extern int kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime,
uint64_t *tick);
/*
+ * Create CPC context containing specified list of requested counter events
+ */
+extern int kcpc_cpu_ctx_create(struct cpu *cp, kcpc_request_list_t *req_list,
+ int kmem_flags, kcpc_ctx_t ***ctx_ptr_array, size_t *ctx_ptr_array_sz);
+
+/*
+ * Returns whether specified counter event is supported
+ */
+extern boolean_t kcpc_event_supported(char *event);
+
+/*
+ * Initialize list of CPC event requests
+ */
+extern kcpc_request_list_t *kcpc_reqs_init(int nreqs, int kmem_flags);
+
+/*
+ * Add counter event request to given list of counter event requests
+ */
+extern int kcpc_reqs_add(kcpc_request_list_t *req_list, char *event,
+ uint64_t preset, uint_t flags, uint_t nattrs, kcpc_attr_t *attr, void *ptr,
+ int kmem_flags);
+
+/*
+ * Reset list of CPC event requests so its space can be used for another set
+ * of requests
+ */
+extern int kcpc_reqs_reset(kcpc_request_list_t *req_list);
+
+/*
+ * Free given list of counter event requests
+ */
+extern int kcpc_reqs_fini(kcpc_request_list_t *req_list);
+
+/*
+ * Read CPC data for given event on current CPU
+ */
+extern int kcpc_read(kcpc_update_func_t);
+
+/*
+ * Program current CPU with given CPC context
+ */
+extern void kcpc_program(kcpc_ctx_t *ctx, boolean_t for_thread,
+ boolean_t cu_interpose);
+
+/*
+ * Unprogram CPC counters on current CPU
+ */
+extern void kcpc_unprogram(kcpc_ctx_t *ctx, boolean_t cu_interpose);
+
+/*
* Unbind a request and release the associated resources.
*/
extern int kcpc_unbind(kcpc_set_t *set);
@@ -128,6 +205,8 @@ extern void kcpc_idle_restore(struct cpu *cp);
extern krwlock_t kcpc_cpuctx_lock; /* lock for 'kcpc_cpuctx' below */
extern int kcpc_cpuctx; /* number of cpu-specific contexts */
+extern void kcpc_free(kcpc_ctx_t *ctx, int isexec);
+
/*
* 'dtrace_cpc_in_use' contains the number of currently active cpc provider
* based enablings. See the block comment in uts/common/os/dtrace_subr.c for
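
A hypothetical sketch of the request-list interfaces declared above; the
"PAPI_tot_ins" event name is illustrative, and CPC_COUNT_SYSTEM is assumed to
be the appropriate flag for system-only counting:

static void
example_build_reqs(void)
{
	kcpc_request_list_t *reqs;

	reqs = kcpc_reqs_init(2, KM_NOSLEEP);	/* room for two requests */
	if (reqs == NULL)
		return;

	/* Check for support before requesting the event */
	if (kcpc_event_supported("PAPI_tot_ins"))
		(void) kcpc_reqs_add(reqs, "PAPI_tot_ins", 0,
		    CPC_COUNT_SYSTEM, 0, NULL, NULL, KM_NOSLEEP);

	/* ... hand the list to kcpc_cpu_ctx_create() here ... */

	(void) kcpc_reqs_fini(reqs);
}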
diff --git a/usr/src/uts/common/sys/pghw.h b/usr/src/uts/common/sys/pghw.h
index ab8b0a9bbe..f0550dba7e 100644
--- a/usr/src/uts/common/sys/pghw.h
+++ b/usr/src/uts/common/sys/pghw.h
@@ -89,6 +89,27 @@ typedef enum pghw_type {
typedef uintptr_t pghw_handle_t;
/*
+ * Representation of PG hardware utilization.
+ *
+ * NOTE: All the sums listed below are sums of the running total of each item
+ * for each CPU in the PG (e.g. sum(utilization) is the sum of the running
+ * total utilization of each CPU in the PG).
+ */
+typedef struct pghw_util {
+ uint64_t pghw_util; /* sum(utilization) */
+ uint64_t pghw_rate; /* Last observed utilization rate */
+ uint64_t pghw_rate_max; /* Max observed rate (in units/sec) */
+ hrtime_t pghw_time_stamp; /* Timestamp of last snapshot */
+ /*
+ * sum(time utilization counters on)
+ */
+ hrtime_t pghw_time_running;
+ /*
+ * sum(time utilization counters off)
+ */
+ hrtime_t pghw_time_stopped;
+} pghw_util_t;
+
+
+/*
* Processor Group (physical sharing relationship)
*/
typedef struct pghw {
@@ -97,6 +118,23 @@ typedef struct pghw {
id_t pghw_instance; /* sharing instance identifier */
pghw_handle_t pghw_handle; /* hw specific opaque handle */
kstat_t *pghw_kstat; /* physical kstats exported */
+ kstat_t *pghw_cu_kstat; /* for capacity and utilization */
+ /*
+	 * pghw_generation should be updated by superclasses whenever the PG
+	 * changes significantly (e.g. new CPUs join or leave the PG).
+ */
+ uint_t pghw_generation; /* generation number */
+
+ /*
+ * The following fields are used by PGHW cu kstats
+ */
+ char *pghw_cpulist; /* list of CPUs */
+ size_t pghw_cpulist_len; /* length of the list */
+ /*
+ * Generation number at kstat update time
+ */
+ uint_t pghw_kstat_gen;
+ pghw_util_t pghw_stats; /* Utilization data */
} pghw_t;
/*
@@ -111,32 +149,35 @@ typedef struct cpu_physid {
/*
* Physical PG initialization / CPU service hooks
*/
-void pghw_init(pghw_t *, cpu_t *, pghw_type_t);
-void pghw_fini(pghw_t *);
-void pghw_cpu_add(pghw_t *, cpu_t *);
-pghw_t *pghw_place_cpu(cpu_t *, pghw_type_t);
+extern void pghw_init(pghw_t *, cpu_t *, pghw_type_t);
+extern void pghw_fini(pghw_t *);
+extern void pghw_cpu_add(pghw_t *, cpu_t *);
+extern pghw_t *pghw_place_cpu(cpu_t *, pghw_type_t);
/*
* Physical ID cache creation / destruction
*/
-void pghw_physid_create(cpu_t *);
-void pghw_physid_destroy(cpu_t *);
+extern void pghw_physid_create(cpu_t *);
+extern void pghw_physid_destroy(cpu_t *);
/*
 * CPU / PG hardware related search operations
*/
-pghw_t *pghw_find_pg(cpu_t *, pghw_type_t);
-pghw_t *pghw_find_by_instance(id_t, pghw_type_t);
-group_t *pghw_set_lookup(pghw_type_t);
-
-void pghw_kstat_create(pghw_t *);
-int pghw_kstat_update(kstat_t *, int);
+extern pghw_t *pghw_find_pg(cpu_t *, pghw_type_t);
+extern pghw_t *pghw_find_by_instance(id_t, pghw_type_t);
+extern group_t *pghw_set_lookup(pghw_type_t);
/* Hardware sharing relationship platform interfaces */
-int pg_plat_hw_shared(cpu_t *, pghw_type_t);
-int pg_plat_cpus_share(cpu_t *, cpu_t *, pghw_type_t);
-id_t pg_plat_hw_instance_id(cpu_t *, pghw_type_t);
-pghw_type_t pg_plat_hw_rank(pghw_type_t, pghw_type_t);
+extern int pg_plat_hw_shared(cpu_t *, pghw_type_t);
+extern int pg_plat_cpus_share(cpu_t *, cpu_t *, pghw_type_t);
+extern id_t pg_plat_hw_instance_id(cpu_t *, pghw_type_t);
+extern pghw_type_t pg_plat_hw_rank(pghw_type_t, pghw_type_t);
+
+/*
+ * String representation of the hardware type
+ */
+extern char *pghw_type_string(pghw_type_t);
+extern char *pghw_type_shortstring(pghw_type_t);
/*
* What comprises a "core" may vary across processor implementations,
@@ -144,7 +185,7 @@ pghw_type_t pg_plat_hw_rank(pghw_type_t, pghw_type_t);
* is no PGHW_CORE type, but we provide an interface here to allow platforms
* to express cpu <=> core mappings.
*/
-id_t pg_plat_get_core_id(cpu_t *);
+extern id_t pg_plat_get_core_id(cpu_t *);
#endif /* !_KERNEL && !_KMEMUSER */
diff --git a/usr/src/uts/common/sys/systm.h b/usr/src/uts/common/sys/systm.h
index 84ccfb9991..4c3dc7f886 100644
--- a/usr/src/uts/common/sys/systm.h
+++ b/usr/src/uts/common/sys/systm.h
@@ -270,6 +270,7 @@ int spl8(void);
void splx(int);
void set_base_spl(void);
int __ipltospl(int);
+int spl_xcall(void);
void softcall_init(void);
void softcall(void (*)(void *), void *);
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index 8e532685c7..8621e3ef55 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -2669,6 +2669,13 @@ cpuid_get_clogid(cpu_t *cpu)
return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
}
+int
+cpuid_get_cacheid(cpu_t *cpu)
+{
+ ASSERT(cpuid_checkpass(cpu, 1));
+ return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
+}
+
uint_t
cpuid_get_procnodeid(cpu_t *cpu)
{
diff --git a/usr/src/uts/i86pc/os/intr.c b/usr/src/uts/i86pc/os/intr.c
index 786cd29e8f..fc0ef9e260 100644
--- a/usr/src/uts/i86pc/os/intr.c
+++ b/usr/src/uts/i86pc/os/intr.c
@@ -1179,6 +1179,12 @@ getpil(void)
}
int
+spl_xcall(void)
+{
+ return (splr(ipltospl(XCALL_PIL)));
+}
+
+int
interrupts_enabled(void)
{
ulong_t flag;
diff --git a/usr/src/uts/i86pc/os/mp_call.c b/usr/src/uts/i86pc/os/mp_call.c
index 5725b18d85..df18f16588 100644
--- a/usr/src/uts/i86pc/os/mp_call.c
+++ b/usr/src/uts/i86pc/os/mp_call.c
@@ -32,6 +32,8 @@
#include <sys/systm.h>
#include <sys/promif.h>
#include <sys/xc_levels.h>
+#include <sys/spl.h>
+#include <sys/bitmap.h>
/*
* Interrupt another CPU.
@@ -54,3 +56,38 @@ poke_cpu(int cpun)
*/
send_dirint(cpun, XC_CPUPOKE_PIL);
}
+
+/*
+ * Call a function on a target CPU
+ */
+void
+cpu_call(cpu_t *cp, cpu_call_func_t func, uintptr_t arg1, uintptr_t arg2)
+{
+ cpuset_t set;
+
+ if (panicstr)
+ return;
+
+ /*
+ * Prevent CPU from going off-line
+ */
+ kpreempt_disable();
+
+ /*
+	 * If we are on the target CPU, call the function directly, but raise
+	 * the PIL to XC_HI_PIL first.
+	 * This guarantees that functions called via cpu_call() cannot ever
+ * interrupt each other.
+ */
+ if (CPU == cp) {
+ int save_spl = splr(ipltospl(XC_HI_PIL));
+
+ (*func)(arg1, arg2);
+ splx(save_spl);
+ } else {
+ CPUSET_ONLY(set, cp->cpu_id);
+ xc_call((xc_arg_t)arg1, (xc_arg_t)arg2, 0, CPUSET2BV(set),
+ (xc_func_t)func);
+ }
+ kpreempt_enable();
+}
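
A hypothetical caller of cpu_call(); my_func and the lookup of the target
cpu_t are illustrative, with cpu_lock held so the target CPU cannot
disappear during the lookup:

static void
my_func(uintptr_t arg1, uintptr_t arg2)
{
	/* Runs on the target CPU with the PIL raised to the cross-call level */
}

static void
example_cpu_call(processorid_t target)
{
	cpu_t *cp;

	mutex_enter(&cpu_lock);
	cp = cpu_get(target);
	if (cp != NULL)
		cpu_call(cp, my_func, 0, 0);
	mutex_exit(&cpu_lock);
}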
diff --git a/usr/src/uts/i86pc/os/mp_machdep.c b/usr/src/uts/i86pc/os/mp_machdep.c
index 7470a1ef38..80e371850b 100644
--- a/usr/src/uts/i86pc/os/mp_machdep.c
+++ b/usr/src/uts/i86pc/os/mp_machdep.c
@@ -1,3 +1,4 @@
+
/*
* CDDL HEADER START
*
@@ -61,6 +62,7 @@
#include <sys/hpet.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
+#include <sys/cpc_pcbe.h>
#define OFFSETOF(s, m) (size_t)(&(((s *)0)->m))
@@ -1680,3 +1682,37 @@ pg_cmt_affinity_hw(pghw_type_t hw)
else
return (0);
}
+
+/*
+ * Return the number of counter events requested to measure hardware capacity
+ * and utilization, and set up CPC requests for the specified CPU as needed
+ *
+ * May return 0 when platform or processor specific code knows that no CPC
+ * events should be programmed on this CPU, or -1 when platform or processor
+ * specific code doesn't know which counter events are best to use and common
+ * code should decide for itself
+ */
+int
+/* LINTED E_FUNC_ARG_UNUSED */
+cu_plat_cpc_init(cpu_t *cp, kcpc_request_list_t *reqs, int nreqs)
+{
+ const char *impl_name;
+
+ /*
+ * Return error if pcbe_ops not set
+ */
+ if (pcbe_ops == NULL)
+ return (-1);
+
+ /*
+ * Return that no CPC events should be programmed on hyperthreaded
+ * Pentium 4 and return error for all other x86 processors to tell
+ * common code to decide what counter events to program on those CPUs
+ * for measuring hardware capacity and utilization
+ */
+ impl_name = pcbe_ops->pcbe_impl_name();
+ if (impl_name != NULL && strcmp(impl_name, PCBE_IMPL_NAME_P4HT) == 0)
+ return (0);
+ else
+ return (-1);
+}
diff --git a/usr/src/uts/i86pc/sys/xc_levels.h b/usr/src/uts/i86pc/sys/xc_levels.h
index 31ba6441fa..0492e48a1d 100644
--- a/usr/src/uts/i86pc/sys/xc_levels.h
+++ b/usr/src/uts/i86pc/sys/xc_levels.h
@@ -35,6 +35,7 @@ extern "C" {
#define XC_CPUPOKE_PIL 11 /* poke to cause wakeup, no service function */
#define XC_SYS_PIL 13 /* should be defined elsewhere */
#define XC_HI_PIL 15 /* cross call with service function */
+#define XCALL_PIL XC_HI_PIL /* alias for XC_HI_PIL */
#ifdef __cplusplus
}
diff --git a/usr/src/uts/intel/genunix/Makefile b/usr/src/uts/intel/genunix/Makefile
index db7b60ff14..ab0073268f 100644
--- a/usr/src/uts/intel/genunix/Makefile
+++ b/usr/src/uts/intel/genunix/Makefile
@@ -20,7 +20,7 @@
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -84,6 +84,8 @@ $(PATCH_BUILD)IPCTF_TARGET =
CPPFLAGS += -I$(SRC)/common
CPPFLAGS += -I$(SRC)/uts/common/fs/zfs
+CPPFLAGS += -I$(UTSBASE)/i86pc
+
#
# For now, disable these lint checks; maintainers should endeavor
# to investigate and remove these for maximum lint coverage.
diff --git a/usr/src/uts/intel/ia32/os/cpc_subr.c b/usr/src/uts/intel/ia32/os/cpc_subr.c
index 1a71c1c431..1e3049a399 100644
--- a/usr/src/uts/intel/ia32/os/cpc_subr.c
+++ b/usr/src/uts/intel/ia32/os/cpc_subr.c
@@ -188,33 +188,6 @@ kcpc_hw_load_pcbe(void)
cpuid_getmodel(CPU), cpuid_getstep(CPU)));
}
-static int
-kcpc_remotestop_func(void)
-{
- ASSERT(CPU->cpu_cpc_ctx != NULL);
- pcbe_ops->pcbe_allstop();
- atomic_or_uint(&CPU->cpu_cpc_ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);
-
- return (0);
-}
-
-/*
- * Ensure the counters are stopped on the given processor.
- *
- * Callers must ensure kernel preemption is disabled.
- */
-void
-kcpc_remote_stop(cpu_t *cp)
-{
- cpuset_t set;
-
- CPUSET_ZERO(set);
-
- CPUSET_ADD(set, cp->cpu_id);
-
- xc_sync(0, 0, 0, CPUSET2BV(set), (xc_func_t)kcpc_remotestop_func);
-}
-
/*
* Called by the generic framework to check if it's OK to bind a set to a CPU.
*/
@@ -292,28 +265,3 @@ kcpc_hw_lwp_hook(void)
mutex_exit(&cpu_lock);
return (0);
}
-
-static int
-kcpc_remoteprogram_func(void)
-{
- ASSERT(CPU->cpu_cpc_ctx != NULL);
-
- pcbe_ops->pcbe_program(CPU->cpu_cpc_ctx);
-
- return (0);
-}
-
-/*
- * Ensure counters are enabled on the given processor.
- */
-void
-kcpc_remote_program(cpu_t *cp)
-{
- cpuset_t set;
-
- CPUSET_ZERO(set);
-
- CPUSET_ADD(set, cp->cpu_id);
-
- xc_sync(0, 0, 0, CPUSET2BV(set), (xc_func_t)kcpc_remoteprogram_func);
-}
diff --git a/usr/src/uts/intel/pcbe/opteron_pcbe.c b/usr/src/uts/intel/pcbe/opteron_pcbe.c
index 18a309eca6..cb97d21b78 100644
--- a/usr/src/uts/intel/pcbe/opteron_pcbe.c
+++ b/usr/src/uts/intel/pcbe/opteron_pcbe.c
@@ -563,26 +563,6 @@ opt_pcbe_list_attrs(void)
return ("edge,pc,inv,cmask,umask");
}
-/*ARGSUSED*/
-static uint64_t
-opt_pcbe_event_coverage(char *event)
-{
- /*
- * Fortunately, all counters can count all events.
- */
- return (0xF);
-}
-
-static uint64_t
-opt_pcbe_overflow_bitmap(void)
-{
- /*
- * Unfortunately, this chip cannot detect which counter overflowed, so
- * we must act as if they all did.
- */
- return (0xF);
-}
-
static amd_generic_event_t *
find_generic_event(char *name)
{
@@ -608,6 +588,32 @@ find_event(char *name)
}
/*ARGSUSED*/
+static uint64_t
+opt_pcbe_event_coverage(char *event)
+{
+ /*
+ * Check whether counter event is supported
+ */
+ if (find_event(event) == NULL && find_generic_event(event) == NULL)
+ return (0);
+
+ /*
+ * Fortunately, all counters can count all events.
+ */
+ return (0xF);
+}
+
+static uint64_t
+opt_pcbe_overflow_bitmap(void)
+{
+ /*
+ * Unfortunately, this chip cannot detect which counter overflowed, so
+ * we must act as if they all did.
+ */
+ return (0xF);
+}
+
+/*ARGSUSED*/
static int
opt_pcbe_configure(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
uint_t nattrs, kcpc_attr_t *attrs, void **data, void *token)
diff --git a/usr/src/uts/intel/pcbe/p4_pcbe.c b/usr/src/uts/intel/pcbe/p4_pcbe.c
index 0fffcd2961..8c05c599a3 100644
--- a/usr/src/uts/intel/pcbe/p4_pcbe.c
+++ b/usr/src/uts/intel/pcbe/p4_pcbe.c
@@ -522,7 +522,7 @@ static const char *
p4_pcbe_impl_name(void)
{
if (p4_htt)
- return ("Pentium 4 with HyperThreading");
+ return (PCBE_IMPL_NAME_P4HT);
return ("Pentium 4");
}
diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h
index e5f1cababc..0bb28d4d49 100644
--- a/usr/src/uts/intel/sys/x86_archext.h
+++ b/usr/src/uts/intel/sys/x86_archext.h
@@ -626,6 +626,7 @@ extern int cpuid_get_chipid(struct cpu *);
extern id_t cpuid_get_coreid(struct cpu *);
extern int cpuid_get_pkgcoreid(struct cpu *);
extern int cpuid_get_clogid(struct cpu *);
+extern int cpuid_get_cacheid(struct cpu *);
extern uint32_t cpuid_get_apicid(struct cpu *);
extern uint_t cpuid_get_procnodeid(struct cpu *cpu);
extern uint_t cpuid_get_procnodes_per_pkg(struct cpu *cpu);
diff --git a/usr/src/uts/sun4/os/mp_call.c b/usr/src/uts/sun4/os/mp_call.c
index f881a23755..f7ee31a276 100644
--- a/usr/src/uts/sun4/os/mp_call.c
+++ b/usr/src/uts/sun4/os/mp_call.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Facilities for cross-processor subroutine calls using "mailbox" interrupts.
*/
@@ -37,6 +34,7 @@
#include <sys/systm.h>
#include <sys/machsystm.h>
#include <sys/intr.h>
+#include <sys/xc_impl.h>
/*
* Interrupt another CPU.
@@ -64,3 +62,40 @@ poke_cpu(int cpun)
xt_one(cpun, setsoftint_tl1, poke_cpu_inum, 0);
}
+
+extern int xc_spl_enter[];
+
+/*
+ * Call a function on a target CPU
+ */
+void
+cpu_call(cpu_t *cp, cpu_call_func_t func, uintptr_t arg1, uintptr_t arg2)
+{
+ if (panicstr)
+ return;
+
+ /*
+ * Prevent CPU from going offline
+ */
+ kpreempt_disable();
+
+ /*
+	 * If we are on the target CPU, call the function directly, but raise
+	 * the PIL to the cross-call level first.
+	 * This guarantees that functions called via cpu_call() cannot ever
+ * interrupt each other.
+ */
+ if (CPU != cp) {
+ xc_one(cp->cpu_id, (xcfunc_t *)func, (uint64_t)arg1,
+ (uint64_t)arg2);
+ } else {
+ int lcx;
+ int opl;
+
+ XC_SPL_ENTER(lcx, opl);
+ func(arg1, arg2);
+ XC_SPL_EXIT(lcx, opl);
+ }
+
+ kpreempt_enable();
+}
diff --git a/usr/src/uts/sun4/os/x_call.c b/usr/src/uts/sun4/os/x_call.c
index 0c5c06c36a..521f740c82 100644
--- a/usr/src/uts/sun4/os/x_call.c
+++ b/usr/src/uts/sun4/os/x_call.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
@@ -226,6 +224,15 @@ xc_init(void)
*/
/*
+ * spl_xcall - set PIL to xcall level
+ */
+int
+spl_xcall(void)
+{
+ return (splr(XCALL_PIL));
+}
+
+/*
* xt_one - send a "x-trap" to a cpu
*/
void
diff --git a/usr/src/uts/sun4u/genunix/Makefile b/usr/src/uts/sun4u/genunix/Makefile
index 8d7c87f065..1a77e4c916 100644
--- a/usr/src/uts/sun4u/genunix/Makefile
+++ b/usr/src/uts/sun4u/genunix/Makefile
@@ -20,7 +20,7 @@
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -96,6 +96,8 @@ CFLAGS += $(CCVERBOSE)
CPPFLAGS += -I$(SRC)/common
CPPFLAGS += -I$(SRC)/uts/common/fs/zfs
+INC_PATH += -I$(UTSBASE)/sun4
+
#
# For now, disable these lint checks; maintainers should endeavor
# to investigate and remove these for maximum lint coverage.
diff --git a/usr/src/uts/sun4u/os/cmp.c b/usr/src/uts/sun4u/os/cmp.c
index 8ba9aa3b6e..8a0fa0e6dc 100644
--- a/usr/src/uts/sun4u/os/cmp.c
+++ b/usr/src/uts/sun4u/os/cmp.c
@@ -303,3 +303,19 @@ pg_cmt_affinity_hw(pghw_type_t hw)
else
return (0);
}
+
+/*
+ * Return the number of counter events requested to measure hardware capacity
+ * and utilization, and set up CPC requests for the specified CPU if a list to
+ * add the CPC requests to is given
+ */
+int
+/* LINTED E_FUNC_ARG_UNUSED */
+cu_plat_cpc_init(cpu_t *cp, kcpc_request_list_t *reqs, int nreqs)
+{
+ /*
+ * Return error to tell common code to decide what counter events to
+ * program on this CPU for measuring hardware capacity and utilization
+ */
+ return (-1);
+}
diff --git a/usr/src/uts/sun4u/os/cpc_subr.c b/usr/src/uts/sun4u/os/cpc_subr.c
index a9c64681fd..cfe1fd283d 100644
--- a/usr/src/uts/sun4u/os/cpc_subr.c
+++ b/usr/src/uts/sun4u/os/cpc_subr.c
@@ -45,6 +45,7 @@
#include <sys/cpc_pcbe.h>
#include <sys/modctl.h>
#include <sys/sdt.h>
+#include <sys/kcpc.h>
uint64_t cpc_level15_inum; /* used in interrupt.s */
int cpc_has_overflow_intr; /* set in cheetah.c */
@@ -111,26 +112,6 @@ kcpc_hw_load_pcbe(void)
}
/*ARGSUSED*/
-static void
-kcpc_remotestop_func(uint64_t arg1, uint64_t arg2)
-{
- ASSERT(CPU->cpu_cpc_ctx != NULL);
- pcbe_ops->pcbe_allstop();
- atomic_or_uint(&CPU->cpu_cpc_ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);
-}
-
-/*
- * Ensure the counters are stopped on the given processor.
- *
- * Callers must ensure kernel preemption is disabled.
- */
-void
-kcpc_remote_stop(cpu_t *cp)
-{
- xc_one(cp->cpu_id, kcpc_remotestop_func, 0, 0);
-}
-
-/*ARGSUSED*/
int
kcpc_hw_cpu_hook(processorid_t cpuid, ulong_t *kcpc_cpumap)
{
@@ -142,21 +123,3 @@ kcpc_hw_lwp_hook(void)
{
return (0);
}
-
-/*ARGSUSED*/
-static void
-kcpc_remoteprogram_func(uint64_t arg1, uint64_t arg2)
-{
- ASSERT(CPU->cpu_cpc_ctx != NULL);
-
- pcbe_ops->pcbe_program(CPU->cpu_cpc_ctx);
-}
-
-/*
- * Ensure counters are enabled on the given processor.
- */
-void
-kcpc_remote_program(cpu_t *cp)
-{
- xc_one(cp->cpu_id, kcpc_remoteprogram_func, 0, 0);
-}
diff --git a/usr/src/uts/sun4v/genunix/Makefile b/usr/src/uts/sun4v/genunix/Makefile
index e629630fb5..28d4f2aeeb 100644
--- a/usr/src/uts/sun4v/genunix/Makefile
+++ b/usr/src/uts/sun4v/genunix/Makefile
@@ -20,7 +20,7 @@
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
@@ -104,6 +104,8 @@ CFLAGS += $(CCVERBOSE)
CPPFLAGS += -I$(SRC)/common
CPPFLAGS += -I$(SRC)/uts/common/fs/zfs
+INC_PATH += -I$(UTSBASE)/sun4
+
#
# For now, disable these lint checks; maintainers should endeavor
# to investigate and remove these for maximum lint coverage.
diff --git a/usr/src/uts/sun4v/os/cmp.c b/usr/src/uts/sun4v/os/cmp.c
index 4e80f06f32..8eedd1a69d 100644
--- a/usr/src/uts/sun4v/os/cmp.c
+++ b/usr/src/uts/sun4v/os/cmp.c
@@ -208,3 +208,19 @@ pg_cmt_affinity_hw(pghw_type_t hw)
else
return (0);
}
+
+/*
+ * Return the number of counter events requested to measure hardware capacity
+ * and utilization, and set up CPC requests for the specified CPU if a list to
+ * add the CPC requests to is given
+ */
+int
+/* LINTED E_FUNC_ARG_UNUSED */
+cu_plat_cpc_init(cpu_t *cp, kcpc_request_list_t *reqs, int nreqs)
+{
+ /*
+ * Return error to tell common code to decide what counter events to
+ * program on this CPU for measuring hardware capacity and utilization
+ */
+ return (-1);
+}
diff --git a/usr/src/uts/sun4v/os/cpc_subr.c b/usr/src/uts/sun4v/os/cpc_subr.c
index 8e58d85513..089c582541 100644
--- a/usr/src/uts/sun4v/os/cpc_subr.c
+++ b/usr/src/uts/sun4v/os/cpc_subr.c
@@ -130,26 +130,6 @@ kcpc_hw_load_pcbe(void)
}
/*ARGSUSED*/
-static void
-kcpc_remotestop_func(uint64_t arg1, uint64_t arg2)
-{
- ASSERT(CPU->cpu_cpc_ctx != NULL);
- pcbe_ops->pcbe_allstop();
- atomic_or_uint(&CPU->cpu_cpc_ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);
-}
-
-/*
- * Ensure the counters are stopped on the given processor.
- *
- * Callers must ensure kernel preemption is disabled.
- */
-void
-kcpc_remote_stop(cpu_t *cp)
-{
- xc_one(cp->cpu_id, kcpc_remotestop_func, 0, 0);
-}
-
-/*ARGSUSED*/
int
kcpc_hw_cpu_hook(processorid_t cpuid, ulong_t *kcpc_cpumap)
{
@@ -161,21 +141,3 @@ kcpc_hw_lwp_hook(void)
{
return (0);
}
-
-/*ARGSUSED*/
-static void
-kcpc_remoteprogram_func(uint64_t arg1, uint64_t arg2)
-{
- ASSERT(CPU->cpu_cpc_ctx != NULL);
-
- pcbe_ops->pcbe_program(CPU->cpu_cpc_ctx);
-}
-
-/*
- * Ensure counters are enabled on the given processor.
- */
-void
-kcpc_remote_program(cpu_t *cp)
-{
- xc_one(cp->cpu_id, kcpc_remoteprogram_func, 0, 0);
-}
diff --git a/usr/src/uts/sun4v/pcbe/niagara2_pcbe.c b/usr/src/uts/sun4v/pcbe/niagara2_pcbe.c
index 13c428130e..d4b69e5de4 100644
--- a/usr/src/uts/sun4v/pcbe/niagara2_pcbe.c
+++ b/usr/src/uts/sun4v/pcbe/niagara2_pcbe.c
@@ -399,6 +399,12 @@ static uint64_t
ni2_pcbe_event_coverage(char *event)
{
/*
+ * Check whether counter event is supported
+ */
+ if (find_event(event) == NULL && find_generic_event(event) == NULL)
+ return (0);
+
+ /*
* Fortunately, both pic0 and pic1 can count all events.
*/
return (0x3);