summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJerry Jelinek <jerry.jelinek@joyent.com>2011-12-30 17:45:41 +0000
committerJerry Jelinek <jerry.jelinek@joyent.com>2011-12-30 17:45:41 +0000
commit12ee55d75bc01e9832f994ea28daf6d428cdcff6 (patch)
treee15048813dc752262853f2b2656211f13c857c25
parent81aba0c52267982673db217070e4c32b7f6202c7 (diff)
downloadillumos-joyent-12ee55d75bc01e9832f994ea28daf6d428cdcff6.tar.gz
OS-800 need a metric (kstat) to track when we're bursting
-rw-r--r--usr/src/cmd/prtconf/prtconf.c48
-rw-r--r--usr/src/cmd/prtconf/prtconf.h2
-rw-r--r--usr/src/lib/brand/joyent/zone/statechange.ksh73
-rw-r--r--usr/src/man/man1m/prtconf.1m13
-rw-r--r--usr/src/uts/common/disp/cpucaps.c81
-rw-r--r--usr/src/uts/common/os/zone.c40
-rw-r--r--usr/src/uts/common/sys/cpucaps.h3
-rw-r--r--usr/src/uts/common/sys/cpucaps_impl.h3
8 files changed, 240 insertions, 23 deletions
diff --git a/usr/src/cmd/prtconf/prtconf.c b/usr/src/cmd/prtconf/prtconf.c
index 4ff967fffb..ab41e8ede7 100644
--- a/usr/src/cmd/prtconf/prtconf.c
+++ b/usr/src/cmd/prtconf/prtconf.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -168,7 +169,7 @@ cleanup_path(const char *input_path, char *path)
#ifdef DEBUG
static const char *optstring = "abcdDvVxpPFf:M:dLuC";
#else
-static const char *optstring = "abcdDvVxpPFf:uC";
+static const char *optstring = "abcdDvVxmpPFf:uC";
#endif /* DEBUG */
int
@@ -201,6 +202,9 @@ main(int argc, char *argv[])
case 'v':
++opts.o_verbose;
break;
+ case 'm':
+ ++opts.o_memory;
+ break;
case 'p':
++opts.o_prominfo;
break;
@@ -338,35 +342,43 @@ main(int argc, char *argv[])
return (0);
}
- ret = sysinfo(SI_HW_PROVIDER, hw_provider, sizeof (hw_provider));
- /*
- * If 0 bytes are returned (the system returns '1', for the \0),
- * we're probably on x86, and there has been no si-hw-provider
- * set in /etc/bootrc, default to Joyent.
- */
- if (ret <= 1) {
- (void) strncpy(hw_provider, "Joyent",
+ if (!opts.o_memory) {
+ ret = sysinfo(SI_HW_PROVIDER, hw_provider,
sizeof (hw_provider));
+ /*
+ * If 0 bytes are returned (the system returns '1', for the \0),
+ * we're probably on x86, and there has been no si-hw-provider
+ * set in /etc/bootrc, default to Joyent.
+ */
+ if (ret <= 1) {
+ (void) strncpy(hw_provider, "Joyent",
+ sizeof (hw_provider));
+ }
+ (void) printf("System Configuration: %s %s\n", hw_provider,
+ opts.o_uts.machine);
}
- (void) printf("System Configuration: %s %s\n", hw_provider,
- opts.o_uts.machine);
pagesize = sysconf(_SC_PAGESIZE);
npages = sysconf(_SC_PHYS_PAGES);
- (void) printf("Memory size: ");
if (pagesize == -1 || npages == -1)
- (void) printf("unable to determine\n");
+ if (opts.o_memory) {
+ (void) printf("0\n");
+ return (1);
+ } else {
+ (void) printf("Memory size: unable to determine\n");
+ }
else {
const int64_t kbyte = 1024;
const int64_t mbyte = 1024 * 1024;
int64_t ii = (int64_t)pagesize * npages;
- if (ii >= mbyte)
- (void) printf("%ld Megabytes\n",
+ if (opts.o_memory) {
+ (void) printf("%ld\n", (long)((ii+mbyte-1) / mbyte));
+ return (0);
+ } else {
+ (void) printf("Memory size: %ld Megabytes\n",
(long)((ii+mbyte-1) / mbyte));
- else
- (void) printf("%ld Kilobytes\n",
- (long)((ii+kbyte-1) / kbyte));
+ }
}
if (opts.o_prominfo) {
diff --git a/usr/src/cmd/prtconf/prtconf.h b/usr/src/cmd/prtconf/prtconf.h
index 366e5e0351..2e48fef0a5 100644
--- a/usr/src/cmd/prtconf/prtconf.h
+++ b/usr/src/cmd/prtconf/prtconf.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
#ifndef _PRT_CONF_H
@@ -52,6 +53,7 @@ struct prt_opts {
int o_drv_name;
int o_pseudodevs;
int o_fbname;
+ int o_memory;
int o_noheader;
int o_prominfo;
int o_productinfo;
diff --git a/usr/src/lib/brand/joyent/zone/statechange.ksh b/usr/src/lib/brand/joyent/zone/statechange.ksh
index ba4df02b99..242a0a8181 100644
--- a/usr/src/lib/brand/joyent/zone/statechange.ksh
+++ b/usr/src/lib/brand/joyent/zone/statechange.ksh
@@ -385,6 +385,63 @@ setup_snapshots()
done
}
+#
+# If the zone has a CPU cap, calculate the CPU baseline and set it so we can
+# track when we're bursting. There are many ways that the baseline can be
+# calculated based on the other settings in the zones (e.g. a simple way would
+# be as a percentage of the cap).
+#
+# For SmartMachines, our CPU baseline is calculated off of the system's
+# provisionable memory and the memory cap of the zone. We assume that 83% of
+# the system's memory is usable by zones (the rest is for the OS) and we assume
+# that the zone memory cap is set so that we're proportional to how many zones
+# we can provision on the system (i.e. we don't overprovision memory). Using
+# these assumptions, we calculate the proportion of CPU for the zone based on
+# its proportion of memory. Thus, the zone's CPU baseline is calculated using:
+# ((zone capped memsize in MB) * 100) / (MB/core).
+# Uncapped zones have no baseline (i.e. infrastructure zones).
+#
+# Remember that the cpu-cap rctl and the baseline are expressed in units of
+# a percent of a CPU, so 100 is 1 full CPU.
+#
+# No baseline is set (the function simply returns) when the zone has no CPU
+# cap or no physical memory cap, or when the system memory size or processor
+# count cannot be determined; the latter cases would otherwise lead to a
+# division by zero below.
+#
+setup_cpu_baseline()
+{
+	# Get current cap and convert from zonecfg format into rctl format
+	cap=`zonecfg -z $ZONENAME info capped-cpu | nawk '{
+		if ($1 == "[ncpus:") print (substr($2, 1, length($2) - 1) * 100)
+	}'`
+	[ -z "$cap" ] && return
+
+	# Get zone's memory cap in MB times 100
+	zmem=`zonecfg -z $ZONENAME info capped-memory | nawk '{
+		if ($1 == "[physical:") {
+			val = substr($2, 1, length($2) - 2)
+			units = substr($2, length($2) - 1, 1)
+
+			# convert GB to MB
+			if (units == "G")
+				val *= 1024
+			print (val * 100)
+		}
+	}'`
+	[ -z "$zmem" ] && return
+
+	# Get system's total memory in MB. "prtconf -m" prints 0 when it
+	# cannot determine the memory size, so treat 0 as "unknown".
+	smem=`prtconf -m`
+	[[ -z "$smem" || $smem -le 0 ]] && return
+
+	# provisionable memory is 83% of total memory (integer arithmetic)
+	prov_mem=$((($smem * 83) / 100))
+
+	# nawk prints an empty line if no "virtual processor" line matched
+	nprocs=`psrinfo -v | \
+	    nawk '/virtual processor/ {cnt++} END {print cnt}'`
+	[[ -z "$nprocs" || $nprocs -le 0 ]] && return
+
+	mb_per_core=$(($prov_mem / $nprocs))
+	# mb_per_core is the divisor below; it must be positive
+	[[ $mb_per_core -le 0 ]] && return
+
+	baseline=$(($zmem / $mb_per_core))
+	[[ $baseline == 0 ]] && baseline=1
+	[[ $baseline -gt $cap ]] && baseline=$cap
+
+	prctl -n zone.cpu-baseline -v $baseline -t priv -i zone $ZONENAME
+}
+
cleanup_snapshots()
{
#
@@ -431,10 +488,18 @@ load_sdc_sysinfo
load_sdc_config
[[ "$subcommand" == "pre" && $cmd == 0 ]] && setup_fs
-[[ "$subcommand" == "post" && $cmd == 0 ]] && setup_snapshots
-[[ "$subcommand" == "pre" && $cmd == 4 ]] && cleanup_snapshots
-[[ "$subcommand" == "post" && $cmd == 0 ]] && setup_net
-[[ "$subcommand" == "pre" && $cmd == 4 ]] && cleanup_net
+if [[ "$subcommand" == "post" && $cmd == 0 ]]; then
+ setup_snapshots
+ setup_net
+fi
+
+# We can't set a rctl until we have a process in the zone to grab
+[[ "$subcommand" == "post" && $cmd == 1 ]] && setup_cpu_baseline
+
+if [[ "$subcommand" == "pre" && $cmd == 4 ]]; then
+ cleanup_snapshots
+ cleanup_net
+fi
exit 0
diff --git a/usr/src/man/man1m/prtconf.1m b/usr/src/man/man1m/prtconf.1m
index bd93c39ec6..2f47637796 100644
--- a/usr/src/man/man1m/prtconf.1m
+++ b/usr/src/man/man1m/prtconf.1m
@@ -1,5 +1,6 @@
'\" te
.\" Copyright 1989 AT&T Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright 2011, Joyent, Inc. All Rights Reserved
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
@@ -9,7 +10,7 @@ prtconf \- print system configuration
.SH SYNOPSIS
.LP
.nf
-\fB/usr/sbin/prtconf\fR [\fB-V\fR] | [\fB-F\fR] | [\fB-x\fR] | [\fB-bpv\fR] | [\fB-acdDPv\fR]
+\fB/usr/sbin/prtconf\fR [\fB-V\fR] | [\fB-F\fR] | [\fB-m\fR] | [\fB-x\fR] | [\fB-bpv\fR] | [\fB-acdDPv\fR]
[\fIdev_path\fR]
.fi
@@ -98,6 +99,16 @@ console frame buffer on a SUNW,Ultra-30 is \fBffb\fR, the command returns:
.sp
.ne 2
.na
+\fB\fB-m\fR\fR
+.ad
+.RS 6n
+Displays the amount of system memory in megabytes.
+This flag must be used by itself.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-p\fR\fR
.ad
.RS 6n
diff --git a/usr/src/uts/common/disp/cpucaps.c b/usr/src/uts/common/disp/cpucaps.c
index 46f53faab6..58b1860c9c 100644
--- a/usr/src/uts/common/disp/cpucaps.c
+++ b/usr/src/uts/common/disp/cpucaps.c
@@ -22,6 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
#include <sys/disp.h>
@@ -74,6 +75,16 @@
* Putting threads on wait queues in random places while running in the
* kernel might lead to all kinds of locking problems.
*
+ * Bursting
+ * ========
+ *
+ * CPU bursting occurs when the CPU usage is over the baseline but under the
+ * cap. The baseline CPU (zone.cpu-baseline) is set in a multi-tenant
+ * environment so that we know how much CPU is allocated for a tenant under
+ * normal utilization. We can then track how much time a zone is spending
+ * over the "normal" CPU utilization expected for that zone using the
+ * "above_base_sec" kstat.
+ *
* Accounting
* ==========
*
@@ -203,18 +214,22 @@ static void caps_update();
*/
struct cap_kstat {
kstat_named_t cap_value;
+ kstat_named_t cap_baseline; /* cap_base, exported via ROUND_SCALE(, cap_tick_cost) */
kstat_named_t cap_usage;
kstat_named_t cap_nwait;
kstat_named_t cap_below;
kstat_named_t cap_above;
+ kstat_named_t cap_above_base; /* cap_above_base ticks, exported via ROUND_SCALE(, tick_sec) */
kstat_named_t cap_maxusage;
kstat_named_t cap_zonename;
} cap_kstat = {
{ "value", KSTAT_DATA_UINT64 },
+ { "baseline", KSTAT_DATA_UINT64 },
{ "usage", KSTAT_DATA_UINT64 },
{ "nwait", KSTAT_DATA_UINT64 },
{ "below_sec", KSTAT_DATA_UINT64 },
{ "above_sec", KSTAT_DATA_UINT64 },
+ { "above_base_sec", KSTAT_DATA_UINT64 },
{ "maxusage", KSTAT_DATA_UINT64 },
{ "zonename", KSTAT_DATA_STRING },
};
@@ -494,6 +509,9 @@ cap_poke_waitq(cpucap_t *cap, int64_t gen)
{
ASSERT(MUTEX_HELD(&caps_lock));
+ if (cap->cap_base != 0 && cap->cap_usage > cap->cap_base)
+ cap->cap_above_base++;
+
if (cap->cap_usage >= cap->cap_value) {
cap->cap_above++;
} else {
@@ -757,6 +775,55 @@ cpucaps_zone_set(zone_t *zone, rctl_qty_t cap_val)
}
/*
+ * Set zone's base cpu value to base_val.
+ *
+ * base_val is clamped to MAXCAP and multiplied by cap_tick_cost before it is
+ * stored in the zone's cap_base. If the scaled value is negative or larger
+ * than the zone's cap_value, 0 is stored instead; cap_poke_waitq() does no
+ * above-base accounting while cap_base is 0.
+ *
+ * Returns 0 on success, and also when caps are off or the zone is not capped
+ * (nothing is changed in that case). Returns EBUSY when cpucaps_busy is set.
+ */
+int
+cpucaps_zone_set_base(zone_t *zone, rctl_qty_t base_val)
+{
+	cpucap_t *cap = NULL;
+	hrtime_t value;
+
+	ASSERT(base_val <= MAXCAP);
+	if (base_val > MAXCAP)
+		base_val = MAXCAP;
+
+	if (CPUCAPS_OFF() || !ZONE_IS_CAPPED(zone))
+		return (0);
+
+	/* Allocate before taking caps_lock; re-checked under the lock below. */
+	if (zone->zone_cpucap == NULL)
+		cap = cap_alloc();
+
+	mutex_enter(&caps_lock);
+
+	if (cpucaps_busy) {
+		/* Don't leak the cap that may have been allocated above. */
+		if (cap != NULL)
+			cap_free(cap);
+		mutex_exit(&caps_lock);
+		return (EBUSY);
+	}
+
+	/*
+	 * Double-check whether zone->zone_cpucap is NULL, now with caps_lock
+	 * held. If it is still NULL, assign a newly allocated cpucap to it.
+	 */
+	if (zone->zone_cpucap == NULL) {
+		zone->zone_cpucap = cap;
+	} else if (cap != NULL) {
+		cap_free(cap);
+	}
+
+	cap = zone->zone_cpucap;
+
+	/* A baseline that overflowed or exceeds the cap is treated as unset. */
+	value = base_val * cap_tick_cost;
+	if (value < 0 || value > cap->cap_value)
+		value = 0;
+
+	cap->cap_base = value;
+
+	mutex_exit(&caps_lock);
+
+	return (0);
+}
+
+/*
* The project is going away so disable its cap.
*/
void
@@ -948,6 +1015,16 @@ cpucaps_zone_get(zone_t *zone)
}
/*
+ * Return the zone's current CPU baseline: cap_base divided by cap_tick_cost,
+ * i.e. the inverse of the scaling applied by cpucaps_zone_set_base(). A zone
+ * without a cpucap structure has a baseline of 0.
+ */
+rctl_qty_t
+cpucaps_zone_get_base(zone_t *zone)
+{
+	cpucap_t *cap = zone->zone_cpucap;
+
+	if (cap == NULL)
+		return (0);
+
+	return ((rctl_qty_t)(cap->cap_base / cap_tick_cost));
+}
+
+/*
* Charge project of thread t the time thread t spent on CPU since previously
* adjusted.
*
@@ -1133,6 +1210,8 @@ cap_kstat_update(kstat_t *ksp, int rw)
capsp->cap_value.value.ui64 =
ROUND_SCALE(cap->cap_value, cap_tick_cost);
+ capsp->cap_baseline.value.ui64 =
+ ROUND_SCALE(cap->cap_base, cap_tick_cost);
capsp->cap_usage.value.ui64 =
ROUND_SCALE(cap->cap_usage, cap_tick_cost);
capsp->cap_maxusage.value.ui64 =
@@ -1140,6 +1219,8 @@ cap_kstat_update(kstat_t *ksp, int rw)
capsp->cap_nwait.value.ui64 = cap->cap_waitq.wq_count;
capsp->cap_below.value.ui64 = ROUND_SCALE(cap->cap_below, tick_sec);
capsp->cap_above.value.ui64 = ROUND_SCALE(cap->cap_above, tick_sec);
+ capsp->cap_above_base.value.ui64 =
+ ROUND_SCALE(cap->cap_above_base, tick_sec);
kstat_named_setstr(&capsp->cap_zonename, zonename);
return (0);
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index a9bd61f05c..a7100e28a0 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -1382,6 +1382,41 @@ static rctl_ops_t zone_cpu_cap_ops = {
rcop_no_test
};
+/*
+ * Getter installed in zone_cpu_base_ops: report the CPU baseline of the zone
+ * that process p belongs to. The caller must hold p->p_lock.
+ */
+/*ARGSUSED*/
+static rctl_qty_t
+zone_cpu_base_get(rctl_t *rctl, struct proc *p)
+{
+	zone_t *zone;
+
+	ASSERT(MUTEX_HELD(&p->p_lock));
+
+	zone = p->p_zone;
+	return (cpucaps_zone_get_base(zone));
+}
+
+/*
+ * Setter installed in zone_cpu_base_ops. Records nv as the baseline CPU of
+ * the zone named by the rctl entity e, so that the time the zone spends
+ * bursting above that baseline can be tracked. An entity with no zone is
+ * ignored and reported as success; otherwise the result of
+ * cpucaps_zone_set_base() is returned. The caller must hold p->p_lock.
+ */
+/*ARGSUSED*/
+static int
+zone_cpu_base_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
+    rctl_qty_t nv)
+{
+	zone_t *zp;
+
+	ASSERT(MUTEX_HELD(&p->p_lock));
+	ASSERT(e->rcep_t == RCENTITY_ZONE);
+
+	zp = e->rcep_p.zone;
+	return (zp == NULL ? 0 : cpucaps_zone_set_base(zp, nv));
+}
+
+static rctl_ops_t zone_cpu_base_ops = { /* ops vector registered for the zone.cpu-baseline rctl */
+ rcop_no_action,
+ zone_cpu_base_get, /* reads the baseline via cpucaps_zone_get_base() */
+ zone_cpu_base_set, /* stores the baseline via cpucaps_zone_set_base() */
+ rcop_no_test
+};
+
/*
* zone.zfs-io-pri resource control support (IO priority).
*/
@@ -2428,6 +2463,11 @@ zone_init(void)
RCTL_GLOBAL_INFINITE,
MAXCAP, MAXCAP, &zone_cpu_cap_ops);
+	/*
+	 * Register zone.cpu-baseline. The returned handle is deliberately
+	 * discarded rather than assigned to rc_zone_cpu_cap: that variable
+	 * presumably holds the handle of the zone.cpu-cap rctl registered
+	 * just above (confirm against its declaration), and assigning to it
+	 * here would overwrite that handle with the baseline's.
+	 */
+	(void) rctl_register("zone.cpu-baseline",
+	    RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
+	    RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
+	    MAXCAP, MAXCAP, &zone_cpu_base_ops);
+
rc_zone_zfs_io_pri = rctl_register("zone.zfs-io-priority",
RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
diff --git a/usr/src/uts/common/sys/cpucaps.h b/usr/src/uts/common/sys/cpucaps.h
index 6063ff4380..9ead38d480 100644
--- a/usr/src/uts/common/sys/cpucaps.h
+++ b/usr/src/uts/common/sys/cpucaps.h
@@ -22,6 +22,7 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_CPUCAPS_H
@@ -84,12 +85,14 @@ extern void cpucaps_zone_remove(zone_t *);
*/
extern int cpucaps_project_set(kproject_t *, rctl_qty_t);
extern int cpucaps_zone_set(zone_t *, rctl_qty_t);
+extern int cpucaps_zone_set_base(zone_t *, rctl_qty_t);
/*
* Get current CPU usage for a project/zone.
*/
extern rctl_qty_t cpucaps_project_get(kproject_t *);
extern rctl_qty_t cpucaps_zone_get(zone_t *);
+extern rctl_qty_t cpucaps_zone_get_base(zone_t *);
/*
* Scheduling class hooks into CPU caps framework.
diff --git a/usr/src/uts/common/sys/cpucaps_impl.h b/usr/src/uts/common/sys/cpucaps_impl.h
index 95afd21827..7011e5ca6e 100644
--- a/usr/src/uts/common/sys/cpucaps_impl.h
+++ b/usr/src/uts/common/sys/cpucaps_impl.h
@@ -22,6 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_CPUCAPS_IMPL_H
@@ -68,6 +69,7 @@ typedef struct cpucap {
int64_t cap_gen; /* zone cap specific */
hrtime_t cap_value; /* scaled CPU usage cap */
hrtime_t cap_usage; /* current CPU usage */
+ hrtime_t cap_base; /* base CPU for burst */
disp_lock_t cap_usagelock; /* protects cap_usage above */
/*
* Per cap statistics.
@@ -75,6 +77,7 @@ typedef struct cpucap {
hrtime_t cap_maxusage; /* maximum cap usage */
u_longlong_t cap_below; /* # of ticks spend below the cap */
u_longlong_t cap_above; /* # of ticks spend above the cap */
+ u_longlong_t cap_above_base; /* # of ticks spent above the base */
} cpucap_t;
/*