diff options
author | Bryan Cantrill <bryan@joyent.com> | 2011-08-13 02:06:39 -0700 |
---|---|---|
committer | Bryan Cantrill <bryan@joyent.com> | 2011-08-13 02:06:39 -0700 |
commit | 7aa76ffc594f84c1c092911a84f85a79ddb44c73 (patch) | |
tree | 57957fd031766852616f770ce5130e0b1a8ddcd1 | |
parent | f484800de70343e19872fa0f3fde2a00504a9cec (diff) | |
download | illumos-joyent-7aa76ffc594f84c1c092911a84f85a79ddb44c73.tar.gz |
1362 add kvmstat for monitoring of KVM statistics
1363 add vmregs[] variable to DTrace
1364 need disassembler support for VMX instructions
1365 mdb needs 16-bit disassembler support
Reviewed by: Brendan Gregg <brendan.gregg@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Garrett D'Amore <garrett@nexenta.com>
-rw-r--r-- | usr/src/cmd/Makefile | 2 | ||||
-rw-r--r-- | usr/src/cmd/kvmstat/Makefile | 41 | ||||
-rw-r--r-- | usr/src/cmd/kvmstat/kvmstat.c | 479 | ||||
-rw-r--r-- | usr/src/cmd/mdb/common/mdb/mdb_disasm.c | 15 | ||||
-rw-r--r-- | usr/src/common/dis/i386/dis_tables.c | 107 | ||||
-rw-r--r-- | usr/src/lib/libdtrace/common/dt_open.c | 2 | ||||
-rw-r--r-- | usr/src/lib/libdtrace/i386/regs.d.in | 151 | ||||
-rw-r--r-- | usr/src/pkg/manifests/SUNWcs.mf | 1 | ||||
-rw-r--r-- | usr/src/uts/common/dtrace/dtrace.c | 16 | ||||
-rw-r--r-- | usr/src/uts/common/sys/dtrace.h | 1 | ||||
-rw-r--r-- | usr/src/uts/common/sys/dtrace_impl.h | 1 | ||||
-rw-r--r-- | usr/src/uts/i86pc/ml/locore.s | 25 | ||||
-rw-r--r-- | usr/src/uts/intel/dtrace/dtrace_asm.s | 43 | ||||
-rw-r--r-- | usr/src/uts/sparc/dtrace/dtrace_isa.c | 12 |
14 files changed, 877 insertions, 19 deletions
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile index 839fe278c0..062a022322 100644 --- a/usr/src/cmd/Makefile +++ b/usr/src/cmd/Makefile @@ -21,6 +21,7 @@ # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright 2010 Nexenta Systems, Inc. All rights reserved. +# Copyright 2011 Joyent, Inc. All rights reserved. include ../Makefile.master @@ -220,6 +221,7 @@ COMMON_SUBDIRS= \ krb5 \ ksh \ kstat \ + kvmstat \ last \ lastcomm \ latencytop \ diff --git a/usr/src/cmd/kvmstat/Makefile b/usr/src/cmd/kvmstat/Makefile new file mode 100644 index 0000000000..2b3cce0eee --- /dev/null +++ b/usr/src/cmd/kvmstat/Makefile @@ -0,0 +1,41 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2011, Joyent, Inc. All rights reserved. 
+# + +PROG= kvmstat + +include ../Makefile.cmd + +LDLIBS += -lkstat + +.KEEP_STATE: + +all: $(PROG) + +install: all $(ROOTPROG) + +clean: + $(RM) $(PROG) + +lint: lint_PROG + +include ../Makefile.targ diff --git a/usr/src/cmd/kvmstat/kvmstat.c b/usr/src/cmd/kvmstat/kvmstat.c new file mode 100644 index 0000000000..526e212ef5 --- /dev/null +++ b/usr/src/cmd/kvmstat/kvmstat.c @@ -0,0 +1,479 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2011, Joyent, Inc. All rights reserved. 
+ */ + +#include <sys/kstat.h> +#include <kstat.h> +#include <stdlib.h> +#include <unistd.h> +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <strings.h> +#include <alloca.h> +#include <signal.h> +#include <sys/varargs.h> +#include <sys/int_limits.h> + +#define KSTAT_FIELD_USEINSTANCE 0x01 +#define KSTAT_FIELD_NODELTA 0x02 +#define KSTAT_FIELD_FILLER 0x04 + +typedef struct kstat_field { + char *ksf_header; /* header for field */ + char *ksf_name; /* name of stat, if any */ + int ksf_width; /* width for field in output line */ + uint32_t ksf_flags; /* flags for this field, if any */ + int ksf_hint; /* index hint for field in kstat */ +} kstat_field_t; + +typedef struct kstat_instance { + char ksi_name[KSTAT_STRLEN]; /* name of the underlying kstat */ + int ksi_instance; /* instance identifier of this kstat */ + kstat_t *ksi_ksp; /* pointer to the kstat */ + uint64_t *ksi_data[2]; /* pointer to two generations of data */ + hrtime_t ksi_snaptime[2]; /* hrtime for data generations */ + int ksi_gen; /* current generation */ + struct kstat_instance *ksi_next; /* next in instance list */ +} kstat_instance_t; + +const char *g_cmd = "kvmstat"; + +static void +fatal(char *fmt, ...) 
+{ + va_list ap; + int error = errno; + + va_start(ap, fmt); + + (void) fprintf(stderr, "%s: ", g_cmd); + /*LINTED*/ + (void) vfprintf(stderr, fmt, ap); + + if (fmt[strlen(fmt) - 1] != '\n') + (void) fprintf(stderr, ": %s\n", strerror(error)); + + exit(EXIT_FAILURE); +} + +int +kstat_field_hint(kstat_t *ksp, kstat_field_t *field) +{ + kstat_named_t *nm = KSTAT_NAMED_PTR(ksp); + int i; + + assert(ksp->ks_type == KSTAT_TYPE_NAMED); + + for (i = 0; i < ksp->ks_ndata; i++) { + if (strcmp(field->ksf_name, nm[i].name) == 0) + return (field->ksf_hint = i); + } + + fatal("could not find field '%s' in %s:%d\n", + field->ksf_name, ksp->ks_name, ksp->ks_instance); + + return (0); +} + +int +kstat_instances_compare(const void *lhs, const void *rhs) +{ + kstat_instance_t *l = *((kstat_instance_t **)lhs); + kstat_instance_t *r = *((kstat_instance_t **)rhs); + int rval; + + if ((rval = strcmp(l->ksi_name, r->ksi_name)) != 0) + return (rval); + + if (l->ksi_instance < r->ksi_instance) + return (-1); + + if (l->ksi_instance > r->ksi_instance) + return (1); + + return (0); +} + +void +kstat_instances_update(kstat_ctl_t *kcp, kstat_instance_t **head, + boolean_t (*interested)(kstat_t *)) +{ + int ninstances = 0, i; + kstat_instance_t **sorted, *ksi, *next; + kstat_t *ksp; + kid_t kid; + + if ((kid = kstat_chain_update(kcp)) == 0 && *head != NULL) + return; + + if (kid == -1) + fatal("failed to update kstat chain"); + + for (ksi = *head; ksi != NULL; ksi = ksi->ksi_next) + ksi->ksi_ksp = NULL; + + for (ksp = kcp->kc_chain; ksp != NULL; ksp = ksp->ks_next) { + kstat_instance_t *last = NULL; + + if (!interested(ksp)) + continue; + + /* + * Now look to see if we have this instance and name. (Yes, + * this is a linear search; we're assuming that this list is + * modest in size.) 
+ */ + for (ksi = *head; ksi != NULL; ksi = ksi->ksi_next) { + last = ksi; + + if (ksi->ksi_instance != ksp->ks_instance) + continue; + + if (strcmp(ksi->ksi_name, ksp->ks_name) != 0) + continue; + + ksi->ksi_ksp = ksp; + ninstances++; + break; + } + + if (ksi != NULL) + continue; + + if ((ksi = malloc(sizeof (kstat_instance_t))) == NULL) + fatal("could not allocate memory for stat instance"); + + bzero(ksi, sizeof (kstat_instance_t)); + (void) strlcpy(ksi->ksi_name, ksp->ks_name, KSTAT_STRLEN); + ksi->ksi_instance = ksp->ks_instance; + ksi->ksi_ksp = ksp; + ksi->ksi_next = NULL; + + if (last == NULL) { + assert(*head == NULL); + *head = ksi; + } else { + last->ksi_next = ksi; + } + + ninstances++; + } + + /* + * Now we know how many instances we have; iterate back over them, + * pruning the stale ones and adding the active ones to a holding + * array in which to sort them. + */ + sorted = (void *)alloca(ninstances * sizeof (kstat_instance_t *)); + ninstances = 0; + + for (ksi = *head; ksi != NULL; ksi = next) { + next = ksi->ksi_next; + + if (ksi->ksi_ksp == NULL) { + free(ksi); + } else { + sorted[ninstances++] = ksi; + } + } + + if (ninstances == 0) { + *head = NULL; + return; + } + + qsort(sorted, ninstances, sizeof (kstat_instance_t *), + kstat_instances_compare); + + *head = sorted[0]; + + for (i = 0; i < ninstances; i++) { + ksi = sorted[i]; + ksi->ksi_next = i < ninstances - 1 ? 
sorted[i + 1] : NULL; + } +} + +void +kstat_instances_read(kstat_ctl_t *kcp, kstat_instance_t *instances, + kstat_field_t *fields) +{ + kstat_instance_t *ksi; + int i, nfields; + + for (nfields = 0; fields[nfields].ksf_header != NULL; nfields++) + continue; + + for (ksi = instances; ksi != NULL; ksi = ksi->ksi_next) { + kstat_t *ksp = ksi->ksi_ksp; + + if (ksp == NULL) + continue; + + if (kstat_read(kcp, ksp, NULL) == -1) { + if (errno == ENXIO) { + /* + * Our kstat has been removed since the update; + * NULL it out to prevent us from trying to read + * it again (and to indicate that it should not + * be displayed) and drive on. + */ + ksi->ksi_ksp = NULL; + continue; + } + + fatal("failed to read kstat %s:%d", + ksi->ksi_name, ksi->ksi_instance); + } + + if (ksp->ks_type != KSTAT_TYPE_NAMED) { + fatal("%s:%d is not a named kstat", ksi->ksi_name, + ksi->ksi_instance); + } + + if (ksi->ksi_data[0] == NULL) { + size_t size = nfields * sizeof (uint64_t) * 2; + uint64_t *data; + + if ((data = malloc(size)) == NULL) + fatal("could not allocate memory"); + + bzero(data, size); + ksi->ksi_data[0] = data; + ksi->ksi_data[1] = &data[nfields]; + } + + for (i = 0; i < nfields; i++) { + kstat_named_t *nm = KSTAT_NAMED_PTR(ksp); + kstat_field_t *field = &fields[i]; + int hint = field->ksf_hint; + + if (field->ksf_name == NULL) + continue; + + if (hint < 0 || hint >= ksp->ks_ndata || + strcmp(field->ksf_name, nm[hint].name) != 0) { + hint = kstat_field_hint(ksp, field); + } + + ksi->ksi_data[ksi->ksi_gen][i] = nm[hint].value.ui64; + } + + ksi->ksi_snaptime[ksi->ksi_gen] = ksp->ks_snaptime; + ksi->ksi_gen ^= 1; + } +} + +uint64_t +kstat_instances_delta(kstat_instance_t *ksi, int i) +{ + int gen = ksi->ksi_gen; + uint64_t delta = ksi->ksi_data[gen ^ 1][i] - ksi->ksi_data[gen][i]; + uint64_t tdelta = ksi->ksi_snaptime[gen ^ 1] - ksi->ksi_snaptime[gen]; + + return (((delta * (uint64_t)NANOSEC) + (tdelta / 2)) / tdelta); +} + +void +kstat_instances_print(kstat_instance_t *instances, 
kstat_field_t *fields, + boolean_t header) +{ + kstat_instance_t *ksi = instances; + int i, nfields; + + for (nfields = 0; fields[nfields].ksf_header != NULL; nfields++) + continue; + + if (header) { + for (i = 0; i < nfields; i++) { + (void) printf("%*s%c", fields[i].ksf_width, + fields[i].ksf_header, i < nfields - 1 ? ' ' : '\n'); + } + } + + for (ksi = instances; ksi != NULL; ksi = ksi->ksi_next) { + if (ksi->ksi_snaptime[1] == 0 || ksi->ksi_ksp == NULL) + continue; + + for (i = 0; i < nfields; i++) { + char trailer = i < nfields - 1 ? ' ' : '\n'; + + if (fields[i].ksf_flags & KSTAT_FIELD_FILLER) { + (void) printf("%*s%c", fields[i].ksf_width, + fields[i].ksf_header, trailer); + continue; + } + + (void) printf("%*lld%c", fields[i].ksf_width, + fields[i].ksf_flags & KSTAT_FIELD_USEINSTANCE ? + ksi->ksi_instance : + fields[i].ksf_flags & KSTAT_FIELD_NODELTA ? + ksi->ksi_data[ksi->ksi_gen ^ 1][i] : + kstat_instances_delta(ksi, i), trailer); + } + } +} + +boolean_t +interested(kstat_t *ksp) +{ + const char *module = "kvm"; + const char *class = "misc"; + const char *name = "vcpu-"; + + if (strcmp(ksp->ks_module, module) != 0) + return (B_FALSE); + + if (strcmp(ksp->ks_class, class) != 0) + return (B_FALSE); + + if (strstr(ksp->ks_name, name) != ksp->ks_name) + return (B_FALSE); + + return (B_TRUE); +} + +/* BEGIN CSTYLED */ +char *g_usage = "Usage: kvmstat [interval [count]]\n" + "\n" + " Displays statistics for running kernel virtual machines, with one line\n" + " per virtual CPU. 
All statistics are reported as per-second rates.\n" + "\n" + " The columns are as follows:\n" + "\n" + " pid => identifier of process controlling the virtual CPU\n" + " vcpu => virtual CPU identifier relative to its virtual machine\n" + " exits => virtual machine exits for the virtual CPU\n" + " haltx => virtual machine exits due to the HLT instruction\n" + " irqx => virtual machine exits due to a pending external interrupt\n" + " irqwx => virtual machine exits due to an open interrupt window\n" + " iox => virtual machine exits due to an I/O instruction\n" + " mmiox => virtual machine exits due to memory mapped I/O \n" + " irqs => interrupts injected into the virtual CPU\n" + " emul => instructions emulated in the kernel\n" + " eptv => extended page table violations\n" + "\n"; +/* END CSTYLED */ + +void +usage() +{ + (void) fprintf(stderr, "%s", g_usage); + exit(EXIT_FAILURE); +} + +/*ARGSUSED*/ +void +intr(int sig) +{} + +/*ARGSUSED*/ +int +main(int argc, char **argv) +{ + kstat_ctl_t *kcp; + kstat_instance_t *instances = NULL; + int i = 0; + int interval = 1; + int count = INT32_MAX; + struct itimerval itimer; + struct sigaction act; + sigset_t set; + char *endp; + + kstat_field_t fields[] = { + { "pid", "pid", 6, KSTAT_FIELD_NODELTA }, + { "vcpu", NULL, 4, KSTAT_FIELD_USEINSTANCE }, + { "|", NULL, 1, KSTAT_FIELD_FILLER }, + { "exits", "exits", 6 }, + { ":", NULL, 1, KSTAT_FIELD_FILLER }, + { "haltx", "halt-exits", 6 }, + { "irqx", "irq-exits", 6 }, + { "irqwx", "irq-window-exits", 6 }, + { "iox", "io-exits", 6 }, + { "mmiox", "mmio-exits", 6 }, + { "|", NULL, 1, KSTAT_FIELD_FILLER }, + { "irqs", "irq-injections", 6 }, + { "emul", "insn-emulation", 6 }, + { "eptv", "pf-fixed", 6 }, + { NULL } + }; + + if (argc > 1) { + interval = strtol(argv[1], &endp, 10); + + if (*endp != '\0' || interval <= 0) + usage(); + } + + if (argc > 2) { + count = strtol(argv[2], &endp, 10); + + if (*endp != '\0' || count <= 0) + usage(); + } + + if ((kcp = kstat_open()) == NULL) + 
fatal("could not open /dev/kstat"); + + (void) sigemptyset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = intr; + (void) sigaction(SIGALRM, &act, NULL); + + (void) sigemptyset(&set); + (void) sigaddset(&set, SIGALRM); + (void) sigprocmask(SIG_BLOCK, &set, NULL); + + bzero(&itimer, sizeof (itimer)); + itimer.it_value.tv_sec = interval; + itimer.it_interval.tv_sec = interval; + + if (setitimer(ITIMER_REAL, &itimer, NULL) != 0) { + fatal("could not set timer to %d second%s", interval, + interval == 1 ? "" : "s"); + } + + (void) sigemptyset(&set); + + for (;;) { + kstat_instances_update(kcp, &instances, interested); + kstat_instances_read(kcp, instances, fields); + + if (i++ > 0) { + kstat_instances_print(instances, fields, + instances != NULL && instances->ksi_next == NULL ? + (((i - 2) % 20) == 0) : B_TRUE); + } + + if (i > count) + break; + + (void) sigsuspend(&set); + } + + /*NOTREACHED*/ + return (0); +} diff --git a/usr/src/cmd/mdb/common/mdb/mdb_disasm.c b/usr/src/cmd/mdb/common/mdb/mdb_disasm.c index 063fda5640..47788bfa02 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_disasm.c +++ b/usr/src/cmd/mdb/common/mdb/mdb_disasm.c @@ -22,6 +22,9 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2011 Joyent, Inc. All rights reserved. 
+ */ #include <mdb/mdb_disasm_impl.h> #include <mdb/mdb_modapi.h> @@ -416,9 +419,17 @@ libdisasm_create(mdb_disasm_t *dp, const char *name, return (0); } - #if defined(__i386) || defined(__amd64) static int +ia16_create(mdb_disasm_t *dp) +{ + return (libdisasm_create(dp, + "ia16", + "Intel 16-bit disassembler", + DIS_X86_SIZE16)); +} + +static int ia32_create(mdb_disasm_t *dp) { return (libdisasm_create(dp, @@ -546,9 +557,11 @@ defdis_create(mdb_disasm_t *dp) mdb_dis_ctor_f *const mdb_dis_builtins[] = { defdis_create, #if defined(__amd64) + ia16_create, ia32_create, amd64_create, #elif defined(__i386) + ia16_create, ia32_create, #elif defined(__sparc) sparc1_create, diff --git a/usr/src/common/dis/i386/dis_tables.c b/usr/src/common/dis/i386/dis_tables.c index 116191a462..99269b0e9b 100644 --- a/usr/src/common/dis/i386/dis_tables.c +++ b/usr/src/common/dis/i386/dis_tables.c @@ -21,6 +21,7 @@ */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, Joyent, Inc. All rights reserved. 
*/ /* @@ -101,6 +102,7 @@ enum { Mv, Mw, M, /* register or memory */ + MG9, /* register or memory in group 9 (prefix optional) */ Mb, /* register or memory, always byte sized */ MO, /* memory only (no registers) */ PREF, @@ -111,6 +113,7 @@ enum { SEG, MR, RM, + RM_66r, /* RM, but with a required 0x66 prefix */ IA, MA, SD, @@ -225,7 +228,9 @@ enum { VEX_RRi, /* VEX mod_rm, imm8 -> mod_reg */ VEX_RM, /* VEX mod_reg -> mod_rm */ VEX_RRM, /* VEX VEX.vvvv, mod_reg -> mod_rm */ - VEX_RMX /* VEX VEX.vvvv, mod_rm -> mod_reg */ + VEX_RMX, /* VEX VEX.vvvv, mod_rm -> mod_reg */ + VMx, /* vmcall/vmlaunch/vmresume/vmxoff */ + VMxo /* VMx instruction with optional prefix */ }; /* @@ -493,7 +498,7 @@ const instable_t dis_op0F00[8] = { */ const instable_t dis_op0F01[8] = { -/* [0] */ TNSZ("sgdt",MO,6), TNSZ("sidt",MONITOR_MWAIT,6), TNSZ("lgdt",XGETBV_XSETBV,6), TNSZ("lidt",MO,6), +/* [0] */ TNSZ("sgdt",VMx,6), TNSZ("sidt",MONITOR_MWAIT,6), TNSZ("lgdt",XGETBV_XSETBV,6), TNSZ("lidt",MO,6), /* [4] */ TNSZ("smsw",M,2), INVALID, TNSZ("lmsw",M,2), TNS("invlpg",SWAPGS), }; @@ -525,15 +530,34 @@ const instable_t dis_op0FBA[8] = { }; /* - * Decode table for 0x0FC7 opcode + * Decode table for 0x0FC7 opcode (group 9) */ const instable_t dis_op0FC7[8] = { /* [0] */ INVALID, TNS("cmpxchg8b",M), INVALID, INVALID, -/* [4] */ INVALID, INVALID, INVALID, INVALID, +/* [4] */ INVALID, INVALID, TNS("vmptrld",MG9), TNS("vmptrst",MG9), }; +/* + * Decode table for 0x0FC7 opcode with 0x66 prefix + */ + +const instable_t dis_op660FC7[8] = { + +/* [0] */ INVALID, INVALID, INVALID, INVALID, +/* [4] */ INVALID, INVALID, TNS("vmclear",M), INVALID, +}; + +/* + * Decode table for 0x0FC7 opcode with 0xF3 prefix + */ + +const instable_t dis_opF30FC7[8] = { + +/* [0] */ INVALID, INVALID, INVALID, INVALID, +/* [4] */ INVALID, INVALID, TNS("vmxon",M), INVALID, +}; /* * Decode table for 0x0FC8 opcode -- 486 bswap instruction @@ -1144,7 +1168,7 @@ const instable_t dis_op0F38[256] = { /* [78] */ INVALID, INVALID, 
INVALID, INVALID, /* [7C] */ INVALID, INVALID, INVALID, INVALID, -/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [80] */ TNSy("invept", RM_66r), TNSy("invvpid", RM_66r),INVALID, INVALID, /* [84] */ INVALID, INVALID, INVALID, INVALID, /* [88] */ INVALID, INVALID, INVALID, INVALID, /* [8C] */ INVALID, INVALID, INVALID, INVALID, @@ -1472,7 +1496,7 @@ const instable_t dis_op0F[16][16] = { }, { /* [70] */ TNSZ("pshufw",MMOPM,8), TNS("psrXXX",MR), TNS("psrXXX",MR), TNS("psrXXX",MR), /* [74] */ TNSZ("pcmpeqb",MMO,8), TNSZ("pcmpeqw",MMO,8), TNSZ("pcmpeqd",MMO,8), TNS("emms",NORM), -/* [78] */ TNS("INVALID",XMMO), TNS("INVALID",XMMO), INVALID, INVALID, +/* [78] */ TNSy("vmread",RM), TNSy("vmwrite",MR), INVALID, INVALID, /* [7C] */ INVALID, INVALID, TNSZ("movd",MMOS,4), TNSZ("movq",MMOS,8), }, { /* [80] */ TNS("jo",D), TNS("jno",D), TNS("jb",D), TNS("jae",D), @@ -2902,6 +2926,7 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) goto error; #endif switch (dp->it_adrmode) { + case RM_66r: case XMM_66r: case XMMM_66r: if (opnd_size_prefix == 0) { @@ -3051,6 +3076,42 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) } break; + case MG9: + /* + * More horribleness: the group 9 (0x0F 0xC7) instructions are + * allowed an optional prefix of 0x66 or 0xF3. This is similar + * to the SIMD business described above, but with a different + * addressing mode (and an indirect table), so we deal with it + * separately (if similarly). + */ + + /* + * Calculate our offset in dis_op0FC7 (the group 9 table) + */ + if ((uintptr_t)dp - (uintptr_t)dis_op0FC7 > sizeof (dis_op0FC7)) + goto error; + + off = ((uintptr_t)dp - (uintptr_t)dis_op0FC7) / + sizeof (instable_t); + + /* + * Rewrite if this instruction used one of the magic prefixes. 
+ */ + if (rep_prefix) { + if (rep_prefix == 0xf3) + dp = (instable_t *)&dis_opF30FC7[off]; + else + goto error; + rep_prefix = 0; + } else if (opnd_size_prefix) { + dp = (instable_t *)&dis_op660FC7[off]; + opnd_size_prefix = 0; + if (opnd_size == SIZE16) + opnd_size = SIZE32; + } + break; + + case MMOSH: /* * As with the "normal" SIMD instructions, the MMX @@ -3448,6 +3509,7 @@ just_mem: /* single memory or register operand */ case M: + case MG9: wbit = LONG_OPND; goto just_mem; @@ -3456,6 +3518,38 @@ just_mem: wbit = BYTE_OPND; goto just_mem; + case VMx: + if (mode == 3) { +#ifdef DIS_TEXT + char *vminstr; + + switch (r_m) { + case 1: + vminstr = "vmcall"; + break; + case 2: + vminstr = "vmlaunch"; + break; + case 3: + vminstr = "vmresume"; + break; + case 4: + vminstr = "vmxoff"; + break; + default: + goto error; + } + + (void) strncpy(x->d86_mnem, vminstr, OPLEN); +#else + if (r_m < 1 || r_m > 4) + goto error; +#endif + + NOMEM; + break; + } + /*FALLTHROUGH*/ case MONITOR_MWAIT: if (mode == 3) { if (r_m == 0) { @@ -3594,6 +3688,7 @@ just_mem: break; case RM: + case RM_66r: wbit = LONG_OPND; STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); break; diff --git a/usr/src/lib/libdtrace/common/dt_open.c b/usr/src/lib/libdtrace/common/dt_open.c index bd60702e49..d5259c9a2e 100644 --- a/usr/src/lib/libdtrace/common/dt_open.c +++ b/usr/src/lib/libdtrace/common/dt_open.c @@ -403,6 +403,8 @@ static const dt_ident_t _dtrace_globals[] = { &dt_idops_type, "uint32_t" }, { "usym", DT_IDENT_ACTFUNC, 0, DT_ACT_USYM, DT_ATTR_STABCMN, DT_VERS_1_2, &dt_idops_func, "_usymaddr(uintptr_t)" }, +{ "vmregs", DT_IDENT_ARRAY, 0, DIF_VAR_VMREGS, DT_ATTR_STABCMN, DT_VERS_1_7, + &dt_idops_regs, NULL }, { "vtimestamp", DT_IDENT_SCALAR, 0, DIF_VAR_VTIMESTAMP, DT_ATTR_STABCMN, DT_VERS_1_0, &dt_idops_type, "uint64_t" }, diff --git a/usr/src/lib/libdtrace/i386/regs.d.in b/usr/src/lib/libdtrace/i386/regs.d.in index 3328f33515..d18c5f7ff1 100644 --- a/usr/src/lib/libdtrace/i386/regs.d.in +++ 
b/usr/src/lib/libdtrace/i386/regs.d.in @@ -23,8 +23,9 @@ * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ - -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright 2011 Joyent, Inc. All rights reserved. + */ inline int R_GS = @GS@; #pragma D binding "1.0" R_GS @@ -115,3 +116,149 @@ inline int R_R14 = @REG_R14@; inline int R_R15 = @REG_R15@; #pragma D binding "1.0" R_R15 +enum vmregs_vmx { + VMX_VIRTUAL_PROCESSOR_ID = 0x00000000, + VMX_GUEST_ES_SELECTOR = 0x00000800, + VMX_GUEST_CS_SELECTOR = 0x00000802, + VMX_GUEST_SS_SELECTOR = 0x00000804, + VMX_GUEST_DS_SELECTOR = 0x00000806, + VMX_GUEST_FS_SELECTOR = 0x00000808, + VMX_GUEST_GS_SELECTOR = 0x0000080a, + VMX_GUEST_LDTR_SELECTOR = 0x0000080c, + VMX_GUEST_TR_SELECTOR = 0x0000080e, + VMX_HOST_ES_SELECTOR = 0x00000c00, + VMX_HOST_CS_SELECTOR = 0x00000c02, + VMX_HOST_SS_SELECTOR = 0x00000c04, + VMX_HOST_DS_SELECTOR = 0x00000c06, + VMX_HOST_FS_SELECTOR = 0x00000c08, + VMX_HOST_GS_SELECTOR = 0x00000c0a, + VMX_HOST_TR_SELECTOR = 0x00000c0c, + VMX_IO_BITMAP_A = 0x00002000, + VMX_IO_BITMAP_A_HIGH = 0x00002001, + VMX_IO_BITMAP_B = 0x00002002, + VMX_IO_BITMAP_B_HIGH = 0x00002003, + VMX_MSR_BITMAP = 0x00002004, + VMX_MSR_BITMAP_HIGH = 0x00002005, + VMX_VM_EXIT_MSR_STORE_ADDR = 0x00002006, + VMX_VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, + VMX_VM_EXIT_MSR_LOAD_ADDR = 0x00002008, + VMX_VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, + VMX_VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, + VMX_VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, + VMX_TSC_OFFSET = 0x00002010, + VMX_TSC_OFFSET_HIGH = 0x00002011, + VMX_VIRTUAL_APIC_PAGE_ADDR = 0x00002012, + VMX_VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, + VMX_APIC_ACCESS_ADDR = 0x00002014, + VMX_APIC_ACCESS_ADDR_HIGH = 0x00002015, + VMX_EPT_POINTER = 0x0000201a, + VMX_EPT_POINTER_HIGH = 0x0000201b, + VMX_GUEST_PHYSICAL_ADDRESS = 0x00002400, + VMX_GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, + VMX_VMCS_LINK_POINTER = 0x00002800, + VMX_VMCS_LINK_POINTER_HIGH = 0x00002801, 
+ VMX_GUEST_IA32_DEBUGCTL = 0x00002802, + VMX_GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + VMX_GUEST_IA32_PAT = 0x00002804, + VMX_GUEST_IA32_PAT_HIGH = 0x00002805, + VMX_GUEST_PDPTR0 = 0x0000280a, + VMX_GUEST_PDPTR0_HIGH = 0x0000280b, + VMX_GUEST_PDPTR1 = 0x0000280c, + VMX_GUEST_PDPTR1_HIGH = 0x0000280d, + VMX_GUEST_PDPTR2 = 0x0000280e, + VMX_GUEST_PDPTR2_HIGH = 0x0000280f, + VMX_GUEST_PDPTR3 = 0x00002810, + VMX_GUEST_PDPTR3_HIGH = 0x00002811, + VMX_HOST_IA32_PAT = 0x00002c00, + VMX_HOST_IA32_PAT_HIGH = 0x00002c01, + VMX_PIN_BASED_VM_EXEC_CONTROL = 0x00004000, + VMX_CPU_BASED_VM_EXEC_CONTROL = 0x00004002, + VMX_EXCEPTION_BITMAP = 0x00004004, + VMX_PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, + VMX_PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, + VMX_CR3_TARGET_COUNT = 0x0000400a, + VMX_VM_EXIT_CONTROLS = 0x0000400c, + VMX_VM_EXIT_MSR_STORE_COUNT = 0x0000400e, + VMX_VM_EXIT_MSR_LOAD_COUNT = 0x00004010, + VMX_VM_ENTRY_CONTROLS = 0x00004012, + VMX_VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, + VMX_VM_ENTRY_INTR_INFO_FIELD = 0x00004016, + VMX_VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, + VMX_VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, + VMX_TPR_THRESHOLD = 0x0000401c, + VMX_SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + VMX_PLE_GAP = 0x00004020, + VMX_PLE_WINDOW = 0x00004022, + VMX_VM_INSTRUCTION_ERROR = 0x00004400, + VMX_VM_EXIT_REASON = 0x00004402, + VMX_VM_EXIT_INTR_INFO = 0x00004404, + VMX_VM_EXIT_INTR_ERROR_CODE = 0x00004406, + VMX_IDT_VECTORING_INFO_FIELD = 0x00004408, + VMX_IDT_VECTORING_ERROR_CODE = 0x0000440a, + VMX_VM_EXIT_INSTRUCTION_LEN = 0x0000440c, + VMX_VMX_INSTRUCTION_INFO = 0x0000440e, + VMX_GUEST_ES_LIMIT = 0x00004800, + VMX_GUEST_CS_LIMIT = 0x00004802, + VMX_GUEST_SS_LIMIT = 0x00004804, + VMX_GUEST_DS_LIMIT = 0x00004806, + VMX_GUEST_FS_LIMIT = 0x00004808, + VMX_GUEST_GS_LIMIT = 0x0000480a, + VMX_GUEST_LDTR_LIMIT = 0x0000480c, + VMX_GUEST_TR_LIMIT = 0x0000480e, + VMX_GUEST_GDTR_LIMIT = 0x00004810, + VMX_GUEST_IDTR_LIMIT = 0x00004812, + VMX_GUEST_ES_AR_BYTES = 0x00004814, + 
VMX_GUEST_CS_AR_BYTES = 0x00004816, + VMX_GUEST_SS_AR_BYTES = 0x00004818, + VMX_GUEST_DS_AR_BYTES = 0x0000481a, + VMX_GUEST_FS_AR_BYTES = 0x0000481c, + VMX_GUEST_GS_AR_BYTES = 0x0000481e, + VMX_GUEST_LDTR_AR_BYTES = 0x00004820, + VMX_GUEST_TR_AR_BYTES = 0x00004822, + VMX_GUEST_INTERRUPTIBILITY_INFO = 0x00004824, + VMX_GUEST_ACTIVITY_STATE = 0X00004826, + VMX_GUEST_SYSENTER_CS = 0x0000482A, + VMX_HOST_IA32_SYSENTER_CS = 0x00004c00, + VMX_CR0_GUEST_HOST_MASK = 0x00006000, + VMX_CR4_GUEST_HOST_MASK = 0x00006002, + VMX_CR0_READ_SHADOW = 0x00006004, + VMX_CR4_READ_SHADOW = 0x00006006, + VMX_CR3_TARGET_VALUE0 = 0x00006008, + VMX_CR3_TARGET_VALUE1 = 0x0000600a, + VMX_CR3_TARGET_VALUE2 = 0x0000600c, + VMX_CR3_TARGET_VALUE3 = 0x0000600e, + VMX_EXIT_QUALIFICATION = 0x00006400, + VMX_GUEST_LINEAR_ADDRESS = 0x0000640a, + VMX_GUEST_CR0 = 0x00006800, + VMX_GUEST_CR3 = 0x00006802, + VMX_GUEST_CR4 = 0x00006804, + VMX_GUEST_ES_BASE = 0x00006806, + VMX_GUEST_CS_BASE = 0x00006808, + VMX_GUEST_SS_BASE = 0x0000680a, + VMX_GUEST_DS_BASE = 0x0000680c, + VMX_GUEST_FS_BASE = 0x0000680e, + VMX_GUEST_GS_BASE = 0x00006810, + VMX_GUEST_LDTR_BASE = 0x00006812, + VMX_GUEST_TR_BASE = 0x00006814, + VMX_GUEST_GDTR_BASE = 0x00006816, + VMX_GUEST_IDTR_BASE = 0x00006818, + VMX_GUEST_DR7 = 0x0000681a, + VMX_GUEST_RSP = 0x0000681c, + VMX_GUEST_RIP = 0x0000681e, + VMX_GUEST_RFLAGS = 0x00006820, + VMX_GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, + VMX_GUEST_SYSENTER_ESP = 0x00006824, + VMX_GUEST_SYSENTER_EIP = 0x00006826, + VMX_HOST_CR0 = 0x00006c00, + VMX_HOST_CR3 = 0x00006c02, + VMX_HOST_CR4 = 0x00006c04, + VMX_HOST_FS_BASE = 0x00006c06, + VMX_HOST_GS_BASE = 0x00006c08, + VMX_HOST_TR_BASE = 0x00006c0a, + VMX_HOST_GDTR_BASE = 0x00006c0c, + VMX_HOST_IDTR_BASE = 0x00006c0e, + VMX_HOST_IA32_SYSENTER_ESP = 0x00006c10, + VMX_HOST_IA32_SYSENTER_EIP = 0x00006c12, + VMX_HOST_RSP = 0x00006c14, + VMX_HOST_RIP = 0x00006c16 +}; diff --git a/usr/src/pkg/manifests/SUNWcs.mf b/usr/src/pkg/manifests/SUNWcs.mf index 
fca9829a3c..ff84521970 100644 --- a/usr/src/pkg/manifests/SUNWcs.mf +++ b/usr/src/pkg/manifests/SUNWcs.mf @@ -824,6 +824,7 @@ file path=usr/bin/kbd mode=0555 file path=usr/bin/keylogin mode=0555 file path=usr/bin/keylogout mode=0555 file path=usr/bin/kmfcfg mode=0555 +file path=usr/bin/kvmstat mode=0555 file path=usr/bin/line mode=0555 file path=usr/bin/listdgrp mode=0555 file path=usr/bin/listusers mode=0555 diff --git a/usr/src/uts/common/dtrace/dtrace.c b/usr/src/uts/common/dtrace/dtrace.c index 0edbd433ed..c352f66d5d 100644 --- a/usr/src/uts/common/dtrace/dtrace.c +++ b/usr/src/uts/common/dtrace/dtrace.c @@ -2760,6 +2760,22 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (dtrace_getreg(lwp->lwp_regs, ndx)); } + case DIF_VAR_VMREGS: { + uint64_t rval; + + if (!dtrace_priv_kernel(state)) + return (0); + + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + + rval = dtrace_getvmreg(ndx, + &cpu_core[CPU->cpu_id].cpuc_dtrace_flags); + + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + + return (rval); + } + case DIF_VAR_CURTHREAD: if (!dtrace_priv_kernel(state)) return (0); diff --git a/usr/src/uts/common/sys/dtrace.h b/usr/src/uts/common/sys/dtrace.h index f2339d3d3c..834cfd6d55 100644 --- a/usr/src/uts/common/sys/dtrace.h +++ b/usr/src/uts/common/sys/dtrace.h @@ -206,6 +206,7 @@ typedef enum dtrace_probespec { #define DIF_VAR_ARGS 0x0000 /* arguments array */ #define DIF_VAR_REGS 0x0001 /* registers array */ #define DIF_VAR_UREGS 0x0002 /* user registers array */ +#define DIF_VAR_VMREGS 0x0003 /* virtual machine registers array */ #define DIF_VAR_CURTHREAD 0x0100 /* thread pointer */ #define DIF_VAR_TIMESTAMP 0x0101 /* timestamp */ #define DIF_VAR_VTIMESTAMP 0x0102 /* virtual timestamp */ diff --git a/usr/src/uts/common/sys/dtrace_impl.h b/usr/src/uts/common/sys/dtrace_impl.h index a68b1a8a89..dc89cb1b0b 100644 --- a/usr/src/uts/common/sys/dtrace_impl.h +++ b/usr/src/uts/common/sys/dtrace_impl.h @@ -1251,6 +1251,7 @@ extern void 
dtrace_copyoutstr(uintptr_t, uintptr_t, size_t, volatile uint16_t *); extern void dtrace_getpcstack(pc_t *, int, int, uint32_t *); extern ulong_t dtrace_getreg(struct regs *, uint_t); +extern uint64_t dtrace_getvmreg(uint_t, volatile uint16_t *); extern int dtrace_getstackdepth(int); extern void dtrace_getupcstack(uint64_t *, int); extern void dtrace_getufpstack(uint64_t *, uint64_t *, int); diff --git a/usr/src/uts/i86pc/ml/locore.s b/usr/src/uts/i86pc/ml/locore.s index 8aec1537e5..91e38307ca 100644 --- a/usr/src/uts/i86pc/ml/locore.s +++ b/usr/src/uts/i86pc/ml/locore.s @@ -22,6 +22,9 @@ /* * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright 2011 Joyent, Inc. All rights reserved. + */ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ @@ -1144,28 +1147,34 @@ cmntrap() .dtrace_induced: cmpw $KCS_SEL, REGOFF_CS(%rbp) /* test CS for user-mode trap */ - jne 2f /* if from user, panic */ + jne 3f /* if from user, panic */ cmpl $T_PGFLT, REGOFF_TRAPNO(%rbp) - je 0f + je 1f cmpl $T_GPFLT, REGOFF_TRAPNO(%rbp) - jne 3f /* if not PF or GP, panic */ + je 0f + + cmpl $T_ILLINST, REGOFF_TRAPNO(%rbp) + je 0f + + jne 4f /* if not PF, GP or UD, panic */ /* * If we've taken a GPF, we don't (unfortunately) have the address that * induced the fault. So instead of setting the fault to BADADDR, * we'll set the fault to ILLOP. 
*/ +0: orw $CPU_DTRACE_ILLOP, %cx movw %cx, CPUC_DTRACE_FLAGS(%rax) - jmp 1f -0: + jmp 2f +1: orw $CPU_DTRACE_BADADDR, %cx movw %cx, CPUC_DTRACE_FLAGS(%rax) /* set fault to bad addr */ movq %r15, CPUC_DTRACE_ILLVAL(%rax) /* fault addr is illegal value */ -1: +2: movq REGOFF_RIP(%rbp), %rdi movq %rdi, %r12 call dtrace_instr_size @@ -1174,11 +1183,11 @@ cmntrap() INTR_POP IRET /*NOTREACHED*/ -2: +3: leaq dtrace_badflags(%rip), %rdi xorl %eax, %eax call panic -3: +4: leaq dtrace_badtrap(%rip), %rdi xorl %eax, %eax call panic diff --git a/usr/src/uts/intel/dtrace/dtrace_asm.s b/usr/src/uts/intel/dtrace/dtrace_asm.s index 47b981d1e3..3aad499599 100644 --- a/usr/src/uts/intel/dtrace/dtrace_asm.s +++ b/usr/src/uts/intel/dtrace/dtrace_asm.s @@ -22,8 +22,9 @@ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ - -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright 2011 Joyent, Inc. All rights reserved. + */ #include <sys/asm_linkage.h> #include <sys/regset.h> @@ -62,6 +63,44 @@ dtrace_getfp(void) #if defined(lint) || defined(__lint) +/*ARGSUSED*/ +uint64_t +dtrace_getvmreg(uint32_t reg, volatile uint16_t *flags) +{ return (0); } + +#else /* lint */ + +#if defined(__amd64) + + ENTRY_NP(dtrace_getvmreg) + + movq %rdi, %rdx + vmread %rdx, %rax + ret + + SET_SIZE(dtrace_getvmreg) + +#elif defined(__i386) + + ENTRY_NP(dtrace_getvmreg) + pushl %ebp / Setup stack frame + movl %esp, %ebp + + movl 12(%ebp), %eax / Load flag pointer + movw (%eax), %cx / Load flags + orw $CPU_DTRACE_ILLOP, %cx / Set ILLOP + movw %cx, (%eax) / Store flags + + leave + ret + SET_SIZE(dtrace_getvmreg) + +#endif /* __i386 */ +#endif /* lint */ + + +#if defined(lint) || defined(__lint) + uint32_t dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new) { diff --git a/usr/src/uts/sparc/dtrace/dtrace_isa.c b/usr/src/uts/sparc/dtrace/dtrace_isa.c index 9fd9d90fc0..20d9bc420b 100644 --- a/usr/src/uts/sparc/dtrace/dtrace_isa.c +++ 
b/usr/src/uts/sparc/dtrace/dtrace_isa.c @@ -23,6 +23,9 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2011 Joyent, Inc. All rights reserved. + */ #include <sys/dtrace_impl.h> #include <sys/atomic.h> @@ -936,3 +939,12 @@ got_fp: return (value); } + +/*ARGSUSED*/ +uint64_t +dtrace_getvmreg(uint_t ndx, volatile uint16_t *flags) +{ + *flags |= CPU_DTRACE_ILLOP; + + return (0); +} |