From 15db28971f91c98efb449aebf46024ac72779fa3 Mon Sep 17 00:00:00 2001 From: Krishnendu Sadhukhan - Sun Microsystems Date: Mon, 28 Sep 2009 13:53:34 -0700 Subject: PSARC/2009/339 LatencyTOP for OpenSolaris 6825817 Integrate latencyTOP into OpenSolaris Contributed by Lejun Zhu --- usr/src/cmd/Makefile | 1 + usr/src/cmd/latencytop/Makefile | 47 ++ usr/src/cmd/latencytop/Makefile.com | 79 ++ usr/src/cmd/latencytop/amd64/Makefile | 30 + usr/src/cmd/latencytop/common/display.c | 1040 +++++++++++++++++++++++ usr/src/cmd/latencytop/common/dwrapper.c | 573 +++++++++++++ usr/src/cmd/latencytop/common/klog.c | 226 +++++ usr/src/cmd/latencytop/common/latencytop.c | 486 +++++++++++ usr/src/cmd/latencytop/common/latencytop.d | 404 +++++++++ usr/src/cmd/latencytop/common/latencytop.h | 269 ++++++ usr/src/cmd/latencytop/common/latencytop.trans | 44 + usr/src/cmd/latencytop/common/stat.c | 1050 ++++++++++++++++++++++++ usr/src/cmd/latencytop/common/table.c | 840 +++++++++++++++++++ usr/src/cmd/latencytop/common/util.c | 312 +++++++ usr/src/cmd/latencytop/i386/Makefile | 29 + usr/src/cmd/latencytop/sparcv9/Makefile | 30 + 16 files changed, 5460 insertions(+) create mode 100644 usr/src/cmd/latencytop/Makefile create mode 100644 usr/src/cmd/latencytop/Makefile.com create mode 100644 usr/src/cmd/latencytop/amd64/Makefile create mode 100644 usr/src/cmd/latencytop/common/display.c create mode 100644 usr/src/cmd/latencytop/common/dwrapper.c create mode 100644 usr/src/cmd/latencytop/common/klog.c create mode 100644 usr/src/cmd/latencytop/common/latencytop.c create mode 100644 usr/src/cmd/latencytop/common/latencytop.d create mode 100644 usr/src/cmd/latencytop/common/latencytop.h create mode 100644 usr/src/cmd/latencytop/common/latencytop.trans create mode 100644 usr/src/cmd/latencytop/common/stat.c create mode 100644 usr/src/cmd/latencytop/common/table.c create mode 100644 usr/src/cmd/latencytop/common/util.c create mode 100644 usr/src/cmd/latencytop/i386/Makefile create mode 100644 
usr/src/cmd/latencytop/sparcv9/Makefile (limited to 'usr/src/cmd') diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile index 3ddc3f1d43..d21840f8bd 100644 --- a/usr/src/cmd/Makefile +++ b/usr/src/cmd/Makefile @@ -227,6 +227,7 @@ COMMON_SUBDIRS= \ kstat \ last \ lastcomm \ + latencytop \ ldap \ ldapcachemgr \ lgrpinfo \ diff --git a/usr/src/cmd/latencytop/Makefile b/usr/src/cmd/latencytop/Makefile new file mode 100644 index 0000000000..3d9815ddc7 --- /dev/null +++ b/usr/src/cmd/latencytop/Makefile @@ -0,0 +1,47 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2008-2009, Intel Corporation. +# All Rights Reserved. 
+# + +PROG = latencytop + +include ../Makefile.cmd + +$(64ONLY)SUBDIRS= $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET = all +install := TARGET = install +clean := TARGET = clean +clobber := TARGET = clobber +lint := TARGET = lint + +.KEEP_STATE: + +all install clean clobber lint: $(SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include ../Makefile.targ diff --git a/usr/src/cmd/latencytop/Makefile.com b/usr/src/cmd/latencytop/Makefile.com new file mode 100644 index 0000000000..98e65bafc7 --- /dev/null +++ b/usr/src/cmd/latencytop/Makefile.com @@ -0,0 +1,79 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 2008-2009, Intel Corporation. +# All Rights Reserved. 
+#
+
+PROG = latencytop
+OBJS = latencytop.o display.o dwrapper.o klog.o stat.o table.o util.o
+SRCS = $(OBJS:%.o=../common/%.c)
+
+include ../../Makefile.cmd
+
+CFLAGS += $(CCVERBOSE)
+CFLAGS64 += $(CCVERBOSE)
+
+CPPFLAGS += -DEMBED_CONFIGS -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include
+C99MODE = $(C99_ENABLE)
+LDLIBS += -lcurses -ldtrace
+all install := LDLIBS += -lglib-2.0
+
+LINTFLAGS += -erroff=E_NAME_USED_NOT_DEF2
+LINTFLAGS64 += -erroff=E_NAME_USED_NOT_DEF2
+
+FILEMODE = 0555
+
+ELFWRAP = elfwrap
+WRAPOBJ = latencytop_wrap.o
+
+CLEANFILES += $(OBJS) $(WRAPOBJ) ./latencytop_d ./latencytop_trans
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: $(SUBDIRS)
+	-$(RM) $(ROOTPROG)
+	-$(LN) $(ISAEXEC) $(ROOTPROG)
+
+$(PROG): $(OBJS) $(WRAPOBJ)
+	$(LINK.c) -o $@ $(OBJS) $(WRAPOBJ) $(LDLIBS)
+	$(POST_PROCESS)
+
+# The D script and the translation table are wrapped into an ELF object
+# that is linked into $(PROG).
+$(WRAPOBJ): latencytop_d latencytop_trans
+	$(ELFWRAP) $(WRAPOPT) -o $(WRAPOBJ) latencytop_d latencytop_trans
+
+# The local copies must depend on their sources in ../common; without the
+# prerequisite an existing copy is never refreshed and a stale script or
+# table would be wrapped into the binary.
+latencytop_d: ../common/latencytop.d
+	cp ../common/latencytop.d ./latencytop_d
+
+latencytop_trans: ../common/latencytop.trans
+	cp ../common/latencytop.trans ./latencytop_trans
+
+clean:
+	$(RM) $(CLEANFILES)
+
+lint: lint_SRCS
+
+%.o: ../common/%.c
+	$(COMPILE.c) $<
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/latencytop/amd64/Makefile b/usr/src/cmd/latencytop/amd64/Makefile
new file mode 100644
index 0000000000..79b3cceb54
--- /dev/null
+++ b/usr/src/cmd/latencytop/amd64/Makefile
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2008-2009, Intel Corporation. +# All Rights Reserved. +# + +include ../Makefile.com +include ../../Makefile.cmd.64 + +WRAPOPT = -64 + +install: all $(ROOTPROG64) diff --git a/usr/src/cmd/latencytop/common/display.c b/usr/src/cmd/latencytop/common/display.c new file mode 100644 index 0000000000..edc2c20ae1 --- /dev/null +++ b/usr/src/cmd/latencytop/common/display.c @@ -0,0 +1,1040 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2008-2009, Intel Corporation. + * All Rights Reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "latencytop.h" + +#define LT_WINDOW_X 80 +#define LT_WINDOW_Y 24 + +#define LT_COLOR_DEFAULT 1 +#define LT_COLOR_HEADER 2 + +/* Windows created by libcurses */ +static WINDOW *titlebar = NULL; +static WINDOW *captionbar = NULL; +static WINDOW *sysglobal_window = NULL; +static WINDOW *taskbar = NULL; +static WINDOW *process_window = NULL; +static WINDOW *hintbar = NULL; +/* Screen dimension */ +static int screen_width = 1, screen_height = 1; +/* Is display initialized, i.e. are window pointers set up. */ +static int display_initialized = FALSE; +/* Is initscr() called */ +static int curses_inited = FALSE; + +/* To handle user key presses */ +static pid_t selected_pid = INVALID_PID; +static id_t selected_tid = INVALID_TID; +static lt_sort_t sort_type = LT_SORT_TOTAL; +static int thread_mode = FALSE; +/* Type of list being displayed */ +static int current_list_type = LT_LIST_CAUSE; +static int show_help = FALSE; + +/* Help functions that append/prepend a blank to the given string */ +#define fill_space_right(a, b, c) fill_space((a), (b), (c), TRUE) +#define fill_space_left(a, b, c) fill_space((a), (b), (c), FALSE) + +static void +fill_space(char *buffer, int len, int buffer_limit, int is_right) +{ + int i = 0; + int tofill; + + if (len >= buffer_limit) { + len = buffer_limit - 1; + } + + i = strlen(buffer); + + if (i >= len) { + return; + } + + tofill = len - i; + + if (is_right) { + (void) memset(&buffer[i], ' ', tofill); + buffer[len] = '\0'; + } else { + (void) memmove(&buffer[tofill], buffer, i+1); + (void) memset(buffer, ' ', tofill); + } +} + +/* Convert the nanosecond value to a human readable string */ +static const char * +get_time_string(double nanoseconds, char *buffer, int len, int fill_width) +{ + const double ONE_USEC = 1000.0; + const double ONE_MSEC = 1000000.0; + const double ONE_SEC = 1000000000.0; + + 
if (nanoseconds < (ONE_USEC - .5)) { + (void) snprintf(buffer, len, "%3.1f nsec", nanoseconds); + } else if (nanoseconds < (ONE_MSEC - .5 * ONE_USEC)) { + (void) snprintf(buffer, len, + "%3.1f usec", nanoseconds / ONE_USEC); + } else if (nanoseconds < (ONE_SEC - .5 * ONE_MSEC)) { + (void) snprintf(buffer, len, + "%3.1f msec", nanoseconds / ONE_MSEC); + } else if (nanoseconds < 999.5 * ONE_SEC) { + (void) snprintf(buffer, len, + "%3.1f sec", nanoseconds / ONE_SEC); + } else { + (void) snprintf(buffer, len, + "%.0e sec", nanoseconds / ONE_SEC); + } + + fill_space_left(buffer, fill_width, len); + return (buffer); +} + +/* Used in print_statistics below */ +#define WIDTH_REASON_STRING 36 +#define WIDTH_COUNT 12 +#define WIDTH_AVG 12 +#define WIDTH_MAX 12 +#define WIDTH_PCT 8 +#define BEGIN_COUNT WIDTH_REASON_STRING +#define BEGIN_AVG (BEGIN_COUNT + WIDTH_COUNT) +#define BEGIN_MAX (BEGIN_AVG + WIDTH_AVG) +#define BEGIN_PCT (BEGIN_MAX + WIDTH_MAX) + +/* + * Print statistics in global/process pane. Called by print_sysglobal + * print_process. + * + * Parameters: + * window - the global or process statistics window. + * begin_line - where to start printing. + * count - how many lines should be printed. + * list - a stat_list. 
+ */
+static void
+print_statistics(WINDOW * window, int begin_line, int nlines, void *list)
+{
+	uint64_t total;
+	int i;
+
+	if (!display_initialized) {
+		return;
+	}
+
+	total = lt_stat_list_get_gtotal(list);
+
+	/* Nothing recorded; this also guards the percentage division below. */
+	if (total == 0) {
+		return;
+	}
+
+	/*
+	 * The index is advanced in the loop header, so skipping an entry
+	 * cannot re-examine the same index forever.
+	 */
+	for (i = 0; i < nlines && lt_stat_list_has_item(list, i); i++) {
+
+		/* Scratch buffer reused for every column of this row. */
+		char tmp[WIDTH_REASON_STRING];
+		const char *reason = lt_stat_list_get_reason(list, i);
+		uint64_t count = lt_stat_list_get_count(list, i);
+
+		/*
+		 * An entry without samples has no average (it would divide
+		 * by zero below); its row is left blank.
+		 */
+		if (count == 0) {
+			continue;
+		}
+
+		/* Column 1: reason, truncated to the column width. */
+		(void) snprintf(tmp, sizeof (tmp), "%s", reason);
+		(void) mvwprintw(window, i + begin_line, 0, "%s", tmp);
+
+		/* Column 2: count, right-aligned. */
+		(void) snprintf(tmp, sizeof (tmp), "%llu", count);
+		fill_space_left(tmp, WIDTH_COUNT, sizeof (tmp));
+		(void) mvwprintw(window, i + begin_line, BEGIN_COUNT,
+		    "%s", tmp);
+
+		/* Column 3: average latency (sum / count). */
+		(void) mvwprintw(window, i + begin_line, BEGIN_AVG,
+		    "%s", get_time_string(
+		    (double)lt_stat_list_get_sum(list, i) / count,
+		    tmp, sizeof (tmp), WIDTH_AVG));
+
+		/* Column 4: maximum latency. */
+		(void) mvwprintw(window, i + begin_line, BEGIN_MAX,
+		    "%s", get_time_string(
+		    (double)lt_stat_list_get_max(list, i),
+		    tmp, sizeof (tmp), WIDTH_MAX));
+
+		/* Column 5: share of the total; not shown for specials. */
+		if (LT_LIST_SPECIALS != current_list_type) {
+			(void) snprintf(tmp, sizeof (tmp), "%.1f %%",
+			    (double)lt_stat_list_get_sum(list, i)
+			    / total * 100.0);
+		} else {
+			(void) snprintf(tmp, sizeof (tmp), "--- ");
+		}
+
+		fill_space_left(tmp, WIDTH_PCT, sizeof (tmp));
+
+		(void) mvwprintw(window, i + begin_line, BEGIN_PCT,
+		    "%s", tmp);
+	}
+}
+
+/*
+ * Print statistics in global pane.
+ */
+static void
+print_sysglobal(void)
+{
+	void *list;
+	char header[256];
+
+	if (!display_initialized) {
+		return;
+	}
+
+	(void) werase(sysglobal_window);
+
+	/* Reverse-video header line, padded to the full screen width. */
+	(void) wattron(sysglobal_window, A_REVERSE);
+	(void) snprintf(header, sizeof (header),
+	    "%s", "System wide latencies");
+	fill_space_right(header, screen_width, sizeof (header));
+	(void) mvwprintw(sysglobal_window, 0, 0, "%s", header);
+	(void) wattroff(sysglobal_window, A_REVERSE);
+
+	/*
+	 * Build the list for the current list type and sort order, print
+	 * at most 10 rows starting on the line below the header, then
+	 * release the list.
+	 */
+	list = lt_stat_list_create(current_list_type,
+	    LT_LEVEL_GLOBAL, 0, 0, 10, sort_type);
+	print_statistics(sysglobal_window, 1, 10, list);
+	lt_stat_list_free(list);
+
+	(void) wrefresh(sysglobal_window);
+}
+
+/*
+ * Print the current operation mode as "View: XY" on the first line of the
+ * process window, 8 columns from the right edge.
+ *
+ * X is '1', '2' or '3' for the cause, specials or synchronization object
+ * list ('?' for any other list type); Y is 'T' in thread mode, else 'P'.
+ */
+static void
+print_current_mode()
+{
+	char type;
+
+	if (!display_initialized) {
+		return;
+	}
+
+	/* Map the list type to the hot key that selects it. */
+	switch (current_list_type) {
+	case LT_LIST_CAUSE:
+		type = '1';
+		break;
+	case LT_LIST_SPECIALS:
+		type = '2';
+		break;
+	case LT_LIST_SOBJ:
+		type = '3';
+		break;
+	default:
+		type = '?';
+		break;
+	}
+
+	(void) mvwprintw(process_window, 0, screen_width - 8, "View: %c%c",
+	    type, thread_mode ? 'T' : 'P');
+}
+
+/*
+ * Print per-process statistics in process pane.
+ * This is called when mode of operation is process.
+ */
+static void
+print_process(unsigned int pid)
+{
+	void *list;
+	char header[256];
+	char tmp[30];	/* scratch buffer for the "Total:" time string */
+
+	if (!display_initialized) {
+		return;
+	}
+
+	list = lt_stat_list_create(current_list_type, LT_LEVEL_PROCESS,
+	    pid, 0, 8, sort_type);
+
+	/* Header line in reverse video, padded to the full screen width. */
+	(void) werase(process_window);
+	(void) wattron(process_window, A_REVERSE);
+	(void) snprintf(header, sizeof (header), "Process %s (%i), %d threads",
+	    lt_stat_proc_get_name(pid), pid, lt_stat_proc_get_nthreads(pid));
+	fill_space_right(header, screen_width, sizeof (header));
+	(void) mvwprintw(process_window, 0, 0, "%s", header);
+
+	/* The grand total is not shown for the specials list. */
+	if (current_list_type != LT_LIST_SPECIALS) {
+		(void) mvwprintw(process_window, 0, 48, "Total: %s",
+		    get_time_string((double)lt_stat_list_get_gtotal(list),
+		    tmp, sizeof (tmp), 12));
+	}
+
+	/* Drawn while A_REVERSE is still on, so it is part of the header. */
+	print_current_mode();
+	(void) wattroff(process_window, A_REVERSE);
+	/* At most 8 rows, starting on the line below the header. */
+	print_statistics(process_window, 1, 8, list);
+	lt_stat_list_free(list);
+
+	(void) wrefresh(process_window);
+}
+
+/*
+ * Display the list of processes that are tracked, in task bar.
+ * This one is called when mode of operation is process.
+ */ +static void +print_taskbar_process(pid_t *pidlist, int pidlist_len, int pidlist_index) +{ + const int ITEM_WIDTH = 8; + + int number_item; + int i; + int xpos = 0; + + if (!display_initialized) { + return; + } + + number_item = (screen_width / ITEM_WIDTH) - 1; + i = pidlist_index - (pidlist_index % number_item); + + (void) werase(taskbar); + + if (i != 0) { + (void) mvwprintw(taskbar, 0, xpos, "<-"); + } + + xpos = ITEM_WIDTH / 2; + + while (xpos + ITEM_WIDTH <= screen_width && i < pidlist_len) { + char str[ITEM_WIDTH+1]; + int slen; + const char *pname = lt_stat_proc_get_name(pidlist[i]); + + if (pname && pname[0]) { + (void) snprintf(str, sizeof (str) - 1, "%s", pname); + } else { + (void) snprintf(str, sizeof (str) - 1, + "<%d>", pidlist[i]); + } + + slen = strlen(str); + + if (slen < ITEM_WIDTH) { + (void) memset(&str[slen], ' ', ITEM_WIDTH - slen); + } + + str[sizeof (str) - 1] = '\0'; + + if (i == pidlist_index) { + (void) wattron(taskbar, A_REVERSE); + } + + (void) mvwprintw(taskbar, 0, xpos, "%s", str); + + if (i == pidlist_index) { + (void) wattroff(taskbar, A_REVERSE); + } + + xpos += ITEM_WIDTH; + i++; + } + + if (i != pidlist_len) { + (void) mvwprintw(taskbar, 0, screen_width - 2, "->"); + } + + (void) wrefresh(taskbar); +} + +/* + * Display the list of processes that are tracked, in task bar. + * This one is called when mode of operation is thread. 
+ */ +static void +print_taskbar_thread(pid_t *pidlist, id_t *tidlist, int list_len, + int list_index) +{ + const int ITEM_WIDTH = 12; + + int number_item; + int i; + int xpos = 0; + const char *pname = NULL; + pid_t last_pid = INVALID_PID; + + + if (!display_initialized) { + return; + } + + number_item = (screen_width - 8) / ITEM_WIDTH; + i = list_index - (list_index % number_item); + + (void) werase(taskbar); + + if (i != 0) { + (void) mvwprintw(taskbar, 0, xpos, "<-"); + } + + xpos = 4; + + while (xpos + ITEM_WIDTH <= screen_width && i < list_len) { + char str[ITEM_WIDTH+1]; + int slen, tlen; + + if (pidlist[i] != last_pid) { + pname = lt_stat_proc_get_name(pidlist[i]); + last_pid = pidlist[i]; + } + + /* + * Calculate length of thread's ID; use shorter process name + * in order to save space on the screen. + */ + tlen = snprintf(NULL, 0, "_%d", tidlist[i]); + + if (pname && pname[0]) { + (void) snprintf(str, sizeof (str) - tlen - 1, + "%s", pname); + } else { + (void) snprintf(str, sizeof (str) - tlen - 1, + "<%d>", pidlist[i]); + } + + slen = strlen(str); + + (void) snprintf(&str[slen], sizeof (str) - slen, + "_%d", tidlist[i]); + + slen += tlen; + + if (slen < ITEM_WIDTH) { + (void) memset(&str[slen], ' ', ITEM_WIDTH - slen); + } + + str[sizeof (str) - 1] = '\0'; + + if (i == list_index) { + (void) wattron(taskbar, A_REVERSE); + } + + (void) mvwprintw(taskbar, 0, xpos, "%s", str); + + if (i == list_index) { + (void) wattroff(taskbar, A_REVERSE); + } + + xpos += ITEM_WIDTH; + i++; + } + + if (i != list_len) { + (void) mvwprintw(taskbar, 0, screen_width - 2, "->"); + } + + (void) wrefresh(taskbar); +} + +/* + * Print per-thread statistics in process pane. + * This is called when mode of operation is thread. 
+ */
+static void
+print_thread(pid_t pid, id_t tid)
+{
+	void *list;
+	char header[256];
+	char tmp[30];	/* scratch buffer for the "Total:" time string */
+
+	if (!display_initialized) {
+		return;
+	}
+
+	list = lt_stat_list_create(current_list_type, LT_LEVEL_THREAD,
+	    pid, tid, 8, sort_type);
+
+	/* Header line in reverse video, padded to the full screen width. */
+	(void) werase(process_window);
+	(void) wattron(process_window, A_REVERSE);
+	(void) snprintf(header, sizeof (header),
+	    "Process %s (%i), LWP %d",
+	    lt_stat_proc_get_name(pid), pid, tid);
+	fill_space_right(header, screen_width, sizeof (header));
+	(void) mvwprintw(process_window, 0, 0, "%s", header);
+
+	/* The grand total is not shown for the specials list. */
+	if (current_list_type != LT_LIST_SPECIALS) {
+		(void) mvwprintw(process_window, 0, 48, "Total: %s",
+		    get_time_string(
+		    (double)lt_stat_list_get_gtotal(list),
+		    tmp, sizeof (tmp), 12));
+	}
+
+	print_current_mode();
+	(void) wattroff(process_window, A_REVERSE);
+	print_statistics(process_window, 1, 8, list);
+	lt_stat_list_free(list);
+	(void) wrefresh(process_window);
+}
+
+/*
+ * Update hint string at the bottom line. The message to print is stored in
+ * hint. If hint is NULL, the function will display its own message.
+ */
+static void
+print_hint(const char *hint)
+{
+	/*
+	 * Built-in hints, shown in rotation. Every entry needs its own
+	 * trailing comma: adjacent string literals are concatenated by the
+	 * compiler into a single (over-long) hint.
+	 */
+	const char *HINTS[] = {
+	    "Press '<' or '>' to switch between processes.",
+	    "Press 'q' to exit.",
+	    "Press 'r' to refresh immediately.",
+	    "Press 't' to toggle Process/Thread display mode.",
+	    "Press 'h' for help.",
+	    "Use 'c', 'a', 'm', 'p' to change sort criteria.",
+	    "Use '1', '2', '3' to switch between windows."
+	};
+	const uint64_t update_interval = 5000; /* 5 seconds */
+
+	static int index = 0;		/* next built-in hint to show */
+	static uint64_t next_hint = 0;	/* earliest time for the next hint */
+	uint64_t now = lt_millisecond();
+
+	if (!display_initialized) {
+		return;
+	}
+
+	if (hint == NULL) {
+		/* Keep the current hint until its interval has elapsed. */
+		if (now < next_hint) {
+			return;
+		}
+
+		hint = HINTS[index];
+		index = (index + 1) % (sizeof (HINTS) / sizeof (HINTS[0]));
+		next_hint = now + update_interval;
+	} else {
+		/*
+		 * Important messages are displayed at least every 2 cycles.
+ */ + next_hint = now + update_interval * 2; + } + + (void) werase(hintbar); + (void) mvwprintw(hintbar, 0, (screen_width - strlen(hint)) / 2, + "%s", hint); + (void) wrefresh(hintbar); +} + +/* + * Create a PID list or a PID/TID list (if operation mode is thread) from + * available statistics. + */ +static void +get_plist(pid_t **plist, id_t **tlist, int *list_len, int *list_index) +{ + if (!thread_mode) { + /* Per-process mode */ + *list_len = lt_stat_proc_list_create(plist, NULL); + + /* Search for previously selected PID */ + for (*list_index = 0; *list_index < *list_len && + (*plist)[*list_index] != selected_pid; + ++*list_index) { + } + + if (*list_index >= *list_len) { + /* + * The previously selected pid is gone. + * Select the first one. + */ + *list_index = 0; + } + } else { + /* Per-thread mode */ + *list_len = lt_stat_proc_list_create(plist, tlist); + + /* Search for previously selected PID & TID */ + for (*list_index = 0; *list_index < *list_len; + ++*list_index) { + if ((*plist)[*list_index] == selected_pid && + (*tlist)[*list_index] == selected_tid) { + break; + } + } + + if (*list_index >= *list_len) { + /* + * The previously selected pid/tid is gone. + * Select the first one. + */ + for (*list_index = 0; + *list_index < *list_len && + (*plist)[*list_index] != selected_pid; + ++*list_index) { + } + } + + if (*list_index >= *list_len) { + /* + * The previously selected pid is gone. 
+ * Select the first one + */ + *list_index = 0; + } + } +} + +/* Print help message when user presses 'h' hot key */ +static void +print_help(void) +{ + const char *HELP[] = { + TITLE, + COPYRIGHT, + "", + "These single-character commands are available:", + "< - Move to previous process/thread.", + "> - Move to next process/thread.", + "q - Exit.", + "r - Refresh.", + "t - Toggle process/thread mode.", + "c - Sort by count.", + "a - Sort by average.", + "m - Sort by maximum.", + "p - Sort by percent.", + "1 - Show list by causes.", + "2 - Show list of special entries.", + "3 - Show list by synchronization objects.", + "h - Show this help.", + "", + "Press any key to continue..." + }; + int i; + + if (!display_initialized) { + return; + } + + for (i = 0; i < sizeof (HELP) / sizeof (HELP[0]); ++i) { + (void) mvwprintw(stdscr, i, 0, "%s", HELP[i]); + } + + (void) refresh(); +} + +/* + * Print title on screen + */ +static void +print_title(void) +{ + if (!display_initialized) { + return; + } + + (void) wattrset(titlebar, COLOR_PAIR(LT_COLOR_HEADER)); + (void) wbkgd(titlebar, COLOR_PAIR(LT_COLOR_HEADER)); + (void) werase(titlebar); + + (void) mvwprintw(titlebar, 0, (screen_width - strlen(TITLE)) / 2, + "%s", TITLE); + (void) wrefresh(titlebar); + + (void) werase(captionbar); + (void) mvwprintw(captionbar, 0, 0, "%s", + " Cause " + "Count Average Maximum Percent"); + (void) wrefresh(captionbar); + + (void) wattrset(hintbar, COLOR_PAIR(LT_COLOR_HEADER)); + (void) wbkgd(hintbar, COLOR_PAIR(LT_COLOR_HEADER)); +} + +/* + * Handle signal from terminal resize + */ +/* ARGSUSED */ +static void +on_resize(int sig) +{ + lt_gpipe_break("r"); +} + +/* + * Initialize display. Display will be cleared when this function returns. 
+ */ +void +lt_display_init(void) +{ + if (display_initialized) { + return; + } + + /* Window resize signal */ + (void) signal(SIGWINCH, on_resize); + + /* Initialize curses library */ + (void) initscr(); + (void) start_color(); + (void) keypad(stdscr, TRUE); + (void) nonl(); + (void) cbreak(); + (void) noecho(); + (void) curs_set(0); + + /* Set up color pairs */ + (void) init_pair(LT_COLOR_DEFAULT, COLOR_WHITE, COLOR_BLACK); + (void) init_pair(LT_COLOR_HEADER, COLOR_BLACK, COLOR_WHITE); + + curses_inited = TRUE; + getmaxyx(stdscr, screen_height, screen_width); + + if (screen_width < LT_WINDOW_X || screen_height < LT_WINDOW_Y) { + (void) mvwprintw(stdscr, 0, 0, "Terminal size is too small."); + (void) mvwprintw(stdscr, 1, 0, + "Please resize it to 80x24 or larger."); + (void) mvwprintw(stdscr, 2, 0, "Press q to quit."); + (void) refresh(); + return; + } + + /* Set up all window panes */ + titlebar = subwin(stdscr, 1, screen_width, 0, 0); + captionbar = subwin(stdscr, 1, screen_width, 1, 0); + sysglobal_window = subwin(stdscr, screen_height / 2 - 1, + screen_width, 2, 0); + process_window = subwin(stdscr, screen_height / 2 - 3, + screen_width, screen_height / 2 + 1, 0); + taskbar = subwin(stdscr, 1, screen_width, screen_height - 2, 0); + hintbar = subwin(stdscr, 1, screen_width, screen_height - 1, 0); + (void) werase(stdscr); + (void) refresh(); + + display_initialized = TRUE; + + print_title(); +} + +/* + * The event loop for display. It displays data on screen and handles hotkey + * presses. + * + * Parameter : + * duration - returns after 'duration' + * + * The function also returns if user presses 'q', 'Ctrl+C' or 'r'. 
+ * + * Return value: + * 0 - main() exits + * 1 - main() calls it again + */ +int +lt_display_loop(int duration) +{ + uint64_t start; + int remaining; + struct timeval timeout; + fd_set read_fd; + int need_refresh = TRUE; + pid_t *plist = NULL; + id_t *tlist = NULL; + int list_len = 0; + int list_index = 0; + int retval = 1; + int next_snap; + int gpipe; + + start = lt_millisecond(); + gpipe = lt_gpipe_readfd(); + + if (!show_help) { + print_hint(NULL); + print_sysglobal(); + } + + get_plist(&plist, &tlist, &list_len, &list_index); + + for (;;) { + if (list_len != 0 && need_refresh && !show_help) { + if (!thread_mode) { + print_taskbar_process(plist, list_len, + list_index); + print_process(plist[list_index]); + } else { + print_taskbar_thread(plist, tlist, + list_len, list_index); + print_thread(plist[list_index], + tlist[list_index]); + } + } + + need_refresh = TRUE; /* Usually we need refresh. */ + remaining = duration - (int)(lt_millisecond() - start); + + if (remaining <= 0) { + break; + } + + /* Embedded dtrace snap action here. */ + next_snap = lt_dtrace_work(0); + + if (next_snap == 0) { + /* + * Just did a snap, check time for the next one. + */ + next_snap = lt_dtrace_work(0); + } + + if (next_snap > 0 && remaining > next_snap) { + remaining = next_snap; + } + + timeout.tv_sec = remaining / 1000; + timeout.tv_usec = (remaining % 1000) * 1000; + + FD_ZERO(&read_fd); + FD_SET(0, &read_fd); + FD_SET(gpipe, &read_fd); + + /* Wait for keyboard input, or signal from gpipe */ + if (select(gpipe + 1, &read_fd, NULL, NULL, &timeout) > 0) { + int k = 0; + + if (FD_ISSET(gpipe, &read_fd)) { + /* Data from pipe has priority */ + char ch; + (void) read(gpipe, &ch, 1); + k = ch; /* Need this for big-endianness */ + } else { + k = getch(); + } + + /* + * Check if we need to update the hint line whenever we + * get a chance. + * NOTE: current implementation depends on + * g_config.lt_cfg_snap_interval, but it's OK because it + * doesn't have to be precise. 
+ */ + print_hint(NULL); + /* + * If help is on display right now, and a key press + * happens, we need to clear the help and continue. + */ + if (show_help) { + (void) werase(stdscr); + (void) refresh(); + print_title(); + print_sysglobal(); + show_help = FALSE; + /* Drop this key and continue */ + continue; + } + + switch (k) { + case 'Q': + case 'q': + retval = 0; + goto quit; + case 'R': + case 'r': + lt_display_deinit(); + lt_display_init(); + goto quit; + case 'H': + case 'h': + show_help = TRUE; + (void) werase(stdscr); + (void) refresh(); + print_help(); + break; + case ',': + case '<': + case KEY_LEFT: + --list_index; + + if (list_index < 0) { + list_index = 0; + } + + break; + case '.': + case '>': + case KEY_RIGHT: + ++list_index; + + if (list_index >= list_len) { + list_index = list_len - 1; + } + + break; + case 'a': + case 'A': + sort_type = LT_SORT_AVG; + print_sysglobal(); + break; + case 'p': + case 'P': + sort_type = LT_SORT_TOTAL; + print_sysglobal(); + break; + case 'm': + case 'M': + sort_type = LT_SORT_MAX; + print_sysglobal(); + break; + case 'c': + case 'C': + sort_type = LT_SORT_COUNT; + print_sysglobal(); + break; + case 't': + case 'T': + if (plist != NULL) { + selected_pid = plist[list_index]; + } + + selected_tid = INVALID_TID; + thread_mode = !thread_mode; + get_plist(&plist, &tlist, + &list_len, &list_index); + break; + case '1': + case '!': + current_list_type = LT_LIST_CAUSE; + print_sysglobal(); + break; + case '2': + case '@': + if (g_config.lt_cfg_low_overhead_mode) { + lt_display_error("Switching mode is " + "not available for '-f low'."); + } else { + current_list_type = LT_LIST_SPECIALS; + print_sysglobal(); + } + + break; + case '3': + case '#': + if (g_config.lt_cfg_trace_syncobj) { + current_list_type = LT_LIST_SOBJ; + print_sysglobal(); + } else if (g_config.lt_cfg_low_overhead_mode) { + lt_display_error("Switching mode is " + "not available for '-f low'."); + } else { + lt_display_error("Tracing " + "synchronization 
objects is " + "disabled."); + } + + break; + default: + /* Wake up for nothing; no refresh is needed */ + need_refresh = FALSE; + break; + } + } else { + need_refresh = FALSE; + } + } + +quit: + if (plist != NULL) { + selected_pid = plist[list_index]; + } + + if (tlist != NULL) { + selected_tid = tlist[list_index]; + } + + lt_stat_proc_list_free(plist, tlist); + + return (retval); +} + +/* + * Clean up display. + */ +void +lt_display_deinit(void) +{ + if (curses_inited) { + (void) clear(); + (void) refresh(); + (void) endwin(); + } + + titlebar = NULL; + captionbar = NULL; + sysglobal_window = NULL; + taskbar = NULL; + process_window = NULL; + hintbar = NULL; + screen_width = 1; + screen_height = 1; + + display_initialized = FALSE; + curses_inited = FALSE; +} + +/* + * Print message when display error happens. + */ +/* ARGSUSED */ +void +lt_display_error(const char *fmt, ...) +{ + va_list vl; + char tmp[81]; + int l; + + va_start(vl, fmt); + (void) vsnprintf(tmp, sizeof (tmp), fmt, vl); + va_end(vl); + + l = strlen(tmp); + + while (l > 0 && (tmp[l - 1] == '\n' || tmp[l - 1] == '\r')) { + tmp[l - 1] = '\0'; + --l; + } + + if (!display_initialized) { + (void) fprintf(stderr, "%s\n", tmp); + } else if (!show_help) { + print_hint(tmp); + } + +} diff --git a/usr/src/cmd/latencytop/common/dwrapper.c b/usr/src/cmd/latencytop/common/dwrapper.c new file mode 100644 index 0000000000..d1920e5500 --- /dev/null +++ b/usr/src/cmd/latencytop/common/dwrapper.c @@ -0,0 +1,573 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2008-2009, Intel Corporation. + * All Rights Reserved. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "latencytop.h" + +static dtrace_hdl_t *g_dtp = NULL; /* dtrace handle */ +static pid_t pid_self = -1; /* PID of our own process */ + +/* + * Ignore sched if sched is not tracked. + * Also ignore ourselves (i.e., latencytop). + */ +#define SHOULD_IGNORE(pid) \ + ((!g_config.lt_cfg_trace_sched && 0 == (pid)) || pid_self == (pid)) + +/* + * Get an integer value from dtrace record. + */ +static uint64_t +rec_get_value(void *a, size_t b) +{ + uint64_t ret = 0; + + switch (b) { + case sizeof (uint64_t): + ret = *((uint64_t *)(a)); + break; + case sizeof (uint32_t): + ret = *((uint32_t *)(a)); + break; + case sizeof (uint16_t): + ret = *((uint16_t *)(a)); + break; + case sizeof (uint8_t): + ret = *((uint8_t *)(a)); + break; + default: + break; + } + + return (ret); +} + +/* + * Callback to process aggregation lt_call_* (related to on/off cpu + * activities) in the snapshot. 
+ */ +static int +aggwalk_call(const dtrace_aggdata_t *data, lt_stat_type_t stat_type) +{ + dtrace_aggdesc_t *aggdesc = data->dtada_desc; + dtrace_syminfo_t dts; + GElf_Sym sym; + caddr_t addr; + pid_t pid; + id_t tid; + unsigned int stack_depth; + unsigned int pc_size; + uint64_t pc; + uint64_t agg_value; + char *ptr = NULL; + char *buffer = NULL; + int ptrsize; + unsigned int buffersize; + char *tag = NULL; + unsigned int priority; + enum { REC_PID = 1, REC_TID, REC_STACK, REC_TAG, REC_PRIO, REC_AGG, + NREC }; + + /* Check action type */ + if ((aggdesc->dtagd_nrecs < NREC) || + (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) || + (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) || + (aggdesc->dtagd_rec[REC_TAG].dtrd_action != DTRACEACT_DIFEXPR) || + (aggdesc->dtagd_rec[REC_PRIO].dtrd_action != DTRACEACT_DIFEXPR) || + (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action)) || + (aggdesc->dtagd_rec[REC_STACK].dtrd_action != DTRACEACT_STACK)) { + + return (-1); + } + + pid = rec_get_value( + data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset, + aggdesc->dtagd_rec[REC_PID].dtrd_size); + + if (SHOULD_IGNORE(pid)) { + return (0); + } + + tid = rec_get_value( + data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset, + aggdesc->dtagd_rec[REC_TID].dtrd_size); + + /* Parse stack array from dtagd_rec */ + stack_depth = aggdesc->dtagd_rec[REC_STACK].dtrd_arg; + pc_size = aggdesc->dtagd_rec[REC_STACK].dtrd_size / stack_depth; + addr = data->dtada_data + aggdesc->dtagd_rec[REC_STACK].dtrd_offset; + buffersize = (stack_depth * (2 * PATH_MAX + 2) + 1) * sizeof (char); + buffer = (char *)lt_malloc(buffersize); + ptr = buffer; + ptrsize = buffersize; + + /* Print the stack */ + while (stack_depth > 0) { + pc = rec_get_value(addr, pc_size); + + if (pc == 0) { + break; + } + + addr += pc_size; + + if (dtrace_lookup_by_addr(g_dtp, pc, &sym, &dts) == 0) { + int len; + len = snprintf(ptr, ptrsize, + "%s`%s ", dts.dts_object, dts.dts_name); + 
ptrsize -= len; + + if (ptrsize <= 0) { + /* + * snprintf returns "desired" length, so + * reaching here means our buffer is full. + * Move ptr to the last byte of the buffer and + * break. + */ + ptr = &buffer[buffersize-1]; + break; + } else { + ptr += len; + } + } + } + + if (ptr != buffer) { + /* + * We have printed something, so it is safe to remove + * the last ' '. + */ + *(ptr-1) = '\0'; + } + + tag = (char *)data->dtada_data + + aggdesc->dtagd_rec[REC_TAG].dtrd_offset; + + priority = rec_get_value( + data->dtada_data + aggdesc->dtagd_rec[REC_PRIO].dtrd_offset, + aggdesc->dtagd_rec[REC_PRIO].dtrd_size); + + agg_value = rec_get_value( + data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset, + aggdesc->dtagd_rec[REC_AGG].dtrd_size); + + lt_stat_update(pid, tid, buffer, tag, priority, stat_type, agg_value); + + if (buffer != NULL) { + free(buffer); + } + + return (0); +} + +/* + * Callback to process aggregation lt_named_* (related to lock spinning etc.), + * in the snapshot. + */ +static int +aggwalk_named(const dtrace_aggdata_t *data, lt_stat_type_t stat_type) +{ + dtrace_aggdesc_t *aggdesc = data->dtada_desc; + pid_t pid; + id_t tid; + uint64_t agg_value; + int cause_id; + char *type = NULL; + enum { REC_PID = 1, REC_TID, REC_TYPE, REC_AGG, NREC }; + + /* Check action type */ + if ((aggdesc->dtagd_nrecs < NREC) || + (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) || + (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) || + (aggdesc->dtagd_rec[REC_TYPE].dtrd_action != DTRACEACT_DIFEXPR) || + (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action))) { + + return (-1); + } + + pid = rec_get_value( + data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset, + aggdesc->dtagd_rec[REC_PID].dtrd_size); + + if (SHOULD_IGNORE(pid)) { + return (0); + } + + tid = rec_get_value( + data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset, + aggdesc->dtagd_rec[REC_TID].dtrd_size); + + type = (char *)data->dtada_data + + 
aggdesc->dtagd_rec[REC_TYPE].dtrd_offset; + cause_id = lt_table_cause_from_name(type, 1, CAUSE_FLAG_SPECIAL); + + agg_value = rec_get_value( + data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset, + aggdesc->dtagd_rec[REC_AGG].dtrd_size); + + lt_stat_update_cause(pid, tid, cause_id, stat_type, agg_value); + + return (0); + +} + +/* + * Callback to process aggregation lt_sync_* (related to synchronization + * objects), in the snapshot. + */ +static int +aggwalk_sync(const dtrace_aggdata_t *data, lt_stat_type_t stat_type) +{ + dtrace_aggdesc_t *aggdesc = data->dtada_desc; + pid_t pid; + id_t tid; + uint64_t agg_value; + int stype; + unsigned long long wchan; + enum { REC_PID = 1, REC_TID, REC_STYPE, REC_WCHAN, REC_AGG, NREC }; + + /* Check action type */ + if ((aggdesc->dtagd_nrecs < NREC) || + (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) || + (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) || + (aggdesc->dtagd_rec[REC_STYPE].dtrd_action != DTRACEACT_DIFEXPR) || + (aggdesc->dtagd_rec[REC_WCHAN].dtrd_action != DTRACEACT_DIFEXPR) || + (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action))) { + + return (-1); + } + + pid = rec_get_value( + data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset, + aggdesc->dtagd_rec[REC_PID].dtrd_size); + + if (SHOULD_IGNORE(pid)) { + return (0); + } + + tid = rec_get_value( + data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset, + aggdesc->dtagd_rec[REC_TID].dtrd_size); + + stype = rec_get_value( + data->dtada_data + aggdesc->dtagd_rec[REC_STYPE].dtrd_offset, + aggdesc->dtagd_rec[REC_STYPE].dtrd_size); + + wchan = rec_get_value( + data->dtada_data + aggdesc->dtagd_rec[REC_WCHAN].dtrd_offset, + aggdesc->dtagd_rec[REC_WCHAN].dtrd_size); + + agg_value = rec_get_value( + data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset, + aggdesc->dtagd_rec[REC_AGG].dtrd_size); + + lt_stat_update_sobj(pid, tid, stype, wchan, stat_type, agg_value); + + return (0); +} + +/* + * Callback to 
process various aggregations in the snapshot. Called by + * different aggwalk_* functions. + */ +/* ARGSUSED */ +static int +aggwalk(const dtrace_aggdata_t *data, void *arg) +{ + char *tmp; + char buffer[32]; + lt_stat_type_t stat_type; + int (*func)(const dtrace_aggdata_t *, lt_stat_type_t); + + (void) strncpy(buffer, data->dtada_desc->dtagd_name, sizeof (buffer)); + buffer[sizeof (buffer) - 1] = '\0'; + tmp = strtok(buffer, "_"); + + if (tmp == NULL || strcmp(tmp, "lt") != 0) { + goto done; + } + + tmp = strtok(NULL, "_"); + + if (tmp == NULL) { + goto done; + } else if (strcmp(tmp, "call") == 0) { + func = aggwalk_call; + } else if (strcmp(tmp, "named") == 0) { + func = aggwalk_named; + } else if (strcmp(tmp, "sync") == 0) { + func = aggwalk_sync; + } else { + goto done; + } + + tmp = strtok(NULL, "_"); + + if (tmp == NULL) { + goto done; + } else if (strcmp(tmp, "count") == 0) { + stat_type = LT_STAT_COUNT; + } else if (strcmp(tmp, "sum") == 0) { + stat_type = LT_STAT_SUM; + } else if (strcmp(tmp, "max") == 0) { + stat_type = LT_STAT_MAX; + } else { + goto done; + } + + (void) func(data, stat_type); + +done: + /* We have our data, so remove it from DTrace now */ + return (DTRACE_AGGWALK_REMOVE); +} + +/* + * Callback to handle event caused by DTrace dropping data. + */ +/*ARGSUSED*/ +static int +drop_handler(const dtrace_dropdata_t *data, void *user) +{ + lt_display_error("Drop: %s\n", data->dtdda_msg); + + /* Pretend nothing happened, so just continue */ + return (DTRACE_HANDLE_OK); +} + +#ifndef EMBED_CONFIGS +/* + * Copy the content from a "real" file into a temp file. + */ +static int +copy_tmp_file(const char *src, FILE *dst) +{ + FILE *tmp = NULL; + char buffer[256]; + int bytes; + + if ((tmp = fopen(src, "r")) == NULL) { + return (-1); + } + + while ((bytes = fread(buffer, 1, sizeof (buffer), tmp)) > 0) { + if (fwrite(buffer, bytes, 1, dst) != 1) { + return (-1); + } + } + + (void) fclose(tmp); + + return (0); +} +#endif + +/* + * DTrace initialization. 
D script starts running when this function returns. + */ +int +lt_dtrace_init(void) +{ + dtrace_prog_t *prog; + dtrace_proginfo_t info; + int err; + FILE *fp_script = NULL; + + pid_self = getpid(); + + if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) { + lt_display_error("Cannot open dtrace library: %s\n", + dtrace_errmsg(NULL, err)); + return (-1); + } + + if (dtrace_handle_drop(g_dtp, &drop_handler, NULL) == -1) { + lt_display_error("Cannot install DTrace handle: %s\n", + dtrace_errmsg(NULL, err)); + return (-1); + } + + if (g_config.lt_cfg_enable_filter) { + if ((err = dtrace_setopt(g_dtp, "define", + "ENABLE_FILTER")) != 0) { + lt_display_error( + "Failed to set option ENABLE_FILTER.\n"); + return (err); + } + } + + if (g_config.lt_cfg_trace_syncobj) { + if ((err = dtrace_setopt(g_dtp, "define", + "ENABLE_SYNCOBJ")) != 0) { + lt_display_error( + "Failed to set option ENABLE_SYNCOBJ.\n"); + return (err); + } + } + + if (g_config.lt_cfg_trace_sched) { + if ((err = dtrace_setopt(g_dtp, "define", + "ENABLE_SCHED")) != 0) { + lt_display_error( + "Failed to set option ENABLE_SYNCOBJ.\n"); + return (err); + } + } + + if (g_config.lt_cfg_low_overhead_mode) { + if ((err = dtrace_setopt(g_dtp, "define", + "ENABLE_LOW_OVERHEAD")) != 0) { + lt_display_error( + "Failed to set option ENABLE_SYNCOBJ.\n"); + return (err); + } + } + + /* Create a temp file; libdtrace needs it for cpp(1) */ + if ((fp_script = tmpfile()) == NULL) { + lt_display_error("Cannot create tmp file\n"); + return (-1); + } + + /* Copy the main D script into the temp file */ +#ifdef EMBED_CONFIGS + if (fwrite(&latencytop_d_start, + (size_t)(&latencytop_d_end - &latencytop_d_start), 1, fp_script) + != 1) { + lt_display_error("Could not copy D script, fwrite() failed\n"); + (void) fclose(fp_script); + return (-1); + } +#else + if (copy_tmp_file(DEFAULT_D_SCRIPT_NAME, fp_script) != 0) { + lt_display_error("Cannot open script file %s\n", + DEFAULT_D_SCRIPT_NAME); + (void) fclose(fp_script); + return 
(-1); + } +#endif /* EMBED_CONFIGS */ + + if (lt_table_append_trans(fp_script) != 0) { + (void) fclose(fp_script); + return (-1); + } + + (void) fseek(fp_script, 0, SEEK_SET); + + if ((prog = dtrace_program_fcompile(g_dtp, fp_script, + DTRACE_C_CPP, 0, NULL)) == NULL) { + lt_display_error("Failed to compile D script.\n"); + (void) fclose(fp_script); + return (dtrace_errno(g_dtp)); + } + + (void) fclose(fp_script); + + /* Execute the D script */ + if (dtrace_program_exec(g_dtp, prog, &info) == -1) { + lt_display_error("Failed to enable probes.\n"); + return (dtrace_errno(g_dtp)); + } + + if (dtrace_go(g_dtp) != 0) { + lt_display_error("Failed to run D script.\n"); + return (dtrace_errno(g_dtp)); + } + + return (0); +} + +/* + * Worker function to move aggregate data to user space. Called periodically + * to prevent the kernel from running out of memory. + */ +int +lt_dtrace_work(int force) +{ + static uint64_t last_snap = 0; + uint64_t now = lt_millisecond(); + + if (!force && now - last_snap < g_config.lt_cfg_snap_interval) { + return (last_snap + g_config.lt_cfg_snap_interval - now); + } + + if (dtrace_status(g_dtp) == -1) { + lt_display_error("Failed when getting status: %s\n", + dtrace_errmsg(g_dtp, dtrace_errno(g_dtp))); + return (-1); + } + + if (dtrace_aggregate_snap(g_dtp) != 0) { + lt_display_error("Failed to snap aggregate: %s\n", + dtrace_errmsg(g_dtp, dtrace_errno(g_dtp))); + return (-1); + } + + last_snap = now; + return (0); +} + +/* + * Walk through dtrace aggregator and collect data for latencytop to display. + * Called immediately before UI update. + */ +int +lt_dtrace_collect(void) +{ + if (lt_dtrace_work(1) != 0) { + return (-1); + } + + if (dtrace_aggregate_walk(g_dtp, aggwalk, NULL) != 0) { + lt_display_error("Failed to sort aggregate: %s\n", + dtrace_errmsg(g_dtp, dtrace_errno(g_dtp))); + return (-1); + } + + /* + * Probably we don't need to clear again, because we have removed + * everything. Paranoid ? 
+ */ + dtrace_aggregate_clear(g_dtp); + + return (0); +} + +/* + * dtrace clean up. + */ +void +lt_dtrace_deinit(void) +{ + (void) dtrace_stop(g_dtp); + dtrace_close(g_dtp); +} diff --git a/usr/src/cmd/latencytop/common/klog.c b/usr/src/cmd/latencytop/common/klog.c new file mode 100644 index 0000000000..104145ecae --- /dev/null +++ b/usr/src/cmd/latencytop/common/klog.c @@ -0,0 +1,226 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2008-2009, Intel Corporation. + * All Rights Reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "latencytop.h" + +static GHashTable *proc_table = NULL; /* pid -> char * */ +static GHashTable *klog_table = NULL; /* char * -> uint64_t total */ +static char klog_filename[PATH_MAX] = DEFAULT_KLOG_FILE; +static int klog_level = LT_KLOG_LEVEL_NONE; + +static void +print_proc(void *key, const char *args, FILE *fp) +{ + (void) fprintf(fp, "%-8ld \"%s\"\n", (long)key, args); +} + +static void +print_stat(const char *key, lt_stat_data_t *log, FILE *fp) +{ + (void) fprintf(fp, "%lld, %lld, %lld, %s\n", + (long long)log->lt_s_total, + (long long)log->lt_s_count, + (long long)log->lt_s_max, + key); +} + +/* + * Initialization for kernel logging. + */ +void +lt_klog_init(void) +{ + if (klog_table != NULL || proc_table != NULL) { + return; + } + + klog_table = g_hash_table_new_full(g_str_hash, g_str_equal, + (GDestroyNotify)free, (GDestroyNotify)free); + lt_check_null(klog_table); + + proc_table = g_hash_table_new_full(g_direct_hash, g_direct_equal, + NULL, (GDestroyNotify)free); + lt_check_null(proc_table); +} + +/* + * Set log file path. + */ +int +lt_klog_set_log_file(const char *filename) +{ + FILE *fp; + int file_exist; + + g_assert(strlen(filename) < sizeof (klog_filename)); + + file_exist = lt_file_exist(filename); + /* Test if we can write to the file */ + fp = fopen(filename, "a"); + + if (fp == NULL) { + return (-2); + } + + (void) fclose(fp); + /* Don't leave empty file around */ + if (!file_exist) { + (void) unlink(filename); + } + + (void) strncpy(klog_filename, filename, + sizeof (klog_filename)); + + return (0); +} + +/* + * Set log level. + */ +int +lt_klog_set_log_level(int level) +{ + if (level < 0 || level > (int)LT_KLOG_LEVEL_ALL) { + return (-1); + } + + klog_level = level; + + return (0); +} + +/* + * Write content to log file. 
+ */
+void
+lt_klog_write(void)
+{
+	char stamp[32];
+	FILE *out;
+
+	/* Logging is switched off: nothing to write. */
+	if (klog_level == LT_KLOG_LEVEL_NONE) {
+		return;
+	}
+
+	g_assert(klog_table != NULL && proc_table != NULL);
+
+	/* Append to the log; a file that cannot be opened is skipped. */
+	if ((out = fopen(klog_filename, "a")) == NULL) {
+		return;
+	}
+
+	lt_time_str(stamp, sizeof (stamp));
+
+	/* Section 1: the processes seen so far. */
+	(void) fprintf(out, "# Log generated at %s by %s\n", stamp, TITLE);
+	(void) fprintf(out, "# List of processes\n");
+	(void) fprintf(out, "PID, CMD\n");
+	g_hash_table_foreach(proc_table, (GHFunc)print_proc, out);
+
+	/* Section 2: the accumulated statistics. */
+	(void) fprintf(out, "# Statistics\n");
+	(void) fprintf(out, "TOTAL, COUNT, MAX, PID, KSTACK\n");
+	g_hash_table_foreach(klog_table, (GHFunc)print_stat, out);
+
+	(void) fclose(out);
+}
+
+/*
+ * Clean up: destroy both hash tables, if they exist, and reset the
+ * pointers so that lt_klog_init() can be called again.
+ */
+void
+lt_klog_deinit(void)
+{
+	if (klog_table != NULL) {
+		g_hash_table_destroy(klog_table);
+	}
+
+	if (proc_table != NULL) {
+		g_hash_table_destroy(proc_table);
+	}
+
+	klog_table = NULL;
+	proc_table = NULL;
+}
+
+/*
+ * Write a kernel stack and its statistics to log file. Only "total" will
+ * be logged, others are internally discarded.
+ */ +/* ARGSUSED */ +void +lt_klog_log(int level, pid_t pid, char *stack, + lt_stat_type_t type, uint64_t value) +{ + lt_stat_data_t *entry = NULL; + char *psargs; + char *str; + int str_len; + + if ((level & klog_level) == 0) { + return; + } + + g_assert(klog_table != NULL && proc_table != NULL); + psargs = (char *)g_hash_table_lookup(proc_table, + LT_INT_TO_POINTER(pid)); + + if (psargs == NULL) { + psargs = lt_get_proc_field(pid, LT_FIELD_PSARGS); + + if (psargs == NULL) { + psargs = lt_get_proc_field(pid, LT_FIELD_FNAME); + } + + if (psargs == NULL) { + return; + } + + g_hash_table_insert(proc_table, + LT_INT_TO_POINTER(pid), psargs); + } + + str_len = strlen(stack) + 20; + str = lt_malloc(str_len); + (void) snprintf(str, str_len, "%ld, \"%s\"", pid, stack); + entry = (lt_stat_data_t *)g_hash_table_lookup(klog_table, str); + + if (entry == NULL) { + entry = (lt_stat_data_t *)lt_zalloc(sizeof (lt_stat_data_t)); + g_hash_table_insert(klog_table, str, entry); + } else { + free(str); + } + + lt_update_stat_value(entry, type, value); +} diff --git a/usr/src/cmd/latencytop/common/latencytop.c b/usr/src/cmd/latencytop/common/latencytop.c new file mode 100644 index 0000000000..81451eadf6 --- /dev/null +++ b/usr/src/cmd/latencytop/common/latencytop.c @@ -0,0 +1,486 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2008-2009, Intel Corporation. + * All Rights Reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "latencytop.h" + +#define CMPOPT(a, b) strncmp((a), (b), sizeof (b)) + +lt_config_t g_config; + +typedef enum { + LT_CMDOPT_INTERVAL, + LT_CMDOPT_LOG_FILE, + LT_CMDOPT_LOG_LEVEL, + LT_CMDOPT_LOG_INTERVAL, + LT_CMDOPT_CONFIG_FILE, + LT_CMDOPT_F_FILTER, + LT_CMDOPT_F_SCHED, + LT_CMDOPT_F_SOBJ, + LT_CMDOPT_F_LOW, + LT_CMDOPT__LAST /* Must be last one */ +} lt_cmd_option_id_t; + +/* + * Check for duplicate command line options. + * Returns TRUE if duplicate options with different values are found, + * returns FALSE otherwise. + */ +static int +check_opt_dup(lt_cmd_option_id_t id, uint64_t value) { + + static int opt_set[(int)LT_CMDOPT__LAST]; + static uint64_t opt_val[(int)LT_CMDOPT__LAST]; + + const char *errmsg[] = { + "-t is set more than once with different values.", + "-o is set more than once.", + "-k is set more than once with different values.", + "-l is set more than once with different values.", + "-c is set more than once.", + "-f [no]filter is set more than once with different values.", + "-f [no]sched is set more than once with different values.", + "-f [no]sobj is set more than once with different values.", + "-f [no]low is set more than once with different values.", + }; + + g_assert(sizeof (errmsg)/sizeof (errmsg[0]) == (int)LT_CMDOPT__LAST); + + if (!opt_set[(int)id]) { + opt_set[(int)id] = TRUE; + opt_val[(int)id] = value; + return (FALSE); + } + + if (opt_val[(int)id] != value) { + (void) fprintf(stderr, "%s\n", errmsg[(int)id]); + return (TRUE); + } + + return (FALSE); +} + +/* + * Print command-line help message. 
+ */ +static void +print_usage(const char *execname, int long_help) +{ + char buffer[PATH_MAX]; + (void) snprintf(buffer, sizeof (buffer), "%s", execname); + + if (!long_help) { + /* Print short help to stderr. */ + (void) fprintf(stderr, "Usage: %s [option(s)], ", + basename(buffer)); + (void) fprintf(stderr, "use '%s -h' for details.\n", + basename(buffer)); + return; + } + + (void) printf("Usage: %s [option(s)]\n", basename(buffer)); + (void) printf("Options:\n" + " -h, --help\n" + " Print this help.\n" + " -t, --interval TIME\n" + " Set refresh interval to TIME. " + "Valid range [1...60] seconds, default = 5\n" + /* + * Option "-c, --config FILE" is not user-visible for now. + * When we have chance to properly document the format of translation + * rules, we'll make it user-visible. + */ + " -o, --output-log-file FILE\n" + " Output kernel log to FILE. Default = " + DEFAULT_KLOG_FILE "\n" + " -k, --kernel-log-level LEVEL\n" + " Set kernel log level to LEVEL.\n" + " 0(default) = None, 1 = Unmapped, 2 = Mapped, 3 = All.\n" + " -f, --feature [no]feature1,[no]feature2,...\n" + " Enable/disable features in LatencyTOP.\n" + " [no]filter:\n" + " Filter large interruptible latencies, e.g. sleep.\n" + " [no]sched:\n" + " Monitors sched (PID=0).\n" + " [no]sobj:\n" + " Monitors synchronization objects.\n" + " [no]low:\n" + " Lower overhead by sampling small latencies.\n" + " -l, --log-period TIME\n" + " Write and restart log every TIME seconds, TIME >= 60\n"); +} + +/* + * Properly exit latencytop when it receives SIGINT or SIGTERM. + */ +/* ARGSUSED */ +static void +signal_handler(int sig) +{ + lt_gpipe_break("q"); +} + +/* + * Convert string to integer. It returns error if extra characters are found. 
+ */ +static int +to_int(const char *str, int *result) +{ + char *tail = NULL; + long ret; + + if (str == NULL || result == NULL) { + return (-1); + } + + ret = strtol(str, &tail, 10); + + if (tail != NULL && *tail != '\0') { + return (-1); + } + + *result = (int)ret; + + return (0); +} + +/* + * The main function. + */ +int +main(int argc, char *argv[]) +{ + const char *opt_string = "t:o:k:hf:l:c:"; + struct option const longopts[] = { + {"interval", required_argument, NULL, 't'}, + {"output-log-file", required_argument, NULL, 'o'}, + {"kernel-log-level", required_argument, NULL, 'k'}, + {"help", no_argument, NULL, 'h'}, + {"feature", required_argument, NULL, 'f'}, + {"log-period", required_argument, NULL, 'l'}, + {"config", required_argument, NULL, 'c'}, + {NULL, 0, NULL, 0} + }; + + int optc; + int longind = 0; + int running = 1; + int unknown_option = FALSE; + int refresh_interval = 5; + int klog_level = 0; + int log_interval = 0; + long long last_logged = 0; + char *token = NULL; + int retval = 0; + int gpipe; + int err; + uint64_t collect_end; + uint64_t current_time; + uint64_t delta_time; + char logfile[PATH_MAX] = ""; + + lt_gpipe_init(); + (void) signal(SIGINT, signal_handler); + (void) signal(SIGTERM, signal_handler); + + /* Default global settings */ + g_config.lt_cfg_enable_filter = 0; + g_config.lt_cfg_trace_sched = 0; + g_config.lt_cfg_trace_syncobj = 1; + g_config.lt_cfg_low_overhead_mode = 0; + /* dtrace snapshot every 1 second */ + g_config.lt_cfg_snap_interval = 1000; +#ifdef EMBED_CONFIGS + g_config.lt_cfg_config_name = NULL; +#else + g_config.lt_cfg_config_name = lt_strdup(DEFAULT_CONFIG_NAME); +#endif + + /* Parse command line arguments. 
*/ + while ((optc = getopt_long(argc, argv, opt_string, + longopts, &longind)) != -1) { + switch (optc) { + case 'h': + print_usage(argv[0], TRUE); + goto end_none; + case 't': + if (to_int(optarg, &refresh_interval) != 0 || + refresh_interval < 1 || refresh_interval > 60) { + lt_display_error( + "Invalid refresh interval: %s\n", optarg); + unknown_option = TRUE; + } else if (check_opt_dup(LT_CMDOPT_INTERVAL, + refresh_interval)) { + unknown_option = TRUE; + } + + break; + case 'k': + if (to_int(optarg, &klog_level) != 0 || + lt_klog_set_log_level(klog_level) != 0) { + lt_display_error( + "Invalid log level: %s\n", optarg); + unknown_option = TRUE; + } else if (check_opt_dup(LT_CMDOPT_LOG_LEVEL, + refresh_interval)) { + unknown_option = TRUE; + } + + break; + case 'o': + if (check_opt_dup(LT_CMDOPT_LOG_FILE, optind)) { + unknown_option = TRUE; + } else if (strlen(optarg) >= sizeof (logfile)) { + lt_display_error( + "Log file name is too long: %s\n", + optarg); + unknown_option = TRUE; + } else { + (void) strncpy(logfile, optarg, + sizeof (logfile)); + } + + break; + case 'f': + for (token = strtok(optarg, ","); token != NULL; + token = strtok(NULL, ",")) { + int v = TRUE; + + if (strncmp(token, "no", 2) == 0) { + v = FALSE; + token = &token[2]; + } + + if (CMPOPT(token, "filter") == 0) { + if (check_opt_dup(LT_CMDOPT_F_FILTER, + v)) { + unknown_option = TRUE; + } else { + g_config.lt_cfg_enable_filter + = v; + } + } else if (CMPOPT(token, "sched") == 0) { + if (check_opt_dup(LT_CMDOPT_F_SCHED, + v)) { + unknown_option = TRUE; + } else { + g_config.lt_cfg_trace_sched + = v; + } + } else if (CMPOPT(token, "sobj") == 0) { + if (check_opt_dup(LT_CMDOPT_F_SOBJ, + v)) { + unknown_option = TRUE; + } else { + g_config.lt_cfg_trace_syncobj + = v; + } + } else if (CMPOPT(token, "low") == 0) { + if (check_opt_dup(LT_CMDOPT_F_LOW, + v)) { + unknown_option = TRUE; + } else { + g_config. 
+ lt_cfg_low_overhead_mode + = v; + } + } else { + lt_display_error( + "Unknown feature: %s\n", token); + unknown_option = TRUE; + } + } + + break; + case 'l': + if (to_int(optarg, &log_interval) != 0 || + log_interval < 60) { + lt_display_error( + "Invalid log interval: %s\n", optarg); + unknown_option = TRUE; + } else if (check_opt_dup(LT_CMDOPT_LOG_INTERVAL, + log_interval)) { + unknown_option = TRUE; + } + + break; + case 'c': + if (strlen(optarg) >= PATH_MAX) { + lt_display_error( + "Configuration name is too long.\n"); + unknown_option = TRUE; + } else if (check_opt_dup(LT_CMDOPT_CONFIG_FILE, + optind)) { + unknown_option = TRUE; + } else { + g_config.lt_cfg_config_name = + lt_strdup(optarg); + } + + break; + default: + unknown_option = TRUE; + break; + } + } + + if (!unknown_option && strlen(logfile) > 0) { + err = lt_klog_set_log_file(logfile); + + if (err == -1) { + lt_display_error("Log file name is too long: %s\n", + logfile); + unknown_option = TRUE; + } else if (err == -2) { + lt_display_error("Cannot write to log file: %s\n", + logfile); + unknown_option = TRUE; + } + } + + /* Throw error for invalid/junk arguments */ + if (optind < argc) { + int tmpind = optind; + (void) fprintf(stderr, "Unknown option(s): "); + + while (tmpind < argc) { + (void) fprintf(stderr, "%s ", argv[tmpind++]); + } + + (void) fprintf(stderr, "\n"); + unknown_option = TRUE; + } + + if (unknown_option) { + print_usage(argv[0], FALSE); + retval = 1; + goto end_none; + } + + (void) printf("%s\n%s\n", TITLE, COPYRIGHT); + + /* + * Initialization + */ + lt_klog_init(); + + if (lt_table_init() != 0) { + lt_display_error("Unable to load configuration table.\n"); + retval = 1; + goto end_notable; + } + + if (lt_dtrace_init() != 0) { + lt_display_error("Unable to initialize dtrace.\n"); + retval = 1; + goto end_nodtrace; + } + + last_logged = lt_millisecond(); + + (void) printf("Collecting data for %d seconds...\n", + refresh_interval); + + gpipe = lt_gpipe_readfd(); + collect_end = 
last_logged + refresh_interval * 1000; + for (;;) { + fd_set read_fd; + struct timeval timeout; + int tsleep = collect_end - lt_millisecond(); + + if (tsleep <= 0) { + break; + } + + if (tsleep > g_config.lt_cfg_snap_interval * 1000) { + tsleep = g_config.lt_cfg_snap_interval * 1000; + } + + timeout.tv_sec = tsleep / 1000; + timeout.tv_usec = (tsleep % 1000) * 1000; + + FD_ZERO(&read_fd); + FD_SET(gpipe, &read_fd); + + if (select(gpipe + 1, &read_fd, NULL, NULL, &timeout) > 0) { + goto end_ubreak; + } + + (void) lt_dtrace_work(0); + } + + lt_display_init(); + + do { + current_time = lt_millisecond(); + + lt_stat_clear_all(); + (void) lt_dtrace_collect(); + + delta_time = current_time; + current_time = lt_millisecond(); + delta_time = current_time - delta_time; + + if (log_interval > 0 && + current_time - last_logged > log_interval * 1000) { + lt_klog_write(); + last_logged = current_time; + } + + running = lt_display_loop(refresh_interval * 1000 - + delta_time); + } while (running != 0); + + lt_klog_write(); + + /* Cleanup */ + lt_display_deinit(); + +end_ubreak: + lt_dtrace_deinit(); + lt_stat_free_all(); + +end_nodtrace: + lt_table_deinit(); + +end_notable: + lt_klog_deinit(); + +end_none: + lt_gpipe_deinit(); + + if (g_config.lt_cfg_config_name != NULL) { + free(g_config.lt_cfg_config_name); + } + + return (retval); +} diff --git a/usr/src/cmd/latencytop/common/latencytop.d b/usr/src/cmd/latencytop/common/latencytop.d new file mode 100644 index 0000000000..95381b12de --- /dev/null +++ b/usr/src/cmd/latencytop/common/latencytop.d @@ -0,0 +1,404 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2008-2009, Intel Corporation. + * All Rights Reserved. + */ + +#define MAX_TAG 8 +#define MAX_STACK 64 + +#pragma D option aggsize=8m +#pragma D option bufsize=16m +#pragma D option dynvarsize=16m +#pragma D option aggrate=0 +#pragma D option stackframes=MAX_STACK +/* + * Our D script needs to compile even if some of the TRANSLATE probes cannot + * be found. Missing probes can be caused by older kernel, different + * architecture, unloaded modules etc. + */ +#pragma D option zdefs + +#if defined(ENABLE_SCHED) +#define TRACE_FILTER +#define TRACE_FILTER_COND(a) / (a) / +#else +#define TRACE_FILTER / pid != 0 / +#define TRACE_FILTER_COND(a) / pid != 0 && (a) / +#endif + +/* Threshold to filter WAKEABLE latencies. */ +#define FILTER_THRESHOLD 5000000 +/* From thread.h */ +#define T_WAKEABLE 2 + +/* + * This array is used to store timestamp of when threads are enqueued + * to dispatch queue. + * self-> is not accessible when enqueue happens. 
+ */ +unsigned long long lt_timestamps[int, int]; + +self unsigned int lt_is_block_wakeable; +self unsigned long long lt_sleep_start; +self unsigned long long lt_sleep_duration; +self unsigned long long lt_sch_delay; +self unsigned int lt_counter; /* only used in low overhead */ +self unsigned long long lt_timestamp; /* only used in low overhead */ +self unsigned int lt_stackp; +self unsigned int lt_prio[int]; +self string lt_cause[int]; + +this unsigned int priority; +this string cause; + +/* + * Clean up everything, otherwise we will run out of memory. + */ +proc:::lwp-exit +{ + lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0; + + self->lt_sleep_start = 0; + self->lt_is_block_wakeable = 0; + self->lt_counter = 0; + self->lt_timestamp = 0; + + /* + * Workaround: no way to clear associative array. + * We have to manually clear 0 ~ (MAX_TAG-1). + */ + + self->lt_prio[0] = 0; + self->lt_prio[1] = 0; + self->lt_prio[2] = 0; + self->lt_prio[3] = 0; + self->lt_prio[4] = 0; + self->lt_prio[5] = 0; + self->lt_prio[6] = 0; + self->lt_prio[7] = 0; + + self->lt_cause[0] = 0; + self->lt_cause[1] = 0; + self->lt_cause[2] = 0; + self->lt_cause[3] = 0; + self->lt_cause[4] = 0; + self->lt_cause[5] = 0; + self->lt_cause[6] = 0; + self->lt_cause[7] = 0; +} + +#if !defined(ENABLE_LOW_OVERHEAD) +/* + * Log timestamp when a thread is taken off the CPU. + */ +sched::resume:off-cpu +TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP) +{ + self->lt_sleep_start = timestamp; + self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE; + + lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = + self->lt_sleep_start; +} + +/* + * Log timestamp when a thread is put on a dispatch queue and becomes runnable. + */ +sched:::enqueue +/lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0/ +{ + lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] = timestamp; +} + +/* + * Calculate latency when the thread is actually on the CPU. + * This is necessary in order to get the right stack. 
+ */
+this unsigned long long end;
+this unsigned long long now;
+sched::resume:on-cpu
+/self->lt_sleep_start != 0/
+{
+	this->end = lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid];
+	this->now = timestamp;
+	lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0;
+	/*
+	 * lt_timestamps[] holds the enqueue time only if sched:::enqueue
+	 * overwrote it.  When it is 0 or still equal to the off-cpu time,
+	 * fall back to the current time: the whole interval is then counted
+	 * as sleep and the scheduling delay becomes 0.
+	 */
+	this->end = (this->end != 0 && this->end != self->lt_sleep_start)
+	    ? this->end : this->now;
+
+	self->lt_sch_delay = this->now - this->end;
+	self->lt_sleep_duration = this->end - self->lt_sleep_start;
+	self->lt_sleep_start = 0;
+}
+
+/*
+ * Filter: drop all "large" latency when it is interruptible, e.g., sleep()
+ * etc.
+ * This clause must stay between the calculation above and the aggregation
+ * clauses below: zeroing lt_sleep_duration and lt_sch_delay here makes their
+ * predicates false, so nothing is recorded for the filtered event.
+ */
+#if defined(ENABLE_FILTER)
+sched::resume:on-cpu
+/self->lt_sleep_duration > FILTER_THRESHOLD &&
+    self->lt_is_block_wakeable != 0/
+{
+	self->lt_sch_delay = 0;
+	self->lt_sleep_duration = 0;
+	self->lt_is_block_wakeable = 0;
+}
+#endif /* defined(ENABLE_FILTER) */
+
+/*
+ * Write sleep time to the aggregation.
+ * lt_sleep_duration is the duration between the time when a thread is taken
+ * off the CPU and the time when it is enqueued again.
+ * The aggregation key is (pid, tid, kernel stack, cause, priority); cause and
+ * priority come from the top of the per-thread cause stack, or are "" and 0
+ * when the stack is empty.
+ */
+sched::resume:on-cpu
+/self->lt_sleep_duration != 0/
+{
+	this->cause = self->lt_stackp > 0 ?
+	    self->lt_cause[self->lt_stackp - 1] : "";
+	this->priority = self->lt_stackp > 0 ?
+	    self->lt_prio[self->lt_stackp - 1] : 0;
+
+	@lt_call_count[pid, tid, stack(), this->cause,
+	    this->priority] = count();
+	@lt_call_sum[pid, tid, stack(), this->cause,
+	    this->priority] = sum(self->lt_sleep_duration);
+	@lt_call_max[pid, tid, stack(), this->cause,
+	    this->priority] = max(self->lt_sleep_duration);
+
+	self->lt_is_block_wakeable = 0; /* Clear the flag to avoid leak */
+	self->lt_sleep_duration = 0;
+}
+
+/*
+ * Write time spent in queue to the aggregation.
+ * lt_sch_delay is the interval between the time when a thread becomes
+ * runnable and the time when it is actually on the CPU.
+ */
+sched::resume:on-cpu
+/self->lt_sch_delay != 0/
+{
+	@lt_named_count[pid, tid, "Wait for available CPU"] = count();
+	@lt_named_sum[pid, tid, "Wait for available CPU"] =
+	    sum(self->lt_sch_delay);
+	@lt_named_max[pid, tid, "Wait for available CPU"] =
+	    max(self->lt_sch_delay);
+
+	self->lt_sch_delay = 0;
+}
+
+/*
+ * Probes to track latency caused by spinning on a lock.
+ * Each clause records the probe's arg1 under a fixed name in the lt_named_*
+ * aggregations, keyed by (pid, tid, name).
+ * NOTE(review): TRACE_FILTER is defined outside this excerpt; presumably it
+ * expands to the predicate selecting the traced processes -- confirm.
+ */
+lockstat:::adaptive-spin
+TRACE_FILTER
+{
+	@lt_named_count[pid, tid, "Adapt. lock spin"] = count();
+	@lt_named_sum[pid, tid, "Adapt. lock spin"] = sum(arg1);
+	@lt_named_max[pid, tid, "Adapt. lock spin"] = max(arg1);
+}
+
+lockstat:::spin-spin
+TRACE_FILTER
+{
+	@lt_named_count[pid, tid, "Spinlock spin"] = count();
+	@lt_named_sum[pid, tid, "Spinlock spin"] = sum(arg1);
+	@lt_named_max[pid, tid, "Spinlock spin"] = max(arg1);
+}
+
+/*
+ * Probes to track latency caused by blocking on a lock.
+ * Same scheme as the spin probes above; note that these names carry a
+ * leading '#', which is part of the aggregation key.
+ */
+lockstat:::adaptive-block
+TRACE_FILTER
+{
+	@lt_named_count[pid, tid, "#Adapt. lock block"] = count();
+	@lt_named_sum[pid, tid, "#Adapt. lock block"] = sum(arg1);
+	@lt_named_max[pid, tid, "#Adapt. lock block"] = max(arg1);
+}
+
+lockstat:::rw-block
+TRACE_FILTER
+{
+	@lt_named_count[pid, tid, "#RW. lock block"] = count();
+	@lt_named_sum[pid, tid, "#RW. lock block"] = sum(arg1);
+	@lt_named_max[pid, tid, "#RW. lock block"] = max(arg1);
+}
+
+#if defined(ENABLE_SYNCOBJ)
+/*
+ * Probes to track latency caused by synchronization objects.
+ * sched:::wakeup fires in the context of the waking thread, so the sleeper
+ * is identified through the probe arguments (args[1]->pr_pid,
+ * args[0]->pr_lwpid) rather than pid/tid.  The lt_sync_* aggregations are
+ * keyed by that pid/lwpid plus the object type and wait channel.
+ */
+this int stype;
+this unsigned long long wchan;
+this unsigned long long wtime;
+
+sched:::wakeup
+/*
+ * Currently we are unable to track wakeup from sched, because all its LWP IDs
+ * are zero when we trace it and that makes lt_timestamps unusable.
+ */
+/args[1]->pr_pid != 0 &&
+    lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0/
+{
+	this->stype = args[0]->pr_stype;
+	this->wchan = args[0]->pr_wchan;
+	/*
+	 * We can use lt_timestamps[] here, because
+	 * wakeup is always fired before enqueue.
+	 * After enqueue, lt_timestamps[] will be overwritten.
+	 */
+	this->wtime = timestamp - lt_timestamps[args[1]->pr_pid,
+	    args[0]->pr_lwpid];
+
+	@lt_sync_count[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
+	    this->wchan] = count();
+	@lt_sync_sum[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
+	    this->wchan] = sum(this->wtime);
+	@lt_sync_max[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
+	    this->wchan] = max(this->wtime);
+}
+#endif /* defined(ENABLE_SYNCOBJ) */
+
+#else /* !defined(ENABLE_LOW_OVERHEAD) */
+
+/*
+ * This is the low overhead mode.
+ * In order to reduce the number of instructions executed during each
+ * off-cpu and on-cpu event, we do the following:
+ *
+ * 1. Use sampling and update aggregations only roughly 1/100 times
+ * (SAMPLE_TIMES).
+ * 2. Do not track anything other than what is needed for "main" window.
+ * 3. Use as few thread local variables as possible.
+ *
+ * Latencies above SAMPLE_THRESHOLD are never sampled away; they are always
+ * recorded individually.  The threshold is compared against differences of
+ * the DTrace "timestamp" variable (nanoseconds).
+ */
+
+#define	SAMPLE_TIMES		100
+#define	SAMPLE_THRESHOLD	50000000
+
+/*
+ * Log timestamp when a thread is off CPU.
+ * self->lt_timestamp holds the off-cpu time here; the on-cpu clause below
+ * reuses the same variable for the measured latency.
+ */
+sched::resume:off-cpu
+TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP)
+{
+	self->lt_timestamp = timestamp;
+#if defined(ENABLE_FILTER)
+	self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE;
+#endif /* defined(ENABLE_FILTER) */
+}
+
+/*
+ * Calculate latency when a thread is actually on the CPU.
+ *
+ * After this clause self->lt_timestamp is either the latency to report or 0
+ * (nothing to report).  An event is skipped (need_skip == 1) when fewer than
+ * SAMPLE_TIMES - 1 events have been skipped so far and the latency does not
+ * exceed SAMPLE_THRESHOLD; a skipped event only increments lt_counter.
+ *
+ * NOTE(review): a latency zeroed by the ENABLE_FILTER block still satisfies
+ * the skip test, so filtered events advance lt_counter as well -- confirm
+ * this is intended.
+ */
+this int need_skip;
+sched::resume:on-cpu
+/self->lt_timestamp != 0/
+{
+	self->lt_timestamp = timestamp - self->lt_timestamp;
+
+#if defined(ENABLE_FILTER)
+	self->lt_timestamp =
+	    (self->lt_timestamp > FILTER_THRESHOLD &&
+	    self->lt_is_block_wakeable != 0) ? 0 : self->lt_timestamp;
+	self->lt_is_block_wakeable = 0;
+#endif /* defined(ENABLE_FILTER) */
+
+	this->need_skip = (self->lt_counter < (SAMPLE_TIMES - 1) &&
+	    self->lt_timestamp <= SAMPLE_THRESHOLD) ? 1 : 0;
+	self->lt_timestamp = this->need_skip ? 0 : self->lt_timestamp;
+	self->lt_counter += this->need_skip;
+}
+
+/*
+ * Track large latency first.
+ */
+sched::resume:on-cpu
+/self->lt_timestamp > SAMPLE_THRESHOLD/
+{
+	this->cause = self->lt_stackp > 0 ?
+	    self->lt_cause[self->lt_stackp - 1] : "";
+	this->priority = self->lt_stackp > 0 ?
+	    self->lt_prio[self->lt_stackp - 1] : 0;
+
+	/* sum(1), not count(): the fallback clause adds batches to the same aggregation. */
+	@lt_call_count[pid, tid, stack(), this->cause,
+	    this->priority] = sum(1);
+	@lt_call_sum[pid, tid, stack(), this->cause,
+	    this->priority] = sum(self->lt_timestamp);
+	@lt_call_max[pid, tid, stack(), this->cause,
+	    this->priority] = max(self->lt_timestamp);
+
+	self->lt_timestamp = 0;
+}
+
+/*
+ * If we fall back to this probe, that means the latency is small and counter
+ * has reached SAMPLE_TIMES - 1.
+ * The sampled latency stands in for the skipped events as well: the count is
+ * increased by lt_counter + 1 and the sum by (lt_counter + 1) times the
+ * sampled latency, then the counter restarts from 0.
+ */
+sched::resume:on-cpu
+/self->lt_timestamp != 0/
+{
+	this->cause = self->lt_stackp > 0 ?
+	    self->lt_cause[self->lt_stackp - 1] : "";
+	this->priority = self->lt_stackp > 0 ?
+	    self->lt_prio[self->lt_stackp - 1] : 0;
+
+	/* Need +1 because lt_counter has not been updated in this cycle. */
+	@lt_call_count[pid, tid, stack(), this->cause,
+	    this->priority] = sum(self->lt_counter + 1);
+	@lt_call_sum[pid, tid, stack(), this->cause,
+	    this->priority] = sum((self->lt_counter + 1) * self->lt_timestamp);
+	@lt_call_max[pid, tid, stack(), this->cause,
+	    this->priority] = max(self->lt_timestamp);
+
+	self->lt_timestamp = 0;
+	self->lt_counter = 0;
+}
+
+#endif /* !defined(ENABLE_LOW_OVERHEAD) */
+
+/*
+ * TRANSLATE(entryprobe, returnprobe, cause, priority) expands to two clauses
+ * that maintain the per-thread cause stack (lt_cause[] / lt_prio[], with
+ * depth lt_stackp):
+ *
+ *  - entryprobe pushes (cause, priority) when the stack is empty, or when it
+ *    holds fewer than MAX_TAG entries and the priority on top is not greater
+ *    than the new one;
+ *  - returnprobe pops the top entry when its cause equals the given cause.
+ */
+#define	TRANSLATE(entryprobe, returnprobe, cause, priority)		\
+entryprobe								\
+TRACE_FILTER_COND(self->lt_stackp == 0 ||				\
+    (self->lt_stackp < MAX_TAG &&					\
+    self->lt_prio[self->lt_stackp - 1] <= priority) )			\
+{									\
+	self->lt_prio[self->lt_stackp] = priority;			\
+	self->lt_cause[self->lt_stackp] = cause;			\
+	++self->lt_stackp;						\
+}									\
+returnprobe								\
+TRACE_FILTER_COND(self->lt_stackp > 0 &&				\
+    self->lt_cause[self->lt_stackp - 1] == cause)			\
+{									\
+	--self->lt_stackp;						\
+	self->lt_cause[self->lt_stackp] = NULL;				\
+}
+
+/*
+ * Syscalls have a priority of 10.
This is to make sure that latency is + * traced to one of the syscalls only if nothing else matches. + * We put this special probe here because it uses "probefunc" variable instead + * of a constant string. + */ + +TRANSLATE(syscall:::entry, syscall:::return, probefunc, 10) diff --git a/usr/src/cmd/latencytop/common/latencytop.h b/usr/src/cmd/latencytop/common/latencytop.h new file mode 100644 index 0000000000..df0f992c47 --- /dev/null +++ b/usr/src/cmd/latencytop/common/latencytop.h @@ -0,0 +1,269 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2008-2009, Intel Corporation. + * All Rights Reserved. + */ + +#ifndef _LATENCYTOP_H +#define _LATENCYTOP_H + +#include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Without this lint seems to be confused by glib header file. + */ +#ifdef __lint +#undef g_assert +#define g_assert(x) ((void)(x)) +#undef TRUE +#define TRUE 1 +#endif + +/* + * We define our own conversions in order to avoid compiler warnings. + */ +#define LT_INT_TO_POINTER(a) ((void *)(unsigned long)(a)) + +#define TITLE "LatencyTOP for OpenSolaris, version 1.0" +#define COPYRIGHT "Copyright (c) 2008-2009, Intel Corporation." 
+#define	DEFAULT_KLOG_FILE	"/var/log/latencytop.log"
+
+#define	INVALID_PID		(~0)	/* all bits set */
+#define	INVALID_TID		(~0)	/* all bits set */
+#define	PID_SYS_GLOBAL		INVALID_PID	/* shares INVALID_PID's value */
+#define	INVALID_CAUSE		0
+#define	HIGHER_PRIORITY(a, b)	((a) > (b))	/* larger number wins */
+
+#ifdef EMBED_CONFIGS
+/*
+ * LatencyTOP configuration is embedded in the binary.
+ * Array will be generated by elfwrap.
+ * Each *_start / *_end pair presumably brackets one embedded file (the D
+ * script and the translation table) -- confirm against the Makefile.
+ */
+extern char latencytop_d_start;
+extern char latencytop_d_end;
+extern char latencytop_trans_start;
+extern char latencytop_trans_end;
+#else
+/*
+ * LatencyTOP configuration is provided externally by user.
+ * The defaults are relative paths, i.e. resolved against the current
+ * working directory.
+ */
+#define	DEFAULT_CONFIG_NAME	"./latencytop.trans"
+#define	DEFAULT_D_SCRIPT_NAME	"./latencytop.d"
+#endif
+
+/* Which statistic a reported value belongs to. */
+typedef enum {
+	LT_STAT_COUNT,
+	LT_STAT_MAX,
+	LT_STAT_SUM,
+} lt_stat_type_t;
+
+#define	LT_KLOG_LEVEL_NONE	0	/* Log nothing */
+#define	LT_KLOG_LEVEL_UNMAPPED	1	/* Log only stacks not mapped */
+#define	LT_KLOG_LEVEL_MAPPED	2	/* Log only stacks mapped */
+#define	LT_KLOG_LEVEL_ALL	3	/* Log all stacks, mapped or not */
+
+typedef enum {
+	LT_LEVEL_GLOBAL,	/* System wide statistics */
+	LT_LEVEL_PROCESS,	/* Per-process statistics */
+	LT_LEVEL_THREAD,	/* Per-thread statistics */
+} lt_stat_level_t;
+
+/* Sort key for a statistics list. */
+typedef enum {
+	LT_SORT_TOTAL,
+	LT_SORT_MAX,
+	LT_SORT_AVG,
+	LT_SORT_COUNT,
+} lt_sort_t;
+
+/* Selector passed to lt_get_proc_field(). */
+typedef enum {
+	LT_FIELD_FNAME,
+	LT_FIELD_PSARGS,
+} lt_field_t;
+
+typedef enum {
+	LT_LIST_CAUSE,		/* List latency by causes (default) */
+	LT_LIST_SPECIALS,	/* List only "special" causes */
+	LT_LIST_SOBJ		/* List synchronization objects */
+} lt_list_type_t;
+
+/*
+ * Data structure which contains statistics.
+ * One field per lt_stat_type_t value: event count, summed latency and
+ * largest single latency.
+ */
+typedef struct {
+	uint64_t lt_s_count;
+	uint64_t lt_s_total;
+	uint64_t lt_s_max;
+} lt_stat_data_t;
+
+/*
+ * Data structure that stores statistics along with the name.
+ */
+typedef struct {
+	enum {
+		STAT_CAUSE,
+		STAT_SOBJ
+	} lt_se_type;	/* presumably selects the lt_se_tsdata member */
+	const char *lt_se_string;
+	lt_stat_data_t lt_se_data;
+	union {
+		struct {
+			int lt_se_c_id;
+			int lt_se_c_flags;
+		} lt_se_t_cause;
+		struct {
+			int lt_se_s_id;
+		} lt_se_t_sobj;
+	} lt_se_tsdata;	/* type specific data */
+} lt_stat_entry_t;
+
+/*
+ * Run-time settings.
+ * NOTE(review): the first four fields appear to correspond to the D script
+ * switches (ENABLE_FILTER, scheduler tracing, ENABLE_SYNCOBJ,
+ * ENABLE_LOW_OVERHEAD) -- confirm where g_config is consumed.
+ */
+typedef struct {
+	int lt_cfg_enable_filter;
+	int lt_cfg_trace_sched;
+	int lt_cfg_trace_syncobj;
+	int lt_cfg_low_overhead_mode;
+	int lt_cfg_snap_interval;
+	char *lt_cfg_config_name;
+} lt_config_t;
+
+extern lt_config_t g_config;	/* The global settings */
+
+/*
+ * Cause flags.  The individual flags are distinct bits (1, 2, 4);
+ * CAUSE_ALL_FLAGS is the mask with every bit set.
+ */
+
+/*
+ * Causes can be disabled through the configuration file.
+ * When disabled, though D script will continue to capture causes, they will
+ * not be counted by LatencyTOP.
+ */
+#define	CAUSE_FLAG_DISABLED		1
+/*
+ * This flag will not show and count causes as part of summary in
+ * "kstack window".
+ */
+#define	CAUSE_FLAG_HIDE_IN_SUMMARY	2
+/*
+ * This is generated from D script (named cause), and is "special".
+ */
+#define	CAUSE_FLAG_SPECIAL		4
+#define	CAUSE_ALL_FLAGS			0xffffffff
+
+/*
+ * These functions collect statistics using DTrace.
+ */
+extern int lt_dtrace_init(void);
+extern int lt_dtrace_work(int);
+extern int lt_dtrace_collect(void);
+extern void lt_dtrace_deinit(void);
+
+/*
+ * These functions maintain configuration, e.g. symbol to cause mapping.
+ * Causes are referred to by int id; INVALID_CAUSE (0) is reserved.
+ */
+extern int lt_table_init(void);
+extern int lt_table_cause_from_stack(const char *, int *, int *);
+extern const char *lt_table_get_cause_name(int);
+extern int lt_table_get_cause_flag(int, int);
+extern int lt_table_cause_from_name(char *, int, int);
+extern int lt_table_append_trans(FILE *fp);
+extern void lt_table_deinit(void);
+
+/*
+ * These functions update statistic of all causes of latency, collected
+ * from DTrace.
+ */
+extern void lt_stat_update(pid_t, id_t, char *, char *, unsigned int,
+    lt_stat_type_t, uint64_t);
+extern void lt_stat_update_cause(pid_t, id_t, int, lt_stat_type_t, uint64_t);
+extern void lt_stat_update_sobj(pid_t, id_t, int, unsigned long long,
+    lt_stat_type_t, uint64_t);
+extern void lt_stat_clear_all(void);
+extern void lt_stat_free_all(void);
+
+/*
+ * These functions produce lists for display panes.
+ * Note: after a call to lt_stat_update_*, the old lists will become invalid.
+ * The list is an opaque void * obtained from lt_stat_list_create();
+ * presumably the int argument of the accessors is an item index and the
+ * list must be released with lt_stat_list_free() -- confirm in stat.c.
+ */
+extern void *lt_stat_list_create(lt_list_type_t, lt_stat_level_t,
+    pid_t, id_t, int, lt_sort_t);
+extern int lt_stat_list_has_item(void *, int);
+extern const char *lt_stat_list_get_reason(void *, int);
+extern uint64_t lt_stat_list_get_max(void *, int);
+extern uint64_t lt_stat_list_get_sum(void *, int);
+extern uint64_t lt_stat_list_get_count(void *, int);
+extern uint64_t lt_stat_list_get_gtotal(void *);
+extern void lt_stat_list_free(void *);
+
+/*
+ * These functions produce the process list and the thread list.
+ * lt_stat_proc_list_create() hands back two arrays through its pointer
+ * arguments; lt_stat_proc_list_free() takes the same pair.
+ */
+extern int lt_stat_proc_list_create(pid_t **, id_t **);
+extern void lt_stat_proc_list_free(pid_t *, id_t *);
+extern const char *lt_stat_proc_get_name(pid_t);
+extern int lt_stat_proc_get_nthreads(pid_t);
+
+/*
+ * These functions use ncurses to create console-based display.
+ */
+extern void lt_display_init(void);
+extern int lt_display_loop(int);
+extern void lt_display_error(const char *, ...);
+extern void lt_display_deinit(void);
+
+/*
+ * Write statistics to log file - useful for debugging and offline analysis.
+ * The level given to lt_klog_set_log_level() is presumably one of the
+ * LT_KLOG_LEVEL_* values -- confirm in klog.c.
+ */
+extern void lt_klog_init(void);
+extern void lt_klog_deinit(void);
+extern int lt_klog_set_log_file(const char *);
+extern int lt_klog_set_log_level(int);
+extern void lt_klog_write(void);
+extern void lt_klog_log(int, pid_t, char *, lt_stat_type_t,
+    uint64_t);
+
+/*
+ * Utility functions.
+ */
+extern uint64_t lt_millisecond(void);
+/*
+ * Allocation helpers.
+ * NOTE(review): implementations are not visible here; the names suggest
+ * malloc/zeroed-malloc/strdup wrappers plus a NULL check -- confirm in util.c.
+ */
+extern void *lt_malloc(size_t);
+extern void *lt_zalloc(size_t);
+extern char *lt_strdup(const char *);
+extern void lt_check_null(void *);
+extern void lt_time_str(char *, int);
+extern char *lt_get_proc_field(pid_t, lt_field_t);
+extern void lt_update_stat_value(lt_stat_data_t *, lt_stat_type_t, uint64_t);
+/*
+ * Comparators over two lt_stat_entry_t; one per lt_sort_t key.
+ * NOTE(review): presumably they order entries descending by that key --
+ * confirm the return convention in util.c.
+ */
+extern int lt_sort_by_total_desc(lt_stat_entry_t *, lt_stat_entry_t *);
+extern int lt_sort_by_max_desc(lt_stat_entry_t *, lt_stat_entry_t *);
+extern int lt_sort_by_count_desc(lt_stat_entry_t *, lt_stat_entry_t *);
+extern int lt_sort_by_avg_desc(lt_stat_entry_t *, lt_stat_entry_t *);
+extern void lt_gpipe_init(void);
+extern void lt_gpipe_deinit(void);
+extern void lt_gpipe_break(const char *);
+extern int lt_gpipe_readfd(void);
+extern int lt_file_exist(const char *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LATENCYTOP_H */
diff --git a/usr/src/cmd/latencytop/common/latencytop.trans b/usr/src/cmd/latencytop/common/latencytop.trans
new file mode 100644
index 0000000000..fbec48060b
--- /dev/null
+++ b/usr/src/cmd/latencytop/common/latencytop.trans
@@ -0,0 +1,44 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2008-2009, Intel Corporation. +# All Rights Reserved. +# +# LatencyTOP 1.0 configuration +# + +# +# Format: +# D ... +# ;