diff options
| author | Sebastien Roy <seb@delphix.com> | 2015-05-29 13:47:23 -0400 |
|---|---|---|
| committer | Richard Lowe <richlowe@richlowe.net> | 2019-08-19 22:32:46 +0000 |
| commit | a2f04351e04971ab0879872d264d6038c156b860 (patch) | |
| tree | cd6640900e1adf19b745c5f8e796a0e97c7a8ee7 | |
| parent | c12492cf73149aa0aa845af5d59966b0eb5aa910 (diff) | |
| download | illumos-joyent-a2f04351e04971ab0879872d264d6038c156b860.tar.gz | |
11547 Want connstat(1M) command to display per-connection TCP statistics
Portions contributed by: Cody Peter Mello <cody.mello@joyent.com>
Portions contributed by: Ahmed G <ahmedg@delphix.com>
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
24 files changed, 2022 insertions, 145 deletions
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile index 389916bc67..1d1ffb6e00 100644 --- a/usr/src/cmd/Makefile +++ b/usr/src/cmd/Makefile @@ -21,8 +21,8 @@ # # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright 2019 Joyent, Inc. -# Copyright (c) 2012 by Delphix. All rights reserved. +# Copyright (c) 2019, Joyent, Inc. +# Copyright (c) 2012, 2015 by Delphix. All rights reserved. # Copyright (c) 2013 DEY Storage Systems, Inc. All rights reserved. # Copyright 2014 Garrett D'Amore <garrett@damore.org> # Copyright 2016 Toomas Soome <tsoome@me.com> @@ -98,6 +98,7 @@ COMMON_SUBDIRS= \ cmd-inet \ col \ compress \ + connstat \ consadm \ coreadm \ cpio \ @@ -547,6 +548,7 @@ MSGSUBDIRS= \ cmd-inet \ col \ compress \ + connstat \ consadm \ coreadm \ cpio \ diff --git a/usr/src/cmd/connstat/Makefile b/usr/src/cmd/connstat/Makefile new file mode 100644 index 0000000000..847bd05428 --- /dev/null +++ b/usr/src/cmd/connstat/Makefile @@ -0,0 +1,51 @@ +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# +# +# Copyright (c) 2015 by Delphix. All rights reserved. 
+# + +PROG= connstat +OBJS= connstat_main.o connstat_mib.o connstat_tcp.o +SRCS= $(OBJS:%.o=%.c) +POFILES= connstat_main.po connstat_tcp.po connstat_mib.po +POFILE= connstat.po + +include ../Makefile.cmd +include ../Makefile.ctf + +CSTD= $(CSTD_GNU99) +LDLIBS += -lsocket -lnsl -lumem -lofmt +XGETFLAGS += -a -x $(PROG).xcl + +.KEEP_STATE: + +all: $(PROG) + +$(PROG): $(OBJS) + $(LINK.c) -o $@ $(OBJS) $(LDLIBS) + $(POST_PROCESS) + +$(POFILE): $(POFILES) + $(RM) $@ + cat $(POFILES) > $@ + +install: all $(ROOTPROG) + +clean: + $(RM) $(OBJS) + +lint: lint_SRCS + +include ../Makefile.targ diff --git a/usr/src/cmd/connstat/connstat.h b/usr/src/cmd/connstat/connstat.h new file mode 100644 index 0000000000..a50049b3c7 --- /dev/null +++ b/usr/src/cmd/connstat/connstat.h @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2015, 2016 by Delphix. All rights reserved. 
+ */ + +#ifndef _CONNSTAT_H +#define _CONNSTAT_H + +#include <sys/types.h> +#include <sys/socket.h> +#include <ofmt.h> +#include <sys/stropts.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct connstat_conn_attr_s { + struct sockaddr_storage ca_laddr; + struct sockaddr_storage ca_raddr; + int ca_lport; + int ca_rport; + int ca_state; +} connstat_conn_attr_t; + +typedef struct conn_walk_state_s { + ofmt_handle_t cws_ofmt; + uint_t cws_flags; + connstat_conn_attr_t cws_filter; +} conn_walk_state_t; + +/* cws_flags */ +#define CS_LOOPBACK 0x0001 /* Include loopback connections */ +#define CS_IPV4 0x0002 /* Show only IPv4 connections */ +#define CS_IPV6 0x0004 /* Show only IPv6 connections */ +#define CS_LADDR 0x0008 /* Filter by laddr in cws_filter */ +#define CS_RADDR 0x0010 /* Filter by raddr in cws_filter */ +#define CS_LPORT 0x0020 /* Filter by lport in cws_filter */ +#define CS_RPORT 0x0040 /* Filter by rport in cws_filter */ +#define CS_STATE 0x0080 /* Filter by state in cws_filter */ +#define CS_PARSABLE 0x0100 /* Parsable output */ + +typedef ofmt_field_t *connstat_getfieldsfunc_t(void); +typedef void connstat_walkfunc_t(struct strbuf *, conn_walk_state_t *); + +typedef struct connstat_proto_s { + char *csp_proto; + char *csp_default_fields; + int csp_miblevel; + int csp_mibv4name; + int csp_mibv6name; + connstat_getfieldsfunc_t *csp_getfields; + connstat_walkfunc_t *csp_v4walk; + connstat_walkfunc_t *csp_v6walk; +} connstat_proto_t; + +boolean_t print_string(ofmt_arg_t *, char *, uint_t); +boolean_t print_uint16(ofmt_arg_t *, char *, uint_t); +boolean_t print_uint32(ofmt_arg_t *, char *, uint_t); +boolean_t print_uint64(ofmt_arg_t *, char *, uint_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _CONNSTAT_H */ diff --git a/usr/src/cmd/connstat/connstat.xcl b/usr/src/cmd/connstat/connstat.xcl new file mode 100644 index 0000000000..089bc7358e --- /dev/null +++ b/usr/src/cmd/connstat/connstat.xcl @@ -0,0 +1,84 @@ +# +# CDDL HEADER START +# +# This 
file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# +# +# Copyright (c) 2015 by Delphix. All rights reserved. +# + +msgid "count" +msgid "established" +msgid "filter" +msgid "help" +msgid "interval" +msgid "ipv4" +msgid "ipv6" +msgid "no-loopback" +msgid "output" +msgid "parsable" +msgid "protocol" +msgid "timestamp" +msgid "tcp" +msgid "laddr,lport,raddr,rport,inbytes,outbytes," + "retransbytes,suna,swnd,cwnd,rwnd" +msgid " %s\n" +msgid "" +msgid "c:eF:hi:Lo:Pp:T:46" +msgid "all" +msgid "laddr" +msgid "raddr" +msgid "lport" +msgid "rport" +msgid "= " +msgid "%s%ld\n" +msgid "%s%s\n" +msgid "%hu" +msgid "%u" +msgid "%llu" +msgid "%s: " +msgid "LADDR" +msgid "RADDR" +msgid "LPORT" +msgid "RPORT" +msgid "INBYTES" +msgid "INSEGS" +msgid "INUNORDERBYTES" +msgid "INUNORDERSEGS" +msgid "OUTBYTES" +msgid "OUTSEGS" +msgid "RETRANSBYTES" +msgid "RETRANSSEGS" +msgid "SUNA" +msgid "count" +msgid "SWND" +msgid "CWND" +msgid "RWND" +msgid "STATE" +msgid "CLOSED" +msgid "IDLE" +msgid "BOUND" +msgid "LISTEN" +msgid "SYN_SENT" +msgid "SYN_RCVD" +msgid "ESTABLISHED" +msgid "CLOSE_WAIT" +msgid "FIN_WAIT_1" +msgid "CLOSING" +msgid "LAST_ACK" +msgid "FIN_WAIT_2" +msgid "TIME_WAIT" +msgid "UNKNOWN(%d)" +msgid "/dev/arp" +msgid "putmsg" +msgid "getmsg" +msgid "malloc" diff --git a/usr/src/cmd/connstat/connstat_main.c b/usr/src/cmd/connstat/connstat_main.c new file mode 100644 index 0000000000..dd58e2ac2b --- /dev/null +++ b/usr/src/cmd/connstat/connstat_main.c @@ -0,0 +1,567 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License 
("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2015, 2016 by Delphix. All rights reserved. + */ + +#include <err.h> +#include <stdio.h> +#include <errno.h> +#include <getopt.h> +#include <stdlib.h> +#include <stddef.h> +#include <strings.h> +#include <unistd.h> +#include <libgen.h> +#include <libintl.h> +#include <limits.h> +#include <locale.h> +#include <langinfo.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netdb.h> +#include <sys/varargs.h> +#include <ofmt.h> +#include <inet/tcp.h> +#include <netinet/in.h> +#include <inet/mib2.h> +#include "connstat.h" +#include "connstat_mib.h" +#include "connstat_tcp.h" + +#define DEFAULT_PROTO "tcp" + +static const char *invalid_v4v6_msg = + "Invalid combination of IPv4 and IPv6 arguments\n"; + +static const char *invalid_T_msg = + "Invalid -T arg \"%s\". Must be \"u\" or \"d\"\n"; + +static const struct option longopts[] = { + { "count", required_argument, 0, 'c' }, + { "established", no_argument, 0, 'e' }, + { "filter", required_argument, 0, 'F' }, + { "help", no_argument, 0, 'h' }, + { "interval", required_argument, 0, 'i' }, + { "ipv4", no_argument, 0, '4' }, + { "ipv6", no_argument, 0, '6' }, + { "no-loopback", no_argument, 0, 'L' }, + { "output", required_argument, 0, 'o' }, + { "parsable", no_argument, 0, 'P' }, + { "protocol", required_argument, 0, 'p' }, + { "timestamp", required_argument, 0, 'T' }, + { NULL, 0, 0, 0 } +}; + +static connstat_proto_t connstat_protos[] = { + CONNSTAT_TCP_PROTO, + { NULL, NULL, 0, 0, 0, NULL, NULL, NULL } +}; + +typedef enum { NOTIMESTAMP, UTIMESTAMP, DTIMESTAMP } timestamp_fmt_t; + +static void die(const char *, ...) 
__NORETURN; +static void process_filter(char *, connstat_conn_attr_t *, uint_t *); +static void show_stats(connstat_proto_t *, ofmt_handle_t, uint_t, + connstat_conn_attr_t *, timestamp_fmt_t, uint_t, uint_t); + +static void __NORETURN +usage(int code) +{ + static const char *opts[] = { + "-4, --ipv4 Only display IPv4 connections", + "-6, --ipv6 Only display IPv6 connections", + "-c, --count=COUNT Only print COUNT reports", + "-e, --established Only display established connections", + "-F, --filter=FILTER Only display connections that match " + "FILTER", + "-h, --help Print this help", + "-i, --interval=SECONDS Report once every SECONDS seconds", + "-L, --no-loopback Omit loopback connections", + "-o, --output=FIELDS Restrict output to the comma-separated " + "list of fields\n" + " specified", + "-P, --parsable Parsable output mode", + "-T, --timestamp=TYPE Display a timestamp for each iteration", + NULL + }; + + (void) fprintf(stderr, gettext("usage: ")); + (void) fprintf(stderr, + gettext("%s [-eLP] [-4|-6] [-T d|u] [-F <filter>]\n" + " [-i <interval> [-c <count>]] [-o <field>[,...]]\n"), + getprogname()); + + (void) fprintf(stderr, gettext("\nOptions:\n")); + for (const char **optp = opts; *optp != NULL; optp++) { + (void) fprintf(stderr, " %s\n", gettext(*optp)); + } + + (void) fprintf(stderr, gettext("\nFilter:\n")); + (void) fprintf(stderr, gettext(" The FILTER argument for the -F " + "option is of the form:\n" + " <field>=<value>,[<field>=<value>,...]\n")); + (void) fprintf(stderr, gettext(" Filterable fields are laddr, lport, " + "raddr, rport, and state.\n")); + + (void) fprintf(stderr, gettext("\nFields:\n")); + (void) fprintf(stderr, gettext( + " laddr Local IP address\n" + " raddr Remote IP address\n" + " lport Local port\n" + " rport Remote port\n" + " inbytes Total bytes received\n" + " insegs Total segments received\n" + " inunorderbytes Bytes received out of order\n" + " inunordersegs Segments received out of order\n" + " outbytes Total bytes 
sent\n" + " outsegs Total segments sent\n" + " retransbytes Bytes retransmitted\n" + " retranssegs Segments retransmitted\n" + " suna Current unacknowledged bytes sent\n" + " unsent Unsent bytes on the transmit queue\n" + " swnd Send window size (peer's receive window)\n" + " cwnd Congestion window size\n" + " rwnd Receive window size\n" + " mss Maximum segment size\n" + " rto Retransmission timeout (ms)\n" + " rtt Smoothed round-trip time (us)\n" + " rtts Sum round-trip time (us)\n" + " rttc Count of round-trip times\n" + " state Connection state\n")); + exit(code); +} + +static connstat_proto_t * +getproto(const char *proto) +{ + for (connstat_proto_t *current = &connstat_protos[0]; + current->csp_proto != NULL; current++) { + if (strcasecmp(proto, current->csp_proto) == 0) { + return (current); + } + } + return (NULL); +} + +int +main(int argc, char *argv[]) +{ + int option; + int count = 0; + int interval = 0; + const char *errstr = NULL; + char *fields = NULL; + char *filterstr = NULL; + connstat_conn_attr_t filter = {0}; + char *protostr = DEFAULT_PROTO; + connstat_proto_t *proto; + ofmt_handle_t ofmt; + ofmt_status_t oferr; + char oferrbuf[OFMT_BUFSIZE]; + uint_t ofmtflags = OFMT_NOHEADER; + uint_t flags = CS_LOOPBACK | CS_IPV4 | CS_IPV6; + timestamp_fmt_t timestamp_fmt = NOTIMESTAMP; + + (void) setlocale(LC_ALL, ""); +#if !defined(TEXT_DOMAIN) +#define TEXT_DOMAIN "SYS_TEST" +#endif + (void) textdomain(TEXT_DOMAIN); + + setprogname(basename(argv[0])); + + while ((option = getopt_long(argc, argv, "c:eF:hi:Lo:Pp:T:46", + longopts, NULL)) != -1) { + switch (option) { + case 'c': + count = strtonum(optarg, 1, INT_MAX, &errstr); + if (errstr != NULL) { + (void) fprintf(stderr, gettext( + "error parsing -c argument (%s): %s\n"), + optarg, errstr); + usage(1); + } + break; + case 'e': + flags |= CS_STATE; + filter.ca_state = TCPS_ESTABLISHED; + break; + case 'F': + filterstr = optarg; + break; + case 'i': + interval = strtonum(optarg, 1, INT_MAX, &errstr); + if 
(errstr != NULL) { + (void) fprintf(stderr, gettext( + "error parsing -i argument (%s): %s\n"), + optarg, errstr); + usage(1); + } + break; + case 'L': + flags &= ~CS_LOOPBACK; + break; + case 'o': + fields = optarg; + break; + case 'P': + ofmtflags |= OFMT_PARSABLE; + flags |= CS_PARSABLE; + break; + case 'p': + /* + * -p is an undocumented flag whose only supported + * argument is "tcp". The idea is to reserve this + * flag for potential future use in case connstat + * is extended to support stats for other protocols. + */ + protostr = optarg; + break; + case 'T': + if (strcmp(optarg, "u") == 0) { + timestamp_fmt = UTIMESTAMP; + } else if (strcmp(optarg, "d") == 0) { + timestamp_fmt = DTIMESTAMP; + } else { + (void) fprintf(stderr, gettext( + invalid_T_msg), optarg); + usage(1); + } + break; + case '4': + if (!(flags & CS_IPV4)) { + (void) fprintf(stderr, gettext( + invalid_v4v6_msg)); + usage(1); + } + flags &= ~CS_IPV6; + break; + case '6': + if (!(flags & CS_IPV6)) { + (void) fprintf(stderr, gettext( + invalid_v4v6_msg)); + usage(1); + } + flags &= ~CS_IPV4; + break; + case '?': + default: + usage(1); + break; + } + } + + if ((proto = getproto(protostr)) == NULL) { + die("unknown protocol given to \"-p\": %s", protostr); + } + + if ((ofmtflags & OFMT_PARSABLE) && fields == NULL) { + die("parsable output requires \"-o\""); + } + + if ((ofmtflags & OFMT_PARSABLE) && fields != NULL && + strcasecmp(fields, "all") == 0) { + die("\"-o all\" is invalid with parsable output"); + } + + if (fields == NULL) { + fields = proto->csp_default_fields; + } + + /* If count is specified, then interval must also be specified. */ + if (count != 0 && interval == 0) { + die("\"-c\" requires \"-i\""); + } + + /* If interval is not specified, then the default count is 1. 
*/ + if (interval == 0 && count == 0) { + count = 1; + } + + if (filterstr != NULL) { + process_filter(filterstr, &filter, &flags); + } + + oferr = ofmt_open(fields, proto->csp_getfields(), ofmtflags, 0, &ofmt); + if (oferr != OFMT_SUCCESS) { + (void) ofmt_strerror(ofmt, oferr, oferrbuf, sizeof (oferrbuf)); + die(oferrbuf); + } + ofmt_set_fs(ofmt, ','); + + show_stats(proto, ofmt, flags, &filter, timestamp_fmt, interval, count); + + ofmt_close(ofmt); + return (0); +} + +/* + * Convert the input IP address literal to sockaddr of the appropriate address + * family. Preserves any potential port number that may have been set in the + * input sockaddr_storage structure. + */ +static void +str2sockaddr(const char *addr, struct sockaddr_storage *ss) +{ + struct addrinfo hints, *res; + + bzero(&hints, sizeof (hints)); + hints.ai_flags = AI_NUMERICHOST; + if (getaddrinfo(addr, NULL, &hints, &res) != 0) { + die("invalid literal IP address: %s", addr); + } + bcopy(res->ai_addr, ss, res->ai_addrlen); + freeaddrinfo(res); +} + +/* + * The filterstr argument is of the form: <attr>=<value>[,...] + * Possible attributes are laddr, raddr, lport, rport, and state. Parse this + * filter and store the results into the provided attribute structure. 
+ */ +static void +process_filter(char *filterstr, connstat_conn_attr_t *filter, uint_t *flags) +{ + int option; + char *val; + enum { F_LADDR, F_RADDR, F_LPORT, F_RPORT, F_STATE }; + static char *filter_optstr[] = + { "laddr", "raddr", "lport", "rport", "state", NULL }; + uint_t flag = 0; + struct sockaddr_storage *addrp = NULL; + const char *errstr = NULL; + int *portp = NULL; + + while (*filterstr != '\0') { + option = getsubopt(&filterstr, filter_optstr, &val); + errno = 0; + + switch (option) { + case F_LADDR: + flag = CS_LADDR; + addrp = &filter->ca_laddr; + break; + case F_RADDR: + flag = CS_RADDR; + addrp = &filter->ca_raddr; + break; + case F_LPORT: + flag = CS_LPORT; + portp = &filter->ca_lport; + break; + case F_RPORT: + flag = CS_RPORT; + portp = &filter->ca_rport; + break; + case F_STATE: + flag = CS_STATE; + break; + default: + usage(1); + } + + if (*flags & flag) { + (void) fprintf(stderr, gettext( + "Ambiguous filter provided. The \"%s\" field " + "appears more than once.\n"), + filter_optstr[option]); + usage(1); + } + *flags |= flag; + + switch (flag) { + case CS_LADDR: + case CS_RADDR: + str2sockaddr(val, addrp); + if (addrp->ss_family == AF_INET) { + if (!(*flags & CS_IPV4)) { + (void) fprintf(stderr, gettext( + invalid_v4v6_msg)); + usage(1); + } + *flags &= ~CS_IPV6; + } else { + if (!(*flags & CS_IPV6)) { + (void) fprintf(stderr, gettext( + invalid_v4v6_msg)); + usage(1); + } + *flags &= ~CS_IPV4; + } + break; + case CS_LPORT: + case CS_RPORT: + *portp = strtonum(val, 1, UINT16_MAX, &errstr); + if (errstr != NULL) { + (void) fprintf(stderr, gettext( + "error parsing port (%s): %s\n"), + val, errstr); + usage(1); + } + break; + case CS_STATE: + filter->ca_state = tcp_str2state(val); + if (filter->ca_state < TCPS_CLOSED) { + (void) fprintf(stderr, gettext( + "invalid TCP state: %s\n"), val); + usage(1); + } + break; + } + } + + /* Make sure that laddr and raddr are at least in the same family. 
*/ + if ((*flags & (CS_LADDR|CS_RADDR)) == (CS_LADDR|CS_RADDR)) { + if (filter->ca_laddr.ss_family != filter->ca_raddr.ss_family) { + die("laddr and raddr must be of the same family."); + } + } +} + +/* + * Print timestamp as decimal representation of time_t value (-T u was + * specified) or in date(1) format (-T d was specified). + */ +static void +print_timestamp(timestamp_fmt_t timestamp_fmt, boolean_t parsable) +{ + time_t t = time(NULL); + char *pfx = parsable ? "= " : ""; + static char *fmt = NULL; + + /* We only need to retrieve this once per invocation */ + if (fmt == NULL) { + fmt = nl_langinfo(_DATE_FMT); + } + + switch (timestamp_fmt) { + case NOTIMESTAMP: + break; + case UTIMESTAMP: + (void) printf("%s%ld\n", pfx, t); + break; + case DTIMESTAMP: { + char dstr[64]; + size_t len; + + len = strftime(dstr, sizeof (dstr), fmt, localtime(&t)); + if (len > 0) { + (void) printf("%s%s\n", pfx, dstr); + } + break; + } + default: + abort(); + break; + } +} + +static void +show_stats(connstat_proto_t *proto, ofmt_handle_t ofmt, uint_t flags, + connstat_conn_attr_t *filter, timestamp_fmt_t timestamp_fmt, + uint_t interval, uint_t count) +{ + boolean_t done = B_FALSE; + uint_t i = 0; + int mibfd; + conn_walk_state_t state; + + state.cws_ofmt = ofmt; + state.cws_flags = flags; + state.cws_filter = *filter; + + if ((mibfd = mibopen(proto->csp_proto)) == -1) { + die("failed to open MIB stream: %s", strerror(errno)); + } + + do { + if (timestamp_fmt != NOTIMESTAMP) { + print_timestamp(timestamp_fmt, flags & CS_PARSABLE); + } + if (!(flags & CS_PARSABLE)) { + ofmt_print_header(ofmt); + } + + if (conn_walk(mibfd, proto, &state) != 0) { + die("failed to fetch and print connection info"); + } + + if (count != 0 && ++i == count) { + done = B_TRUE; + } else { + (void) sleep(interval); + } + } while (!done); +} + +/* + * ofmt callbacks for printing individual fields of various types. 
+ */ +boolean_t +print_string(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) +{ + char *value; + + value = (char *)ofarg->ofmt_cbarg + ofarg->ofmt_id; + (void) strlcpy(buf, value, bufsize); + return (B_TRUE); +} + +boolean_t +print_uint16(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) +{ + uint16_t value; + + /* LINTED E_BAD_PTR_CAST_ALIGN */ + value = *(uint16_t *)((char *)ofarg->ofmt_cbarg + ofarg->ofmt_id); + (void) snprintf(buf, bufsize, "%hu", value); + return (B_TRUE); +} + +boolean_t +print_uint32(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) +{ + uint32_t value; + + /* LINTED E_BAD_PTR_CAST_ALIGN */ + value = *(uint32_t *)((char *)ofarg->ofmt_cbarg + ofarg->ofmt_id); + (void) snprintf(buf, bufsize, "%u", value); + return (B_TRUE); +} + +boolean_t +print_uint64(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) +{ + uint64_t value; + + /* LINTED E_BAD_PTR_CAST_ALIGN */ + value = *(uint64_t *)((char *)ofarg->ofmt_cbarg + ofarg->ofmt_id); + (void) snprintf(buf, bufsize, "%llu", value); + return (B_TRUE); +} + +/* PRINTFLIKE1 */ +static void +die(const char *format, ...) +{ + va_list alist; + + format = gettext(format); + + va_start(alist, format); + verrx(1, format, alist); + va_end(alist); +} diff --git a/usr/src/cmd/connstat/connstat_mib.c b/usr/src/cmd/connstat/connstat_mib.c new file mode 100644 index 0000000000..4b3e532c63 --- /dev/null +++ b/usr/src/cmd/connstat/connstat_mib.c @@ -0,0 +1,177 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2015 by Delphix. All rights reserved. 
+ */ + +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <fcntl.h> +#include <strings.h> +#include <unistd.h> +#include <stropts.h> +#include <sys/debug.h> +#include <sys/tihdr.h> +#include "connstat.h" + +int +mibopen(const char *proto) +{ + int saved; + int fd; + + fd = open("/dev/arp", O_RDWR); + if (fd == -1) { + return (-1); + } + + if (ioctl(fd, I_PUSH, proto) == -1) { + saved = errno; + (void) close(fd); + errno = saved; + return (-1); + } + + return (fd); +} + +int +conn_walk(int fd, connstat_proto_t *proto, conn_walk_state_t *state) +{ + struct strbuf cbuf, dbuf; + struct opthdr *hdr; + int flags, r, err = 0; + struct { + struct T_optmgmt_req req; + struct opthdr hdr; + } req; + union { + struct T_optmgmt_ack ack; + uint8_t space[sizeof (struct T_optmgmt_ack) + + sizeof (struct opthdr) * 2]; + } ack; + + bzero(&cbuf, sizeof (cbuf)); + bzero(&dbuf, sizeof (dbuf)); + + req.req.PRIM_type = T_OPTMGMT_REQ; + req.req.OPT_offset = (caddr_t)&req.hdr - (caddr_t)&req; + req.req.OPT_length = sizeof (req.hdr); + req.req.MGMT_flags = T_CURRENT; + + req.hdr.level = proto->csp_miblevel; + req.hdr.name = 0; + req.hdr.len = 0; + + cbuf.buf = (caddr_t)&req; + cbuf.len = sizeof (req); + + if (putmsg(fd, &cbuf, NULL, 0) == -1) { + warn("failed to request connection info: putmsg"); + return (-1); + } + + /* + * Each reply consists of a control part for one fixed structure or + * table, as defined in mib2.h. The format is a T_OPTMGMT_ACK + * containing an opthdr structure. The level and name identify the + * entry, and len is the size of the data part of the message. + */ + for (;;) { + cbuf.buf = (caddr_t)&ack; + cbuf.maxlen = sizeof (ack); + flags = 0; + + /* + * We first do a getmsg() for the control part so that we + * can allocate a properly sized buffer to read the data + * part. 
+ */ + do { + r = getmsg(fd, &cbuf, NULL, &flags); + } while (r < 0 && errno == EINTR); + + if (r < 0) { + warn("failed to fetch further connection info"); + err = -1; + break; + } else if ((r & MORECTL) != 0) { + warnx("failed to fetch full control message"); + err = -1; + break; + } + + if (cbuf.len < sizeof (struct T_optmgmt_ack) || + ack.ack.PRIM_type != T_OPTMGMT_ACK || + ack.ack.MGMT_flags != T_SUCCESS || + ack.ack.OPT_length < sizeof (struct opthdr)) { + warnx("cannot process invalid message from getmsg()"); + err = -1; + break; + } + + /* LINTED E_BAD_PTR_CAST_ALIGN */ + hdr = (struct opthdr *)((caddr_t)&ack + ack.ack.OPT_offset); + if (r == 0 && hdr->level == 0 && hdr->name == 0) { + /* + * snmpcom_req() has sent us the final End-Of-Data + * message, so there's nothing further to read. + */ + break; + } + + /* Only data should remain. */ + VERIFY3S(r, ==, MOREDATA); + + /* Allocate a buffer to hold the data portion of the message */ + if ((dbuf.buf = realloc(dbuf.buf, hdr->len)) == NULL) { + warn("failed to realloc() buffer"); + err = -1; + break; + } + dbuf.maxlen = hdr->len; + dbuf.len = 0; + flags = 0; + + do { + r = getmsg(fd, NULL, &dbuf, &flags); + } while (r < 0 && errno == EINTR); + + if (r < 0) { + warn("failed to fetch connection data: getmsg()"); + err = -1; + break; + } else if (r != 0) { + warnx("failed to fetch all data: " + "getmsg() returned %d", r); + err = -1; + break; + } + + if ((state->cws_flags & CS_IPV4) && + hdr->name == proto->csp_mibv4name) { + proto->csp_v4walk(&dbuf, state); + } else if ((state->cws_flags & CS_IPV6) && + hdr->name == proto->csp_mibv6name) { + proto->csp_v6walk(&dbuf, state); + } + } + + free(dbuf.buf); + + return (err); +} diff --git a/usr/src/cmd/connstat/connstat_mib.h b/usr/src/cmd/connstat/connstat_mib.h new file mode 100644 index 0000000000..038ac2d874 --- /dev/null +++ b/usr/src/cmd/connstat/connstat_mib.h @@ -0,0 +1,35 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the 
terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2015 by Delphix. All rights reserved. + */ + +#ifndef _CONNSTAT_MIB_H +#define _CONNSTAT_MIB_H + +#include "connstat.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int mibopen(const char *); +int conn_walk(int, connstat_proto_t *, conn_walk_state_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _CONNSTAT_MIB_H */ diff --git a/usr/src/cmd/connstat/connstat_tcp.c b/usr/src/cmd/connstat/connstat_tcp.c new file mode 100644 index 0000000000..4cd20c9b09 --- /dev/null +++ b/usr/src/cmd/connstat/connstat_tcp.c @@ -0,0 +1,403 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2015, 2016 by Delphix. All rights reserved. + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <inet/mib2.h> +#include <sys/debug.h> +#include <sys/stropts.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <inet/tcp.h> +#include <arpa/inet.h> +#include <ofmt.h> +#include <sys/time.h> +#include "connstat_mib.h" +#include "connstat_tcp.h" + +/* + * The byte order of some of the fields in this code can be a bit confusing. 
+ * When using sockaddr_in(6) structs, the address and ports are always in + * Network Byte Order (Big Endian), as required by sockaddr(3SOCKET). + * + * When using the structs mib2_tcpConnEntry_t and mib2_tcp6ConnEntry_t, the + * address fields (tcp(6)ConnLocalAddress and tcp(6)ConnRemAddress) are in + * Network Byte Order. Note, however, that the port fields ARE NOT, but are + * instead in Host Byte Order. This isn't a problem though, since the ports + * we filter on from the command-line (ca_lport and ca_rport) are kept in + * Host Byte Order after parsing. + * + * Since the t_lport and t_rport fields come from the MIB structs, they are + * likewise stored in Host Byte Order (and need to be for printing). The + * t_laddr and t_raddr fields are string representations of the addresses, + * so they don't require any special attention. + * + * All of the statistics (such as bytes read and written, current window + * sizes, etc.) are in Host Byte Order. + */ + +typedef struct tcp_fields_buf_s { + char t_laddr[INET6_ADDRSTRLEN]; + char t_raddr[INET6_ADDRSTRLEN]; + uint16_t t_lport; + uint16_t t_rport; + uint64_t t_inbytes; + uint64_t t_insegs; + uint64_t t_inunorderbytes; + uint64_t t_inunordersegs; + uint64_t t_outbytes; + uint64_t t_outsegs; + uint64_t t_retransbytes; + uint64_t t_retranssegs; + uint32_t t_suna; + uint32_t t_unsent; + uint32_t t_swnd; + uint32_t t_cwnd; + uint32_t t_rwnd; + uint32_t t_mss; + uint32_t t_rto; + uint32_t t_rtt_cnt; + uint64_t t_rtt_sum; + int t_state; + uint64_t t_rtt; +} tcp_fields_buf_t; + +static boolean_t print_tcp_state(ofmt_arg_t *, char *, uint_t); + +static ofmt_field_t tcp_fields[] = { + { "LADDR", 26, + offsetof(tcp_fields_buf_t, t_laddr), print_string }, + { "RADDR", 26, + offsetof(tcp_fields_buf_t, t_raddr), print_string }, + { "LPORT", 6, + offsetof(tcp_fields_buf_t, t_lport), print_uint16 }, + { "RPORT", 6, + offsetof(tcp_fields_buf_t, t_rport), print_uint16 }, + { "INBYTES", 11, + offsetof(tcp_fields_buf_t, t_inbytes), 
print_uint64 }, + { "INSEGS", 11, + offsetof(tcp_fields_buf_t, t_insegs), print_uint64 }, + { "INUNORDERBYTES", 15, + offsetof(tcp_fields_buf_t, t_inunorderbytes), print_uint64 }, + { "INUNORDERSEGS", 14, + offsetof(tcp_fields_buf_t, t_inunordersegs), print_uint64 }, + { "OUTBYTES", 11, + offsetof(tcp_fields_buf_t, t_outbytes), print_uint64 }, + { "OUTSEGS", 11, + offsetof(tcp_fields_buf_t, t_outsegs), print_uint64 }, + { "RETRANSBYTES", 13, + offsetof(tcp_fields_buf_t, t_retransbytes), print_uint64 }, + { "RETRANSSEGS", 12, + offsetof(tcp_fields_buf_t, t_retranssegs), print_uint64 }, + { "SUNA", 11, + offsetof(tcp_fields_buf_t, t_suna), print_uint32 }, + { "UNSENT", 11, + offsetof(tcp_fields_buf_t, t_unsent), print_uint32 }, + { "SWND", 11, + offsetof(tcp_fields_buf_t, t_swnd), print_uint32 }, + { "CWND", 11, + offsetof(tcp_fields_buf_t, t_cwnd), print_uint32 }, + { "RWND", 11, + offsetof(tcp_fields_buf_t, t_rwnd), print_uint32 }, + { "MSS", 6, + offsetof(tcp_fields_buf_t, t_mss), print_uint32 }, + { "RTO", 8, + offsetof(tcp_fields_buf_t, t_rto), print_uint32 }, + { "RTT", 8, + offsetof(tcp_fields_buf_t, t_rtt), print_uint64 }, + { "RTTS", 8, + offsetof(tcp_fields_buf_t, t_rtt_sum), print_uint64 }, + { "RTTC", 11, + offsetof(tcp_fields_buf_t, t_rtt_cnt), print_uint32 }, + { "STATE", 12, + offsetof(tcp_fields_buf_t, t_state), print_tcp_state }, + { NULL, 0, 0, NULL} +}; + +static tcp_fields_buf_t fields_buf; + + +typedef struct tcp_state_info_s { + int tsi_state; + const char *tsi_string; +} tcp_state_info_t; + +tcp_state_info_t tcp_state_info[] = { + { TCPS_CLOSED, "CLOSED" }, + { TCPS_IDLE, "IDLE" }, + { TCPS_BOUND, "BOUND" }, + { TCPS_LISTEN, "LISTEN" }, + { TCPS_SYN_SENT, "SYN_SENT" }, + { TCPS_SYN_RCVD, "SYN_RCVD" }, + { TCPS_ESTABLISHED, "ESTABLISHED" }, + { TCPS_CLOSE_WAIT, "CLOSE_WAIT" }, + { TCPS_FIN_WAIT_1, "FIN_WAIT_1" }, + { TCPS_CLOSING, "CLOSING" }, + { TCPS_LAST_ACK, "LAST_ACK" }, + { TCPS_FIN_WAIT_2, "FIN_WAIT_2" }, + { TCPS_TIME_WAIT, "TIME_WAIT" 
}, + { TCPS_CLOSED - 1, NULL } +}; + +ofmt_field_t * +tcp_get_fields(void) +{ + return (tcp_fields); +} + +/* + * Extract information from the connection info structure into the global + * output buffer. + */ +static void +tcp_ci2buf(struct tcpConnEntryInfo_s *ci) +{ + fields_buf.t_inbytes = + ci->ce_in_data_inorder_bytes + ci->ce_in_data_unorder_bytes; + fields_buf.t_insegs = + ci->ce_in_data_inorder_segs + ci->ce_in_data_unorder_segs; + fields_buf.t_inunorderbytes = ci->ce_in_data_unorder_bytes; + fields_buf.t_inunordersegs = ci->ce_in_data_unorder_segs; + fields_buf.t_outbytes = ci->ce_out_data_bytes; + fields_buf.t_outsegs = ci->ce_out_data_segs; + fields_buf.t_retransbytes = ci->ce_out_retrans_bytes; + fields_buf.t_retranssegs = ci->ce_out_retrans_segs; + fields_buf.t_suna = ci->ce_snxt - ci->ce_suna; + fields_buf.t_unsent = ci->ce_unsent; + fields_buf.t_swnd = ci->ce_swnd; + fields_buf.t_cwnd = ci->ce_cwnd; + fields_buf.t_rwnd = ci->ce_rwnd; + fields_buf.t_mss = ci->ce_mss; + fields_buf.t_rto = ci->ce_rto; + fields_buf.t_rtt = (ci->ce_out_data_segs == 0 ? 0 : ci->ce_rtt_sa); + fields_buf.t_rtt_sum = ci->ce_rtt_sum; + fields_buf.t_rtt_cnt = ci->ce_rtt_cnt; + fields_buf.t_state = ci->ce_state; +} + +/* + * Extract information from the connection entry into the global output + * buffer. 
+ */ +static void +tcp_ipv4_ce2buf(mib2_tcpConnEntry_t *ce) +{ + VERIFY3P(inet_ntop(AF_INET, (void *)&ce->tcpConnLocalAddress, + fields_buf.t_laddr, sizeof (fields_buf.t_laddr)), !=, NULL); + VERIFY3P(inet_ntop(AF_INET, (void *)&ce->tcpConnRemAddress, + fields_buf.t_raddr, sizeof (fields_buf.t_raddr)), !=, NULL); + + fields_buf.t_lport = ce->tcpConnLocalPort; + fields_buf.t_rport = ce->tcpConnRemPort; + + tcp_ci2buf(&ce->tcpConnEntryInfo); +} + +static void +tcp_ipv6_ce2buf(mib2_tcp6ConnEntry_t *ce) +{ + VERIFY3P(inet_ntop(AF_INET6, (void *)&ce->tcp6ConnLocalAddress, + fields_buf.t_laddr, sizeof (fields_buf.t_laddr)), !=, NULL); + VERIFY3P(inet_ntop(AF_INET6, (void *)&ce->tcp6ConnRemAddress, + fields_buf.t_raddr, sizeof (fields_buf.t_raddr)), !=, NULL); + + fields_buf.t_lport = ce->tcp6ConnLocalPort; + fields_buf.t_rport = ce->tcp6ConnRemPort; + + tcp_ci2buf(&ce->tcp6ConnEntryInfo); +} + +/* + * Print a single IPv4 connection entry, taking into account possible + * filters that have been set in state. 
+ */ +static void +tcp_ipv4_print(mib2_tcpConnEntry_t *ce, conn_walk_state_t *state) +{ + if (!(state->cws_flags & CS_LOOPBACK) && + ntohl(ce->tcpConnLocalAddress) == INADDR_LOOPBACK) { + return; + } + + if (state->cws_flags & CS_LADDR) { + struct sockaddr_in *sin = + (struct sockaddr_in *)&state->cws_filter.ca_laddr; + if (ce->tcpConnLocalAddress != sin->sin_addr.s_addr) { + return; + } + } + if (state->cws_flags & CS_RADDR) { + struct sockaddr_in *sin = + (struct sockaddr_in *)&state->cws_filter.ca_raddr; + if (ce->tcpConnRemAddress != sin->sin_addr.s_addr) { + return; + } + } + if (state->cws_flags & CS_LPORT) { + if (ce->tcpConnLocalPort != state->cws_filter.ca_lport) { + return; + } + } + if (state->cws_flags & CS_RPORT) { + if (ce->tcpConnRemPort != state->cws_filter.ca_rport) { + return; + } + } + + if ((state->cws_flags & CS_STATE) && + ce->tcpConnEntryInfo.ce_state != state->cws_filter.ca_state) { + return; + } + + tcp_ipv4_ce2buf(ce); + ofmt_print(state->cws_ofmt, &fields_buf); +} + +/* + * Print a single IPv6 connection entry, taking into account possible + * filters that have been set in state. 
+ */ +static void +tcp_ipv6_print(mib2_tcp6ConnEntry_t *ce, conn_walk_state_t *state) +{ + if (!(state->cws_flags & CS_LOOPBACK) && + IN6_IS_ADDR_LOOPBACK( + (struct in6_addr *)&ce->tcp6ConnLocalAddress)) { + return; + } + + if (state->cws_flags & CS_LADDR) { + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *)&state->cws_filter.ca_laddr; + if (!IN6_ARE_ADDR_EQUAL( + (struct in6_addr *)&ce->tcp6ConnLocalAddress, + &sin6->sin6_addr)) { + return; + } + } + if (state->cws_flags & CS_RADDR) { + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *)&state->cws_filter.ca_raddr; + if (!IN6_ARE_ADDR_EQUAL( + (struct in6_addr *)&ce->tcp6ConnRemAddress, + &sin6->sin6_addr)) { + return; + } + } + if (state->cws_flags & CS_LPORT) { + if (ce->tcp6ConnLocalPort != state->cws_filter.ca_lport) { + return; + } + } + if (state->cws_flags & CS_RPORT) { + if (ce->tcp6ConnRemPort != state->cws_filter.ca_rport) { + return; + } + } + + if ((state->cws_flags & CS_STATE) && + ce->tcp6ConnEntryInfo.ce_state != state->cws_filter.ca_state) { + return; + } + + tcp_ipv6_ce2buf(ce); + ofmt_print(state->cws_ofmt, &fields_buf); +} + +void +tcp_walk_ipv4(struct strbuf *dbuf, conn_walk_state_t *state) +{ + uint_t nconns = (dbuf->len / sizeof (mib2_tcpConnEntry_t)); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + mib2_tcpConnEntry_t *ce = (mib2_tcpConnEntry_t *)dbuf->buf; + + for (; nconns > 0; ce++, nconns--) { + tcp_ipv4_print(ce, state); + } +} + +void +tcp_walk_ipv6(struct strbuf *dbuf, conn_walk_state_t *state) +{ + uint_t nconns = (dbuf->len / sizeof (mib2_tcp6ConnEntry_t)); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + mib2_tcp6ConnEntry_t *ce = (mib2_tcp6ConnEntry_t *)dbuf->buf; + + for (; nconns > 0; ce++, nconns--) { + tcp_ipv6_print(ce, state); + } +} + +static tcp_state_info_t * +tcp_stateinfobystate(int state) +{ + tcp_state_info_t *sip; + + for (sip = tcp_state_info; sip->tsi_string != NULL; sip++) { + if (sip->tsi_state == state) { + return (sip); + } + } + return (NULL); +} + +static tcp_state_info_t 
* +tcp_stateinfobystr(const char *statestr) +{ + tcp_state_info_t *sip; + + for (sip = tcp_state_info; sip->tsi_string != NULL; sip++) { + if (strncasecmp(statestr, sip->tsi_string, + strlen(sip->tsi_string)) == 0) { + return (sip); + } + } + return (NULL); +} + +int +tcp_str2state(const char *statestr) +{ + tcp_state_info_t *sip = tcp_stateinfobystr(statestr); + return (sip == NULL ? TCPS_CLOSED - 1 : sip->tsi_state); +} + +static const char * +tcp_state2str(int state) +{ + tcp_state_info_t *sip = tcp_stateinfobystate(state); + return (sip == NULL ? NULL : sip->tsi_string); +} + +static boolean_t +print_tcp_state(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) +{ + /* LINTED E_BAD_PTR_CAST_ALIGN */ + int state = *(int *)((char *)ofarg->ofmt_cbarg + ofarg->ofmt_id); + const char *statestr = tcp_state2str(state); + + if (statestr != NULL) { + (void) strlcpy(buf, statestr, bufsize); + } else { + (void) snprintf(buf, bufsize, "UNKNOWN(%d)", state); + } + + return (B_TRUE); +} diff --git a/usr/src/cmd/connstat/connstat_tcp.h b/usr/src/cmd/connstat/connstat_tcp.h new file mode 100644 index 0000000000..7cad46370d --- /dev/null +++ b/usr/src/cmd/connstat/connstat_tcp.h @@ -0,0 +1,50 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2015, 2016 by Delphix. All rights reserved. 
+ */ + +#ifndef _CONNSTAT_TCP_H +#define _CONNSTAT_TCP_H + +#include <stddef.h> +#include "connstat.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int tcp_str2state(const char *state); +connstat_getfieldsfunc_t tcp_get_fields; +connstat_walkfunc_t tcp_walk_ipv4, tcp_walk_ipv6; + +/* + * Keep the default output to < 80 columns. For most interactive workflows, + * the user will run the command without arguments to get an idea of what + * connections exist before narrowing down the investigation to a single + * connection (with filtering) and specifying additional fields to output + * depending on what the user is interested in. + */ +#define TCP_DEFAULT_FIELDS "laddr,lport,raddr,rport,state" + +#define CONNSTAT_TCP_PROTO \ + { "tcp", TCP_DEFAULT_FIELDS, MIB2_TCP, MIB2_TCP_CONN, MIB2_TCP6_CONN, \ + tcp_get_fields, tcp_walk_ipv4, tcp_walk_ipv6 } + +#ifdef __cplusplus +} +#endif + +#endif /* _CONNSTAT_TCP_H */ diff --git a/usr/src/man/man1m/Makefile b/usr/src/man/man1m/Makefile index eda366a82d..ebdd639d57 100644 --- a/usr/src/man/man1m/Makefile +++ b/usr/src/man/man1m/Makefile @@ -77,6 +77,7 @@ _MANFILES= 6to4relay.1m \ clear_locks.1m \ clinfo.1m \ clri.1m \ + connstat.1m \ consadm.1m \ conv_lp.1m \ conv_lpd.1m \ diff --git a/usr/src/man/man1m/connstat.1m b/usr/src/man/man1m/connstat.1m new file mode 100644 index 0000000000..2175fefdb3 --- /dev/null +++ b/usr/src/man/man1m/connstat.1m @@ -0,0 +1,395 @@ +.\" +.\" CDDL HEADER START +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" CDDL HEADER END +.\" +.\" +.\" Copyright (c) 2016 by Delphix. All rights reserved. 
+.\" +.Dd July 5, 2016 +.Dt CONNSTAT 1M +.Os +.Sh NAME +.Nm connstat +.Nd report TCP connection statistics +.Sh SYNOPSIS +.Nm +.Op Fl eLP +.Op Fl 4 Ns | Ns Fl 6 +.Op Fl T Sy d Ns | Ns Sy u +.Op Fl F Ar filter +.Op Fl i Ar interval +.Op Fl c Ar count +.Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... +.Sh DESCRIPTION +The +.Nm +command reports TCP connection statistics in tabular form. +Each row of the table represents the activity of one connection. +The +.Nm +command adds virtually no overhead to run as it is aggregating statistics that +are always collected in the kernel. +.Pp +With no arguments, +.Nm +prints a single report containing all TCP connections, and includes a basic +set of fields representing IP address and port information, as well as connection +state. +The +.Fl o +flag can be used to specify which fields to display, and other arguments to +filter the set of connections included in the output. +.Sh OPTIONS +The arguments are as follows: +.Bl -tag -width "" +.It Fl 4 , Fl -ipv4 +Only display IPv4 connections. +.It Fl 6 , Fl -ipv6 +Only display IPv6 connections. +.It Fl c Ar count , Fl -count Ns = Ns Ar count +Print a specified number of reports before exiting. +This is used in conjunction with +.Fl i . +.It Fl e , Fl -established +Only display connections that are in state ESTABLISHED. +This is equivalent to including +.Sy state=ESTABLISHED +in the filter argument to the +.Fl F +option. +.It Fl F Ar filter , Fl -filter Ns = Ns Ar filter +Only display connections that match the filter argument provided. +The format of the filter is: +.Pp +.Ar field Ns = Ns Ar value Ns Oo , Ns Ar field Ns = Ns Ar value Oc Ns ... +.Pp +Fields that can currently be filtered are +.Ar laddr , Ar lport , Ar raddr , Ar rport , and Ar state . +See the +.Sx Fields +section for a description of these fields. +The filter matches a connection if all of the filter elements match, and a +field may appear only once in the filter. 
+.It Fl i Ar interval , Fl -interval Ns = Ns Ar interval +Specify an output interval in seconds. +For each interval, a report containing all connections appropriate given other +command-line options is printed. +.It Fl L , Fl -no-loopback +Exclude connections to the loopback address. +.It Fl o Ar fields , Fl -output Ns = Ns Ar fields +Restrict the output to the specified comma-delimited list of field names. +See the +.Sx Fields +section for information about possible fields. +.It Fl P , Fl -parsable +Display using a stable, machine-parsable output format. +The +.Fl o +flag must also be given to specify which fields to output and their order. +Each line of output will consist of comma-delimited (,) fields, +and no header will be emitted. +When also using the +.Fl T +option, lines indicating the current time will begin with +.Dq "= " . +See +.Sx Example 4 +for an example of how to process parsable output. +.It Fl T Sy d Ns | Ns Sy u , Fl -timestamp Ns = Ns Sy d Ns | Ns Sy u +Print a timestamp before each block of output. +.Pp +Specify +.Sy u +for a printed representation of the internal representation of time (see +.Xr time 2 Ns ). +Specify +.Sy d +for standard date format (see +.Xr date 1 Ns ). +.El +.Ss Fields +The following fields are supported. +Field names are case insensitive. +Unless otherwise indicated, the values of fields that represent a count (e.g. +bytes or segments) are cumulative since the connection was established. +Some of these fields refer to data segments, which are segments that contain +a non-zero amount of data. +All sizes are in bytes. +.Bl -tag -width "inunorderbytes" +.It Sy cwnd +The size of the local TCP congestion window at this instant. +.It Sy inbytes +The number of data bytes received. +This does not include duplicate bytes received. +.It Sy insegs +The number of data segments received. +This does not include duplicate segments received. +.It Sy inunorderbytes +The number of data bytes that were received out of order. 
+.It Sy inunordersegs +The number of data segments that were received out of order. +.It Sy laddr +The local IP address. +.It Sy lport +The local TCP port. +.It Sy mss +The maximum TCP segment size for this connection. +.It Sy outbytes +The number of data bytes sent. +This does not include retransmitted bytes counted by +.Sy retransbytes . +.It Sy outsegs +The number of data segments sent. +This does not include segments containing retransmitted bytes counted by +.Sy retranssegs . +.It Sy raddr +The remote IP address. +.It Sy retransbytes +The number of data bytes retransmitted. +.It Sy retranssegs +The number of data segments sent that contained retransmitted bytes. +.It Sy rport +The remote TCP port. +.It Sy rto +The current retransmission timeout in milliseconds. +.It Sy rtt +The current smoothed round-trip time to the peer in microseconds. +The smoothed RTT average algorithm used is as described in RFC 6298. +.It Sy rttc +The number of times that a round-trip sample was added to +.Sy rtts . +See +.Sy rtts +for a description of how these two fields can be used together to calculate the +average round-trip over a given period. +.It Sy rtts +The sum of all round-trip samples taken over the lifetime of the connection in +microseconds. +Each time TCP updates the value of +.Sy rtt +with a new sample, that sample's value is added to +.Sy rtts . +To calculate the average round-trip over a given period (e.g. between T1 and T2), +take samples of +.Sy rtts +and +.Sy rttc +at T1 and T2, and calculate +.br +(( +.Sy rtts Ns +_T2 - +.Sy rtts Ns +_T1 ) / ( +.Sy rttc Ns +_T2 - +.Sy rttc Ns +_T1 )). +.br +See +.Sx Example 4 +for an example of how this can be done programmatically from a shell script. +.It Sy rwnd +The size of the local TCP receive window at this instant. +.It Sy state +The TCP connection state. +Possible values are: +.Bl -tag -width "SYN_RECEIVED" +.It Sy BOUND +Bound, ready to connect or listen. +.It Sy CLOSED +Closed. +The local endpoint (e.g. 
socket) is not being used. +.It Sy CLOSING +Closed, but still waiting for a termination acknowledgment from the peer. +.It Sy CLOSE_WAIT +The peer has shut down; waiting for the local endpoint to close. +.It Sy ESTABLISHED +Connection has been established and data can be transferred. +.It Sy FIN_WAIT_1 +Local endpoint is closed, but waiting for a termination acknowledgment from the +peer. +.It Sy FIN_WAIT_2 +Local endpoint is closed, but waiting for a termination request from the peer. +.It Sy IDLE +The local endpoint (e.g. socket) has been opened, but is not bound. +.It Sy LAST_ACK +The remote endpoint has terminated, and the local endpoint has sent a termination +request. +The acknowledgment for this request has not been received. +.It Sy LISTEN +Listening for incoming connections. +.It Sy SYN_RCVD +Initial connection request has been received and acknowledged, and a connection +request has been sent but not yet acknowledged. +.It Sy SYN_SENT +A connection establishment request has been sent but not yet acknowledged. +.It Sy TIME_WAIT +Waiting for time to pass after having sent an acknowledgment for the peer's +connection termination request. +.El +.Pp +See RFC 793 for a more complete understanding of the TCP protocol and TCP +connection states. +.It Sy suna +The number of unacknowledged bytes outstanding at this instant. +.It Sy swnd +The size of the local TCP send window (the peer's receive window) at this +instant. +.It Sy unsent +The number of unsent bytes in the local TCP transmit queue at this instant. +.El +.Sh EXIT STATUS +The +.Nm +utility exits 0 on success, or 1 if an error occurs. +.Sh EXAMPLES +.Bl -tag -width "" +.It Sy Example 1 List established connections. +By default, connstat lists basic connection details. +Using the +.Fl e +option allows the user to get a quick glance of established connections. 
+.Bd -literal +$ connstat -e + LADDR LPORT RADDR RPORT STATE + 10.43.37.172 51275 172.16.105.4 389 ESTABLISHED + 10.43.37.172 22 172.16.98.16 62270 ESTABLISHED + 10.43.37.172 1020 172.16.100.162 2049 ESTABLISHED + 10.43.37.172 1019 10.43.11.64 2049 ESTABLISHED + 10.43.37.172 22 172.16.98.16 61520 ESTABLISHED + 10.43.37.172 80 10.43.16.132 59467 ESTABLISHED +.Ed +.It Sy Example 2 Show one connection's I/O stats every second +The +.Fl F +option is used to filter a specific connection, +.Fl o +is used to output specific fields, and +.Fl i +to provide the output interval in seconds. +.Bd -literal +$ connstat -F lport=22,rport=49675,raddr=172.16.168.30 \e + -o inbytes,outbytes -i 1 + INBYTES OUTBYTES + 9589 18101 + INBYTES OUTBYTES + 9589 18341 + INBYTES OUTBYTES + 9589 18501 + INBYTES OUTBYTES + 9589 18661 + ... +.Ed +.It Sy Example 3 Understanding the bottleneck for a given connection +Understanding the transmit bottleneck for a connection requires knowing the +size of the congestion window, whether the window is full, and the round-trip +time to the peer. +The congestion window is full when +.Sy suna +is equal to +.Sy cwnd . +If the window is full, then the throughput is limited by the size of the window +and the round-trip time. +In that case, knowing these two values is critical. +Either the window is small because of retransmissions, or the round-trip +latency is high, or both. +In the example below, the window is small due to high congestion or an +unreliable network. 
+.Bd -literal +$ connstat -F lport=41934,rport=50001 \e + -o outbytes,suna,cwnd,unsent,retransbytes,rtt -T d -i 1 +July 7, 2016 11:04:40 AM EDT + OUTBYTES SUNA CWND UNSENT RETRANSBYTES RTT + 1647048093 47784 47784 3017352 3701844 495 +July 7, 2016 11:04:41 AM EDT + OUTBYTES SUNA CWND UNSENT RETRANSBYTES RTT + 1660720109 41992 41992 1535032 3765556 673 +July 7, 2016 11:04:42 AM EDT + OUTBYTES SUNA CWND UNSENT RETRANSBYTES RTT + 1661875613 26064 26064 4311688 3829268 571 +July 7, 2016 11:04:43 AM EDT + OUTBYTES SUNA CWND UNSENT RETRANSBYTES RTT + 1681478637 41992 41992 437304 3932076 1471 +July 7, 2016 11:04:44 AM EDT + OUTBYTES SUNA CWND UNSENT RETRANSBYTES RTT + 1692028765 44888 44888 1945800 4014612 921 +\&... +.Ed +.It Sy Example 4 Calculating average RTT over intervals +As described in the +.Sx Fields +section, the +.Sy rtts +and +.Sy rttc +fields can be used to calculate average RTT over a period of time. +The following example combines machine parsable output with these fields to do +this programmatically. +The script: +.Bd -literal +#!/bin/bash + +i=0 +connstat -P -F lport=41934,rport=50001 -o rttc,rtts -i 1 | \e + while IFS=, read rttc[$i] rtts[$i]; do + if [[ $i != 0 ]]; then + let rtt="(${rtts[$i]} - ${rtts[$i - 1]}) / \e + (${rttc[$i]} - ${rttc[$i - 1]})" + print "avg rtt = ${rtt}us" + fi + ((i++)) +done +.Ed +.Pp +The output: +.Bd -literal +\&... +avg rtt = 992us +avg rtt = 829us +avg rtt = 712us +avg rtt = 869us +\&... +.Ed +.It Sy Example 5 Show HTTP server connections in TIME_WAIT state +Connections accumulating in TIME_WAIT state can sometimes be an issue, as these +connections linger and take up port number space while their time wait timer +is ticking. 
+.Bd -literal +$ connstat -F state=time_wait,lport=80 + LADDR LPORT RADDR RPORT STATE + 10.43.37.172 80 172.16.168.30 56067 TIME_WAIT + 10.43.37.172 80 172.16.168.30 56068 TIME_WAIT + 10.43.37.172 80 172.16.168.30 56070 TIME_WAIT +.Ed +.El +.Sh INTERFACE STABILITY +The command line options for this command are stable, but the output format +when not using the +.Fl P +option and diagnostic messages are not. +.Sh SEE ALSO +.Xr netstat 1M +.Rs +.%A J. Postel +.%B Transmission Control Protocol, STD 7, RFC 793 +.%D September 1981 +.Re +.Rs +.%A V. Paxson +.%A M. Allman +.%A J. Chu +.%A M. Sargent +.%B Computing TCP's Retransmission Timer, RFC 6298 +.%D June 2011 +.Re diff --git a/usr/src/pkg/manifests/SUNWcs.man1m.inc b/usr/src/pkg/manifests/SUNWcs.man1m.inc index e5e971ed18..8643a52bff 100644 --- a/usr/src/pkg/manifests/SUNWcs.man1m.inc +++ b/usr/src/pkg/manifests/SUNWcs.man1m.inc @@ -14,6 +14,7 @@ # Copyright 2015 Nexenta Systems, Inc. All rights reserved. # Copyright 2016 Toomas Soome <tsoome@me.com> # Copyright (c) 2017, Chris Fraire <cfraire@me.com>. +# Copyright (c) 2016 by Delphix. All rights reserved. # file path=usr/share/man/man1m/6to4relay.1m @@ -48,6 +49,7 @@ file path=usr/share/man/man1m/chroot.1m file path=usr/share/man/man1m/clear_locks.1m file path=usr/share/man/man1m/clinfo.1m file path=usr/share/man/man1m/clri.1m +file path=usr/share/man/man1m/connstat.1m file path=usr/share/man/man1m/consadm.1m file path=usr/share/man/man1m/coreadm.1m file path=usr/share/man/man1m/cron.1m diff --git a/usr/src/pkg/manifests/SUNWcs.mf b/usr/src/pkg/manifests/SUNWcs.mf index ce53e443ca..0bdc473d00 100644 --- a/usr/src/pkg/manifests/SUNWcs.mf +++ b/usr/src/pkg/manifests/SUNWcs.mf @@ -26,6 +26,7 @@ # Copyright 2017 Nexenta Systems, Inc. # Copyright 2017 Toomas Soome <tsoome@me.com> # Copyright 2019 Peter Tribble. +# Copyright (c) 2011, 2015 by Delphix. All rights reserved. 
# <include SUNWcs.man1.inc> @@ -708,6 +709,7 @@ file path=usr/bin/cktime mode=0555 file path=usr/bin/ckuid mode=0555 file path=usr/bin/ckyorn mode=0555 file path=usr/bin/clear mode=0555 +file path=usr/bin/connstat mode=0555 file path=usr/bin/coreadm mode=0555 file path=usr/bin/cp mode=0555 file path=usr/bin/cpio mode=0555 diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c index 46272b2b22..3cfdbe73ff 100644 --- a/usr/src/uts/common/inet/ip/ip.c +++ b/usr/src/uts/common/inet/ip/ip.c @@ -9635,12 +9635,18 @@ ip_snmp_get(queue_t *q, mblk_t *mpctl, int level, boolean_t legacy_req) if ((mpctl = udp_snmp_get(q, mpctl, legacy_req)) == NULL) { return (1); } + if (level == MIB2_UDP) { + goto done; + } } if (level != MIB2_UDP) { if ((mpctl = tcp_snmp_get(q, mpctl, legacy_req)) == NULL) { return (1); } + if (level == MIB2_TCP) { + goto done; + } } if ((mpctl = ip_snmp_get_mib2_ip_traffic_stats(q, mpctl, @@ -9717,6 +9723,7 @@ ip_snmp_get(queue_t *q, mblk_t *mpctl, int level, boolean_t legacy_req) if ((mpctl = ip_snmp_get_mib2_ip_dce(q, mpctl, ipst)) == NULL) { return (1); } +done: freemsg(mpctl); return (1); } diff --git a/usr/src/uts/common/inet/mib2.h b/usr/src/uts/common/inet/mib2.h index f6b6b996a8..847ad1c560 100644 --- a/usr/src/uts/common/inet/mib2.h +++ b/usr/src/uts/common/inet/mib2.h @@ -20,7 +20,10 @@ * * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. */ -/* Copyright (c) 1990 Mentat Inc. */ +/* + * Copyright (c) 1990 Mentat Inc. + * Copyright (c) 2015, 2016 by Delphix. All rights reserved. 
+ */ #ifndef _INET_MIB2_H #define _INET_MIB2_H @@ -1354,25 +1357,46 @@ typedef struct mib2_tcpConnEntry { /* remote port for this connection { tcpConnEntry 5 } */ int tcpConnRemPort; /* In host byte order */ struct tcpConnEntryInfo_s { - /* seq # of next segment to send */ + Counter64 ce_in_data_inorder_bytes; + Counter64 ce_in_data_inorder_segs; + Counter64 ce_in_data_unorder_bytes; + Counter64 ce_in_data_unorder_segs; + Counter64 ce_in_zwnd_probes; + + Counter64 ce_out_data_bytes; + Counter64 ce_out_data_segs; + Counter64 ce_out_retrans_bytes; + Counter64 ce_out_retrans_segs; + Counter64 ce_out_zwnd_probes; + Counter64 ce_rtt_sum; + + /* seq # of next segment to send */ Gauge ce_snxt; /* seq # of of last segment unacknowledged */ Gauge ce_suna; - /* currect send window size */ + /* current send window size */ Gauge ce_swnd; + /* current congestion window size */ + Gauge ce_cwnd; /* seq # of next expected segment */ Gauge ce_rnxt; /* seq # of last ack'd segment */ Gauge ce_rack; - /* currenct receive window size */ + /* # of unsent bytes in the xmit queue */ + Gauge ce_unsent; + /* current receive window size */ Gauge ce_rwnd; - /* current rto (retransmit timeout) */ + /* round-trip time smoothed average (us) */ + Gauge ce_rtt_sa; + /* current rto (retransmit timeout) */ Gauge ce_rto; - /* current max segment size */ + /* round-trip time count */ + Gauge ce_rtt_cnt; + /* current max segment size */ Gauge ce_mss; /* actual internal state */ int ce_state; - } tcpConnEntryInfo; + } tcpConnEntryInfo; /* pid of the processes that created this connection */ uint32_t tcpConnCreationProcess; @@ -1408,26 +1432,7 @@ typedef struct mib2_tcp6ConnEntry { DeviceIndex tcp6ConnIfIndex; /* state of tcp6 connection { ipv6TcpConnEntry 6 } RW */ int tcp6ConnState; - struct tcp6ConnEntryInfo_s { - /* seq # of next segment to send */ - Gauge ce_snxt; - /* seq # of of last segment unacknowledged */ - Gauge ce_suna; - /* currect send window size */ - Gauge ce_swnd; - /* seq # of next 
expected segment */ - Gauge ce_rnxt; - /* seq # of last ack'd segment */ - Gauge ce_rack; - /* currenct receive window size */ - Gauge ce_rwnd; - /* current rto (retransmit timeout) */ - Gauge ce_rto; - /* current max segment size */ - Gauge ce_mss; - /* actual internal state */ - int ce_state; - } tcp6ConnEntryInfo; + struct tcpConnEntryInfo_s tcp6ConnEntryInfo; /* pid of the processes that created this connection */ uint32_t tcp6ConnCreationProcess; diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h index 9c5ffed2eb..7e3910e894 100644 --- a/usr/src/uts/common/inet/tcp.h +++ b/usr/src/uts/common/inet/tcp.h @@ -177,16 +177,11 @@ typedef struct tcp_s { mblk_t *tcp_xmit_tail; /* Last data sent */ uint32_t tcp_unsent; /* # of bytes in hand that are unsent */ uint32_t tcp_xmit_tail_unsent; /* # of unsent bytes in xmit_tail */ - uint32_t tcp_suna; /* Sender unacknowledged */ uint32_t tcp_rexmit_nxt; /* Next rexmit seq num */ uint32_t tcp_rexmit_max; /* Max retran seq num */ uint32_t tcp_cwnd; /* Congestion window */ int32_t tcp_cwnd_cnt; /* cwnd cnt in congestion avoidance */ - - uint32_t tcp_ibsegs; /* Inbound segments on this stream */ - uint32_t tcp_obsegs; /* Outbound segments on this stream */ - uint32_t tcp_naglim; /* Tunable nagle limit */ uint32_t tcp_valid_bits; #define TCP_ISS_VALID 0x1 /* Is the tcp_iss seq num active? */ @@ -194,8 +189,6 @@ typedef struct tcp_s { #define TCP_URG_VALID 0x4 /* Is the tcp_urg seq num active? */ #define TCP_OFO_FIN_VALID 0x8 /* Has TCP received an out of order FIN? */ - - timeout_id_t tcp_timer_tid; /* Control block for timer service */ uchar_t tcp_timer_backoff; /* Backoff shift count. */ int64_t tcp_last_recv_time; /* Last time we receive a segment. 
*/ @@ -282,6 +275,8 @@ typedef struct tcp_s { uint32_t tcp_cwnd_max; uint32_t tcp_csuna; /* Clear (no rexmits in window) suna */ + hrtime_t tcp_rtt_sum; /* Round trip sum */ + uint32_t tcp_rtt_cnt; /* Round trip count (non_dup ACKs) */ hrtime_t tcp_rtt_sa; /* Round trip smoothed average */ hrtime_t tcp_rtt_sd; /* Round trip smoothed deviation */ uint32_t tcp_rtt_update; /* Round trip update(s) */ @@ -493,6 +488,8 @@ typedef struct tcp_s { /* FIN-WAIT-2 flush timeout */ uint32_t tcp_fin_wait_2_flush_interval; + tcp_conn_stats_t tcp_cs; + #ifdef DEBUG pc_t tcmp_stk[15]; #endif diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index ee1d75924e..d7458c8eee 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -1231,11 +1231,6 @@ tcp_closei_local(tcp_t *tcp) if (!TCP_IS_SOCKET(tcp)) tcp_acceptor_hash_remove(tcp); - TCPS_UPDATE_MIB(tcps, tcpHCInSegs, tcp->tcp_ibsegs); - tcp->tcp_ibsegs = 0; - TCPS_UPDATE_MIB(tcps, tcpHCOutSegs, tcp->tcp_obsegs); - tcp->tcp_obsegs = 0; - /* * This can be called via tcp_time_wait_processing() if TCP gets a * SYN with sequence number outside the TIME-WAIT connection's @@ -1904,15 +1899,6 @@ tcp_reinit(tcp_t *tcp) /* Cancel outstanding timers */ tcp_timers_stop(tcp); - /* - * Reset everything in the state vector, after updating global - * MIB data from instance counters. 
- */ - TCPS_UPDATE_MIB(tcps, tcpHCInSegs, tcp->tcp_ibsegs); - tcp->tcp_ibsegs = 0; - TCPS_UPDATE_MIB(tcps, tcpHCOutSegs, tcp->tcp_obsegs); - tcp->tcp_obsegs = 0; - tcp_close_mpp(&tcp->tcp_xmit_head); if (tcp->tcp_snd_zcopy_aware) tcp_zcopy_notify(tcp); @@ -2084,9 +2070,6 @@ tcp_reinit_values(tcp_t *tcp) tcp->tcp_swnd = 0; DONTCARE(tcp->tcp_cwnd); /* Init in tcp_process_options */ - ASSERT(tcp->tcp_ibsegs == 0); - ASSERT(tcp->tcp_obsegs == 0); - if (connp->conn_ht_iphc != NULL) { kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated); connp->conn_ht_iphc = NULL; @@ -2178,6 +2161,8 @@ tcp_reinit_values(tcp_t *tcp) DONTCARE(tcp->tcp_rtt_sa); /* Init in tcp_init_values */ DONTCARE(tcp->tcp_rtt_sd); /* Init in tcp_init_values */ tcp->tcp_rtt_update = 0; + tcp->tcp_rtt_sum = 0; + tcp->tcp_rtt_cnt = 0; DONTCARE(tcp->tcp_swl1); /* Init in case TCPS_LISTEN/TCPS_SYN_SENT */ DONTCARE(tcp->tcp_swl2); /* Init in case TCPS_LISTEN/TCPS_SYN_SENT */ diff --git a/usr/src/uts/common/inet/tcp/tcp_fusion.c b/usr/src/uts/common/inet/tcp/tcp_fusion.c index 6acc02d769..e73c34de34 100644 --- a/usr/src/uts/common/inet/tcp/tcp_fusion.c +++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 by Delphix. All rights reserved. 
*/ #include <sys/types.h> @@ -645,14 +646,16 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size) peer_tcp->tcp_rack = peer_tcp->tcp_rnxt; TCPS_BUMP_MIB(tcps, tcpOutDataSegs); + TCPS_BUMP_MIB(tcps, tcpHCOutSegs); TCPS_UPDATE_MIB(tcps, tcpOutDataBytes, send_size); + tcp->tcp_cs.tcp_out_data_bytes += send_size; + tcp->tcp_cs.tcp_out_data_segs++; TCPS_BUMP_MIB(tcps, tcpHCInSegs); TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs); TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, send_size); - - BUMP_LOCAL(tcp->tcp_obsegs); - BUMP_LOCAL(peer_tcp->tcp_ibsegs); + peer_tcp->tcp_cs.tcp_in_data_inorder_bytes += send_size; + peer_tcp->tcp_cs.tcp_in_data_inorder_segs++; DTRACE_TCP5(send, void, NULL, ip_xmit_attr_t *, connp->conn_ixa, __dtrace_tcp_void_ip_t *, NULL, tcp_t *, tcp, diff --git a/usr/src/uts/common/inet/tcp/tcp_input.c b/usr/src/uts/common/inet/tcp/tcp_input.c index 0e12d23c3e..dd50c3f6ad 100644 --- a/usr/src/uts/common/inet/tcp/tcp_input.c +++ b/usr/src/uts/common/inet/tcp/tcp_input.c @@ -559,7 +559,7 @@ tcp_process_options(tcp_t *tcp, tcpha_t *tcpha) static mblk_t * tcp_reass(tcp_t *tcp, mblk_t *mp, uint32_t start) { - uint32_t end; + uint32_t end, bytes; mblk_t *mp1; mblk_t *mp2; mblk_t *next_mp; @@ -578,26 +578,26 @@ tcp_reass(tcp_t *tcp, mblk_t *mp, uint32_t start) freeb(mp); continue; } + bytes = end - start; mp->b_cont = NULL; TCP_REASS_SET_SEQ(mp, start); TCP_REASS_SET_END(mp, end); mp1 = tcp->tcp_reass_tail; - if (!mp1) { - tcp->tcp_reass_tail = mp; - tcp->tcp_reass_head = mp; - TCPS_BUMP_MIB(tcps, tcpInDataUnorderSegs); - TCPS_UPDATE_MIB(tcps, tcpInDataUnorderBytes, - end - start); - continue; - } - /* New stuff completely beyond tail? */ - if (SEQ_GEQ(start, TCP_REASS_END(mp1))) { - /* Link it on end. */ - mp1->b_cont = mp; + if (mp1 == NULL || SEQ_GEQ(start, TCP_REASS_END(mp1))) { + if (mp1 != NULL) { + /* + * New stuff is beyond the tail; link it on the + * end. 
+ */ + mp1->b_cont = mp; + } else { + tcp->tcp_reass_head = mp; + } tcp->tcp_reass_tail = mp; TCPS_BUMP_MIB(tcps, tcpInDataUnorderSegs); - TCPS_UPDATE_MIB(tcps, tcpInDataUnorderBytes, - end - start); + TCPS_UPDATE_MIB(tcps, tcpInDataUnorderBytes, bytes); + tcp->tcp_cs.tcp_in_data_unorder_segs++; + tcp->tcp_cs.tcp_in_data_unorder_bytes += bytes; continue; } mp1 = tcp->tcp_reass_head; @@ -2414,7 +2414,7 @@ tcp_input_data(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) flags = (unsigned int)tcpha->tha_flags & 0xFF; - BUMP_LOCAL(tcp->tcp_ibsegs); + TCPS_BUMP_MIB(tcps, tcpHCInSegs); DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp); if ((flags & TH_URG) && sqp != NULL) { @@ -2659,7 +2659,7 @@ tcp_input_data(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) tcp->tcp_ack_tid = 0; } tcp_send_data(tcp, ack_mp); - BUMP_LOCAL(tcp->tcp_obsegs); + TCPS_BUMP_MIB(tcps, tcpHCOutSegs); TCPS_BUMP_MIB(tcps, tcpOutAck); if (!IPCL_IS_NONSTR(connp)) { @@ -3048,6 +3048,7 @@ try_again:; if (tcp->tcp_rwnd == 0) { TCPS_BUMP_MIB(tcps, tcpInWinProbe); + tcp->tcp_cs.tcp_in_zwnd_probes++; } else { TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs); TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap); @@ -3297,6 +3298,9 @@ ok:; } else if (seg_len > 0) { TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs); TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len); + tcp->tcp_cs.tcp_in_data_inorder_segs++; + tcp->tcp_cs.tcp_in_data_inorder_bytes += seg_len; + /* * If an out of order FIN was received before, and the seq * num and len of the new segment match that of the FIN, @@ -4146,7 +4150,7 @@ process_ack: } mp = tcp_ack_mp(tcp); if (mp != NULL) { - BUMP_LOCAL(tcp->tcp_obsegs); + TCPS_BUMP_MIB(tcps, tcpHCOutSegs); TCPS_BUMP_MIB(tcps, tcpOutAck); tcp_send_data(tcp, mp); } @@ -4837,6 +4841,8 @@ xmit_check: TCPS_BUMP_MIB(tcps, tcpRetransSegs); TCPS_UPDATE_MIB(tcps, tcpRetransBytes, snd_size); + tcp->tcp_cs.tcp_out_retrans_segs++; + tcp->tcp_cs.tcp_out_retrans_bytes += snd_size; tcp_send_data(tcp, 
mp1); } } @@ -4912,7 +4918,7 @@ ack_check: if (mp1 != NULL) { tcp_send_data(tcp, mp1); - BUMP_LOCAL(tcp->tcp_obsegs); + TCPS_BUMP_MIB(tcps, tcpHCOutSegs); TCPS_BUMP_MIB(tcps, tcpOutAck); } if (tcp->tcp_ack_tid != 0) { @@ -5228,6 +5234,8 @@ tcp_set_rto(tcp_t *tcp, hrtime_t rtt) TCPS_BUMP_MIB(tcps, tcpRttUpdate); tcp->tcp_rtt_update++; + tcp->tcp_rtt_sum += m; + tcp->tcp_rtt_cnt++; /* tcp_rtt_sa is not 0 means this is a new sample. */ if (sa != 0) { diff --git a/usr/src/uts/common/inet/tcp/tcp_output.c b/usr/src/uts/common/inet/tcp/tcp_output.c index c836076430..f54ab3fb33 100644 --- a/usr/src/uts/common/inet/tcp/tcp_output.c +++ b/usr/src/uts/common/inet/tcp/tcp_output.c @@ -1273,7 +1273,9 @@ tcp_output(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) TCPS_BUMP_MIB(tcps, tcpOutDataSegs); TCPS_UPDATE_MIB(tcps, tcpOutDataBytes, len); - BUMP_LOCAL(tcp->tcp_obsegs); + TCPS_BUMP_MIB(tcps, tcpHCOutSegs); + tcp->tcp_cs.tcp_out_data_segs++; + tcp->tcp_cs.tcp_out_data_bytes += len; /* Update the latest receive window size in TCP header. */ tcpha->tha_win = htons(tcp->tcp_rwnd >> tcp->tcp_rcv_ws); @@ -1960,16 +1962,21 @@ tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len, } *snxt += len; *tail_unsent = (*xmit_tail)->b_wptr - mp1->b_wptr; - BUMP_LOCAL(tcp->tcp_obsegs); + TCPS_BUMP_MIB(tcps, tcpHCOutSegs); TCPS_BUMP_MIB(tcps, tcpOutDataSegs); TCPS_UPDATE_MIB(tcps, tcpOutDataBytes, len); + tcp->tcp_cs.tcp_out_data_segs++; + tcp->tcp_cs.tcp_out_data_bytes += len; tcp_send_data(tcp, mp); continue; } *snxt += len; /* Adjust later if we don't send all of len */ + TCPS_BUMP_MIB(tcps, tcpHCOutSegs); TCPS_BUMP_MIB(tcps, tcpOutDataSegs); TCPS_UPDATE_MIB(tcps, tcpOutDataBytes, len); + tcp->tcp_cs.tcp_out_data_segs++; + tcp->tcp_cs.tcp_out_data_bytes += len; if (*tail_unsent) { /* Are the bytes above us in flight? 
*/ @@ -2145,6 +2152,7 @@ tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len, *snxt += spill; tcp->tcp_last_sent_len += spill; TCPS_UPDATE_MIB(tcps, tcpOutDataBytes, spill); + tcp->tcp_cs.tcp_out_data_bytes += spill; /* * Adjust the checksum */ @@ -2193,7 +2201,7 @@ tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len, */ ixa->ixa_fragsize = ixa->ixa_pmtu; ixa->ixa_extra_ident = 0; - tcp->tcp_obsegs += num_lso_seg; + TCPS_BUMP_MIB(tcps, tcpHCOutSegs); TCP_STAT(tcps, tcp_lso_times); TCP_STAT_UPDATE(tcps, tcp_lso_pkt_out, num_lso_seg); } else { @@ -2204,7 +2212,7 @@ tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len, */ lso_info_cleanup(mp); tcp_send_data(tcp, mp); - BUMP_LOCAL(tcp->tcp_obsegs); + TCPS_BUMP_MIB(tcps, tcpHCOutSegs); } } @@ -2420,7 +2428,7 @@ tcp_xmit_ctl(char *str, tcp_t *tcp, uint32_t seq, uint32_t ack, int ctl) tcp->tcp_rack_cnt = 0; TCPS_BUMP_MIB(tcps, tcpOutAck); } - BUMP_LOCAL(tcp->tcp_obsegs); + TCPS_BUMP_MIB(tcps, tcpHCOutSegs); tcpha->tha_seq = htonl(seq); tcpha->tha_ack = htonl(ack); /* @@ -3394,6 +3402,8 @@ tcp_sack_rexmit(tcp_t *tcp, uint_t *flags) TCPS_BUMP_MIB(tcps, tcpRetransSegs); TCPS_UPDATE_MIB(tcps, tcpRetransBytes, seg_len); TCPS_BUMP_MIB(tcps, tcpOutSackRetransSegs); + tcp->tcp_cs.tcp_out_retrans_segs++; + tcp->tcp_cs.tcp_out_retrans_bytes += seg_len; /* * Update tcp_rexmit_max to extend this SACK recovery phase. 
* This happens when new data sent during fast recovery is @@ -3464,6 +3474,8 @@ tcp_ss_rexmit(tcp_t *tcp) old_snxt_mp->b_prev = (mblk_t *)(intptr_t)gethrtime(); TCPS_BUMP_MIB(tcps, tcpRetransSegs); TCPS_UPDATE_MIB(tcps, tcpRetransBytes, cnt); + tcp->tcp_cs.tcp_out_retrans_segs++; + tcp->tcp_cs.tcp_out_retrans_bytes += cnt; tcp->tcp_rexmit_nxt = snxt; } diff --git a/usr/src/uts/common/inet/tcp/tcp_stats.c b/usr/src/uts/common/inet/tcp/tcp_stats.c index e6b13fe6c9..dbf320d09d 100644 --- a/usr/src/uts/common/inet/tcp/tcp_stats.c +++ b/usr/src/uts/common/inet/tcp/tcp_stats.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, Joyent Inc. All rights reserved. + * Copyright (c) 2015, 2016 by Delphix. All rights reserved. */ #include <sys/types.h> @@ -86,6 +87,50 @@ tcp_snmp_state(tcp_t *tcp) } } +static void +tcp_set_conninfo(tcp_t *tcp, struct tcpConnEntryInfo_s *tcei, boolean_t ispriv) +{ + /* Don't want just anybody seeing these... */ + if (ispriv) { + tcei->ce_snxt = tcp->tcp_snxt; + tcei->ce_suna = tcp->tcp_suna; + tcei->ce_rnxt = tcp->tcp_rnxt; + tcei->ce_rack = tcp->tcp_rack; + } else { + /* + * Netstat, unfortunately, uses this to get send/receive queue + * sizes. How to fix? Why not compute the difference only? 
+ */ + tcei->ce_snxt = tcp->tcp_snxt - tcp->tcp_suna; + tcei->ce_suna = 0; + tcei->ce_rnxt = tcp->tcp_rnxt - tcp->tcp_rack; + tcei->ce_rack = 0; + } + + tcei->ce_in_data_inorder_bytes = tcp->tcp_cs.tcp_in_data_inorder_bytes; + tcei->ce_in_data_inorder_segs = tcp->tcp_cs.tcp_in_data_inorder_segs; + tcei->ce_in_data_unorder_bytes = tcp->tcp_cs.tcp_in_data_unorder_bytes; + tcei->ce_in_data_unorder_segs = tcp->tcp_cs.tcp_in_data_unorder_segs; + tcei->ce_in_zwnd_probes = tcp->tcp_cs.tcp_in_zwnd_probes; + + tcei->ce_out_data_bytes = tcp->tcp_cs.tcp_out_data_bytes; + tcei->ce_out_data_segs = tcp->tcp_cs.tcp_out_data_segs; + tcei->ce_out_retrans_bytes = tcp->tcp_cs.tcp_out_retrans_bytes; + tcei->ce_out_retrans_segs = tcp->tcp_cs.tcp_out_retrans_segs; + tcei->ce_out_zwnd_probes = tcp->tcp_cs.tcp_out_zwnd_probes; + + tcei->ce_unsent = tcp->tcp_unsent; + tcei->ce_swnd = tcp->tcp_swnd; + tcei->ce_cwnd = tcp->tcp_cwnd; + tcei->ce_rwnd = tcp->tcp_rwnd; + tcei->ce_rto = tcp->tcp_rto; + tcei->ce_mss = tcp->tcp_mss; + tcei->ce_state = tcp->tcp_state; + tcei->ce_rtt_sa = NSEC2USEC(tcp->tcp_rtt_sa >> 3); + tcei->ce_rtt_sum = NSEC2USEC(tcp->tcp_rtt_sum); + tcei->ce_rtt_cnt = tcp->tcp_rtt_cnt; +} + /* * Return SNMP stuff in buffer in mpdata. */ @@ -183,11 +228,6 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl, boolean_t legacy_req) continue; /* not in this zone */ tcp = connp->conn_tcp; - TCPS_UPDATE_MIB(tcps, tcpHCInSegs, tcp->tcp_ibsegs); - tcp->tcp_ibsegs = 0; - TCPS_UPDATE_MIB(tcps, tcpHCOutSegs, tcp->tcp_obsegs); - tcp->tcp_obsegs = 0; - tce6.tcp6ConnState = tce.tcpConnState = tcp_snmp_state(tcp); if (tce.tcpConnState == MIB2_TCP_established || @@ -243,35 +283,9 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl, boolean_t legacy_req) } else { tce6.tcp6ConnIfIndex = connp->conn_bound_if; } - /* Don't want just anybody seeing these... 
*/ - if (ispriv) { - tce6.tcp6ConnEntryInfo.ce_snxt = - tcp->tcp_snxt; - tce6.tcp6ConnEntryInfo.ce_suna = - tcp->tcp_suna; - tce6.tcp6ConnEntryInfo.ce_rnxt = - tcp->tcp_rnxt; - tce6.tcp6ConnEntryInfo.ce_rack = - tcp->tcp_rack; - } else { - /* - * Netstat, unfortunately, uses this to - * get send/receive queue sizes. How to fix? - * Why not compute the difference only? - */ - tce6.tcp6ConnEntryInfo.ce_snxt = - tcp->tcp_snxt - tcp->tcp_suna; - tce6.tcp6ConnEntryInfo.ce_suna = 0; - tce6.tcp6ConnEntryInfo.ce_rnxt = - tcp->tcp_rnxt - tcp->tcp_rack; - tce6.tcp6ConnEntryInfo.ce_rack = 0; - } - tce6.tcp6ConnEntryInfo.ce_swnd = tcp->tcp_swnd; - tce6.tcp6ConnEntryInfo.ce_rwnd = tcp->tcp_rwnd; - tce6.tcp6ConnEntryInfo.ce_rto = tcp->tcp_rto; - tce6.tcp6ConnEntryInfo.ce_mss = tcp->tcp_mss; - tce6.tcp6ConnEntryInfo.ce_state = tcp->tcp_state; + tcp_set_conninfo(tcp, &tce6.tcp6ConnEntryInfo, + ispriv); tce6.tcp6ConnCreationProcess = (connp->conn_cpid < 0) ? MIB2_UNKNOWN_PROCESS : @@ -307,37 +321,9 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl, boolean_t legacy_req) } tce.tcpConnLocalPort = ntohs(connp->conn_lport); tce.tcpConnRemPort = ntohs(connp->conn_fport); - /* Don't want just anybody seeing these... */ - if (ispriv) { - tce.tcpConnEntryInfo.ce_snxt = - tcp->tcp_snxt; - tce.tcpConnEntryInfo.ce_suna = - tcp->tcp_suna; - tce.tcpConnEntryInfo.ce_rnxt = - tcp->tcp_rnxt; - tce.tcpConnEntryInfo.ce_rack = - tcp->tcp_rack; - } else { - /* - * Netstat, unfortunately, uses this to - * get send/receive queue sizes. How - * to fix? - * Why not compute the difference only? 
- */ - tce.tcpConnEntryInfo.ce_snxt = - tcp->tcp_snxt - tcp->tcp_suna; - tce.tcpConnEntryInfo.ce_suna = 0; - tce.tcpConnEntryInfo.ce_rnxt = - tcp->tcp_rnxt - tcp->tcp_rack; - tce.tcpConnEntryInfo.ce_rack = 0; - } - tce.tcpConnEntryInfo.ce_swnd = tcp->tcp_swnd; - tce.tcpConnEntryInfo.ce_rwnd = tcp->tcp_rwnd; - tce.tcpConnEntryInfo.ce_rto = tcp->tcp_rto; - tce.tcpConnEntryInfo.ce_mss = tcp->tcp_mss; - tce.tcpConnEntryInfo.ce_state = - tcp->tcp_state; + tcp_set_conninfo(tcp, &tce.tcpConnEntryInfo, + ispriv); tce.tcpConnCreationProcess = (connp->conn_cpid < 0) ? diff --git a/usr/src/uts/common/inet/tcp/tcp_time_wait.c b/usr/src/uts/common/inet/tcp/tcp_time_wait.c index 72997de24a..caf7aeda50 100644 --- a/usr/src/uts/common/inet/tcp/tcp_time_wait.c +++ b/usr/src/uts/common/inet/tcp/tcp_time_wait.c @@ -608,7 +608,7 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, conn_t *connp = tcp->tcp_connp; tcp_stack_t *tcps = tcp->tcp_tcps; - BUMP_LOCAL(tcp->tcp_ibsegs); + TCPS_BUMP_MIB(tcps, tcpHCInSegs); DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp); flags = (unsigned int)tcpha->tha_flags & 0xFF; @@ -794,6 +794,8 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, TCPS_BUMP_MIB(tcps, tcpInClosed); TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs); TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len); + tcp->tcp_cs.tcp_in_data_inorder_segs++; + tcp->tcp_cs.tcp_in_data_inorder_bytes += seg_len; } if (flags & TH_RST) { (void) tcp_clean_death(tcp, 0); diff --git a/usr/src/uts/common/inet/tcp/tcp_timers.c b/usr/src/uts/common/inet/tcp/tcp_timers.c index b890bf6142..81cf5c57a5 100644 --- a/usr/src/uts/common/inet/tcp/tcp_timers.c +++ b/usr/src/uts/common/inet/tcp/tcp_timers.c @@ -594,7 +594,7 @@ tcp_ack_timer(void *arg) mp = tcp_ack_mp(tcp); if (mp != NULL) { - BUMP_LOCAL(tcp->tcp_obsegs); + TCPS_BUMP_MIB(tcps, tcpHCOutSegs); TCPS_BUMP_MIB(tcps, tcpOutAck); TCPS_BUMP_MIB(tcps, tcpOutAckDelayed); tcp_send_data(tcp, mp); @@ -853,6 +853,7 @@ 
tcp_timer(void *arg) tcp->tcp_swnd++; tcp->tcp_zero_win_probe = B_TRUE; TCPS_BUMP_MIB(tcps, tcpOutWinProbe); + tcp->tcp_cs.tcp_out_zwnd_probes++; } else { /* * Handle timeout from sender SWS avoidance. @@ -1081,6 +1082,8 @@ timer_rexmit: tcp->tcp_csuna = tcp->tcp_snxt; TCPS_BUMP_MIB(tcps, tcpRetransSegs); TCPS_UPDATE_MIB(tcps, tcpRetransBytes, mss); + tcp->tcp_cs.tcp_out_retrans_segs++; + tcp->tcp_cs.tcp_out_retrans_bytes += mss; tcp_send_data(tcp, mp); } diff --git a/usr/src/uts/common/inet/tcp_stats.h b/usr/src/uts/common/inet/tcp_stats.h index 487d0d3414..704102e9d6 100644 --- a/usr/src/uts/common/inet/tcp_stats.h +++ b/usr/src/uts/common/inet/tcp_stats.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 by Delphix. All rights reserved. */ #ifndef _INET_TCP_STATS_H @@ -205,6 +206,26 @@ typedef struct { tcp_stat_counter_t tcp_sc_stats; } tcp_stats_cpu_t; +/* + * Per-connection statistics. Some of these are also kept globally in the + * per-cpu tcp_sc_mib entry (see tcp_stats_cpu_t above). We need not maintain + * per-cpu versions of these stats since a connection is typically processed + * on the same CPU. + */ +typedef struct tcp_conn_stats { + uint64_t tcp_in_data_inorder_bytes; + uint64_t tcp_in_data_inorder_segs; + uint64_t tcp_in_data_unorder_bytes; + uint64_t tcp_in_data_unorder_segs; + uint64_t tcp_in_zwnd_probes; + + uint64_t tcp_out_data_bytes; + uint64_t tcp_out_data_segs; + uint64_t tcp_out_retrans_bytes; + uint64_t tcp_out_retrans_segs; + uint64_t tcp_out_zwnd_probes; +} tcp_conn_stats_t; + #define TCPS_BUMP_MIB(tcps, x) \ BUMP_MIB(&(tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_mib, x) |
