summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastien Roy <seb@delphix.com>2015-05-29 13:47:23 -0400
committerRichard Lowe <richlowe@richlowe.net>2019-08-19 22:32:46 +0000
commita2f04351e04971ab0879872d264d6038c156b860 (patch)
treecd6640900e1adf19b745c5f8e796a0e97c7a8ee7
parentc12492cf73149aa0aa845af5d59966b0eb5aa910 (diff)
downloadillumos-joyent-a2f04351e04971ab0879872d264d6038c156b860.tar.gz
11547 Want connstat(1M) command to display per-connection TCP statistics
Portions contributed by: Cody Peter Mello <cody.mello@joyent.com> Portions contributed by: Ahmed G <ahmedg@delphix.com> Reviewed by: Jason King <jason.king@joyent.com> Reviewed by: Robert Mustacchi <rm@joyent.com> Reviewed by: Dan McDonald <danmcd@joyent.com> Approved by: Richard Lowe <richlowe@richlowe.net>
-rw-r--r--usr/src/cmd/Makefile6
-rw-r--r--usr/src/cmd/connstat/Makefile51
-rw-r--r--usr/src/cmd/connstat/connstat.h79
-rw-r--r--usr/src/cmd/connstat/connstat.xcl84
-rw-r--r--usr/src/cmd/connstat/connstat_main.c567
-rw-r--r--usr/src/cmd/connstat/connstat_mib.c177
-rw-r--r--usr/src/cmd/connstat/connstat_mib.h35
-rw-r--r--usr/src/cmd/connstat/connstat_tcp.c403
-rw-r--r--usr/src/cmd/connstat/connstat_tcp.h50
-rw-r--r--usr/src/man/man1m/Makefile1
-rw-r--r--usr/src/man/man1m/connstat.1m395
-rw-r--r--usr/src/pkg/manifests/SUNWcs.man1m.inc2
-rw-r--r--usr/src/pkg/manifests/SUNWcs.mf2
-rw-r--r--usr/src/uts/common/inet/ip/ip.c7
-rw-r--r--usr/src/uts/common/inet/mib2.h59
-rw-r--r--usr/src/uts/common/inet/tcp.h11
-rw-r--r--usr/src/uts/common/inet/tcp/tcp.c19
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_fusion.c9
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_input.c46
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_output.c22
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_stats.c112
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_time_wait.c4
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_timers.c5
-rw-r--r--usr/src/uts/common/inet/tcp_stats.h21
24 files changed, 2022 insertions, 145 deletions
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index 389916bc67..1d1ffb6e00 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -21,8 +21,8 @@
#
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2019 Joyent, Inc.
-# Copyright (c) 2012 by Delphix. All rights reserved.
+# Copyright (c) 2019, Joyent, Inc.
+# Copyright (c) 2012, 2015 by Delphix. All rights reserved.
# Copyright (c) 2013 DEY Storage Systems, Inc. All rights reserved.
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
# Copyright 2016 Toomas Soome <tsoome@me.com>
@@ -98,6 +98,7 @@ COMMON_SUBDIRS= \
cmd-inet \
col \
compress \
+ connstat \
consadm \
coreadm \
cpio \
@@ -547,6 +548,7 @@ MSGSUBDIRS= \
cmd-inet \
col \
compress \
+ connstat \
consadm \
coreadm \
cpio \
diff --git a/usr/src/cmd/connstat/Makefile b/usr/src/cmd/connstat/Makefile
new file mode 100644
index 0000000000..847bd05428
--- /dev/null
+++ b/usr/src/cmd/connstat/Makefile
@@ -0,0 +1,51 @@
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2015 by Delphix. All rights reserved.
+#
+
+PROG= connstat
+OBJS= connstat_main.o connstat_mib.o connstat_tcp.o
+SRCS= $(OBJS:%.o=%.c)
+POFILES= connstat_main.po connstat_tcp.po connstat_mib.po
+POFILE= connstat.po
+
+include ../Makefile.cmd
+include ../Makefile.ctf
+
+CSTD= $(CSTD_GNU99)
+LDLIBS += -lsocket -lnsl -lumem -lofmt
+XGETFLAGS += -a -x $(PROG).xcl
+
+.KEEP_STATE:
+
+all: $(PROG) # default target: build the connstat binary
+
+$(PROG): $(OBJS) # link the three objects against LDLIBS
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
+$(POFILE): $(POFILES) # message catalog is the concatenation of the per-source .po files
+ $(RM) $@
+ cat $(POFILES) > $@
+
+install: all $(ROOTPROG)
+
+clean: # removes object files only
+ $(RM) $(OBJS)
+
+lint: lint_SRCS
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/connstat/connstat.h b/usr/src/cmd/connstat/connstat.h
new file mode 100644
index 0000000000..a50049b3c7
--- /dev/null
+++ b/usr/src/cmd/connstat/connstat.h
@@ -0,0 +1,79 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+ */
+
+#ifndef _CONNSTAT_H
+#define _CONNSTAT_H
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <ofmt.h>
+#include <sys/stropts.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct connstat_conn_attr_s { /* connection attributes a filter can match on */
+ struct sockaddr_storage ca_laddr; /* local address; meaningful when CS_LADDR is set */
+ struct sockaddr_storage ca_raddr; /* remote address; meaningful when CS_RADDR is set */
+ int ca_lport; /* local port; meaningful when CS_LPORT is set */
+ int ca_rport; /* remote port; meaningful when CS_RPORT is set */
+ int ca_state; /* connection state; meaningful when CS_STATE is set */
+} connstat_conn_attr_t;
+
+typedef struct conn_walk_state_s { /* state passed to conn_walk() and on to the protocol walkers */
+ ofmt_handle_t cws_ofmt; /* handle obtained from ofmt_open() */
+ uint_t cws_flags; /* bitmask of the CS_* flags */
+ connstat_conn_attr_t cws_filter; /* values for whichever CS_* filter bits are set */
+} conn_walk_state_t;
+
+/* cws_flags: connection selection and output flags */
+#define CS_LOOPBACK 0x0001 /* Include loopback connections */
+#define CS_IPV4 0x0002 /* Include IPv4 connections (set by default) */
+#define CS_IPV6 0x0004 /* Include IPv6 connections (set by default) */
+#define CS_LADDR 0x0008 /* Filter by laddr in cws_filter */
+#define CS_RADDR 0x0010 /* Filter by raddr in cws_filter */
+#define CS_LPORT 0x0020 /* Filter by lport in cws_filter */
+#define CS_RPORT 0x0040 /* Filter by rport in cws_filter */
+#define CS_STATE 0x0080 /* Filter by state in cws_filter */
+#define CS_PARSABLE 0x0100 /* Parsable output */
+
+typedef ofmt_field_t *connstat_getfieldsfunc_t(void); /* returns the field table handed to ofmt_open() */
+typedef void connstat_walkfunc_t(struct strbuf *, conn_walk_state_t *); /* called with one MIB data buffer and the walk state */
+
+typedef struct connstat_proto_s { /* per-protocol description; one entry per supported protocol */
+ char *csp_proto; /* protocol name: matched against -p and pushed as a STREAMS module by mibopen() */
+ char *csp_default_fields; /* field list used when -o is not given */
+ int csp_miblevel; /* opthdr level placed in the MIB request */
+ int csp_mibv4name; /* opthdr name of the reply handed to csp_v4walk */
+ int csp_mibv6name; /* opthdr name of the reply handed to csp_v6walk */
+ connstat_getfieldsfunc_t *csp_getfields; /* supplies the ofmt field table */
+ connstat_walkfunc_t *csp_v4walk; /* called for the IPv4 table when CS_IPV4 is set */
+ connstat_walkfunc_t *csp_v6walk; /* called for the IPv6 table when CS_IPV6 is set */
+} connstat_proto_t;
+
+boolean_t print_string(ofmt_arg_t *, char *, uint_t); /* ofmt callback: copy the string at ofmt_cbarg + ofmt_id */
+boolean_t print_uint16(ofmt_arg_t *, char *, uint_t); /* ofmt callback: format the uint16_t at ofmt_cbarg + ofmt_id */
+boolean_t print_uint32(ofmt_arg_t *, char *, uint_t); /* ofmt callback: format the uint32_t at ofmt_cbarg + ofmt_id */
+boolean_t print_uint64(ofmt_arg_t *, char *, uint_t); /* ofmt callback: format the uint64_t at ofmt_cbarg + ofmt_id */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CONNSTAT_H */
diff --git a/usr/src/cmd/connstat/connstat.xcl b/usr/src/cmd/connstat/connstat.xcl
new file mode 100644
index 0000000000..089bc7358e
--- /dev/null
+++ b/usr/src/cmd/connstat/connstat.xcl
@@ -0,0 +1,84 @@
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2015 by Delphix. All rights reserved.
+#
+
+msgid "count"
+msgid "established"
+msgid "filter"
+msgid "help"
+msgid "interval"
+msgid "ipv4"
+msgid "ipv6"
+msgid "no-loopback"
+msgid "output"
+msgid "parsable"
+msgid "protocol"
+msgid "timestamp"
+msgid "tcp"
+msgid "laddr,lport,raddr,rport,inbytes,outbytes,"
+ "retransbytes,suna,swnd,cwnd,rwnd"
+msgid " %s\n"
+msgid ""
+msgid "c:eF:hi:Lo:Pp:T:46"
+msgid "all"
+msgid "laddr"
+msgid "raddr"
+msgid "lport"
+msgid "rport"
+msgid "= "
+msgid "%s%ld\n"
+msgid "%s%s\n"
+msgid "%hu"
+msgid "%u"
+msgid "%llu"
+msgid "%s: "
+msgid "LADDR"
+msgid "RADDR"
+msgid "LPORT"
+msgid "RPORT"
+msgid "INBYTES"
+msgid "INSEGS"
+msgid "INUNORDERBYTES"
+msgid "INUNORDERSEGS"
+msgid "OUTBYTES"
+msgid "OUTSEGS"
+msgid "RETRANSBYTES"
+msgid "RETRANSSEGS"
+msgid "SUNA"
+msgid "UNSENT"
+msgid "SWND"
+msgid "CWND"
+msgid "RWND"
+msgid "STATE"
+msgid "CLOSED"
+msgid "IDLE"
+msgid "BOUND"
+msgid "LISTEN"
+msgid "SYN_SENT"
+msgid "SYN_RCVD"
+msgid "ESTABLISHED"
+msgid "CLOSE_WAIT"
+msgid "FIN_WAIT_1"
+msgid "CLOSING"
+msgid "LAST_ACK"
+msgid "FIN_WAIT_2"
+msgid "TIME_WAIT"
+msgid "UNKNOWN(%d)"
+msgid "/dev/arp"
+msgid "putmsg"
+msgid "getmsg"
+msgid "malloc"
diff --git a/usr/src/cmd/connstat/connstat_main.c b/usr/src/cmd/connstat/connstat_main.c
new file mode 100644
index 0000000000..dd58e2ac2b
--- /dev/null
+++ b/usr/src/cmd/connstat/connstat_main.c
@@ -0,0 +1,567 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <strings.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <libintl.h>
+#include <limits.h>
+#include <locale.h>
+#include <langinfo.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <sys/varargs.h>
+#include <ofmt.h>
+#include <inet/tcp.h>
+#include <netinet/in.h>
+#include <inet/mib2.h>
+#include "connstat.h"
+#include "connstat_mib.h"
+#include "connstat_tcp.h"
+
+#define DEFAULT_PROTO "tcp"
+
+static const char *invalid_v4v6_msg =
+ "Invalid combination of IPv4 and IPv6 arguments\n";
+
+static const char *invalid_T_msg =
+ "Invalid -T arg \"%s\". Must be \"u\" or \"d\"\n";
+
+static const struct option longopts[] = { /* long names for the short options parsed in main() */
+ { "count", required_argument, 0, 'c' },
+ { "established", no_argument, 0, 'e' },
+ { "filter", required_argument, 0, 'F' },
+ { "help", no_argument, 0, 'h' },
+ { "interval", required_argument, 0, 'i' },
+ { "ipv4", no_argument, 0, '4' },
+ { "ipv6", no_argument, 0, '6' },
+ { "no-loopback", no_argument, 0, 'L' },
+ { "output", required_argument, 0, 'o' },
+ { "parsable", no_argument, 0, 'P' },
+ { "protocol", required_argument, 0, 'p' },
+ { "timestamp", required_argument, 0, 'T' },
+ { NULL, 0, 0, 0 } /* terminator required by getopt_long() */
+};
+
+static connstat_proto_t connstat_protos[] = { /* supported protocols, searched by getproto() */
+ CONNSTAT_TCP_PROTO,
+ { NULL, NULL, 0, 0, 0, NULL, NULL, NULL } /* csp_proto == NULL ends the table */
+};
+
+typedef enum { NOTIMESTAMP, UTIMESTAMP, DTIMESTAMP } timestamp_fmt_t; /* no -T, "-T u" (time_t value), "-T d" (date format) */
+
+static void die(const char *, ...) __NORETURN;
+static void process_filter(char *, connstat_conn_attr_t *, uint_t *);
+static void show_stats(connstat_proto_t *, ofmt_handle_t, uint_t,
+ connstat_conn_attr_t *, timestamp_fmt_t, uint_t, uint_t);
+
+static void __NORETURN
+usage(int code)
+{ /* Print the usage message to stderr, then exit with status "code". */
+ static const char *opts[] = { /* one line per documented option; each is translated when printed */
+ "-4, --ipv4 Only display IPv4 connections",
+ "-6, --ipv6 Only display IPv6 connections",
+ "-c, --count=COUNT Only print COUNT reports",
+ "-e, --established Only display established connections",
+ "-F, --filter=FILTER Only display connections that match "
+ "FILTER",
+ "-h, --help Print this help",
+ "-i, --interval=SECONDS Report once every SECONDS seconds",
+ "-L, --no-loopback Omit loopback connections",
+ "-o, --output=FIELDS Restrict output to the comma-separated "
+ "list of fields\n"
+ " specified",
+ "-P, --parsable Parsable output mode",
+ "-T, --timestamp=TYPE Display a timestamp for each iteration",
+ NULL /* ends the loop below */
+ };
+
+ (void) fprintf(stderr, gettext("usage: "));
+ (void) fprintf(stderr,
+ gettext("%s [-eLP] [-4|-6] [-T d|u] [-F <filter>]\n"
+ " [-i <interval> [-c <count>]] [-o <field>[,...]]\n"),
+ getprogname());
+
+ (void) fprintf(stderr, gettext("\nOptions:\n"));
+ for (const char **optp = opts; *optp != NULL; optp++) {
+ (void) fprintf(stderr, " %s\n", gettext(*optp));
+ }
+
+ (void) fprintf(stderr, gettext("\nFilter:\n"));
+ (void) fprintf(stderr, gettext(" The FILTER argument for the -F "
+ "option is of the form:\n"
+ " <field>=<value>,[<field>=<value>,...]\n"));
+ (void) fprintf(stderr, gettext(" Filterable fields are laddr, lport, "
+ "raddr, rport, and state.\n"));
+
+ (void) fprintf(stderr, gettext("\nFields:\n"));
+ (void) fprintf(stderr, gettext(
+ " laddr Local IP address\n"
+ " raddr Remote IP address\n"
+ " lport Local port\n"
+ " rport Remote port\n"
+ " inbytes Total bytes received\n"
+ " insegs Total segments received\n"
+ " inunorderbytes Bytes received out of order\n"
+ " inunordersegs Segments received out of order\n"
+ " outbytes Total bytes sent\n"
+ " outsegs Total segments sent\n"
+ " retransbytes Bytes retransmitted\n"
+ " retranssegs Segments retransmitted\n"
+ " suna Current unacknowledged bytes sent\n"
+ " unsent Unsent bytes on the transmit queue\n"
+ " swnd Send window size (peer's receive window)\n"
+ " cwnd Congestion window size\n"
+ " rwnd Receive window size\n"
+ " mss Maximum segment size\n"
+ " rto Retransmission timeout (ms)\n"
+ " rtt Smoothed round-trip time (us)\n"
+ " rtts Sum round-trip time (us)\n"
+ " rttc Count of round-trip times\n"
+ " state Connection state\n"));
+ exit(code); /* never returns, matching __NORETURN */
+}
+
+/*
+ * Look up a protocol by name (case-insensitively) in connstat_protos[].
+ * Returns the matching table entry, or NULL when the name is not supported.
+ */
+static connstat_proto_t *
+getproto(const char *proto)
+{
+	connstat_proto_t *entry = connstat_protos;
+
+	/* The table is terminated by an entry whose csp_proto is NULL. */
+	while (entry->csp_proto != NULL) {
+		if (strcasecmp(proto, entry->csp_proto) == 0) {
+			return (entry);
+		}
+		entry++;
+	}
+	return (NULL);
+}
+
+/*
+ * connstat entry point: parse and validate the command line, open an ofmt
+ * handle for the requested (or default) fields, and print the reports.
+ *
+ * Returns 0 on success.  Any invalid argument or option combination
+ * terminates the program through usage() or die(), both of which exit
+ * non-zero; an explicit -h/--help prints the usage text and exits 0.
+ */
+int
+main(int argc, char *argv[])
+{
+	int option;
+	int count = 0;
+	int interval = 0;
+	const char *errstr = NULL;
+	char *fields = NULL;
+	char *filterstr = NULL;
+	connstat_conn_attr_t filter = {0};
+	char *protostr = DEFAULT_PROTO;
+	connstat_proto_t *proto;
+	ofmt_handle_t ofmt;
+	ofmt_status_t oferr;
+	char oferrbuf[OFMT_BUFSIZE];
+	uint_t ofmtflags = OFMT_NOHEADER;
+	/* Both address families are selected until -4, -6 or a filter narrows it. */
+	uint_t flags = CS_LOOPBACK | CS_IPV4 | CS_IPV6;
+	timestamp_fmt_t timestamp_fmt = NOTIMESTAMP;
+
+	(void) setlocale(LC_ALL, "");
+#if !defined(TEXT_DOMAIN)
+#define	TEXT_DOMAIN "SYS_TEST"
+#endif
+	(void) textdomain(TEXT_DOMAIN);
+
+	setprogname(basename(argv[0]));
+
+	while ((option = getopt_long(argc, argv, "c:eF:hi:Lo:Pp:T:46",
+	    longopts, NULL)) != -1) {
+		switch (option) {
+		case 'c':
+			count = strtonum(optarg, 1, INT_MAX, &errstr);
+			if (errstr != NULL) {
+				(void) fprintf(stderr, gettext(
+				    "error parsing -c argument (%s): %s\n"),
+				    optarg, errstr);
+				usage(1);
+			}
+			break;
+		case 'e':
+			flags |= CS_STATE;
+			filter.ca_state = TCPS_ESTABLISHED;
+			break;
+		case 'F':
+			/* Parsed after all options, see process_filter(). */
+			filterstr = optarg;
+			break;
+		case 'h':
+			/* Help was requested explicitly; that is not an error. */
+			usage(0);
+			break;
+		case 'i':
+			interval = strtonum(optarg, 1, INT_MAX, &errstr);
+			if (errstr != NULL) {
+				(void) fprintf(stderr, gettext(
+				    "error parsing -i argument (%s): %s\n"),
+				    optarg, errstr);
+				usage(1);
+			}
+			break;
+		case 'L':
+			flags &= ~CS_LOOPBACK;
+			break;
+		case 'o':
+			fields = optarg;
+			break;
+		case 'P':
+			ofmtflags |= OFMT_PARSABLE;
+			flags |= CS_PARSABLE;
+			break;
+		case 'p':
+			/*
+			 * -p is an undocumented flag whose only supported
+			 * argument is "tcp".  The idea is to reserve this
+			 * flag for potential future use in case connstat
+			 * is extended to support stats for other protocols.
+			 */
+			protostr = optarg;
+			break;
+		case 'T':
+			if (strcmp(optarg, "u") == 0) {
+				timestamp_fmt = UTIMESTAMP;
+			} else if (strcmp(optarg, "d") == 0) {
+				timestamp_fmt = DTIMESTAMP;
+			} else {
+				(void) fprintf(stderr, gettext(
+				    invalid_T_msg), optarg);
+				usage(1);
+			}
+			break;
+		case '4':
+			/* CS_IPV4 already cleared means -6 was given earlier. */
+			if (!(flags & CS_IPV4)) {
+				(void) fprintf(stderr, gettext(
+				    invalid_v4v6_msg));
+				usage(1);
+			}
+			flags &= ~CS_IPV6;
+			break;
+		case '6':
+			/* CS_IPV6 already cleared means -4 was given earlier. */
+			if (!(flags & CS_IPV6)) {
+				(void) fprintf(stderr, gettext(
+				    invalid_v4v6_msg));
+				usage(1);
+			}
+			flags &= ~CS_IPV4;
+			break;
+		case '?':
+		default:
+			usage(1);
+			break;
+		}
+	}
+
+	if ((proto = getproto(protostr)) == NULL) {
+		die("unknown protocol given to \"-p\": %s", protostr);
+	}
+
+	if ((ofmtflags & OFMT_PARSABLE) && fields == NULL) {
+		die("parsable output requires \"-o\"");
+	}
+
+	if ((ofmtflags & OFMT_PARSABLE) && fields != NULL &&
+	    strcasecmp(fields, "all") == 0) {
+		die("\"-o all\" is invalid with parsable output");
+	}
+
+	if (fields == NULL) {
+		fields = proto->csp_default_fields;
+	}
+
+	/* If count is specified, then interval must also be specified. */
+	if (count != 0 && interval == 0) {
+		die("\"-c\" requires \"-i\"");
+	}
+
+	/* If interval is not specified, then the default count is 1. */
+	if (interval == 0 && count == 0) {
+		count = 1;
+	}
+
+	if (filterstr != NULL) {
+		process_filter(filterstr, &filter, &flags);
+	}
+
+	oferr = ofmt_open(fields, proto->csp_getfields(), ofmtflags, 0, &ofmt);
+	if (oferr != OFMT_SUCCESS) {
+		(void) ofmt_strerror(ofmt, oferr, oferrbuf, sizeof (oferrbuf));
+		/*
+		 * The message is runtime data (it can echo the user's -o
+		 * argument), so it must never be used as the format string.
+		 */
+		die("%s", oferrbuf);
+	}
+	ofmt_set_fs(ofmt, ',');
+
+	show_stats(proto, ofmt, flags, &filter, timestamp_fmt, interval, count);
+
+	ofmt_close(ofmt);
+	return (0);
+}
+
+/*
+ * Convert the input IP address literal into a sockaddr of the appropriate
+ * address family and store it in *ss.  Only numeric addresses are accepted
+ * (AI_NUMERICHOST); anything else is fatal via die().  The sockaddr
+ * produced by getaddrinfo() is copied over the start of *ss as-is.
+ */
+static void
+str2sockaddr(const char *addr, struct sockaddr_storage *ss)
+{
+	struct addrinfo *result;
+	struct addrinfo hints = { .ai_flags = AI_NUMERICHOST };
+
+	if (getaddrinfo(addr, NULL, &hints, &result) != 0) {
+		die("invalid literal IP address: %s", addr);
+	}
+	bcopy(result->ai_addr, ss, result->ai_addrlen);
+	freeaddrinfo(result);
+}
+
+/*
+ * The filterstr argument is of the form: <attr>=<value>[,...]
+ * Possible attributes are laddr, raddr, lport, rport, and state.  Parse this
+ * filter, store the values into the provided attribute structure, and set
+ * the matching CS_* bit in *flags for every attribute seen.  An address
+ * filter also narrows *flags to that address's family.
+ *
+ * Any malformed filter (unknown attribute, attribute without a value,
+ * attribute given twice, unparsable value, or conflicting address families)
+ * is fatal: a message is printed and the program exits through usage() or
+ * die().  Note that filterstr is modified in place by getsubopt().
+ */
+static void
+process_filter(char *filterstr, connstat_conn_attr_t *filter, uint_t *flags)
+{
+	int option;
+	char *val;
+	enum { F_LADDR, F_RADDR, F_LPORT, F_RPORT, F_STATE };
+	static char *filter_optstr[] =
+	    { "laddr", "raddr", "lport", "rport", "state", NULL };
+	uint_t flag = 0;
+	struct sockaddr_storage *addrp = NULL;
+	const char *errstr = NULL;
+	int *portp = NULL;
+
+	while (*filterstr != '\0') {
+		option = getsubopt(&filterstr, filter_optstr, &val);
+
+		switch (option) {
+		case F_LADDR:
+			flag = CS_LADDR;
+			addrp = &filter->ca_laddr;
+			break;
+		case F_RADDR:
+			flag = CS_RADDR;
+			addrp = &filter->ca_raddr;
+			break;
+		case F_LPORT:
+			flag = CS_LPORT;
+			portp = &filter->ca_lport;
+			break;
+		case F_RPORT:
+			flag = CS_RPORT;
+			portp = &filter->ca_rport;
+			break;
+		case F_STATE:
+			flag = CS_STATE;
+			break;
+		default:
+			/* getsubopt() returned -1: not a filterable field. */
+			usage(1);
+		}
+
+		/*
+		 * getsubopt() leaves val NULL when the field was given
+		 * without "=<value>".  Every field needs a value, and the
+		 * parsers below must not be handed a NULL string.
+		 */
+		if (val == NULL) {
+			(void) fprintf(stderr, gettext(
+			    "The \"%s\" filter field requires a value.\n"),
+			    filter_optstr[option]);
+			usage(1);
+		}
+
+		/* This also catches "-e" combined with a state filter. */
+		if (*flags & flag) {
+			(void) fprintf(stderr, gettext(
+			    "Ambiguous filter provided. The \"%s\" field "
+			    "appears more than once.\n"),
+			    filter_optstr[option]);
+			usage(1);
+		}
+		*flags |= flag;
+
+		switch (flag) {
+		case CS_LADDR:
+		case CS_RADDR:
+			str2sockaddr(val, addrp);
+			/*
+			 * An address pins the report to its family; reject it
+			 * if that family has already been excluded.
+			 */
+			if (addrp->ss_family == AF_INET) {
+				if (!(*flags & CS_IPV4)) {
+					(void) fprintf(stderr, gettext(
+					    invalid_v4v6_msg));
+					usage(1);
+				}
+				*flags &= ~CS_IPV6;
+			} else {
+				if (!(*flags & CS_IPV6)) {
+					(void) fprintf(stderr, gettext(
+					    invalid_v4v6_msg));
+					usage(1);
+				}
+				*flags &= ~CS_IPV4;
+			}
+			break;
+		case CS_LPORT:
+		case CS_RPORT:
+			*portp = strtonum(val, 1, UINT16_MAX, &errstr);
+			if (errstr != NULL) {
+				(void) fprintf(stderr, gettext(
+				    "error parsing port (%s): %s\n"),
+				    val, errstr);
+				usage(1);
+			}
+			break;
+		case CS_STATE:
+			filter->ca_state = tcp_str2state(val);
+			if (filter->ca_state < TCPS_CLOSED) {
+				(void) fprintf(stderr, gettext(
+				    "invalid TCP state: %s\n"), val);
+				usage(1);
+			}
+			break;
+		}
+	}
+
+	/* Make sure that laddr and raddr are at least in the same family. */
+	if ((*flags & (CS_LADDR|CS_RADDR)) == (CS_LADDR|CS_RADDR)) {
+		if (filter->ca_laddr.ss_family != filter->ca_raddr.ss_family) {
+			die("laddr and raddr must be of the same family.");
+		}
+	}
+}
+
+/*
+ * Print the current time on a line of its own, either as the decimal time_t
+ * value (-T u) or formatted with the locale's date format (-T d).  In
+ * parsable mode the line is prefixed with "= ".  NOTIMESTAMP prints nothing;
+ * any other value is a programming error and aborts.
+ */
+static void
+print_timestamp(timestamp_fmt_t timestamp_fmt, boolean_t parsable)
+{
+	static char *datefmt = NULL;
+	time_t now = time(NULL);
+	char *prefix = "";
+
+	if (parsable) {
+		prefix = "= ";
+	}
+
+	/* The date format is looked up on the first call and then reused. */
+	if (datefmt == NULL) {
+		datefmt = nl_langinfo(_DATE_FMT);
+	}
+
+	switch (timestamp_fmt) {
+	case NOTIMESTAMP:
+		return;
+	case UTIMESTAMP:
+		(void) printf("%s%ld\n", prefix, now);
+		return;
+	case DTIMESTAMP: {
+		char datebuf[64];
+
+		/* A zero return means nothing usable was produced. */
+		if (strftime(datebuf, sizeof (datebuf), datefmt,
+		    localtime(&now)) > 0) {
+			(void) printf("%s%s\n", prefix, datebuf);
+		}
+		return;
+	}
+	default:
+		abort();
+	}
+}
+
+/*
+ * Open the MIB stream for the protocol and print `count' reports, sleeping
+ * `interval' seconds between them.  A count of 0 means "report forever".
+ * Each report is an optional timestamp, the header (unless output is
+ * parsable), and one conn_walk() pass.  Failure to open the stream or to
+ * walk the connections is fatal via die().
+ */
+static void
+show_stats(connstat_proto_t *proto, ofmt_handle_t ofmt, uint_t flags,
+    connstat_conn_attr_t *filter, timestamp_fmt_t timestamp_fmt,
+    uint_t interval, uint_t count)
+{
+	boolean_t done = B_FALSE;
+	/* Normalise the masked bit (0x100) to a real boolean_t value. */
+	boolean_t parsable = (flags & CS_PARSABLE) != 0 ? B_TRUE : B_FALSE;
+	uint_t i = 0;
+	int mibfd;
+	conn_walk_state_t state;
+
+	state.cws_ofmt = ofmt;
+	state.cws_flags = flags;
+	state.cws_filter = *filter;
+
+	if ((mibfd = mibopen(proto->csp_proto)) == -1) {
+		die("failed to open MIB stream: %s", strerror(errno));
+	}
+
+	do {
+		if (timestamp_fmt != NOTIMESTAMP) {
+			print_timestamp(timestamp_fmt, parsable);
+		}
+		if (!parsable) {
+			ofmt_print_header(ofmt);
+		}
+
+		if (conn_walk(mibfd, proto, &state) != 0) {
+			die("failed to fetch and print connection info");
+		}
+
+		/* Do not sleep after the final report. */
+		if (count != 0 && ++i == count) {
+			done = B_TRUE;
+		} else {
+			(void) sleep(interval);
+		}
+	} while (!done);
+
+	/* The stream was opened here, so release it here. */
+	(void) close(mibfd);
+}
+
+/*
+ * ofmt callbacks for printing individual fields of various types.
+ */
+/*
+ * ofmt callback: copy the NUL-terminated string found ofmt_id bytes into
+ * the callback argument to buf, truncating to bufsize.  Always succeeds.
+ */
+boolean_t
+print_string(ofmt_arg_t *ofarg, char *buf, uint_t bufsize)
+{
+	const char *base = ofarg->ofmt_cbarg;
+
+	(void) strlcpy(buf, base + ofarg->ofmt_id, bufsize);
+	return (B_TRUE);
+}
+
+/*
+ * ofmt callback: format the uint16_t found ofmt_id bytes into the callback
+ * argument as decimal text in buf.  Always succeeds.
+ */
+boolean_t
+print_uint16(ofmt_arg_t *ofarg, char *buf, uint_t bufsize)
+{
+	char *field = (char *)ofarg->ofmt_cbarg + ofarg->ofmt_id;
+
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	(void) snprintf(buf, bufsize, "%hu", *(uint16_t *)field);
+	return (B_TRUE);
+}
+
+/*
+ * ofmt callback: format the uint32_t found ofmt_id bytes into the callback
+ * argument as decimal text in buf.  Always succeeds.
+ */
+boolean_t
+print_uint32(ofmt_arg_t *ofarg, char *buf, uint_t bufsize)
+{
+	char *field = (char *)ofarg->ofmt_cbarg + ofarg->ofmt_id;
+
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	(void) snprintf(buf, bufsize, "%u", *(uint32_t *)field);
+	return (B_TRUE);
+}
+
+/*
+ * ofmt callback: format the uint64_t found ofmt_id bytes into the callback
+ * argument as decimal text in buf.  Always succeeds.
+ */
+boolean_t
+print_uint64(ofmt_arg_t *ofarg, char *buf, uint_t bufsize)
+{
+	uint64_t value;
+
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	value = *(uint64_t *)((char *)ofarg->ofmt_cbarg + ofarg->ofmt_id);
+	/*
+	 * uint64_t is not necessarily "unsigned long long" (it is
+	 * "unsigned long" on LP64), so convert explicitly to the type that
+	 * %llu expects.
+	 */
+	(void) snprintf(buf, bufsize, "%llu", (unsigned long long)value);
+	return (B_TRUE);
+}
+
+/*
+ * Print a translated, printf-style error message through verrx() and exit
+ * with status 1.  Never returns.
+ */
+/* PRINTFLIKE1 */
+static void
+die(const char *format, ...)
+{
+	const char *msg = gettext(format);
+	va_list ap;
+
+	va_start(ap, format);
+	verrx(1, msg, ap);
+	va_end(ap);
+}
diff --git a/usr/src/cmd/connstat/connstat_mib.c b/usr/src/cmd/connstat/connstat_mib.c
new file mode 100644
index 0000000000..4b3e532c63
--- /dev/null
+++ b/usr/src/cmd/connstat/connstat_mib.c
@@ -0,0 +1,177 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2015 by Delphix. All rights reserved.
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <sys/debug.h>
+#include <sys/tihdr.h>
+#include "connstat.h"
+
+/*
+ * Open /dev/arp and push the named protocol module onto the stream.
+ * Returns the open descriptor, or -1 with errno describing the open() or
+ * ioctl() failure.
+ */
+int
+mibopen(const char *proto)
+{
+	int pusherr;
+	int mibfd = open("/dev/arp", O_RDWR);
+
+	if (mibfd == -1) {
+		return (-1);
+	}
+
+	if (ioctl(mibfd, I_PUSH, proto) != -1) {
+		return (mibfd);
+	}
+
+	/* Keep the ioctl() error for the caller; close() may change errno. */
+	pusherr = errno;
+	(void) close(mibfd);
+	errno = pusherr;
+	return (-1);
+}
+
+/*
+ * Request the MIB tables for the protocol's MIB level on the stream `fd'
+ * and read the replies until the end-of-data message.  The reply whose name
+ * matches csp_mibv4name is handed to csp_v4walk when CS_IPV4 is set, and the
+ * one matching csp_mibv6name to csp_v6walk when CS_IPV6 is set; all other
+ * replies are read and discarded.
+ *
+ * Returns 0 on success, or -1 after printing a warning when a message
+ * cannot be sent, read, validated, or buffered.
+ */
+int
+conn_walk(int fd, connstat_proto_t *proto, conn_walk_state_t *state)
+{
+	struct strbuf cbuf, dbuf;
+	struct opthdr *hdr;
+	int flags, r, err = 0;
+	struct {
+		struct T_optmgmt_req req;
+		struct opthdr hdr;
+	} req;
+	union {
+		struct T_optmgmt_ack ack;
+		uint8_t space[sizeof (struct T_optmgmt_ack) +
+		    sizeof (struct opthdr) * 2];
+	} ack;
+
+	bzero(&cbuf, sizeof (cbuf));
+	bzero(&dbuf, sizeof (dbuf));
+
+	/*
+	 * The request carries an SVR4-style struct opthdr, which pairs with
+	 * T_SVR4_OPTMGMT_REQ.  T_OPTMGMT_REQ is the XTI primitive and expects
+	 * struct T_opthdr options instead.
+	 */
+	req.req.PRIM_type = T_SVR4_OPTMGMT_REQ;
+	req.req.OPT_offset = (caddr_t)&req.hdr - (caddr_t)&req;
+	req.req.OPT_length = sizeof (req.hdr);
+	req.req.MGMT_flags = T_CURRENT;
+
+	req.hdr.level = proto->csp_miblevel;
+	req.hdr.name = 0;
+	req.hdr.len = 0;
+
+	cbuf.buf = (caddr_t)&req;
+	cbuf.len = sizeof (req);
+
+	if (putmsg(fd, &cbuf, NULL, 0) == -1) {
+		warn("failed to request connection info: putmsg");
+		return (-1);
+	}
+
+	/*
+	 * Each reply consists of a control part for one fixed structure or
+	 * table, as defined in mib2.h.  The format is a T_OPTMGMT_ACK
+	 * containing an opthdr structure.  The level and name identify the
+	 * entry, and len is the size of the data part of the message.
+	 */
+	for (;;) {
+		void *newbuf;
+
+		cbuf.buf = (caddr_t)&ack;
+		cbuf.maxlen = sizeof (ack);
+		flags = 0;
+
+		/*
+		 * We first do a getmsg() for the control part so that we
+		 * can allocate a properly sized buffer to read the data
+		 * part.
+		 */
+		do {
+			r = getmsg(fd, &cbuf, NULL, &flags);
+		} while (r < 0 && errno == EINTR);
+
+		if (r < 0) {
+			warn("failed to fetch further connection info");
+			err = -1;
+			break;
+		} else if ((r & MORECTL) != 0) {
+			warnx("failed to fetch full control message");
+			err = -1;
+			break;
+		}
+
+		/*
+		 * Validate the ack before trusting its offsets.  The lengths
+		 * are compared as signed values so that a negative length
+		 * (including cbuf.len == -1, "no control part") is rejected,
+		 * and the opthdr must lie entirely within what was read.
+		 */
+		if (cbuf.len < (int)sizeof (struct T_optmgmt_ack) ||
+		    ack.ack.PRIM_type != T_OPTMGMT_ACK ||
+		    ack.ack.MGMT_flags != T_SUCCESS ||
+		    ack.ack.OPT_length < (int)sizeof (struct opthdr) ||
+		    ack.ack.OPT_offset < 0 ||
+		    ack.ack.OPT_offset >
+		    cbuf.len - (int)sizeof (struct opthdr)) {
+			warnx("cannot process invalid message from getmsg()");
+			err = -1;
+			break;
+		}
+
+		/* LINTED E_BAD_PTR_CAST_ALIGN */
+		hdr = (struct opthdr *)((caddr_t)&ack + ack.ack.OPT_offset);
+		if (r == 0 && hdr->level == 0 && hdr->name == 0) {
+			/*
+			 * snmpcom_req() has sent us the final End-Of-Data
+			 * message, so there's nothing further to read.
+			 */
+			break;
+		}
+
+		/* Only data should remain. */
+		VERIFY3S(r, ==, MOREDATA);
+
+		/*
+		 * Allocate a buffer to hold the data portion of the message.
+		 * The result goes through a temporary so that the previous
+		 * buffer is still freed below if realloc() fails, and at
+		 * least one byte is requested because the result of a
+		 * zero-sized realloc() is implementation-defined.
+		 */
+		newbuf = realloc(dbuf.buf, hdr->len != 0 ? hdr->len : 1);
+		if (newbuf == NULL) {
+			warn("failed to realloc() buffer");
+			err = -1;
+			break;
+		}
+		dbuf.buf = newbuf;
+		dbuf.maxlen = hdr->len;
+		dbuf.len = 0;
+		flags = 0;
+
+		do {
+			r = getmsg(fd, NULL, &dbuf, &flags);
+		} while (r < 0 && errno == EINTR);
+
+		if (r < 0) {
+			warn("failed to fetch connection data: getmsg()");
+			err = -1;
+			break;
+		} else if (r != 0) {
+			warnx("failed to fetch all data: "
+			    "getmsg() returned %d", r);
+			err = -1;
+			break;
+		}
+
+		if ((state->cws_flags & CS_IPV4) &&
+		    hdr->name == proto->csp_mibv4name) {
+			proto->csp_v4walk(&dbuf, state);
+		} else if ((state->cws_flags & CS_IPV6) &&
+		    hdr->name == proto->csp_mibv6name) {
+			proto->csp_v6walk(&dbuf, state);
+		}
+	}
+
+	free(dbuf.buf);
+
+	return (err);
+}
diff --git a/usr/src/cmd/connstat/connstat_mib.h b/usr/src/cmd/connstat/connstat_mib.h
new file mode 100644
index 0000000000..038ac2d874
--- /dev/null
+++ b/usr/src/cmd/connstat/connstat_mib.h
@@ -0,0 +1,35 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2015 by Delphix. All rights reserved.
+ */
+
+#ifndef _CONNSTAT_MIB_H
+#define _CONNSTAT_MIB_H
+
+#include "connstat.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int mibopen(const char *); /* open /dev/arp and push the named module; returns the fd or -1 */
+int conn_walk(int, connstat_proto_t *, conn_walk_state_t *); /* fetch MIB tables and run the protocol walkers; 0 or -1 */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CONNSTAT_MIB_H */
diff --git a/usr/src/cmd/connstat/connstat_tcp.c b/usr/src/cmd/connstat/connstat_tcp.c
new file mode 100644
index 0000000000..4cd20c9b09
--- /dev/null
+++ b/usr/src/cmd/connstat/connstat_tcp.c
@@ -0,0 +1,403 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inet/mib2.h>
+#include <sys/debug.h>
+#include <sys/stropts.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <inet/tcp.h>
+#include <arpa/inet.h>
+#include <ofmt.h>
+#include <sys/time.h>
+#include "connstat_mib.h"
+#include "connstat_tcp.h"
+
+/*
+ * The byte order of some of the fields in this code can be a bit confusing.
+ * When using sockaddr_in(6) structs, the address and ports are always in
+ * Network Byte Order (Big Endian), as required by sockaddr(3SOCKET).
+ *
+ * When using the structs mib2_tcpConnEntry_t and mib2_tcp6ConnEntry_t, the
+ * address fields (tcp(6)ConnLocalAddress and tcp(6)ConnRemAddress) are in
+ * Network Byte Order. Note, however, that the port fields ARE NOT, but are
+ * instead in Host Byte Order. This isn't a problem though, since the ports
+ * we filter on from the command-line (ca_lport and ca_rport) are kept in
+ * Host Byte Order after parsing.
+ *
+ * Since the t_lport and t_rport fields come from the MIB structs, they are
+ * likewise stored in Host Byte Order (and need to be for printing). The
+ * t_laddr and t_raddr fields are string representations of the addresses,
+ * so they don't require any special attention.
+ *
+ * All of the statistics (such as bytes read and written, current window
+ * sizes, etc.) are in Host Byte Order.
+ */
+
+typedef struct tcp_fields_buf_s { /* one output row, filled per connection and handed to ofmt_print() */
+ char t_laddr[INET6_ADDRSTRLEN]; /* local address as text (from inet_ntop) */
+ char t_raddr[INET6_ADDRSTRLEN]; /* remote address as text (from inet_ntop) */
+ uint16_t t_lport; /* local port, host byte order */
+ uint16_t t_rport; /* remote port, host byte order */
+ uint64_t t_inbytes; /* in-order + out-of-order data bytes received */
+ uint64_t t_insegs; /* in-order + out-of-order data segments received */
+ uint64_t t_inunorderbytes; /* data bytes received out of order */
+ uint64_t t_inunordersegs; /* data segments received out of order */
+ uint64_t t_outbytes; /* copy of ce_out_data_bytes */
+ uint64_t t_outsegs; /* copy of ce_out_data_segs */
+ uint64_t t_retransbytes; /* copy of ce_out_retrans_bytes */
+ uint64_t t_retranssegs; /* copy of ce_out_retrans_segs */
+ uint32_t t_suna; /* ce_snxt - ce_suna: bytes sent but not yet acknowledged */
+ uint32_t t_unsent; /* copy of ce_unsent */
+ uint32_t t_swnd; /* copy of ce_swnd (send window) */
+ uint32_t t_cwnd; /* copy of ce_cwnd (congestion window) */
+ uint32_t t_rwnd; /* copy of ce_rwnd (receive window) */
+ uint32_t t_mss; /* copy of ce_mss */
+ uint32_t t_rto; /* copy of ce_rto (retransmit timeout) */
+ uint32_t t_rtt_cnt; /* copy of ce_rtt_cnt; printed as RTTC */
+ uint64_t t_rtt_sum; /* copy of ce_rtt_sum; printed as RTTS */
+ int t_state; /* copy of ce_state; rendered by print_tcp_state() */
+ uint64_t t_rtt; /* ce_rtt_sa, or 0 when no data segment has been sent */
+} tcp_fields_buf_t;
+
+static boolean_t print_tcp_state(ofmt_arg_t *, char *, uint_t); /* defined at the end of this file */
+
+static ofmt_field_t tcp_fields[] = { /* per entry: field name, presumably column width (TODO confirm against ofmt), offset into tcp_fields_buf_t, print callback */
+ { "LADDR", 26,
+ offsetof(tcp_fields_buf_t, t_laddr), print_string },
+ { "RADDR", 26,
+ offsetof(tcp_fields_buf_t, t_raddr), print_string },
+ { "LPORT", 6,
+ offsetof(tcp_fields_buf_t, t_lport), print_uint16 },
+ { "RPORT", 6,
+ offsetof(tcp_fields_buf_t, t_rport), print_uint16 },
+ { "INBYTES", 11,
+ offsetof(tcp_fields_buf_t, t_inbytes), print_uint64 },
+ { "INSEGS", 11,
+ offsetof(tcp_fields_buf_t, t_insegs), print_uint64 },
+ { "INUNORDERBYTES", 15,
+ offsetof(tcp_fields_buf_t, t_inunorderbytes), print_uint64 },
+ { "INUNORDERSEGS", 14,
+ offsetof(tcp_fields_buf_t, t_inunordersegs), print_uint64 },
+ { "OUTBYTES", 11,
+ offsetof(tcp_fields_buf_t, t_outbytes), print_uint64 },
+ { "OUTSEGS", 11,
+ offsetof(tcp_fields_buf_t, t_outsegs), print_uint64 },
+ { "RETRANSBYTES", 13,
+ offsetof(tcp_fields_buf_t, t_retransbytes), print_uint64 },
+ { "RETRANSSEGS", 12,
+ offsetof(tcp_fields_buf_t, t_retranssegs), print_uint64 },
+ { "SUNA", 11,
+ offsetof(tcp_fields_buf_t, t_suna), print_uint32 },
+ { "UNSENT", 11,
+ offsetof(tcp_fields_buf_t, t_unsent), print_uint32 },
+ { "SWND", 11,
+ offsetof(tcp_fields_buf_t, t_swnd), print_uint32 },
+ { "CWND", 11,
+ offsetof(tcp_fields_buf_t, t_cwnd), print_uint32 },
+ { "RWND", 11,
+ offsetof(tcp_fields_buf_t, t_rwnd), print_uint32 },
+ { "MSS", 6,
+ offsetof(tcp_fields_buf_t, t_mss), print_uint32 },
+ { "RTO", 8,
+ offsetof(tcp_fields_buf_t, t_rto), print_uint32 },
+ { "RTT", 8,
+ offsetof(tcp_fields_buf_t, t_rtt), print_uint64 },
+ { "RTTS", 8,
+ offsetof(tcp_fields_buf_t, t_rtt_sum), print_uint64 },
+ { "RTTC", 11,
+ offsetof(tcp_fields_buf_t, t_rtt_cnt), print_uint32 },
+ { "STATE", 12,
+ offsetof(tcp_fields_buf_t, t_state), print_tcp_state },
+ { NULL, 0, 0, NULL} /* terminating entry */
+};
+
+static tcp_fields_buf_t fields_buf; /* single global row buffer, overwritten for every connection printed */
+
+
+typedef struct tcp_state_info_s { /* pairs a TCPS_* state value with its display name */
+ int tsi_state; /* TCPS_* value */
+ const char *tsi_string; /* name printed for, and accepted as, that state */
+} tcp_state_info_t;
+
+tcp_state_info_t tcp_state_info[] = { /* scanned linearly by the tcp_stateinfoby*() lookups */
+ { TCPS_CLOSED, "CLOSED" },
+ { TCPS_IDLE, "IDLE" },
+ { TCPS_BOUND, "BOUND" },
+ { TCPS_LISTEN, "LISTEN" },
+ { TCPS_SYN_SENT, "SYN_SENT" },
+ { TCPS_SYN_RCVD, "SYN_RCVD" },
+ { TCPS_ESTABLISHED, "ESTABLISHED" },
+ { TCPS_CLOSE_WAIT, "CLOSE_WAIT" },
+ { TCPS_FIN_WAIT_1, "FIN_WAIT_1" },
+ { TCPS_CLOSING, "CLOSING" },
+ { TCPS_LAST_ACK, "LAST_ACK" },
+ { TCPS_FIN_WAIT_2, "FIN_WAIT_2" },
+ { TCPS_TIME_WAIT, "TIME_WAIT" },
+ { TCPS_CLOSED - 1, NULL } /* terminator: the NULL name ends table scans */
+};
+
+/*
+ * Hand back the table describing every TCP field that can be displayed.
+ */
+ofmt_field_t *
+tcp_get_fields(void)
+{
+	return (&tcp_fields[0]);
+}
+
+/*
+ * Fill in the statistics portion of the global output buffer from the
+ * connection info structure embedded in both the IPv4 and IPv6 MIB
+ * connection entries.
+ */
+static void
+tcp_ci2buf(struct tcpConnEntryInfo_s *ci)
+{
+	tcp_fields_buf_t *out = &fields_buf;
+
+	/* Connection state plus current window and timer values. */
+	out->t_state = ci->ce_state;
+	out->t_mss = ci->ce_mss;
+	out->t_rto = ci->ce_rto;
+	out->t_swnd = ci->ce_swnd;
+	out->t_cwnd = ci->ce_cwnd;
+	out->t_rwnd = ci->ce_rwnd;
+	out->t_unsent = ci->ce_unsent;
+
+	/* Outstanding data: next sequence to send minus oldest unacked. */
+	out->t_suna = ci->ce_snxt - ci->ce_suna;
+
+	/* Inbound totals cover both in-order and out-of-order data. */
+	out->t_inunorderbytes = ci->ce_in_data_unorder_bytes;
+	out->t_inunordersegs = ci->ce_in_data_unorder_segs;
+	out->t_inbytes =
+	    ci->ce_in_data_inorder_bytes + ci->ce_in_data_unorder_bytes;
+	out->t_insegs =
+	    ci->ce_in_data_inorder_segs + ci->ce_in_data_unorder_segs;
+
+	/* Outbound data and retransmission counters. */
+	out->t_outbytes = ci->ce_out_data_bytes;
+	out->t_outsegs = ci->ce_out_data_segs;
+	out->t_retransbytes = ci->ce_out_retrans_bytes;
+	out->t_retranssegs = ci->ce_out_retrans_segs;
+
+	/* Round-trip accounting. */
+	out->t_rtt_sum = ci->ce_rtt_sum;
+	out->t_rtt_cnt = ci->ce_rtt_cnt;
+
+	/* Report a smoothed RTT of 0 until a data segment has been sent. */
+	if (ci->ce_out_data_segs == 0) {
+		out->t_rtt = 0;
+	} else {
+		out->t_rtt = ci->ce_rtt_sa;
+	}
+}
+
+/*
+ * Load the global output buffer from an IPv4 connection entry: ports,
+ * textual addresses, then the per-connection statistics.
+ */
+static void
+tcp_ipv4_ce2buf(mib2_tcpConnEntry_t *ce)
+{
+	/* Ports are copied as-is from the MIB entry. */
+	fields_buf.t_rport = ce->tcpConnRemPort;
+	fields_buf.t_lport = ce->tcpConnLocalPort;
+
+	/* Address conversion is asserted to succeed. */
+	VERIFY3P(inet_ntop(AF_INET,
+	    (void *)&ce->tcpConnLocalAddress, fields_buf.t_laddr,
+	    sizeof (fields_buf.t_laddr)), !=, NULL);
+	VERIFY3P(inet_ntop(AF_INET,
+	    (void *)&ce->tcpConnRemAddress, fields_buf.t_raddr,
+	    sizeof (fields_buf.t_raddr)), !=, NULL);
+
+	tcp_ci2buf(&ce->tcpConnEntryInfo);
+}
+
+/*
+ * Load the global output buffer from an IPv6 connection entry: ports,
+ * textual addresses, then the per-connection statistics.
+ */
+static void
+tcp_ipv6_ce2buf(mib2_tcp6ConnEntry_t *ce)
+{
+	/* Ports are copied as-is from the MIB entry. */
+	fields_buf.t_rport = ce->tcp6ConnRemPort;
+	fields_buf.t_lport = ce->tcp6ConnLocalPort;
+
+	/* Address conversion is asserted to succeed. */
+	VERIFY3P(inet_ntop(AF_INET6,
+	    (void *)&ce->tcp6ConnLocalAddress, fields_buf.t_laddr,
+	    sizeof (fields_buf.t_laddr)), !=, NULL);
+	VERIFY3P(inet_ntop(AF_INET6,
+	    (void *)&ce->tcp6ConnRemAddress, fields_buf.t_raddr,
+	    sizeof (fields_buf.t_raddr)), !=, NULL);
+
+	tcp_ci2buf(&ce->tcp6ConnEntryInfo);
+}
+
+/*
+ * Emit one row for an IPv4 connection unless it is rejected by the
+ * loopback setting or by any address, port or state filter held in state.
+ */
+static void
+tcp_ipv4_print(mib2_tcpConnEntry_t *ce, conn_walk_state_t *state)
+{
+	struct sockaddr_in *want_laddr =
+	    (struct sockaddr_in *)&state->cws_filter.ca_laddr;
+	struct sockaddr_in *want_raddr =
+	    (struct sockaddr_in *)&state->cws_filter.ca_raddr;
+
+	/* Loopback connections are shown only when CS_LOOPBACK is set. */
+	if ((state->cws_flags & CS_LOOPBACK) == 0 &&
+	    ntohl(ce->tcpConnLocalAddress) == INADDR_LOOPBACK) {
+		return;
+	}
+
+	/* Each filter is consulted only when its flag is set. */
+	if ((state->cws_flags & CS_LADDR) &&
+	    ce->tcpConnLocalAddress != want_laddr->sin_addr.s_addr) {
+		return;
+	}
+	if ((state->cws_flags & CS_RADDR) &&
+	    ce->tcpConnRemAddress != want_raddr->sin_addr.s_addr) {
+		return;
+	}
+	if ((state->cws_flags & CS_LPORT) &&
+	    ce->tcpConnLocalPort != state->cws_filter.ca_lport) {
+		return;
+	}
+	if ((state->cws_flags & CS_RPORT) &&
+	    ce->tcpConnRemPort != state->cws_filter.ca_rport) {
+		return;
+	}
+	if ((state->cws_flags & CS_STATE) &&
+	    ce->tcpConnEntryInfo.ce_state != state->cws_filter.ca_state) {
+		return;
+	}
+
+	/* The connection passed every check: format and print it. */
+	tcp_ipv4_ce2buf(ce);
+	ofmt_print(state->cws_ofmt, &fields_buf);
+}
+
+/*
+ * Emit one row for an IPv6 connection unless it is rejected by the
+ * loopback setting or by any address, port or state filter held in state.
+ */
+static void
+tcp_ipv6_print(mib2_tcp6ConnEntry_t *ce, conn_walk_state_t *state)
+{
+	struct sockaddr_in6 *want_laddr =
+	    (struct sockaddr_in6 *)&state->cws_filter.ca_laddr;
+	struct sockaddr_in6 *want_raddr =
+	    (struct sockaddr_in6 *)&state->cws_filter.ca_raddr;
+
+	/* Loopback connections are shown only when CS_LOOPBACK is set. */
+	if ((state->cws_flags & CS_LOOPBACK) == 0 &&
+	    IN6_IS_ADDR_LOOPBACK(
+	    (struct in6_addr *)&ce->tcp6ConnLocalAddress)) {
+		return;
+	}
+
+	/* Each filter is consulted only when its flag is set. */
+	if ((state->cws_flags & CS_LADDR) &&
+	    !IN6_ARE_ADDR_EQUAL(
+	    (struct in6_addr *)&ce->tcp6ConnLocalAddress,
+	    &want_laddr->sin6_addr)) {
+		return;
+	}
+	if ((state->cws_flags & CS_RADDR) &&
+	    !IN6_ARE_ADDR_EQUAL(
+	    (struct in6_addr *)&ce->tcp6ConnRemAddress,
+	    &want_raddr->sin6_addr)) {
+		return;
+	}
+	if ((state->cws_flags & CS_LPORT) &&
+	    ce->tcp6ConnLocalPort != state->cws_filter.ca_lport) {
+		return;
+	}
+	if ((state->cws_flags & CS_RPORT) &&
+	    ce->tcp6ConnRemPort != state->cws_filter.ca_rport) {
+		return;
+	}
+	if ((state->cws_flags & CS_STATE) &&
+	    ce->tcp6ConnEntryInfo.ce_state != state->cws_filter.ca_state) {
+		return;
+	}
+
+	/* The connection passed every check: format and print it. */
+	tcp_ipv6_ce2buf(ce);
+	ofmt_print(state->cws_ofmt, &fields_buf);
+}
+
+/*
+ * Treat the data buffer as an array of IPv4 connection entries and pass
+ * each complete entry to tcp_ipv4_print().
+ */
+void
+tcp_walk_ipv4(struct strbuf *dbuf, conn_walk_state_t *state)
+{
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	mib2_tcpConnEntry_t *entries = (mib2_tcpConnEntry_t *)dbuf->buf;
+	uint_t total = (dbuf->len / sizeof (mib2_tcpConnEntry_t));
+	uint_t i;
+
+	for (i = 0; i < total; i++) {
+		tcp_ipv4_print(&entries[i], state);
+	}
+}
+
+/*
+ * Treat the data buffer as an array of IPv6 connection entries and pass
+ * each complete entry to tcp_ipv6_print().
+ */
+void
+tcp_walk_ipv6(struct strbuf *dbuf, conn_walk_state_t *state)
+{
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	mib2_tcp6ConnEntry_t *entries = (mib2_tcp6ConnEntry_t *)dbuf->buf;
+	uint_t total = (dbuf->len / sizeof (mib2_tcp6ConnEntry_t));
+	uint_t i;
+
+	for (i = 0; i < total; i++) {
+		tcp_ipv6_print(&entries[i], state);
+	}
+}
+
+/*
+ * Find the state table entry for a numeric TCP state.  Returns NULL when
+ * the value is not present in the table.
+ */
+static tcp_state_info_t *
+tcp_stateinfobystate(int state)
+{
+	tcp_state_info_t *cur = tcp_state_info;
+
+	/* The table ends at the entry whose name is NULL. */
+	while (cur->tsi_string != NULL) {
+		if (cur->tsi_state == state) {
+			return (cur);
+		}
+		cur++;
+	}
+	return (NULL);
+}
+
+/*
+ * Find the state table entry whose name matches statestr.
+ *
+ * The comparison is case-insensitive and covers the whole string, so a
+ * string that merely begins with a valid state name (for example
+ * "closedX") is rejected rather than silently accepted as that state.
+ *
+ * Returns a pointer to the matching entry, or NULL if statestr is NULL or
+ * does not name a state in the table.
+ */
+static tcp_state_info_t *
+tcp_stateinfobystr(const char *statestr)
+{
+	tcp_state_info_t *sip;
+
+	/* Guard against a missing value instead of dereferencing NULL. */
+	if (statestr == NULL) {
+		return (NULL);
+	}
+
+	for (sip = tcp_state_info; sip->tsi_string != NULL; sip++) {
+		if (strcasecmp(statestr, sip->tsi_string) == 0) {
+			return (sip);
+		}
+	}
+	return (NULL);
+}
+
+/*
+ * Translate a state name into its numeric TCP state.  A name that is not
+ * found yields TCPS_CLOSED - 1.
+ */
+int
+tcp_str2state(const char *statestr)
+{
+	const tcp_state_info_t *match = tcp_stateinfobystr(statestr);
+
+	if (match == NULL) {
+		return (TCPS_CLOSED - 1);
+	}
+	return (match->tsi_state);
+}
+
+/*
+ * Translate a numeric TCP state into its display name, or NULL when the
+ * state is not in the table.
+ */
+static const char *
+tcp_state2str(int state)
+{
+	const tcp_state_info_t *match = tcp_stateinfobystate(state);
+
+	if (match != NULL) {
+		return (match->tsi_string);
+	}
+	return (NULL);
+}
+
+/*
+ * Output callback for the STATE field.  Reads the int located ofmt_id
+ * bytes into the callback argument and writes its name into buf, or
+ * "UNKNOWN(n)" when the value has no name.  Always returns B_TRUE.
+ */
+static boolean_t
+print_tcp_state(ofmt_arg_t *ofarg, char *buf, uint_t bufsize)
+{
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	int *statep = (int *)((char *)ofarg->ofmt_cbarg + ofarg->ofmt_id);
+	const char *name = tcp_state2str(*statep);
+
+	if (name == NULL) {
+		(void) snprintf(buf, bufsize, "UNKNOWN(%d)", *statep);
+	} else {
+		(void) strlcpy(buf, name, bufsize);
+	}
+
+	return (B_TRUE);
+}
diff --git a/usr/src/cmd/connstat/connstat_tcp.h b/usr/src/cmd/connstat/connstat_tcp.h
new file mode 100644
index 0000000000..7cad46370d
--- /dev/null
+++ b/usr/src/cmd/connstat/connstat_tcp.h
@@ -0,0 +1,50 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+ */
+
+#ifndef _CONNSTAT_TCP_H
+#define _CONNSTAT_TCP_H
+
+#include <stddef.h>
+#include "connstat.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int tcp_str2state(const char *state);
+connstat_getfieldsfunc_t tcp_get_fields;
+connstat_walkfunc_t tcp_walk_ipv4, tcp_walk_ipv6;
+
+/*
+ * Keep the default output to < 80 columns. For most interactive workflows,
+ * the user will run the command without arguments to get an idea of what
+ * connections exist before narrowing down the investigation to a single
+ * connection (with filtering) and specifying additional fields to output
+ * depending on what the user is interested in.
+ */
+#define TCP_DEFAULT_FIELDS "laddr,lport,raddr,rport,state"
+
+#define CONNSTAT_TCP_PROTO \
+ { "tcp", TCP_DEFAULT_FIELDS, MIB2_TCP, MIB2_TCP_CONN, MIB2_TCP6_CONN, \
+ tcp_get_fields, tcp_walk_ipv4, tcp_walk_ipv6 }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CONNSTAT_TCP_H */
diff --git a/usr/src/man/man1m/Makefile b/usr/src/man/man1m/Makefile
index eda366a82d..ebdd639d57 100644
--- a/usr/src/man/man1m/Makefile
+++ b/usr/src/man/man1m/Makefile
@@ -77,6 +77,7 @@ _MANFILES= 6to4relay.1m \
clear_locks.1m \
clinfo.1m \
clri.1m \
+ connstat.1m \
consadm.1m \
conv_lp.1m \
conv_lpd.1m \
diff --git a/usr/src/man/man1m/connstat.1m b/usr/src/man/man1m/connstat.1m
new file mode 100644
index 0000000000..2175fefdb3
--- /dev/null
+++ b/usr/src/man/man1m/connstat.1m
@@ -0,0 +1,395 @@
+.\"
+.\" CDDL HEADER START
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\" CDDL HEADER END
+.\"
+.\"
+.\" Copyright (c) 2016 by Delphix. All rights reserved.
+.\"
+.Dd July 5, 2016
+.Dt CONNSTAT 1M
+.Os
+.Sh NAME
+.Nm connstat
+.Nd report TCP connection statistics
+.Sh SYNOPSIS
+.Nm
+.Op Fl eLP
+.Op Fl 4 Ns | Ns Fl 6
+.Op Fl T Sy d Ns | Ns Sy u
+.Op Fl F Ar filter
+.Op Fl i Ar interval
+.Op Fl c Ar count
+.Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns ...
+.Sh DESCRIPTION
+The
+.Nm
+command reports TCP connection statistics in tabular form.
+Each row of the table represents the activity of one connection.
+The
+.Nm
+command adds virtually no overhead to run as it is aggregating statistics that
+are always collected in the kernel.
+.Pp
+With no arguments,
+.Nm
+prints a single report containing all TCP connections, and includes a basic
+set of fields representing IP address and port information, as well as connection
+state.
+The
+.Fl o
+flag can be used to specify which fields to display, and other arguments to
+filter the set of connections included in the output.
+.Sh OPTIONS
+The arguments are as follows:
+.Bl -tag -width ""
+.It Fl 4 , Fl -ipv4
+Only displays IPv4 connections.
+.It Fl 6 , Fl -ipv6
+Only displays IPv6 connections.
+.It Fl c Ar count , Fl -count Ns = Ns Ar count
+Print a specified number of reports before exiting.
+This is used in conjunction with
+.Fl i .
+.It Fl e , Fl -established
+Only display connections that are in state ESTABLISHED.
+This is equivalent to including
+.Sy state=ESTABLISHED
+in the filter argument to the
+.Fl F
+option.
+.It Fl F Ar filter , Fl -filter Ns = Ns Ar filter
+Only display connections that match the filter argument provided.
+The format of the filter is:
+.Pp
+.Ar field Ns = Ns Ar value Ns Oo , Ns Ar field Ns = Ns Ar value Oc Ns ...
+.Pp
+Fields that can currently be filtered are
+.Ar laddr , Ar lport , Ar raddr , Ar rport , and Ar state .
+See the
+.Sx Fields
+section for a description of these fields.
+The filter matches a connection if all of the filter elements match, and a
+field may only appear once in the filter.
+.It Fl i Ar interval , Fl -interval Ns = Ns Ar interval
+Specify an output interval in seconds.
+For each interval, a report containing all connections appropriate given other
+command-line options is printed.
+.It Fl L , Fl -no-loopback
+Exclude connections to the loopback address.
+.It Fl o Ar fields , Fl -output Ns = Ns Ar fields
+Restrict the output to the specified comma-delimited list of field names.
+See the
+.Sx Fields
+section for information about possible fields.
+.It Fl P , Fl -parsable
+Display using a stable, machine-parsable output format.
+The
+.Fl o
+flag must also be given to specify which fields to output and their order.
+Each line of output will consist of comma-delimited (,) fields,
+and no header will be emitted.
+When also using the
+.Fl T
+option, lines indicating the current time will begin with
+.Dq "= " .
+See
+.Sx Example 4
+for an example of how to process parsable output.
+.It Fl T Sy d Ns | Ns Sy u , Fl -timestamp Ns = Ns Sy d Ns | Ns Sy u
+Print a timestamp before each block of output.
+.Pp
+Specify
+.Sy u
+for a printed representation of the internal representation of time (see
+.Xr time 2 Ns ).
+Specify
+.Sy d
+for standard date format (see
+.Xr date 1 Ns ).
+.El
+.Ss Fields
+The following fields are supported.
+Field names are case insensitive.
+Unless otherwise indicated, the values of fields that represent a count (e.g.
+bytes or segments) are cumulative since the connection was established.
+Some of these fields refer to data segments, which are segments that contain
+a non-zero amount of data.
+All sizes are in bytes.
+.Bl -tag -width "inunorderbytes"
+.It Sy cwnd
+The size of the local TCP congestion window at this instant.
+.It Sy inbytes
+The number of data bytes received.
+This does not include duplicate bytes received.
+.It Sy insegs
+The number of data segments received.
+This does not include duplicate segments received.
+.It Sy inunorderbytes
+The number of data bytes that were received out of order.
+.It Sy inunordersegs
+The number of data segments that were received out of order.
+.It Sy laddr
+The local IP address.
+.It Sy lport
+The local TCP port.
+.It Sy mss
+The maximum TCP segment size for this connection.
+.It Sy outbytes
+The number of data bytes sent.
+This does not include retransmitted bytes counted by
+.Sy retransbytes .
+.It Sy outsegs
+The number of data segments sent.
+This does not include segments containing retransmitted bytes counted by
+.Sy retranssegs .
+.It Sy raddr
+The remote IP address.
+.It Sy retransbytes
+The number of data bytes retransmitted.
+.It Sy retranssegs
+The number of data segments sent that contained retransmitted bytes.
+.It Sy rport
+The remote TCP port.
+.It Sy rto
+The current retransmission timeout in milliseconds.
+.It Sy rtt
+The current smoothed round-trip time to the peer in microseconds.
+The smoothed RTT average algorithm used is as described in RFC 6298.
+.It Sy rttc
+The number of times that a round-trip sample was added to
+.Sy rtts .
+See
+.Sy rtts
+for a description of how these two fields can be used together to calculate the
+average round-trip over a given period.
+.It Sy rtts
+The sum of all round-trip samples taken over the lifetime of the connection in
+microseconds.
+Each time TCP updates the value of
+.Sy rtt
+with a new sample, that sample's value is added to
+.Sy rtts .
+To calculate the average round-trip over a given period (e.g. between T1 and T2),
+take samples of
+.Sy rtts
+and
+.Sy rttc
+at T1 and T2, and calculate
+.br
+((
+.Sy rtts Ns
+_T2 -
+.Sy rtts Ns
+_T1 ) / (
+.Sy rttc Ns
+_T2 -
+.Sy rttc Ns
+_T1 )).
+.br
+See
+.Sx Example 4
+for an example of how this can be done programmatically from a shell script.
+.It Sy rwnd
+The size of the local TCP receive window at this instant.
+.It Sy state
+The TCP connection state.
+Possible values are:
+.Bl -tag -width "SYN_RECEIVED"
+.It Sy BOUND
+Bound, ready to connect or listen.
+.It Sy CLOSED
+Closed.
+The local endpoint (e.g. socket) is not being used.
+.It Sy CLOSING
+Closed, but still waiting for a termination acknowledgment from the peer.
+.It Sy CLOSE_WAIT
+The peer has shut down; waiting for the local endpoint to close.
+.It Sy ESTABLISHED
+Connection has been established and data can be transferred.
+.It Sy FIN_WAIT_1
+Local endpoint is closed, but waiting for termination acknowledgment from the
+peer.
+.It Sy FIN_WAIT_2
+Local endpoint is closed, but waiting for a termination request from the peer.
+.It Sy IDLE
+The local endpoint (e.g. socket) has been opened, but is not bound.
+.It Sy LAST_ACK
+The remote endpoint has terminated, and the local endpoint has sent a termination
+request.
+The acknowledgment for this request has not been received.
+.It Sy LISTEN
+Listening for incoming connections.
+.It Sy SYN_RCVD
+Initial connection request has been received and acknowledged, and a connection
+request has been sent but not yet acknowledged.
+.It Sy SYN_SENT
+A connection establishment request has been sent but not yet acknowledged.
+.It Sy TIME_WAIT
+Waiting for time to pass after having sent an acknowledgment for the peer's
+connection termination request.
+.El
+.Pp
+See RFC 793 for a more complete understanding of the TCP protocol and TCP
+connection states.
+.It Sy suna
+The number of unacknowledged bytes outstanding at this instant.
+.It Sy swnd
+The size of the local TCP send window (the peer's receive window) at this
+instant.
+.It Sy unsent
+The number of unsent bytes in the local TCP transmit queue at this instant.
+.El
+.Sh EXIT STATUS
+The
+.Nm
+utility exits 0 on success, or 1 if an error occurs.
+.Sh EXAMPLES
+.Bl -tag -width ""
+.It Sy Example 1 List established connections.
+By default, connstat lists basic connection details.
+Using the
+.Fl e
+option allows the user to get a quick glance of established connections.
+.Bd -literal
+$ connstat -e
+ LADDR LPORT RADDR RPORT STATE
+ 10.43.37.172 51275 172.16.105.4 389 ESTABLISHED
+ 10.43.37.172 22 172.16.98.16 62270 ESTABLISHED
+ 10.43.37.172 1020 172.16.100.162 2049 ESTABLISHED
+ 10.43.37.172 1019 10.43.11.64 2049 ESTABLISHED
+ 10.43.37.172 22 172.16.98.16 61520 ESTABLISHED
+ 10.43.37.172 80 10.43.16.132 59467 ESTABLISHED
+.Ed
+.It Sy Example 2 Show one connection's I/O stats every second
+The
+.Fl F
+option is used to filter a specific connection,
+.Fl o
+is used to output specific fields, and
+.Fl i
+to provide the output interval in seconds.
+.Bd -literal
+$ connstat -F lport=22,rport=49675,raddr=172.16.168.30 \e
+ -o inbytes,outbytes -i 1
+ INBYTES OUTBYTES
+ 9589 18101
+ INBYTES OUTBYTES
+ 9589 18341
+ INBYTES OUTBYTES
+ 9589 18501
+ INBYTES OUTBYTES
+ 9589 18661
+ ...
+.Ed
+.It Sy Example 3 Understanding the bottleneck for a given connection
+Understanding the transmit bottleneck for a connection requires knowing the
+size of the congestion window, whether the window is full, and the round-trip
+time to the peer.
+The congestion window is full when
+.Sy suna
+is equal to
+.Sy cwnd .
+If the window is full, then the throughput is limited by the size of the window
+and the round-trip time.
+In that case, knowing these two values is critical.
+Either the window is small because of retransmissions, or the round-trip
+latency is high, or both.
+In the example below, the window is small due to high congestion or an
+unreliable network.
+.Bd -literal
+$ connstat -F lport=41934,rport=50001 \e
+ -o outbytes,suna,cwnd,unsent,retransbytes,rtt -T d -i 1
+July 7, 2016 11:04:40 AM EDT
+ OUTBYTES SUNA CWND UNSENT RETRANSBYTES RTT
+ 1647048093 47784 47784 3017352 3701844 495
+July 7, 2016 11:04:41 AM EDT
+ OUTBYTES SUNA CWND UNSENT RETRANSBYTES RTT
+ 1660720109 41992 41992 1535032 3765556 673
+July 7, 2016 11:04:42 AM EDT
+ OUTBYTES SUNA CWND UNSENT RETRANSBYTES RTT
+ 1661875613 26064 26064 4311688 3829268 571
+July 7, 2016 11:04:43 AM EDT
+ OUTBYTES SUNA CWND UNSENT RETRANSBYTES RTT
+ 1681478637 41992 41992 437304 3932076 1471
+July 7, 2016 11:04:44 AM EDT
+ OUTBYTES SUNA CWND UNSENT RETRANSBYTES RTT
+ 1692028765 44888 44888 1945800 4014612 921
+\&...
+.Ed
+.It Sy Example 4 Calculating average RTT over intervals
+As described in the
+.Sx Fields
+section, the
+.Sy rtts
+and
+.Sy rttc
+fields can be used to calculate average RTT over a period of time.
+The following example combines machine parsable output with these fields to do
+this programmatically.
+The script:
+.Bd -literal
+#!/bin/bash
+
+i=0
+connstat -P -F lport=41934,rport=50001 -o rttc,rtts -i 1 | \e
+ while IFS=, read rttc[$i] rtts[$i]; do
+ if [[ $i != 0 ]]; then
+ let rtt="(${rtts[$i]} - ${rtts[$i - 1]}) / \e
+ (${rttc[$i]} - ${rttc[$i - 1]})"
+ echo "avg rtt = ${rtt}us"
+ fi
+ ((i++))
+done
+.Ed
+.Pp
+The output:
+.Bd -literal
+\&...
+avg rtt = 992us
+avg rtt = 829us
+avg rtt = 712us
+avg rtt = 869us
+\&...
+.Ed
+.It Sy Example 5 Show HTTP server connections in TIME_WAIT state
+Connections accumulating in TIME_WAIT state can sometimes be an issue, as these
+connections linger and take up port number space while their time wait timer
+is ticking.
+.Bd -literal
+$ connstat -F state=time_wait,lport=80
+ LADDR LPORT RADDR RPORT STATE
+ 10.43.37.172 80 172.16.168.30 56067 TIME_WAIT
+ 10.43.37.172 80 172.16.168.30 56068 TIME_WAIT
+ 10.43.37.172 80 172.16.168.30 56070 TIME_WAIT
+.Ed
+.El
+.Sh INTERFACE STABILITY
+The command line options for this command are stable, but the output format
+when not using the
+.Fl P
+option and diagnostic messages are not.
+.Sh SEE ALSO
+.Xr netstat 1M
+.Rs
+.%A J. Postel
+.%B Transmission Control Protocol, STD 7, RFC 793
+.%D September 1981
+.Re
+.Rs
+.%A V. Paxson
+.%A M. Allman
+.%A J. Chu
+.%A M. Sargent
+.%B Computing TCP's Retransmission Timer, RFC 6298
+.%D June 2011
+.Re
diff --git a/usr/src/pkg/manifests/SUNWcs.man1m.inc b/usr/src/pkg/manifests/SUNWcs.man1m.inc
index e5e971ed18..8643a52bff 100644
--- a/usr/src/pkg/manifests/SUNWcs.man1m.inc
+++ b/usr/src/pkg/manifests/SUNWcs.man1m.inc
@@ -14,6 +14,7 @@
# Copyright 2015 Nexenta Systems, Inc. All rights reserved.
# Copyright 2016 Toomas Soome <tsoome@me.com>
# Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
+# Copyright (c) 2016 by Delphix. All rights reserved.
#
file path=usr/share/man/man1m/6to4relay.1m
@@ -48,6 +49,7 @@ file path=usr/share/man/man1m/chroot.1m
file path=usr/share/man/man1m/clear_locks.1m
file path=usr/share/man/man1m/clinfo.1m
file path=usr/share/man/man1m/clri.1m
+file path=usr/share/man/man1m/connstat.1m
file path=usr/share/man/man1m/consadm.1m
file path=usr/share/man/man1m/coreadm.1m
file path=usr/share/man/man1m/cron.1m
diff --git a/usr/src/pkg/manifests/SUNWcs.mf b/usr/src/pkg/manifests/SUNWcs.mf
index ce53e443ca..0bdc473d00 100644
--- a/usr/src/pkg/manifests/SUNWcs.mf
+++ b/usr/src/pkg/manifests/SUNWcs.mf
@@ -26,6 +26,7 @@
# Copyright 2017 Nexenta Systems, Inc.
# Copyright 2017 Toomas Soome <tsoome@me.com>
# Copyright 2019 Peter Tribble.
+# Copyright (c) 2011, 2015 by Delphix. All rights reserved.
#
<include SUNWcs.man1.inc>
@@ -708,6 +709,7 @@ file path=usr/bin/cktime mode=0555
file path=usr/bin/ckuid mode=0555
file path=usr/bin/ckyorn mode=0555
file path=usr/bin/clear mode=0555
+file path=usr/bin/connstat mode=0555
file path=usr/bin/coreadm mode=0555
file path=usr/bin/cp mode=0555
file path=usr/bin/cpio mode=0555
diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c
index 46272b2b22..3cfdbe73ff 100644
--- a/usr/src/uts/common/inet/ip/ip.c
+++ b/usr/src/uts/common/inet/ip/ip.c
@@ -9635,12 +9635,18 @@ ip_snmp_get(queue_t *q, mblk_t *mpctl, int level, boolean_t legacy_req)
if ((mpctl = udp_snmp_get(q, mpctl, legacy_req)) == NULL) {
return (1);
}
+ if (level == MIB2_UDP) {
+ goto done;
+ }
}
if (level != MIB2_UDP) {
if ((mpctl = tcp_snmp_get(q, mpctl, legacy_req)) == NULL) {
return (1);
}
+ if (level == MIB2_TCP) {
+ goto done;
+ }
}
if ((mpctl = ip_snmp_get_mib2_ip_traffic_stats(q, mpctl,
@@ -9717,6 +9723,7 @@ ip_snmp_get(queue_t *q, mblk_t *mpctl, int level, boolean_t legacy_req)
if ((mpctl = ip_snmp_get_mib2_ip_dce(q, mpctl, ipst)) == NULL) {
return (1);
}
+done:
freemsg(mpctl);
return (1);
}
diff --git a/usr/src/uts/common/inet/mib2.h b/usr/src/uts/common/inet/mib2.h
index f6b6b996a8..847ad1c560 100644
--- a/usr/src/uts/common/inet/mib2.h
+++ b/usr/src/uts/common/inet/mib2.h
@@ -20,7 +20,10 @@
*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
*/
-/* Copyright (c) 1990 Mentat Inc. */
+/*
+ * Copyright (c) 1990 Mentat Inc.
+ * Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+ */
#ifndef _INET_MIB2_H
#define _INET_MIB2_H
@@ -1354,25 +1357,46 @@ typedef struct mib2_tcpConnEntry {
/* remote port for this connection { tcpConnEntry 5 } */
int tcpConnRemPort; /* In host byte order */
struct tcpConnEntryInfo_s {
- /* seq # of next segment to send */
+ Counter64 ce_in_data_inorder_bytes;
+ Counter64 ce_in_data_inorder_segs;
+ Counter64 ce_in_data_unorder_bytes;
+ Counter64 ce_in_data_unorder_segs;
+ Counter64 ce_in_zwnd_probes;
+
+ Counter64 ce_out_data_bytes;
+ Counter64 ce_out_data_segs;
+ Counter64 ce_out_retrans_bytes;
+ Counter64 ce_out_retrans_segs;
+ Counter64 ce_out_zwnd_probes;
+ Counter64 ce_rtt_sum;
+
+ /* seq # of next segment to send */
Gauge ce_snxt;
/* seq # of of last segment unacknowledged */
Gauge ce_suna;
- /* currect send window size */
+ /* current send window size */
Gauge ce_swnd;
+ /* current congestion window size */
+ Gauge ce_cwnd;
/* seq # of next expected segment */
Gauge ce_rnxt;
/* seq # of last ack'd segment */
Gauge ce_rack;
- /* currenct receive window size */
+ /* # of unsent bytes in the xmit queue */
+ Gauge ce_unsent;
+ /* current receive window size */
Gauge ce_rwnd;
- /* current rto (retransmit timeout) */
+ /* round-trip time smoothed average (us) */
+ Gauge ce_rtt_sa;
+ /* current rto (retransmit timeout) */
Gauge ce_rto;
- /* current max segment size */
+ /* round-trip time count */
+ Gauge ce_rtt_cnt;
+ /* current max segment size */
Gauge ce_mss;
/* actual internal state */
int ce_state;
- } tcpConnEntryInfo;
+ } tcpConnEntryInfo;
/* pid of the processes that created this connection */
uint32_t tcpConnCreationProcess;
@@ -1408,26 +1432,7 @@ typedef struct mib2_tcp6ConnEntry {
DeviceIndex tcp6ConnIfIndex;
/* state of tcp6 connection { ipv6TcpConnEntry 6 } RW */
int tcp6ConnState;
- struct tcp6ConnEntryInfo_s {
- /* seq # of next segment to send */
- Gauge ce_snxt;
- /* seq # of of last segment unacknowledged */
- Gauge ce_suna;
- /* currect send window size */
- Gauge ce_swnd;
- /* seq # of next expected segment */
- Gauge ce_rnxt;
- /* seq # of last ack'd segment */
- Gauge ce_rack;
- /* currenct receive window size */
- Gauge ce_rwnd;
- /* current rto (retransmit timeout) */
- Gauge ce_rto;
- /* current max segment size */
- Gauge ce_mss;
- /* actual internal state */
- int ce_state;
- } tcp6ConnEntryInfo;
+ struct tcpConnEntryInfo_s tcp6ConnEntryInfo;
/* pid of the processes that created this connection */
uint32_t tcp6ConnCreationProcess;
diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h
index 9c5ffed2eb..7e3910e894 100644
--- a/usr/src/uts/common/inet/tcp.h
+++ b/usr/src/uts/common/inet/tcp.h
@@ -177,16 +177,11 @@ typedef struct tcp_s {
mblk_t *tcp_xmit_tail; /* Last data sent */
uint32_t tcp_unsent; /* # of bytes in hand that are unsent */
uint32_t tcp_xmit_tail_unsent; /* # of unsent bytes in xmit_tail */
-
uint32_t tcp_suna; /* Sender unacknowledged */
uint32_t tcp_rexmit_nxt; /* Next rexmit seq num */
uint32_t tcp_rexmit_max; /* Max retran seq num */
uint32_t tcp_cwnd; /* Congestion window */
int32_t tcp_cwnd_cnt; /* cwnd cnt in congestion avoidance */
-
- uint32_t tcp_ibsegs; /* Inbound segments on this stream */
- uint32_t tcp_obsegs; /* Outbound segments on this stream */
-
uint32_t tcp_naglim; /* Tunable nagle limit */
uint32_t tcp_valid_bits;
#define TCP_ISS_VALID 0x1 /* Is the tcp_iss seq num active? */
@@ -194,8 +189,6 @@ typedef struct tcp_s {
#define TCP_URG_VALID 0x4 /* Is the tcp_urg seq num active? */
#define TCP_OFO_FIN_VALID 0x8 /* Has TCP received an out of order FIN? */
-
-
timeout_id_t tcp_timer_tid; /* Control block for timer service */
uchar_t tcp_timer_backoff; /* Backoff shift count. */
int64_t tcp_last_recv_time; /* Last time we receive a segment. */
@@ -282,6 +275,8 @@ typedef struct tcp_s {
uint32_t tcp_cwnd_max;
uint32_t tcp_csuna; /* Clear (no rexmits in window) suna */
+ hrtime_t tcp_rtt_sum; /* Round trip sum */
+ uint32_t tcp_rtt_cnt; /* Round trip count (non_dup ACKs) */
hrtime_t tcp_rtt_sa; /* Round trip smoothed average */
hrtime_t tcp_rtt_sd; /* Round trip smoothed deviation */
uint32_t tcp_rtt_update; /* Round trip update(s) */
@@ -493,6 +488,8 @@ typedef struct tcp_s {
/* FIN-WAIT-2 flush timeout */
uint32_t tcp_fin_wait_2_flush_interval;
+ tcp_conn_stats_t tcp_cs;
+
#ifdef DEBUG
pc_t tcmp_stk[15];
#endif
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index ee1d75924e..d7458c8eee 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -1231,11 +1231,6 @@ tcp_closei_local(tcp_t *tcp)
if (!TCP_IS_SOCKET(tcp))
tcp_acceptor_hash_remove(tcp);
- TCPS_UPDATE_MIB(tcps, tcpHCInSegs, tcp->tcp_ibsegs);
- tcp->tcp_ibsegs = 0;
- TCPS_UPDATE_MIB(tcps, tcpHCOutSegs, tcp->tcp_obsegs);
- tcp->tcp_obsegs = 0;
-
/*
* This can be called via tcp_time_wait_processing() if TCP gets a
* SYN with sequence number outside the TIME-WAIT connection's
@@ -1904,15 +1899,6 @@ tcp_reinit(tcp_t *tcp)
/* Cancel outstanding timers */
tcp_timers_stop(tcp);
- /*
- * Reset everything in the state vector, after updating global
- * MIB data from instance counters.
- */
- TCPS_UPDATE_MIB(tcps, tcpHCInSegs, tcp->tcp_ibsegs);
- tcp->tcp_ibsegs = 0;
- TCPS_UPDATE_MIB(tcps, tcpHCOutSegs, tcp->tcp_obsegs);
- tcp->tcp_obsegs = 0;
-
tcp_close_mpp(&tcp->tcp_xmit_head);
if (tcp->tcp_snd_zcopy_aware)
tcp_zcopy_notify(tcp);
@@ -2084,9 +2070,6 @@ tcp_reinit_values(tcp_t *tcp)
tcp->tcp_swnd = 0;
DONTCARE(tcp->tcp_cwnd); /* Init in tcp_process_options */
- ASSERT(tcp->tcp_ibsegs == 0);
- ASSERT(tcp->tcp_obsegs == 0);
-
if (connp->conn_ht_iphc != NULL) {
kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
connp->conn_ht_iphc = NULL;
@@ -2178,6 +2161,8 @@ tcp_reinit_values(tcp_t *tcp)
DONTCARE(tcp->tcp_rtt_sa); /* Init in tcp_init_values */
DONTCARE(tcp->tcp_rtt_sd); /* Init in tcp_init_values */
tcp->tcp_rtt_update = 0;
+ tcp->tcp_rtt_sum = 0;
+ tcp->tcp_rtt_cnt = 0;
DONTCARE(tcp->tcp_swl1); /* Init in case TCPS_LISTEN/TCPS_SYN_SENT */
DONTCARE(tcp->tcp_swl2); /* Init in case TCPS_LISTEN/TCPS_SYN_SENT */
diff --git a/usr/src/uts/common/inet/tcp/tcp_fusion.c b/usr/src/uts/common/inet/tcp/tcp_fusion.c
index 6acc02d769..e73c34de34 100644
--- a/usr/src/uts/common/inet/tcp/tcp_fusion.c
+++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015 by Delphix. All rights reserved.
*/
#include <sys/types.h>
@@ -645,14 +646,16 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size)
peer_tcp->tcp_rack = peer_tcp->tcp_rnxt;
TCPS_BUMP_MIB(tcps, tcpOutDataSegs);
+ TCPS_BUMP_MIB(tcps, tcpHCOutSegs);
TCPS_UPDATE_MIB(tcps, tcpOutDataBytes, send_size);
+ tcp->tcp_cs.tcp_out_data_bytes += send_size;
+ tcp->tcp_cs.tcp_out_data_segs++;
TCPS_BUMP_MIB(tcps, tcpHCInSegs);
TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, send_size);
-
- BUMP_LOCAL(tcp->tcp_obsegs);
- BUMP_LOCAL(peer_tcp->tcp_ibsegs);
+ peer_tcp->tcp_cs.tcp_in_data_inorder_bytes += send_size;
+ peer_tcp->tcp_cs.tcp_in_data_inorder_segs++;
DTRACE_TCP5(send, void, NULL, ip_xmit_attr_t *, connp->conn_ixa,
__dtrace_tcp_void_ip_t *, NULL, tcp_t *, tcp,
diff --git a/usr/src/uts/common/inet/tcp/tcp_input.c b/usr/src/uts/common/inet/tcp/tcp_input.c
index 0e12d23c3e..dd50c3f6ad 100644
--- a/usr/src/uts/common/inet/tcp/tcp_input.c
+++ b/usr/src/uts/common/inet/tcp/tcp_input.c
@@ -559,7 +559,7 @@ tcp_process_options(tcp_t *tcp, tcpha_t *tcpha)
static mblk_t *
tcp_reass(tcp_t *tcp, mblk_t *mp, uint32_t start)
{
- uint32_t end;
+ uint32_t end, bytes;
mblk_t *mp1;
mblk_t *mp2;
mblk_t *next_mp;
@@ -578,26 +578,26 @@ tcp_reass(tcp_t *tcp, mblk_t *mp, uint32_t start)
freeb(mp);
continue;
}
+ bytes = end - start;
mp->b_cont = NULL;
TCP_REASS_SET_SEQ(mp, start);
TCP_REASS_SET_END(mp, end);
mp1 = tcp->tcp_reass_tail;
- if (!mp1) {
- tcp->tcp_reass_tail = mp;
- tcp->tcp_reass_head = mp;
- TCPS_BUMP_MIB(tcps, tcpInDataUnorderSegs);
- TCPS_UPDATE_MIB(tcps, tcpInDataUnorderBytes,
- end - start);
- continue;
- }
- /* New stuff completely beyond tail? */
- if (SEQ_GEQ(start, TCP_REASS_END(mp1))) {
- /* Link it on end. */
- mp1->b_cont = mp;
+ if (mp1 == NULL || SEQ_GEQ(start, TCP_REASS_END(mp1))) {
+ if (mp1 != NULL) {
+ /*
+ * New stuff is beyond the tail; link it on the
+ * end.
+ */
+ mp1->b_cont = mp;
+ } else {
+ tcp->tcp_reass_head = mp;
+ }
tcp->tcp_reass_tail = mp;
TCPS_BUMP_MIB(tcps, tcpInDataUnorderSegs);
- TCPS_UPDATE_MIB(tcps, tcpInDataUnorderBytes,
- end - start);
+ TCPS_UPDATE_MIB(tcps, tcpInDataUnorderBytes, bytes);
+ tcp->tcp_cs.tcp_in_data_unorder_segs++;
+ tcp->tcp_cs.tcp_in_data_unorder_bytes += bytes;
continue;
}
mp1 = tcp->tcp_reass_head;
@@ -2414,7 +2414,7 @@ tcp_input_data(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
flags = (unsigned int)tcpha->tha_flags & 0xFF;
- BUMP_LOCAL(tcp->tcp_ibsegs);
+ TCPS_BUMP_MIB(tcps, tcpHCInSegs);
DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp);
if ((flags & TH_URG) && sqp != NULL) {
@@ -2659,7 +2659,7 @@ tcp_input_data(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
tcp->tcp_ack_tid = 0;
}
tcp_send_data(tcp, ack_mp);
- BUMP_LOCAL(tcp->tcp_obsegs);
+ TCPS_BUMP_MIB(tcps, tcpHCOutSegs);
TCPS_BUMP_MIB(tcps, tcpOutAck);
if (!IPCL_IS_NONSTR(connp)) {
@@ -3048,6 +3048,7 @@ try_again:;
if (tcp->tcp_rwnd == 0) {
TCPS_BUMP_MIB(tcps, tcpInWinProbe);
+ tcp->tcp_cs.tcp_in_zwnd_probes++;
} else {
TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs);
TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap);
@@ -3297,6 +3298,9 @@ ok:;
} else if (seg_len > 0) {
TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len);
+ tcp->tcp_cs.tcp_in_data_inorder_segs++;
+ tcp->tcp_cs.tcp_in_data_inorder_bytes += seg_len;
+
/*
* If an out of order FIN was received before, and the seq
* num and len of the new segment match that of the FIN,
@@ -4146,7 +4150,7 @@ process_ack:
}
mp = tcp_ack_mp(tcp);
if (mp != NULL) {
- BUMP_LOCAL(tcp->tcp_obsegs);
+ TCPS_BUMP_MIB(tcps, tcpHCOutSegs);
TCPS_BUMP_MIB(tcps, tcpOutAck);
tcp_send_data(tcp, mp);
}
@@ -4837,6 +4841,8 @@ xmit_check:
TCPS_BUMP_MIB(tcps, tcpRetransSegs);
TCPS_UPDATE_MIB(tcps, tcpRetransBytes,
snd_size);
+ tcp->tcp_cs.tcp_out_retrans_segs++;
+ tcp->tcp_cs.tcp_out_retrans_bytes += snd_size;
tcp_send_data(tcp, mp1);
}
}
@@ -4912,7 +4918,7 @@ ack_check:
if (mp1 != NULL) {
tcp_send_data(tcp, mp1);
- BUMP_LOCAL(tcp->tcp_obsegs);
+ TCPS_BUMP_MIB(tcps, tcpHCOutSegs);
TCPS_BUMP_MIB(tcps, tcpOutAck);
}
if (tcp->tcp_ack_tid != 0) {
@@ -5228,6 +5234,8 @@ tcp_set_rto(tcp_t *tcp, hrtime_t rtt)
TCPS_BUMP_MIB(tcps, tcpRttUpdate);
tcp->tcp_rtt_update++;
+ tcp->tcp_rtt_sum += m;
+ tcp->tcp_rtt_cnt++;
/* tcp_rtt_sa is not 0 means this is a new sample. */
if (sa != 0) {
diff --git a/usr/src/uts/common/inet/tcp/tcp_output.c b/usr/src/uts/common/inet/tcp/tcp_output.c
index c836076430..f54ab3fb33 100644
--- a/usr/src/uts/common/inet/tcp/tcp_output.c
+++ b/usr/src/uts/common/inet/tcp/tcp_output.c
@@ -1273,7 +1273,9 @@ tcp_output(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
TCPS_BUMP_MIB(tcps, tcpOutDataSegs);
TCPS_UPDATE_MIB(tcps, tcpOutDataBytes, len);
- BUMP_LOCAL(tcp->tcp_obsegs);
+ TCPS_BUMP_MIB(tcps, tcpHCOutSegs);
+ tcp->tcp_cs.tcp_out_data_segs++;
+ tcp->tcp_cs.tcp_out_data_bytes += len;
/* Update the latest receive window size in TCP header. */
tcpha->tha_win = htons(tcp->tcp_rwnd >> tcp->tcp_rcv_ws);
@@ -1960,16 +1962,21 @@ tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len,
}
*snxt += len;
*tail_unsent = (*xmit_tail)->b_wptr - mp1->b_wptr;
- BUMP_LOCAL(tcp->tcp_obsegs);
+ TCPS_BUMP_MIB(tcps, tcpHCOutSegs);
TCPS_BUMP_MIB(tcps, tcpOutDataSegs);
TCPS_UPDATE_MIB(tcps, tcpOutDataBytes, len);
+ tcp->tcp_cs.tcp_out_data_segs++;
+ tcp->tcp_cs.tcp_out_data_bytes += len;
tcp_send_data(tcp, mp);
continue;
}
*snxt += len; /* Adjust later if we don't send all of len */
+ TCPS_BUMP_MIB(tcps, tcpHCOutSegs);
TCPS_BUMP_MIB(tcps, tcpOutDataSegs);
TCPS_UPDATE_MIB(tcps, tcpOutDataBytes, len);
+ tcp->tcp_cs.tcp_out_data_segs++;
+ tcp->tcp_cs.tcp_out_data_bytes += len;
if (*tail_unsent) {
/* Are the bytes above us in flight? */
@@ -2145,6 +2152,7 @@ tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len,
*snxt += spill;
tcp->tcp_last_sent_len += spill;
TCPS_UPDATE_MIB(tcps, tcpOutDataBytes, spill);
+ tcp->tcp_cs.tcp_out_data_bytes += spill;
/*
* Adjust the checksum
*/
@@ -2193,7 +2201,7 @@ tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len,
*/
ixa->ixa_fragsize = ixa->ixa_pmtu;
ixa->ixa_extra_ident = 0;
- tcp->tcp_obsegs += num_lso_seg;
+ TCPS_BUMP_MIB(tcps, tcpHCOutSegs);
TCP_STAT(tcps, tcp_lso_times);
TCP_STAT_UPDATE(tcps, tcp_lso_pkt_out, num_lso_seg);
} else {
@@ -2204,7 +2212,7 @@ tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len,
*/
lso_info_cleanup(mp);
tcp_send_data(tcp, mp);
- BUMP_LOCAL(tcp->tcp_obsegs);
+ TCPS_BUMP_MIB(tcps, tcpHCOutSegs);
}
}
@@ -2420,7 +2428,7 @@ tcp_xmit_ctl(char *str, tcp_t *tcp, uint32_t seq, uint32_t ack, int ctl)
tcp->tcp_rack_cnt = 0;
TCPS_BUMP_MIB(tcps, tcpOutAck);
}
- BUMP_LOCAL(tcp->tcp_obsegs);
+ TCPS_BUMP_MIB(tcps, tcpHCOutSegs);
tcpha->tha_seq = htonl(seq);
tcpha->tha_ack = htonl(ack);
/*
@@ -3394,6 +3402,8 @@ tcp_sack_rexmit(tcp_t *tcp, uint_t *flags)
TCPS_BUMP_MIB(tcps, tcpRetransSegs);
TCPS_UPDATE_MIB(tcps, tcpRetransBytes, seg_len);
TCPS_BUMP_MIB(tcps, tcpOutSackRetransSegs);
+ tcp->tcp_cs.tcp_out_retrans_segs++;
+ tcp->tcp_cs.tcp_out_retrans_bytes += seg_len;
/*
* Update tcp_rexmit_max to extend this SACK recovery phase.
* This happens when new data sent during fast recovery is
@@ -3464,6 +3474,8 @@ tcp_ss_rexmit(tcp_t *tcp)
old_snxt_mp->b_prev = (mblk_t *)(intptr_t)gethrtime();
TCPS_BUMP_MIB(tcps, tcpRetransSegs);
TCPS_UPDATE_MIB(tcps, tcpRetransBytes, cnt);
+ tcp->tcp_cs.tcp_out_retrans_segs++;
+ tcp->tcp_cs.tcp_out_retrans_bytes += cnt;
tcp->tcp_rexmit_nxt = snxt;
}
diff --git a/usr/src/uts/common/inet/tcp/tcp_stats.c b/usr/src/uts/common/inet/tcp/tcp_stats.c
index e6b13fe6c9..dbf320d09d 100644
--- a/usr/src/uts/common/inet/tcp/tcp_stats.c
+++ b/usr/src/uts/common/inet/tcp/tcp_stats.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, Joyent Inc. All rights reserved.
+ * Copyright (c) 2015, 2016 by Delphix. All rights reserved.
*/
#include <sys/types.h>
@@ -86,6 +87,50 @@ tcp_snmp_state(tcp_t *tcp)
}
}
+static void
+tcp_set_conninfo(tcp_t *tcp, struct tcpConnEntryInfo_s *tcei, boolean_t ispriv)
+{
+ /* Copy tcp's state into *tcei; raw sequence numbers need ispriv. */
+ if (ispriv) {
+ tcei->ce_snxt = tcp->tcp_snxt;
+ tcei->ce_suna = tcp->tcp_suna;
+ tcei->ce_rnxt = tcp->tcp_rnxt;
+ tcei->ce_rack = tcp->tcp_rack;
+ } else {
+ /*
+ * Netstat derives send/receive queue sizes from these fields, so
+ * unprivileged callers are given only the differences.
+ */
+ tcei->ce_snxt = tcp->tcp_snxt - tcp->tcp_suna;
+ tcei->ce_suna = 0; /* absolute value withheld */
+ tcei->ce_rnxt = tcp->tcp_rnxt - tcp->tcp_rack;
+ tcei->ce_rack = 0; /* absolute value withheld */
+ }
+
+ tcei->ce_in_data_inorder_bytes = tcp->tcp_cs.tcp_in_data_inorder_bytes;
+ tcei->ce_in_data_inorder_segs = tcp->tcp_cs.tcp_in_data_inorder_segs;
+ tcei->ce_in_data_unorder_bytes = tcp->tcp_cs.tcp_in_data_unorder_bytes;
+ tcei->ce_in_data_unorder_segs = tcp->tcp_cs.tcp_in_data_unorder_segs;
+ tcei->ce_in_zwnd_probes = tcp->tcp_cs.tcp_in_zwnd_probes;
+
+ tcei->ce_out_data_bytes = tcp->tcp_cs.tcp_out_data_bytes;
+ tcei->ce_out_data_segs = tcp->tcp_cs.tcp_out_data_segs;
+ tcei->ce_out_retrans_bytes = tcp->tcp_cs.tcp_out_retrans_bytes;
+ tcei->ce_out_retrans_segs = tcp->tcp_cs.tcp_out_retrans_segs;
+ tcei->ce_out_zwnd_probes = tcp->tcp_cs.tcp_out_zwnd_probes;
+
+ tcei->ce_unsent = tcp->tcp_unsent; /* bytes in hand that are unsent */
+ tcei->ce_swnd = tcp->tcp_swnd;
+ tcei->ce_cwnd = tcp->tcp_cwnd; /* congestion window */
+ tcei->ce_rwnd = tcp->tcp_rwnd;
+ tcei->ce_rto = tcp->tcp_rto;
+ tcei->ce_mss = tcp->tcp_mss;
+ tcei->ce_state = tcp->tcp_state;
+ tcei->ce_rtt_sa = NSEC2USEC(tcp->tcp_rtt_sa >> 3); /* NOTE(review): >> 3 presumably undoes an 8x scaling of tcp_rtt_sa -- confirm */
+ tcei->ce_rtt_sum = NSEC2USEC(tcp->tcp_rtt_sum); /* round trip sum */
+ tcei->ce_rtt_cnt = tcp->tcp_rtt_cnt; /* round trip count */
+}
+
/*
* Return SNMP stuff in buffer in mpdata.
*/
@@ -183,11 +228,6 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl, boolean_t legacy_req)
continue; /* not in this zone */
tcp = connp->conn_tcp;
- TCPS_UPDATE_MIB(tcps, tcpHCInSegs, tcp->tcp_ibsegs);
- tcp->tcp_ibsegs = 0;
- TCPS_UPDATE_MIB(tcps, tcpHCOutSegs, tcp->tcp_obsegs);
- tcp->tcp_obsegs = 0;
-
tce6.tcp6ConnState = tce.tcpConnState =
tcp_snmp_state(tcp);
if (tce.tcpConnState == MIB2_TCP_established ||
@@ -243,35 +283,9 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl, boolean_t legacy_req)
} else {
tce6.tcp6ConnIfIndex = connp->conn_bound_if;
}
- /* Don't want just anybody seeing these... */
- if (ispriv) {
- tce6.tcp6ConnEntryInfo.ce_snxt =
- tcp->tcp_snxt;
- tce6.tcp6ConnEntryInfo.ce_suna =
- tcp->tcp_suna;
- tce6.tcp6ConnEntryInfo.ce_rnxt =
- tcp->tcp_rnxt;
- tce6.tcp6ConnEntryInfo.ce_rack =
- tcp->tcp_rack;
- } else {
- /*
- * Netstat, unfortunately, uses this to
- * get send/receive queue sizes. How to fix?
- * Why not compute the difference only?
- */
- tce6.tcp6ConnEntryInfo.ce_snxt =
- tcp->tcp_snxt - tcp->tcp_suna;
- tce6.tcp6ConnEntryInfo.ce_suna = 0;
- tce6.tcp6ConnEntryInfo.ce_rnxt =
- tcp->tcp_rnxt - tcp->tcp_rack;
- tce6.tcp6ConnEntryInfo.ce_rack = 0;
- }
- tce6.tcp6ConnEntryInfo.ce_swnd = tcp->tcp_swnd;
- tce6.tcp6ConnEntryInfo.ce_rwnd = tcp->tcp_rwnd;
- tce6.tcp6ConnEntryInfo.ce_rto = tcp->tcp_rto;
- tce6.tcp6ConnEntryInfo.ce_mss = tcp->tcp_mss;
- tce6.tcp6ConnEntryInfo.ce_state = tcp->tcp_state;
+ tcp_set_conninfo(tcp, &tce6.tcp6ConnEntryInfo,
+ ispriv);
tce6.tcp6ConnCreationProcess =
(connp->conn_cpid < 0) ? MIB2_UNKNOWN_PROCESS :
@@ -307,37 +321,9 @@ tcp_snmp_get(queue_t *q, mblk_t *mpctl, boolean_t legacy_req)
}
tce.tcpConnLocalPort = ntohs(connp->conn_lport);
tce.tcpConnRemPort = ntohs(connp->conn_fport);
- /* Don't want just anybody seeing these... */
- if (ispriv) {
- tce.tcpConnEntryInfo.ce_snxt =
- tcp->tcp_snxt;
- tce.tcpConnEntryInfo.ce_suna =
- tcp->tcp_suna;
- tce.tcpConnEntryInfo.ce_rnxt =
- tcp->tcp_rnxt;
- tce.tcpConnEntryInfo.ce_rack =
- tcp->tcp_rack;
- } else {
- /*
- * Netstat, unfortunately, uses this to
- * get send/receive queue sizes. How
- * to fix?
- * Why not compute the difference only?
- */
- tce.tcpConnEntryInfo.ce_snxt =
- tcp->tcp_snxt - tcp->tcp_suna;
- tce.tcpConnEntryInfo.ce_suna = 0;
- tce.tcpConnEntryInfo.ce_rnxt =
- tcp->tcp_rnxt - tcp->tcp_rack;
- tce.tcpConnEntryInfo.ce_rack = 0;
- }
- tce.tcpConnEntryInfo.ce_swnd = tcp->tcp_swnd;
- tce.tcpConnEntryInfo.ce_rwnd = tcp->tcp_rwnd;
- tce.tcpConnEntryInfo.ce_rto = tcp->tcp_rto;
- tce.tcpConnEntryInfo.ce_mss = tcp->tcp_mss;
- tce.tcpConnEntryInfo.ce_state =
- tcp->tcp_state;
+ tcp_set_conninfo(tcp, &tce.tcpConnEntryInfo,
+ ispriv);
tce.tcpConnCreationProcess =
(connp->conn_cpid < 0) ?
diff --git a/usr/src/uts/common/inet/tcp/tcp_time_wait.c b/usr/src/uts/common/inet/tcp/tcp_time_wait.c
index 72997de24a..caf7aeda50 100644
--- a/usr/src/uts/common/inet/tcp/tcp_time_wait.c
+++ b/usr/src/uts/common/inet/tcp/tcp_time_wait.c
@@ -608,7 +608,7 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
conn_t *connp = tcp->tcp_connp;
tcp_stack_t *tcps = tcp->tcp_tcps;
- BUMP_LOCAL(tcp->tcp_ibsegs);
+ TCPS_BUMP_MIB(tcps, tcpHCInSegs);
DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp);
flags = (unsigned int)tcpha->tha_flags & 0xFF;
@@ -794,6 +794,8 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
TCPS_BUMP_MIB(tcps, tcpInClosed);
TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len);
+ tcp->tcp_cs.tcp_in_data_inorder_segs++;
+ tcp->tcp_cs.tcp_in_data_inorder_bytes += seg_len;
}
if (flags & TH_RST) {
(void) tcp_clean_death(tcp, 0);
diff --git a/usr/src/uts/common/inet/tcp/tcp_timers.c b/usr/src/uts/common/inet/tcp/tcp_timers.c
index b890bf6142..81cf5c57a5 100644
--- a/usr/src/uts/common/inet/tcp/tcp_timers.c
+++ b/usr/src/uts/common/inet/tcp/tcp_timers.c
@@ -594,7 +594,7 @@ tcp_ack_timer(void *arg)
mp = tcp_ack_mp(tcp);
if (mp != NULL) {
- BUMP_LOCAL(tcp->tcp_obsegs);
+ TCPS_BUMP_MIB(tcps, tcpHCOutSegs);
TCPS_BUMP_MIB(tcps, tcpOutAck);
TCPS_BUMP_MIB(tcps, tcpOutAckDelayed);
tcp_send_data(tcp, mp);
@@ -853,6 +853,7 @@ tcp_timer(void *arg)
tcp->tcp_swnd++;
tcp->tcp_zero_win_probe = B_TRUE;
TCPS_BUMP_MIB(tcps, tcpOutWinProbe);
+ tcp->tcp_cs.tcp_out_zwnd_probes++;
} else {
/*
* Handle timeout from sender SWS avoidance.
@@ -1081,6 +1082,8 @@ timer_rexmit:
tcp->tcp_csuna = tcp->tcp_snxt;
TCPS_BUMP_MIB(tcps, tcpRetransSegs);
TCPS_UPDATE_MIB(tcps, tcpRetransBytes, mss);
+ tcp->tcp_cs.tcp_out_retrans_segs++;
+ tcp->tcp_cs.tcp_out_retrans_bytes += mss;
tcp_send_data(tcp, mp);
}
diff --git a/usr/src/uts/common/inet/tcp_stats.h b/usr/src/uts/common/inet/tcp_stats.h
index 487d0d3414..704102e9d6 100644
--- a/usr/src/uts/common/inet/tcp_stats.h
+++ b/usr/src/uts/common/inet/tcp_stats.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015 by Delphix. All rights reserved.
*/
#ifndef _INET_TCP_STATS_H
@@ -205,6 +206,26 @@ typedef struct {
tcp_stat_counter_t tcp_sc_stats;
} tcp_stats_cpu_t;
+/*
+ * Per-connection statistics. Some of these are also kept globally in the
+ * per-cpu tcp_sc_mib entry (see tcp_stats_cpu_t above). We need not maintain
+ * per-cpu versions of these stats since a connection is typically processed
+ * on the same CPU.
+ */
+typedef struct tcp_conn_stats {
+ uint64_t tcp_in_data_inorder_bytes; /* tracks tcpInDataInorderBytes */
+ uint64_t tcp_in_data_inorder_segs; /* tracks tcpInDataInorderSegs */
+ uint64_t tcp_in_data_unorder_bytes; /* tracks tcpInDataUnorderBytes */
+ uint64_t tcp_in_data_unorder_segs; /* tracks tcpInDataUnorderSegs */
+ uint64_t tcp_in_zwnd_probes; /* tracks tcpInWinProbe */
+
+ uint64_t tcp_out_data_bytes; /* tracks tcpOutDataBytes */
+ uint64_t tcp_out_data_segs; /* tracks tcpOutDataSegs */
+ uint64_t tcp_out_retrans_bytes; /* tracks tcpRetransBytes */
+ uint64_t tcp_out_retrans_segs; /* tracks tcpRetransSegs */
+ uint64_t tcp_out_zwnd_probes; /* tracks tcpOutWinProbe */
+} tcp_conn_stats_t;
+
#define TCPS_BUMP_MIB(tcps, x) \
BUMP_MIB(&(tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_mib, x)