From 0c268761780011e1de45099cf76b2ee464ea8f50 Mon Sep 17 00:00:00 2001 From: Toomas Soome Date: Thu, 14 May 2020 21:42:38 +0300 Subject: 12733 loader: autoboot_timeout greater than 10 is not shown Reviewed by: Gergő Doma Reviewed by: Andy Fiddaman Reviewed by: Alexander Eremin Approved by: Robert Mustacchi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- usr/src/boot/sys/boot/forth/menu.4th | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'usr/src') diff --git a/usr/src/boot/sys/boot/forth/menu.4th b/usr/src/boot/sys/boot/forth/menu.4th index 811db1e5b7..e60d5b8d6a 100644 --- a/usr/src/boot/sys/boot/forth/menu.4th +++ b/usr/src/boot/sys/boot/forth/menu.4th @@ -549,28 +549,22 @@ also menu-infrastructure definitions then ; -\ Takes a single integer on the stack and updates the timeout display. The -\ integer must be between 0 and 9 (we will only update a single digit in the -\ source message). +\ Takes an integer on the stack and updates the timeout display. \ : menu-timeout-update ( N -- ) - \ Enforce minimum/maximum - dup 9 > if drop 9 then + \ Enforce minimum dup 0 < if drop 0 then - s" Autoboot in N seconds. [Space] to pause" ( n -- n c-addr/u ) + menu_timeout_x @ menu_timeout_y @ at-xy \ position cursor - 2 pick 0> if - rot 48 + -rot ( n c-addr/u -- n' c-addr/u ) \ convert to ASCII - 12 +c! ( n' c-addr/u -- c-addr/u ) \ replace 'N' above - - menu_timeout_x @ menu_timeout_y @ at-xy \ position cursor - type ( c-addr/u -- ) \ print message + dup 0> if + s" Autoboot in " type + dup . s" second" type + 1 > if [char] s emit then + s" . 
[Space] to pause " type else - menu_timeout_x @ menu_timeout_y @ at-xy \ position cursor - spaces ( n c-addr/u -- n c-addr ) \ erase message - 2drop ( n c-addr -- ) + drop 40 spaces \ erase message then at-bl -- cgit v1.2.3 From 3382f241dd77fdfc4a9c4b03092e328924cd0c65 Mon Sep 17 00:00:00 2001 From: luozhengzheng Date: Thu, 7 May 2020 23:05:42 -0500 Subject: 12707 Port OpenZFS Coverity Fixes Reviewed by: Brian Behlendorf Reviewed by: Yuri Pankov Reviewed by: Toomas Soome Reviewed by: Gergő Doma Portions contributed by: Jason King Approved by: Dan McDonald MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- usr/src/cmd/zfs/zfs_main.c | 4 +- usr/src/cmd/zinject/zinject.c | 63 ++++++++++++++++++---- .../zfs-tests/cmd/dir_rd_update/dir_rd_update.c | 5 ++ 3 files changed, 60 insertions(+), 12 deletions(-) (limited to 'usr/src') diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c index ef18430bad..08a0b6bcd1 100644 --- a/usr/src/cmd/zfs/zfs_main.c +++ b/usr/src/cmd/zfs/zfs_main.c @@ -4097,7 +4097,7 @@ zfs_do_send(int argc, char **argv) * Incremental source name begins with # or @. * Default to same fs as target. 
*/ - (void) strncpy(frombuf, argv[0], sizeof (frombuf)); + (void) strlcpy(frombuf, argv[0], sizeof (frombuf)); cp = strchr(frombuf, '@'); if (cp != NULL) *cp = '\0'; @@ -7330,7 +7330,7 @@ zfs_do_bookmark(int argc, char **argv) *strchr(snapname, '#') = '\0'; (void) strlcat(snapname, argv[0], sizeof (snapname)); } else { - (void) strncpy(snapname, argv[0], sizeof (snapname)); + (void) strlcpy(snapname, argv[0], sizeof (snapname)); } zhp = zfs_open(g_zfs, snapname, ZFS_TYPE_SNAPSHOT); if (zhp == NULL) diff --git a/usr/src/cmd/zinject/zinject.c b/usr/src/cmd/zinject/zinject.c index 6ab78ad46b..3ba2976df1 100644 --- a/usr/src/cmd/zinject/zinject.c +++ b/usr/src/cmd/zinject/zinject.c @@ -772,6 +772,7 @@ main(int argc, char **argv) if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) { (void) fprintf(stderr, "failed to open ZFS device\n"); + libzfs_fini(g_zfs); return (1); } @@ -787,6 +788,7 @@ main(int argc, char **argv) "information.\n"); } + libzfs_fini(g_zfs); return (0); } @@ -805,6 +807,7 @@ main(int argc, char **argv) (void) fprintf(stderr, "invalid action '%s': " "must be 'degrade' or 'fault'\n", optarg); usage(); + libzfs_fini(g_zfs); return (1); } break; @@ -835,6 +838,7 @@ main(int argc, char **argv) (void) fprintf(stderr, "invalid i/o delay " "value: '%s'\n", optarg); usage(); + libzfs_fini(g_zfs); return (1); } break; @@ -864,6 +868,7 @@ main(int argc, char **argv) "be in the range [0.0001, 100.0]\n", ret == EINVAL ? "invalid value: " : ret == ERANGE ? 
"out of range: " : ""); + libzfs_fini(g_zfs); return (1); } break; @@ -877,6 +882,7 @@ main(int argc, char **argv) (void) fprintf(stderr, "invalid duration '%s': " "must be a positive integer\n", optarg); usage(); + libzfs_fini(g_zfs); return (1); } /* store duration of txgs as its negative */ @@ -884,6 +890,7 @@ main(int argc, char **argv) break; case 'h': usage(); + libzfs_fini(g_zfs); return (0); case 'I': /* default duration, if one hasn't yet been defined */ @@ -897,6 +904,7 @@ main(int argc, char **argv) (void) fprintf(stderr, "invalid level '%s': " "must be an integer\n", optarg); usage(); + libzfs_fini(g_zfs); return (1); } break; @@ -922,6 +930,7 @@ main(int argc, char **argv) (void) fprintf(stderr, "invalid duration '%s': " "must be a positive integer\n", optarg); usage(); + libzfs_fini(g_zfs); return (1); } break; @@ -941,6 +950,7 @@ main(int argc, char **argv) "'%s': must be 'read', 'write', 'free', " "'claim' or 'all'\n", optarg); usage(); + libzfs_fini(g_zfs); return (1); } break; @@ -950,6 +960,7 @@ main(int argc, char **argv) (void) fprintf(stderr, "invalid type '%s'\n", optarg); usage(); + libzfs_fini(g_zfs); return (1); } break; @@ -962,6 +973,7 @@ main(int argc, char **argv) (void) fprintf(stderr, "invalid label type " "'%s'\n", optarg); usage(); + libzfs_fini(g_zfs); return (1); } break; @@ -969,11 +981,13 @@ main(int argc, char **argv) (void) fprintf(stderr, "option -%c requires an " "operand\n", optopt); usage(); + libzfs_fini(g_zfs); return (1); case '?': (void) fprintf(stderr, "invalid option '%c'\n", optopt); usage(); + libzfs_fini(g_zfs); return (2); } } @@ -994,11 +1008,13 @@ main(int argc, char **argv) (void) fprintf(stderr, "cancel (-c) incompatible with " "any other options\n"); usage(); + libzfs_fini(g_zfs); return (2); } if (argc != 0) { (void) fprintf(stderr, "extraneous argument to '-c'\n"); usage(); + libzfs_fini(g_zfs); return (2); } @@ -1010,6 +1026,7 @@ main(int argc, char **argv) (void) fprintf(stderr, "invalid handle id 
'%s':" " must be an integer or 'all'\n", cancel); usage(); + libzfs_fini(g_zfs); return (1); } return (cancel_handler(id)); @@ -1027,6 +1044,7 @@ main(int argc, char **argv) (void) fprintf(stderr, "device (-d) incompatible with " "data error injection\n"); usage(); + libzfs_fini(g_zfs); return (2); } @@ -1034,21 +1052,25 @@ main(int argc, char **argv) (void) fprintf(stderr, "device (-d) injection requires " "a single pool name\n"); usage(); + libzfs_fini(g_zfs); return (2); } - (void) strcpy(pool, argv[0]); + (void) strlcpy(pool, argv[0], sizeof (pool)); dataset[0] = '\0'; if (error == ECKSUM) { (void) fprintf(stderr, "device error type must be " "'io' or 'nxio'\n"); + libzfs_fini(g_zfs); return (1); } record.zi_iotype = io_type; - if (translate_device(pool, device, label, &record) != 0) + if (translate_device(pool, device, label, &record) != 0) { + libzfs_fini(g_zfs); return (1); + } if (!error) error = ENXIO; @@ -1062,6 +1084,7 @@ main(int argc, char **argv) (void) fprintf(stderr, "raw (-b) format with " "any other options\n"); usage(); + libzfs_fini(g_zfs); return (2); } @@ -1069,21 +1092,25 @@ main(int argc, char **argv) (void) fprintf(stderr, "raw (-b) format expects a " "single pool name\n"); usage(); + libzfs_fini(g_zfs); return (2); } - (void) strcpy(pool, argv[0]); + (void) strlcpy(pool, argv[0], sizeof (pool)); dataset[0] = '\0'; if (error == ENXIO) { (void) fprintf(stderr, "data error type must be " "'checksum' or 'io'\n"); + libzfs_fini(g_zfs); return (1); } record.zi_cmd = ZINJECT_DATA_FAULT; - if (translate_raw(raw, &record) != 0) + if (translate_raw(raw, &record) != 0) { + libzfs_fini(g_zfs); return (1); + } if (!error) error = EIO; } else if (record.zi_cmd == ZINJECT_PANIC) { @@ -1093,6 +1120,7 @@ main(int argc, char **argv) (void) fprintf(stderr, "panic (-p) incompatible with " "other options\n"); usage(); + libzfs_fini(g_zfs); return (2); } @@ -1100,10 +1128,11 @@ main(int argc, char **argv) (void) fprintf(stderr, "panic (-p) injection requires " 
"a single pool name and an optional id\n"); usage(); + libzfs_fini(g_zfs); return (2); } - (void) strcpy(pool, argv[0]); + (void) strlcpy(pool, argv[0], sizeof (pool)); if (argv[1] != NULL) record.zi_type = atoi(argv[1]); dataset[0] = '\0'; @@ -1121,21 +1150,24 @@ main(int argc, char **argv) (void) fprintf(stderr, "-s or -g meaningless " "without -I (ignore writes)\n"); usage(); + libzfs_fini(g_zfs); return (2); } else if (dur_secs && dur_txg) { (void) fprintf(stderr, "choose a duration either " "in seconds (-s) or a number of txgs (-g) " "but not both\n"); usage(); + libzfs_fini(g_zfs); return (2); } else if (argc != 1) { (void) fprintf(stderr, "ignore writes (-I) " "injection requires a single pool name\n"); usage(); + libzfs_fini(g_zfs); return (2); } - (void) strcpy(pool, argv[0]); + (void) strlcpy(pool, argv[0], sizeof (pool)); dataset[0] = '\0'; } else if (type == TYPE_INVAL) { if (flags == 0) { @@ -1143,16 +1175,18 @@ main(int argc, char **argv) "'-t', '-a', '-p', '-I' or '-u' " "must be specified\n"); usage(); + libzfs_fini(g_zfs); return (2); } if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) { - (void) strcpy(pool, argv[0]); + (void) strlcpy(pool, argv[0], sizeof (pool)); dataset[0] = '\0'; } else if (argc != 0) { (void) fprintf(stderr, "extraneous argument for " "'-f'\n"); usage(); + libzfs_fini(g_zfs); return (2); } @@ -1161,12 +1195,14 @@ main(int argc, char **argv) if (argc != 1) { (void) fprintf(stderr, "missing object\n"); usage(); + libzfs_fini(g_zfs); return (2); } if (error == ENXIO) { (void) fprintf(stderr, "data error type must be " "'checksum' or 'io'\n"); + libzfs_fini(g_zfs); return (1); } @@ -1199,8 +1235,10 @@ main(int argc, char **argv) } if (translate_record(type, argv[0], range, level, &record, pool, - dataset) != 0) + dataset) != 0) { + libzfs_fini(g_zfs); return (1); + } if (!error) error = EIO; } @@ -1211,11 +1249,16 @@ main(int argc, char **argv) * time we access the pool. 
*/ if (dataset[0] != '\0' && domount) { - if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL) + if ((zhp = zfs_open(g_zfs, dataset, + ZFS_TYPE_DATASET)) == NULL) { + libzfs_fini(g_zfs); return (1); + } - if (zfs_unmount(zhp, NULL, 0) != 0) + if (zfs_unmount(zhp, NULL, 0) != 0) { + libzfs_fini(g_zfs); return (1); + } } record.zi_error = error; diff --git a/usr/src/test/zfs-tests/cmd/dir_rd_update/dir_rd_update.c b/usr/src/test/zfs-tests/cmd/dir_rd_update/dir_rd_update.c index bca365c524..0283bc9644 100644 --- a/usr/src/test/zfs-tests/cmd/dir_rd_update/dir_rd_update.c +++ b/usr/src/test/zfs-tests/cmd/dir_rd_update/dir_rd_update.c @@ -63,6 +63,11 @@ main(int argc, char **argv) } cp1 = argv[1]; + if (strlen(cp1) >= (sizeof (dirpath) - strlen("TMP_DIR"))) { + (void) printf("The string length of mount point is " + "too large\n"); + exit(-1); + } (void) strcpy(&dirpath[0], (const char *)cp1); (void) strcat(&dirpath[strlen(dirpath)], "TMP_DIR"); -- cgit v1.2.3 From 109b65249647da8f2f4306cd9b3d2800b05fd59b Mon Sep 17 00:00:00 2001 From: Toomas Soome Date: Sat, 16 May 2020 17:31:10 +0300 Subject: 12744 gfx_private: bitmap_cons_clear 8-bit mode is using wrong color Reviewed by: Yuri Pankov Reviewed by: Andy Fiddaman Approved by: Robert Mustacchi --- usr/src/uts/i86pc/io/gfx_private/gfxp_bitmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'usr/src') diff --git a/usr/src/uts/i86pc/io/gfx_private/gfxp_bitmap.c b/usr/src/uts/i86pc/io/gfx_private/gfxp_bitmap.c index 2fed9f162c..1a11d7ff0f 100644 --- a/usr/src/uts/i86pc/io/gfx_private/gfxp_bitmap.c +++ b/usr/src/uts/i86pc/io/gfx_private/gfxp_bitmap.c @@ -450,10 +450,10 @@ bitmap_cons_clear(struct gfxp_fb_softc *softc, struct vis_consclear *ca) for (i = 0; i < console->fb.screen.y; i++) { if (softc->mode == KD_TEXT) { fb = console->fb.fb + i * pitch; - (void) memset(fb, ca->bg_color, pitch); + (void) memset(fb, data, pitch); } fb = console->fb.shadow_fb + i * pitch; - (void) memset(fb, 
ca->bg_color, pitch); + (void) memset(fb, data, pitch); } break; case 15: -- cgit v1.2.3 From c039d8138bcc82c8082abc5560e0293afb7994cf Mon Sep 17 00:00:00 2001 From: Nan Xiao Date: Sat, 16 May 2020 12:12:37 +0800 Subject: 12741 Fix "more then" typo in dlopen(3c) Reviewed by: Andrew Stormont Reviewed by: Sebastian Wiedenroth Reviewed by: Marcel Telka Approved by: Robert Mustacchi --- usr/src/man/man3c/dlopen.3c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'usr/src') diff --git a/usr/src/man/man3c/dlopen.3c b/usr/src/man/man3c/dlopen.3c index b72d070579..4836a95562 100644 --- a/usr/src/man/man3c/dlopen.3c +++ b/usr/src/man/man3c/dlopen.3c @@ -3,11 +3,10 @@ .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH DLOPEN 3C "Sep 7, 2015" +.TH DLOPEN 3C "May 16, 2020" .SH NAME dlopen, dlmopen \- gain access to an executable object file .SH SYNOPSIS -.LP .nf #include #include @@ -21,7 +20,6 @@ dlopen, dlmopen \- gain access to an executable object file .fi .SH DESCRIPTION -.LP The \fBdlopen()\fR function makes an executable object file available to a running process. \fBdlopen()\fR returns to the process a \fIhandle\fR that the process can use on subsequent calls to \fBdlsym\fR(3C), \fBdladdr\fR(3C), @@ -130,7 +128,7 @@ of other objects that include the same group. 
The program image file and any objects loaded at program startup have the mode \fBRTLD_GLOBAL\fR. The mode \fBRTLD_LOCAL\fR is the default mode for any objects that are acquired with \fBdlopen()\fR. A local object can be a -dependency of more then one group. Any object of mode \fBRTLD_LOCAL\fR that is +dependency of more than one group. Any object of mode \fBRTLD_LOCAL\fR that is referenced as a dependency of an object of mode \fBRTLD_GLOBAL\fR is promoted to \fBRTLD_GLOBAL\fR. In other words, the \fBRTLD_LOCAL\fR mode is ignored. .sp @@ -273,7 +271,6 @@ are opened on a new link-map list must express all of their dependencies. .RE .SH RETURN VALUES -.LP The \fBdlopen()\fR function returns \fINULL\fR if \fIpathname\fR cannot be found, cannot be opened for reading, or is not a shared object or a relocatable object. \fBdlopen()\fR also returns \fINULL\fR if an error occurs during the @@ -281,13 +278,11 @@ process of loading \fIpathname\fR or relocating its symbolic references. See \fBNOTES\fR. Additional diagnostic information is available through \fBdlerror()\fR. .SH USAGE -.LP The \fBdlopen()\fR and \fBdlmopen()\fR functions are members of a family of functions that give the user direct access to the dynamic linking facilities. This family of functions is available only to dynamically-linked processes. See the \fILinker and Libraries Guide\fR. .SH ATTRIBUTES -.LP See \fBattributes\fR(5) for descriptions of the following attributes: .sp @@ -304,7 +299,6 @@ MT\(miLevel MT\(miSafe .TE .SH SEE ALSO -.LP \fBld\fR(1), \fBld.so.1\fR(1), \fBdladdr\fR(3C), \fBdlclose\fR(3C), \fBdldump\fR(3C), \fBdlerror\fR(3C), \fBdlinfo\fR(3C), \fBdlsym\fR(3C), \fBattributes\fR(5), \fBstandards\fR(5) @@ -312,7 +306,6 @@ MT\(miLevel MT\(miSafe .LP \fILinker and Libraries Guide\fR .SH NOTES -.LP If \fIpathname\fR has dependencies on other objects, these objects are automatically loaded by \fBdlopen()\fR. 
The directory search path used to find \fIpathname\fR and any dependencies can be affected by setting the environment -- cgit v1.2.3 From f13f199891d2a0440db0361743dd73527f565e89 Mon Sep 17 00:00:00 2001 From: Alexander Eremin Date: Sat, 16 May 2020 13:35:05 +0000 Subject: 12729 bootadm set-menu timeout crashes without timeout value Reviewed by: Yuri Pankov Reviewed by: Toomas Soome Reviewed by: Gergő Doma Approved by: Robert Mustacchi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- usr/src/cmd/boot/bootadm/bootadm_loader.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'usr/src') diff --git a/usr/src/cmd/boot/bootadm/bootadm_loader.c b/usr/src/cmd/boot/bootadm/bootadm_loader.c index 5755efd7d5..15bf160745 100644 --- a/usr/src/cmd/boot/bootadm/bootadm_loader.c +++ b/usr/src/cmd/boot/bootadm/bootadm_loader.c @@ -27,6 +27,7 @@ * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright 2016 Toomas Soome * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2020 Data Direct Networks.
*/ /* @@ -616,6 +617,9 @@ set_option(struct menu_lst *menu, char *dummy, char *opt) val = strchr(opt, '='); if (val != NULL) { *val++ = '\0'; + } else { + bam_error(_("missing value in key=value\n")); + return (BAM_ERROR); } if (strcmp(opt, "default") == 0) { -- cgit v1.2.3 From c61a1653a4d73dbc950dac7d96350fd6cb517486 Mon Sep 17 00:00:00 2001 From: Ryan Zezeski Date: Mon, 4 May 2020 17:50:44 +0000 Subject: 12676 want better offloads for vnics 12677 simnet has bogus mi_tx_cksum_flags 12678 mac_tx() is too eager to emulate hardware offloads Portions contributed by: Patrick Mooney Portions contributed by: Robert Mustacchi Reviewed by: Patrick Mooney Reviewed by: Andy Fiddaman Approved by: Dan McDonald --- usr/src/pkg/manifests/system-test-nettest.mf | 57 + usr/src/test/Makefile | 2 + usr/src/test/net-tests/Makefile | 20 + usr/src/test/net-tests/cmd/Makefile | 36 + usr/src/test/net-tests/cmd/nettest.ksh | 52 + usr/src/test/net-tests/config/Makefile | 38 + usr/src/test/net-tests/config/ip_forwarding.config | 22 + usr/src/test/net-tests/runfiles/Makefile | 38 + usr/src/test/net-tests/runfiles/default.run | 44 + usr/src/test/net-tests/tests/Makefile | 42 + usr/src/test/net-tests/tests/forwarding/Makefile | 67 + usr/src/test/net-tests/tests/forwarding/README | 177 +++ .../net-tests/tests/forwarding/ip_forwarding.ksh | 496 +++++++ .../test/net-tests/tests/forwarding/ip_fwd_001.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_002.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_003.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_004.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_005.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_006.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_007.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_008.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_009.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_010.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_011.ksh | 22 + 
.../test/net-tests/tests/forwarding/ip_fwd_012.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_013.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_014.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_015.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_016.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_017.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_018.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_019.ksh | 22 + .../test/net-tests/tests/forwarding/ip_fwd_020.ksh | 22 + .../net-tests/tests/forwarding/ip_fwd_suite.ksh | 115 ++ usr/src/test/net-tests/tests/net_common.ksh | 650 +++++++++ usr/src/uts/common/inet/ip/ip6.c | 98 +- usr/src/uts/common/inet/ip/ip6_input.c | 7 +- usr/src/uts/common/inet/ip/ip_input.c | 22 +- usr/src/uts/common/inet/ip6.h | 3 +- usr/src/uts/common/inet/ip_impl.h | 20 +- usr/src/uts/common/io/bridge.c | 51 +- usr/src/uts/common/io/dls/dls_link.c | 8 +- usr/src/uts/common/io/fcoe/fcoe_fc.c | 5 +- usr/src/uts/common/io/mac/mac.c | 88 +- usr/src/uts/common/io/mac/mac_bcast.c | 13 +- usr/src/uts/common/io/mac/mac_client.c | 134 +- usr/src/uts/common/io/mac/mac_datapath_setup.c | 2 +- usr/src/uts/common/io/mac/mac_flow.c | 3 +- usr/src/uts/common/io/mac/mac_provider.c | 96 +- usr/src/uts/common/io/mac/mac_sched.c | 91 +- usr/src/uts/common/io/mac/mac_soft_ring.c | 2 +- usr/src/uts/common/io/mac/mac_util.c | 1490 +++++++++++++++++--- usr/src/uts/common/io/simnet/simnet.c | 495 +++++-- usr/src/uts/common/io/simnet/simnet_impl.h | 13 + usr/src/uts/common/io/stream.c | 12 +- usr/src/uts/common/io/vnic/vnic_dev.c | 23 + usr/src/uts/common/os/ip_cksum.c | 108 ++ usr/src/uts/common/sys/mac.h | 34 +- usr/src/uts/common/sys/mac_client.h | 2 + usr/src/uts/common/sys/mac_client_impl.h | 4 +- usr/src/uts/common/sys/mac_impl.h | 70 +- usr/src/uts/common/sys/pattr.h | 3 + usr/src/uts/common/sys/vnic_impl.h | 3 +- usr/src/uts/common/xen/io/xnb.c | 5 +- 64 files changed, 4619 insertions(+), 582 deletions(-) create 
mode 100644 usr/src/pkg/manifests/system-test-nettest.mf create mode 100644 usr/src/test/net-tests/Makefile create mode 100644 usr/src/test/net-tests/cmd/Makefile create mode 100644 usr/src/test/net-tests/cmd/nettest.ksh create mode 100644 usr/src/test/net-tests/config/Makefile create mode 100644 usr/src/test/net-tests/config/ip_forwarding.config create mode 100644 usr/src/test/net-tests/runfiles/Makefile create mode 100644 usr/src/test/net-tests/runfiles/default.run create mode 100644 usr/src/test/net-tests/tests/Makefile create mode 100644 usr/src/test/net-tests/tests/forwarding/Makefile create mode 100644 usr/src/test/net-tests/tests/forwarding/README create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_forwarding.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_001.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_002.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_003.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_004.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_005.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_006.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_007.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_008.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_009.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_010.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_011.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_012.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_013.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_014.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_015.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_016.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_017.ksh 
create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_018.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_019.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_020.ksh create mode 100644 usr/src/test/net-tests/tests/forwarding/ip_fwd_suite.ksh create mode 100644 usr/src/test/net-tests/tests/net_common.ksh (limited to 'usr/src') diff --git a/usr/src/pkg/manifests/system-test-nettest.mf b/usr/src/pkg/manifests/system-test-nettest.mf new file mode 100644 index 0000000000..b313b0cc1c --- /dev/null +++ b/usr/src/pkg/manifests/system-test-nettest.mf @@ -0,0 +1,57 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. 
+# + +# +# Copyright 2020 Oxide Computer Company +# + +set name=pkg.fmri value=pkg:/system/test/nettest@$(PKGVERS) +set name=pkg.description value="Miscellaneous Network Unit Tests" +set name=pkg.summary value="Network Unit Test Suite" +set name=info.classification \ + value=org.opensolaris.category.2008:Development/System +set name=variant.arch value=$(ARCH) +dir path=opt/net-tests +dir path=opt/net-tests/bin +dir path=opt/net-tests/config +dir path=opt/net-tests/runfiles +dir path=opt/net-tests/tests +dir path=opt/net-tests/tests/forwarding +file path=opt/net-tests/bin/nettest mode=0555 +file path=opt/net-tests/config/ip_forwarding.config mode=0644 \ + preserve=renamenew +file path=opt/net-tests/runfiles/default.run mode=0444 +file path=opt/net-tests/tests/forwarding/README mode=0444 +file path=opt/net-tests/tests/forwarding/ip_forwarding mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_001 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_002 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_003 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_004 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_005 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_006 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_007 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_008 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_009 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_010 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_011 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_012 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_013 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_014 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_015 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_016 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_017 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_018 mode=0555 
+file path=opt/net-tests/tests/forwarding/ip_fwd_019 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_020 mode=0555 +file path=opt/net-tests/tests/forwarding/ip_fwd_suite mode=0555 +file path=opt/net-tests/tests/net_common mode=0555 +license lic_CDDL license=lic_CDDL +depend fmri=system/test/testrunner type=require diff --git a/usr/src/test/Makefile b/usr/src/test/Makefile index fa57d36772..9756f02ef7 100644 --- a/usr/src/test/Makefile +++ b/usr/src/test/Makefile @@ -12,6 +12,7 @@ # # Copyright (c) 2012 by Delphix. All rights reserved. # Copyright 2014 Garrett D'Amore +# Copyright 2019 Joyent, Inc. # .PARALLEL: $(SUBDIRS) @@ -20,6 +21,7 @@ SUBDIRS = \ crypto-tests \ elf-tests \ libc-tests \ + net-tests \ os-tests \ smbclient-tests \ test-runner \ diff --git a/usr/src/test/net-tests/Makefile b/usr/src/test/net-tests/Makefile new file mode 100644 index 0000000000..6536e70c59 --- /dev/null +++ b/usr/src/test/net-tests/Makefile @@ -0,0 +1,20 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019, Joyent Inc. +# + +.PARALLEL: $(SUBDIRS) + +SUBDIRS = cmd config runfiles tests + +include $(SRC)/test/Makefile.com diff --git a/usr/src/test/net-tests/cmd/Makefile b/usr/src/test/net-tests/cmd/Makefile new file mode 100644 index 0000000000..b2770c84c6 --- /dev/null +++ b/usr/src/test/net-tests/cmd/Makefile @@ -0,0 +1,36 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Joyent, Inc. +# +include $(SRC)/Makefile.master + +ROOTOPTPKG = $(ROOT)/opt/net-tests +ROOTBIN = $(ROOTOPTPKG)/bin +PROGS = nettest +CMDS = $(PROGS:%=$(ROOTBIN)/%) +$(CMDS) := FILEMODE = 0555 + +include $(SRC)/test/Makefile.com + +install: $(CMDS) + +clobber: clean + $(RM) $(CMDS) + +$(CMDS): $(ROOTBIN) + +$(ROOTBIN): + $(INS.dir) + +$(ROOTBIN)/%: %.ksh + $(INS.rename) diff --git a/usr/src/test/net-tests/cmd/nettest.ksh b/usr/src/test/net-tests/cmd/nettest.ksh new file mode 100644 index 0000000000..e7d0e78865 --- /dev/null +++ b/usr/src/test/net-tests/cmd/nettest.ksh @@ -0,0 +1,52 @@ +#!/usr/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Joyent, Inc. +# + +export NET_TESTS="/opt/net-tests" +runner="/opt/test-runner/bin/run" + +function fail +{ + echo $1 >&2 + exit ${2:-1} +} + +function find_runfile +{ + typeset distro= + if [[ -f $NET_TESTS/runfiles/default.run ]]; then + distro=default + fi + + [[ -n $distro ]] && echo $NET_TESTS/runfiles/$distro.run +} + +while getopts c: c; do + case $c in + 'c') + runfile=$OPTARG + [[ -f $runfile ]] || fail "Cannot read file: $runfile" + ;; + esac +done +shift $((OPTIND - 1)) + +[[ -z $runfile ]] && runfile=$(find_runfile) +[[ -z $runfile ]] && fail "Couldn't determine distro" + +$runner -c $runfile + +exit $? 
diff --git a/usr/src/test/net-tests/config/Makefile b/usr/src/test/net-tests/config/Makefile new file mode 100644 index 0000000000..7151577083 --- /dev/null +++ b/usr/src/test/net-tests/config/Makefile @@ -0,0 +1,38 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Joyent, Inc. +# + +include $(SRC)/Makefile.master + +CFGS = ip_forwarding.config +ROOTOPTPKG = $(ROOT)/opt/net-tests +ROOTOPTPKGCFG = $(ROOT)/opt/net-tests/config +ROOTOPTPKGDIRS = $(ROOTOPTPKG) $(ROOTOPTPKGCFG) +FILES = $(CFGS:%=$(ROOTOPTPKGCFG)/%) +$(FILES) := FILEMODE = 0644 + +include $(SRC)/test/Makefile.com + +all: $(CFGS) + +install: $(ROOTOPTPKG) $(ROOTOPTPKGCFG) $(FILES) + +clobber: clean + $(RM) $(FILES) + +$(ROOTOPTPKGDIRS): + $(INS.dir) + +$(ROOTOPTPKGCFG)/%: % $(ROOTOPTPKGDIRS) + $(INS.file) diff --git a/usr/src/test/net-tests/config/ip_forwarding.config b/usr/src/test/net-tests/config/ip_forwarding.config new file mode 100644 index 0000000000..4a839cd49d --- /dev/null +++ b/usr/src/test/net-tests/config/ip_forwarding.config @@ -0,0 +1,22 @@ +#!/usr/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. + +# +# Copyright 2019 Joyent, Inc. +# + +# +# See the tests/forwarding/README file for information about how to +# configure and run the tests. 
+# +export NT_CLIENT=client_zone_name +export NT_ROUTER=router_zone_name +export NT_SERVER=server_zone_name diff --git a/usr/src/test/net-tests/runfiles/Makefile b/usr/src/test/net-tests/runfiles/Makefile new file mode 100644 index 0000000000..d50a8deebf --- /dev/null +++ b/usr/src/test/net-tests/runfiles/Makefile @@ -0,0 +1,38 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Joyent, Inc. +# +include $(SRC)/Makefile.master + +SRCS = default.run +ROOTOPTPKG = $(ROOT)/opt/net-tests +RUNFILES = $(ROOTOPTPKG)/runfiles +CMDS = $(SRCS:%=$(RUNFILES)/%) +$(CMDS) := FILEMODE = 0444 + +include $(SRC)/test/Makefile.com + +all: $(SRCS) + +install: $(CMDS) + +clobber: clean + $(RM) $(CMDS) + +$(CMDS): $(RUNFILES) $(SRCS) + +$(RUNFILES): + $(INS.dir) + +$(RUNFILES)/%: % + $(INS.file) diff --git a/usr/src/test/net-tests/runfiles/default.run b/usr/src/test/net-tests/runfiles/default.run new file mode 100644 index 0000000000..cfc1a3df8d --- /dev/null +++ b/usr/src/test/net-tests/runfiles/default.run @@ -0,0 +1,44 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Joyent, Inc. 
+# + +[DEFAULT] +outputdir = /var/tmp/test_results +quiet = False +timeout = 300 + +[/opt/net-tests/tests/forwarding] +tests = [ + 'ip_fwd_001', + 'ip_fwd_002', + 'ip_fwd_003', + 'ip_fwd_004', + 'ip_fwd_005', + 'ip_fwd_006', + 'ip_fwd_007', + 'ip_fwd_008', + 'ip_fwd_009', + 'ip_fwd_010', + 'ip_fwd_011', + 'ip_fwd_012', + 'ip_fwd_013', + 'ip_fwd_014', + 'ip_fwd_015', + 'ip_fwd_016', + 'ip_fwd_017', + 'ip_fwd_018', + 'ip_fwd_019', + 'ip_fwd_020' + ] +user = root diff --git a/usr/src/test/net-tests/tests/Makefile b/usr/src/test/net-tests/tests/Makefile new file mode 100644 index 0000000000..2712d62751 --- /dev/null +++ b/usr/src/test/net-tests/tests/Makefile @@ -0,0 +1,42 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Joyent, Inc. +# +include $(SRC)/Makefile.master +include $(SRC)/cmd/Makefile.cmd + +SUBDIRS = forwarding +SCRIPTS = net_common +ROOTOPTPKG = $(ROOT)/opt/net-tests +TESTDIR = $(ROOTOPTPKG)/tests +CMDS = $(SCRIPTS:%=$(TESTDIR)/%) +FILEMODE=0444 +$(CMDS) := FILEMODE = 0555 + +include $(SRC)/test/Makefile.com + +install: $(CMDS) + +clobber: clean + $(RM) $(CMDS) + +$(CMDS): $(TESTDIR) + +$(TESTDIR): + $(INS.dir) + +$(TESTDIR)/%: % + $(INS.file) + +$(TESTDIR)/%: %.ksh + $(INS.rename) diff --git a/usr/src/test/net-tests/tests/forwarding/Makefile b/usr/src/test/net-tests/tests/forwarding/Makefile new file mode 100644 index 0000000000..566db8c86d --- /dev/null +++ b/usr/src/test/net-tests/tests/forwarding/Makefile @@ -0,0 +1,67 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Joyent, Inc. +# +include $(SRC)/Makefile.master +include $(SRC)/cmd/Makefile.cmd + +ROOTOPTPKG = $(ROOT)/opt/net-tests +TESTDIR = $(ROOTOPTPKG)/tests/forwarding + +PROG = \ + ip_forwarding \ + ip_fwd_suite \ + ip_fwd_001 \ + ip_fwd_002 \ + ip_fwd_003 \ + ip_fwd_004 \ + ip_fwd_005 \ + ip_fwd_006 \ + ip_fwd_007 \ + ip_fwd_008 \ + ip_fwd_009 \ + ip_fwd_010 \ + ip_fwd_011 \ + ip_fwd_012 \ + ip_fwd_013 \ + ip_fwd_014 \ + ip_fwd_015 \ + ip_fwd_016 \ + ip_fwd_017 \ + ip_fwd_018 \ + ip_fwd_019 \ + ip_fwd_020 + +DOC = $(TESTDIR)/README + +CMDS = $(PROG:%=$(TESTDIR)/%) +FILEMODE=0444 +$(CMDS) := FILEMODE = 0555 + +include $(SRC)/test/Makefile.com + +install: $(CMDS) $(DOC) + +clobber: clean + $(RM) $(CMDS) $(DOC) + +$(CMDS) $(DOC): $(TESTDIR) + +$(TESTDIR): + $(INS.dir) + +$(TESTDIR)/%: % + $(INS.file) + +$(TESTDIR)/%: %.ksh + $(INS.rename) diff --git a/usr/src/test/net-tests/tests/forwarding/README b/usr/src/test/net-tests/tests/forwarding/README new file mode 100644 index 0000000000..dbe8774a22 --- /dev/null +++ b/usr/src/test/net-tests/tests/forwarding/README @@ -0,0 +1,177 @@ +Running +------- + +* Create three native zones and start them. + +* Edit config/ip_forwarding.config, entering the names of the zones + you created. + +* Run /opt/net-tests/bin/nettest. + +Overview +-------- + +The tests in this directory test the IP forwarding path under several +different variations. All tests require three zones. The tests use +these three zones, along with the simnet driver, to emulate a real IP +forwarding scenario involving multiple hosts. All tests verify that +TCP, UDP, ICMP, IPv4/IPv6, and fragmented IPv4/IPv6 traffic can cross +the IP forwarding datapath. 
Each test differs in its emulation of +various hardware offload features (which would typically be presented +by real NICs). The diagrams below give a visual representation of the +situations we are testing and show how the test components relate to +each other. + +no mac-loopback +--------------- + +In this configuration we make sure that the packet travels from server +to router via "the wire". + + +----------------------------+ ++----------------------------+ |router zone | +|client zone | | +-------------------------+| +|(ipft_client_nic0) | | |ipft_router_nic0 || +| +----------------------+ | | |+----------------------+ || +| |ipft_client0 | | | ||ipft_client_r0 | || +| |192.168.77.2 |<-+-- Wire --+->|192.168.77.1 | || +| |fd00:0:1:4d::2 | | | ||fd00:0:1:4d::1 | || +| +----------------------+ | | |+----------------------+ || ++----------------------------+ | +-------------------------+| + | ^ | + | | | + | | | + | | | + | | | + | IP | | + | forwarding | | + | | | + | | | + | | | ++----------------------------+ | v | +|server zone | |+-------------------------+ | +|(ipft_server_nic0) | ||ipft_router_nic1 | | +| +----------------------+ | || +----------------------+| | +| |ipft_server0 | | || |ipft_server_r0 || | +| |VLAN 5 | | Wire || |VLAN 5 || | +| |192.168.88.2 |<-+----------++>|192.168.88.1 || | +| |fd00:0:1:58::2 | | || |fd00:0:1:58::1 || | +| +----------------------+ | || +----------------------+| | ++----------------------------+ |+-------------------------+ | + +----------------------------+ + +mac-loopback +------------ + +In this configuration we make sure that the packet travels from server +to router via mac-loopback. 
+ + +----------------------------+ ++----------------------------+ |router zone | +|client zone | | +-------------------------+| +|(ipft_nic0) | | |ipft_nic1 || +| +----------------------+ | | |+----------------------+ || +| |ipft_client0 | | | ||ipft_client_r0 | || +| |192.168.77.2 |<-+-- Wire --+->|192.168.77.1 | || +| |fd00:0:1:4d::2 | | | ||fd00:0:1:4d::1 | || +| +----------------------+ | | |+----------------------+ || ++----------------------------+ | +-------------------------+| + | ^ | + | | | + | | | + | | | + | | | + | IP | | + | forwarding | | + | | | + | | | + | | | ++----------------------------+ | v | +|server zone | |+-------------------------+ | +|(ipft_nic1) | ||ipft_nic1 | | +| +----------------------+ | || +----------------------+| | +| |ipft_server0 | | MAC || |ipft_server_r0 || | +| |VLAN 5 | | loopback || |VLAN 5 || | +| |192.168.88.2 |<-+----------++>|192.168.88.1 || | +| |fd00:0:1:58::2 | | || |fd00:0:1:58::1 || | +| +----------------------+ | || +----------------------+| | ++----------------------------+ |+-------------------------+ | + +----------------------------+ + +Requirements +------------ + +* The client and server zones must provide `/usr/bin/socat`. It would + be nice to use netcat but our native version is missing features + like connection timeout. + +* The user must both create and start the three required zones. + +* All three zones should be native zones. + +* You must edit the ip_forwarding.config file, providing it with the + names of the zones you have created. + +Files +----- + +ip_forwarding + + The main test script; it provides the logic for all the tests + below. The different test variations are controlled by options + and it takes the three zones as arguments. This script may be + run by hand but it's easier to use ip_fwd_suite for that + purpose. + +ip_fwd_suite + + This script runs the various configurations of the IP + forwarding test suite. You can run the entire suite or just a + single test via the '-n' option. 
The "Test Matrix" section + below gives an overview of all the tests in the suite. + +ip_fwd_XXX + + These scripts are mostly here to work around the fact that the + test-runner cannot pass arguments to individual tests. In + order to avoid running everything as the "ip_fwd_suite" test, + we create a file for each configuration. This gives individual + reporting of each test and steers us clear of tripping the + timeout. You can also run these scripts by hand like so: + + NET_TESTS=/opt/net-tests /opt/net-tests/tests/forwarding/ip_fwd_001 + +config/ip_forwarding.config + + This file must be modified to contain the names of the zones + the user created for running these tests. + +Test Matrix +----------- + +This is a breakdown of all the tests in the IP forwarding test suite. +If a given offload is enabled or disabled, it is done so for all +interfaces involved in the test. + +NAME Tx IP Tx ULP LSO Rx IP mac-loopback +001 off none off off no +002 on partial off off no +003 on partial on off no +004 on fullv4 off off no +005 on fullv4 on off no +006 off none off on no +007 on partial off on no +008 on partial on on no +009 on fullv4 off on no +010 on fullv4 on on no + +011 off none off off yes +012 on partial off off yes +013 on partial on off yes +014 on fullv4 off off yes +015 on fullv4 on off yes +016 off none off on yes +017 on partial off on yes +018 on partial on on yes +019 on fullv4 off on yes +020 on fullv4 on on yes diff --git a/usr/src/test/net-tests/tests/forwarding/ip_forwarding.ksh b/usr/src/test/net-tests/tests/forwarding/ip_forwarding.ksh new file mode 100644 index 0000000000..bf7a2255af --- /dev/null +++ b/usr/src/test/net-tests/tests/forwarding/ip_forwarding.ksh @@ -0,0 +1,496 @@ +#!/usr/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. + +# +# Copyright 2019 Joyent, Inc. +# + +# +# Usage: +# +# ip_forwarding.ksh -bcflnpruv <client> <router> <server> +# +# Where client, router, and server are the names of three native +# zones. The user must create and start these zones; but other +# than that there is no special configuration required for them. +# +# -b Place server and router on same underlying simnet, causing +# them to talk via MAC-loopback. +# +# -c Run cleanup only. +# +# -f Enable full Tx ULP hardware checksum. +# +# -l Enable TCP LSO. +# +# -n No cleanup: the various artifacts created by this script will +# remain after execution. +# +# -p Enable partial Tx ULP hardware checksum. +# +# -r Enable Rx IPv4 header checksum offload. +# +# -u Run UDP tests. +# +# -v Verbose mode. +# + +if [[ -z $NET_TESTS ]]; then + echo "NET_TESTS not set" >&2 + exit 1 +fi + +. $NET_TESTS/tests/net_common + +function cleanup +{ + if ((nt_cleanup == 0)); then + dbg "skipping cleanup" + return 0 + fi + + rm -rf ${nt_tdirprefix}* + zlogin $nt_client rm -rf ${nt_tdirprefix}* + zlogin $nt_server rm -rf ${nt_tdirprefix}* + + rm_route $nt_client $nt_server_ip $nt_server_subnet $nt_client_router_ip + rm_route $nt_server $nt_client_ip $nt_client_subnet $nt_server_router_ip + rm_route6 $nt_client $nt_server_ip6 $nt_server_subnet6 \ + $nt_client_router_ip6 + rm_route6 $nt_server $nt_client_ip6 $nt_client_subnet6 \ + $nt_server_router_ip6 + + ip_fwd_disable $nt_router + + delete_addr $nt_client ipft_client0 v4 + delete_addr $nt_router ipft_client_r0 v4 + delete_addr $nt_router ipft_server_r0 v4 + delete_addr $nt_server ipft_server0 v4 + + delete_addr $nt_client ipft_client0 v6 + delete_addr $nt_router ipft_client_r0 v6 + delete_addr $nt_router ipft_server_r0 v6 + delete_addr $nt_server ipft_server0 v6 + + delete_if $nt_client ipft_client0 + delete_if $nt_router ipft_client_r0 + 
delete_if $nt_router ipft_server_r0 + delete_if $nt_server ipft_server0 + + delete_vnic ipft_client0 0 $nt_client + delete_vnic ipft_client_r0 0 $nt_router + delete_vnic ipft_server_r0 5 $nt_router + delete_vnic ipft_server0 5 $nt_server + + for nt_name in ${nt_nics[@]}; do + delete_simnet $nt_name + done +} + +function usage +{ + echo "$nt_tname -bcflnpruv <client> <router> <server>" >&2 +} + +# +# Set test defaults. +# +nt_tname=${NT_TNAME:-$(basename $0)} +nt_loopback=0 +nt_ulp_full=0 +nt_ulp_partial=0 +nt_tcp_lso=0 +nt_udp=0 +nt_rx_ip_cksum=0 +nt_cleanup=1 +nt_cleanup_only=0 + +nt_tdirprefix=/var/tmp/${nt_tname} +nt_tdir=${nt_tdirprefix}.$$ +nt_dfile=${nt_tdir}/${nt_tname}.data +nt_efile=${nt_tdir}/${nt_tname}-expected-sha1 +nt_rfile=${nt_tdir}/${nt_tname}-received-sha1 +nt_ofile=${nt_tdir}/${nt_tname}-received +nt_client_subnet=192.168.77.0/24 +nt_client_ip=192.168.77.2 +nt_client_router_ip=192.168.77.1 +nt_server_subnet=192.168.88.0/24 +nt_server_ip=192.168.88.2 +nt_server_router_ip=192.168.88.1 +nt_port=7774 +nt_client_subnet6=fd00:0:1:4d::/64 +nt_client_ip6=fd00:0:1:4d::2 +nt_client_router_ip6=fd00:0:1:4d::1 +nt_server_subnet6=fd00:0:1:58::/64 +nt_server_router_ip6=fd00:0:1:58::1 +nt_server_ip6=fd00:0:1:58::2 +nt_port6=7776 +nt_bridge=ipft_switch +typeset -A nt_nics + +while getopts "bcflnpruv" opt; do + case $opt in + b) + nt_loopback=1 + ;; + c) + nt_cleanup_only=1 + ;; + f) + nt_ulp_full=1 + ;; + l) + nt_tcp_lso=1 + ;; + n) + nt_cleanup=0 + ;; + p) + nt_ulp_partial=1 + ;; + r) + nt_rx_ip_cksum=1 + ;; + u) + nt_udp=1 + ;; + v) + DEBUG=1 + ;; + esac +done + +shift $((OPTIND - 1)) + +if ((nt_ulp_partial == 1)) && ((nt_ulp_full == 1)); then + fail "both partial and full checksum enabled" +fi + +if (( $# != 3 )); then + usage + fail "wrong number of arguments" +fi + +nt_client=$1 +nt_router=$2 +nt_server=$3 + +if [[ "$nt_client" == "$nt_router" || "$nt_router" == "$nt_server" || + "$nt_client" == "$nt_server" ]]; then + fail "all zones must be unique" +fi + +dbg "client zone: 
$nt_client" +dbg "router zone: $nt_router" +dbg "server zone: $nt_server" + +BAIL=1 +zone_exists $nt_client || fail "zone $nt_client not found" +zone_exists $nt_router || fail "zone $nt_router not found" +zone_exists $nt_server || fail "zone $nt_server not found" + +zone_running $nt_client +zone_running $nt_router +zone_running $nt_server + +if ! zlogin $nt_client ls /usr/bin/socat > /dev/null; then + fail "zone $nt_client missing socat" +fi + +if ! zlogin $nt_server ls /usr/bin/socat > /dev/null; then + fail "zone $nt_server missing socat" +fi + +if ((nt_loopback == 0)); then + nt_nics[0]=ipft_client_nic0 + nt_nics[1]=ipft_router_nic0 + nt_nics[2]=ipft_router_nic1 + nt_nics[3]=ipft_server_nic0 +else + nt_nics[0]=ipft_nic0 + nt_nics[1]=ipft_nic1 +fi + +# +# Make a best effort to cleanup artifacts from a previous run. +# +if ((nt_cleanup_only == 1)); then + dbg "performing cleanup only" + BAIL=0 + cleanup + BAIL=1 + exit 0 +fi + +if ! mkdir $nt_tdir; then + fail "failed to mkdir $nt_tdir in GZ" +fi +dbg "created dir $nt_tdir in GZ" +if ! zlogin $nt_client mkdir $nt_tdir; then + fail "failed to mkdir $nt_tdir in $nt_client" +fi +dbg "created dir $nt_tdir in $nt_client" +if ! 
zlogin $nt_server mkdir $nt_tdir; then + fail "failed to mkdir $nt_tdir in $nt_server" +fi +dbg "created dir $nt_tdir in $nt_server" + +trap cleanup ERR + +for nt_name in ${nt_nics[@]}; do + create_simnet $nt_name +done + +if ((nt_loopback == 0)); then + link_simnets ${nt_nics[0]} ${nt_nics[1]} + link_simnets ${nt_nics[2]} ${nt_nics[3]} +else + link_simnets ${nt_nics[0]} ${nt_nics[1]} +fi + +for nt_name in ${nt_nics[@]}; do + if ((nt_ulp_partial == 1)); then + set_linkprop $nt_name _tx_ulp_cksum partial + fi + + if ((nt_ulp_full == 1)); then + set_linkprop $nt_name _tx_ulp_cksum fullv4 + fi + + if ((nt_ulp_full == 1)) || ((nt_ulp_partial == 1)); then + set_linkprop $nt_name _tx_ipv4_cksum on + fi + + if ((nt_tcp_lso == 1)); then + set_linkprop $nt_name _lso on + fi + + if ((nt_rx_ip_cksum == 1)); then + set_linkprop $nt_name _rx_ipv4_cksum on + fi +done + +if ((nt_loopback == 0)); then + create_vnic ipft_client0 ipft_client_nic0 0 $nt_client + create_vnic ipft_client_r0 ipft_router_nic0 0 $nt_router + create_vnic ipft_server_r0 ipft_router_nic1 5 $nt_router + create_vnic ipft_server0 ipft_server_nic0 5 $nt_server +else + create_vnic ipft_client0 ipft_nic0 0 $nt_client + create_vnic ipft_client_r0 ipft_nic1 0 $nt_router + create_vnic ipft_server_r0 ipft_nic1 5 $nt_router + create_vnic ipft_server0 ipft_nic1 5 $nt_server +fi + +ip_fwd_enable $nt_router + +create_addr $nt_client ipft_client0 $nt_client_ip/24 +create_addr $nt_router ipft_client_r0 $nt_client_router_ip/24 +create_addr $nt_router ipft_server_r0 $nt_server_router_ip/24 +create_addr $nt_server ipft_server0 $nt_server_ip/24 + +add_route $nt_client $nt_server_ip $nt_server_subnet $nt_client_router_ip +add_route $nt_server $nt_client_ip $nt_client_subnet $nt_server_router_ip + +create_addr6 $nt_client ipft_client0 $nt_client_ip6 +create_addr6 $nt_router ipft_client_r0 $nt_client_router_ip6 +create_addr6 $nt_router ipft_server_r0 $nt_server_router_ip6 +create_addr6 $nt_server ipft_server0 $nt_server_ip6 + 
+add_route6 $nt_client $nt_server_ip6 $nt_server_subnet6 $nt_client_router_ip6 +add_route6 $nt_server $nt_client_ip6 $nt_client_subnet6 $nt_server_router_ip6 + +dd if=/dev/urandom of=$nt_dfile bs=1024 count=1024 > /dev/null 2>&1 +if (($? != 0)); then + fail "failed to create data file: $nt_dfile" +else + dbg "created data file: $nt_dfile" +fi + +digest -a sha1 $nt_dfile > $nt_efile + +# ================================================================ +# client -> server +# ================================================================ +ping $nt_client $nt_client_ip $nt_server_ip +ping $nt_client $nt_client_ip6 $nt_server_ip6 + +start_server $nt_server TCP4 $nt_server_ip $nt_port $nt_ofile +nt_listener_ppid=$! + +# Give the server time to start. +sleep 1 + +dbg "sending 1M $nt_client ($nt_client_ip) -> $nt_server ($nt_server_ip)" +zlogin $nt_client /usr/bin/socat -b 8192 STDIN \ + TCP4:$nt_server_ip:$nt_port,connect-timeout=5 < $nt_dfile + +if (($? != 0)); then + pkill -TERM -P $nt_listener_ppid + fail "failed to run socat client" +else + dbg "sent 1M $nt_client ($nt_client_ip) -> $nt_server ($nt_server_ip)" +fi + +# +# The client may have exited but make sure to give the server time to +# exit and finish computing the SHA1. +# +dbg "waiting for listener $nt_listener_ppid" +wait_for_pid $nt_listener_ppid 5 +dbg "listener $nt_listener_ppid exited" + +digest -a sha1 /zones/$nt_server/root/$nt_ofile > $nt_rfile + +if ! diff $nt_efile $nt_rfile; then + fail "SHA1 comparison failed" +else + dbg "SHA1 comparison passed" +fi + +start_server $nt_server TCP6 $nt_server_ip6 $nt_port6 $nt_ofile +nt_listener_ppid=$! + +# Give the server time to start. +sleep 1 + +zlogin $nt_client /usr/bin/socat -b 8192 STDIN \ + TCP6:[${nt_server_ip6}]:$nt_port6,connect-timeout=5 < $nt_dfile + +if (($? 
!= 0)); then + pkill -TERM -P $nt_listener_ppid + fail "failed to run socat client IPv6" +else + dbg "sent 1M $nt_client ($nt_client_ip6)" \ + "-> $nt_server ($nt_server_ip6) IPv6" +fi + +# +# The client may have exited but make sure to give the server time to +# exit and finish computing the SHA1. +# +dbg "waiting for listener $nt_listener_ppid" +wait_for_pid $nt_listener_ppid 5 +dbg "listener $nt_listener_ppid exited" + +digest -a sha1 /zones/$nt_server/root/$nt_ofile > $nt_rfile + +if ! diff $nt_efile $nt_rfile; then + fail "SHA1 comparison failed" +else + dbg "SHA1 comparison passed" +fi + +if ((nt_udp == 1)); then + ping_udp $nt_client $nt_client_ip $nt_server_ip 256 3 + ping_udp $nt_client $nt_client_ip6 $nt_server_ip6 256 3 + + # + # Test IP fragmentation by sending a larger-than-MTU datagram. + # You can verify fragmentation is happening by dtracing the + # various "Frag" and "Reasm" mibs. + # + dbg "test IP fragmentation $nt_client_ip -> $nt_server_ip" + ping_udp $nt_client $nt_client_ip $nt_server_ip $((1024 * 16)) 3 + + dbg "test IPv6 fragmentation $nt_client_ip6 -> $nt_server_ip6" + ping_udp $nt_client $nt_client_ip6 $nt_server_ip6 $((1024 * 16)) 3 +fi + +# ================================================================ +# server -> client +# ================================================================ +ping $nt_server $nt_server_ip $nt_client_ip +ping $nt_server $nt_server_ip6 $nt_client_ip6 + +start_server $nt_client TCP4 $nt_client_ip $nt_port $nt_ofile +nt_listener_ppid=$! + +# Give the listener time to start. +sleep 1 + +zlogin $nt_server /usr/bin/socat -b 8192 STDIN \ + TCP4:$nt_client_ip:$nt_port,bind=$nt_server_ip,connect-timeout=5 \ + < $nt_dfile + +if (($? != 0)); then + pkill -TERM -P $nt_listener_ppid + fail "failed to run socat client" +else + dbg "sent 1M $nt_server ($nt_server_ip) -> $nt_client ($nt_client_ip)" +fi + +# +# The client may have exited but make sure to give the server time to +# exit and finish computing the SHA1. 
+# +dbg "waiting for listener $nt_listener_ppid" +wait_for_pid $nt_listener_ppid 5 +dbg "listener $nt_listener_ppid exited" + +digest -a sha1 /zones/$nt_client/root/$nt_ofile > $nt_rfile + +if ! diff $nt_efile $nt_rfile; then + fail "SHA1 comparison failed" +else + dbg "SHA1 comparison passed" +fi + +start_server $nt_client TCP6 $nt_client_ip6 $nt_port6 $nt_ofile +nt_listener_ppid=$! + +# Give the listener time to start. +sleep 1 + +zlogin $nt_server /usr/bin/socat -b 8192 STDIN \ + TCP6:[$nt_client_ip6]:$nt_port6,connect-timeout=5 < $nt_dfile + +if (($? != 0)); then + pkill -TERM -P $nt_listener_ppid + fail "failed to run socat client IPv6" +else + dbg "sent 1M $nt_server ($nt_server_ip6) -> $nt_client ($nt_client_ip6)" +fi + +# +# The client may have exited but make sure to give the server time to +# exit and finish computing the SHA1. +# +dbg "waiting for listener $nt_listener_ppid" +wait_for_pid $nt_listener_ppid 5 +dbg "server $nt_listener_ppid exited" + +digest -a sha1 /zones/$nt_client/root/$nt_ofile > $nt_rfile + +if ! diff $nt_efile $nt_rfile; then + fail "SHA1 comparison failed" +else + dbg "SHA1 comparison passed" +fi + +if ((nt_udp == 1)); then + ping_udp $nt_server $nt_server_ip $nt_client_ip 256 3 + ping_udp $nt_server $nt_server_ip6 $nt_client_ip6 256 3 + + # + # Test IP fragmentation by sending a larger-than-MTU datagram. + # You can verify fragmentation is happening by dtracing the + # various "Frag" and "Reasm" mibs. 
+ # + dbg "test IP fragmentation $nt_server_ip -> $nt_client_ip" + ping_udp $nt_server $nt_server_ip $nt_client_ip $((1024 * 16)) 3 + + dbg "test IPv6 fragmentation $nt_server_ip6 -> $nt_client_ip6" + ping_udp $nt_server $nt_server_ip6 $nt_client_ip6 $((1024 * 16)) 3 +fi + +cleanup +echo "PASS [$nt_tname]" diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_001.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_001.ksh new file mode 100644 index 0000000000..9f6c98d1b3 --- /dev/null +++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_001.ksh @@ -0,0 +1,22 @@ +#!/usr/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. + +# +# Copyright 2019 Joyent, Inc. +# + +if [[ -z $NET_TESTS ]]; then + echo "NET_TESTS not set" >&2 + exit 1 +fi + +$NET_TESTS/tests/forwarding/ip_fwd_suite -n 001 +exit $? diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_002.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_002.ksh new file mode 100644 index 0000000000..06e5ec53ed --- /dev/null +++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_002.ksh @@ -0,0 +1,22 @@ +#!/usr/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. + +# +# Copyright 2019 Joyent, Inc. 
+# + +if [[ -z $NET_TESTS ]]; then + echo "NET_TESTS not set" >&2 + exit 1 +fi + +$NET_TESTS/tests/forwarding/ip_fwd_suite -n 002 +exit $? diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_003.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_003.ksh new file mode 100644 index 0000000000..ce84bc0866 --- /dev/null +++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_003.ksh @@ -0,0 +1,22 @@ +#!/usr/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. + +# +# Copyright 2019 Joyent, Inc. +# + +if [[ -z $NET_TESTS ]]; then + echo "NET_TESTS not set" >&2 + exit 1 +fi + +$NET_TESTS/tests/forwarding/ip_fwd_suite -n 003 +exit $? diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_004.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_004.ksh new file mode 100644 index 0000000000..b5fa65ccd1 --- /dev/null +++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_004.ksh @@ -0,0 +1,22 @@ +#!/usr/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. + +# +# Copyright 2019 Joyent, Inc. +# + +if [[ -z $NET_TESTS ]]; then + echo "NET_TESTS not set" >&2 + exit 1 +fi + +$NET_TESTS/tests/forwarding/ip_fwd_suite -n 004 +exit $? 
diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_005.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_005.ksh new file mode 100644 index 0000000000..9bbd536e19 --- /dev/null +++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_005.ksh @@ -0,0 +1,22 @@ +#!/usr/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. + +# +# Copyright 2019 Joyent, Inc. +# + +if [[ -z $NET_TESTS ]]; then + echo "NET_TESTS not set" >&2 + exit 1 +fi + +$NET_TESTS/tests/forwarding/ip_fwd_suite -n 005 +exit $? diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_006.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_006.ksh new file mode 100644 index 0000000000..2267072a3d --- /dev/null +++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_006.ksh @@ -0,0 +1,22 @@ +#!/usr/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. + +# +# Copyright 2019 Joyent, Inc. +# + +if [[ -z $NET_TESTS ]]; then + echo "NET_TESTS not set" >&2 + exit 1 +fi + +$NET_TESTS/tests/forwarding/ip_fwd_suite -n 006 +exit $? 
diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_007.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_007.ksh new file mode 100644 index 0000000000..a0380eb92e --- /dev/null +++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_007.ksh @@ -0,0 +1,22 @@ +#!/usr/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. + +# +# Copyright 2019 Joyent, Inc. +# + +if [[ -z $NET_TESTS ]]; then + echo "NET_TESTS not set" >&2 + exit 1 +fi + +$NET_TESTS/tests/forwarding/ip_fwd_suite -n 007 +exit $? diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_008.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_008.ksh new file mode 100644 index 0000000000..aed5438f63 --- /dev/null +++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_008.ksh @@ -0,0 +1,22 @@ +#!/usr/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. + +# +# Copyright 2019 Joyent, Inc. +# + +if [[ -z $NET_TESTS ]]; then + echo "NET_TESTS not set" >&2 + exit 1 +fi + +$NET_TESTS/tests/forwarding/ip_fwd_suite -n 008 +exit $? 
diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_009.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_009.ksh new file mode 100644 index 0000000000..8a0fa9674c --- /dev/null +++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_009.ksh @@ -0,0 +1,22 @@ +#!/usr/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. + +# +# Copyright 2019 Joyent, Inc. +# + +if [[ -z $NET_TESTS ]]; then + echo "NET_TESTS not set" >&2 + exit 1 +fi + +$NET_TESTS/tests/forwarding/ip_fwd_suite -n 009 +exit $? diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_010.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_010.ksh new file mode 100644 index 0000000000..3c45225597 --- /dev/null +++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_010.ksh @@ -0,0 +1,22 @@ +#!/usr/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. + +# +# Copyright 2019 Joyent, Inc. +# + +if [[ -z $NET_TESTS ]]; then + echo "NET_TESTS not set" >&2 + exit 1 +fi + +$NET_TESTS/tests/forwarding/ip_fwd_suite -n 010 +exit $? 
diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_011.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_011.ksh
new file mode 100644
index 0000000000..62785ff33e
--- /dev/null
+++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_011.ksh
@@ -0,0 +1,26 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+#
+# Run test 011 of the IP forwarding suite via ip_fwd_suite.
+#
+
+if [[ -z $NET_TESTS ]]; then
+	echo "NET_TESTS not set" >&2
+	exit 1
+fi
+
+"$NET_TESTS/tests/forwarding/ip_fwd_suite" -n 011
+exit $?
diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_012.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_012.ksh
new file mode 100644
index 0000000000..c09cd77258
--- /dev/null
+++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_012.ksh
@@ -0,0 +1,26 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+#
+# Run test 012 of the IP forwarding suite via ip_fwd_suite.
+#
+
+if [[ -z $NET_TESTS ]]; then
+	echo "NET_TESTS not set" >&2
+	exit 1
+fi
+
+"$NET_TESTS/tests/forwarding/ip_fwd_suite" -n 012
+exit $?
diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_013.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_013.ksh
new file mode 100644
index 0000000000..e3cc833f53
--- /dev/null
+++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_013.ksh
@@ -0,0 +1,26 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+#
+# Run test 013 of the IP forwarding suite via ip_fwd_suite.
+#
+
+if [[ -z $NET_TESTS ]]; then
+	echo "NET_TESTS not set" >&2
+	exit 1
+fi
+
+"$NET_TESTS/tests/forwarding/ip_fwd_suite" -n 013
+exit $?
diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_014.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_014.ksh
new file mode 100644
index 0000000000..6bd76de190
--- /dev/null
+++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_014.ksh
@@ -0,0 +1,26 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+#
+# Run test 014 of the IP forwarding suite via ip_fwd_suite.
+#
+
+if [[ -z $NET_TESTS ]]; then
+	echo "NET_TESTS not set" >&2
+	exit 1
+fi
+
+"$NET_TESTS/tests/forwarding/ip_fwd_suite" -n 014
+exit $?
diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_015.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_015.ksh
new file mode 100644
index 0000000000..d3b1e2fe1d
--- /dev/null
+++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_015.ksh
@@ -0,0 +1,26 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+#
+# Run test 015 of the IP forwarding suite via ip_fwd_suite.
+#
+
+if [[ -z $NET_TESTS ]]; then
+	echo "NET_TESTS not set" >&2
+	exit 1
+fi
+
+"$NET_TESTS/tests/forwarding/ip_fwd_suite" -n 015
+exit $?
diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_016.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_016.ksh
new file mode 100644
index 0000000000..aa5903cbe4
--- /dev/null
+++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_016.ksh
@@ -0,0 +1,26 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+#
+# Run test 016 of the IP forwarding suite via ip_fwd_suite.
+#
+
+if [[ -z $NET_TESTS ]]; then
+	echo "NET_TESTS not set" >&2
+	exit 1
+fi
+
+"$NET_TESTS/tests/forwarding/ip_fwd_suite" -n 016
+exit $?
diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_017.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_017.ksh
new file mode 100644
index 0000000000..38615b9f94
--- /dev/null
+++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_017.ksh
@@ -0,0 +1,26 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+#
+# Run test 017 of the IP forwarding suite via ip_fwd_suite.
+#
+
+if [[ -z $NET_TESTS ]]; then
+	echo "NET_TESTS not set" >&2
+	exit 1
+fi
+
+"$NET_TESTS/tests/forwarding/ip_fwd_suite" -n 017
+exit $?
diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_018.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_018.ksh
new file mode 100644
index 0000000000..e010141458
--- /dev/null
+++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_018.ksh
@@ -0,0 +1,26 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+#
+# Run test 018 of the IP forwarding suite via ip_fwd_suite.
+#
+
+if [[ -z $NET_TESTS ]]; then
+	echo "NET_TESTS not set" >&2
+	exit 1
+fi
+
+"$NET_TESTS/tests/forwarding/ip_fwd_suite" -n 018
+exit $?
diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_019.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_019.ksh
new file mode 100644
index 0000000000..e3b16bad43
--- /dev/null
+++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_019.ksh
@@ -0,0 +1,26 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+#
+# Run test 019 of the IP forwarding suite via ip_fwd_suite.
+#
+
+if [[ -z $NET_TESTS ]]; then
+	echo "NET_TESTS not set" >&2
+	exit 1
+fi
+
+"$NET_TESTS/tests/forwarding/ip_fwd_suite" -n 019
+exit $?
diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_020.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_020.ksh
new file mode 100644
index 0000000000..9710bae3c1
--- /dev/null
+++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_020.ksh
@@ -0,0 +1,26 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+#
+# Run test 020 of the IP forwarding suite via ip_fwd_suite.
+#
+
+if [[ -z $NET_TESTS ]]; then
+	echo "NET_TESTS not set" >&2
+	exit 1
+fi
+
+"$NET_TESTS/tests/forwarding/ip_fwd_suite" -n 020
+exit $?
diff --git a/usr/src/test/net-tests/tests/forwarding/ip_fwd_suite.ksh b/usr/src/test/net-tests/tests/forwarding/ip_fwd_suite.ksh
new file mode 100644
index 0000000000..a1fdc444e3
--- /dev/null
+++ b/usr/src/test/net-tests/tests/forwarding/ip_fwd_suite.ksh
@@ -0,0 +1,124 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+#
+# Run the IP forwarding test suite.
+#
+# Usage
+#
+#     ip_fwd_suite [-n <name>] [-a <args>]
+#
+# To run all tests:
+#
+#     NET_TESTS=/opt/net-tests ip_fwd_suite
+#
+# To run one test:
+#
+#     NET_TESTS=/opt/net-tests ip_fwd_suite -n 001
+#
+# To run one test with additional arguments passed to 'ip_forwarding':
+#
+#     NET_TESTS=/opt/net-tests ip_fwd_suite -n 001 -a n
+#
+
+if [[ -z $NET_TESTS ]]; then
+	echo "NET_TESTS not set" >&2
+	exit 1
+fi
+
+. "$NET_TESTS/tests/net_common"
+. "$NET_TESTS/config/ip_forwarding.config"
+
+if [[ -z "$NT_CLIENT" ]]; then
+	fail "NT_CLIENT must be set"
+fi
+
+if [[ -z "$NT_ROUTER" ]]; then
+	fail "NT_ROUTER must be set"
+fi
+
+if [[ -z "$NT_SERVER" ]]; then
+	fail "NT_SERVER must be set"
+fi
+
+#
+# -a: option letters appended to the options of each test that is run.
+# -n: name of the single test to run; all tests are run when omitted.
+#
+while getopts "a:n:" opt; do
+	case $opt in
+	a)
+		nt_args=$OPTARG
+		;;
+	n)
+		nt_name=$OPTARG
+		;;
+	esac
+done
+
+shift $((OPTIND - 1))
+
+nt_script=$NET_TESTS/tests/forwarding/ip_forwarding
+
+#
+# See the "Test Matrix" section of the README for a description of
+# each test.
+#
+typeset -A nt_name_args
+nt_name_args["001"]="uv"
+nt_name_args["002"]="puv"
+nt_name_args["003"]="lpuv"
+nt_name_args["004"]="fuv"
+nt_name_args["005"]="fluv"
+nt_name_args["006"]="ruv"
+nt_name_args["007"]="pruv"
+nt_name_args["008"]="lpruv"
+nt_name_args["009"]="fruv"
+nt_name_args["010"]="flruv"
+
+nt_name_args["011"]="buv"
+nt_name_args["012"]="bpuv"
+nt_name_args["013"]="blpuv"
+nt_name_args["014"]="bfuv"
+nt_name_args["015"]="bfluv"
+nt_name_args["016"]="bruv"
+nt_name_args["017"]="bpruv"
+nt_name_args["018"]="blpruv"
+nt_name_args["019"]="bfruv"
+nt_name_args["020"]="bflruv"
+
+if [[ -n $nt_name ]]; then
+	if [[ -z ${nt_name_args[$nt_name]} ]]; then
+		fail "invalid test name: $nt_name"
+	fi
+
+	export NT_TNAME="ip_fwd_$nt_name"
+	nt_test_args="-${nt_name_args[$nt_name]}${nt_args}"
+	"$nt_script" $nt_test_args $NT_CLIENT $NT_ROUTER $NT_SERVER
+	exit $?
+fi
+
+#
+# No test was named, so run them all. The option string is rebuilt from
+# the unmodified -a value on every iteration; assigning back to nt_args
+# would make each test inherit the options of every test run before it.
+#
+for nt_name in ${!nt_name_args[@]}; do
+	export NT_TNAME="ip_fwd_$nt_name"
+	nt_test_args="-${nt_name_args[$nt_name]}${nt_args}"
+	"$nt_script" $nt_test_args $NT_CLIENT $NT_ROUTER $NT_SERVER || exit $?
+done
+
+exit 0
diff --git a/usr/src/test/net-tests/tests/net_common.ksh b/usr/src/test/net-tests/tests/net_common.ksh
new file mode 100644
index 0000000000..b83cda8c97
--- /dev/null
+++ b/usr/src/test/net-tests/tests/net_common.ksh
@@ -0,0 +1,650 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+#
+# Functions shared across the network tests.
+#
+
+DEBUG=0
+
+function dbg		# print "$*" with a DBG prefix when DEBUG is 1
+{
+	typeset msg="$*"
+	if (($DEBUG == 1)); then
+		echo "DBG [$nt_tname]: $msg"
+	fi
+}
+
+function fail		# print "$*" to stderr with a FAIL prefix, then exit 1
+{
+	typeset msg="$*"
+	echo "FAIL [$nt_tname]: $msg" >&2
+	exit 1
+}
+
+function maybe_fail	# fail "$1" if BAIL is 1; else dbg it and return 1
+{
+	typeset msg=$1
+
+	if ((BAIL == 1)); then
+		fail "$msg"
+	else
+		dbg "$msg"
+		return 1
+	fi
+}
+
+function zone_exists	# return 0 if "zoneadm -z $1 list" succeeds, else 1
+{
+	typeset name=$1
+
+	if (($# != 1)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	dbg "checking for existence of zone: $name"
+	if zoneadm -z $name list > /dev/null 2>&1; then
+		dbg "found zone: $name"
+		return 0
+	else
+		dbg "zone not found: $name"
+		return 1
+	fi
+}
+
+function zone_running	# return 0 if zoneadm reports zone $1 as "running"
+{
+	typeset name=$1
+	typeset state=$(zoneadm -z $name list -p | awk -F: '{ print $3 }')
+	typeset err="zone $name is not running"
+
+	if (($# != 1)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	dbg "check if zone $name is running"
+	dbg "state of zone $name: $state"
+	if [[ "$state" == "running" ]]; then
+		dbg "zone $name is running"
+		return 0
+	fi
+
+	maybe_fail "$err"
+}
+
+function simnet_exists	# return 0 if "dladm show-simnet $1" succeeds, else 1
+{
+	typeset name=$1
+
+	if (($# != 1)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	if dladm show-simnet $name > /dev/null 2>&1; then
+		dbg "simnet $name found"
+		return 0
+	else
+		dbg "simnet $name not found"
+		return 1
+	fi
+}
+
+function create_simnet	# create simnet $1; an existing simnet is an error
+{
+	typeset name=$1
+	typeset err="failed to create simnet $name"
+
+	if (($# != 1)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	dbg "creating simnet $name"
+	if simnet_exists $name; then
+		dbg "simnet $name already exists"
+		maybe_fail "$err"
+		return 1
+	fi
+
+	if dladm create-simnet > /dev/null $name; then
+		dbg "created simnet $name"
+		return 0
+	fi
+
+	maybe_fail "$err"
+}
+
+function delete_simnet	# delete simnet $1; returns 1 if it does not exist
+{
+	typeset name=$1
+	typeset err="failed to delete simnet $name"
+
+	if (($# != 1)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	dbg "deleting simnet $name"
+	if ! simnet_exists $name; then
+		dbg "simnet $name doesn't exist"
+		return 1
+	fi
+
+	if dladm delete-simnet $name; then
+		dbg "simnet $name deleted"
+		return 0
+	fi
+
+	maybe_fail "$err"
+}
+
+function link_simnets	# args: sim1 sim2; runs dladm modify-simnet -p sim2 sim1
+{
+	typeset sim1=$1
+	typeset sim2=$2
+	typeset err="failed to link simnet $sim1 to $sim2"
+
+	if (($# != 2)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	dbg "linking simnet $sim1 to $sim2"
+	if dladm modify-simnet -p $sim2 $sim1 > /dev/null; then
+		dbg "linked simnet $sim1 to $sim2"
+		return 0
+	fi
+
+	maybe_fail "$err"
+}
+
+function vnic_exists	# args: name vid over zone; 0 only if all three match
+{
+	typeset name=$1
+	typeset vid=$2
+	typeset over=$3
+	typeset zone=$4
+
+	if (($# != 4)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	if dladm show-vnic $name > /dev/null 2>&1; then
+		typeset avid=$(dladm show-vnic -p -o vid $name)
+		typeset aover=$(dladm show-vnic -p -o over $name)
+		typeset azone=$(dladm show-linkprop -cp zone -o value $name)
+		if (($avid == $vid)) && [[ "$aover" == "$over" ]] && \
+		    [[ "$azone" == "$zone" ]]
+		then
+			return 0
+		else
+			return 1
+		fi
+	else
+		return 1
+	fi
+}
+
+function create_vnic	# args: name over vid zone (order differs from vnic_exists)
+{
+	typeset name=$1
+	typeset over=$2
+	typeset vid=$3
+	typeset zone=$4
+	typeset vid_opt=""
+	typeset vnic_info="$name, vid: $vid, over: $over, zone: $zone"
+	typeset err="failed to create VNIC: $vnic_info"
+
+	if (($# != 4)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	# Only pass -v to dladm when a non-zero VID was requested.
+	if ((vid != 0)); then
+		vid_opt="-v $vid"
+	fi
+
+	dbg "creating VNIC: $vnic_info"
+	if ! dladm create-vnic -t -l $over $vid_opt $name > /dev/null 2>&1
+	then
+		maybe_fail "$err"
+		return 1
+	fi
+
+	dbg "created VNIC: $vnic_info"
+	if ! zonecfg -z $zone "add net; set physical=$name; end"; then
+		maybe_fail "failed to assign $name to $zone"
+		return 1
+	fi
+
+	dbg "assigned VNIC $name to $zone"
+	if zoneadm -z $zone reboot; then
+		dbg "rebooted $zone"
+		#
+		# Make sure the vnic is visible before returning. Without this
+		# a create_addr command following immediately afterwards could
+		# fail because the zone is up but the vnic isn't visible yet.
+		#
+		sleep 1
+		return 0
+	fi
+
+	maybe_fail "failed to reboot $zone"
+}
+
+function delete_vnic	# args: name vid zone
+{
+	typeset name=$1
+	typeset vid=$2
+	typeset zone=$3
+	typeset vnic_info="$name, vid: $vid, zone: $zone"
+	typeset err2="failed to delete VNIC: $vnic_info"
+
+	if (($# != 3)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	dbg "assigning VNIC $name from $zone to GZ"
+
+	# Drop the VNIC from the zone's config and reboot the zone first.
+	if ! zonecfg -z $zone "remove net physical=$name"; then
+		maybe_fail "failed to remove $name from $zone"
+		return 1
+	fi
+	if ! zoneadm -z $zone reboot; then
+		maybe_fail "failed to reboot $zone"
+		return 1
+	fi
+
+	dbg "deleting VNIC: $vnic_info"
+	if dladm delete-vnic $name > /dev/null; then
+		dbg "deleted VNIC: $vnic_info"
+		return 0
+	fi
+
+	maybe_fail "$err2"
+}
+
+function create_addr	# args: zone vnic ip; creates static addr <vnic>/v4
+{
+	typeset zone=$1
+	typeset vnic=$2
+	typeset ip=$3
+	typeset ipname=${vnic}/v4
+
+	if (($# != 3)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	if zlogin $zone ipadm create-addr -t -T static -a $ip \
+	    $ipname > /dev/null
+	then
+		dbg "created addr $ipname ($ip) in zone $zone"
+		return 0
+	fi
+
+	maybe_fail "failed to create addr $ipname ($ip) in zone $zone"
+}
+
+function create_addr6	# args: zone vnic ip; creates <vnic>/v6 and <vnic>/v6add
+{
+	typeset zone=$1
+	typeset vnic=$2
+	typeset ip=$3
+	typeset ll_name=${vnic}/v6
+	typeset uni_name=${vnic}/v6add
+	typeset err1="failed to create link-local addr $ll_name in zone $zone"
+	typeset err2="failed to create unicast addr $uni_name in zone $zone"
+
+	if (($# != 3)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	if zlogin $zone ipadm create-addr -t -T addrconf $ll_name; then
+		dbg "created link-local addr $ll_name in zone $zone"
+	else
+		maybe_fail "$err1"
+		return 1
+	fi
+
+	if zlogin $zone ipadm create-addr -t -T static -a $ip/64 $uni_name; then
+		dbg "created unicast addr $uni_name in zone $zone"
+	else
+		maybe_fail "$err2"
+	fi
+}
+
+function delete_addr	# args: zone ifname version; "v6" also removes <if>/v6add
+{
+	typeset zone=$1
+	typeset ifname=$2
+	typeset version=$3
+	typeset ipname=$ifname/$version
+
+	if (($# != 3)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	if zlogin $zone ipadm show-addr $ipname > /dev/null 2>&1; then
+		if zlogin $zone ipadm delete-addr $ipname > /dev/null; then
+			dbg "deleted addr $ipname in zone $zone"
+		else
+			maybe_fail "failed to delete addr $ipname in zone $zone"
+			return 1
+		fi
+	else
+		dbg "addr $ipname doesn't exist in zone $zone"
+	fi
+
+	if [[ "v6" == "$version" ]]; then
+		typeset ipname=$ifname/v6add
+		typeset err="failed to delete addr $ipname in zone $zone"
+
+		if zlogin $zone ipadm show-addr $ipname > /dev/null 2>&1; then
+			if zlogin $zone ipadm delete-addr $ipname > /dev/null
+			then
+				dbg "deleted addr $ipname in zone $zone"
+			else
+				maybe_fail "$err"
+			fi
+		else
+			dbg "addr $ipname doesn't exist in zone $zone"
+		fi
+	fi
+}
+
+function delete_if	# args: zone ifname; a missing interface is not an error
+{
+	typeset zone=$1
+	typeset ifname=$2
+	typeset err="failed to delete interface $ifname in zone $zone"
+
+	if (($# != 2)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	if zlogin $zone ipadm show-if $ifname > /dev/null 2>&1; then
+		if zlogin $zone ipadm delete-if $ifname > /dev/null; then
+			dbg "deleted interface $ifname in zone $zone"
+		else
+			maybe_fail "$err"
+		fi
+	else
+		dbg "interface $ifname doesn't exist in zone $zone"
+	fi
+}
+
+function ip_fwd_enable	# enable IPv4, then IPv6, forwarding in zone $1
+{
+	typeset zone=$1
+
+	if (($# != 1)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	if zlogin $zone routeadm -p ipv4-forwarding | \
+	    egrep 'current=enabled' > /dev/null
+	then
+		dbg "IPv4 forwarding already enabled for $zone"
+	else
+		if zlogin $zone routeadm -ue ipv4-forwarding; then
+			dbg "enabled IPv4 forwarding for $zone"
+		else
+			maybe_fail "failed to enable IPv4 forwarding for $zone"
+			return 1
+		fi
+	fi
+
+	if zlogin $zone routeadm -p ipv6-forwarding | \
+	    egrep 'current=enabled' > /dev/null
+	then
+		dbg "IPv6 forwarding already enabled for $zone"
+	else
+		if zlogin $zone routeadm -ue ipv6-forwarding; then
+			dbg "enabled IPv6 forwarding for $zone"
+		else
+			maybe_fail "failed to enable IPv6 forwarding for $zone"
+		fi
+	fi
+}
+
+function ip_fwd_disable	# disable IPv4, then IPv6, forwarding in zone $1
+{
+	typeset zone=$1
+
+	if (($# != 1)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	if zlogin $zone routeadm -p ipv4-forwarding | \
+	    egrep 'current=disabled' > /dev/null
+	then
+		dbg "IPv4 forwarding already disabled for $zone"
+	else
+		if zlogin $zone routeadm -ud ipv4-forwarding; then
+			dbg "disabled IPv4 forwarding in $zone"
+		else
+			maybe_fail "failed to disable IPv4 forwarding in $zone"
+			return 1
+		fi
+	fi
+
+	if zlogin $zone routeadm -p ipv6-forwarding | \
+	    egrep 'current=disabled' > /dev/null
+	then
+		dbg "IPv6 forwarding already disabled for $zone"
+	else
+		if zlogin $zone routeadm -ud ipv6-forwarding; then
+			dbg "disabled IPv6 forwarding in $zone"
+		else
+			maybe_fail "failed to disable IPv6 forwarding in $zone"
+		fi
+	fi
+}
+
+function add_route	# args: zone dest net gateway (dest is not used here)
+{
+	typeset zone=$1
+	typeset dest=$2
+	typeset net=$3
+	typeset gateway=$4
+
+	if (($# != 4)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	if zlogin $zone route -n add $net $gateway > /dev/null; then
+		dbg "added route $gateway => $net to $zone"
+		return 0
+	fi
+
+	maybe_fail "failed to add route $gateway => $net to $zone"
+}
+
+function add_route6	# args: zone dest net gateway (dest is not used here)
+{
+	typeset zone=$1
+	typeset dest=$2
+	typeset net=$3
+	typeset gateway=$4
+
+	if (($# != 4)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	if zlogin $zone route -n add -inet6 $net $gateway > /dev/null
+	then
+		dbg "added route $gateway => $net to $zone"
+		return 0
+	fi
+
+	maybe_fail "failed to add route $gateway => $net to $zone"
+}
+
+function rm_route	# args: zone dest net gateway; no-op unless gateway matches
+{
+	typeset zone=$1
+	typeset dest=$2
+	typeset net=$3
+	typeset gateway=$4
+	typeset gw=$(zlogin $zone route -n get $dest | \
+	    grep gateway | awk '{ print $2 }')
+	typeset err="failed to remove route $gateway => $net from $zone"
+
+	if (($# != 4)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	if [[ "$gw" == "$gateway" ]]; then
+		if zlogin $zone route -n delete $net $gateway > /dev/null
+		then
+			dbg "removed route $gateway => $net from $zone"
+		else
+			maybe_fail "$err"
+		fi
+	else
+		dbg "$zone already lacked route $gateway => $net"
+	fi
+}
+
+function rm_route6	# args: zone dest net gateway; no-op unless gateway matches
+{
+	typeset zone=$1
+	typeset dest=$2
+	typeset net=$3
+	typeset gateway=$4
+	typeset gw=$(zlogin $zone route -n get -inet6 $dest | \
+	    grep gateway | awk '{ print $2 }')
+	typeset err="failed to remove route $gateway => $net from $zone"
+
+	if (($# != 4)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	if [[ "$gw" == "$gateway" ]]; then
+		if zlogin $zone route -n delete -inet6 $net $gateway > /dev/null
+		then
+			dbg "removed route $gateway => $net from $zone"
+		else
+			maybe_fail "$err"
+		fi
+	else
+		dbg "$zone already lacked route $gateway => $net"
+	fi
+}
+
+function set_linkprop	# args: link prop val; runs dladm set-linkprop
+{
+	typeset link=$1
+	typeset prop=$2
+	typeset val=$3
+	typeset err="failed to set $link prop: $prop=$val"
+
+	if (($# != 3)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	dbg "attempt to set $link prop: $prop=$val"
+	if dladm set-linkprop -p $prop=$val $link; then
+		dbg "set $link prop: $prop=$val"
+		return 0
+	fi
+
+	maybe_fail "$err"
+}
+
+function ping		# args: zone src dst (src only appears in messages)
+{
+	typeset zone=$1
+	typeset src=$2
+	typeset dst=$3
+	typeset info="$src -> $dst"
+
+	if (($# != 3)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	dbg "ping: $info"
+	if zlogin $zone ping $dst > /dev/null 2>&1; then
+		dbg "successful ping: $info"
+		return 0
+	fi
+
+	maybe_fail "could not ping: $info"
+}
+
+function ping_udp	# args: client client_ip server_ip size num
+{
+	typeset client=$1
+	typeset client_ip=$2
+	typeset server_ip=$3
+	typeset size=$4
+	typeset num=$5
+	typeset info="$client_ip -> $server_ip (size: $size)"
+
+	if (($# != 5)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	dbg "UDP ping: $info"
+	if zlogin $client ping -ns -U $server_ip $size $num > /dev/null; then
+		dbg "UDP ping passed: $info"
+		return 0
+	fi
+
+	maybe_fail "UDP ping failed: $info"
+}
+
+function start_server	# args: zone type ip port ofile; sets listener_ppid
+{
+	typeset zone=$1
+	typeset type=$2
+	typeset ip=$3
+	typeset port=$4
+	typeset ofile=$5
+
+	if (($# != 5)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	dbg "start server $ofile"
+	zlogin $zone \
+	    /usr/bin/socat -u ${type}-LISTEN:$port,bind=[$ip],reuseaddr \
+	    CREATE:$ofile &
+	listener_ppid=$!
+	dbg "listener PPID: $listener_ppid, zone $zone"
+}
+
+function wait_for_pid	# args: pid seconds; poll once a second until pid is gone
+{
+	typeset pid=$1
+	typeset seconds=$2
+	typeset s=0
+
+	if (($# != 2)); then
+		fail "$0: incorrect number of args provided"
+	fi
+
+	while true; do
+		if kill -0 $pid > /dev/null 2>&1; then
+			if ((seconds == s)); then
+				maybe_fail "timed out waiting for pid $pid"
+				return 1
+			fi
+			dbg "waiting for pid $pid"
+			sleep 1
+			((s++))
+		else
+			return 0
+		fi
+	done
+}
diff --git a/usr/src/uts/common/inet/ip/ip6.c b/usr/src/uts/common/inet/ip/ip6.c
index 659eda42f3..26e7be2fe8 100644
--- a/usr/src/uts/common/inet/ip/ip6.c
+++ b/usr/src/uts/common/inet/ip/ip6.c
@@ -22,6 +22,7 @@
  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 1990 Mentat Inc.
  * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #include
@@ -2729,109 +2730,16 @@ done:
 	return (length);
 }
 
-/*
- * Try to determine where and what are the IPv6 header length and
- * pointer to nexthdr value for the upper layer protocol (or an
- * unknown next hdr).
- *
- * Parameters returns a pointer to the nexthdr value;
- * Must handle malformed packets of various sorts.
- * Function returns failure for malformed cases.
- */ -boolean_t -ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, - uint8_t **nexthdrpp) -{ - uint16_t length; - uint_t ehdrlen; - uint8_t *nexthdrp; - uint8_t *whereptr; - uint8_t *endptr; - ip6_dest_t *desthdr; - ip6_rthdr_t *rthdr; - ip6_frag_t *fraghdr; - - ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); - length = IPV6_HDR_LEN; - whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ - endptr = mp->b_wptr; - - nexthdrp = &ip6h->ip6_nxt; - while (whereptr < endptr) { - /* Is there enough left for len + nexthdr? */ - if (whereptr + MIN_EHDR_LEN > endptr) - break; - - switch (*nexthdrp) { - case IPPROTO_HOPOPTS: - case IPPROTO_DSTOPTS: - /* Assumes the headers are identical for hbh and dst */ - desthdr = (ip6_dest_t *)whereptr; - ehdrlen = 8 * (desthdr->ip6d_len + 1); - if ((uchar_t *)desthdr + ehdrlen > endptr) - return (B_FALSE); - nexthdrp = &desthdr->ip6d_nxt; - break; - case IPPROTO_ROUTING: - rthdr = (ip6_rthdr_t *)whereptr; - ehdrlen = 8 * (rthdr->ip6r_len + 1); - if ((uchar_t *)rthdr + ehdrlen > endptr) - return (B_FALSE); - nexthdrp = &rthdr->ip6r_nxt; - break; - case IPPROTO_FRAGMENT: - fraghdr = (ip6_frag_t *)whereptr; - ehdrlen = sizeof (ip6_frag_t); - if ((uchar_t *)&fraghdr[1] > endptr) - return (B_FALSE); - nexthdrp = &fraghdr->ip6f_nxt; - break; - case IPPROTO_NONE: - /* No next header means we're finished */ - default: - *hdr_length_ptr = length; - *nexthdrpp = nexthdrp; - return (B_TRUE); - } - length += ehdrlen; - whereptr += ehdrlen; - *hdr_length_ptr = length; - *nexthdrpp = nexthdrp; - } - switch (*nexthdrp) { - case IPPROTO_HOPOPTS: - case IPPROTO_DSTOPTS: - case IPPROTO_ROUTING: - case IPPROTO_FRAGMENT: - /* - * If any know extension headers are still to be processed, - * the packet's malformed (or at least all the IP header(s) are - * not in the same mblk - and that should never happen. 
- */ - return (B_FALSE); - - default: - /* - * If we get here, we know that all of the IP headers were in - * the same mblk, even if the ULP header is in the next mblk. - */ - *hdr_length_ptr = length; - *nexthdrpp = nexthdrp; - return (B_TRUE); - } -} - /* * Return the length of the IPv6 related headers (including extension headers) * Returns a length even if the packet is malformed. */ -int +uint16_t ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) { uint16_t hdr_len; - uint8_t *nexthdrp; - (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); + (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, NULL); return (hdr_len); } diff --git a/usr/src/uts/common/inet/ip/ip6_input.c b/usr/src/uts/common/inet/ip/ip6_input.c index cdff35273e..066b5c3f56 100644 --- a/usr/src/uts/common/inet/ip/ip6_input.c +++ b/usr/src/uts/common/inet/ip/ip6_input.c @@ -23,7 +23,7 @@ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved * * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright 2018 Joyent, Inc. + * Copyright 2019 Joyent, Inc. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -1903,13 +1903,12 @@ ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h, return (ip_input_sw_cksum_v6(mp, ip6h, ira)); } + hck_flags = DB_CKSUMFLAGS(mp); + /* * We apply this for all ULP protocols. Does the HW know to * not set the flags for SCTP and other protocols. */ - - hck_flags = DB_CKSUMFLAGS(mp); - if (hck_flags & HCK_FULLCKSUM_OK) { /* * Hardware has already verified the checksum. diff --git a/usr/src/uts/common/inet/ip/ip_input.c b/usr/src/uts/common/inet/ip/ip_input.c index aea49c19d3..cd6c50c446 100644 --- a/usr/src/uts/common/inet/ip/ip_input.c +++ b/usr/src/uts/common/inet/ip/ip_input.c @@ -23,7 +23,7 @@ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright 2018 Joyent, Inc. + * Copyright 2019 Joyent, Inc. */ /* Copyright (c) 1990 Mentat Inc. 
*/
 
@@ -57,6 +57,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -659,11 +660,12 @@ ill_input_short_v4(mblk_t *mp, void *iph_arg, void *nexthop_arg,
 	}
 
 	/*
-	 * If there is a good HW IP header checksum we clear the need
+	 * If the packet originated from a same-machine sender or
+	 * there is a good HW IP header checksum, we clear the need to
 	 * look at the IP header checksum.
 	 */
-	if ((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) &&
-	    ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
+	if (((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) &&
+	    ILL_HCKSUM_CAPABLE(ill) && dohwcksum)) {
 		/* Header checksum was ok. Clear the flag */
 		DB_CKSUMFLAGS(mp) &= ~HCK_IPV4_HDRCKSUM;
 		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;
@@ -1134,8 +1136,12 @@ ip_forward_xmit_v4(nce_t *nce, ill_t *ill, mblk_t *mp, ipha_t *ipha,
 		icmp_time_exceeded(mp, ICMP_TTL_EXCEEDED, ira);
 		return;
 	}
+
+	/*
+	 * Count the forward as a hop and update the checksum
+	 * accordingly.
+	 */
 	ipha->ipha_ttl--;
-	/* Adjust the checksum to reflect the ttl decrement. */
 	sum = (int)ipha->ipha_hdr_checksum + IP_HDR_CSUM_TTL_ADJUST;
 	ipha->ipha_hdr_checksum = (uint16_t)(sum + (sum >> 16));
 
@@ -2240,6 +2246,7 @@ ip_input_cksum_v4(iaflags_t iraflags, mblk_t *mp, ipha_t *ipha,
 		/* No ULP checksum to verify. */
 		return (B_TRUE);
 	}
+
 	/*
 	 * Revert to software checksum calculation if the interface
 	 * isn't capable of checksum offload.
@@ -2252,13 +2259,12 @@ ip_input_cksum_v4(iaflags_t iraflags, mblk_t *mp, ipha_t *ipha,
 		return (ip_input_sw_cksum_v4(mp, ipha, ira));
 	}
 
+	hck_flags = DB_CKSUMFLAGS(mp);
+
 	/*
 	 * We apply this for all ULP protocols. Does the HW know to
 	 * not set the flags for SCTP and other protocols.
 	 */
-
-	hck_flags = DB_CKSUMFLAGS(mp);
-
 	if (hck_flags & HCK_FULLCKSUM_OK) {
 		/*
 		 * Hardware has already verified the checksum.
diff --git a/usr/src/uts/common/inet/ip6.h b/usr/src/uts/common/inet/ip6.h
index 4f5b81c12f..01c25b52b5 100644
--- a/usr/src/uts/common/inet/ip6.h
+++ b/usr/src/uts/common/inet/ip6.h
@@ -23,6 +23,7 @@
  * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #ifndef _INET_IP6_H
@@ -255,7 +256,7 @@ extern in6_addr_t ip_get_dst_v6(ip6_t *, const mblk_t *, boolean_t *);
 extern ip6_rthdr_t *ip_find_rthdr_v6(ip6_t *, uint8_t *);
 extern boolean_t ip_hdr_length_nexthdr_v6(mblk_t *, ip6_t *, uint16_t *,
     uint8_t **);
-extern int ip_hdr_length_v6(mblk_t *, ip6_t *);
+extern uint16_t ip_hdr_length_v6(mblk_t *, ip6_t *);
 extern uint32_t ip_massage_options_v6(ip6_t *, ip6_rthdr_t *, netstack_t *);
 extern void ip_forward_xmit_v6(nce_t *, mblk_t *, ip6_t *, ip_recv_attr_t *,
     uint32_t, uint32_t);
diff --git a/usr/src/uts/common/inet/ip_impl.h b/usr/src/uts/common/inet/ip_impl.h
index 2b37528eb9..87086b4c17 100644
--- a/usr/src/uts/common/inet/ip_impl.h
+++ b/usr/src/uts/common/inet/ip_impl.h
@@ -21,6 +21,7 @@
 /*
  * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #ifndef _INET_IP_IMPL_H
@@ -159,9 +160,26 @@ extern "C" {
 #define	ILL_DIRECT_CAPABLE(ill) \
 	(((ill)->ill_capabilities & ILL_CAPAB_DLD_DIRECT) != 0)
 
-/* This macro is used by the mac layer */
+/*
+ * Determine if a mblk needs to take the "slow path", aka OTH
+ * softring. There are multiple reasons why a mblk might take the slow
+ * path.
+ *
+ * o The mblk is not a data message.
+ *
+ * o There is more than one outstanding reference to the mblk.
+ *
+ * o The IP header is not aligned (we assume alignment in the checksum
+ * routine).
+ *
+ * o The mblk doesn't contain enough data to populate a simple IP header.
+ *
+ * Both arguments are evaluated more than once.
+ */
 #define	MBLK_RX_FANOUT_SLOWPATH(mp, ipha) \
-	(DB_TYPE(mp) != M_DATA || DB_REF(mp) != 1 || !OK_32PTR(ipha) || \
-	(((uchar_t *)ipha + IP_SIMPLE_HDR_LENGTH) >= (mp)->b_wptr))
+	(DB_TYPE(mp) != M_DATA || \
+	(DB_REF(mp) != 1) || \
+	!OK_32PTR(ipha) || \
+	(((uchar_t *)(ipha) + IP_SIMPLE_HDR_LENGTH) >= (mp)->b_wptr))
 
 /*
diff --git a/usr/src/uts/common/io/bridge.c b/usr/src/uts/common/io/bridge.c
index bc54527515..389948e295 100644
--- a/usr/src/uts/common/io/bridge.c
+++ b/usr/src/uts/common/io/bridge.c
@@ -23,6 +23,7 @@
  * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  * Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
  */
 
 /*
@@ -41,6 +42,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -1705,7 +1707,12 @@ reform_vlan_header(mblk_t *mp, uint16_t vlanid, uint16_t tci, uint16_t pvid)
 	if (mp == NULL)
 		return (mp);
 
-	/* No forwarded packet can have hardware checksum enabled */
+	/*
+	 * A forwarded packet cannot have hardware offloads enabled
+	 * because we don't know if the destination can handle them.
+	 * By this point, any hardware offloads present should have
+	 * been emulated.
+	 */
 	DB_CKSUMFLAGS(mp) = 0;
 
 	/* Get the no-modification cases out of the way first */
@@ -1907,17 +1914,22 @@ bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp,
 			blp->bl_trillthreads++;
 			mutex_exit(&blp->bl_trilllock);
 			update_header(mp, hdr_info, B_FALSE);
-			if (is_xmit)
-				mp = mac_fix_cksum(mp);
-			/* all trill data frames have Inner.VLAN */
+
+			/*
+			 * All trill data frames have
+			 * Inner.VLAN.
+ */ mp = reform_vlan_header(mp, vlanid, tci, 0); + if (mp == NULL) { KIINCR(bki_drops); - fwd_unref(bfp); - return (NULL); + goto done; } + trill_encap_fn(tdp, blp, hdr_info, mp, bfp->bf_trill_nick); + +done: mutex_enter(&blp->bl_trilllock); if (--blp->bl_trillthreads == 0 && blp->bl_trilldata == NULL) @@ -1959,17 +1971,16 @@ bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp, mpsend = copymsg(mp); } - if (!from_trill && is_xmit) - mpsend = mac_fix_cksum(mpsend); - mpsend = reform_vlan_header(mpsend, vlanid, tci, blpsend->bl_pvid); + if (mpsend == NULL) { KIINCR(bki_drops); continue; } KIINCR(bki_forwards); + /* * No need to bump up the link reference count, as * the forwarding entry itself holds a reference to @@ -1979,11 +1990,12 @@ bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp, mac_rx_common(blpsend->bl_mh, NULL, mpsend); } else { KLPINCR(blpsend, bkl_xmit); - MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, + mpsend = mac_ring_tx(blpsend->bl_mh, NULL, mpsend); freemsg(mpsend); } } + /* * Handle a special case: if we're transmitting to the original * link, then check whether the localaddr flag is set. 
If it @@ -2070,11 +2082,9 @@ bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp, mpsend = copymsg(mp); } - if (!from_trill && is_xmit) - mpsend = mac_fix_cksum(mpsend); - mpsend = reform_vlan_header(mpsend, vlanid, tci, blpsend->bl_pvid); + if (mpsend == NULL) { KIINCR(bki_drops); continue; @@ -2084,10 +2094,13 @@ bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp, KIINCR(bki_unknown); else KIINCR(bki_mbcast); + KLPINCR(blpsend, bkl_xmit); - if ((mpcopy = copymsg(mpsend)) != NULL) + if ((mpcopy = copymsg(mpsend)) != NULL) { mac_rx_common(blpsend->bl_mh, NULL, mpcopy); - MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, mpsend); + } + + mpsend = mac_ring_tx(blpsend->bl_mh, NULL, mpsend); freemsg(mpsend); link_unref(blpsend); } @@ -2465,7 +2478,7 @@ bridge_xmit_cb(mac_handle_t mh, mac_ring_handle_t rh, mblk_t *mpnext) (blp->bl_flags & BLF_SDUFAIL)))) { KIINCR(bki_sent); KLINCR(bkl_xmit); - MAC_RING_TX(blp->bl_mh, rh, mpnext, mp); + mp = mac_ring_tx(blp->bl_mh, rh, mpnext); return (mp); } @@ -2523,7 +2536,7 @@ bridge_xmit_cb(mac_handle_t mh, mac_ring_handle_t rh, mblk_t *mpnext) B_FALSE, B_TRUE); } if (mp != NULL) { - MAC_RING_TX(blp->bl_mh, rh, mp, mp); + mp = mac_ring_tx(blp->bl_mh, rh, mp); if (mp == NULL) { KIINCR(bki_sent); KLINCR(bkl_xmit); @@ -2589,7 +2602,7 @@ bridge_trill_decaps(bridge_link_t *blp, mblk_t *mp, uint16_t ingress_nick) /* Deliver a copy locally as well */ if ((mpcopy = copymsg(mp)) != NULL) mac_rx_common(blp->bl_mh, NULL, mpcopy); - MAC_RING_TX(blp->bl_mh, NULL, mp, mp); + mp = mac_ring_tx(blp->bl_mh, NULL, mp); } if (mp == NULL) { KIINCR(bki_sent); @@ -2610,7 +2623,7 @@ bridge_trill_output(bridge_link_t *blp, mblk_t *mp) bridge_inst_t *bip = blp->bl_inst; /* used by macros */ mac_trill_snoop(blp->bl_mh, mp); - MAC_RING_TX(blp->bl_mh, NULL, mp, mp); + mp = mac_ring_tx(blp->bl_mh, NULL, mp); if (mp == NULL) { KIINCR(bki_sent); KLINCR(bkl_xmit); diff --git a/usr/src/uts/common/io/dls/dls_link.c 
b/usr/src/uts/common/io/dls/dls_link.c index 6f9049b724..4099d0b801 100644 --- a/usr/src/uts/common/io/dls/dls_link.c +++ b/usr/src/uts/common/io/dls/dls_link.c @@ -21,7 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2018 Joyent, Inc. + * Copyright 2019 Joyent, Inc. */ /* @@ -566,7 +566,13 @@ dls_rx_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp, dls_head_t *dhp; mod_hash_key_t key; + /* + * We expect to deal with only a single packet. + */ + ASSERT3P(mp->b_next, ==, NULL); + DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err); + if (err != 0) goto drop; diff --git a/usr/src/uts/common/io/fcoe/fcoe_fc.c b/usr/src/uts/common/io/fcoe/fcoe_fc.c index 42764e48d6..54402b027f 100644 --- a/usr/src/uts/common/io/fcoe/fcoe_fc.c +++ b/usr/src/uts/common/io/fcoe/fcoe_fc.c @@ -22,6 +22,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2019 Joyent, Inc. */ /* @@ -39,6 +40,7 @@ #include #include #include +#include /* * FCoE header files @@ -209,6 +211,7 @@ tx_frame: ret_cookie = mac_tx(mac->fm_cli_handle, FRM2MBLK(frm), 0, MAC_TX_NO_ENQUEUE, &ret_mblk); if (ret_cookie != (mac_tx_cookie_t)NULL) { + frm->frm_netb = ret_mblk; mutex_enter(&mac->fm_mutex); (void) cv_reltimedwait(&mac->fm_tx_cv, &mac->fm_mutex, drv_usectohz(100000), TR_CLOCK_TICK); @@ -265,7 +268,7 @@ fcoe_alloc_netb(fcoe_port_t *eport, uint32_t fc_frame_size, uint8_t **ppfc) static void fcoe_free_netb(void *netb) { - freeb((mblk_t *)netb); + freemsgchain((mblk_t *)netb); } fcoe_frame_t * diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c index 76b4765de6..0a52043a15 100644 --- a/usr/src/uts/common/io/mac/mac.c +++ b/usr/src/uts/common/io/mac/mac.c @@ -1753,7 +1753,7 @@ mac_client_clear_flow_cb(mac_client_handle_t mch) flow_entry_t *flent = mcip->mci_flent; mutex_enter(&flent->fe_lock); - flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop; + flent->fe_cb_fn 
= (flow_fn_t)mac_rx_def; flent->fe_cb_arg1 = NULL; flent->fe_cb_arg2 = NULL; flent->fe_flags |= FE_MC_NO_DATAPATH; @@ -1936,8 +1936,7 @@ mac_hwring_send_priv(mac_client_handle_t mch, mac_ring_handle_t rh, mblk_t *mp) mac_client_impl_t *mcip = (mac_client_impl_t *)mch; mac_impl_t *mip = mcip->mci_mip; - MAC_TX(mip, rh, mp, mcip); - return (mp); + return (mac_provider_tx(mip, rh, mp, mcip)); } /* @@ -4712,9 +4711,9 @@ mac_group_remmac(mac_group_t *group, const uint8_t *addr) } /* - * This is the entry point for packets transmitted through the bridging code. - * If no bridge is in place, MAC_RING_TX transmits using tx ring. The 'rh' - * pointer may be NULL to select the default ring. + * This is the entry point for packets transmitted through the bridge + * code. If no bridge is in place, mac_ring_tx() transmits via the tx + * ring. The 'rh' pointer may be NULL to select the default ring. */ mblk_t * mac_bridge_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp) @@ -4731,8 +4730,34 @@ mac_bridge_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp) mac_bridge_ref_cb(mh, B_TRUE); mutex_exit(&mip->mi_bridge_lock); if (mh == NULL) { - MAC_RING_TX(mip, rh, mp, mp); + mp = mac_ring_tx((mac_handle_t)mip, rh, mp); } else { + /* + * The bridge may place this mblk on a provider's Tx + * path, a mac's Rx path, or both. Since we don't have + * enough information at this point, we can't be sure + * that the destination(s) are capable of handling the + * hardware offloads requested by the mblk. We emulate + * them here as it is the safest choice. In the + * future, if bridge performance becomes a priority, + * we can elide the emulation here and leave the + * choice up to bridge. + * + * We don't clear the DB_CKSUMFLAGS here because + * HCK_IPV4_HDRCKSUM (Tx) and HCK_IPV4_HDRCKSUM_OK + * (Rx) still have the same value. 
If the bridge + * receives a packet from a HCKSUM_IPHDRCKSUM NIC then + * the mac(s) it is forwarded on may calculate the + * checksum again, but incorrectly (because the + * checksum field is not zero). Until the + * HCK_IPV4_HDRCKSUM/HCK_IPV4_HDRCKSUM_OK issue is + * resovled, we leave the flag clearing in bridge + * itself. + */ + if ((DB_CKSUMFLAGS(mp) & (HCK_TX_FLAGS | HW_LSO_FLAGS)) != 0) { + mac_hw_emul(&mp, NULL, NULL, MAC_ALL_EMULS); + } + mp = mac_bridge_tx_cb(mh, rh, mp); mac_bridge_ref_cb(mh, B_FALSE); } @@ -8804,3 +8829,52 @@ mac_led_set(mac_handle_t mh, mac_led_mode_t desired) return (ret); } + +/* + * Send packets through the Tx ring ('mrh') or through the default + * handler if no ring is specified. Before passing the packet down to + * the MAC provider, emulate any hardware offloads which have been + * requested but are not supported by the provider. + */ +mblk_t * +mac_ring_tx(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + if (mrh == NULL) + mrh = mip->mi_default_tx_ring; + + if (mrh == NULL) + return (mip->mi_tx(mip->mi_driver, mp)); + else + return (mac_hwring_tx(mrh, mp)); +} + +/* + * This is the final stop before reaching the underlying MAC provider. + * This is also where the bridging hook is inserted. Packets that are + * bridged will return through mac_bridge_tx(), with rh nulled out if + * the bridge chooses to send output on a different link due to + * forwarding. + */ +mblk_t * +mac_provider_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp, + mac_client_impl_t *mcip) +{ + /* + * If there is a bound Hybrid I/O share, send packets through + * the default tx ring. When there's a bound Hybrid I/O share, + * the tx rings of this client are mapped in the guest domain + * and not accessible from here. 
+ */ + if (mcip->mci_state_flags & MCIS_SHARE_BOUND) + rh = mip->mi_default_tx_ring; + + if (mip->mi_promisc_list != NULL) + mac_promisc_dispatch(mip, mp, mcip, B_FALSE); + + if (mip->mi_bridge_link == NULL) + return (mac_ring_tx((mac_handle_t)mip, rh, mp)); + else + return (mac_bridge_tx(mip, rh, mp)); +} diff --git a/usr/src/uts/common/io/mac/mac_bcast.c b/usr/src/uts/common/io/mac/mac_bcast.c index 1ff33c3578..5302b89196 100644 --- a/usr/src/uts/common/io/mac/mac_bcast.c +++ b/usr/src/uts/common/io/mac/mac_bcast.c @@ -21,6 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2018 Joyent, Inc. */ #include @@ -146,7 +147,7 @@ mac_bcast_send(void *arg1, void *arg2, mblk_t *mp_chain, boolean_t is_loopback) uint64_t gen; uint_t i; mblk_t *mp_chain1; - flow_entry_t *flent; + flow_entry_t *flent; int err; rw_enter(&mip->mi_rw_lock, RW_READER); @@ -182,13 +183,6 @@ mac_bcast_send(void *arg1, void *arg2, mblk_t *mp_chain, boolean_t is_loopback) */ if ((mp_chain1 = mac_copymsgchain_cksum(mp_chain)) == NULL) break; - /* - * Fix the checksum for packets originating - * from the local machine. 
- */ - if ((src_mcip != NULL) && - (mp_chain1 = mac_fix_cksum(mp_chain1)) == NULL) - break; FLOW_TRY_REFHOLD(flent, err); if (err != 0) { @@ -246,7 +240,8 @@ mac_bcast_send(void *arg1, void *arg2, mblk_t *mp_chain, boolean_t is_loopback) MCIP_STAT_UPDATE(src_mcip, brdcstxmt, 1); MCIP_STAT_UPDATE(src_mcip, brdcstxmtbytes, msgdsize(mp_chain)); - MAC_TX(mip, mip->mi_default_tx_ring, mp_chain, src_mcip); + mp_chain = mac_provider_tx(mip, mip->mi_default_tx_ring, + mp_chain, src_mcip); if (mp_chain != NULL) freemsgchain(mp_chain); } else { diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c index 7ff05f2ab6..605cb51bf7 100644 --- a/usr/src/uts/common/io/mac/mac_client.c +++ b/usr/src/uts/common/io/mac/mac_client.c @@ -115,6 +115,7 @@ #include #include #include +#include #include #include #include @@ -1357,7 +1358,7 @@ mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name, mcip->mci_mip = mip; mcip->mci_upper_mip = NULL; - mcip->mci_rx_fn = mac_pkt_drop; + mcip->mci_rx_fn = mac_rx_def; mcip->mci_rx_arg = NULL; mcip->mci_rx_p_fn = NULL; mcip->mci_rx_p_arg = NULL; @@ -1629,7 +1630,7 @@ mac_rx_set(mac_client_handle_t mch, mac_rx_t rx_fn, void *arg) void mac_rx_clear(mac_client_handle_t mch) { - mac_rx_set(mch, mac_pkt_drop, NULL); + mac_rx_set(mch, mac_rx_def, NULL); } void @@ -1641,7 +1642,7 @@ mac_rx_barrier(mac_client_handle_t mch) i_mac_perim_enter(mip); /* If a RX callback is set, quiesce and restart that datapath */ - if (mcip->mci_rx_fn != mac_pkt_drop) { + if (mcip->mci_rx_fn != mac_rx_def) { mac_rx_client_quiesce(mch); mac_rx_client_restart(mch); } @@ -2998,7 +2999,7 @@ mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip, mac_misc_stat_delete(flent); /* Initialize the receiver function to a safe routine */ - flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop; + flent->fe_cb_fn = (flow_fn_t)mac_rx_def; flent->fe_cb_arg1 = NULL; flent->fe_cb_arg2 = NULL; @@ -3578,7 +3579,9 @@ 
mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint, srs_tx = &srs->srs_tx; if (srs_tx->st_mode == SRS_TX_DEFAULT && (srs->srs_state & SRS_ENQUEUED) == 0 && - mip->mi_nactiveclients == 1 && mp_chain->b_next == NULL) { + mip->mi_nactiveclients == 1 && + mp_chain->b_next == NULL && + (DB_CKSUMFLAGS(mp_chain) & HW_LSO) == 0) { uint64_t obytes; /* @@ -3613,7 +3616,9 @@ mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint, obytes = (mp_chain->b_cont == NULL ? MBLKL(mp_chain) : msgdsize(mp_chain)); - MAC_TX(mip, srs_tx->st_arg2, mp_chain, mcip); + mp_chain = mac_provider_tx(mip, srs_tx->st_arg2, mp_chain, + mcip); + if (mp_chain == NULL) { cookie = 0; SRS_TX_STAT_UPDATE(srs, opackets, 1); @@ -3625,7 +3630,74 @@ mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint, mutex_exit(&srs->srs_lock); } } else { - cookie = srs_tx->st_func(srs, mp_chain, hint, flag, ret_mp); + mblk_t *mp = mp_chain; + mblk_t *new_head = NULL; + mblk_t *new_tail = NULL; + + /* + * There are occasions where the packets arriving here + * may request hardware offloads that are not + * available from the underlying MAC provider. This + * currently only happens when a packet is sent across + * the MAC-loopback path of one MAC and then forwarded + * (via IP) to another MAC that lacks one or more of + * the hardware offloads provided by the first one. + * However, in the future, we may choose to pretend + * all MAC providers support all offloads, performing + * emulation on Tx as needed. + * + * We iterate each mblk in-turn, emulating hardware + * offloads as required. From this process, we create + * a new chain. The new chain may be the same as the + * original chain (no hardware emulation needed), a + * collection of new mblks (hardware emulation + * needed), or a mix. At this point, the chain is safe + * for consumption by the underlying MAC provider and + * is passed down to the SRS. 
+ */ + while (mp != NULL) { + mblk_t *next = mp->b_next; + mblk_t *tail = NULL; + const uint16_t needed = + (DB_CKSUMFLAGS(mp) ^ mip->mi_tx_cksum_flags) & + DB_CKSUMFLAGS(mp); + + mp->b_next = NULL; + + if ((needed & (HCK_TX_FLAGS | HW_LSO_FLAGS)) != 0) { + mac_emul_t emul = 0; + + if (needed & HCK_IPV4_HDRCKSUM) + emul |= MAC_IPCKSUM_EMUL; + if (needed & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) + emul |= MAC_HWCKSUM_EMUL; + if (needed & HW_LSO) + emul = MAC_LSO_EMUL; + + mac_hw_emul(&mp, &tail, NULL, emul); + + if (mp == NULL) { + mp = next; + continue; + } + } + + if (new_head == NULL) { + new_head = mp; + } else { + new_tail->b_next = mp; + } + + new_tail = (tail == NULL) ? mp : tail; + mp = next; + } + + if (new_head == NULL) { + cookie = 0; + goto done; + } + + cookie = srs_tx->st_func(srs, new_head, hint, flag, ret_mp); } done: @@ -4026,14 +4098,15 @@ mac_client_get_effective_resources(mac_client_handle_t mch, * The unicast packets of MAC_CLIENT_PROMISC_FILTER callbacks are dispatched * after classification by mac_rx_deliver(). */ - static void mac_promisc_dispatch_one(mac_promisc_impl_t *mpip, mblk_t *mp, - boolean_t loopback) + boolean_t loopback, boolean_t local) { - mblk_t *mp_copy, *mp_next; + mblk_t *mp_next; if (!mpip->mpi_no_copy || mpip->mpi_strip_vlan_tag) { + mblk_t *mp_copy; + mp_copy = copymsg(mp); if (mp_copy == NULL) return; @@ -4043,16 +4116,24 @@ mac_promisc_dispatch_one(mac_promisc_impl_t *mpip, mblk_t *mp, if (mp_copy == NULL) return; } - mp_next = NULL; - } else { - mp_copy = mp; - mp_next = mp->b_next; + + /* + * There is code upstack that can't deal with message + * chains. 
+ */ + for (mblk_t *tmp = mp_copy; tmp != NULL; tmp = mp_next) { + mp_next = tmp->b_next; + tmp->b_next = NULL; + mpip->mpi_fn(mpip->mpi_arg, NULL, tmp, loopback); + } + + return; } - mp_copy->b_next = NULL; - mpip->mpi_fn(mpip->mpi_arg, NULL, mp_copy, loopback); - if (mp_copy == mp) - mp->b_next = mp_next; + mp_next = mp->b_next; + mp->b_next = NULL; + mpip->mpi_fn(mpip->mpi_arg, NULL, mp, loopback); + mp->b_next = mp_next; } /* @@ -4094,7 +4175,7 @@ mac_is_mcast(mac_impl_t *mip, mblk_t *mp) */ void mac_promisc_dispatch(mac_impl_t *mip, mblk_t *mp_chain, - mac_client_impl_t *sender) + mac_client_impl_t *sender, boolean_t local) { mac_promisc_impl_t *mpip; mac_cb_t *mcb; @@ -4134,8 +4215,10 @@ mac_promisc_dispatch(mac_impl_t *mip, mblk_t *mp_chain, if (is_sender || mpip->mpi_type == MAC_CLIENT_PROMISC_ALL || - is_mcast) - mac_promisc_dispatch_one(mpip, mp, is_sender); + is_mcast) { + mac_promisc_dispatch_one(mpip, mp, is_sender, + local); + } } } MAC_PROMISC_WALKER_DCR(mip); @@ -4164,7 +4247,8 @@ mac_promisc_client_dispatch(mac_client_impl_t *mcip, mblk_t *mp_chain) mpip = (mac_promisc_impl_t *)mcb->mcb_objp; if (mpip->mpi_type == MAC_CLIENT_PROMISC_FILTERED && !is_mcast) { - mac_promisc_dispatch_one(mpip, mp, B_FALSE); + mac_promisc_dispatch_one(mpip, mp, B_FALSE, + B_FALSE); } } } @@ -4278,8 +4362,9 @@ mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data) mac_impl_t *mip = (mac_impl_t *)mh; /* - * if mi_nactiveclients > 1, only MAC_CAPAB_LEGACY, MAC_CAPAB_HCKSUM, - * MAC_CAPAB_NO_NATIVEVLAN and MAC_CAPAB_NO_ZCOPY can be advertised. + * Some capabilities are restricted when there are more than one active + * clients on the MAC resource. The ones noted below are safe, + * independent of that count. 
*/ if (mip->mi_nactiveclients > 1) { switch (cap) { @@ -4287,6 +4372,7 @@ mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data) return (B_TRUE); case MAC_CAPAB_LEGACY: case MAC_CAPAB_HCKSUM: + case MAC_CAPAB_LSO: case MAC_CAPAB_NO_NATIVEVLAN: break; default: diff --git a/usr/src/uts/common/io/mac/mac_datapath_setup.c b/usr/src/uts/common/io/mac/mac_datapath_setup.c index e3b660c3b3..9a5f94e7d2 100644 --- a/usr/src/uts/common/io/mac/mac_datapath_setup.c +++ b/usr/src/uts/common/io/mac/mac_datapath_setup.c @@ -3476,7 +3476,7 @@ mac_srs_free(mac_soft_ring_set_t *mac_srs) ASSERT((mac_srs->srs_state & (SRS_CONDEMNED | SRS_CONDEMNED_DONE | SRS_PROC | SRS_PROC_FAST)) == (SRS_CONDEMNED | SRS_CONDEMNED_DONE)); - mac_pkt_drop(NULL, NULL, mac_srs->srs_first, B_FALSE); + mac_drop_chain(mac_srs->srs_first, "SRS free"); mac_srs_ring_free(mac_srs); mac_srs_soft_rings_free(mac_srs); mac_srs_fanout_list_free(mac_srs); diff --git a/usr/src/uts/common/io/mac/mac_flow.c b/usr/src/uts/common/io/mac/mac_flow.c index aa4985fe4c..62612122d6 100644 --- a/usr/src/uts/common/io/mac/mac_flow.c +++ b/usr/src/uts/common/io/mac/mac_flow.c @@ -22,6 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2018 Joyent, Inc. */ #include @@ -229,7 +230,7 @@ mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name, cv_init(&flent->fe_cv, NULL, CV_DEFAULT, NULL); /* Initialize the receiver function to a safe routine */ - flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop; + flent->fe_cb_fn = (flow_fn_t)mac_rx_def; flent->fe_index = -1; } (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN); diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c index fbeef1fd2f..ce986fd4bf 100644 --- a/usr/src/uts/common/io/mac/mac_provider.c +++ b/usr/src/uts/common/io/mac/mac_provider.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Copyright 2018 Joyent, Inc. + * Copyright 2019 Joyent, Inc. * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved. */ @@ -115,6 +115,37 @@ mac_free(mac_register_t *mregp) kmem_free(mregp, sizeof (mac_register_t)); } +/* + * Convert a MAC's offload features into the equivalent DB_CKSUMFLAGS + * value. + */ +static uint16_t +mac_features_to_flags(mac_handle_t mh) +{ + uint16_t flags = 0; + uint32_t cap_sum = 0; + mac_capab_lso_t cap_lso; + + if (mac_capab_get(mh, MAC_CAPAB_HCKSUM, &cap_sum)) { + if (cap_sum & HCKSUM_IPHDRCKSUM) + flags |= HCK_IPV4_HDRCKSUM; + + if (cap_sum & HCKSUM_INET_PARTIAL) + flags |= HCK_PARTIALCKSUM; + else if (cap_sum & (HCKSUM_INET_FULL_V4 | HCKSUM_INET_FULL_V6)) + flags |= HCK_FULLCKSUM; + } + + /* + * We don't need the information stored in 'cap_lso', but we + * need to pass a non-NULL pointer to appease the driver. + */ + if (mac_capab_get(mh, MAC_CAPAB_LSO, &cap_lso)) + flags |= HW_LSO; + + return (flags); +} + /* * mac_register() is how drivers register new MACs with the GLDv3 * framework. The mregp argument is allocated by drivers using the @@ -345,9 +376,13 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp) mip, 0, &p0, TS_RUN, minclsyspri); /* - * Initialize the capabilities + * Cache the DB_CKSUMFLAGS that this MAC supports. */ + mip->mi_tx_cksum_flags = mac_features_to_flags((mac_handle_t)mip); + /* + * Initialize the capabilities + */ bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t)); bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t)); @@ -689,7 +724,7 @@ mac_trill_snoop(mac_handle_t mh, mblk_t *mp) mac_impl_t *mip = (mac_impl_t *)mh; if (mip->mi_promisc_list != NULL) - mac_promisc_dispatch(mip, mp, NULL); + mac_promisc_dispatch(mip, mp, NULL, B_FALSE); } /* @@ -709,7 +744,7 @@ mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) * this MAC, pass them a copy if appropriate. 
*/ if (mip->mi_promisc_list != NULL) - mac_promisc_dispatch(mip, mp_chain, NULL); + mac_promisc_dispatch(mip, mp_chain, NULL, B_FALSE); if (mr != NULL) { /* @@ -969,12 +1004,33 @@ mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize) } /* - * Invoked by driver as well as the framework to notify its capability change. + * The mac provider or mac framework calls this function when it wants + * to notify upstream consumers that the capabilities have changed and + * that they should modify their own internal state accordingly. + * + * We currently have no regard for the fact that a provider could + * decide to drop capabilities which would invalidate pending traffic. + * For example, if one was to disable the Tx checksum offload while + * TCP/IP traffic was being sent by mac clients relying on that + * feature, then those packets would hit the wire with missing or + * partial checksums. A proper solution involves not only providing + * notification, but also performing client quiescing. That is, a capab + * change should be treated as an atomic transaction that forms a + * barrier between traffic relying on the current capabs and traffic + * relying on the new capabs. In practice, simnet is currently the + * only provider that could hit this, and it's an easily avoidable + * situation (and at worst it should only lead to some dropped + * packets). But if we ever want better on-the-fly capab change to + * actual hardware providers, then we should give this update + * mechanism a proper implementation. */ void mac_capab_update(mac_handle_t mh) { - /* Send MAC_NOTE_CAPAB_CHG notification */ + /* + * Send a MAC_NOTE_CAPAB_CHG notification to alert upstream + * clients to renegotiate capabilities. + */ i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG); } @@ -1276,6 +1332,19 @@ i_mac_notify_thread(void *arg) } } + /* + * Depending on which capabs have changed, the Tx + * checksum flags may also need to be updated.
+ */ + if ((bits & (1 << MAC_NOTE_CAPAB_CHG)) != 0) { + mac_perim_handle_t mph; + mac_handle_t mh = (mac_handle_t)mip; + + mac_perim_enter_by_mh(mh, &mph); + mip->mi_tx_cksum_flags = mac_features_to_flags(mh); + mac_perim_exit(mph); + } + /* * Do notification callbacks for each notification type. */ @@ -1542,15 +1611,22 @@ mac_hcksum_clone(const mblk_t *src, mblk_t *dst) ASSERT3U(DB_TYPE(dst), ==, M_DATA); /* - * Do these assignments unconditionally, rather than only when flags is - * non-zero. This protects a situation where zeroed hcksum data does - * not make the jump onto an mblk_t with stale data in those fields. + * Do these assignments unconditionally, rather than only when + * flags is non-zero. This protects a situation where zeroed + * hcksum data does not make the jump onto an mblk_t with + * stale data in those fields. It's important to copy all + * possible flags (HCK_* as well as HW_*) and not just the + * checksum specific flags. Dropping flags during a clone + * could result in dropped packets. If the caller has good + * reason to drop those flags then it should do it manually, + * after the clone. 
*/ - DB_CKSUMFLAGS(dst) = (DB_CKSUMFLAGS(src) & HCK_FLAGS); + DB_CKSUMFLAGS(dst) = DB_CKSUMFLAGS(src); DB_CKSUMSTART(dst) = DB_CKSUMSTART(src); DB_CKSUMSTUFF(dst) = DB_CKSUMSTUFF(src); DB_CKSUMEND(dst) = DB_CKSUMEND(src); DB_CKSUM16(dst) = DB_CKSUM16(src); + DB_LSOMSS(dst) = DB_LSOMSS(src); } void diff --git a/usr/src/uts/common/io/mac/mac_sched.c b/usr/src/uts/common/io/mac/mac_sched.c index cbd5ce1e19..5b3e87dfd1 100644 --- a/usr/src/uts/common/io/mac/mac_sched.c +++ b/usr/src/uts/common/io/mac/mac_sched.c @@ -968,6 +968,7 @@ #include #include +#include #include #include #include @@ -1327,7 +1328,7 @@ int mac_srs_worker_wakeup_ticks = 0; * b_prev may be set to the fanout hint \ * hence can't use freemsg directly \ */ \ - mac_pkt_drop(NULL, NULL, mp_chain, B_FALSE); \ + mac_drop_chain(mp_chain, "SRS Tx max queue"); \ DTRACE_PROBE1(tx_queued_hiwat, \ mac_soft_ring_set_t *, srs); \ enqueue = 0; \ @@ -1346,11 +1347,11 @@ int mac_srs_worker_wakeup_ticks = 0; if (!(srs->srs_type & SRST_TX)) \ mutex_exit(&srs->srs_bw->mac_bw_lock); -#define MAC_TX_SRS_DROP_MESSAGE(srs, mp, cookie) { \ - mac_pkt_drop(NULL, NULL, mp, B_FALSE); \ +#define MAC_TX_SRS_DROP_MESSAGE(srs, chain, cookie, s) { \ + mac_drop_chain((chain), (s)); \ /* increment freed stats */ \ - mac_srs->srs_tx.st_stat.mts_sdrops++; \ - cookie = (mac_tx_cookie_t)srs; \ + (srs)->srs_tx.st_stat.mts_sdrops++; \ + (cookie) = (mac_tx_cookie_t)(srs); \ } #define MAC_TX_SET_NO_ENQUEUE(srs, mp_chain, ret_mp, cookie) { \ @@ -2321,7 +2322,7 @@ check_again: if (smcip->mci_mip->mi_promisc_list != NULL) { mutex_exit(lock); mac_promisc_dispatch(smcip->mci_mip, - head, NULL); + head, NULL, B_FALSE); mutex_enter(lock); } } @@ -2893,7 +2894,7 @@ again: mac_srs->srs_bw->mac_bw_sz -= sz; mac_srs->srs_bw->mac_bw_drop_bytes += sz; mutex_exit(&mac_srs->srs_bw->mac_bw_lock); - mac_pkt_drop(NULL, NULL, head, B_FALSE); + mac_drop_chain(head, "Rx no bandwidth"); goto leave_poll; } else { mutex_exit(&mac_srs->srs_bw->mac_bw_lock); @@ -3275,9 
+3276,10 @@ mac_rx_srs_subflow_process(void *arg, mac_resource_handle_t srs, } /* - * mac_rx_srs_process - * - * Receive side routine called from the interrupt path. + * MAC SRS receive side routine. If the data is coming from the + * network (i.e. from a NIC) then this is called in interrupt context. + * If the data is coming from a local sender (e.g. mac_tx_send() or + * bridge_forward()) then this is not called in interrupt context. * * loopback is set to force a context switch on the loopback * path between MAC clients. @@ -3337,7 +3339,7 @@ mac_rx_srs_process(void *arg, mac_resource_handle_t srs, mblk_t *mp_chain, mac_bw->mac_bw_drop_bytes += sz; mutex_exit(&mac_bw->mac_bw_lock); mutex_exit(&mac_srs->srs_lock); - mac_pkt_drop(NULL, NULL, mp_chain, B_FALSE); + mac_drop_chain(mp_chain, "Rx no bandwidth"); return; } else { if ((mac_bw->mac_bw_sz + sz) <= @@ -3459,7 +3461,8 @@ mac_tx_srs_no_desc(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, ASSERT(tx_mode == SRS_TX_DEFAULT || tx_mode == SRS_TX_BW); if (flag & MAC_DROP_ON_NO_DESC) { - MAC_TX_SRS_DROP_MESSAGE(mac_srs, mp_chain, cookie); + MAC_TX_SRS_DROP_MESSAGE(mac_srs, mp_chain, cookie, + "Tx no desc"); } else { if (mac_srs->srs_first != NULL) wakeup_worker = B_FALSE; @@ -3522,7 +3525,8 @@ mac_tx_srs_enqueue(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, MAC_COUNT_CHAIN(mac_srs, mp_chain, tail, cnt, sz); if (flag & MAC_DROP_ON_NO_DESC) { if (mac_srs->srs_count > mac_srs->srs_tx.st_hiwat) { - MAC_TX_SRS_DROP_MESSAGE(mac_srs, mp_chain, cookie); + MAC_TX_SRS_DROP_MESSAGE(mac_srs, mp_chain, cookie, + "Tx SRS hiwat"); } else { MAC_TX_SRS_ENQUEUE_CHAIN(mac_srs, mp_chain, tail, cnt, sz); @@ -3895,7 +3899,8 @@ mac_tx_bw_mode(mac_soft_ring_set_t *mac_srs, mblk_t *mp_chain, cookie = (mac_tx_cookie_t)mac_srs; *ret_mp = mp_chain; } else { - MAC_TX_SRS_DROP_MESSAGE(mac_srs, mp_chain, cookie); + MAC_TX_SRS_DROP_MESSAGE(mac_srs, mp_chain, cookie, + "Tx no bandwidth"); } mutex_exit(&mac_srs->srs_lock); return (cookie); @@ 
-4342,7 +4347,7 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, msgdsize(mp)); CHECK_VID_AND_ADD_TAG(mp); - MAC_TX(mip, ring, mp, src_mcip); + mp = mac_provider_tx(mip, ring, mp, src_mcip); /* * If the driver is out of descriptors and does a @@ -4373,7 +4378,6 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, flow_entry_t *dst_flow_ent; void *flow_cookie; size_t pkt_size; - mblk_t *mp1; next = mp->b_next; mp->b_next = NULL; @@ -4388,44 +4392,12 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, dst_flow_ent = mac_tx_classify(mip, mp); if (dst_flow_ent != NULL) { - size_t hdrsize; - int err = 0; - - if (mip->mi_info.mi_nativemedia == DL_ETHER) { - struct ether_vlan_header *evhp = - (struct ether_vlan_header *)mp->b_rptr; - - if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) - hdrsize = sizeof (*evhp); - else - hdrsize = sizeof (struct ether_header); - } else { - mac_header_info_t mhi; - - err = mac_header_info((mac_handle_t)mip, - mp, &mhi); - if (err == 0) - hdrsize = mhi.mhi_hdrsize; - } - /* * Got a matching flow. It's either another * MAC client, or a broadcast/multicast flow. - * Make sure the packet size is within the - * allowed size. If not drop the packet and - * move to next packet. */ - if (err != 0 || - (pkt_size - hdrsize) > mip->mi_sdu_max) { - oerrors++; - DTRACE_PROBE2(loopback__drop, size_t, pkt_size, - mblk_t *, mp); - freemsg(mp); - mp = next; - FLOW_REFRELE(dst_flow_ent); - continue; - } flow_cookie = mac_flow_get_client_cookie(dst_flow_ent); + if (flow_cookie != NULL) { /* * The vnic_bcast_send function expects @@ -4443,6 +4415,7 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, * bypass is set. */ boolean_t do_switch; + mac_client_impl_t *dst_mcip = dst_flow_ent->fe_mcip; @@ -4458,19 +4431,23 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, * check is done inside the MAC_TX() * macro. 
*/ - if (mip->mi_promisc_list != NULL) - mac_promisc_dispatch(mip, mp, src_mcip); + if (mip->mi_promisc_list != NULL) { + mac_promisc_dispatch(mip, mp, src_mcip, + B_TRUE); + } do_switch = ((src_mcip->mci_state_flags & dst_mcip->mci_state_flags & MCIS_CLIENT_POLL_CAPABLE) != 0); - if ((mp1 = mac_fix_cksum(mp)) != NULL) { + mac_hw_emul(&mp, NULL, NULL, MAC_ALL_EMULS); + if (mp != NULL) { (dst_flow_ent->fe_cb_fn)( dst_flow_ent->fe_cb_arg1, dst_flow_ent->fe_cb_arg2, - mp1, do_switch); + mp, do_switch); } + } FLOW_REFRELE(dst_flow_ent); } else { @@ -4478,7 +4455,7 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain, * Unknown destination, send via the underlying * NIC. */ - MAC_TX(mip, ring, mp, src_mcip); + mp = mac_provider_tx(mip, ring, mp, src_mcip); if (mp != NULL) { /* * Adjust for the last packet that @@ -4827,7 +4804,7 @@ mac_tx_sring_enqueue(mac_soft_ring_t *ringp, mblk_t *mp_chain, uint16_t flag, ASSERT(MUTEX_HELD(&ringp->s_ring_lock)); MAC_COUNT_CHAIN(mac_srs, mp_chain, tail, cnt, sz); if (flag & MAC_DROP_ON_NO_DESC) { - mac_pkt_drop(NULL, NULL, mp_chain, B_FALSE); + mac_drop_chain(mp_chain, "Tx softring no desc"); /* increment freed stats */ ringp->s_ring_drops += cnt; cookie = (mac_tx_cookie_t)ringp; @@ -4871,8 +4848,8 @@ mac_tx_sring_enqueue(mac_soft_ring_t *ringp, mblk_t *mp_chain, uint16_t flag, * b_prev may be set to the fanout hint * hence can't use freemsg directly */ - mac_pkt_drop(NULL, NULL, - mp_chain, B_FALSE); + mac_drop_chain(mp_chain, + "Tx softring max queue"); DTRACE_PROBE1(tx_queued_hiwat, mac_soft_ring_t *, ringp); enqueue = B_FALSE; diff --git a/usr/src/uts/common/io/mac/mac_soft_ring.c b/usr/src/uts/common/io/mac/mac_soft_ring.c index f4d2a5ee81..c8a16e6fd3 100644 --- a/usr/src/uts/common/io/mac/mac_soft_ring.c +++ b/usr/src/uts/common/io/mac/mac_soft_ring.c @@ -242,7 +242,7 @@ mac_soft_ring_free(mac_soft_ring_t *softring) ASSERT((softring->s_ring_state & (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE | 
S_RING_PROC)) == (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE)); - mac_pkt_drop(NULL, NULL, softring->s_ring_first, B_FALSE); + mac_drop_chain(softring->s_ring_first, "softring free"); softring->s_ring_tx_arg2 = NULL; mac_soft_ring_stat_delete(softring); mac_callback_free(softring->s_ring_notify_cb_list); diff --git a/usr/src/uts/common/io/mac/mac_util.c b/usr/src/uts/common/io/mac/mac_util.c index 924d018ad0..03da3a3504 100644 --- a/usr/src/uts/common/io/mac/mac_util.c +++ b/usr/src/uts/common/io/mac/mac_util.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2018 Joyent, Inc. + * Copyright 2019 Joyent, Inc. */ /* @@ -48,6 +48,75 @@ #include #include #include +#include +#include +#include + +/* + * The next two functions are used for dropping packets or chains of + * packets, respectively. We could use one function for both but + * separating the use cases allows us to specify intent and prevent + * dropping more data than intended. + * + * The purpose of these functions is to aid the debugging effort, + * especially in production. Rather than use freemsg()/freemsgchain(), + * it's preferable to use these functions when dropping a packet in + * the MAC layer. These functions should only be used during + * unexpected conditions. That is, any time a packet is dropped + * outside of the regular, successful datapath. Consolidating all + * drops on these functions allows the user to trace one location and + * determine why the packet was dropped based on the msg. It also + * allows the user to inspect the packet before it is freed. Finally, + * it allows the user to avoid tracing freemsg()/freemsgchain() thus + * keeping the hot path running as efficiently as possible. + * + * NOTE: At this time not all MAC drops are aggregated on these + * functions; but that is the plan. This comment should be erased once + * completed. + */ + +/*PRINTFLIKE2*/ +void +mac_drop_pkt(mblk_t *mp, const char *fmt, ...) 
+{ + va_list adx; + char msg[128]; + char *msgp = msg; + + ASSERT3P(mp->b_next, ==, NULL); + + va_start(adx, fmt); + (void) vsnprintf(msgp, sizeof (msg), fmt, adx); + va_end(adx); + + DTRACE_PROBE2(mac__drop, mblk_t *, mp, char *, msgp); + freemsg(mp); +} + +/*PRINTFLIKE2*/ +void +mac_drop_chain(mblk_t *chain, const char *fmt, ...) +{ + va_list adx; + char msg[128]; + char *msgp = msg; + + va_start(adx, fmt); + (void) vsnprintf(msgp, sizeof (msg), fmt, adx); + va_end(adx); + + /* + * We could use freemsgchain() for the actual freeing but + * since we are already walking the chain to fire the dtrace + * probe we might as well free the msg here too. + */ + for (mblk_t *mp = chain, *next; mp != NULL; ) { + next = mp->b_next; + DTRACE_PROBE2(mac__drop, mblk_t *, mp, char *, msgp); + freemsg(mp); + mp = next; + } +} /* * Copy an mblk, preserving its hardware checksum flags. @@ -89,222 +158,1272 @@ mac_copymsgchain_cksum(mblk_t *mp) } /* - * Process the specified mblk chain for proper handling of hardware - * checksum offload. This routine is invoked for loopback traffic - * between MAC clients. - * The function handles a NULL mblk chain passed as argument. + * Calculate the ULP checksum for IPv4. Return true if the calculation + * was successful, or false if an error occurred. If the later, place + * an error message into '*err'. */ -mblk_t * -mac_fix_cksum(mblk_t *mp_chain) +static boolean_t +mac_sw_cksum_ipv4(mblk_t *mp, uint32_t ip_hdr_offset, ipha_t *ipha, + const char **err) { - mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1; + const uint8_t proto = ipha->ipha_protocol; + size_t len; + const uint32_t ip_hdr_sz = IPH_HDR_LENGTH(ipha); + /* ULP offset from start of L2. */ + const uint32_t ulp_offset = ip_hdr_offset + ip_hdr_sz; + ipaddr_t src, dst; + uint32_t cksum; + uint16_t *up; + + /* + * We need a pointer to the ULP checksum. We're assuming the + * ULP checksum pointer resides in the first mblk. 
Our native + * TCP stack should always put the headers in the first mblk, + * but currently we have no way to guarantee that other + * clients don't spread headers (or even header fields) across + * mblks. + */ + switch (proto) { + case IPPROTO_TCP: + ASSERT3U(MBLKL(mp), >=, (ulp_offset + sizeof (tcph_t))); + if (MBLKL(mp) < (ulp_offset + sizeof (tcph_t))) { + *err = "mblk doesn't contain TCP header"; + goto bail; + } + + up = IPH_TCPH_CHECKSUMP(ipha, ip_hdr_sz); + cksum = IP_TCP_CSUM_COMP; + break; + + case IPPROTO_UDP: + ASSERT3U(MBLKL(mp), >=, (ulp_offset + sizeof (udpha_t))); + if (MBLKL(mp) < (ulp_offset + sizeof (udpha_t))) { + *err = "mblk doesn't contain UDP header"; + goto bail; + } + + up = IPH_UDPH_CHECKSUMP(ipha, ip_hdr_sz); + cksum = IP_UDP_CSUM_COMP; + break; + + case IPPROTO_SCTP: { + sctp_hdr_t *sctph; + + ASSERT3U(MBLKL(mp), >=, (ulp_offset + sizeof (sctp_hdr_t))); + if (MBLKL(mp) < (ulp_offset + sizeof (sctp_hdr_t))) { + *err = "mblk doesn't contain SCTP header"; + goto bail; + } + + sctph = (sctp_hdr_t *)(mp->b_rptr + ulp_offset); + sctph->sh_chksum = 0; + sctph->sh_chksum = sctp_cksum(mp, ulp_offset); + return (B_TRUE); + } + + default: + *err = "unexpected protocol"; + goto bail; + + } + + /* Pseudo-header checksum. */ + src = ipha->ipha_src; + dst = ipha->ipha_dst; + len = ntohs(ipha->ipha_length) - ip_hdr_sz; + + cksum += (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); + cksum += htons(len); + + /* + * We have already accounted for the pseudo checksum above. + * Make sure the ULP checksum field is zero before computing + * the rest. + */ + *up = 0; + cksum = IP_CSUM(mp, ulp_offset, cksum); + *up = (uint16_t)(cksum ? cksum : ~cksum); + + return (B_TRUE); + +bail: + return (B_FALSE); +} + +/* + * Calculate the ULP checksum for IPv6. Return true if the calculation + * was successful, or false if an error occurred. If the later, place + * an error message into '*err'. 
+ */ +static boolean_t +mac_sw_cksum_ipv6(mblk_t *mp, uint32_t ip_hdr_offset, const char **err) +{ + ip6_t *ip6h = (ip6_t *)(mp->b_rptr + ip_hdr_offset); + const uint8_t proto = ip6h->ip6_nxt; + const uint16_t *iphs = (uint16_t *)ip6h; + /* ULP offset from start of L2. */ + uint32_t ulp_offset; + size_t len; + uint32_t cksum; + uint16_t *up; + uint16_t ip_hdr_sz; + + if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &ip_hdr_sz, NULL)) { + *err = "malformed IPv6 header"; + goto bail; + } + + ulp_offset = ip_hdr_offset + ip_hdr_sz; + + /* + * We need a pointer to the ULP checksum. We're assuming the + * ULP checksum pointer resides in the first mblk. Our native + * TCP stack should always put the headers in the first mblk, + * but currently we have no way to guarantee that other + * clients don't spread headers (or even header fields) across + * mblks. + */ + switch (proto) { + case IPPROTO_TCP: + ASSERT3U(MBLKL(mp), >=, (ulp_offset + sizeof (tcph_t))); + if (MBLKL(mp) < (ulp_offset + sizeof (tcph_t))) { + *err = "mblk doesn't contain TCP header"; + goto bail; + } + + up = IPH_TCPH_CHECKSUMP(ip6h, ip_hdr_sz); + cksum = IP_TCP_CSUM_COMP; + break; + + case IPPROTO_UDP: + ASSERT3U(MBLKL(mp), >=, (ulp_offset + sizeof (udpha_t))); + if (MBLKL(mp) < (ulp_offset + sizeof (udpha_t))) { + *err = "mblk doesn't contain UDP header"; + goto bail; + } + + up = IPH_UDPH_CHECKSUMP(ip6h, ip_hdr_sz); + cksum = IP_UDP_CSUM_COMP; + break; + + case IPPROTO_SCTP: { + sctp_hdr_t *sctph; + + ASSERT3U(MBLKL(mp), >=, (ulp_offset + sizeof (sctp_hdr_t))); + if (MBLKL(mp) < (ulp_offset + sizeof (sctp_hdr_t))) { + *err = "mblk doesn't contain SCTP header"; + goto bail; + } + + sctph = (sctp_hdr_t *)(mp->b_rptr + ulp_offset); + /* + * Zero out the checksum field to ensure proper + * checksum calculation. 
+ */ + sctph->sh_chksum = 0; + sctph->sh_chksum = sctp_cksum(mp, ulp_offset); + return (B_TRUE); + } + + default: + *err = "unexpected protocol"; + goto bail; + } + + /* + * The payload length includes the payload and the IPv6 + * extension headers; the idea is to subtract the extension + * header length to get the real payload length. + */ + len = ntohs(ip6h->ip6_plen) - (ip_hdr_sz - IPV6_HDR_LEN); + cksum += len; + + /* + * We accumulate the pseudo header checksum in cksum; then we + * call IP_CSUM to compute the checksum over the payload. + */ + cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + iphs[8] + iphs[9] + + iphs[10] + iphs[11] + iphs[12] + iphs[13] + iphs[14] + iphs[15] + + iphs[16] + iphs[17] + iphs[18] + iphs[19]; + cksum = IP_CSUM(mp, ulp_offset, cksum); + + /* For UDP/IPv6 a zero UDP checksum is not allowed. Change to 0xffff */ + if (proto == IPPROTO_UDP && cksum == 0) + cksum = ~cksum; + + *up = (uint16_t)cksum; + + return (B_TRUE); + +bail: + return (B_FALSE); +} + +/* + * Perform software checksum on a single message, if needed. The + * emulation performed is determined by an intersection of the mblk's + * flags and the emul flags requested. The emul flags are documented + * in mac.h. + */ +static mblk_t * +mac_sw_cksum(mblk_t *mp, mac_emul_t emul) +{ + mblk_t *skipped_hdr = NULL; uint32_t flags, start, stuff, end, value; + uint32_t ip_hdr_offset; + uint16_t etype; + size_t ip_hdr_sz; + struct ether_header *ehp; + const char *err = ""; - for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) { - uint16_t len; - uint32_t offset; - struct ether_header *ehp; - uint16_t sap; + /* + * This function should only be called from mac_hw_emul() + * which handles mblk chains and the shared ref case. 
+ */ + ASSERT3P(mp->b_next, ==, NULL); - mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags); - if (flags == 0) - continue; + mac_hcksum_get(mp, &start, &stuff, &end, &value, NULL); + + flags = DB_CKSUMFLAGS(mp); + + /* Why call this if checksum emulation isn't needed? */ + ASSERT3U(flags & (HCK_FLAGS), !=, 0); + + /* + * Ethernet, and optionally VLAN header. mac_hw_emul() has + * already verified we have enough data to read the L2 header. + */ + ehp = (struct ether_header *)mp->b_rptr; + if (ntohs(ehp->ether_type) == VLAN_TPID) { + struct ether_vlan_header *evhp; + + evhp = (struct ether_vlan_header *)mp->b_rptr; + etype = ntohs(evhp->ether_type); + ip_hdr_offset = sizeof (struct ether_vlan_header); + } else { + etype = ntohs(ehp->ether_type); + ip_hdr_offset = sizeof (struct ether_header); + } + + /* + * If this packet isn't IP, then leave it alone. We don't want + * to affect non-IP traffic like ARP. Assume the IP header + * doesn't include any options, for now. We will use the + * correct size later after we know there are enough bytes to + * at least fill out the basic header. + */ + switch (etype) { + case ETHERTYPE_IP: + ip_hdr_sz = sizeof (ipha_t); + break; + case ETHERTYPE_IPV6: + ip_hdr_sz = sizeof (ip6_t); + break; + default: + return (mp); + } + + ASSERT3U(MBLKL(mp), >=, ip_hdr_offset); + + /* + * If the first mblk of this packet contains only the ethernet + * header, skip past it for now. Packets with their data + * contained in only a single mblk can then use the fastpaths + * tuned to that possibility. + */ + if (MBLKL(mp) == ip_hdr_offset) { + ip_hdr_offset -= MBLKL(mp); + /* This is guaranteed by mac_hw_emul(). */ + ASSERT3P(mp->b_cont, !=, NULL); + skipped_hdr = mp; + mp = mp->b_cont; + } + + /* + * Both full and partial checksum rely on finding the IP + * header in the current mblk. Our native TCP stack honors + * this assumption but it's prudent to guard our future + * clients that might not honor this contract. 
+ */ + ASSERT3U(MBLKL(mp), >=, ip_hdr_offset + ip_hdr_sz); + if (MBLKL(mp) < (ip_hdr_offset + ip_hdr_sz)) { + err = "mblk doesn't contain IP header"; + goto bail; + } + + /* + * We are about to modify the header mblk; make sure we are + * modifying our own copy. The code that follows assumes that + * the IP/ULP headers exist in this mblk (and drops the + * message if they don't). + */ + if (DB_REF(mp) > 1) { + mblk_t *tmp = copyb(mp); + + if (tmp == NULL) { + err = "copyb failed"; + goto bail; + } + + if (skipped_hdr != NULL) { + ASSERT3P(skipped_hdr->b_cont, ==, mp); + skipped_hdr->b_cont = tmp; + } + + tmp->b_cont = mp->b_cont; + freeb(mp); + mp = tmp; + } + + if (etype == ETHERTYPE_IP) { + ipha_t *ipha = (ipha_t *)(mp->b_rptr + ip_hdr_offset); + + if ((flags & HCK_FULLCKSUM) && (emul & MAC_HWCKSUM_EMUL)) { + if (!mac_sw_cksum_ipv4(mp, ip_hdr_offset, ipha, &err)) + goto bail; + } + + /* We always update the ULP checksum flags. */ + if ((flags & HCK_FULLCKSUM) && (emul & MAC_HWCKSUM_EMULS)) { + flags &= ~HCK_FULLCKSUM; + flags |= HCK_FULLCKSUM_OK; + value = 0; + } /* - * Since the processing of checksum offload for loopback - * traffic requires modification of the packet contents, - * ensure sure that we are always modifying our own copy. + * While unlikely, it's possible to write code that + * might end up calling mac_sw_cksum() twice on the + * same mblk (performing both LSO and checksum + * emulation in a single mblk chain loop -- the LSO + * emulation inserts a new chain into the existing + * chain and then the loop iterates back over the new + * segments and emulates the checksum a second time). + * Normally this wouldn't be a problem, because the + * HCK_*_OK flags are supposed to indicate that we + * don't need to perform the work. But + * HCK_IPV4_HDRCKSUM and HCK_IPV4_HDRCKSUM_OK have the + * same value; so we cannot use these flags to + * determine if the IP header checksum has already + * been calculated or not.
For this reason, we zero + * out the checksum first. In the future, we + * should fix the HCK_* flags. */ - if (DB_REF(mp) > 1) { - mp1 = copymsg(mp); - if (mp1 == NULL) - continue; - mp1->b_next = mp->b_next; - mp->b_next = NULL; - freemsg(mp); - if (prev != NULL) - prev->b_next = mp1; - else - new_chain = mp1; - mp = mp1; + if ((flags & HCK_IPV4_HDRCKSUM) && (emul & MAC_HWCKSUM_EMULS)) { + ipha->ipha_hdr_checksum = 0; + ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); + flags &= ~HCK_IPV4_HDRCKSUM; + flags |= HCK_IPV4_HDRCKSUM_OK; + } + } else if (etype == ETHERTYPE_IPV6) { + /* There is no IP header checksum for IPv6. */ + if ((flags & HCK_FULLCKSUM) && (emul & MAC_HWCKSUM_EMUL)) { + if (!mac_sw_cksum_ipv6(mp, ip_hdr_offset, &err)) + goto bail; + flags &= ~HCK_FULLCKSUM; + flags |= HCK_FULLCKSUM_OK; + value = 0; } + } + + /* + * Partial checksum is the same for both IPv4 and IPv6. + */ + if ((flags & HCK_PARTIALCKSUM) && (emul & MAC_HWCKSUM_EMUL)) { + uint16_t *up, partial, cksum; + uchar_t *ipp; /* ptr to beginning of IP header */ + + ipp = mp->b_rptr + ip_hdr_offset; + up = (uint16_t *)((uchar_t *)ipp + stuff); + partial = *up; + *up = 0; + + ASSERT3S(end, >, start); + cksum = ~IP_CSUM_PARTIAL(mp, ip_hdr_offset + start, partial); + *up = cksum != 0 ? cksum : ~cksum; + } + + /* We always update the ULP checksum flags. */ + if ((flags & HCK_PARTIALCKSUM) && (emul & MAC_HWCKSUM_EMULS)) { + flags &= ~HCK_PARTIALCKSUM; + flags |= HCK_FULLCKSUM_OK; + value = 0; + } + + mac_hcksum_set(mp, start, stuff, end, value, flags); + + /* Don't forget to reattach the header. */ + if (skipped_hdr != NULL) { + ASSERT3P(skipped_hdr->b_cont, ==, mp); /* - * Ethernet, and optionally VLAN header. + * Duplicate the HCKSUM data into the header mblk. + * This mimics mac_add_vlan_tag which ensures that + * both the first mblk _and_ the first data bearing + * mblk possess the HCKSUM information.
Consumers like + * IP will end up discarding the ether_header mblk, so + * for now, it is important that the data be available + * in both places. */ - /* LINTED: improper alignment cast */ - ehp = (struct ether_header *)mp->b_rptr; - if (ntohs(ehp->ether_type) == VLAN_TPID) { - struct ether_vlan_header *evhp; + mac_hcksum_clone(mp, skipped_hdr); + mp = skipped_hdr; + } - ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); - /* LINTED: improper alignment cast */ - evhp = (struct ether_vlan_header *)mp->b_rptr; - sap = ntohs(evhp->ether_type); - offset = sizeof (struct ether_vlan_header); + return (mp); + +bail: + if (skipped_hdr != NULL) { + ASSERT3P(skipped_hdr->b_cont, ==, mp); + mp = skipped_hdr; + } + + mac_drop_pkt(mp, err); + return (NULL); +} + +/* + * Build a single data segment from an LSO packet. The mblk chain + * returned, seg_head, represents the data segment and is always + * exactly seg_len bytes long. The lso_mp and offset input/output + * parameters track our position in the LSO packet. This function + * exists solely as a helper to mac_sw_lso(). + * + * Case A + * + * The current lso_mp is larger than the requested seg_len. The + * beginning of seg_head may start at the beginning of lso_mp or + * offset into it. In either case, a single mblk is returned, and + * *offset is updated to reflect our new position in the current + * lso_mp. 
+ * + * +----------------------------+ + * | in *lso_mp / out *lso_mp | + * +----------------------------+ + * ^ ^ + * | | + * | | + * | | + * +------------------------+ + * | seg_head | + * +------------------------+ + * ^ ^ + * | | + * in *offset = 0 out *offset = seg_len + * + * |------ seg_len ----| + * + * + * +------------------------------+ + * | in *lso_mp / out *lso_mp | + * +------------------------------+ + * ^ ^ + * | | + * | | + * | | + * +------------------------+ + * | seg_head | + * +------------------------+ + * ^ ^ + * | | + * in *offset = N out *offset = N + seg_len + * + * |------ seg_len ----| + * + * + * + * Case B + * + * The requested seg_len consumes exactly the rest of the lso_mp. + * I.e., the seg_head's b_wptr is equivalent to lso_mp's b_wptr. + * The seg_head may start at the beginning of the lso_mp or at some + * offset into it. In either case we return a single mblk, reset + * *offset to zero, and walk to the next lso_mp. + * + * +------------------------+ +------------------------+ + * | in *lso_mp |---------->| out *lso_mp | + * +------------------------+ +------------------------+ + * ^ ^ ^ + * | | | + * | | out *offset = 0 + * | | + * +------------------------+ + * | seg_head | + * +------------------------+ + * ^ + * | + * in *offset = 0 + * + * |------ seg_len ----| + * + * + * + * +----------------------------+ +------------------------+ + * | in *lso_mp |---------->| out *lso_mp | + * +----------------------------+ +------------------------+ + * ^ ^ ^ + * | | | + * | | out *offset = 0 + * | | + * +------------------------+ + * | seg_head | + * +------------------------+ + * ^ + * | + * in *offset = N + * + * |------ seg_len ----| + * + * + * Case C + * + * The requested seg_len is greater than the current lso_mp. In + * this case we must consume LSO mblks until we have enough data to + * satisfy either case (A) or (B) above. 
We will return multiple + * mblks linked via b_cont, offset will be set based on the cases + * above, and lso_mp will walk forward at least one mblk, but maybe + * more. + * + * N.B. This diagram is not exhaustive. The seg_head may start on + * the beginning of an lso_mp. The seg_tail may end exactly on the + * boundary of an lso_mp. And there may be two (in this case the + * middle block wouldn't exist), three, or more mblks in the + * seg_head chain. This is meant as one example of what might + * happen. The main thing to remember is that the seg_tail mblk + * must be one of case (A) or (B) above. + * + * +------------------+ +----------------+ +------------------+ + * | in *lso_mp |--->| *lso_mp |--->| out *lso_mp | + * +------------------+ +----------------+ +------------------+ + * ^ ^ ^ ^ ^ ^ + * | | | | | | + * | | | | | | + * | | | | | | + * | | | | | | + * +------------+ +----------------+ +------------+ + * | seg_head |--->| |--->| seg_tail | + * +------------+ +----------------+ +------------+ + * ^ ^ + * | | + * in *offset = N out *offset = MBLKL(seg_tail) + * + * |------------------- seg_len -------------------| + * + */ +static mblk_t * +build_data_seg(mblk_t **lso_mp, uint32_t *offset, uint32_t seg_len) +{ + mblk_t *seg_head, *seg_tail, *seg_mp; + + ASSERT3P(*lso_mp, !=, NULL); + ASSERT3U((*lso_mp)->b_rptr + *offset, <, (*lso_mp)->b_wptr); + + seg_mp = dupb(*lso_mp); + if (seg_mp == NULL) + return (NULL); + + seg_head = seg_mp; + seg_tail = seg_mp; + + /* Continue where we left off from in the lso_mp.
*/ + seg_mp->b_rptr += *offset; + +last_mblk: + /* Case (A) */ + if ((seg_mp->b_rptr + seg_len) < seg_mp->b_wptr) { + *offset += seg_len; + seg_mp->b_wptr = seg_mp->b_rptr + seg_len; + return (seg_head); + } + + /* Case (B) */ + if ((seg_mp->b_rptr + seg_len) == seg_mp->b_wptr) { + *offset = 0; + *lso_mp = (*lso_mp)->b_cont; + return (seg_head); + } + + /* Case (C) */ + ASSERT3U(seg_mp->b_rptr + seg_len, >, seg_mp->b_wptr); + + /* + * The current LSO mblk doesn't have enough data to satisfy + * seg_len -- continue peeling off LSO mblks to build the new + * segment message. If allocation fails we free the previously + * allocated segment mblks and return NULL. + */ + while ((seg_mp->b_rptr + seg_len) > seg_mp->b_wptr) { + ASSERT3U(MBLKL(seg_mp), <=, seg_len); + seg_len -= MBLKL(seg_mp); + *offset = 0; + *lso_mp = (*lso_mp)->b_cont; + seg_mp = dupb(*lso_mp); + + if (seg_mp == NULL) { + freemsgchain(seg_head); + return (NULL); + } + + seg_tail->b_cont = seg_mp; + seg_tail = seg_mp; + } + + /* + * We've walked enough LSO mblks that we can now satisfy the + * remaining seg_len. At this point we need to jump back to + * determine if we have arrived at case (A) or (B). + */ + + /* Just to be paranoid that we didn't underflow. */ + ASSERT3U(seg_len, <, IP_MAXPACKET); + ASSERT3U(seg_len, >, 0); + goto last_mblk; +} + +/* + * Perform software segmentation of a single LSO message. Take an LSO + * message as input and return head/tail pointers as output. This + * function should not be invoked directly but instead through + * mac_hw_emul(). + * + * The resulting chain is comprised of multiple (nsegs) MSS sized + * segments. Each segment will consist of two or more mblks joined by + * b_cont: a header and one or more data mblks. The header mblk is + * allocated anew for each message. The first segment's header is used + * as a template for the rest with adjustments made for things such as + * ID, sequence, length, TCP flags, etc. 
The data mblks reference into + * the existing LSO mblk (passed in as omp) by way of dupb(). Their + * b_rptr/b_wptr values are adjusted to reference only the fraction of + * the LSO message they are responsible for. At the successful + * completion of this function the original mblk (omp) is freed, + * leaving the newly created segment chain as the only remaining + * reference to the data. + */ +static void +mac_sw_lso(mblk_t *omp, mac_emul_t emul, mblk_t **head, mblk_t **tail, + uint_t *count) +{ + uint32_t ocsum_flags, ocsum_start, ocsum_stuff; + uint32_t mss; + uint32_t oehlen, oiphlen, otcphlen, ohdrslen, opktlen, odatalen; + uint32_t oleft; + uint_t nsegs, seg; + int len; + + struct ether_vlan_header *oevh; + const ipha_t *oiph; + const tcph_t *otcph; + ipha_t *niph; + tcph_t *ntcph; + uint16_t ip_id; + uint32_t tcp_seq, tcp_sum, otcp_sum; + + uint32_t offset; + mblk_t *odatamp; + mblk_t *seg_chain, *prev_nhdrmp, *next_nhdrmp, *nhdrmp, *ndatamp; + mblk_t *tmptail; + + ASSERT3P(head, !=, NULL); + ASSERT3P(tail, !=, NULL); + ASSERT3P(count, !=, NULL); + ASSERT3U((DB_CKSUMFLAGS(omp) & HW_LSO), !=, 0); + + /* Assume we are dealing with a single LSO message. */ + ASSERT3P(omp->b_next, ==, NULL); + + /* + * XXX: This is a hack to deal with mac_add_vlan_tag(). + * + * When VLANs are in play, mac_add_vlan_tag() creates a new + * mblk with just the ether_vlan_header and tacks it onto the + * front of 'omp'. This breaks the assumptions made below; + * namely that the TCP/IP headers are in the first mblk. In + * this case, since we already have to pay the cost of LSO + * emulation, we simply pull up everything. While this might + * seem irksome, keep in mind this will only apply in a couple + * of scenarios: a) an LSO-capable VLAN client sending to a + * non-LSO-capable client over the "MAC/bridge loopback" + * datapath or b) an LSO-capable VLAN client is sending to a + * client that, for whatever reason, doesn't have DLS-bypass + * enabled.
Finally, we have to check for both a tagged and + * untagged sized mblk depending on if the mblk came via + * mac_promisc_dispatch() or mac_rx_deliver(). + * + * In the future, two things should be done: + * + * 1. This function should make use of some yet to be + * implemented "mblk helpers". These helper functions would + * perform all the b_cont walking for us and guarantee safe + * access to the mblk data. + * + * 2. We should add some slop to the mblks so that + * mac_add_vlan_tag() can just edit the first mblk instead + * of allocating on the hot path. + */ + if (MBLKL(omp) == sizeof (struct ether_vlan_header) || + MBLKL(omp) == sizeof (struct ether_header)) { + mblk_t *tmp = msgpullup(omp, -1); + + if (tmp == NULL) { + mac_drop_pkt(omp, "failed to pull up"); + goto fail; + } + + mac_hcksum_clone(omp, tmp); + freemsg(omp); + omp = tmp; + } + + mss = DB_LSOMSS(omp); + ASSERT3U(msgsize(omp), <=, IP_MAXPACKET + + sizeof (struct ether_vlan_header)); + opktlen = msgsize(omp); + + /* + * First, get references to the IP and TCP headers and + * determine the total TCP length (header + data). + * + * Thanks to mac_hw_emul() we know that the first mblk must + * contain (at minimum) the full L2 header. However, this + * function assumes more than that. It assumes the L2/L3/L4 + * headers are all contained in the first mblk of a message + * (i.e., no b_cont walking for headers). While this is a + * current reality (our native TCP stack and viona both + * enforce this) things may become more nuanced in the future + * (e.g. when introducing encap support or adding new + * clients). For now we guard against this case by dropping + * the packet. 
+ */ + oevh = (struct ether_vlan_header *)omp->b_rptr; + if (oevh->ether_tpid == htons(ETHERTYPE_VLAN)) + oehlen = sizeof (struct ether_vlan_header); + else + oehlen = sizeof (struct ether_header); + + ASSERT3U(MBLKL(omp), >=, (oehlen + sizeof (ipha_t) + sizeof (tcph_t))); + if (MBLKL(omp) < (oehlen + sizeof (ipha_t) + sizeof (tcph_t))) { + mac_drop_pkt(omp, "mblk doesn't contain TCP/IP headers"); + goto fail; + } + + oiph = (ipha_t *)(omp->b_rptr + oehlen); + oiphlen = IPH_HDR_LENGTH(oiph); + otcph = (tcph_t *)(omp->b_rptr + oehlen + oiphlen); + otcphlen = TCP_HDR_LENGTH(otcph); + + /* + * Currently we only support LSO for TCP/IPv4. + */ + if (IPH_HDR_VERSION(oiph) != IPV4_VERSION) { + mac_drop_pkt(omp, "LSO unsupported IP version: %uhh", + IPH_HDR_VERSION(oiph)); + goto fail; + } + + if (oiph->ipha_protocol != IPPROTO_TCP) { + mac_drop_pkt(omp, "LSO unsupported protocol: %uhh", + oiph->ipha_protocol); + goto fail; + } + + if (otcph->th_flags[0] & (TH_SYN | TH_RST | TH_URG)) { + mac_drop_pkt(omp, "LSO packet has SYN|RST|URG set"); + goto fail; + } + + ohdrslen = oehlen + oiphlen + otcphlen; + if ((len = MBLKL(omp)) < ohdrslen) { + mac_drop_pkt(omp, "LSO packet too short: %d < %u", len, + ohdrslen); + goto fail; + } + + /* + * Either we have data in the first mblk or it's just the + * header. In either case, we need to set rptr to the start of + * the TCP data. + */ + if (len > ohdrslen) { + odatamp = omp; + offset = ohdrslen; + } else { + ASSERT3U(len, ==, ohdrslen); + odatamp = omp->b_cont; + offset = 0; + } + + /* Make sure we still have enough data. */ + ASSERT3U(msgsize(odatamp), >=, opktlen - ohdrslen); + + /* + * If a MAC negotiated LSO then it must negotiate both + * HCKSUM_IPHDRCKSUM and either HCKSUM_INET_FULL_V4 or + * HCKSUM_INET_PARTIAL; because both the IP and TCP headers + * change during LSO segmentation (only the 3 fields of the + * pseudo header checksum don't change: src, dst, proto).
Thus + * we would expect these flags (HCK_IPV4_HDRCKSUM | + * HCK_PARTIALCKSUM | HCK_FULLCKSUM) to be set and for this + * function to emulate those checksums in software. However, + * that assumes a world where we only expose LSO if the + * underlying hardware exposes LSO. Moving forward the plan is + * to assume LSO in the upper layers and have MAC perform + * software LSO when the underlying provider doesn't support + * it. In such a world, if the provider doesn't support LSO + * but does support hardware checksum offload, then we could + * simply perform the segmentation and allow the hardware to + * calculate the checksums. To the hardware it's just another + * chain of non-LSO packets. + */ + ASSERT3S(DB_TYPE(omp), ==, M_DATA); + ocsum_flags = DB_CKSUMFLAGS(omp); + ASSERT3U(ocsum_flags & HCK_IPV4_HDRCKSUM, !=, 0); + ASSERT3U(ocsum_flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM), !=, 0); + + /* + * If hardware only provides partial checksum then software + * must supply the pseudo-header checksum. In the case of LSO + * we leave the TCP length at zero to be filled in by + * hardware. This function must handle two scenarios. + * + * 1. Being called by a MAC client on the Rx path to segment + * an LSO packet and calculate the checksum. + * + * 2. Being called by a MAC provider to segment an LSO packet. + * In this case the LSO segmentation is performed in + * software (by this routine) but the MAC provider should + * still calculate the TCP/IP checksums in hardware. + * + * To elaborate on the second case: we cannot have the + * scenario where IP sends LSO packets but the underlying HW + * doesn't support checksum offload -- because in that case + * TCP/IP would calculate the checksum in software (for the + * LSO packet) but then MAC would segment the packet and have + * to redo all the checksum work. So IP should never do LSO + * if HW doesn't support both IP and TCP checksum. 
+ */ + if (ocsum_flags & HCK_PARTIALCKSUM) { + ocsum_start = (uint32_t)DB_CKSUMSTART(omp); + ocsum_stuff = (uint32_t)DB_CKSUMSTUFF(omp); + } + + odatalen = opktlen - ohdrslen; + + /* + * Subtract one to account for the case where the data length + * is evenly divisible by the MSS. Add one to account for the + * fact that the division will always result in one less + * segment than needed. + */ + nsegs = ((odatalen - 1) / mss) + 1; + if (nsegs < 2) { + mac_drop_pkt(omp, "LSO not enough segs: %u", nsegs); + goto fail; + } + + DTRACE_PROBE6(sw__lso__start, mblk_t *, omp, void_ip_t *, oiph, + __dtrace_tcp_tcph_t *, otcph, uint_t, odatalen, uint_t, mss, uint_t, + nsegs); + + seg_chain = NULL; + tmptail = seg_chain; + oleft = odatalen; + + for (uint_t i = 0; i < nsegs; i++) { + boolean_t last_seg = ((i + 1) == nsegs); + uint32_t seg_len; + + /* + * If we fail to allocate, then drop the partially + * allocated chain as well as the LSO packet. Let the + * sender deal with the fallout. + */ + if ((nhdrmp = allocb(ohdrslen, 0)) == NULL) { + freemsgchain(seg_chain); + mac_drop_pkt(omp, "failed to alloc segment header"); + goto fail; + } + ASSERT3P(nhdrmp->b_cont, ==, NULL); + + if (seg_chain == NULL) { + seg_chain = nhdrmp; } else { - sap = ntohs(ehp->ether_type); - offset = sizeof (struct ether_header); + ASSERT3P(tmptail, !=, NULL); + tmptail->b_next = nhdrmp; } - if (MBLKL(mp) <= offset) { - offset -= MBLKL(mp); - if (mp->b_cont == NULL) { - /* corrupted packet, skip it */ - if (prev != NULL) - prev->b_next = mp->b_next; - else - new_chain = mp->b_next; - mp1 = mp->b_next; - mp->b_next = NULL; - freemsg(mp); - mp = mp1; - continue; - } - mp = mp->b_cont; + tmptail = nhdrmp; + + /* + * Calculate this segment's length. It's either the MSS + * or whatever remains for the last segment. + */ + seg_len = last_seg ?
oleft : mss; + ASSERT3U(seg_len, <=, mss); + ndatamp = build_data_seg(&odatamp, &offset, seg_len); + + if (ndatamp == NULL) { + freemsgchain(seg_chain); + mac_drop_pkt(omp, "LSO failed to segment data"); + goto fail; } - if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) { - ipha_t *ipha = NULL; + /* Attach data mblk to header mblk. */ + nhdrmp->b_cont = ndatamp; + DB_CKSUMFLAGS(ndatamp) &= ~HW_LSO; + ASSERT3U(seg_len, <=, oleft); + oleft -= seg_len; + } + + /* We should have consumed entire LSO msg. */ + ASSERT3S(oleft, ==, 0); + ASSERT3P(odatamp, ==, NULL); - /* - * In order to compute the full and header - * checksums, we need to find and parse - * the IP and/or ULP headers. - */ + /* + * All seg data mblks are referenced by the header mblks, null + * out this pointer to catch any bad derefs. + */ + ndatamp = NULL; + + /* + * Set headers and checksum for first segment. + */ + nhdrmp = seg_chain; + bcopy(omp->b_rptr, nhdrmp->b_rptr, ohdrslen); + nhdrmp->b_wptr = nhdrmp->b_rptr + ohdrslen; + niph = (ipha_t *)(nhdrmp->b_rptr + oehlen); + ASSERT3U(msgsize(nhdrmp->b_cont), ==, mss); + niph->ipha_length = htons(oiphlen + otcphlen + mss); + niph->ipha_hdr_checksum = 0; + ip_id = ntohs(niph->ipha_ident); + ntcph = (tcph_t *)(nhdrmp->b_rptr + oehlen + oiphlen); + tcp_seq = BE32_TO_U32(ntcph->th_seq); + tcp_seq += mss; + + /* + * The first segment shouldn't: + * + * o indicate end of data transmission (FIN), + * o indicate immediate handling of the data (PUSH). + */ + ntcph->th_flags[0] &= ~(TH_FIN | TH_PUSH); + DB_CKSUMFLAGS(nhdrmp) = (uint16_t)(ocsum_flags & ~HW_LSO); + + /* + * If the underlying HW provides partial checksum, then make + * sure to correct the pseudo header checksum before calling + * mac_sw_cksum(). The native TCP stack doesn't include the + * length field in the pseudo header when LSO is in play -- so + * we need to calculate it here. 
+ */ + if (ocsum_flags & HCK_PARTIALCKSUM) { + DB_CKSUMSTART(nhdrmp) = ocsum_start; + DB_CKSUMEND(nhdrmp) = ntohs(niph->ipha_length); + DB_CKSUMSTUFF(nhdrmp) = ocsum_stuff; + tcp_sum = BE16_TO_U16(ntcph->th_sum); + otcp_sum = tcp_sum; + tcp_sum += mss + otcphlen; + tcp_sum = (tcp_sum >> 16) + (tcp_sum & 0xFFFF); + U16_TO_BE16(tcp_sum, ntcph->th_sum); + } + + if ((ocsum_flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) && + (emul & MAC_HWCKSUM_EMULS)) { + next_nhdrmp = nhdrmp->b_next; + nhdrmp->b_next = NULL; + nhdrmp = mac_sw_cksum(nhdrmp, emul); + nhdrmp->b_next = next_nhdrmp; + next_nhdrmp = NULL; + + /* + * We may have freed the nhdrmp argument during + * checksum emulation, make sure that seg_chain + * references a valid mblk. + */ + seg_chain = nhdrmp; + } + + ASSERT3P(nhdrmp, !=, NULL); - sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; + seg = 1; + DTRACE_PROBE5(sw__lso__seg, mblk_t *, nhdrmp, void_ip_t *, + (ipha_t *)(nhdrmp->b_rptr + oehlen), __dtrace_tcp_tcph_t *, + (tcph_t *)(nhdrmp->b_rptr + oehlen + oiphlen), uint_t, mss, + uint_t, seg); + seg++; + /* There better be at least 2 segs. */ + ASSERT3P(nhdrmp->b_next, !=, NULL); + prev_nhdrmp = nhdrmp; + nhdrmp = nhdrmp->b_next; + + /* + * Now adjust the headers of the middle segments. For each + * header we need to adjust the following. + * + * o IP ID + * o IP length + * o TCP sequence + * o TCP flags + * o cksum flags + * o cksum values (if MAC_HWCKSUM_EMUL is set) + */ + for (; seg < nsegs; seg++) { + /* + * We use seg_chain as a reference to the first seg + * header mblk -- this first header is a template for + * the rest of the segments. This copy will include + * the now updated checksum values from the first + * header. We must reset these checksum values to + * their original to make sure we produce the correct + * value. 
+ */ + bcopy(seg_chain->b_rptr, nhdrmp->b_rptr, ohdrslen); + nhdrmp->b_wptr = nhdrmp->b_rptr + ohdrslen; + niph = (ipha_t *)(nhdrmp->b_rptr + oehlen); + niph->ipha_ident = htons(++ip_id); + ASSERT3U(msgsize(nhdrmp->b_cont), ==, mss); + niph->ipha_length = htons(oiphlen + otcphlen + mss); + niph->ipha_hdr_checksum = 0; + ntcph = (tcph_t *)(nhdrmp->b_rptr + oehlen + oiphlen); + U32_TO_BE32(tcp_seq, ntcph->th_seq); + tcp_seq += mss; + /* + * Just like the first segment, the middle segments + * shouldn't have these flags set. + */ + ntcph->th_flags[0] &= ~(TH_FIN | TH_PUSH); + DB_CKSUMFLAGS(nhdrmp) = (uint16_t)(ocsum_flags & ~HW_LSO); + + if (ocsum_flags & HCK_PARTIALCKSUM) { /* - * IP header. + * First and middle segs have same + * pseudo-header checksum. */ - if (sap != ETHERTYPE_IP) - continue; + U16_TO_BE16(tcp_sum, ntcph->th_sum); + DB_CKSUMSTART(nhdrmp) = ocsum_start; + DB_CKSUMEND(nhdrmp) = ntohs(niph->ipha_length); + DB_CKSUMSTUFF(nhdrmp) = ocsum_stuff; + } - ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t)); - /* LINTED: improper alignment cast */ - ipha = (ipha_t *)(mp->b_rptr + offset); - - if (flags & HCK_FULLCKSUM) { - ipaddr_t src, dst; - uint32_t cksum; - uint16_t *up; - uint8_t proto; - - /* - * Pointer to checksum field in ULP header. - */ - proto = ipha->ipha_protocol; - ASSERT(ipha->ipha_version_and_hdr_length == - IP_SIMPLE_HDR_VERSION); - - switch (proto) { - case IPPROTO_TCP: - /* LINTED: improper alignment cast */ - up = IPH_TCPH_CHECKSUMP(ipha, - IP_SIMPLE_HDR_LENGTH); - break; - - case IPPROTO_UDP: - /* LINTED: improper alignment cast */ - up = IPH_UDPH_CHECKSUMP(ipha, - IP_SIMPLE_HDR_LENGTH); - break; - - default: - cmn_err(CE_WARN, "mac_fix_cksum: " - "unexpected protocol: %d", proto); - continue; - } - - /* - * Pseudo-header checksum. 
- */ - src = ipha->ipha_src; - dst = ipha->ipha_dst; - len = ntohs(ipha->ipha_length) - - IP_SIMPLE_HDR_LENGTH; - - cksum = (dst >> 16) + (dst & 0xFFFF) + - (src >> 16) + (src & 0xFFFF); - cksum += htons(len); - - /* - * The checksum value stored in the packet needs - * to be correct. Compute it here. - */ - *up = 0; - cksum += (((proto) == IPPROTO_UDP) ? - IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP); - cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH + - offset, cksum); - *(up) = (uint16_t)(cksum ? cksum : ~cksum); - - /* - * Flag the packet so that it appears - * that the checksum has already been - * verified by the hardware. - */ - flags &= ~HCK_FULLCKSUM; - flags |= HCK_FULLCKSUM_OK; - value = 0; - } + if ((ocsum_flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) && + (emul & MAC_HWCKSUM_EMULS)) { + next_nhdrmp = nhdrmp->b_next; + nhdrmp->b_next = NULL; + nhdrmp = mac_sw_cksum(nhdrmp, emul); + nhdrmp->b_next = next_nhdrmp; + next_nhdrmp = NULL; + /* We may have freed the original nhdrmp. */ + prev_nhdrmp->b_next = nhdrmp; + } - if (flags & HCK_IPV4_HDRCKSUM) { - ASSERT(ipha != NULL); - ipha->ipha_hdr_checksum = - (uint16_t)ip_csum_hdr(ipha); - flags &= ~HCK_IPV4_HDRCKSUM; - flags |= HCK_IPV4_HDRCKSUM_OK; + DTRACE_PROBE5(sw__lso__seg, mblk_t *, nhdrmp, void_ip_t *, + (ipha_t *)(nhdrmp->b_rptr + oehlen), __dtrace_tcp_tcph_t *, + (tcph_t *)(nhdrmp->b_rptr + oehlen + oiphlen), + uint_t, mss, uint_t, seg); - } + ASSERT3P(nhdrmp->b_next, !=, NULL); + prev_nhdrmp = nhdrmp; + nhdrmp = nhdrmp->b_next; + } + + /* Make sure we are on the last segment. */ + ASSERT3U(seg, ==, nsegs); + ASSERT3P(nhdrmp->b_next, ==, NULL); + + /* + * Now we set the last segment header. The difference being + * that FIN/PSH/RST flags are allowed. 
+ */ + bcopy(seg_chain->b_rptr, nhdrmp->b_rptr, ohdrslen); + nhdrmp->b_wptr = nhdrmp->b_rptr + ohdrslen; + niph = (ipha_t *)(nhdrmp->b_rptr + oehlen); + niph->ipha_ident = htons(++ip_id); + len = msgsize(nhdrmp->b_cont); + ASSERT3S(len, >, 0); + niph->ipha_length = htons(oiphlen + otcphlen + len); + niph->ipha_hdr_checksum = 0; + ntcph = (tcph_t *)(nhdrmp->b_rptr + oehlen + oiphlen); + U32_TO_BE32(tcp_seq, ntcph->th_seq); + + DB_CKSUMFLAGS(nhdrmp) = (uint16_t)(ocsum_flags & ~HW_LSO); + if (ocsum_flags & HCK_PARTIALCKSUM) { + DB_CKSUMSTART(nhdrmp) = ocsum_start; + DB_CKSUMEND(nhdrmp) = ntohs(niph->ipha_length); + DB_CKSUMSTUFF(nhdrmp) = ocsum_stuff; + tcp_sum = otcp_sum; + tcp_sum += len + otcphlen; + tcp_sum = (tcp_sum >> 16) + (tcp_sum & 0xFFFF); + U16_TO_BE16(tcp_sum, ntcph->th_sum); + } + + if ((ocsum_flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) && + (emul & MAC_HWCKSUM_EMULS)) { + /* This should be the last mblk. */ + ASSERT3P(nhdrmp->b_next, ==, NULL); + nhdrmp = mac_sw_cksum(nhdrmp, emul); + prev_nhdrmp->b_next = nhdrmp; + } + + DTRACE_PROBE5(sw__lso__seg, mblk_t *, nhdrmp, void_ip_t *, + (ipha_t *)(nhdrmp->b_rptr + oehlen), __dtrace_tcp_tcph_t *, + (tcph_t *)(nhdrmp->b_rptr + oehlen + oiphlen), uint_t, len, + uint_t, seg); + + /* + * Free the reference to the original LSO message as it is + * being replaced by seg_chain. + */ + freemsg(omp); + *head = seg_chain; + *tail = nhdrmp; + *count = nsegs; + return; + +fail: + *head = NULL; + *tail = NULL; + *count = 0; +} + +#define HCK_NEEDED (HCK_IPV4_HDRCKSUM | HCK_PARTIALCKSUM | HCK_FULLCKSUM) + +/* + * Emulate various hardware offload features in software. Take a chain + * of packets as input and emulate the hardware features specified in + * 'emul'. The resulting chain's head pointer replaces the 'mp_chain' + * pointer given as input, and its tail pointer is written to + * '*otail'. The number of packets in the new chain is written to + * '*ocount'. 
The 'otail' and 'ocount' arguments are optional and thus + * may be NULL. The 'mp_chain' argument may point to a NULL chain; in + * which case 'mp_chain' will simply stay a NULL chain. + * + * While unlikely, it is technically possible that this function could + * receive a non-NULL chain as input and return a NULL chain as output + * ('*mp_chain' and '*otail' would be NULL and '*ocount' would be + * zero). This could happen if all the packets in the chain are + * dropped or if we fail to allocate new mblks. In this case, there is + * nothing for the caller to free. In any event, the caller shouldn't + * assume that '*mp_chain' is non-NULL on return. + * + * This function was written with three main use cases in mind. + * + * 1. To emulate hardware offloads when traveling mac-loopback (two + * clients on the same mac). This is wired up in mac_tx_send(). + * + * 2. To provide hardware offloads to the client when the underlying + * provider cannot. This is currently wired up in mac_tx() but we + * still only negotiate offloads when the underlying provider + * supports them. + * + * 3. To emulate real hardware in simnet. + */ +void +mac_hw_emul(mblk_t **mp_chain, mblk_t **otail, uint_t *ocount, mac_emul_t emul) +{ + mblk_t *head = NULL, *tail = NULL; + uint_t count = 0; + + ASSERT3S(~(MAC_HWCKSUM_EMULS | MAC_LSO_EMUL) & emul, ==, 0); + ASSERT3P(mp_chain, !=, NULL); + + for (mblk_t *mp = *mp_chain; mp != NULL; ) { + mblk_t *tmp, *next, *tmphead, *tmptail; + struct ether_header *ehp; + uint32_t flags; + uint_t len = MBLKL(mp), l2len; + + /* Perform LSO/cksum one message at a time. */ + next = mp->b_next; + mp->b_next = NULL; + + /* + * For our sanity the first mblk should contain at + * least the full L2 header. 
+ */ + if (len < sizeof (struct ether_header)) { + mac_drop_pkt(mp, "packet too short (A): %u", len); + mp = next; + continue; } - if (flags & HCK_PARTIALCKSUM) { - uint16_t *up, partial, cksum; - uchar_t *ipp; /* ptr to beginning of IP header */ - - if (mp->b_cont != NULL) { - mblk_t *mp1; - - mp1 = msgpullup(mp, offset + end); - if (mp1 == NULL) - continue; - mp1->b_next = mp->b_next; - mp->b_next = NULL; - freemsg(mp); - if (prev != NULL) - prev->b_next = mp1; - else - new_chain = mp1; - mp = mp1; - } + ehp = (struct ether_header *)mp->b_rptr; + if (ntohs(ehp->ether_type) == VLAN_TPID) + l2len = sizeof (struct ether_vlan_header); + else + l2len = sizeof (struct ether_header); - ipp = mp->b_rptr + offset; - /* LINTED: cast may result in improper alignment */ - up = (uint16_t *)((uchar_t *)ipp + stuff); - partial = *up; - *up = 0; + /* + * If the first mblk is solely the L2 header, then + * there better be more data. + */ + if (len < l2len || (len == l2len && mp->b_cont == NULL)) { + mac_drop_pkt(mp, "packet too short (C): %u", len); + mp = next; + continue; + } + + DTRACE_PROBE2(mac__emul, mblk_t *, mp, mac_emul_t, emul); + + /* + * We use DB_CKSUMFLAGS (instead of mac_hcksum_get()) + * because we don't want to mask-out the LSO flag. + */ + flags = DB_CKSUMFLAGS(mp); - cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start, - end - start, partial); - cksum = ~cksum; - *up = cksum ? cksum : ~cksum; + if ((flags & HW_LSO) && (emul & MAC_LSO_EMUL)) { + uint_t tmpcount = 0; /* - * Since we already computed the whole checksum, - * indicate to the stack that it has already - * been verified by the hardware. + * LSO fix-up handles checksum emulation + * inline (if requested). It also frees mp. */ - flags &= ~HCK_PARTIALCKSUM; - flags |= HCK_FULLCKSUM_OK; - value = 0; + mac_sw_lso(mp, emul, &tmphead, &tmptail, + &tmpcount); + if (tmphead == NULL) { + /* mac_sw_lso() freed the mp. 
*/ + mp = next; + continue; + } + count += tmpcount; + } else if ((flags & HCK_NEEDED) && (emul & MAC_HWCKSUM_EMULS)) { + tmp = mac_sw_cksum(mp, emul); + if (tmp == NULL) { + /* mac_sw_cksum() freed the mp. */ + mp = next; + continue; + } + tmphead = tmp; + tmptail = tmp; + count++; + } else { + /* There is nothing to emulate. */ + tmp = mp; + tmphead = tmp; + tmptail = tmp; + count++; + } + + /* + * The tmp mblk chain is either the start of the new + * chain or added to the tail of the new chain. + */ + if (head == NULL) { + head = tmphead; + tail = tmptail; + } else { + /* Attach the new mblk to the end of the new chain. */ + tail->b_next = tmphead; + tail = tmptail; } - mac_hcksum_set(mp, start, stuff, end, value, flags); + mp = next; } - return (new_chain); + *mp_chain = head; + + if (otail != NULL) + *otail = tail; + + if (ocount != NULL) + *ocount = count; } /* @@ -449,17 +1568,10 @@ mac_strip_vlan_tag_chain(mblk_t *mp_chain) */ /* ARGSUSED */ void -mac_pkt_drop(void *arg, mac_resource_handle_t resource, mblk_t *mp, +mac_rx_def(void *arg, mac_resource_handle_t resource, mblk_t *mp_chain, boolean_t loopback) { - mblk_t *mp1 = mp; - - while (mp1 != NULL) { - mp1->b_prev = NULL; - mp1->b_queue = NULL; - mp1 = mp1->b_next; - } - freemsgchain(mp); + freemsgchain(mp_chain); } /* diff --git a/usr/src/uts/common/io/simnet/simnet.c b/usr/src/uts/common/io/simnet/simnet.c index 727fbbad8e..b215f6e94b 100644 --- a/usr/src/uts/common/io/simnet/simnet.c +++ b/usr/src/uts/common/io/simnet/simnet.c @@ -21,6 +21,8 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * + * Copyright 2019 Joyent, Inc. 
*/ /* @@ -51,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -107,14 +110,15 @@ static int simnet_m_stat(void *, uint_t, uint64_t *); static void simnet_m_ioctl(void *, queue_t *, mblk_t *); static mblk_t *simnet_m_tx(void *, mblk_t *); static int simnet_m_setprop(void *, const char *, mac_prop_id_t, - uint_t, const void *); + const uint_t, const void *); static int simnet_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); static void simnet_m_propinfo(void *, const char *, mac_prop_id_t, mac_prop_info_handle_t); +static boolean_t simnet_m_getcapab(void *, mac_capab_t, void *); static mac_callbacks_t simnet_m_callbacks = { - (MC_IOCTL | MC_SETPROP | MC_GETPROP | MC_PROPINFO), + (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO), simnet_m_stat, simnet_m_start, simnet_m_stop, @@ -124,7 +128,7 @@ static mac_callbacks_t simnet_m_callbacks = { simnet_m_tx, NULL, simnet_m_ioctl, - NULL, + simnet_m_getcapab, NULL, NULL, simnet_m_setprop, @@ -671,6 +675,12 @@ simnet_thread_unref(simnet_dev_t *sdev) mutex_exit(&sdev->sd_instlock); } +/* + * TODO: Add properties to set Rx checksum flag behavior. + * + * o HCK_PARTIALCKSUM. + * o HCK_FULLCKSUM_OK. + */ static void simnet_rx(void *arg) { @@ -683,7 +693,7 @@ simnet_rx(void *arg) /* Check for valid packet header */ if (mac_header_info(sdev->sd_mh, mp, &hdr_info) != 0) { - freemsg(mp); + mac_drop_pkt(mp, "invalid L2 header"); sdev->sd_stats.recv_errors++; goto rx_done; } @@ -712,6 +722,16 @@ simnet_rx(void *arg) } } + /* + * We don't actually calculate and verify the IP header + * checksum because the nature of simnet makes it redundant to + * do so. The point is to test the presence of the flags. The + * Tx side will have already populated the checksum field. 
+ */ + if ((sdev->sd_rx_cksum & HCKSUM_IPHDRCKSUM) != 0) { + mac_hcksum_set(mp, 0, 0, 0, 0, HCK_IPV4_HDRCKSUM_OK); + } + sdev->sd_stats.recv_count++; sdev->sd_stats.rbytes += msgdsize(mp); mac_rx(sdev->sd_mh, NULL, mp); @@ -719,19 +739,22 @@ rx_done: simnet_thread_unref(sdev); } +#define SIMNET_ULP_CKSUM (HCKSUM_INET_FULL_V4 | HCKSUM_INET_PARTIAL) + static mblk_t * simnet_m_tx(void *arg, mblk_t *mp_chain) { simnet_dev_t *sdev = arg; simnet_dev_t *sdev_rx; mblk_t *mpnext = mp_chain; - mblk_t *mp; + mblk_t *mp, *nmp; + mac_emul_t emul = 0; rw_enter(&simnet_dev_lock, RW_READER); if ((sdev_rx = sdev->sd_peer_dev) == NULL) { /* Discard packets when no peer exists */ rw_exit(&simnet_dev_lock); - freemsgchain(mp_chain); + mac_drop_chain(mp_chain, "no peer"); return (NULL); } @@ -748,20 +771,20 @@ simnet_m_tx(void *arg, mblk_t *mp_chain) */ if (!simnet_thread_ref(sdev_rx)) { rw_exit(&simnet_dev_lock); - freemsgchain(mp_chain); + mac_drop_chain(mp_chain, "simnet peer dev not ready"); return (NULL); } rw_exit(&simnet_dev_lock); if (!simnet_thread_ref(sdev)) { simnet_thread_unref(sdev_rx); - freemsgchain(mp_chain); + mac_drop_chain(mp_chain, "simnet dev not ready"); return (NULL); } while ((mp = mpnext) != NULL) { - int len; - int size; + size_t len; + size_t size; mblk_t *mp_new; mblk_t *mp_tmp; @@ -775,7 +798,7 @@ simnet_m_tx(void *arg, mblk_t *mp_chain) mp_new = allocb(size, BPRI_HI); if (mp_new == NULL) { sdev->sd_stats.xmit_errors++; - freemsg(mp); + mac_drop_pkt(mp, "allocb failed"); continue; } bzero(mp_new->b_wptr, size); @@ -789,25 +812,44 @@ simnet_m_tx(void *arg, mblk_t *mp_chain) } /* Pullup packet into a single mblk */ - if (!pullupmsg(mp, -1)) { - sdev->sd_stats.xmit_errors++; - freemsg(mp); - continue; - } - - /* Fix mblk checksum as the pkt dest is local */ - if ((mp = mac_fix_cksum(mp)) == NULL) { + if ((nmp = msgpullup(mp, -1)) == NULL) { sdev->sd_stats.xmit_errors++; + mac_drop_pkt(mp, "msgpullup failed"); continue; + } else { + mac_hcksum_clone(mp, nmp); + 
freemsg(mp); + mp = nmp; } /* Hold reference for taskq receive processing per-pkt */ if (!simnet_thread_ref(sdev_rx)) { - freemsg(mp); - freemsgchain(mpnext); + mac_drop_pkt(mp, "failed to get thread ref"); + mac_drop_chain(mpnext, "failed to get thread ref"); break; } + if ((sdev->sd_tx_cksum & HCKSUM_IPHDRCKSUM) != 0) + emul |= MAC_IPCKSUM_EMUL; + if ((sdev->sd_tx_cksum & SIMNET_ULP_CKSUM) != 0) + emul |= MAC_HWCKSUM_EMUL; + if (sdev->sd_lso) + emul |= MAC_LSO_EMUL; + + if (emul != 0) + mac_hw_emul(&mp, NULL, NULL, emul); + + if (mp == NULL) { + sdev->sd_stats.xmit_errors++; + continue; + } + + /* + * Remember, we are emulating a real NIC here; the + * checksum flags can't make the trip across the link. + */ + DB_CKSUMFLAGS(mp) = 0; + /* Use taskq for pkt receive to avoid kernel stack explosion */ mp->b_next = (mblk_t *)sdev_rx; if (ddi_taskq_dispatch(simnet_rxq, simnet_rx, mp, @@ -886,6 +928,43 @@ simnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp) miocack(q, mp, msgdsize(mp1), rc); } +static boolean_t +simnet_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) +{ + simnet_dev_t *sdev = arg; + const uint_t tcp_cksums = HCKSUM_INET_FULL_V4 | HCKSUM_INET_PARTIAL; + + switch (cap) { + case MAC_CAPAB_HCKSUM: { + uint32_t *tx_cksum_flags = cap_data; + *tx_cksum_flags = sdev->sd_tx_cksum; + break; + } + case MAC_CAPAB_LSO: { + mac_capab_lso_t *cap_lso = cap_data; + + if (sdev->sd_lso && + (sdev->sd_tx_cksum & HCKSUM_IPHDRCKSUM) != 0 && + (sdev->sd_tx_cksum & tcp_cksums) != 0) { + /* + * The LSO configuration is hardwired for now, + * but there's no reason we couldn't also make + * this configurable in the future. 
+ */ + cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4; + cap_lso->lso_basic_tcp_ipv4.lso_max = SD_LSO_MAXLEN; + break; + } else { + return (B_FALSE); + } + } + default: + return (B_FALSE); + } + + return (B_TRUE); +} + static int simnet_m_stat(void *arg, uint_t stat, uint64_t *val) { @@ -1142,20 +1221,20 @@ set_wl_esslist_priv_prop(simnet_wifidev_t *wdev, uint_t pr_valsize, } static int -simnet_set_priv_prop(simnet_dev_t *sdev, const char *pr_name, - uint_t pr_valsize, const void *pr_val) +simnet_set_priv_prop_wifi(simnet_dev_t *sdev, const char *name, + const uint_t len, const void *val) { simnet_wifidev_t *wdev = sdev->sd_wifidev; long result; - if (strcmp(pr_name, "_wl_esslist") == 0) { - if (pr_val == NULL) + if (strcmp(name, "_wl_esslist") == 0) { + if (val == NULL) return (EINVAL); - return (set_wl_esslist_priv_prop(wdev, pr_valsize, pr_val)); - } else if (strcmp(pr_name, "_wl_connected") == 0) { - if (pr_val == NULL) + return (set_wl_esslist_priv_prop(wdev, len, val)); + } else if (strcmp(name, "_wl_connected") == 0) { + if (val == NULL) return (EINVAL); - (void) ddi_strtol(pr_val, (char **)NULL, 0, &result); + (void) ddi_strtol(val, (char **)NULL, 0, &result); wdev->swd_linkstatus = ((result == 1) ? 
WL_CONNECTED:WL_NOTCONNECTED); return (0); @@ -1164,37 +1243,89 @@ simnet_set_priv_prop(simnet_dev_t *sdev, const char *pr_name, return (EINVAL); } +/* ARGSUSED */ static int -simnet_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t wldp_length, const void *wldp_buf) +simnet_set_priv_prop_ether(simnet_dev_t *sdev, const char *name, + const uint_t len, const void *val) { - simnet_dev_t *sdev = arg; - simnet_wifidev_t *wdev = sdev->sd_wifidev; - int err = 0; - uint32_t mtu; + if (strcmp(name, SD_PROP_RX_IP_CKSUM) == 0) { + if (val == NULL) + return (EINVAL); - switch (wldp_pr_num) { - case MAC_PROP_MTU: - (void) memcpy(&mtu, wldp_buf, sizeof (mtu)); - if (mtu > ETHERMIN && mtu < SIMNET_MAX_MTU) - return (mac_maxsdu_update(sdev->sd_mh, mtu)); - else + if (strcmp(val, "off") == 0) { + sdev->sd_rx_cksum &= ~HCKSUM_IPHDRCKSUM; + } else if (strcmp(val, "on") == 0) { + sdev->sd_rx_cksum |= HCKSUM_IPHDRCKSUM; + } else { return (EINVAL); - default: - break; + } + + return (0); + } else if (strcmp(name, SD_PROP_TX_ULP_CKSUM) == 0) { + if (val == NULL) + return (EINVAL); + + /* + * Remember, full and partial checksum are mutually + * exclusive. 
+ */ + if (strcmp(val, "none") == 0) { + sdev->sd_tx_cksum &= ~SIMNET_ULP_CKSUM; + } else if (strcmp(val, "fullv4") == 0) { + sdev->sd_tx_cksum &= ~HCKSUM_INET_PARTIAL; + sdev->sd_tx_cksum |= HCKSUM_INET_FULL_V4; + } else if (strcmp(val, "partial") == 0) { + sdev->sd_tx_cksum &= ~HCKSUM_INET_FULL_V4; + sdev->sd_tx_cksum |= HCKSUM_INET_PARTIAL; + } else { + return (EINVAL); + } + + return (0); + } else if (strcmp(name, SD_PROP_TX_IP_CKSUM) == 0) { + if (val == NULL) + return (EINVAL); + + if (strcmp(val, "off") == 0) { + sdev->sd_tx_cksum &= ~HCKSUM_IPHDRCKSUM; + } else if (strcmp(val, "on") == 0) { + sdev->sd_tx_cksum |= HCKSUM_IPHDRCKSUM; + } else { + return (EINVAL); + } + + return (0); + } else if (strcmp(name, SD_PROP_LSO) == 0) { + if (val == NULL) + return (EINVAL); + + if (strcmp(val, "off") == 0) { + sdev->sd_lso = B_FALSE; + } else if (strcmp(val, "on") == 0) { + sdev->sd_lso = B_TRUE; + } else { + return (EINVAL); + } + + return (0); } - if (sdev->sd_type == DL_ETHER) - return (ENOTSUP); + return (ENOTSUP); +} + +static int +simnet_setprop_wifi(simnet_dev_t *sdev, const char *name, + const mac_prop_id_t num, const uint_t len, const void *val) +{ + int err = 0; + simnet_wifidev_t *wdev = sdev->sd_wifidev; - /* mac_prop_id */ - switch (wldp_pr_num) { + switch (num) { case MAC_PROP_WL_ESSID: { int i; wl_ess_conf_t *wls; - (void) memcpy(&wdev->swd_essid, wldp_buf, - sizeof (wl_essid_t)); + (void) memcpy(&wdev->swd_essid, val, sizeof (wl_essid_t)); wdev->swd_linkstatus = WL_CONNECTED; /* Lookup the signal strength of the connected ESSID */ @@ -1209,8 +1340,7 @@ simnet_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, break; } case MAC_PROP_WL_BSSID: { - (void) memcpy(&wdev->swd_bssid, wldp_buf, - sizeof (wl_bssid_t)); + (void) memcpy(&wdev->swd_bssid, val, sizeof (wl_bssid_t)); break; } case MAC_PROP_WL_PHY_CONFIG: @@ -1221,10 +1351,10 @@ simnet_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, case 
MAC_PROP_WL_DESIRED_RATES: break; case MAC_PROP_PRIVATE: - err = simnet_set_priv_prop(sdev, pr_name, - wldp_length, wldp_buf); + err = simnet_set_priv_prop_wifi(sdev, name, len, val); break; default: + err = EINVAL; break; } @@ -1232,66 +1362,159 @@ simnet_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, } static int -simnet_get_priv_prop(simnet_dev_t *sdev, const char *pr_name, - uint_t pr_valsize, void *pr_val) +simnet_setprop_ether(simnet_dev_t *sdev, const char *name, + const mac_prop_id_t num, const uint_t len, const void *val) { - simnet_wifidev_t *wdev = sdev->sd_wifidev; int err = 0; - int value; - if (strcmp(pr_name, "_wl_esslist") == 0) { + switch (num) { + case MAC_PROP_PRIVATE: + err = simnet_set_priv_prop_ether(sdev, name, len, val); + break; + default: + err = EINVAL; + break; + } + + return (err); +} + +static int +simnet_m_setprop(void *arg, const char *name, mac_prop_id_t num, + const uint_t len, const void *val) +{ + simnet_dev_t *sdev = arg; + int err = 0; + uint32_t mtu; + + switch (num) { + case MAC_PROP_MTU: + (void) memcpy(&mtu, val, sizeof (mtu)); + if (mtu > ETHERMIN && mtu < SIMNET_MAX_MTU) + return (mac_maxsdu_update(sdev->sd_mh, mtu)); + else + return (EINVAL); + default: + break; + } + + switch (sdev->sd_type) { + case DL_ETHER: + err = simnet_setprop_ether(sdev, name, num, len, val); + break; + case DL_WIFI: + err = simnet_setprop_wifi(sdev, name, num, len, val); + break; + default: + err = EINVAL; + break; + } + + /* + * We may have modified the configuration of hardware + * offloads. Make sure to renegotiate capabilities with the + * upstream clients. 
+ */ + mac_capab_update(sdev->sd_mh); + return (err); +} + +static int +simnet_get_priv_prop_wifi(const simnet_dev_t *sdev, const char *name, + const uint_t len, void *val) +{ + simnet_wifidev_t *wdev = sdev->sd_wifidev; + int ret, value; + + if (strcmp(name, "_wl_esslist") == 0) { /* Returns num of _wl_ess_conf_t that have been set */ value = wdev->swd_esslist_num; - } else if (strcmp(pr_name, "_wl_connected") == 0) { + } else if (strcmp(name, "_wl_connected") == 0) { value = ((wdev->swd_linkstatus == WL_CONNECTED) ? 1:0); } else { - err = ENOTSUP; + return (ENOTSUP); } - if (err == 0) - (void) snprintf(pr_val, pr_valsize, "%d", value); - return (err); + ret = snprintf(val, len, "%d", value); + + if (ret < 0 || ret >= len) + return (EOVERFLOW); + + return (0); } static int -simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, - uint_t wldp_length, void *wldp_buf) +simnet_get_priv_prop_ether(const simnet_dev_t *sdev, const char *name, + const uint_t len, void *val) { - simnet_dev_t *sdev = arg; - simnet_wifidev_t *wdev = sdev->sd_wifidev; - int err = 0; - int i; + int ret; + char *value; - if (sdev->sd_type == DL_ETHER) + if (strcmp(name, SD_PROP_RX_IP_CKSUM) == 0) { + if ((sdev->sd_rx_cksum & HCKSUM_IPHDRCKSUM) != 0) { + value = "on"; + } else { + value = "off"; + } + } else if (strcmp(name, SD_PROP_TX_ULP_CKSUM) == 0) { + if ((sdev->sd_tx_cksum & HCKSUM_INET_FULL_V4) != 0) { + value = "fullv4"; + } else if ((sdev->sd_tx_cksum & HCKSUM_INET_PARTIAL) != 0) { + value = "partial"; + } else { + value = "none"; + } + } else if (strcmp(name, SD_PROP_TX_IP_CKSUM) == 0) { + if ((sdev->sd_tx_cksum & HCKSUM_IPHDRCKSUM) != 0) { + value = "on"; + } else { + value = "off"; + } + } else if (strcmp(name, SD_PROP_LSO) == 0) { + value = sdev->sd_lso ? 
"on" : "off"; + } else { return (ENOTSUP); + } - /* mac_prop_id */ - switch (wldp_pr_num) { + ret = snprintf(val, len, "%s", value); + + if (ret < 0 || ret >= len) { + return (EOVERFLOW); + } + + return (0); +} + +static int +simnet_getprop_wifi(const simnet_dev_t *sdev, const char *name, + const mac_prop_id_t num, const uint_t len, void *val) +{ + const simnet_wifidev_t *wdev = sdev->sd_wifidev; + int err = 0; + + switch (num) { case MAC_PROP_WL_ESSID: - (void) memcpy(wldp_buf, &wdev->swd_essid, - sizeof (wl_essid_t)); + (void) memcpy(val, &wdev->swd_essid, sizeof (wl_essid_t)); break; case MAC_PROP_WL_BSSID: - (void) memcpy(wldp_buf, &wdev->swd_bssid, - sizeof (wl_bssid_t)); + (void) memcpy(val, &wdev->swd_bssid, sizeof (wl_bssid_t)); break; case MAC_PROP_WL_PHY_CONFIG: case MAC_PROP_WL_AUTH_MODE: case MAC_PROP_WL_ENCRYPTION: break; case MAC_PROP_WL_LINKSTATUS: - (void) memcpy(wldp_buf, &wdev->swd_linkstatus, + (void) memcpy(val, &wdev->swd_linkstatus, sizeof (wdev->swd_linkstatus)); break; case MAC_PROP_WL_ESS_LIST: { wl_ess_conf_t *w_ess_conf; - ((wl_ess_list_t *)wldp_buf)->wl_ess_list_num = - wdev->swd_esslist_num; + ((wl_ess_list_t *)val)->wl_ess_list_num = wdev->swd_esslist_num; /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_ess_conf = (wl_ess_conf_t *)((char *)wldp_buf + + w_ess_conf = (wl_ess_conf_t *)((char *)val + offsetof(wl_ess_list_t, wl_ess_list_ess)); - for (i = 0; i < wdev->swd_esslist_num; i++) { + for (uint_t i = 0; i < wdev->swd_esslist_num; i++) { (void) memcpy(w_ess_conf, wdev->swd_esslist[i], sizeof (wl_ess_conf_t)); w_ess_conf++; @@ -1299,18 +1522,35 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, break; } case MAC_PROP_WL_RSSI: - *(wl_rssi_t *)wldp_buf = wdev->swd_rssi; + *(wl_rssi_t *)val = wdev->swd_rssi; break; case MAC_PROP_WL_RADIO: - *(wl_radio_t *)wldp_buf = B_TRUE; + *(wl_radio_t *)val = B_TRUE; break; case MAC_PROP_WL_POWER_MODE: break; case MAC_PROP_WL_DESIRED_RATES: break; case MAC_PROP_PRIVATE: - err = 
simnet_get_priv_prop(sdev, pr_name, wldp_length, - wldp_buf); + err = simnet_get_priv_prop_wifi(sdev, name, len, val); + break; + default: + err = ENOTSUP; + break; + } + + return (err); +} + +static int +simnet_getprop_ether(const simnet_dev_t *sdev, const char *name, + const mac_prop_id_t num, const uint_t len, void *val) +{ + int err = 0; + + switch (num) { + case MAC_PROP_PRIVATE: + err = simnet_get_priv_prop_ether(sdev, name, len, val); break; default: err = ENOTSUP; @@ -1320,14 +1560,36 @@ simnet_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, return (err); } +static int +simnet_m_getprop(void *arg, const char *name, const mac_prop_id_t num, + const uint_t len, void *val) +{ + const simnet_dev_t *sdev = arg; + int err = 0; + + switch (sdev->sd_type) { + case DL_ETHER: + err = simnet_getprop_ether(sdev, name, num, len, val); + break; + case DL_WIFI: + err = simnet_getprop_wifi(sdev, name, num, len, val); + break; + default: + err = EINVAL; + break; + } + + return (err); +} + static void -simnet_priv_propinfo(const char *pr_name, mac_prop_info_handle_t prh) +simnet_priv_propinfo_wifi(const char *name, mac_prop_info_handle_t prh) { char valstr[MAXNAMELEN]; bzero(valstr, sizeof (valstr)); - if (strcmp(pr_name, "_wl_esslist") == 0) { + if (strcmp(name, "_wl_esslist") == 0) { (void) snprintf(valstr, sizeof (valstr), "%d", 0); } @@ -1336,15 +1598,10 @@ simnet_priv_propinfo(const char *pr_name, mac_prop_info_handle_t prh) } static void -simnet_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, +simnet_propinfo_wifi(const char *name, const mac_prop_id_t num, mac_prop_info_handle_t prh) { - simnet_dev_t *sdev = arg; - - if (sdev->sd_type == DL_ETHER) - return; - - switch (wldp_pr_num) { + switch (num) { case MAC_PROP_WL_BSSTYPE: case MAC_PROP_WL_ESS_LIST: case MAC_PROP_WL_SUPPORTED_RATES: @@ -1352,7 +1609,55 @@ simnet_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num, mac_prop_info_set_perm(prh, 
MAC_PROP_PERM_READ); break; case MAC_PROP_PRIVATE: - simnet_priv_propinfo(pr_name, prh); + simnet_priv_propinfo_wifi(name, prh); + break; + } +} + +static void +simnet_priv_propinfo_ether(const char *name, mac_prop_info_handle_t prh) +{ + if (strcmp(name, SD_PROP_RX_IP_CKSUM) == 0 || + strcmp(name, SD_PROP_TX_ULP_CKSUM) == 0 || + strcmp(name, SD_PROP_TX_IP_CKSUM) == 0 || + strcmp(name, SD_PROP_LSO) == 0) { + mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW); + } + + if (strcmp(name, SD_PROP_TX_ULP_CKSUM) == 0) { + mac_prop_info_set_default_str(prh, "none"); + } + + if (strcmp(name, SD_PROP_RX_IP_CKSUM) == 0 || + strcmp(name, SD_PROP_TX_IP_CKSUM) == 0 || + strcmp(name, SD_PROP_LSO) == 0) { + mac_prop_info_set_default_str(prh, "off"); + } +} + +static void +simnet_propinfo_ether(const char *name, const mac_prop_id_t num, + mac_prop_info_handle_t prh) +{ + switch (num) { + case MAC_PROP_PRIVATE: + simnet_priv_propinfo_ether(name, prh); + break; + } +} + +static void +simnet_m_propinfo(void *arg, const char *name, const mac_prop_id_t num, + const mac_prop_info_handle_t prh) +{ + simnet_dev_t *sdev = arg; + + switch (sdev->sd_type) { + case DL_ETHER: + simnet_propinfo_ether(name, num, prh); + break; + case DL_WIFI: + simnet_propinfo_wifi(name, num, prh); break; } } diff --git a/usr/src/uts/common/io/simnet/simnet_impl.h b/usr/src/uts/common/io/simnet/simnet_impl.h index 74dcba5113..5d6f16f113 100644 --- a/usr/src/uts/common/io/simnet/simnet_impl.h +++ b/usr/src/uts/common/io/simnet/simnet_impl.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2019 Joyent, Inc. */ #ifndef _SYS_SIMNET_IMPL_H @@ -84,13 +85,25 @@ typedef struct simnet_dev { uint_t sd_mac_len; uchar_t sd_mac_addr[MAXMACADDRLEN]; simnet_stats_t sd_stats; + + /* Capabilities */ + uint_t sd_rx_cksum; + uint_t sd_tx_cksum; + boolean_t sd_lso; } simnet_dev_t; +/* Simnet dladm private properties. 
*/ +#define SD_PROP_RX_IP_CKSUM "_rx_ipv4_cksum" +#define SD_PROP_TX_ULP_CKSUM "_tx_ulp_cksum" +#define SD_PROP_TX_IP_CKSUM "_tx_ipv4_cksum" +#define SD_PROP_LSO "_lso" + /* Simnet device flags */ #define SDF_SHUTDOWN 0x00000001 /* Device shutdown, no new ops */ #define SDF_STARTED 0x00000002 /* Device started, allow ops */ #define SIMNET_MAX_MTU 9000 /* Max MTU supported by simnet driver */ +#define SD_LSO_MAXLEN 65535 /* Max LSO supported by simnet driver */ #ifdef __cplusplus } diff --git a/usr/src/uts/common/io/stream.c b/usr/src/uts/common/io/stream.c index ec76c6e2b9..288f77ae47 100644 --- a/usr/src/uts/common/io/stream.c +++ b/usr/src/uts/common/io/stream.c @@ -839,7 +839,7 @@ frnop_func(void *arg) */ static mblk_t * gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp, - void (*lastfree)(mblk_t *, dblk_t *), int kmflags) + void (*lastfree)(mblk_t *, dblk_t *), int kmflags) { dblk_t *dbp; mblk_t *mp; @@ -1450,6 +1450,16 @@ copyb(mblk_t *bp) nbp->b_band = bp->b_band; ndp = nbp->b_datap; + /* + * Copy the various checksum information that came in + * originally. + */ + ndp->db_cksumstart = dp->db_cksumstart; + ndp->db_cksumend = dp->db_cksumend; + ndp->db_cksumstuff = dp->db_cksumstuff; + bcopy(dp->db_struioun.data, ndp->db_struioun.data, + sizeof (dp->db_struioun.data)); + /* * Well, here is a potential issue. If we are trying to * trace a flow, and we copy the message, we might lose diff --git a/usr/src/uts/common/io/vnic/vnic_dev.c b/usr/src/uts/common/io/vnic/vnic_dev.c index bbbd9b46bd..d75db5f258 100644 --- a/usr/src/uts/common/io/vnic/vnic_dev.c +++ b/usr/src/uts/common/io/vnic/vnic_dev.c @@ -457,6 +457,20 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, } else { vnic->vn_hcksum_txflags = 0; } + + /* + * Check for LSO capabilities. LSO implementations + * depend on hardware checksumming, so the same + * requirement is enforced here. 
+ */ + if (vnic->vn_hcksum_txflags != 0) { + if (!mac_capab_get(vnic->vn_lower_mh, MAC_CAPAB_LSO, + &vnic->vn_cap_lso)) { + vnic->vn_cap_lso.lso_flags = 0; + } + } else { + vnic->vn_cap_lso.lso_flags = 0; + } } /* register with the MAC module */ @@ -827,6 +841,15 @@ vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) HCKSUM_INET_PARTIAL); break; } + case MAC_CAPAB_LSO: { + mac_capab_lso_t *cap_lso = cap_data; + + if (vnic->vn_cap_lso.lso_flags == 0) { + return (B_FALSE); + } + *cap_lso = vnic->vn_cap_lso; + break; + } case MAC_CAPAB_VNIC: { mac_capab_vnic_t *vnic_capab = cap_data; diff --git a/usr/src/uts/common/os/ip_cksum.c b/usr/src/uts/common/os/ip_cksum.c index 1fa1c9425b..0a237e86ec 100644 --- a/usr/src/uts/common/os/ip_cksum.c +++ b/usr/src/uts/common/os/ip_cksum.c @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2019 Joyent, Inc. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -34,6 +35,7 @@ #include #include #include +#include #include #include @@ -556,3 +558,109 @@ ip_csum_hdr(ipha_t *ipha) sum = 0; return ((uint16_t)sum); } + +/* + * This function takes an mblk and IPv6 header as input and returns + * three pieces of information. + * + * 'hdr_length_ptr': The IPv6 header length including extension headers. + * + * 'nexthdrpp': A pointer to the "next header" value, aka the + * transport header. This argument may be set to NULL if + * only the length is desired. + * + * return: B_TRUE if the headers are well-formed, B_FALSE if not. + * + * This function assumes the IPv6 header along with all extensions are + * contained solely in this mblk: i.e., there is no b_cont walking. 
+ */ +boolean_t +ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, + uint8_t **nexthdrpp) +{ + uint16_t length; + uint_t ehdrlen; + uint8_t *nexthdrp; + uint8_t *whereptr; + uint8_t *endptr; + ip6_dest_t *desthdr; + ip6_rthdr_t *rthdr; + ip6_frag_t *fraghdr; + + ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); + length = IPV6_HDR_LEN; + whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ + endptr = mp->b_wptr; + + nexthdrp = &ip6h->ip6_nxt; + while (whereptr < endptr) { + /* Is there enough left for len + nexthdr? */ + if (whereptr + MIN_EHDR_LEN > endptr) + break; + + switch (*nexthdrp) { + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: + /* Assumes the headers are identical for hbh and dst */ + desthdr = (ip6_dest_t *)whereptr; + ehdrlen = 8 * (desthdr->ip6d_len + 1); + if ((uchar_t *)desthdr + ehdrlen > endptr) + return (B_FALSE); + nexthdrp = &desthdr->ip6d_nxt; + break; + case IPPROTO_ROUTING: + rthdr = (ip6_rthdr_t *)whereptr; + ehdrlen = 8 * (rthdr->ip6r_len + 1); + if ((uchar_t *)rthdr + ehdrlen > endptr) + return (B_FALSE); + nexthdrp = &rthdr->ip6r_nxt; + break; + case IPPROTO_FRAGMENT: + fraghdr = (ip6_frag_t *)whereptr; + ehdrlen = sizeof (ip6_frag_t); + if ((uchar_t *)&fraghdr[1] > endptr) + return (B_FALSE); + nexthdrp = &fraghdr->ip6f_nxt; + break; + case IPPROTO_NONE: + /* No next header means we're finished */ + default: + *hdr_length_ptr = length; + + if (nexthdrpp != NULL) + *nexthdrpp = nexthdrp; + + return (B_TRUE); + } + length += ehdrlen; + whereptr += ehdrlen; + *hdr_length_ptr = length; + + if (nexthdrpp != NULL) + *nexthdrpp = nexthdrp; + } + switch (*nexthdrp) { + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + case IPPROTO_FRAGMENT: + /* + * If any known extension headers are still to be processed, + * the packet's malformed (or at least all the IP header(s) are + * not in the same mblk - and that should never happen). 
+ */ + return (B_FALSE); + + default: + /* + * If we get here, we know that all of the IP headers were in + * the same mblk, even if the ULP header is in the next mblk. + */ + *hdr_length_ptr = length; + + if (nexthdrpp != NULL) + *nexthdrpp = nexthdrp; + + return (B_TRUE); + } +} diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h index 0907d6deff..2ce448fc3d 100644 --- a/usr/src/uts/common/sys/mac.h +++ b/usr/src/uts/common/sys/mac.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2017, Joyent, Inc. + * Copyright 2018 Joyent, Inc. * Copyright (c) 2015 Garrett D'Amore */ @@ -613,6 +613,38 @@ typedef struct mactype_register_s { size_t mtr_mappingcount; } mactype_register_t; +/* + * Flags to describe the hardware emulation desired from a client when + * calling mac_hw_emul(). + * + * MAC_HWCKSUM_EMUL + * + * If an mblk is marked with HCK_* flags, then calculate those + * checksums and update the checksum flags. + * + * MAC_IPCKSUM_EMUL + * + * Like MAC_HWCKSUM_EMUL, except only calculate the IPv4 header + * checksum. We still update both the IPv4 and ULP checksum + * flags. + * + * MAC_LSO_EMUL + * + * If an mblk is marked with HW_LSO, then segment the LSO mblk + * into a new chain of mblks which reference the original data + * block. This flag DOES NOT imply MAC_HWCKSUM_EMUL. If the + * caller needs both then it must set both. + */ +typedef enum mac_emul { + MAC_HWCKSUM_EMUL = (1 << 0), + MAC_IPCKSUM_EMUL = (1 << 1), + MAC_LSO_EMUL = (1 << 2) +} mac_emul_t; + +#define MAC_HWCKSUM_EMULS (MAC_HWCKSUM_EMUL | MAC_IPCKSUM_EMUL) +#define MAC_ALL_EMULS (MAC_HWCKSUM_EMUL | MAC_IPCKSUM_EMUL | \ + MAC_LSO_EMUL) + /* * Driver interface functions. 
*/ diff --git a/usr/src/uts/common/sys/mac_client.h b/usr/src/uts/common/sys/mac_client.h index 88ab5f4756..1d1915a816 100644 --- a/usr/src/uts/common/sys/mac_client.h +++ b/usr/src/uts/common/sys/mac_client.h @@ -200,6 +200,8 @@ extern int mac_set_mtu(mac_handle_t, uint_t, uint_t *); extern void mac_client_set_rings(mac_client_handle_t, int, int); +extern void mac_hw_emul(mblk_t **, mblk_t **, uint_t *, mac_emul_t); + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h index d5c66684d0..0e3a6306e0 100644 --- a/usr/src/uts/common/sys/mac_client_impl.h +++ b/usr/src/uts/common/sys/mac_client_impl.h @@ -410,8 +410,8 @@ extern int mac_tx_percpu_cnt; extern void mac_promisc_client_dispatch(mac_client_impl_t *, mblk_t *); extern void mac_client_init(void); extern void mac_client_fini(void); -extern void mac_promisc_dispatch(mac_impl_t *, mblk_t *, - mac_client_impl_t *); +extern void mac_promisc_dispatch(mac_impl_t *, mblk_t *, mac_client_impl_t *, + boolean_t); extern int mac_validate_props(mac_impl_t *, mac_resource_props_t *); diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h index 4625417828..da645ad382 100644 --- a/usr/src/uts/common/sys/mac_impl.h +++ b/usr/src/uts/common/sys/mac_impl.h @@ -35,6 +35,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -289,54 +290,6 @@ struct mac_group_s { #define GROUP_INTR_ENABLE_FUNC(g) (g)->mrg_info.mgi_intr.mi_enable #define GROUP_INTR_DISABLE_FUNC(g) (g)->mrg_info.mgi_intr.mi_disable -#define MAC_RING_TX(mhp, rh, mp, rest) { \ - mac_ring_handle_t mrh = rh; \ - mac_impl_t *mimpl = (mac_impl_t *)mhp; \ - /* \ - * Send packets through a selected tx ring, or through the \ - * default handler if there is no selected ring. 
\ - */ \ - if (mrh == NULL) \ - mrh = mimpl->mi_default_tx_ring; \ - if (mrh == NULL) { \ - rest = mimpl->mi_tx(mimpl->mi_driver, mp); \ - } else { \ - rest = mac_hwring_tx(mrh, mp); \ - } \ -} - -/* - * This is the final stop before reaching the underlying driver - * or aggregation, so this is where the bridging hook is implemented. - * Packets that are bridged will return through mac_bridge_tx(), with - * rh nulled out if the bridge chooses to send output on a different - * link due to forwarding. - */ -#define MAC_TX(mip, rh, mp, src_mcip) { \ - mac_ring_handle_t rhandle = (rh); \ - /* \ - * If there is a bound Hybrid I/O share, send packets through \ - * the default tx ring. (When there's a bound Hybrid I/O share, \ - * the tx rings of this client are mapped in the guest domain \ - * and not accessible from here.) \ - */ \ - _NOTE(CONSTANTCONDITION) \ - if ((src_mcip)->mci_state_flags & MCIS_SHARE_BOUND) \ - rhandle = (mip)->mi_default_tx_ring; \ - if (mip->mi_promisc_list != NULL) \ - mac_promisc_dispatch(mip, mp, src_mcip); \ - /* \ - * Grab the proper transmit pointer and handle. Special \ - * optimization: we can test mi_bridge_link itself atomically, \ - * and if that indicates no bridge send packets through tx ring.\ - */ \ - if (mip->mi_bridge_link == NULL) { \ - MAC_RING_TX(mip, rhandle, mp, mp); \ - } else { \ - mp = mac_bridge_tx(mip, rhandle, mp); \ - } \ -} - /* mci_tx_flag */ #define MCI_TX_QUIESCE 0x1 @@ -485,6 +438,9 @@ struct mac_impl_s { mac_led_mode_t mi_led_modes; mac_capab_led_t mi_led; + /* Cache of the Tx DB_CKSUMFLAGS that this MAC supports. */ + uint16_t mi_tx_cksum_flags; /* SL */ + /* * MAC address and VLAN lists. SL protected. */ @@ -721,16 +677,30 @@ typedef struct mac_client_impl_s mac_client_impl_t; extern void mac_init(void); extern int mac_fini(void); +/* + * MAC packet/chain drop functions to aggregate all dropped-packet + * debugging to a single surface. 
+ */ +/*PRINTFLIKE2*/ +extern void mac_drop_pkt(mblk_t *, const char *, ...) + __KPRINTFLIKE(2); + +/*PRINTFLIKE2*/ +extern void mac_drop_chain(mblk_t *, const char *, ...) + __KPRINTFLIKE(2); + extern void mac_ndd_ioctl(mac_impl_t *, queue_t *, mblk_t *); extern boolean_t mac_ip_hdr_length_v6(ip6_t *, uint8_t *, uint16_t *, uint8_t *, ip6_frag_t **); extern mblk_t *mac_copymsgchain_cksum(mblk_t *); -extern mblk_t *mac_fix_cksum(mblk_t *); extern void mac_packet_print(mac_handle_t, mblk_t *); extern void mac_rx_deliver(void *, mac_resource_handle_t, mblk_t *, mac_header_info_t *); extern void mac_tx_notify(mac_impl_t *); +extern mblk_t *mac_ring_tx(mac_handle_t, mac_ring_handle_t, mblk_t *); +extern mblk_t *mac_provider_tx(mac_impl_t *, mac_ring_handle_t, mblk_t *, + mac_client_impl_t *); extern void mac_callback_add(mac_cb_info_t *, mac_cb_t **, mac_cb_t *); extern boolean_t mac_callback_remove(mac_cb_info_t *, mac_cb_t **, mac_cb_t *); @@ -832,7 +802,7 @@ extern void mac_flow_set_name(flow_entry_t *, const char *); extern mblk_t *mac_add_vlan_tag(mblk_t *, uint_t, uint16_t); extern mblk_t *mac_add_vlan_tag_chain(mblk_t *, uint_t, uint16_t); extern mblk_t *mac_strip_vlan_tag_chain(mblk_t *); -extern void mac_pkt_drop(void *, mac_resource_handle_t, mblk_t *, boolean_t); +extern void mac_rx_def(void *, mac_resource_handle_t, mblk_t *, boolean_t); extern mblk_t *mac_rx_flow(mac_handle_t, mac_resource_handle_t, mblk_t *); extern void i_mac_share_alloc(mac_client_impl_t *); diff --git a/usr/src/uts/common/sys/pattr.h b/usr/src/uts/common/sys/pattr.h index 1269aeca10..a1fb21ad21 100644 --- a/usr/src/uts/common/sys/pattr.h +++ b/usr/src/uts/common/sys/pattr.h @@ -21,6 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2019 Joyent, Inc. 
*/ #ifndef _SYS_PATTR_H @@ -97,6 +98,8 @@ typedef struct pattr_hcksum_s { #define HCK_FLAGS (HCK_IPV4_HDRCKSUM | HCK_PARTIALCKSUM | \ HCK_FULLCKSUM | HCK_FULLCKSUM_OK) +#define HCK_TX_FLAGS (HCK_IPV4_HDRCKSUM | HCK_PARTIALCKSUM | \ + HCK_FULLCKSUM) /* * Extended hardware offloading flags that also use hcksum_flags */ diff --git a/usr/src/uts/common/sys/vnic_impl.h b/usr/src/uts/common/sys/vnic_impl.h index 1a91158da6..4c8d49c621 100644 --- a/usr/src/uts/common/sys/vnic_impl.h +++ b/usr/src/uts/common/sys/vnic_impl.h @@ -21,7 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2015 Joyent, Inc. + * Copyright 2018 Joyent, Inc. */ #ifndef _SYS_VNIC_IMPL_H @@ -64,6 +64,7 @@ typedef struct vnic_s { mac_notify_handle_t vn_mnh; uint32_t vn_hcksum_txflags; + mac_capab_lso_t vn_cap_lso; uint32_t vn_mtu; link_state_t vn_ls; } vnic_t; diff --git a/usr/src/uts/common/xen/io/xnb.c b/usr/src/uts/common/xen/io/xnb.c index 4bf424c44e..23e1d971cb 100644 --- a/usr/src/uts/common/xen/io/xnb.c +++ b/usr/src/uts/common/xen/io/xnb.c @@ -22,6 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2018 Joyent, Inc. 
*/ #ifdef DEBUG @@ -251,8 +252,8 @@ xnb_software_csum(xnb_t *xnbp, mblk_t *mp) * because it doesn't cover all of the interesting cases :-( */ mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM); - - return (mac_fix_cksum(mp)); + mac_hw_emul(&mp, NULL, NULL, MAC_HWCKSUM_EMUL); + return (mp); } mblk_t * -- cgit v1.2.3 From d5b4c61b9474779079c9ef8650d04dd7c8207a06 Mon Sep 17 00:00:00 2001 From: Andy Fiddaman Date: Wed, 13 May 2020 23:12:39 +0000 Subject: 12732 Update Intel microcode to 20200508 Reviewed by: John Levon Reviewed by: Robert Mustacchi Approved by: Dan McDonald --- usr/src/data/ucode/README.ucode | 2 +- usr/src/data/ucode/intel/000706E5-80 | Bin 102400 -> 107520 bytes usr/src/data/ucode/intel/THIRDPARTYLICENSE | 2 +- usr/src/pkg/manifests/system-microcode-intel.mf | 4 ++-- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'usr/src') diff --git a/usr/src/data/ucode/README.ucode b/usr/src/data/ucode/README.ucode index fa2ea2269d..03dcde42f0 100644 --- a/usr/src/data/ucode/README.ucode +++ b/usr/src/data/ucode/README.ucode @@ -38,4 +38,4 @@ of updating the manifest as necessary. Be careful about new files. AMD: Updated in March 2012 as part of illumos#2546. Exact revision unknown. -Intel: Linux 20191115 release +Intel: Linux 20200508 release diff --git a/usr/src/data/ucode/intel/000706E5-80 b/usr/src/data/ucode/intel/000706E5-80 index 1339fd29ed..5924718b16 100644 Binary files a/usr/src/data/ucode/intel/000706E5-80 and b/usr/src/data/ucode/intel/000706E5-80 differ diff --git a/usr/src/data/ucode/intel/THIRDPARTYLICENSE b/usr/src/data/ucode/intel/THIRDPARTYLICENSE index 707d09081e..8fbad3dd21 100644 --- a/usr/src/data/ucode/intel/THIRDPARTYLICENSE +++ b/usr/src/data/ucode/intel/THIRDPARTYLICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2018-2019 Intel Corporation. +Copyright (c) 2018-2020 Intel Corporation. All rights reserved. Redistribution. 
diff --git a/usr/src/pkg/manifests/system-microcode-intel.mf b/usr/src/pkg/manifests/system-microcode-intel.mf index cafafa2448..bac6419d24 100644 --- a/usr/src/pkg/manifests/system-microcode-intel.mf +++ b/usr/src/pkg/manifests/system-microcode-intel.mf @@ -25,7 +25,7 @@ # Copyright 2014 Gary Mills # Copyright 2019 Peter Tribble. # Copyright 2019 Joyent, Inc. -# Copyright 2019 OmniOS Community Edition (OmniOSce) Association. +# Copyright 2020 OmniOS Community Edition (OmniOSce) Association. # # @@ -35,7 +35,7 @@ # set name=pkg.fmri \ - value=pkg:/system/microcode/intel@20191115,$(PKGVERS_BUILTON)-$(PKGVERS_BRANCH) + value=pkg:/system/microcode/intel@20200508,$(PKGVERS_BUILTON)-$(PKGVERS_BRANCH) set name=pkg.description value="Microcode for Intel CPUs" set name=org.opensolaris.incorp-facet value=true set name=variant.arch value=i386 -- cgit v1.2.3