summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorJerry Jelinek <jerry.jelinek@joyent.com>2020-05-20 16:58:49 +0000
committerJerry Jelinek <jerry.jelinek@joyent.com>2020-05-20 16:58:49 +0000
commit1364aef7f320a3637b64895eae95e67a80095d7c (patch)
tree3be14616bcce5fb1c976ce9c779ba1bfc4504320 /usr/src
parent3817aef5460600089dfc2425a47c170ab4cce9e4 (diff)
parentee8ae3fa63afd7fd57d5e63676a991af0fb8d887 (diff)
downloadillumos-joyent-1364aef7f320a3637b64895eae95e67a80095d7c.tar.gz
[illumos-gate merge]
commit b22a70abf81f995ecc990b8444e63308bc389d5c 12679 want viona driver for bhyve commit d77e6e0f12d19668c0e9068c0fcd7a2123da5373 12693 Enable Forward Error Correction (FEC) configuration via dladm Conflicts: usr/src/cmd/bhyve/Makefile usr/src/lib/libdladm/common/linkprop.c usr/src/man/man1m/dladm.1m usr/src/pkg/manifests/system-bhyve.mf usr/src/uts/common/inet/ipf/ip_fil_solaris.c usr/src/uts/common/inet/ipf/netinet/ipf_stack.h usr/src/uts/common/sys/neti.h usr/src/uts/i86pc/Makefile.files usr/src/uts/i86pc/Makefile.i86pc usr/src/uts/intel/ipf/ipf.global-objs.debug64
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/bhyve/Makefile12
-rw-r--r--usr/src/lib/libdladm/common/linkprop.c78
-rw-r--r--usr/src/man/man1m/dladm.1m85
-rw-r--r--usr/src/man/man9e/mac.9e54
-rw-r--r--usr/src/uts/common/inet/ipf/ip_fil_solaris.c125
-rw-r--r--usr/src/uts/common/io/cxgbe/common/common.h17
-rw-r--r--usr/src/uts/common/io/cxgbe/common/t4_hw.c85
-rw-r--r--usr/src/uts/common/io/cxgbe/firmware/t4fw_interface.h20
-rw-r--r--usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c112
-rw-r--r--usr/src/uts/common/io/mac/mac.c27
-rw-r--r--usr/src/uts/common/io/mac/mac_provider.c17
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx.c5
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx.h7
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_cmd.c101
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_gld.c119
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_intr.c1
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_reg.h55
-rw-r--r--usr/src/uts/common/sys/mac.h10
-rw-r--r--usr/src/uts/common/sys/mac_provider.h3
-rw-r--r--usr/src/uts/i86pc/io/viona/viona.c1409
20 files changed, 884 insertions, 1458 deletions
diff --git a/usr/src/cmd/bhyve/Makefile b/usr/src/cmd/bhyve/Makefile
index f611af835f..9ac0361876 100644
--- a/usr/src/cmd/bhyve/Makefile
+++ b/usr/src/cmd/bhyve/Makefile
@@ -133,16 +133,8 @@ SMOFF += all_func_returns,leaks,no_if_block
CSTD= $(CSTD_GNU99)
C99MODE= -xc99=%all
-$(PROG) := LDLIBS += \
- -lsocket \
- -lnsl \
- -ldlpi \
- -ldladm \
- -lmd \
- -lsunw_crypto \
- -luuid \
- -lvmmapi \
- -lz
+$(PROG) := LDLIBS += -lsocket -lnsl -ldlpi -ldladm -lmd -lsunw_crypto -luuid \
+ -lvmmapi -lz
$(ZHYVE_PROG) := LDLIBS += -lnvpair
$(MEVENT_TEST_PROG) := LDLIBS += -lsocket
diff --git a/usr/src/lib/libdladm/common/linkprop.c b/usr/src/lib/libdladm/common/linkprop.c
index 703fca8d4e..d489af46eb 100644
--- a/usr/src/lib/libdladm/common/linkprop.c
+++ b/usr/src/lib/libdladm/common/linkprop.c
@@ -22,6 +22,7 @@
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2017 Joyent, Inc.
* Copyright 2015 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2020 RackTop Systems, Inc.
*/
#include <stdlib.h>
@@ -160,7 +161,7 @@ static pd_getf_t get_zone, get_autopush, get_rate_mod, get_rate,
static pd_setf_t set_zone, set_rate, set_powermode, set_radio,
set_public_prop, set_resource, set_stp_prop,
set_bridge_forward, set_bridge_pvid, set_secondary_macs,
- set_promisc_filtered;
+ set_promisc_filtered, set_public_bitprop;
static pd_checkf_t check_zone, check_autopush, check_rate, check_hoplimit,
check_encaplim, check_uint32, check_maxbw, check_cpus,
@@ -255,6 +256,10 @@ static link_attr_t link_attr[] = {
{ MAC_PROP_FLOWCTRL, sizeof (link_flowctrl_t), "flowctrl"},
+ { MAC_PROP_ADV_FEC_CAP, sizeof (link_fec_t), "adv_fec_cap"},
+
+ { MAC_PROP_EN_FEC_CAP, sizeof (link_fec_t), "en_fec_cap"},
+
{ MAC_PROP_ZONE, sizeof (dld_ioc_zid_t), "zone"},
{ MAC_PROP_AUTOPUSH, sizeof (struct dlautopush), "autopush"},
@@ -433,6 +438,12 @@ static val_desc_t link_flow_vals[] = {
{ "rx", LINK_FLOWCTRL_RX },
{ "bi", LINK_FLOWCTRL_BI }
};
+/*
+ * Names accepted and reported for the FEC link properties (adv_fec_cap and
+ * en_fec_cap in prop_table), paired with their LINK_FEC_* values.
+ */
+static val_desc_t link_fec_vals[] = {
+ { "none", LINK_FEC_NONE },
+ { "auto", LINK_FEC_AUTO },
+ { "rs", LINK_FEC_RS },
+ { "base-r", LINK_FEC_BASE_R }
+};
static val_desc_t link_priority_vals[] = {
{ "low", MPL_LOW },
{ "medium", MPL_MEDIUM },
@@ -563,6 +574,16 @@ static prop_desc_t prop_table[] = {
set_public_prop, NULL, get_flowctl, NULL,
0, DATALINK_CLASS_PHYS, DL_ETHER },
+ { "adv_fec_cap", { "", LINK_FEC_AUTO },
+ link_fec_vals, VALCNT(link_fec_vals),
+ NULL, NULL, get_bits, NULL,
+ 0, DATALINK_CLASS_PHYS, DL_ETHER },
+
+ { "en_fec_cap", { "", LINK_FEC_AUTO },
+ link_fec_vals, VALCNT(link_fec_vals),
+ set_public_bitprop, NULL, get_bits, NULL,
+ 0, DATALINK_CLASS_PHYS, DL_ETHER },
+
{ "secondary-macs", { "--", 0 }, NULL, 0,
set_secondary_macs, NULL,
get_secondary_macs, check_secondary_macs, PD_CHECK_ALLOC,
@@ -3958,6 +3979,33 @@ done:
return (status);
}
+/*
+ * Setter for public link properties whose value is a bitmask assembled from
+ * one or more named options.  Each supplied value name is looked up
+ * (case-insensitively) in the property's table of possible values and the
+ * bits of the first match are OR'd into a combined mask, which is then handed
+ * to set_public_prop() as a single value.
+ *
+ * Returns DLADM_STATUS_BADARG if the property is flagged PD_CHECK_ALLOC or
+ * if no bits end up set; otherwise returns the set_public_prop() status.
+ */
+static dladm_status_t
+set_public_bitprop(dladm_handle_t handle, prop_desc_t *pdp,
+    datalink_id_t linkid, val_desc_t *vdp, uint_t val_cnt, uint_t flags,
+    datalink_media_t media)
+{
+	val_desc_t combined = { 0 };
+	uint_t v;
+
+	if ((pdp->pd_flags & PD_CHECK_ALLOC) != 0)
+		return (DLADM_STATUS_BADARG);
+
+	for (v = 0; v < val_cnt; v++) {
+		uint_t opt = 0;
+
+		/* Find the first option carrying this name, if there is one. */
+		while (opt < pdp->pd_noptval &&
+		    strcasecmp(vdp[v].vd_name,
+		    pdp->pd_optval[opt].vd_name) != 0)
+			opt++;
+
+		if (opt < pdp->pd_noptval)
+			combined.vd_val |= pdp->pd_optval[opt].vd_val;
+	}
+
+	if (combined.vd_val == 0)
+		return (DLADM_STATUS_BADARG);
+
+	return (set_public_prop(handle, pdp, linkid, &combined, 1, flags,
+	    media));
+}
+
dladm_status_t
i_dladm_macprop(dladm_handle_t handle, void *dip, boolean_t set)
{
@@ -4270,6 +4318,34 @@ get_flowctl(dladm_handle_t handle, prop_desc_t *pdp,
return (DLADM_STATUS_OK);
}
+/*
+ * Getter for public link properties whose 32-bit value is a bitmask of the
+ * options listed in pdp->pd_optval.  The name of every option whose bit is
+ * set in the value is written to successive entries of prop_val, and
+ * *val_cnt is updated to the number of names written.
+ *
+ * Returns the status from i_dladm_get_public_prop() if the lookup fails,
+ * DLADM_STATUS_BADVALCNT if more bits are set than the caller supplied
+ * prop_val entries for, and DLADM_STATUS_OK otherwise.
+ */
+static dladm_status_t
+get_bits(dladm_handle_t handle, prop_desc_t *pdp,
+    datalink_id_t linkid, char **prop_val, uint_t *val_cnt,
+    datalink_media_t media, uint_t flags, uint_t *perm_flags)
+{
+	uint32_t v;
+	dladm_status_t status;
+	uint_t i, cnt;
+
+	status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags,
+	    perm_flags, &v, sizeof (v));
+	if (status != DLADM_STATUS_OK)
+		return (status);
+
+	cnt = 0;
+	for (i = 0; i < pdp->pd_noptval; i++) {
+		if ((v & pdp->pd_optval[i].vd_val) == 0)
+			continue;
+
+		/*
+		 * Only fail when a set bit genuinely has nowhere to go;
+		 * running out of slots after the last set bit is fine.
+		 */
+		if (cnt >= *val_cnt)
+			return (DLADM_STATUS_BADVALCNT);
+
+		/* The option name is data, never a format string. */
+		(void) snprintf(prop_val[cnt++], DLADM_STRSIZE, "%s",
+		    pdp->pd_optval[i].vd_name);
+	}
+
+	*val_cnt = cnt;
+	return (DLADM_STATUS_OK);
+}
/* ARGSUSED */
static dladm_status_t
diff --git a/usr/src/man/man1m/dladm.1m b/usr/src/man/man1m/dladm.1m
index ffe36dfa07..77bf045e08 100644
--- a/usr/src/man/man1m/dladm.1m
+++ b/usr/src/man/man1m/dladm.1m
@@ -42,12 +42,12 @@
.\"
.\" Copyright (c) 2008, Sun Microsystems, Inc. All Rights Reserved
.\" Copyright 2017 Joyent, Inc.
+.\" Copyright 2020 RackTop Systems, Inc.
.\"
-.TH DLADM 1M "Dec 6, 2017"
+.TH DLADM 1M "May 4, 2020"
.SH NAME
dladm \- administer data links
.SH SYNOPSIS
-.LP
.nf
\fBdladm show-link\fR [\fB-P\fR] [\fB-s\fR [\fB-i\fR \fIinterval\fR]] [[\fB-p\fR] \fB-o\fR \fIfield\fR[,...]] [\fIlink\fR]
\fBdladm rename-link\fR [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fIlink\fR \fInew-link\fR
@@ -189,7 +189,6 @@ dladm \- administer data links
.fi
.SH DESCRIPTION
-.LP
The \fBdladm\fR command is used to administer data-links. A data-link is
represented in the system as a \fBSTREAMS DLPI\fR (v2) interface which can be
plumbed under protocol stacks such as \fBTCP/IP\fR. Each data-link relies on
@@ -356,7 +355,6 @@ other hosts. For more information on overlay devices, see \fBoverlay\fR(5).
.RE
.SS "Options"
-.LP
Each \fBdladm\fR subcommand has its own set of options. However, many of the
subcommands have the following as a common option:
.sp
@@ -371,7 +369,6 @@ deletion, or renaming-should apply.
.RE
.SS "SUBCOMMANDS"
-.LP
The following subcommands are supported:
.sp
.ne 2
@@ -4872,7 +4869,6 @@ display network usage for all links.
.RE
.SS "Parsable Output Format"
-.LP
Many \fBdladm\fR subcommands have an option that displays output in a
machine-parsable format. The output format is one or more lines of colon
(\fB:\fR) delimited fields. The fields displayed are specific to the subcommand
@@ -4888,7 +4884,6 @@ by using shell \fBread\fR(1) functions with the environment variable
\fBIFS=:\fR (see \fBEXAMPLES\fR, below). Note that escaping is not done when
you request only a single field.
.SS "General Link Properties"
-.LP
The following general link properties are supported:
.sp
.ne 2
@@ -5275,7 +5270,6 @@ currently running on the system. By default, the zone binding is as per
.RE
.SS "Wifi Link Properties"
-.LP
The following \fBWiFi\fR link properties are supported. Note that the ability
to set a given property to a given value depends on the driver and hardware.
.sp
@@ -5327,7 +5321,6 @@ is no fixed speed.
.RE
.SS "Ethernet Link Properties"
-.LP
The following MII Properties, as documented in \fBieee802.3\fR(5), are
supported in read-only mode:
.RS +4
@@ -5488,6 +5481,75 @@ capabilities allowed by the device and the link partner.
.sp
.ne 2
.na
+\fB\fBen_fec_cap\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sets the Forward Error Correction (FEC) code(s) to be advertised by the
+device.
+Valid values are:
+.sp
+.ne 2
+.na
+\fB\fBnone\fR\fR
+.ad
+.sp .6
+.RS 4n
+Allow the device not to use FEC.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBauto\fR\fR
+.ad
+.sp .6
+.RS 4n
+The device will automatically decide which FEC code to use.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBrs\fR\fR
+.ad
+.sp .6
+.RS 4n
+Allow Reed-Solomon FEC code.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBbase-r\fR\fR
+.ad
+.sp .6
+.RS 4n
+Allow Base-R (also known as FireCode) code.
+.RE
+
+Valid input is either \fBauto\fR as a single value, or a comma separated
+combination of \fBnone\fR, \fBrs\fR and \fBbase-r\fR.
+The default value is \fBauto\fR.
+.sp
+.LP
+Note the actual FEC settings and combinations are constrained by the
+capabilities allowed by the device and the link partner.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBadv_fec_cap\fR\fR
+.ad
+.sp .6
+.RS 4n
+(read only) The current negotiated Forward Error Correction code.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBsecondary-macs\fR\fR
.ad
.sp .6
@@ -5542,7 +5604,6 @@ The default value is \fBvlanonly\fR.
.RE
.SS "IP Tunnel Link Properties"
-.LP
The following IP tunnel link properties are supported.
.sp
.ne 2
@@ -5569,7 +5630,6 @@ default value is 4. A value of 0 disables the encapsulation limit.
.RE
.SH EXAMPLES
-.LP
\fBExample 1 \fRConfiguring an Aggregation
.sp
.LP
@@ -6044,7 +6104,6 @@ interface. See \fBifconfig\fR(1M) for a description of how IPv6 addresses are
configured on 6to4 tunnel links.
.SH ATTRIBUTES
-.LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
.LP
@@ -6077,7 +6136,6 @@ Interface Stability Committed
.TE
.SH SEE ALSO
-.LP
\fBacctadm\fR(1M), \fBautopush\fR(1M), \fBifconfig\fR(1M), \fBipsecconf\fR(1M),
\fBndd\fR(1M), \fBpsrset\fR(1M), \fBwpad\fR(1M), \fBzonecfg\fR(1M),
\fBattributes\fR(5), \fBieee802.3\fR(5), \fBoverlay\fR(5), \fBdlpi\fR(7P)
@@ -6092,7 +6150,6 @@ T. Lemon, B. Sommerfeld. February 2006. \fIRFC 4361: Node-specific Client
Identifiers for Dynamic Host Configuration Protocol Version Four (DHCPv4)\fR.
The Internet Society. January 2006.
.SH NOTES
-.LP
The preferred method of referring to an aggregation in the aggregation
subcommands is by its link name. Referring to an aggregation by its integer
\fIkey\fR is supported for backward compatibility, but is not necessary. When
diff --git a/usr/src/man/man9e/mac.9e b/usr/src/man/man9e/mac.9e
index 56762b7856..d3d066a564 100644
--- a/usr/src/man/man9e/mac.9e
+++ b/usr/src/man/man9e/mac.9e
@@ -10,8 +10,9 @@
.\"
.\"
.\" Copyright 2019 Joyent, Inc.
+.\" Copyright 2020 RackTop Systems, Inc.
.\"
-.Dd July 22, 2019
+.Dd May 11, 2020
.Dt MAC 9E
.Os
.Sh NAME
@@ -894,6 +895,57 @@ it has configured the device, not what the device has actually
negotiated.
When setting the property, it should update the hardware and allow the link to
potentially perform auto-negotiation again.
+.It Sy MAC_PROP_EN_FEC_CAP
+.Bd -filled -compact
+Type:
+.Sy link_fec_t |
+Permissions:
+.Sy Read/Write
+.Ed
+.Pp
+The
+.Sy MAC_PROP_EN_FEC_CAP
+property indicates which Forward Error Correction (FEC) code is advertised
+by the device.
+.Pp
+The
+.Sy link_fec_t
+is an enumeration that may be a combination of the following bit values:
+.Bl -tag -width Ds
+.It Sy LINK_FEC_NONE
+No FEC over the link.
+.It Sy LINK_FEC_AUTO
+The FEC coding to use is auto-negotiated.
+.Sy LINK_FEC_AUTO
+cannot be set along with any of the other values.
+This is the default setting the device driver should use.
+.It Sy LINK_FEC_RS
+The link may use Reed-Solomon FEC coding.
+.It Sy LINK_FEC_BASE_R
+The link may use Base-R coding, also commonly referred to as FireCode.
+.El
+.Pp
+When setting the property, the driver should update the hardware with the
+requested coding, or combination of requested codings.
+If a particular combination of codings is not supported by the hardware,
+the device driver should return
+.Er EINVAL .
+When retrieving this property, the device driver should return the current
+value of the property.
+.It Sy MAC_PROP_ADV_FEC_CAP
+.Bd -filled -compact
+Type:
+.Sy link_fec_t |
+Permissions:
+.Sy Read-Only
+.Ed
+.Pp
+The
+.Sy MAC_PROP_ADV_FEC_CAP
+property has the same values as
+.Sy MAC_PROP_EN_FEC_CAP .
+The property indicates which Forward Error Correction (FEC) code has been
+negotiated over the link.
.El
.Pp
The remaining properties are all about various auto-negotiation link
diff --git a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
index 9aeba33d30..8ce11b369d 100644
--- a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
+++ b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
@@ -221,6 +221,12 @@ ipf_hook_protocol_notify_ngz(hook_notify_cmd_t command, void *arg,
return (ipf_hook_protocol_notify(command, arg, name, dummy, he_name));
}
+/* viona hook names */
+char *hook_viona_in = "ipfilter_hookviona_in";
+char *hook_viona_in_gz = "ipfilter_hookviona_in_gz";
+char *hook_viona_out = "ipfilter_hookviona_out";
+char *hook_viona_out_gz = "ipfilter_hookviona_out_gz";
+
/* ------------------------------------------------------------------------ */
/* Function: ipldetach */
/* Returns: int - 0 == success, else error. */
@@ -731,6 +737,7 @@ ipf_hook_protocol_notify(hook_notify_cmd_t command, void *arg,
hook_hint_t hint;
boolean_t out;
int ret = 0;
+
const boolean_t gz = ifs->ifs_gz_controlled;
/* We currently only care about viona hooks notifications */
@@ -2438,6 +2445,124 @@ int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
return ipf_hook6(info, 1, FI_NOCKSUM, arg);
}
+/*
+ * Read-only address patterns used by ipf_hook_ether to classify a frame:
+ * the full ethernet broadcast address, and the leading bytes that are
+ * compared to recognise IPv4 and IPv6 multicast addresses.
+ */
+static const uint8_t ipf_eth_bcast_addr[ETHERADDRL] = {
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+static const uint8_t ipf_eth_ipv4_mcast[3] = { 0x01, 0x00, 0x5E };
+static const uint8_t ipf_eth_ipv6_mcast[2] = { 0x33, 0x33 };
+
+/* ------------------------------------------------------------------------ */
+/* Function: ipf_hook_ether */
+/* Returns: int - 0 == packet ok, else problem, free packet if not done */
+/* Parameters: token(I) - pointer to event */
+/* info(I) - pointer to hook information for firewalling */
+/* arg(I) - passed unchanged to ipf_hook/ipf_hook6 */
+/* out(I) - passed to ipf_hook/ipf_hook6 as their second argument */
+/* */
+/* The ipf_hook_ether hook is currently private to illumos. It represents */
+/* a layer 2 datapath generally used by virtual machines. Currently the */
+/* hook is only used by the viona driver to pass along L2 frames for */
+/* inspection. It requires that the L2 ethernet header is contained within */
+/* a single dblk_t (however layers above the L2 header have no restrictions */
+/* in ipf). ipf does not currently support filtering on L2 fields (e.g. */
+/* filtering on a MAC address or ethertype), however virtual machines do */
+/* not have native IP stack instances where ipf traditionally hooks in. */
+/* Instead this entry point is used to determine if the packet is unicast, */
+/* broadcast, or multicast. The IPv4 or IPv6 packet is then passed to the */
+/* traditional ip hooks for filtering. Non IPv4 or non IPv6 packets are */
+/* not subject to examination. */
+/* ------------------------------------------------------------------------ */
+int ipf_hook_ether(hook_event_token_t token, hook_data_t info, void *arg,
+ boolean_t out)
+{
+ struct ether_header *ethp;
+ hook_pkt_event_t *hpe = (hook_pkt_event_t *)info;
+ mblk_t *mp;
+ size_t offset, len;
+ uint16_t etype;
+ boolean_t v6;
+
+ /*
+ * viona will only pass us mblks with the L2 header contained in a
+ * single data block.
+ */
+ mp = *hpe->hpe_mp;
+ len = MBLKL(mp);
+
+ VERIFY3S(len, >=, sizeof (struct ether_header));
+
+ ethp = (struct ether_header *)mp->b_rptr;
+ if ((etype = ntohs(ethp->ether_type)) == ETHERTYPE_VLAN) {
+ struct ether_vlan_header *evh =
+ (struct ether_vlan_header *)ethp;
+
+ VERIFY3S(len, >=, sizeof (struct ether_vlan_header));
+
+ etype = ntohs(evh->ether_type);
+ offset = sizeof (*evh);
+ } else {
+ offset = sizeof (*ethp);
+ }
+
+ /*
+ * ipf only supports filtering IPv4 and IPv6. Ignore other types.
+ */
+ if (etype == ETHERTYPE_IP)
+ v6 = B_FALSE;
+ else if (etype == ETHERTYPE_IPV6)
+ v6 = B_TRUE;
+ else
+ return (0);
+
+ /* Classify the frame by the address bytes at the start of the header */
+ if (bcmp(ipf_eth_bcast_addr, ethp, ETHERADDRL) == 0)
+ hpe->hpe_flags |= HPE_BROADCAST;
+ else if (bcmp(ipf_eth_ipv4_mcast, ethp,
+ sizeof (ipf_eth_ipv4_mcast)) == 0)
+ hpe->hpe_flags |= HPE_MULTICAST;
+ else if (bcmp(ipf_eth_ipv6_mcast, ethp,
+ sizeof (ipf_eth_ipv6_mcast)) == 0)
+ hpe->hpe_flags |= HPE_MULTICAST;
+
+ /*
+ * Find the start of the IPv4 or IPv6 header. When the L2 header uses
+ * up the whole of the current mblk, continue into b_cont; if the
+ * chain ends before the offset is reached, the message is freed here
+ * and -1 is returned.
+ */
+ for (; offset >= len; len = MBLKL(mp)) {
+ offset -= len;
+ mp = mp->b_cont;
+ if (mp == NULL) {
+ freemsg(*hpe->hpe_mp);
+ *hpe->hpe_mp = NULL;
+ return (-1);
+ }
+ }
+ hpe->hpe_mb = mp;
+ hpe->hpe_hdr = mp->b_rptr + offset;
+
+ return (v6 ? ipf_hook6(info, out, 0, arg) :
+ ipf_hook(info, out, 0, arg));
+}
+
+/* ------------------------------------------------------------------------ */
+/* Function: ipf_hookviona_{in,out} */
+/* Returns: int - 0 == packet ok, else problem, free packet if not done */
+/* Parameters: token(I) - pointer to event */
+/* info(I) - pointer to hook information for firewalling */
+/* arg(I) - passed unchanged to ipf_hook_ether */
+/* */
+/* The viona hooks are private hooks to illumos. They represent a layer 2 */
+/* datapath generally used to implement virtual machines, and are used to */
+/* pass along L2 packets. */
+/* */
+/* They end up calling the appropriate traditional ip hooks. */
+/* ------------------------------------------------------------------------ */
+int
+ipf_hookviona_in(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ return (ipf_hook_ether(token, info, arg, B_FALSE));
+}
+
+/* Same as ipf_hookviona_in, but calls ipf_hook_ether with out == B_TRUE. */
+int
+ipf_hookviona_out(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ return (ipf_hook_ether(token, info, arg, B_TRUE));
+}
+
/* ------------------------------------------------------------------------ */
/* Function: ipf_hookvndl3_in */
/* Returns: int - 0 == packet ok, else problem, free packet if not done */
diff --git a/usr/src/uts/common/io/cxgbe/common/common.h b/usr/src/uts/common/io/cxgbe/common/common.h
index c7de2c4ebf..b8d77ebda3 100644
--- a/usr/src/uts/common/io/cxgbe/common/common.h
+++ b/usr/src/uts/common/io/cxgbe/common/common.h
@@ -20,6 +20,10 @@
* release for licensing terms and conditions.
*/
+/*
+ * Copyright 2020 RackTop Systems, Inc.
+ */
+
#ifndef __CHELSIO_COMMON_H
#define __CHELSIO_COMMON_H
@@ -103,9 +107,16 @@ enum {
typedef unsigned char cc_pause_t;
enum {
- FEC_AUTO = 1 << 0, /* IEEE 802.3 "automatic" */
- FEC_RS = 1 << 1, /* Reed-Solomon */
- FEC_BASER_RS = 1 << 2, /* BaseR/Reed-Solomon */
+ FEC_RS = 1 << 0, /* Reed-Solomon */
+ FEC_BASER_RS = 1 << 1, /* Base-R, aka Firecode */
+ FEC_NONE = 1 << 2, /* no FEC */
+
+ /*
+ * Pseudo FECs that translate to real FECs. The firmware knows nothing
+ * about these; they start at the first bit above the five firmware FEC
+ * capability bits (0x1f), so FEC_AUTO is 1 << 5. AUTO should be set
+ * all by itself.
+ */
+ FEC_AUTO = 1 << 5,
};
typedef unsigned char cc_fec_t;
diff --git a/usr/src/uts/common/io/cxgbe/common/t4_hw.c b/usr/src/uts/common/io/cxgbe/common/t4_hw.c
index ae88f36f15..4bb48f1b3a 100644
--- a/usr/src/uts/common/io/cxgbe/common/t4_hw.c
+++ b/usr/src/uts/common/io/cxgbe/common/t4_hw.c
@@ -20,6 +20,10 @@
* release for licensing terms and conditions.
*/
+/*
+ * Copyright 2020 RackTop Systems, Inc.
+ */
+
#include "common.h"
#include "t4_regs.h"
#include "t4_regs_values.h"
@@ -4645,20 +4649,57 @@ static inline cc_fec_t fwcap_to_cc_fec(fw_port_cap32_t fw_fec)
if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS)
cc_fec |= FEC_BASER_RS;
- return cc_fec;
+ if (cc_fec == 0)
+ cc_fec = FEC_NONE;
+
+ return (cc_fec);
}
/* Translate Common Code Forward Error Correction specification to Firmware */
-static inline fw_port_cap32_t cc_to_fwcap_fec(cc_fec_t cc_fec)
+static inline boolean_t
+cc_to_fwcap_fec(fw_port_cap32_t *fw_fecp, cc_fec_t cc_fec,
+ struct link_config *lc)
{
fw_port_cap32_t fw_fec = 0;
- if (cc_fec & FEC_RS)
+ if ((cc_fec & FEC_AUTO) != 0) {
+ if ((lc->pcaps & FW_PORT_CAP32_SPEED_100G) == 0)
+ fw_fec |= FW_PORT_CAP32_FEC_BASER_RS;
+
+ if ((lc->pcaps & FW_PORT_CAP32_FORCE_FEC) != 0)
+ fw_fec |= FW_PORT_CAP32_FEC_NO_FEC;
+
+ fw_fec |= FW_PORT_CAP32_FEC_RS;
+
+ *fw_fecp = fw_fec;
+ return (B_TRUE);
+ }
+
+ if ((cc_fec & FEC_RS) != 0)
fw_fec |= FW_PORT_CAP32_FEC_RS;
- if (cc_fec & FEC_BASER_RS)
+
+ if ((cc_fec & FEC_BASER_RS) != 0 &&
+ (lc->pcaps & FW_PORT_CAP32_SPEED_100G) == 0)
fw_fec |= FW_PORT_CAP32_FEC_BASER_RS;
- return fw_fec;
+ if ((cc_fec & FEC_NONE) != 0) {
+ if ((lc->pcaps & FW_PORT_CAP32_FORCE_FEC) != 0) {
+ fw_fec |= FW_PORT_CAP32_FORCE_FEC;
+ fw_fec |= FW_PORT_CAP32_FEC_NO_FEC;
+ }
+
+ *fw_fecp = fw_fec;
+ return (B_TRUE);
+ }
+
+ if (fw_fec == 0)
+ return (B_FALSE);
+
+ if ((lc->pcaps & FW_PORT_CAP32_FORCE_FEC) != 0)
+ fw_fec |= FW_PORT_CAP32_FORCE_FEC;
+
+ *fw_fecp = fw_fec;
+ return (B_TRUE);
}
/**
@@ -4692,11 +4733,18 @@ fw_port_cap32_t t4_link_acaps(struct adapter *adapter, unsigned int port,
* the Transceiver Module EPROM FEC parameters. Otherwise we
* use whatever is in the current Requested FEC settings.
*/
- if (lc->requested_fec & FEC_AUTO)
- cc_fec = fwcap_to_cc_fec(lc->def_acaps);
- else
- cc_fec = lc->requested_fec;
- fw_fec = cc_to_fwcap_fec(cc_fec);
+ if (fec_supported(lc->pcaps)) {
+ if (lc->requested_fec & FEC_AUTO)
+ cc_fec = fwcap_to_cc_fec(lc->def_acaps);
+ else
+ cc_fec = lc->requested_fec;
+
+ if (!cc_to_fwcap_fec(&fw_fec, cc_fec, lc))
+ return (0);
+ } else {
+ fw_fec = 0;
+ cc_fec = FEC_NONE;
+ }
/* Figure out what our Requested Port Capabilities are going to be.
* Note parallel structure in t4_handle_get_port_info() and
@@ -9641,12 +9689,17 @@ static void init_link_config(struct link_config *lc, fw_port_cap32_t pcaps,
lc->speed = 0;
lc->requested_fc = lc->fc = PAUSE_RX | PAUSE_TX;
- /*
- * For Forward Error Control, we default to whatever the Firmware
- * tells us the Link is currently advertising.
- */
- lc->requested_fec = FEC_AUTO;
- lc->fec = fwcap_to_cc_fec(lc->def_acaps);
+ if (fec_supported(pcaps)) {
+ /*
+ * For Forward Error Correction, we default to whatever the Firmware
+ * tells us the Link is currently advertising.
+ */
+ lc->requested_fec = FEC_AUTO;
+ lc->fec = fwcap_to_cc_fec(lc->def_acaps);
+ } else {
+ lc->requested_fec = FEC_NONE;
+ lc->fec = FEC_NONE;
+ }
/* If the Port is capable of Auto-Negtotiation, initialize it as
* "enabled" and copy over all of the Physical Port Capabilities
diff --git a/usr/src/uts/common/io/cxgbe/firmware/t4fw_interface.h b/usr/src/uts/common/io/cxgbe/firmware/t4fw_interface.h
index d705c73891..b998e85bae 100644
--- a/usr/src/uts/common/io/cxgbe/firmware/t4fw_interface.h
+++ b/usr/src/uts/common/io/cxgbe/firmware/t4fw_interface.h
@@ -11,6 +11,10 @@
* release for licensing terms and conditions.
*/
+/*
+ * Copyright 2020 RackTop Systems, Inc.
+ */
+
#ifndef _T4FW_INTERFACE_H_
#define _T4FW_INTERFACE_H_
@@ -7204,11 +7208,12 @@ enum fw_port_mdi {
#define FW_PORT_CAP32_MDISTRAIGHT 0x00400000UL
#define FW_PORT_CAP32_FEC_RS 0x00800000UL
#define FW_PORT_CAP32_FEC_BASER_RS 0x01000000UL
-#define FW_PORT_CAP32_FEC_RESERVED1 0x02000000UL
+#define FW_PORT_CAP32_FEC_NO_FEC 0x02000000UL
#define FW_PORT_CAP32_FEC_RESERVED2 0x04000000UL
#define FW_PORT_CAP32_FEC_RESERVED3 0x08000000UL
#define FW_PORT_CAP32_FORCE_PAUSE 0x10000000UL
-#define FW_PORT_CAP32_RESERVED2 0xe0000000UL
+#define FW_PORT_CAP32_FORCE_FEC 0x20000000UL
+#define FW_PORT_CAP32_RESERVED2 0xc0000000UL
#define S_FW_PORT_CAP32_SPEED 0
#define M_FW_PORT_CAP32_SPEED 0xfff
@@ -7254,7 +7259,7 @@ enum fw_port_mdi32 {
(((x) >> S_FW_PORT_CAP32_MDI) & M_FW_PORT_CAP32_MDI)
#define S_FW_PORT_CAP32_FEC 23
-#define M_FW_PORT_CAP32_FEC 0x1f
+#define M_FW_PORT_CAP32_FEC 0x5f
#define V_FW_PORT_CAP32_FEC(x) ((x) << S_FW_PORT_CAP32_FEC)
#define G_FW_PORT_CAP32_FEC(x) \
(((x) >> S_FW_PORT_CAP32_FEC) & M_FW_PORT_CAP32_FEC)
@@ -7269,6 +7274,15 @@ enum fw_port_mdi32 {
#define CAP32_FC(__cap32) \
(V_FW_PORT_CAP32_FC(M_FW_PORT_CAP32_FC) & __cap32)
+#ifdef _KERNEL
+/*
+ * Report whether a port capability word advertises any of the link speeds
+ * (25G, 50G or 100G) that this check treats as FEC-capable.
+ */
+static inline boolean_t
+fec_supported(uint32_t caps)
+{
+	const uint32_t fec_speeds = FW_PORT_CAP32_SPEED_25G |
+	    FW_PORT_CAP32_SPEED_50G | FW_PORT_CAP32_SPEED_100G;
+
+	return ((caps & fec_speeds) != 0);
+}
+#endif
+
enum fw_port_action {
FW_PORT_ACTION_L1_CFG = 0x0001,
FW_PORT_ACTION_L2_CFG = 0x0002,
diff --git a/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c b/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c
index 59c0ddde8d..9b4ffd8325 100644
--- a/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c
+++ b/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c
@@ -20,6 +20,10 @@
* release for licensing terms and conditions.
*/
+/*
+ * Copyright 2020 RackTop Systems, Inc.
+ */
+
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/dlpi.h>
@@ -930,6 +934,62 @@ t4_mc_getcapab(void *arg, mac_capab_t cap, void *data)
return (status);
}
+/*
+ * Translate common-code FEC flags (cc_fec_t) into MAC framework link_fec_t
+ * bits.  When both Reed-Solomon and Base-R are present the result is
+ * reported as LINK_FEC_AUTO alone; otherwise each flag maps to its
+ * LINK_FEC_* counterpart.
+ */
+static link_fec_t
+fec_to_link_fec(cc_fec_t cc_fec)
+{
+	const cc_fec_t rs_and_baser = FEC_RS | FEC_BASER_RS;
+	link_fec_t result = 0;
+
+	if ((cc_fec & rs_and_baser) == rs_and_baser)
+		return (LINK_FEC_AUTO);
+
+	result |= ((cc_fec & FEC_NONE) != 0) ? LINK_FEC_NONE : 0;
+	result |= ((cc_fec & FEC_AUTO) != 0) ? LINK_FEC_AUTO : 0;
+	result |= ((cc_fec & FEC_RS) != 0) ? LINK_FEC_RS : 0;
+	result |= ((cc_fec & FEC_BASER_RS) != 0) ? LINK_FEC_BASE_R : 0;
+
+	return (result);
+}
+
+/*
+ * Translate link_fec_t bits into common-code FEC_* flags.
+ *
+ * LINK_FEC_AUTO, when present, is consumed on its own; otherwise the none,
+ * rs and base-r bits are consumed.  Returns -1 if any bit of v is left
+ * unconsumed (an unknown bit, or auto combined with anything else),
+ * otherwise the resulting FEC_* flags.
+ */
+static int
+link_fec_to_fec(int v)
+{
+	int leftover = v;
+	int fec;
+
+	if ((leftover & LINK_FEC_AUTO) != 0) {
+		fec = FEC_AUTO;
+		leftover &= ~LINK_FEC_AUTO;
+	} else {
+		fec = 0;
+
+		if ((leftover & LINK_FEC_NONE) != 0)
+			fec |= FEC_NONE;
+		if ((leftover & LINK_FEC_RS) != 0)
+			fec |= FEC_RS;
+		if ((leftover & LINK_FEC_BASE_R) != 0)
+			fec |= FEC_BASER_RS;
+
+		leftover &= ~(LINK_FEC_NONE | LINK_FEC_RS | LINK_FEC_BASE_R);
+	}
+
+	if (leftover != 0)
+		return (-1);
+
+	ASSERT3S(fec, !=, 0);
+
+	return (fec);
+}
+
/* ARGSUSED */
static int
t4_mc_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t size,
@@ -941,7 +1001,9 @@ t4_mc_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t size,
uint8_t v8 = *(uint8_t *)val;
uint32_t v32 = *(uint32_t *)val;
int old, new = 0, relink = 0, rx_mode = 0, rc = 0;
+ boolean_t down_link = B_TRUE;
link_flowctrl_t fc;
+ link_fec_t fec;
/*
* Save a copy of link_config. This can be used to restore link_config
@@ -1009,6 +1071,30 @@ t4_mc_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t size,
}
break;
+ case MAC_PROP_EN_FEC_CAP:
+ if (!fec_supported(lc->pcaps)) {
+ rc = ENOTSUP;
+ break;
+ }
+
+ fec = *(link_fec_t *)val;
+ new = link_fec_to_fec(fec);
+ if (new < 0) {
+ rc = EINVAL;
+ } else if (new != lc->requested_fec) {
+ lc->requested_fec = new;
+ relink = 1;
+ /*
+ * For fec, do not preemptively force the link
+ * down. If changing fec causes the link state
+ * to transition, then appropriate asynchronous
+ * events are generated which correctly reflect
+ * the link state.
+ */
+ down_link = B_FALSE;
+ }
+ break;
+
case MAC_PROP_EN_10GFDX_CAP:
if (lc->pcaps & FW_PORT_CAP32_ANEG && is_10G_port(pi)) {
old = lc->acaps & FW_PORT_CAP32_SPEED_10G;
@@ -1062,7 +1148,8 @@ t4_mc_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t size,
if (isset(&sc->open_device_map, pi->port_id) != 0) {
if (relink != 0) {
- t4_os_link_changed(pi->adapter, pi->port_id, 0);
+ if (down_link)
+ t4_os_link_changed(pi->adapter, pi->port_id, 0);
rc = begin_synchronized_op(pi, 1, 1);
if (rc != 0)
return (rc);
@@ -1143,6 +1230,20 @@ t4_mc_getprop(void *arg, const char *name, mac_prop_id_t id, uint_t size,
*(link_flowctrl_t *)val = LINK_FLOWCTRL_NONE;
break;
+ case MAC_PROP_ADV_FEC_CAP:
+ if (!fec_supported(lc->pcaps))
+ return (ENOTSUP);
+
+ *(link_fec_t *)val = fec_to_link_fec(lc->fec);
+ break;
+
+ case MAC_PROP_EN_FEC_CAP:
+ if (!fec_supported(lc->pcaps))
+ return (ENOTSUP);
+
+ *(link_fec_t *)val = fec_to_link_fec(lc->requested_fec);
+ break;
+
case MAC_PROP_ADV_100GFDX_CAP:
case MAC_PROP_EN_100GFDX_CAP:
*u = !!(lc->acaps & FW_PORT_CAP32_SPEED_100G);
@@ -1212,6 +1313,15 @@ t4_mc_propinfo(void *arg, const char *name, mac_prop_id_t id,
mac_prop_info_set_default_link_flowctrl(ph, LINK_FLOWCTRL_BI);
break;
+ case MAC_PROP_EN_FEC_CAP:
+ mac_prop_info_set_default_fec(ph, LINK_FEC_AUTO);
+ break;
+
+ case MAC_PROP_ADV_FEC_CAP:
+ mac_prop_info_set_perm(ph, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_fec(ph, LINK_FEC_AUTO);
+ break;
+
case MAC_PROP_EN_10GFDX_CAP:
if (lc->pcaps & FW_PORT_CAP32_ANEG &&
lc->pcaps & FW_PORT_CAP32_SPEED_10G)
diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c
index 7d88ea2572..4ce359f87b 100644
--- a/usr/src/uts/common/io/mac/mac.c
+++ b/usr/src/uts/common/io/mac/mac.c
@@ -23,6 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2020 Joyent, Inc.
* Copyright 2015 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2020 RackTop Systems, Inc.
*/
/*
@@ -3341,6 +3342,10 @@ mac_prop_check_size(mac_prop_id_t id, uint_t valsize, boolean_t is_range)
case MAC_PROP_FLOWCTRL:
minsize = sizeof (link_flowctrl_t);
break;
+ case MAC_PROP_ADV_FEC_CAP:
+ case MAC_PROP_EN_FEC_CAP:
+ minsize = sizeof (link_fec_t);
+ break;
case MAC_PROP_ADV_5000FDX_CAP:
case MAC_PROP_EN_5000FDX_CAP:
case MAC_PROP_ADV_2500FDX_CAP:
@@ -3529,6 +3534,28 @@ mac_set_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val,
break;
}
+ case MAC_PROP_ADV_FEC_CAP:
+ case MAC_PROP_EN_FEC_CAP: {
+ link_fec_t fec;
+
+ ASSERT(valsize >= sizeof (link_fec_t));
+
+ /*
+ * fec cannot be zero, and auto must be set exclusively.
+ */
+ bcopy(val, &fec, sizeof (link_fec_t));
+ if (fec == 0)
+ return (EINVAL);
+ if ((fec & LINK_FEC_AUTO) != 0 && (fec & ~LINK_FEC_AUTO) != 0)
+ return (EINVAL);
+
+ if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) {
+ err = mip->mi_callbacks->mc_setprop(mip->mi_driver,
+ name, id, valsize, val);
+ }
+ break;
+ }
+
default:
/* For other driver properties, call driver's callback */
if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) {
diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c
index 7f193f68eb..bcca602589 100644
--- a/usr/src/uts/common/io/mac/mac_provider.c
+++ b/usr/src/uts/common/io/mac/mac_provider.c
@@ -23,6 +23,7 @@
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2020 RackTop Systems, Inc.
*/
#include <sys/types.h>
@@ -1530,6 +1531,22 @@ mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph,
}
+/*
+ * Record val as the default value of a link_fec_t property in the property
+ * info state behind ph, and flag that a default is present.  Does nothing
+ * when the state carries no buffer for a default value.
+ */
void
+mac_prop_info_set_default_fec(mac_prop_info_handle_t ph, link_fec_t val)
+{
+	mac_prop_info_state_t *state = (mac_prop_info_state_t *)ph;
+
+	/* Only fill in a default when there is a buffer to receive it. */
+	if (state->pr_default != NULL) {
+		ASSERT(state->pr_default_size >= sizeof (link_fec_t));
+
+		bcopy(&val, state->pr_default, sizeof (val));
+		state->pr_flags |= MAC_PROP_INFO_DEFAULT;
+	}
+}
+
+void
mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min,
uint32_t max)
{
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.c b/usr/src/uts/common/io/mlxcx/mlxcx.c
index 9fae7c5f77..2aefac33db 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.c
@@ -1756,6 +1756,11 @@ mlxcx_setup_ports(mlxcx_t *mlxp)
mutex_exit(&p->mlp_mtx);
goto err;
}
+ if (!mlxcx_cmd_query_port_fec(mlxp, p)) {
+ mutex_exit(&p->mlp_mtx);
+ goto err;
+ }
+ p->mlp_fec_requested = LINK_FEC_AUTO;
mutex_exit(&p->mlp_mtx);
}
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.h b/usr/src/uts/common/io/mlxcx/mlxcx.h
index 52240df3a3..06277d033c 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.h
@@ -346,6 +346,8 @@ typedef struct mlxcx_port {
mlxcx_eth_proto_t mlp_max_proto;
mlxcx_eth_proto_t mlp_admin_proto;
mlxcx_eth_proto_t mlp_oper_proto;
+ mlxcx_pplm_fec_active_t mlp_fec_active;
+ link_fec_t mlp_fec_requested;
mlxcx_eth_inline_mode_t mlp_wqe_min_inline;
@@ -1320,7 +1322,12 @@ extern boolean_t mlxcx_cmd_access_register(mlxcx_t *, mlxcx_cmd_reg_opmod_t,
mlxcx_register_id_t, mlxcx_register_data_t *);
extern boolean_t mlxcx_cmd_query_port_mtu(mlxcx_t *, mlxcx_port_t *);
extern boolean_t mlxcx_cmd_query_port_status(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_modify_port_status(mlxcx_t *, mlxcx_port_t *,
+ mlxcx_port_status_t);
extern boolean_t mlxcx_cmd_query_port_speed(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_query_port_fec(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_modify_port_fec(mlxcx_t *, mlxcx_port_t *,
+ mlxcx_pplm_fec_caps_t);
extern boolean_t mlxcx_cmd_set_port_mtu(mlxcx_t *, mlxcx_port_t *);
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
index 30fb7ca8ef..f059b856a6 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
@@ -12,6 +12,7 @@
/*
* Copyright 2020, The University of Queensland
* Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 RackTop Systems, Inc.
*/
/*
@@ -1594,6 +1595,8 @@ mlxcx_reg_name(mlxcx_register_id_t rid)
return ("MCIA");
case MLXCX_REG_PPCNT:
return ("PPCNT");
+ case MLXCX_REG_PPLM:
+ return ("PPLM");
default:
return ("???");
}
@@ -1640,6 +1643,9 @@ mlxcx_cmd_access_register(mlxcx_t *mlxp, mlxcx_cmd_reg_opmod_t opmod,
case MLXCX_REG_PPCNT:
dsize = sizeof (mlxcx_reg_ppcnt_t);
break;
+ case MLXCX_REG_PPLM:
+ dsize = sizeof (mlxcx_reg_pplm_t);
+ break;
default:
dsize = 0;
VERIFY(0);
@@ -1776,6 +1782,25 @@ mlxcx_cmd_query_port_status(mlxcx_t *mlxp, mlxcx_port_t *mlp)
}
boolean_t
+mlxcx_cmd_modify_port_status(mlxcx_t *mlxp, mlxcx_port_t *mlp,
+ mlxcx_port_status_t status)
+{
+ mlxcx_register_data_t data;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data, sizeof (data));
+ data.mlrd_paos.mlrd_paos_local_port = mlp->mlp_num + 1;
+ data.mlrd_paos.mlrd_paos_admin_status = status;
+ set_bit32(&data.mlrd_paos.mlrd_paos_flags, MLXCX_PAOS_ADMIN_ST_EN);
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_WRITE,
+ MLXCX_REG_PAOS, &data);
+
+ return (ret);
+}
+
+boolean_t
mlxcx_cmd_query_port_speed(mlxcx_t *mlxp, mlxcx_port_t *mlp)
{
mlxcx_register_data_t data;
@@ -1809,6 +1834,82 @@ mlxcx_cmd_query_port_speed(mlxcx_t *mlxp, mlxcx_port_t *mlp)
}
boolean_t
+mlxcx_cmd_query_port_fec(mlxcx_t *mlxp, mlxcx_port_t *mlp)
+{
+ mlxcx_register_data_t data;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data, sizeof (data));
+ data.mlrd_pplm.mlrd_pplm_local_port = mlp->mlp_num + 1;
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_PPLM, &data);
+
+ if (ret) {
+ mlp->mlp_fec_active =
+ from_be24(data.mlrd_pplm.mlrd_pplm_fec_mode_active);
+ }
+
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_modify_port_fec(mlxcx_t *mlxp, mlxcx_port_t *mlp,
+ mlxcx_pplm_fec_caps_t fec)
+{
+ mlxcx_register_data_t data_in, data_out;
+ mlxcx_pplm_fec_caps_t caps;
+ mlxcx_reg_pplm_t *pplm_in, *pplm_out;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data_in, sizeof (data_in));
+ pplm_in = &data_in.mlrd_pplm;
+ pplm_in->mlrd_pplm_local_port = mlp->mlp_num + 1;
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_PPLM, &data_in);
+
+ if (!ret)
+ return (B_FALSE);
+
+ bzero(&data_out, sizeof (data_out));
+ pplm_out = &data_out.mlrd_pplm;
+ pplm_out->mlrd_pplm_local_port = mlp->mlp_num + 1;
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_56G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_56G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_100G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_100G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_50G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_50G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_25G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_25G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_10_40G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_10_40G, fec & caps);
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_WRITE,
+ MLXCX_REG_PPLM, &data_out);
+
+ return (ret);
+}
+
+boolean_t
mlxcx_cmd_modify_nic_vport_ctx(mlxcx_t *mlxp, mlxcx_port_t *mlp,
mlxcx_modify_nic_vport_ctx_fields_t fields)
{
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
index 5d15ec1fbb..2521641a00 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
@@ -80,6 +80,53 @@ mlxcx_speed_to_bits(mlxcx_eth_proto_t v)
}
}
+static link_fec_t
+mlxcx_fec_to_link_fec(mlxcx_pplm_fec_active_t mlxcx_fec)
+{
+ if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_NONE) != 0)
+ return (LINK_FEC_NONE);
+
+ if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_FIRECODE) != 0)
+ return (LINK_FEC_BASE_R);
+
+ if ((mlxcx_fec & (MLXCX_PPLM_FEC_ACTIVE_RS528 |
+ MLXCX_PPLM_FEC_ACTIVE_RS271 | MLXCX_PPLM_FEC_ACTIVE_RS544 |
+ MLXCX_PPLM_FEC_ACTIVE_RS272)) != 0)
+ return (LINK_FEC_RS);
+
+ return (LINK_FEC_NONE);
+}
+
+static boolean_t
+mlxcx_link_fec_cap(link_fec_t fec, mlxcx_pplm_fec_caps_t *pfecp)
+{
+ mlxcx_pplm_fec_caps_t pplm_fec = 0;
+
+ if ((fec & LINK_FEC_AUTO) != 0) {
+ pplm_fec = MLXCX_PPLM_FEC_CAP_AUTO;
+ fec &= ~LINK_FEC_AUTO;
+ } else if ((fec & LINK_FEC_NONE) != 0) {
+ pplm_fec = MLXCX_PPLM_FEC_CAP_NONE;
+ fec &= ~LINK_FEC_NONE;
+ } else if ((fec & LINK_FEC_RS) != 0) {
+ pplm_fec |= MLXCX_PPLM_FEC_CAP_RS;
+ fec &= ~LINK_FEC_RS;
+ } else if ((fec & LINK_FEC_BASE_R) != 0) {
+ pplm_fec |= MLXCX_PPLM_FEC_CAP_FIRECODE;
+ fec &= ~LINK_FEC_BASE_R;
+ }
+
+ /*
+ * Only one fec option is allowed.
+ */
+ if (fec != 0)
+ return (B_FALSE);
+
+ *pfecp = pplm_fec;
+
+ return (B_TRUE);
+}
+
static int
mlxcx_mac_stat_rfc_2863(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat,
uint64_t *val)
@@ -1091,6 +1138,14 @@ mlxcx_mac_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
mac_prop_info_set_default_uint8(prh, 1);
break;
+ case MAC_PROP_ADV_FEC_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO);
+ break;
+ case MAC_PROP_EN_FEC_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+ mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO);
+ break;
case MAC_PROP_ADV_100GFDX_CAP:
case MAC_PROP_EN_100GFDX_CAP:
mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
@@ -1150,6 +1205,9 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
uint32_t new_mtu, new_hw_mtu, old_mtu;
mlxcx_buf_shard_t *sh;
boolean_t allocd = B_FALSE;
+ boolean_t relink = B_FALSE;
+ link_fec_t fec;
+ mlxcx_pplm_fec_caps_t cap_fec;
mutex_enter(&port->mlp_mtx);
@@ -1198,11 +1256,57 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
break;
}
break;
+
+ case MAC_PROP_EN_FEC_CAP:
+ bcopy(pr_val, &fec, sizeof (fec));
+ if (!mlxcx_link_fec_cap(fec, &cap_fec)) {
+ ret = EINVAL;
+ break;
+ }
+
+ /*
+ * Don't change the FEC if it is already at the requested
+ * setting AND the port is up.
+ * When the port is down, always set the FEC and attempt
+ * to retrain the link.
+ */
+ if (fec == port->mlp_fec_requested &&
+ fec == mlxcx_fec_to_link_fec(port->mlp_fec_active) &&
+ port->mlp_oper_status != MLXCX_PORT_STATUS_DOWN)
+ break;
+
+ /*
+ * The most likely cause of this failing is an invalid
+ * or unsupported fec option.
+ */
+ if (!mlxcx_cmd_modify_port_fec(mlxp, port, cap_fec)) {
+ ret = EINVAL;
+ break;
+ }
+
+ port->mlp_fec_requested = fec;
+
+ /*
+ * For FEC to become effective, the link needs to go back
+ * to training and negotiation state. This happens when
+ * the link transitions from down to up, so force a relink.
+ */
+ relink = B_TRUE;
+ break;
+
default:
ret = ENOTSUP;
break;
}
+ if (relink) {
+ if (!mlxcx_cmd_modify_port_status(mlxp, port,
+ MLXCX_PORT_STATUS_DOWN) ||
+ !mlxcx_cmd_modify_port_status(mlxp, port,
+ MLXCX_PORT_STATUS_UP)) {
+ ret = EIO;
+ }
+ }
mutex_exit(&port->mlp_mtx);
return (ret);
@@ -1260,6 +1364,21 @@ mlxcx_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
}
*(uint8_t *)pr_val = port->mlp_autoneg;
break;
+ case MAC_PROP_ADV_FEC_CAP:
+ if (pr_valsize < sizeof (link_fec_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ *(link_fec_t *)pr_val =
+ mlxcx_fec_to_link_fec(port->mlp_fec_active);
+ break;
+ case MAC_PROP_EN_FEC_CAP:
+ if (pr_valsize < sizeof (link_fec_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ *(link_fec_t *)pr_val = port->mlp_fec_requested;
+ break;
case MAC_PROP_MTU:
if (pr_valsize < sizeof (uint32_t)) {
ret = EOVERFLOW;
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
index 4dc4291b08..aed691897b 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
@@ -355,6 +355,7 @@ mlxcx_update_link_state(mlxcx_t *mlxp, mlxcx_port_t *port)
mutex_enter(&port->mlp_mtx);
(void) mlxcx_cmd_query_port_status(mlxp, port);
(void) mlxcx_cmd_query_port_speed(mlxp, port);
+ (void) mlxcx_cmd_query_port_fec(mlxp, port);
switch (port->mlp_oper_status) {
case MLXCX_PORT_STATUS_UP:
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
index 6d09abea5c..abd717842d 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
@@ -2464,6 +2464,59 @@ typedef struct {
} mlxcx_reg_ppcnt_t;
typedef enum {
+ MLXCX_PPLM_FEC_CAP_AUTO = 0,
+ MLXCX_PPLM_FEC_CAP_NONE = (1 << 0),
+ MLXCX_PPLM_FEC_CAP_FIRECODE = (1 << 1),
+ MLXCX_PPLM_FEC_CAP_RS = (1 << 2),
+} mlxcx_pplm_fec_caps_t;
+
+typedef enum {
+ MLXCX_PPLM_FEC_ACTIVE_NONE = (1 << 0),
+ MLXCX_PPLM_FEC_ACTIVE_FIRECODE = (1 << 1),
+ MLXCX_PPLM_FEC_ACTIVE_RS528 = (1 << 2),
+ MLXCX_PPLM_FEC_ACTIVE_RS271 = (1 << 3),
+ MLXCX_PPLM_FEC_ACTIVE_RS544 = (1 << 7),
+ MLXCX_PPLM_FEC_ACTIVE_RS272 = (1 << 9),
+} mlxcx_pplm_fec_active_t;
+
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_56G (bitdef_t){ 16, 0x000f0000 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_100G (bitdef_t){ 12, 0x0000f000 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_50G (bitdef_t){ 8, 0x00000f00 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_25G (bitdef_t){ 4, 0x000000f0 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_10_40G (bitdef_t){ 0, 0x0000000f }
+
+typedef struct {
+ uint8_t mlrd_pplm_rsvd;
+ uint8_t mlrd_pplm_local_port;
+ uint8_t mlrd_pplm_rsvd1[11];
+ uint24be_t mlrd_pplm_fec_mode_active;
+ bits32_t mlrd_pplm_fec_override_cap;
+ bits32_t mlrd_pplm_fec_override_admin;
+ uint16be_t mlrd_pplm_fec_override_cap_400g_8x;
+ uint16be_t mlrd_pplm_fec_override_cap_200g_4x;
+ uint16be_t mlrd_pplm_fec_override_cap_100g_2x;
+ uint16be_t mlrd_pplm_fec_override_cap_50g_1x;
+ uint16be_t mlrd_pplm_fec_override_admin_400g_8x;
+ uint16be_t mlrd_pplm_fec_override_admin_200g_4x;
+ uint16be_t mlrd_pplm_fec_override_admin_100g_2x;
+ uint16be_t mlrd_pplm_fec_override_admin_50g_1x;
+ uint8_t mlrd_pplm_rsvd2[8];
+ uint16be_t mlrd_pplm_fec_override_cap_hdr;
+ uint16be_t mlrd_pplm_fec_override_cap_edr;
+ uint16be_t mlrd_pplm_fec_override_cap_fdr;
+ uint16be_t mlrd_pplm_fec_override_cap_fdr10;
+ uint16be_t mlrd_pplm_fec_override_admin_hdr;
+ uint16be_t mlrd_pplm_fec_override_admin_edr;
+ uint16be_t mlrd_pplm_fec_override_admin_fdr;
+ uint16be_t mlrd_pplm_fec_override_admin_fdr10;
+} mlxcx_reg_pplm_t;
+
+typedef enum {
MLXCX_REG_PMTU = 0x5003,
MLXCX_REG_PTYS = 0x5004,
MLXCX_REG_PAOS = 0x5006,
@@ -2472,6 +2525,7 @@ typedef enum {
MLXCX_REG_MLCR = 0x902B,
MLXCX_REG_MCIA = 0x9014,
MLXCX_REG_PPCNT = 0x5008,
+ MLXCX_REG_PPLM = 0x5023,
} mlxcx_register_id_t;
typedef union {
@@ -2482,6 +2536,7 @@ typedef union {
mlxcx_reg_pmaos_t mlrd_pmaos;
mlxcx_reg_mcia_t mlrd_mcia;
mlxcx_reg_ppcnt_t mlrd_ppcnt;
+ mlxcx_reg_pplm_t mlrd_pplm;
} mlxcx_register_data_t;
typedef enum {
diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h
index 1d7ddf9648..a5974f6d7d 100644
--- a/usr/src/uts/common/sys/mac.h
+++ b/usr/src/uts/common/sys/mac.h
@@ -23,6 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2018 Joyent, Inc.
* Copyright (c) 2015 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2020 RackTop Systems, Inc.
*/
#ifndef _SYS_MAC_H
@@ -88,6 +89,13 @@ typedef enum {
} link_flowctrl_t;
typedef enum {
+ LINK_FEC_NONE = 1 << 0,
+ LINK_FEC_AUTO = 1 << 1,
+ LINK_FEC_RS = 1 << 2,
+ LINK_FEC_BASE_R = 1 << 3
+} link_fec_t;
+
+typedef enum {
LINK_TAGMODE_VLANONLY = 0,
LINK_TAGMODE_NORMAL
} link_tagmode_t;
@@ -239,6 +247,8 @@ typedef enum {
MAC_PROP_EN_25GFDX_CAP,
MAC_PROP_ADV_50GFDX_CAP,
MAC_PROP_EN_50GFDX_CAP,
+ MAC_PROP_EN_FEC_CAP,
+ MAC_PROP_ADV_FEC_CAP,
MAC_PROP_PRIVATE = -1
} mac_prop_id_t;
diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h
index 04c20d6aac..fc0866f2d1 100644
--- a/usr/src/uts/common/sys/mac_provider.h
+++ b/usr/src/uts/common/sys/mac_provider.h
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 RackTop Systems, Inc.
*/
#ifndef _SYS_MAC_PROVIDER_H
@@ -631,6 +632,8 @@ extern void mac_prop_info_set_default_uint32(
mac_prop_info_handle_t, uint32_t);
extern void mac_prop_info_set_default_link_flowctrl(
mac_prop_info_handle_t, link_flowctrl_t);
+extern void mac_prop_info_set_default_fec(
+ mac_prop_info_handle_t, link_fec_t);
extern void mac_prop_info_set_range_uint32(
mac_prop_info_handle_t,
uint32_t, uint32_t);
diff --git a/usr/src/uts/i86pc/io/viona/viona.c b/usr/src/uts/i86pc/io/viona/viona.c
deleted file mode 100644
index 2371a2f3ae..0000000000
--- a/usr/src/uts/i86pc/io/viona/viona.c
+++ /dev/null
@@ -1,1409 +0,0 @@
-/*
- * Copyright (c) 2013 Chris Torek <torek @ torek net>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * This file and its contents are supplied under the terms of the
- * Common Development and Distribution License ("CDDL"), version 1.0.
- * You may only use this file in accordance with the terms of version
- * 1.0 of the CDDL.
- *
- * A full copy of the text of the CDDL should have accompanied this
- * source. A copy of the CDDL is also available via the Internet at
- * http://www.illumos.org/license/CDDL.
- *
- * Copyright 2015 Pluribus Networks Inc.
- * Copyright 2017 Joyent, Inc.
- */
-
-#include <sys/conf.h>
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <sys/ddi.h>
-#include <sys/sunddi.h>
-#include <sys/sunndi.h>
-#include <sys/sysmacros.h>
-#include <sys/strsubr.h>
-#include <sys/strsun.h>
-#include <vm/seg_kmem.h>
-
-#include <sys/dls.h>
-#include <sys/mac_client.h>
-
-#include <sys/viona_io.h>
-
-#define MB (1024UL * 1024)
-#define GB (1024UL * MB)
-
-/*
- * Min. octets in an ethernet frame minus FCS
- */
-#define MIN_BUF_SIZE 60
-
-#define VIONA_NAME "Virtio Network Accelerator"
-
-#define VIONA_CTL_MINOR 0
-#define VIONA_CTL_NODE_NAME "ctl"
-
-#define VIONA_CLI_NAME "viona"
-
-#define VTNET_MAXSEGS 32
-
-#define VRING_ALIGN 4096
-
-#define VRING_DESC_F_NEXT (1 << 0)
-#define VRING_DESC_F_WRITE (1 << 1)
-#define VRING_DESC_F_INDIRECT (1 << 2)
-
-#define VRING_AVAIL_F_NO_INTERRUPT 1
-
-#define VRING_USED_F_NO_NOTIFY 1
-
-#define BCM_NIC_DRIVER "bnxe"
-/*
- * Host capabilities
- */
-#define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */
-#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */
-#define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */
-
-#define VIONA_S_HOSTCAPS \
- (VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | \
- VIRTIO_NET_F_STATUS)
-
-#pragma pack(1)
-struct virtio_desc {
- uint64_t vd_addr;
- uint32_t vd_len;
- uint16_t vd_flags;
- uint16_t vd_next;
-};
-#pragma pack()
-
-#pragma pack(1)
-struct virtio_used {
- uint32_t vu_idx;
- uint32_t vu_tlen;
-};
-#pragma pack()
-
-#pragma pack(1)
-struct virtio_net_mrgrxhdr {
- uint8_t vrh_flags;
- uint8_t vrh_gso_type;
- uint16_t vrh_hdr_len;
- uint16_t vrh_gso_size;
- uint16_t vrh_csum_start;
- uint16_t vrh_csum_offset;
- uint16_t vrh_bufs;
-};
-struct virtio_net_hdr {
- uint8_t vrh_flags;
- uint8_t vrh_gso_type;
- uint16_t vrh_hdr_len;
- uint16_t vrh_gso_size;
- uint16_t vrh_csum_start;
- uint16_t vrh_csum_offset;
-};
-#pragma pack()
-
-typedef struct viona_vring_hqueue {
- /* Internal state */
- uint16_t hq_size;
- kmutex_t hq_a_mutex;
- kmutex_t hq_u_mutex;
- uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */
-
- /* Host-context pointers to the queue */
- caddr_t hq_baseaddr;
- uint16_t *hq_avail_flags;
- uint16_t *hq_avail_idx; /* monotonically increasing */
- uint16_t *hq_avail_ring;
-
- uint16_t *hq_used_flags;
- uint16_t *hq_used_idx; /* monotonically increasing */
- struct virtio_used *hq_used_ring;
-} viona_vring_hqueue_t;
-
-
-typedef struct viona_link {
- datalink_id_t l_linkid;
-
- struct vm *l_vm;
- size_t l_vm_lomemsize;
- caddr_t l_vm_lomemaddr;
- size_t l_vm_himemsize;
- caddr_t l_vm_himemaddr;
-
- mac_handle_t l_mh;
- mac_client_handle_t l_mch;
-
- kmem_cache_t *l_desb_kmc;
-
- pollhead_t l_pollhead;
-
- viona_vring_hqueue_t l_rx_vring;
- uint_t l_rx_intr;
-
- viona_vring_hqueue_t l_tx_vring;
- kcondvar_t l_tx_cv;
- uint_t l_tx_intr;
- kmutex_t l_tx_mutex;
- int l_tx_outstanding;
- uint32_t l_features;
-} viona_link_t;
-
-typedef struct {
- frtn_t d_frtn;
- viona_link_t *d_link;
- uint_t d_ref;
- uint16_t d_cookie;
- int d_len;
-} viona_desb_t;
-
-typedef struct viona_soft_state {
- viona_link_t *ss_link;
-} viona_soft_state_t;
-
-typedef struct used_elem {
- uint16_t id;
- uint32_t len;
-} used_elem_t;
-
-static void *viona_state;
-static dev_info_t *viona_dip;
-static id_space_t *viona_minor_ids;
-/*
- * copy tx mbufs from virtio ring to avoid necessitating a wait for packet
- * transmission to free resources.
- */
-static boolean_t copy_tx_mblks = B_TRUE;
-
-extern struct vm *vm_lookup_by_name(char *name);
-extern uint64_t vm_gpa2hpa(struct vm *vm, uint64_t gpa, size_t len);
-
-static int viona_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
-static int viona_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
-static int viona_open(dev_t *devp, int flag, int otype, cred_t *credp);
-static int viona_close(dev_t dev, int flag, int otype, cred_t *credp);
-static int viona_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
- cred_t *credp, int *rval);
-static int viona_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
- struct pollhead **phpp);
-
-static int viona_ioc_create(viona_soft_state_t *ss, vioc_create_t *u_create);
-static int viona_ioc_delete(viona_soft_state_t *ss);
-
-static int viona_vm_map(viona_link_t *link);
-static caddr_t viona_gpa2kva(viona_link_t *link, uint64_t gpa);
-static void viona_vm_unmap(viona_link_t *link);
-
-static int viona_ioc_rx_ring_init(viona_link_t *link,
- vioc_ring_init_t *u_ri);
-static int viona_ioc_tx_ring_init(viona_link_t *link,
- vioc_ring_init_t *u_ri);
-static int viona_ioc_rx_ring_reset(viona_link_t *link);
-static int viona_ioc_tx_ring_reset(viona_link_t *link);
-static void viona_ioc_rx_ring_kick(viona_link_t *link);
-static void viona_ioc_tx_ring_kick(viona_link_t *link);
-static int viona_ioc_rx_intr_clear(viona_link_t *link);
-static int viona_ioc_tx_intr_clear(viona_link_t *link);
-
-static void viona_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
- boolean_t loopback);
-static void viona_tx(viona_link_t *link, viona_vring_hqueue_t *hq);
-
-static struct cb_ops viona_cb_ops = {
- viona_open,
- viona_close,
- nodev,
- nodev,
- nodev,
- nodev,
- nodev,
- viona_ioctl,
- nodev,
- nodev,
- nodev,
- viona_chpoll,
- ddi_prop_op,
- 0,
- D_MP | D_NEW | D_HOTPLUG,
- CB_REV,
- nodev,
- nodev
-};
-
-static struct dev_ops viona_ops = {
- DEVO_REV,
- 0,
- nodev,
- nulldev,
- nulldev,
- viona_attach,
- viona_detach,
- nodev,
- &viona_cb_ops,
- NULL,
- ddi_power,
- ddi_quiesce_not_needed
-};
-
-static struct modldrv modldrv = {
- &mod_driverops,
- VIONA_NAME,
- &viona_ops,
-};
-
-static struct modlinkage modlinkage = {
- MODREV_1, &modldrv, NULL
-};
-
-int
-_init(void)
-{
- int ret;
-
- ret = ddi_soft_state_init(&viona_state,
- sizeof (viona_soft_state_t), 0);
- if (ret == 0) {
- ret = mod_install(&modlinkage);
- if (ret != 0) {
- ddi_soft_state_fini(&viona_state);
- return (ret);
- }
- }
-
- return (ret);
-}
-
-int
-_fini(void)
-{
- int ret;
-
- ret = mod_remove(&modlinkage);
- if (ret == 0) {
- ddi_soft_state_fini(&viona_state);
- }
-
- return (ret);
-}
-
-int
-_info(struct modinfo *modinfop)
-{
- return (mod_info(&modlinkage, modinfop));
-}
-
-static void
-set_viona_tx_mode()
-{
- major_t bcm_nic_major;
- if ((bcm_nic_major = ddi_name_to_major(BCM_NIC_DRIVER))
- != DDI_MAJOR_T_NONE) {
- if (ddi_hold_installed_driver(bcm_nic_major) != NULL) {
- copy_tx_mblks = B_FALSE;
- ddi_rele_driver(bcm_nic_major);
- }
- }
-}
-
-static int
-viona_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
-{
- if (cmd != DDI_ATTACH) {
- return (DDI_FAILURE);
- }
-
- viona_minor_ids = id_space_create("viona_minor_id",
- VIONA_CTL_MINOR + 1, UINT16_MAX);
-
- if (ddi_create_minor_node(dip, VIONA_CTL_NODE_NAME,
- S_IFCHR, VIONA_CTL_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS) {
- return (DDI_FAILURE);
- }
-
- viona_dip = dip;
-
- set_viona_tx_mode();
- ddi_report_dev(viona_dip);
-
- return (DDI_SUCCESS);
-}
-
-static int
-viona_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
-{
- if (cmd != DDI_DETACH) {
- return (DDI_FAILURE);
- }
-
- id_space_destroy(viona_minor_ids);
-
- ddi_remove_minor_node(viona_dip, NULL);
-
- viona_dip = NULL;
-
- return (DDI_SUCCESS);
-}
-
-static int
-viona_open(dev_t *devp, int flag, int otype, cred_t *credp)
-{
- int minor;
-
- if (otype != OTYP_CHR) {
- return (EINVAL);
- }
-
- if (drv_priv(credp) != 0) {
- return (EPERM);
- }
-
- if (getminor(*devp) != VIONA_CTL_MINOR) {
- return (ENXIO);
- }
-
- minor = id_alloc(viona_minor_ids);
- if (minor == 0) {
- /* All minors are busy */
- return (EBUSY);
- }
-
- if (ddi_soft_state_zalloc(viona_state, minor) != DDI_SUCCESS) {
- id_free(viona_minor_ids, minor);
- }
-
- *devp = makedevice(getmajor(*devp), minor);
-
- return (0);
-}
-
-static int
-viona_close(dev_t dev, int flag, int otype, cred_t *credp)
-{
- int minor;
- viona_soft_state_t *ss;
-
- if (otype != OTYP_CHR) {
- return (EINVAL);
- }
-
- if (drv_priv(credp) != 0) {
- return (EPERM);
- }
-
- minor = getminor(dev);
-
- ss = ddi_get_soft_state(viona_state, minor);
- if (ss == NULL) {
- return (ENXIO);
- }
-
- viona_ioc_delete(ss);
-
- ddi_soft_state_free(viona_state, minor);
-
- id_free(viona_minor_ids, minor);
-
- return (0);
-}
-
-static int
-viona_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
- cred_t *credp, int *rval)
-{
- viona_soft_state_t *ss;
- int err = 0;
-
- ss = ddi_get_soft_state(viona_state, getminor(dev));
- if (ss == NULL) {
- return (ENXIO);
- }
-
- switch (cmd) {
- case VNA_IOC_CREATE:
- err = viona_ioc_create(ss, (vioc_create_t *)data);
- break;
- case VNA_IOC_DELETE:
- err = viona_ioc_delete(ss);
- break;
- case VNA_IOC_SET_FEATURES:
- if (ss->ss_link == NULL) {
- return (ENOSYS);
- }
- ss->ss_link->l_features = *(int *)data & VIONA_S_HOSTCAPS;
- break;
- case VNA_IOC_GET_FEATURES:
- if (ss->ss_link == NULL) {
- return (ENOSYS);
- }
- *(int *)data = VIONA_S_HOSTCAPS;
- break;
- case VNA_IOC_RX_RING_INIT:
- if (ss->ss_link == NULL) {
- return (ENOSYS);
- }
- err = viona_ioc_rx_ring_init(ss->ss_link,
- (vioc_ring_init_t *)data);
- break;
- case VNA_IOC_RX_RING_RESET:
- if (ss->ss_link == NULL) {
- return (ENOSYS);
- }
- err = viona_ioc_rx_ring_reset(ss->ss_link);
- break;
- case VNA_IOC_RX_RING_KICK:
- if (ss->ss_link == NULL) {
- return (ENOSYS);
- }
- viona_ioc_rx_ring_kick(ss->ss_link);
- err = 0;
- break;
- case VNA_IOC_TX_RING_INIT:
- if (ss->ss_link == NULL) {
- return (ENOSYS);
- }
- err = viona_ioc_tx_ring_init(ss->ss_link,
- (vioc_ring_init_t *)data);
- break;
- case VNA_IOC_TX_RING_RESET:
- if (ss->ss_link == NULL) {
- return (ENOSYS);
- }
- err = viona_ioc_tx_ring_reset(ss->ss_link);
- break;
- case VNA_IOC_TX_RING_KICK:
- if (ss->ss_link == NULL) {
- return (ENOSYS);
- }
- viona_ioc_tx_ring_kick(ss->ss_link);
- err = 0;
- break;
- case VNA_IOC_RX_INTR_CLR:
- if (ss->ss_link == NULL) {
- return (ENOSYS);
- }
- err = viona_ioc_rx_intr_clear(ss->ss_link);
- break;
- case VNA_IOC_TX_INTR_CLR:
- if (ss->ss_link == NULL) {
- return (ENOSYS);
- }
- err = viona_ioc_tx_intr_clear(ss->ss_link);
- break;
- default:
- err = ENOTTY;
- break;
- }
-
- return (err);
-}
-
-static int
-viona_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
- struct pollhead **phpp)
-{
- viona_soft_state_t *ss;
-
- ss = ddi_get_soft_state(viona_state, getminor(dev));
- if (ss == NULL || ss->ss_link == NULL) {
- return (ENXIO);
- }
-
- *reventsp = 0;
-
- if (ss->ss_link->l_rx_intr && (events & POLLIN)) {
- *reventsp |= POLLIN;
- }
-
- if (ss->ss_link->l_tx_intr && (events & POLLOUT)) {
- *reventsp |= POLLOUT;
- }
-
- if (*reventsp == 0 && !anyyet) {
- *phpp = &ss->ss_link->l_pollhead;
- }
-
- return (0);
-}
-
-static int
-viona_ioc_create(viona_soft_state_t *ss, vioc_create_t *u_create)
-{
- vioc_create_t k_create;
- viona_link_t *link;
- char cli_name[MAXNAMELEN];
- int err;
-
- if (ss->ss_link != NULL) {
- return (ENOSYS);
- }
- if (copyin(u_create, &k_create, sizeof (k_create)) != 0) {
- return (EFAULT);
- }
-
- link = kmem_zalloc(sizeof (viona_link_t), KM_SLEEP);
-
- link->l_linkid = k_create.c_linkid;
- link->l_vm = vm_lookup_by_name(k_create.c_vmname);
- if (link->l_vm == NULL) {
- err = ENXIO;
- goto bail;
- }
-
- link->l_vm_lomemsize = k_create.c_lomem_size;
- link->l_vm_himemsize = k_create.c_himem_size;
- err = viona_vm_map(link);
- if (err != 0) {
- goto bail;
- }
-
- err = mac_open_by_linkid(link->l_linkid, &link->l_mh);
- if (err != 0) {
- cmn_err(CE_WARN, "viona create mac_open_by_linkid"
- " returned %d\n", err);
- goto bail;
- }
-
- snprintf(cli_name, sizeof (cli_name), "%s-%d",
- VIONA_CLI_NAME, link->l_linkid);
- err = mac_client_open(link->l_mh, &link->l_mch, cli_name, 0);
- if (err != 0) {
- cmn_err(CE_WARN, "viona create mac_client_open"
- " returned %d\n", err);
- goto bail;
- }
-
- link->l_features = VIONA_S_HOSTCAPS;
- link->l_desb_kmc = kmem_cache_create(cli_name,
- sizeof (viona_desb_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
-
- mutex_init(&link->l_rx_vring.hq_a_mutex, NULL, MUTEX_DRIVER, NULL);
- mutex_init(&link->l_rx_vring.hq_u_mutex, NULL, MUTEX_DRIVER, NULL);
- mutex_init(&link->l_rx_vring.hq_a_mutex, NULL, MUTEX_DRIVER, NULL);
- mutex_init(&link->l_tx_vring.hq_u_mutex, NULL, MUTEX_DRIVER, NULL);
- if (copy_tx_mblks) {
- mutex_init(&link->l_tx_mutex, NULL, MUTEX_DRIVER, NULL);
- cv_init(&link->l_tx_cv, NULL, CV_DRIVER, NULL);
- }
- ss->ss_link = link;
-
- return (0);
-
-bail:
- if (link->l_mch != NULL) {
- mac_client_close(link->l_mch, 0);
- }
- if (link->l_mh != NULL) {
- mac_close(link->l_mh);
- }
-
- kmem_free(link, sizeof (viona_link_t));
-
- return (err);
-}
-
-static int
-viona_ioc_delete(viona_soft_state_t *ss)
-{
- viona_link_t *link;
-
- link = ss->ss_link;
- if (link == NULL) {
- return (ENOSYS);
- }
- if (copy_tx_mblks) {
- mutex_enter(&link->l_tx_mutex);
- while (link->l_tx_outstanding != 0) {
- cv_wait(&link->l_tx_cv, &link->l_tx_mutex);
- }
- mutex_exit(&link->l_tx_mutex);
- }
- if (link->l_mch != NULL) {
- mac_rx_clear(link->l_mch);
- mac_client_close(link->l_mch, 0);
- }
- if (link->l_mh != NULL) {
- mac_close(link->l_mh);
- }
-
- viona_vm_unmap(link);
- mutex_destroy(&link->l_tx_vring.hq_a_mutex);
- mutex_destroy(&link->l_tx_vring.hq_u_mutex);
- mutex_destroy(&link->l_rx_vring.hq_a_mutex);
- mutex_destroy(&link->l_rx_vring.hq_u_mutex);
- if (copy_tx_mblks) {
- mutex_destroy(&link->l_tx_mutex);
- cv_destroy(&link->l_tx_cv);
- }
-
- kmem_cache_destroy(link->l_desb_kmc);
-
- kmem_free(link, sizeof (viona_link_t));
-
- ss->ss_link = NULL;
-
- return (0);
-}
-
-static caddr_t
-viona_mapin_vm_chunk(viona_link_t *link, uint64_t gpa, size_t len)
-{
- caddr_t addr;
- size_t offset;
- pfn_t pfnum;
-
- if (len == 0)
- return (NULL);
-
- addr = vmem_alloc(heap_arena, len, VM_SLEEP);
- if (addr == NULL)
- return (NULL);
-
- for (offset = 0; offset < len; offset += PAGESIZE) {
- pfnum = btop(vm_gpa2hpa(link->l_vm, gpa + offset, PAGESIZE));
- ASSERT(pfnum);
- hat_devload(kas.a_hat, addr + offset, PAGESIZE, pfnum,
- PROT_READ | PROT_WRITE, HAT_LOAD_LOCK);
- }
-
- return (addr);
-}
-
-/*
- * Map the guest physical address space into the kernel virtual address space.
- */
-static int
-viona_vm_map(viona_link_t *link)
-{
- link->l_vm_lomemaddr = viona_mapin_vm_chunk(link,
- 0, link->l_vm_lomemsize);
- if (link->l_vm_lomemaddr == NULL)
- return (-1);
- link->l_vm_himemaddr = viona_mapin_vm_chunk(link,
- 4 * (1024 * 1024 * 1024UL), link->l_vm_himemsize);
- if (link->l_vm_himemsize && link->l_vm_himemaddr == NULL)
- return (-1);
-
- return (0);
-}
-
-/*
- * Translate a guest physical address into a kernel virtual address.
- */
-static caddr_t
-viona_gpa2kva(viona_link_t *link, uint64_t gpa)
-{
- if (gpa < link->l_vm_lomemsize)
- return (link->l_vm_lomemaddr + gpa);
-
- gpa -= (4 * GB);
- if (gpa < link->l_vm_himemsize)
- return (link->l_vm_himemaddr + gpa);
-
- return (NULL);
-}
-
-static void
-viona_vm_unmap(viona_link_t *link)
-{
- if (link->l_vm_lomemaddr) {
- hat_unload(kas.a_hat, link->l_vm_lomemaddr,
- link->l_vm_lomemsize, HAT_UNLOAD_UNLOCK);
- vmem_free(heap_arena, link->l_vm_lomemaddr,
- link->l_vm_lomemsize);
- }
- if (link->l_vm_himemaddr) {
- hat_unload(kas.a_hat, link->l_vm_himemaddr,
- link->l_vm_himemsize, HAT_UNLOAD_UNLOCK);
- vmem_free(heap_arena, link->l_vm_himemaddr,
- link->l_vm_himemsize);
- }
-}
-
-/*
- * Initialize a host-side vring description from a user-supplied
- * vioc_ring_init_t.
- *
- * The request is copied in from u_ri and supplies the ring size (ri_qsize)
- * and the guest physical address of the ring (ri_qaddr).  The ring is laid
- * out as a descriptor table, followed by the avail ring, followed by the
- * used ring aligned up to VRING_ALIGN.
- *
- * Returns 0 on success, EFAULT when the request cannot be copied in, and
- * EINVAL when the ring size is unusable or any ring component does not start
- * inside mapped guest memory.  hq is only modified on success.
- *
- * NOTE(review): only the starting address of each component is translated;
- * the end of each component is not checked against the mapped region.
- */
-static int
-viona_ioc_ring_init_common(viona_link_t *link, viona_vring_hqueue_t *hq,
-    vioc_ring_init_t *u_ri)
-{
-	vioc_ring_init_t	k_ri;
-	uint64_t		avail_gpa, used_gpa;
-	caddr_t			desc_kva, avail_kva, used_kva;
-
-	if (copyin(u_ri, &k_ri, sizeof (k_ri)) != 0) {
-		return (EFAULT);
-	}
-
-	/*
-	 * Ring indexes are reduced with "idx & (size - 1)" elsewhere, which
-	 * is only correct for a non-zero power of two.  32768 is the largest
-	 * queue size a 16-bit ring index can describe.
-	 */
-	if (k_ri.ri_qsize == 0 || k_ri.ri_qsize > 32768 ||
-	    (k_ri.ri_qsize & (k_ri.ri_qsize - 1)) != 0) {
-		return (EINVAL);
-	}
-
-	/* The avail ring immediately follows the descriptor table. */
-	avail_gpa = k_ri.ri_qaddr +
-	    k_ri.ri_qsize * sizeof (struct virtio_desc);
-
-	/*
-	 * The avail ring holds flags, idx, one entry per descriptor and a
-	 * trailing event word, all 16 bits wide.  The used ring starts at
-	 * the next VRING_ALIGN boundary after it.
-	 */
-	used_gpa = P2ROUNDUP(avail_gpa +
-	    (3 + k_ri.ri_qsize) * sizeof (uint16_t), VRING_ALIGN);
-
-	desc_kva = viona_gpa2kva(link, k_ri.ri_qaddr);
-	avail_kva = viona_gpa2kva(link, avail_gpa);
-	used_kva = viona_gpa2kva(link, used_gpa);
-	if (desc_kva == NULL || avail_kva == NULL || used_kva == NULL) {
-		return (EINVAL);
-	}
-
-	/* Everything checked out; commit the new ring description. */
-	hq->hq_size = k_ri.ri_qsize;
-	hq->hq_baseaddr = desc_kva;
-
-	hq->hq_avail_flags = (uint16_t *)avail_kva;
-	hq->hq_avail_idx = hq->hq_avail_flags + 1;
-	hq->hq_avail_ring = hq->hq_avail_flags + 2;
-
-	hq->hq_used_flags = (uint16_t *)used_kva;
-	hq->hq_used_idx = hq->hq_used_flags + 1;
-	hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
-
-	/*
-	 * Initialize queue indexes
-	 */
-	hq->hq_cur_aidx = 0;
-
-	return (0);
-}
-
-/*
- * Initialize the link's receive vring from the user-supplied request.
- * Returns whatever viona_ioc_ring_init_common() returns.
- */
-static int
-viona_ioc_rx_ring_init(viona_link_t *link, vioc_ring_init_t *u_ri)
-{
-	return (viona_ioc_ring_init_common(link, &link->l_rx_vring, u_ri));
-}
-
-/*
- * Initialize the link's transmit vring from the user-supplied request.
- * Returns whatever viona_ioc_ring_init_common() returns.
- */
-static int
-viona_ioc_tx_ring_init(viona_link_t *link, vioc_ring_init_t *u_ri)
-{
-	return (viona_ioc_ring_init_common(link, &link->l_tx_vring, u_ri));
-}
-
-/*
- * Reset the host-side soft state of a vring: the index of the next avail
- * entry to consume goes back to zero.  Always returns 0.
- */
-static int
-viona_ioc_ring_reset_common(viona_vring_hqueue_t *hq)
-{
-	hq->hq_cur_aidx = 0;
-	return (0);
-}
-
-/*
- * Reset the receive ring: clear the MAC client's receive callback, then
- * reset the ring's soft state.  Returns the result of the common reset.
- */
-static int
-viona_ioc_rx_ring_reset(viona_link_t *link)
-{
-	mac_rx_clear(link->l_mch);
-
-	return (viona_ioc_ring_reset_common(&link->l_rx_vring));
-}
-
-/*
- * Reset the soft state of the transmit ring.  Returns the result of the
- * common reset.
- */
-static int
-viona_ioc_tx_ring_reset(viona_link_t *link)
-{
-	return (viona_ioc_ring_reset_common(&link->l_tx_vring));
-}
-
-/*
- * Start receive processing for the link: set VRING_USED_F_NO_NOTIFY in the
- * receive ring's used flags, then register viona_rx() as the MAC client's
- * receive callback with the link as its argument.
- */
-static void
-viona_ioc_rx_ring_kick(viona_link_t *link)
-{
-	atomic_or_16(link->l_rx_vring.hq_used_flags, VRING_USED_F_NO_NOTIFY);
-
-	mac_rx_set(link->l_mch, viona_rx, link);
-}
-
-/*
- * Report how many entries the guest has published in the avail ring that the
- * host has not consumed yet, allowing for wraparound of the 16-bit indexes.
- */
-static inline int
-viona_hq_num_avail(viona_vring_hqueue_t *hq)
-{
-	/*
-	 * This is (a - b) modulo 2^16.
-	 *
-	 * In standard C a uint16_t operand promotes to (signed) int whenever
-	 * int is wider than 16 bits, so both operands are forced to unsigned
-	 * before subtracting and the result is truncated back to 16 bits by
-	 * the assignment.
-	 */
-	uint16_t pending =
-	    (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx;
-
-	ASSERT(pending <= hq->hq_size);
-
-	return (pending);
-}
-
-/*
- * Drain the transmit ring on behalf of the guest.
- *
- * Guest notifications are suppressed (VRING_USED_F_NO_NOTIFY set) while every
- * available chain is handed to viona_tx().  Notifications are then re-enabled
- * and the ring is checked once more, so that chains published while the flag
- * was set are picked up by another pass of the outer loop.
- *
- * When copy_tx_mblks is set, the thread also waits on l_tx_cv while
- * l_tx_outstanding is non-zero before re-enabling notifications.
- *
- * NOTE(review): the wait is a single cv_wait_sig() guarded by "if", so the
- * count is not re-checked after waking -- confirm that is intended.
- */
-static void
-viona_ioc_tx_ring_kick(viona_link_t *link)
-{
- viona_vring_hqueue_t *hq = &link->l_tx_vring;
-
- do {
- /* Suppress guest notifications while the ring is being drained. */
- atomic_or_16(hq->hq_used_flags, VRING_USED_F_NO_NOTIFY);
- while (viona_hq_num_avail(hq)) {
- viona_tx(link, hq);
- }
- if (copy_tx_mblks) {
- mutex_enter(&link->l_tx_mutex);
- if (link->l_tx_outstanding != 0) {
- cv_wait_sig(&link->l_tx_cv, &link->l_tx_mutex);
- }
- mutex_exit(&link->l_tx_mutex);
- }
- /* Re-enable notifications, then re-check for late arrivals. */
- atomic_and_16(hq->hq_used_flags, ~VRING_USED_F_NO_NOTIFY);
- } while (viona_hq_num_avail(hq));
-}
-
-/*
- * Clear the link's pending receive-interrupt flag (the flag viona_rx() sets
- * before issuing its poll wakeup).  Always returns 0.
- */
-static int
-viona_ioc_rx_intr_clear(viona_link_t *link)
-{
-	link->l_rx_intr = 0;
-	return (0);
-}
-
-/*
- * Clear the link's pending transmit-interrupt flag (the flag
- * viona_desb_free() sets before issuing its poll wakeup).  Always returns 0.
- */
-static int
-viona_ioc_tx_intr_clear(viona_link_t *link)
-{
-	link->l_tx_intr = 0;
-	return (0);
-}
-#define VQ_MAX_DESCRIPTORS 512
-
-/*
- * Take the next available descriptor chain off a vring and translate it into
- * an iovec array of kernel virtual addresses.
- *
- *	link	- link whose guest memory mappings are used for translation
- *	hq	- ring to consume from
- *	iov	- output array
- *	n_iov	- number of entries iov can hold
- *	cookie	- on success, receives the index of the chain's head descriptor
- *
- * Returns the number of iov entries filled in, 0 when the ring holds no
- * available chains, or -1 when the chain is malformed (bad index, bad guest
- * address, bad indirect table, too many descriptors) or does not fit in
- * n_iov entries.  hq_cur_aidx is advanced only on success.
- *
- * Indirect descriptor tables are followed one level deep; an indirect
- * descriptor inside an indirect table is rejected.
- */
-static int
-vq_popchain(viona_link_t *link, viona_vring_hqueue_t *hq, struct iovec *iov,
-    int n_iov, uint16_t *cookie)
-{
-	int i;
-	int ndesc, nindir;
-	int idx, head, next;
-	struct virtio_desc *vdir, *vindir, *vp;
-
-	idx = hq->hq_cur_aidx;
-	ndesc = (uint16_t)((unsigned)*hq->hq_avail_idx - (unsigned)idx);
-
-	if (ndesc == 0)
-		return (0);
-	if (ndesc > hq->hq_size) {
-		cmn_err(CE_NOTE, "ndesc (%d) out of range\n", ndesc);
-		return (-1);
-	}
-
-	head = hq->hq_avail_ring[idx & (hq->hq_size - 1)];
-	next = head;
-
-	for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->vd_next) {
-		if (next >= hq->hq_size) {
-			cmn_err(CE_NOTE, "descriptor index (%d)"
-			    " out of range\n", next);
-			return (-1);
-		}
-
-		vdir = (struct virtio_desc *)(hq->hq_baseaddr +
-		    next * sizeof (struct virtio_desc));
-		if ((vdir->vd_flags & VRING_DESC_F_INDIRECT) == 0) {
-			/* iov[n_iov] is one past the end of the array. */
-			if (i >= n_iov)
-				return (-1);
-			iov[i].iov_base = viona_gpa2kva(link, vdir->vd_addr);
-			if (iov[i].iov_base == NULL) {
-				cmn_err(CE_NOTE, "invalid guest physical"
-				    " address 0x%"PRIx64"\n", vdir->vd_addr);
-				return (-1);
-			}
-			iov[i++].iov_len = vdir->vd_len;
-		} else {
-			/* vd_len is the byte size of the indirect table. */
-			nindir = vdir->vd_len / 16;
-			if ((vdir->vd_len & 0xf) || nindir == 0) {
-				cmn_err(CE_NOTE, "invalid indir len 0x%x\n",
-				    vdir->vd_len);
-				return (-1);
-			}
-			vindir = (struct virtio_desc *)
-			    viona_gpa2kva(link, vdir->vd_addr);
-			if (vindir == NULL) {
-				cmn_err(CE_NOTE, "invalid guest physical"
-				    " address 0x%"PRIx64"\n", vdir->vd_addr);
-				return (-1);
-			}
-			next = 0;
-			for (;;) {
-				vp = &vindir[next];
-				if (vp->vd_flags & VRING_DESC_F_INDIRECT) {
-					cmn_err(CE_NOTE, "indirect desc"
-					    " has INDIR flag\n");
-					return (-1);
-				}
-				/* Same bound as for direct descriptors. */
-				if (i >= n_iov)
-					return (-1);
-				iov[i].iov_base =
-				    viona_gpa2kva(link, vp->vd_addr);
-				if (iov[i].iov_base == NULL) {
-					cmn_err(CE_NOTE, "invalid guest"
-					    " physical address 0x%"PRIx64"\n",
-					    vp->vd_addr);
-					return (-1);
-				}
-				iov[i++].iov_len = vp->vd_len;
-
-				if (i > VQ_MAX_DESCRIPTORS)
-					goto loopy;
-				if ((vp->vd_flags & VRING_DESC_F_NEXT) == 0)
-					break;
-
-				next = vp->vd_next;
-				if (next >= nindir) {
-					cmn_err(CE_NOTE, "invalid next"
-					    " %d > %d\n", next, nindir);
-					return (-1);
-				}
-			}
-		}
-		if ((vdir->vd_flags & VRING_DESC_F_NEXT) == 0) {
-			/* End of chain: hand it to the caller. */
-			*cookie = head;
-			hq->hq_cur_aidx++;
-			return (i);
-		}
-	}
-
-loopy:
-	cmn_err(CE_NOTE, "%d > descriptor loop count\n", i);
-
-	return (-1);
-}
-
-/*
- * Return one descriptor chain to the guest through the used ring.
- *
- *	hq	- ring to publish on
- *	len	- length recorded in the used entry
- *	cookie	- head descriptor index of the chain being returned
- *
- * The used entry is filled in first; the used index is only advanced after
- * a producer memory barrier.
- */
-static void
-vq_pushchain(viona_vring_hqueue_t *hq, uint32_t len, uint16_t cookie)
-{
-	int			slot = *hq->hq_used_idx;
-	struct virtio_used	*entry;
-
-	entry = &hq->hq_used_ring[slot & (hq->hq_size - 1)];
-	entry->vu_idx = cookie;
-	entry->vu_tlen = len;
-
-	membar_producer();
-	*hq->hq_used_idx = slot + 1;
-}
-
-/*
- * Return num_bufs entries to the guest through the used ring in one step.
- *
- *	hq	 - ring to publish on
- *	num_bufs - number of used entries to add
- *	elem	 - array supplying the id and length of each entry
- *
- * All entries are written first; the used index is advanced by num_bufs
- * only after a producer memory barrier.
- */
-static void
-vq_pushchain_mrgrx(viona_vring_hqueue_t *hq, int num_bufs, used_elem_t *elem)
-{
-	const int		first = *hq->hq_used_idx;
-	struct virtio_used	*entry;
-	int			k;
-
-	for (k = 0; k < num_bufs; k++) {
-		entry = &hq->hq_used_ring[(first + k) & (hq->hq_size - 1)];
-		entry->vu_idx = elem[k].id;
-		entry->vu_tlen = elem[k].len;
-	}
-
-	membar_producer();
-	*hq->hq_used_idx = first + num_bufs;
-}
-
-/*
- * Copy part of a message into one guest buffer.
- *
- *	mp	   - message (chain of b_cont-linked blocks) to copy from
- *	copied_buf - number of bytes of the message already copied by earlier
- *		     calls; copying resumes at this offset
- *	buf	   - destination address inside the guest buffer
- *	iov	   - iovec describing the guest buffer; only iov_len is used
- *	i	   - index of this iovec in the caller's array; index 0 is the
- *		     buffer that may begin with the virtio header
- *	rxhdr_size - bytes reserved at the start of buffer 0 for the virtio
- *		     header (the merged-rx header is 2 bytes longer than the
- *		     plain one); ignored when i is not 0
- *
- * Returns the number of bytes copied, which is 0 when the buffer has no room
- * for data or the message has nothing left past copied_buf.
- */
-static int
-copy_in_mblk(mblk_t *mp, int copied_buf, caddr_t buf, struct iovec *iov,
-    int i, int rxhdr_size)
-{
-	mblk_t	*ml;
-	size_t	skip, room, copied = 0;
-	int	total_buf_len = iov->iov_len;
-
-	/* The first buffer may start with the header; leave room for it. */
-	if (i == 0) {
-		total_buf_len = iov->iov_len - rxhdr_size;
-	}
-	if (total_buf_len <= 0 || copied_buf < 0) {
-		return (0);
-	}
-
-	skip = (size_t)copied_buf;
-	room = (size_t)total_buf_len;
-
-	for (ml = mp; ml != NULL && copied < room; ml = ml->b_cont) {
-		size_t blklen = MBLKL(ml);
-		size_t chunk;
-
-		/* Step over blocks that earlier calls fully consumed. */
-		if (skip >= blklen) {
-			skip -= blklen;
-			continue;
-		}
-
-		/* Copy what is left of this block, clamped to the buffer. */
-		chunk = blklen - skip;
-		if (chunk > room - copied) {
-			chunk = room - copied;
-		}
-		bcopy(ml->b_rptr + skip, buf + copied, chunk);
-		copied += chunk;
-
-		/* The resume offset only applies to the first block used. */
-		skip = 0;
-	}
-
-	return ((int)copied);
-}
-
-/*
- * MAC receive callback: deliver a chain of packets (linked by b_next) into
- * the guest's receive ring.
- *
- *	arg	 - the viona_link_t registered with mac_rx_set()
- *	mrh	 - unused
- *	mp	 - head of the packet chain; always freed before returning
- *	loopback - unused
- *
- * For each packet one descriptor chain is popped, the packet is copied into
- * the guest buffers after the virtio header, and the chain is returned on
- * the used ring.  With VIRTIO_NET_F_MRG_RXBUF further chains are popped as
- * needed for a packet that does not fit in one.  Delivery stops when the
- * ring has no more available chains.  If a packet cannot be placed, the
- * whole remaining chain is dropped and no poll wakeup is issued.
- *
- * NOTE(review): in the merged-rx path uelem[] is filled per iovec while
- * num_pops entries are pushed, and "cookie" is overwritten by each pop, so
- * multi-segment chains look mis-accounted -- confirm against the guest
- * drivers this was written for.
- * NOTE(review): in the plain path a multi-segment chain whose first buffer
- * is shorter than the header is not rejected before the header is zeroed.
- */
-static void
-viona_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
-    boolean_t loopback)
-{
-	viona_link_t *link = arg;
-	viona_vring_hqueue_t *hq = &link->l_rx_vring;
-	mblk_t *mp0 = mp;
-
-	while (viona_hq_num_avail(hq)) {
-		struct iovec iov[VTNET_MAXSEGS];
-		size_t mblklen;
-		int n, i = 0;
-		uint16_t cookie;
-		struct virtio_net_hdr *vrx = NULL;
-		struct virtio_net_mrgrxhdr *vmrgrx = NULL;
-		caddr_t buf = NULL;
-		int total_len = 0;
-		int copied_buf = 0;
-		int num_bufs = 0;
-		int num_pops = 0;
-		used_elem_t uelem[VTNET_MAXSEGS];
-
-		if (mp == NULL) {
-			break;
-		}
-		mblklen = msgsize(mp);
-		if (mblklen == 0) {
-			break;
-		}
-
-		mutex_enter(&hq->hq_a_mutex);
-		n = vq_popchain(link, hq, iov, VTNET_MAXSEGS, &cookie);
-		mutex_exit(&hq->hq_a_mutex);
-		if (n <= 0) {
-			break;
-		}
-		num_pops++;
-		if (link->l_features & VIRTIO_NET_F_MRG_RXBUF) {
-			int total_n = n;
-			int mrgrxhdr_size = sizeof (struct virtio_net_mrgrxhdr);
-
-			/*
-			 * The header is written into the first buffer, so
-			 * that buffer must be able to hold it.
-			 */
-			if (iov[0].iov_len < mrgrxhdr_size) {
-				freemsgchain(mp0);
-				return;
-			}
-			/*
-			 * Get a pointer to the rx header, and use the
-			 * data immediately following it for the packet buffer.
-			 */
-			vmrgrx = (struct virtio_net_mrgrxhdr *)iov[0].iov_base;
-			buf = iov[0].iov_base + mrgrxhdr_size;
-			while (mblklen > copied_buf) {
-				if (total_n == i) {
-					/* No room left to track more. */
-					if (i >= VTNET_MAXSEGS) {
-						freemsgchain(mp0);
-						return;
-					}
-					/*
-					 * Only the unused tail of iov[] is
-					 * available to the next chain.
-					 */
-					mutex_enter(&hq->hq_a_mutex);
-					n = vq_popchain(link, hq, &iov[i],
-					    VTNET_MAXSEGS - i, &cookie);
-					mutex_exit(&hq->hq_a_mutex);
-					if (n <= 0) {
-						freemsgchain(mp0);
-						return;
-					}
-					num_pops++;
-					total_n += n;
-				}
-				if (total_n > i) {
-					int copied_chunk = 0;
-					if (i != 0) {
-						buf = iov[i].iov_base;
-					}
-					copied_chunk = copy_in_mblk(mp,
-					    copied_buf, buf, &iov[i], i,
-					    mrgrxhdr_size);
-					copied_buf += copied_chunk;
-					uelem[i].id = cookie;
-					uelem[i].len = copied_chunk;
-					if (i == 0) {
-						uelem[i].len += mrgrxhdr_size;
-					}
-				}
-				num_bufs++;
-				i++;
-			}
-		} else {
-			boolean_t virt_hdr_incl_iov = B_FALSE;
-			int rxhdr_size = sizeof (struct virtio_net_hdr);
-			/* First element is header */
-			vrx = (struct virtio_net_hdr *)iov[0].iov_base;
-			if (n == 1 || iov[0].iov_len > rxhdr_size) {
-				buf = iov[0].iov_base + rxhdr_size;
-				virt_hdr_incl_iov = B_TRUE;
-				total_len += rxhdr_size;
-				if (iov[0].iov_len < rxhdr_size) {
-					/* Buffer too small for the header. */
-					freemsgchain(mp0);
-					return;
-				}
-			} else {
-				total_len = iov[0].iov_len;
-			}
-			/* A header-only first buffer carries no data. */
-			if (iov[0].iov_len == rxhdr_size)
-				i++;
-			while (mblklen > copied_buf) {
-				if (n > i) {
-					int copied_chunk = 0;
-					if (i != 0) {
-						buf = iov[i].iov_base;
-					}
-					/*
-					 * In case of non-mrgrx buf, first
-					 * descriptor always has header and
-					 * rest of the descriptors have data.
-					 * But it is not guaranteed that first
-					 * descriptor will only have virtio
-					 * header. It might also have data.
-					 */
-					if (virt_hdr_incl_iov) {
-						copied_chunk = copy_in_mblk(mp,
-						    copied_buf, buf, &iov[i],
-						    i, rxhdr_size);
-					} else {
-						copied_chunk = copy_in_mblk(mp,
-						    copied_buf, buf, &iov[i],
-						    i, 0);
-					}
-					copied_buf += copied_chunk;
-					total_len += copied_chunk;
-				} else {
-					/*
-					 * Drop packet as it cant fit
-					 * in buf provided by guest.
-					 */
-					freemsgchain(mp0);
-					return;
-				}
-				i++;
-			}
-		}
-		/*
-		 * The only valid field in the rx packet header is the
-		 * number of buffers, which is always 1 without TSO
-		 * support.
-		 */
-		if (link->l_features & VIRTIO_NET_F_MRG_RXBUF) {
-			memset(vmrgrx, 0, sizeof (struct virtio_net_mrgrxhdr));
-			vmrgrx->vrh_bufs = num_bufs;
-			/*
-			 * Make sure iov[0].iov_len >= MIN_BUF_SIZE
-			 * otherwise guest will consider it as invalid frame.
-			 */
-			if (num_bufs == 1 && uelem[0].len < MIN_BUF_SIZE) {
-				uelem[0].len = MIN_BUF_SIZE;
-			}
-			/*
-			 * Release this chain and handle more chains.
-			 */
-			mutex_enter(&hq->hq_u_mutex);
-			vq_pushchain_mrgrx(hq, num_pops, uelem);
-			mutex_exit(&hq->hq_u_mutex);
-		} else {
-			memset(vrx, 0, sizeof (struct virtio_net_hdr));
-			if (total_len < MIN_BUF_SIZE) {
-				total_len = MIN_BUF_SIZE;
-			}
-			/*
-			 * Release this chain and handle more chains.
-			 */
-			mutex_enter(&hq->hq_u_mutex);
-			vq_pushchain(hq, total_len, cookie);
-			mutex_exit(&hq->hq_u_mutex);
-		}
-
-		mp = mp->b_next;
-	}
-
-	/* Wake the poller once, unless the guest asked not to be told. */
-	if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
-		if (atomic_cas_uint(&link->l_rx_intr, 0, 1) == 0) {
-			pollwakeup(&link->l_pollhead, POLLIN);
-		}
-	}
-
-	freemsgchain(mp0);
-}
-
-/*
- * Drop one reference on a transmit tracking structure; viona_tx() installs
- * this as the free routine of the mblks it builds.
- *
- * Nothing happens until the last reference goes away.  At that point the
- * descriptor chain is returned to the guest on the transmit used ring, the
- * structure goes back to its kmem cache, a POLLOUT wakeup is issued (unless
- * the guest set VRING_AVAIL_F_NO_INTERRUPT or a transmit interrupt is
- * already pending) and, when copy_tx_mblks is set, l_tx_outstanding is
- * decremented with waiters on l_tx_cv woken once it reaches zero.
- */
-static void
-viona_desb_free(viona_desb_t *dp)
-{
-	viona_link_t		*link;
-	viona_vring_hqueue_t	*hq;
-
-	if (atomic_dec_uint_nv(&dp->d_ref) != 0) {
-		return;
-	}
-
-	link = dp->d_link;
-	hq = &link->l_tx_vring;
-
-	mutex_enter(&hq->hq_u_mutex);
-	vq_pushchain(hq, dp->d_len, dp->d_cookie);
-	mutex_exit(&hq->hq_u_mutex);
-
-	/* dp must not be touched past this point. */
-	kmem_cache_free(link->l_desb_kmc, dp);
-
-	if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0 &&
-	    atomic_cas_uint(&link->l_tx_intr, 0, 1) == 0) {
-		pollwakeup(&link->l_pollhead, POLLOUT);
-	}
-
-	if (copy_tx_mblks) {
-		mutex_enter(&link->l_tx_mutex);
-		link->l_tx_outstanding--;
-		if (link->l_tx_outstanding == 0) {
-			cv_broadcast(&link->l_tx_cv);
-		}
-		mutex_exit(&link->l_tx_mutex);
-	}
-}
-
-/*
- * Transmit one descriptor chain from the guest's transmit ring.
- *
- * The chain is popped from hq, every segment after the first (the first is
- * treated as the virtio header and is not sent) becomes one mblk, and the
- * resulting message is passed to mac_tx() on the link's MAC client.
- *
- * When copy_tx_mblks is set the mblks are built with desballoc() directly
- * over guest memory; the chain is returned to the guest by
- * viona_desb_free() once the last of those mblks is freed, and
- * l_tx_outstanding counts such in-flight chains.  Otherwise the data is
- * copied into freshly allocated mblks and the chain is returned to the
- * guest before mac_tx() is called.
- * NOTE(review): the flag's name reads as the opposite of what it selects.
- *
- * A malformed chain is skipped, and a chain for which an mblk cannot be
- * allocated is returned to the guest without being sent.
- */
-static void
-viona_tx(viona_link_t *link, viona_vring_hqueue_t *hq)
-{
-	struct iovec		iov[VTNET_MAXSEGS];
-	uint16_t		cookie;
-	int			i, n;
-	uint_t			nloaned = 0;
-	boolean_t		failed = B_FALSE;
-	mblk_t			*mp_head = NULL, *mp_tail = NULL, *mp;
-	viona_desb_t		*dp;
-	mac_client_handle_t	link_mch = link->l_mch;
-
-	mutex_enter(&hq->hq_a_mutex);
-	n = vq_popchain(link, hq, iov, VTNET_MAXSEGS, &cookie);
-	if (n < 0) {
-		/*
-		 * vq_popchain() does not consume a chain it rejects.  Step
-		 * past it so callers that loop while the ring is non-empty
-		 * make progress instead of retrying the same bad chain.
-		 */
-		hq->hq_cur_aidx++;
-	}
-	mutex_exit(&hq->hq_a_mutex);
-	if (n <= 0) {
-		/* iov and cookie are not valid; there is nothing to send. */
-		return;
-	}
-
-	dp = kmem_cache_alloc(link->l_desb_kmc, KM_SLEEP);
-	dp->d_frtn.free_func = viona_desb_free;
-	dp->d_frtn.free_arg = (void *)dp;
-	dp->d_link = link;
-	dp->d_cookie = cookie;
-	dp->d_ref = 0;
-	dp->d_len = iov[0].iov_len;
-
-	/*
-	 * Account for the chain before any mblk that could release it
-	 * exists; viona_desb_free() performs the matching decrement.
-	 */
-	if (copy_tx_mblks) {
-		mutex_enter(&link->l_tx_mutex);
-		link->l_tx_outstanding++;
-		mutex_exit(&link->l_tx_mutex);
-	}
-
-	for (i = 1; i < n; i++) {
-		dp->d_len += iov[i].iov_len;
-		if (copy_tx_mblks) {
-			mp = desballoc((uchar_t *)iov[i].iov_base,
-			    iov[i].iov_len, BPRI_MED, &dp->d_frtn);
-			if (mp != NULL) {
-				/* One reference per mblk that holds dp. */
-				dp->d_ref++;
-				nloaned++;
-			}
-		} else {
-			mp = allocb(iov[i].iov_len, BPRI_MED);
-			if (mp != NULL) {
-				bcopy((uchar_t *)iov[i].iov_base, mp->b_wptr,
-				    iov[i].iov_len);
-			}
-		}
-		if (mp == NULL) {
-			failed = B_TRUE;
-			break;
-		}
-		mp->b_wptr += iov[i].iov_len;
-		if (mp_head == NULL) {
-			mp_head = mp;
-		} else {
-			mp_tail->b_cont = mp;
-		}
-		mp_tail = mp;
-	}
-
-	if (failed && mp_head != NULL) {
-		/*
-		 * Drop the partial message.  If its mblks hold references
-		 * on dp, freeing the last of them releases dp as well.
-		 */
-		freemsg(mp_head);
-		mp_head = NULL;
-	}
-
-	if (nloaned == 0) {
-		/*
-		 * No mblk holds a reference on dp (data was copied, the
-		 * chain carried no payload, or the first allocation
-		 * failed), so release it here.  viona_desb_free() acts
-		 * only when the count drops to zero.
-		 */
-		dp->d_ref = 1;
-		viona_desb_free(dp);
-	}
-
-	if (mp_head == NULL) {
-		return;
-	}
-
-	mac_tx(link_mch, mp_head, 0, MAC_DROP_ON_NO_DESC, NULL);
-}