summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/io
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/io')
-rw-r--r--usr/src/uts/common/io/audio/impl/audio_grc3.h2
-rw-r--r--usr/src/uts/common/io/bge/bge_main2.c67
-rw-r--r--usr/src/uts/common/io/bnx/bnx.h6
-rw-r--r--usr/src/uts/common/io/bnx/bnxsnd.c2
-rw-r--r--usr/src/uts/common/io/chxge/pe.c12
-rw-r--r--usr/src/uts/common/io/cmlb.c4
-rw-r--r--usr/src/uts/common/io/cxgbe/common/common.h17
-rw-r--r--usr/src/uts/common/io/cxgbe/common/t4_hw.c85
-rw-r--r--usr/src/uts/common/io/cxgbe/firmware/t4fw_interface.h20
-rw-r--r--usr/src/uts/common/io/cxgbe/shared/shared.c6
-rw-r--r--usr/src/uts/common/io/cxgbe/shared/shared.h2
-rw-r--r--usr/src/uts/common/io/cxgbe/t4nex/cudbg.h2
-rw-r--r--usr/src/uts/common/io/cxgbe/t4nex/t4_ioctl.c2
-rw-r--r--usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c112
-rw-r--r--usr/src/uts/common/io/e1000g/e1000g_alloc.c2
-rw-r--r--usr/src/uts/common/io/mac/mac.c29
-rw-r--r--usr/src/uts/common/io/mac/mac_client.c27
-rw-r--r--usr/src/uts/common/io/mac/mac_provider.c17
-rw-r--r--usr/src/uts/common/io/mac/mac_sched.c6
-rw-r--r--usr/src/uts/common/io/mac/mac_util.c2
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx.c50
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx.h16
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_cmd.c101
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_gld.c162
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_intr.c1
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_reg.h55
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_ring.c105
-rw-r--r--usr/src/uts/common/io/stream.c2
28 files changed, 807 insertions, 107 deletions
diff --git a/usr/src/uts/common/io/audio/impl/audio_grc3.h b/usr/src/uts/common/io/audio/impl/audio_grc3.h
index 0003dc1574..4472307edf 100644
--- a/usr/src/uts/common/io/audio/impl/audio_grc3.h
+++ b/usr/src/uts/common/io/audio/impl/audio_grc3.h
@@ -53,7 +53,7 @@ typedef struct grc3state {
int32_t *historyptr;
int32_t dummy_pad1;
- int32_t history[GRC3_MAXHISTORY * 2];
+ int32_t history[GRC3_MAXHISTORY * 2 + 1];
uint32_t outsz;
} grc3state_t;
diff --git a/usr/src/uts/common/io/bge/bge_main2.c b/usr/src/uts/common/io/bge/bge_main2.c
index ab511c068d..81b6528c7c 100644
--- a/usr/src/uts/common/io/bge/bge_main2.c
+++ b/usr/src/uts/common/io/bge/bge_main2.c
@@ -1437,8 +1437,49 @@ bge_unicst_find(bge_t *bgep, const uint8_t *mac_addr)
}
/*
- * Programs the classifier to start steering packets matching 'mac_addr' to the
- * specified ring 'arg'.
+ * The job of bge_addmac() is to set up everything in hardware for the mac
+ * address indicated to map to the specified group.
+ *
+ * For this to make sense, we need to first understand how most of the bge chips
+ * work. A given packet reaches a ring in two distinct logical steps:
+ *
+ * 1) The device must accept the packet.
+ * 2) The device must steer an accepted packet to a specific ring.
+ *
+ * For step 1, the device has four global MAC address filtering registers. We
+ * must either add the address here or put the device in promiscuous mode.
+ * Because there are only four of these and up to four groups, each group is
+ * only allowed to program a single entry. Note, this is not explicitly done in
+ * the driver. Rather, it is implicitly done by how we implement step 2. These
+ * registers start at 0x410 and are referred to as the 'EMAC MAC Addresses' in
+ * the manuals.
+ *
+ * For step 2, the device has eight sets of rule registers that are used to
+ * control how a packet in step 1 is mapped to a specific ring. Each set is
+ * comprised of a control register and a mask register. These start at 0x480 and
+ * are referred to as the 'Receive Rules Control Registers' and 'Receive Rules
+ * Value/Mask Registers'. These can be used to check for a 16-bit or 32-bit
+ * value at an offset in the packet. In addition, two sets can be combined to
+ * create a single conditional rule.
+ *
+ * For our purposes, we need to use this mechanism to steer a mac address to a
+ * specific ring. This requires that we use two of the sets of registers per MAC
+ * address that comes in here. The data about this is stored in the
+ * 'mac_addr_rule' member of the 'recv_ring_t'.
+ *
+ * A reasonable question to ask is why are we storing this on the ring, when it
+ * relates to the group. The answer is that the current implementation of the
+ * driver assumes that each group is comprised of a single ring. While some
+ * parts may support additional rings, the driver doesn't take advantage of
+ * that.
+ *
+ * A result of all this is that the driver will support up to 4 groups today.
+ * Each group has a single ring. We want to make sure that each group can have a
+ * single MAC address programmed into it. This results in the check for a rule
+ * being assigned in the 'mac_addr_rule' member of the recv_ring_t below. If a
+ * future part were to support more global MAC address filters in step 1 and
+ * more rule registers needed for step 2, then we could relax this constraint
+ * and allow a group to have more than one MAC address assigned to it.
*/
static int
bge_addmac(void *arg, const uint8_t * mac_addr)
@@ -1461,7 +1502,10 @@ bge_addmac(void *arg, const uint8_t * mac_addr)
}
/*
- * First add the unicast address to a available slot.
+ * The driver only supports a MAC address being programmed to be
+ * received by one ring in step 2. We check the global table of MAC
+ * addresses to see if this address has already been claimed by another
+ * group as a way to determine that.
*/
slot = bge_unicst_find(bgep, mac_addr);
if (slot != -1) {
@@ -1469,6 +1513,17 @@ bge_addmac(void *arg, const uint8_t * mac_addr)
return (EEXIST);
}
+ /*
+ * Check to see if this group has already used its hardware resources
+ * for step 2. If so, we have to return ENOSPC to MAC to indicate that
+ * this group cannot handle an additional MAC address and that MAC will
+ * need to use software classification on the default group.
+ */
+ if (rrp->mac_addr_rule != NULL) {
+ mutex_exit(bgep->genlock);
+ return (ENOSPC);
+ }
+
for (slot = 0; slot < bgep->unicst_addr_total; slot++) {
if (!bgep->curr_addr[slot].set) {
bgep->curr_addr[slot].set = B_TRUE;
@@ -1483,12 +1538,6 @@ bge_addmac(void *arg, const uint8_t * mac_addr)
if ((err = bge_unicst_set(bgep, mac_addr, slot)) != 0)
goto fail;
- /* A rule is already here. Deny this. */
- if (rrp->mac_addr_rule != NULL) {
- err = ether_cmp(mac_addr, rrp->mac_addr_val) ? EEXIST : EBUSY;
- goto fail;
- }
-
/*
* Allocate a bge_rule_info_t to keep track of which rule slots
* are being used.
diff --git a/usr/src/uts/common/io/bnx/bnx.h b/usr/src/uts/common/io/bnx/bnx.h
index e1d53fa9d7..9ef282678e 100644
--- a/usr/src/uts/common/io/bnx/bnx.h
+++ b/usr/src/uts/common/io/bnx/bnx.h
@@ -55,12 +55,6 @@ extern "C" {
-/*
- */
-#pragma weak hcksum_retrieve
-#pragma weak hcksum_assoc
-
-
#include "listq.h"
#include "lm5706.h"
#include "54xx_reg.h"
diff --git a/usr/src/uts/common/io/bnx/bnxsnd.c b/usr/src/uts/common/io/bnx/bnxsnd.c
index 16f1b03c10..f6e154c056 100644
--- a/usr/src/uts/common/io/bnx/bnxsnd.c
+++ b/usr/src/uts/common/io/bnx/bnxsnd.c
@@ -611,7 +611,7 @@ bnx_xmit_ring_xmit_mblk(um_device_t * const umdevice,
umpacket->frag_list.cnt = 0;
umpacket->mp = mp;
- hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags);
+ mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags);
bnx_xmit_pkt_cpy(umdevice, umpacket);
diff --git a/usr/src/uts/common/io/chxge/pe.c b/usr/src/uts/common/io/chxge/pe.c
index 652edba984..48a796470a 100644
--- a/usr/src/uts/common/io/chxge/pe.c
+++ b/usr/src/uts/common/io/chxge/pe.c
@@ -414,12 +414,12 @@ pe_start(ch_t *sa, mblk_t *mp, uint32_t flg)
lseg = ch_bind_dvma_handle(sa, len,
(void *)mp->b_rptr,
&hmp[nseg], mseg - nseg);
- if (lseg == NULL) {
+ if (lseg == 0) {
sa->sge->intr_cnt.tx_no_dvma1++;
if ((lseg = ch_bind_dma_handle(sa, len,
(void *)mp->b_rptr,
&hmp[nseg],
- mseg - nseg)) == NULL) {
+ mseg - nseg)) == 0) {
sa->sge->intr_cnt.tx_no_dma1++;
/*
@@ -444,7 +444,7 @@ pe_start(ch_t *sa, mblk_t *mp, uint32_t flg)
lseg = ch_bind_dma_handle(sa, len,
(void *)mp->b_rptr, &hmp[nseg],
mseg - nseg);
- if (lseg == NULL) {
+ if (lseg == 0) {
sa->sge->intr_cnt.tx_no_dma1++;
/*
@@ -512,12 +512,12 @@ pe_start(ch_t *sa, mblk_t *mp, uint32_t flg)
nseg = ch_bind_dvma_handle(sa, len,
(void *)mp->b_rptr,
&hmp[0], 16);
- if (nseg == NULL) {
+ if (nseg == 0) {
sa->sge->intr_cnt.tx_no_dvma2++;
nseg = ch_bind_dma_handle(sa, len,
(void *)mp->b_rptr,
&hmp[0], 16);
- if (nseg == NULL) {
+ if (nseg == 0) {
sa->sge->intr_cnt.tx_no_dma2++;
/*
@@ -530,7 +530,7 @@ pe_start(ch_t *sa, mblk_t *mp, uint32_t flg)
} else {
nseg = ch_bind_dma_handle(sa, len,
(void *)mp->b_rptr, &hmp[0], 16);
- if (nseg == NULL) {
+ if (nseg == 0) {
sa->sge->intr_cnt.tx_no_dma2++;
/*
diff --git a/usr/src/uts/common/io/cmlb.c b/usr/src/uts/common/io/cmlb.c
index 6275948465..f4ae9f3ed5 100644
--- a/usr/src/uts/common/io/cmlb.c
+++ b/usr/src/uts/common/io/cmlb.c
@@ -1514,7 +1514,7 @@ cmlb_create_minor_nodes(struct cmlb_lun *cl)
if (cl->cl_alter_behavior & CMLB_CREATE_P0_MINOR_NODE) {
if (cmlb_create_minor(CMLB_DEVINFO(cl), "q", S_IFBLK,
(instance << CMLBUNIT_FORCE_P0_SHIFT) | P0_RAW_DISK,
- cl->cl_node_type, NULL, internal) == DDI_FAILURE) {
+ cl->cl_node_type, 0, internal) == DDI_FAILURE) {
ddi_remove_minor_node(CMLB_DEVINFO(cl), NULL);
return (ENXIO);
}
@@ -1522,7 +1522,7 @@ cmlb_create_minor_nodes(struct cmlb_lun *cl)
if (cmlb_create_minor(CMLB_DEVINFO(cl), "q,raw",
S_IFCHR,
(instance << CMLBUNIT_FORCE_P0_SHIFT) | P0_RAW_DISK,
- cl->cl_node_type, NULL, internal) == DDI_FAILURE) {
+ cl->cl_node_type, 0, internal) == DDI_FAILURE) {
ddi_remove_minor_node(CMLB_DEVINFO(cl), NULL);
return (ENXIO);
}
diff --git a/usr/src/uts/common/io/cxgbe/common/common.h b/usr/src/uts/common/io/cxgbe/common/common.h
index c7de2c4ebf..b8d77ebda3 100644
--- a/usr/src/uts/common/io/cxgbe/common/common.h
+++ b/usr/src/uts/common/io/cxgbe/common/common.h
@@ -20,6 +20,10 @@
* release for licensing terms and conditions.
*/
+/*
+ * Copyright 2020 RackTop Systems, Inc.
+ */
+
#ifndef __CHELSIO_COMMON_H
#define __CHELSIO_COMMON_H
@@ -103,9 +107,16 @@ enum {
typedef unsigned char cc_pause_t;
enum {
- FEC_AUTO = 1 << 0, /* IEEE 802.3 "automatic" */
- FEC_RS = 1 << 1, /* Reed-Solomon */
- FEC_BASER_RS = 1 << 2, /* BaseR/Reed-Solomon */
+ FEC_RS = 1 << 0, /* Reed-Solomon */
+ FEC_BASER_RS = 1 << 1, /* Base-R, aka Firecode */
+ FEC_NONE = 1 << 2, /* no FEC */
+
+ /*
+ * Pseudo FECs that translate to real FECs. The firmware knows nothing
+ * about these and they start at M_FW_PORT_CAP32_FEC + 1. AUTO should
+ * be set all by itself.
+ */
+ FEC_AUTO = 1 << 5,
};
typedef unsigned char cc_fec_t;
diff --git a/usr/src/uts/common/io/cxgbe/common/t4_hw.c b/usr/src/uts/common/io/cxgbe/common/t4_hw.c
index ae88f36f15..4bb48f1b3a 100644
--- a/usr/src/uts/common/io/cxgbe/common/t4_hw.c
+++ b/usr/src/uts/common/io/cxgbe/common/t4_hw.c
@@ -20,6 +20,10 @@
* release for licensing terms and conditions.
*/
+/*
+ * Copyright 2020 RackTop Systems, Inc.
+ */
+
#include "common.h"
#include "t4_regs.h"
#include "t4_regs_values.h"
@@ -4645,20 +4649,57 @@ static inline cc_fec_t fwcap_to_cc_fec(fw_port_cap32_t fw_fec)
if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS)
cc_fec |= FEC_BASER_RS;
- return cc_fec;
+ if (cc_fec == 0)
+ cc_fec = FEC_NONE;
+
+ return (cc_fec);
}
/* Translate Common Code Forward Error Correction specification to Firmware */
-static inline fw_port_cap32_t cc_to_fwcap_fec(cc_fec_t cc_fec)
+static inline boolean_t
+cc_to_fwcap_fec(fw_port_cap32_t *fw_fecp, cc_fec_t cc_fec,
+ struct link_config *lc)
{
fw_port_cap32_t fw_fec = 0;
- if (cc_fec & FEC_RS)
+ if ((cc_fec & FEC_AUTO) != 0) {
+ if ((lc->pcaps & FW_PORT_CAP32_SPEED_100G) == 0)
+ fw_fec |= FW_PORT_CAP32_FEC_BASER_RS;
+
+ if ((lc->pcaps & FW_PORT_CAP32_FORCE_FEC) != 0)
+ fw_fec |= FW_PORT_CAP32_FEC_NO_FEC;
+
+ fw_fec |= FW_PORT_CAP32_FEC_RS;
+
+ *fw_fecp = fw_fec;
+ return (B_TRUE);
+ }
+
+ if ((cc_fec & FEC_RS) != 0)
fw_fec |= FW_PORT_CAP32_FEC_RS;
- if (cc_fec & FEC_BASER_RS)
+
+ if ((cc_fec & FEC_BASER_RS) != 0 &&
+ (lc->pcaps & FW_PORT_CAP32_SPEED_100G) == 0)
fw_fec |= FW_PORT_CAP32_FEC_BASER_RS;
- return fw_fec;
+ if ((cc_fec & FEC_NONE) != 0) {
+ if ((lc->pcaps & FW_PORT_CAP32_FORCE_FEC) != 0) {
+ fw_fec |= FW_PORT_CAP32_FORCE_FEC;
+ fw_fec |= FW_PORT_CAP32_FEC_NO_FEC;
+ }
+
+ *fw_fecp = fw_fec;
+ return (B_TRUE);
+ }
+
+ if (fw_fec == 0)
+ return (B_FALSE);
+
+ if ((lc->pcaps & FW_PORT_CAP32_FORCE_FEC) != 0)
+ fw_fec |= FW_PORT_CAP32_FORCE_FEC;
+
+ *fw_fecp = fw_fec;
+ return (B_TRUE);
}
/**
@@ -4692,11 +4733,18 @@ fw_port_cap32_t t4_link_acaps(struct adapter *adapter, unsigned int port,
* the Transceiver Module EPROM FEC parameters. Otherwise we
* use whatever is in the current Requested FEC settings.
*/
- if (lc->requested_fec & FEC_AUTO)
- cc_fec = fwcap_to_cc_fec(lc->def_acaps);
- else
- cc_fec = lc->requested_fec;
- fw_fec = cc_to_fwcap_fec(cc_fec);
+ if (fec_supported(lc->pcaps)) {
+ if (lc->requested_fec & FEC_AUTO)
+ cc_fec = fwcap_to_cc_fec(lc->def_acaps);
+ else
+ cc_fec = lc->requested_fec;
+
+ if (!cc_to_fwcap_fec(&fw_fec, cc_fec, lc))
+ return (0);
+ } else {
+ fw_fec = 0;
+ cc_fec = FEC_NONE;
+ }
/* Figure out what our Requested Port Capabilities are going to be.
* Note parallel structure in t4_handle_get_port_info() and
@@ -9641,12 +9689,17 @@ static void init_link_config(struct link_config *lc, fw_port_cap32_t pcaps,
lc->speed = 0;
lc->requested_fc = lc->fc = PAUSE_RX | PAUSE_TX;
- /*
- * For Forward Error Control, we default to whatever the Firmware
- * tells us the Link is currently advertising.
- */
- lc->requested_fec = FEC_AUTO;
- lc->fec = fwcap_to_cc_fec(lc->def_acaps);
+ if (fec_supported(pcaps)) {
+ /*
+ * For Forward Error Correction, we default to whatever the
+ * Firmware tells us the Link is currently advertising.
+ */
+ lc->requested_fec = FEC_AUTO;
+ lc->fec = fwcap_to_cc_fec(lc->def_acaps);
+ } else {
+ lc->requested_fec = FEC_NONE;
+ lc->fec = FEC_NONE;
+ }
/* If the Port is capable of Auto-Negtotiation, initialize it as
* "enabled" and copy over all of the Physical Port Capabilities
diff --git a/usr/src/uts/common/io/cxgbe/firmware/t4fw_interface.h b/usr/src/uts/common/io/cxgbe/firmware/t4fw_interface.h
index d705c73891..b998e85bae 100644
--- a/usr/src/uts/common/io/cxgbe/firmware/t4fw_interface.h
+++ b/usr/src/uts/common/io/cxgbe/firmware/t4fw_interface.h
@@ -11,6 +11,10 @@
* release for licensing terms and conditions.
*/
+/*
+ * Copyright 2020 RackTop Systems, Inc.
+ */
+
#ifndef _T4FW_INTERFACE_H_
#define _T4FW_INTERFACE_H_
@@ -7204,11 +7208,12 @@ enum fw_port_mdi {
#define FW_PORT_CAP32_MDISTRAIGHT 0x00400000UL
#define FW_PORT_CAP32_FEC_RS 0x00800000UL
#define FW_PORT_CAP32_FEC_BASER_RS 0x01000000UL
-#define FW_PORT_CAP32_FEC_RESERVED1 0x02000000UL
+#define FW_PORT_CAP32_FEC_NO_FEC 0x02000000UL
#define FW_PORT_CAP32_FEC_RESERVED2 0x04000000UL
#define FW_PORT_CAP32_FEC_RESERVED3 0x08000000UL
#define FW_PORT_CAP32_FORCE_PAUSE 0x10000000UL
-#define FW_PORT_CAP32_RESERVED2 0xe0000000UL
+#define FW_PORT_CAP32_FORCE_FEC 0x20000000UL
+#define FW_PORT_CAP32_RESERVED2 0xc0000000UL
#define S_FW_PORT_CAP32_SPEED 0
#define M_FW_PORT_CAP32_SPEED 0xfff
@@ -7254,7 +7259,7 @@ enum fw_port_mdi32 {
(((x) >> S_FW_PORT_CAP32_MDI) & M_FW_PORT_CAP32_MDI)
#define S_FW_PORT_CAP32_FEC 23
-#define M_FW_PORT_CAP32_FEC 0x1f
+#define M_FW_PORT_CAP32_FEC 0x5f
#define V_FW_PORT_CAP32_FEC(x) ((x) << S_FW_PORT_CAP32_FEC)
#define G_FW_PORT_CAP32_FEC(x) \
(((x) >> S_FW_PORT_CAP32_FEC) & M_FW_PORT_CAP32_FEC)
@@ -7269,6 +7274,15 @@ enum fw_port_mdi32 {
#define CAP32_FC(__cap32) \
(V_FW_PORT_CAP32_FC(M_FW_PORT_CAP32_FC) & __cap32)
+#ifdef _KERNEL
+static inline boolean_t
+fec_supported(uint32_t caps)
+{
+ return ((caps & (FW_PORT_CAP32_SPEED_25G | FW_PORT_CAP32_SPEED_50G |
+ FW_PORT_CAP32_SPEED_100G)) != 0);
+}
+#endif
+
enum fw_port_action {
FW_PORT_ACTION_L1_CFG = 0x0001,
FW_PORT_ACTION_L2_CFG = 0x0002,
diff --git a/usr/src/uts/common/io/cxgbe/shared/shared.c b/usr/src/uts/common/io/cxgbe/shared/shared.c
index 07dd78f189..e86272134a 100644
--- a/usr/src/uts/common/io/cxgbe/shared/shared.c
+++ b/usr/src/uts/common/io/cxgbe/shared/shared.c
@@ -32,17 +32,19 @@
static int rxbuf_ctor(void *, void *, int);
static void rxbuf_dtor(void *, void *);
-void
+int
cxgb_printf(dev_info_t *dip, int level, char *f, ...)
{
va_list list;
char fmt[128];
+ int rv;
- (void) snprintf(fmt, sizeof (fmt), "%s%d: %s", ddi_driver_name(dip),
+ rv = snprintf(fmt, sizeof (fmt), "%s%d: %s", ddi_driver_name(dip),
ddi_get_instance(dip), f);
va_start(list, f);
vcmn_err(level, fmt, list);
va_end(list);
+ return (rv);
}
kmem_cache_t *
diff --git a/usr/src/uts/common/io/cxgbe/shared/shared.h b/usr/src/uts/common/io/cxgbe/shared/shared.h
index 5838416838..d3171c224b 100644
--- a/usr/src/uts/common/io/cxgbe/shared/shared.h
+++ b/usr/src/uts/common/io/cxgbe/shared/shared.h
@@ -66,7 +66,7 @@ struct rxbuf_cache_params {
size_t buf_size;
};
-void cxgb_printf(dev_info_t *dip, int level, char *f, ...);
+int cxgb_printf(dev_info_t *dip, int level, char *f, ...);
kmem_cache_t *rxbuf_cache_create(struct rxbuf_cache_params *p);
void rxbuf_cache_destroy(kmem_cache_t *cache);
struct rxbuf *rxbuf_alloc(kmem_cache_t *cache, int kmflags, uint_t ref_cnt);
diff --git a/usr/src/uts/common/io/cxgbe/t4nex/cudbg.h b/usr/src/uts/common/io/cxgbe/t4nex/cudbg.h
index cb21451e5c..e86de21085 100644
--- a/usr/src/uts/common/io/cxgbe/t4nex/cudbg.h
+++ b/usr/src/uts/common/io/cxgbe/t4nex/cudbg.h
@@ -318,7 +318,7 @@ static struct el ATTRIBUTE_UNUSED entity_list[] = {
};
#ifdef _KERNEL
-typedef int (*cudbg_print_cb) (dev_info_t *dip, ...);
+typedef int (*cudbg_print_cb) (dev_info_t *dip, int, char *, ...);
#else
typedef int (*cudbg_print_cb) (char *, ...);
#endif
diff --git a/usr/src/uts/common/io/cxgbe/t4nex/t4_ioctl.c b/usr/src/uts/common/io/cxgbe/t4nex/t4_ioctl.c
index ee28c8a2ba..85d79e6201 100644
--- a/usr/src/uts/common/io/cxgbe/t4nex/t4_ioctl.c
+++ b/usr/src/uts/common/io/cxgbe/t4nex/t4_ioctl.c
@@ -1706,7 +1706,7 @@ get_cudbg(struct adapter *sc, void *data, int flags)
cudbg = cudbg_get_init(handle);
cudbg->adap = sc;
- cudbg->print = (cudbg_print_cb)(uintptr_t)cxgb_printf;
+ cudbg->print = cxgb_printf;
memcpy(cudbg->dbg_bitmap, dump.bitmap, sizeof(cudbg->dbg_bitmap));
diff --git a/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c b/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c
index 59c0ddde8d..9b4ffd8325 100644
--- a/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c
+++ b/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c
@@ -20,6 +20,10 @@
* release for licensing terms and conditions.
*/
+/*
+ * Copyright 2020 RackTop Systems, Inc.
+ */
+
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/dlpi.h>
@@ -930,6 +934,62 @@ t4_mc_getcapab(void *arg, mac_capab_t cap, void *data)
return (status);
}
+static link_fec_t
+fec_to_link_fec(cc_fec_t cc_fec)
+{
+ link_fec_t link_fec = 0;
+
+ if ((cc_fec & (FEC_RS | FEC_BASER_RS)) == (FEC_RS | FEC_BASER_RS))
+ return (LINK_FEC_AUTO);
+
+ if ((cc_fec & FEC_NONE) != 0)
+ link_fec |= LINK_FEC_NONE;
+
+ if ((cc_fec & FEC_AUTO) != 0)
+ link_fec |= LINK_FEC_AUTO;
+
+ if ((cc_fec & FEC_RS) != 0)
+ link_fec |= LINK_FEC_RS;
+
+ if ((cc_fec & FEC_BASER_RS) != 0)
+ link_fec |= LINK_FEC_BASE_R;
+
+ return (link_fec);
+}
+
+static int
+link_fec_to_fec(int v)
+{
+ int fec = 0;
+
+ if ((v & LINK_FEC_AUTO) != 0) {
+ fec = FEC_AUTO;
+ v &= ~LINK_FEC_AUTO;
+ } else {
+ if ((v & LINK_FEC_NONE) != 0) {
+ fec = FEC_NONE;
+ v &= ~LINK_FEC_NONE;
+ }
+
+ if ((v & LINK_FEC_RS) != 0) {
+ fec |= FEC_RS;
+ v &= ~LINK_FEC_RS;
+ }
+
+ if ((v & LINK_FEC_BASE_R) != 0) {
+ fec |= FEC_BASER_RS;
+ v &= ~LINK_FEC_BASE_R;
+ }
+ }
+
+ if (v != 0)
+ return (-1);
+
+ ASSERT3S(fec, !=, 0);
+
+ return (fec);
+}
+
/* ARGSUSED */
static int
t4_mc_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t size,
@@ -941,7 +1001,9 @@ t4_mc_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t size,
uint8_t v8 = *(uint8_t *)val;
uint32_t v32 = *(uint32_t *)val;
int old, new = 0, relink = 0, rx_mode = 0, rc = 0;
+ boolean_t down_link = B_TRUE;
link_flowctrl_t fc;
+ link_fec_t fec;
/*
* Save a copy of link_config. This can be used to restore link_config
@@ -1009,6 +1071,30 @@ t4_mc_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t size,
}
break;
+ case MAC_PROP_EN_FEC_CAP:
+ if (!fec_supported(lc->pcaps)) {
+ rc = ENOTSUP;
+ break;
+ }
+
+ fec = *(link_fec_t *)val;
+ new = link_fec_to_fec(fec);
+ if (new < 0) {
+ rc = EINVAL;
+ } else if (new != lc->requested_fec) {
+ lc->requested_fec = new;
+ relink = 1;
+ /*
+ * For fec, do not preemptively force the link
+ * down. If changing fec causes the link state
+ * to transition, then appropriate asynchronous
+ * events are generated which correctly reflect
+ * the link state.
+ */
+ down_link = B_FALSE;
+ }
+ break;
+
case MAC_PROP_EN_10GFDX_CAP:
if (lc->pcaps & FW_PORT_CAP32_ANEG && is_10G_port(pi)) {
old = lc->acaps & FW_PORT_CAP32_SPEED_10G;
@@ -1062,7 +1148,8 @@ t4_mc_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t size,
if (isset(&sc->open_device_map, pi->port_id) != 0) {
if (relink != 0) {
- t4_os_link_changed(pi->adapter, pi->port_id, 0);
+ if (down_link)
+ t4_os_link_changed(pi->adapter, pi->port_id, 0);
rc = begin_synchronized_op(pi, 1, 1);
if (rc != 0)
return (rc);
@@ -1143,6 +1230,20 @@ t4_mc_getprop(void *arg, const char *name, mac_prop_id_t id, uint_t size,
*(link_flowctrl_t *)val = LINK_FLOWCTRL_NONE;
break;
+ case MAC_PROP_ADV_FEC_CAP:
+ if (!fec_supported(lc->pcaps))
+ return (ENOTSUP);
+
+ *(link_fec_t *)val = fec_to_link_fec(lc->fec);
+ break;
+
+ case MAC_PROP_EN_FEC_CAP:
+ if (!fec_supported(lc->pcaps))
+ return (ENOTSUP);
+
+ *(link_fec_t *)val = fec_to_link_fec(lc->requested_fec);
+ break;
+
case MAC_PROP_ADV_100GFDX_CAP:
case MAC_PROP_EN_100GFDX_CAP:
*u = !!(lc->acaps & FW_PORT_CAP32_SPEED_100G);
@@ -1212,6 +1313,15 @@ t4_mc_propinfo(void *arg, const char *name, mac_prop_id_t id,
mac_prop_info_set_default_link_flowctrl(ph, LINK_FLOWCTRL_BI);
break;
+ case MAC_PROP_EN_FEC_CAP:
+ mac_prop_info_set_default_fec(ph, LINK_FEC_AUTO);
+ break;
+
+ case MAC_PROP_ADV_FEC_CAP:
+ mac_prop_info_set_perm(ph, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_fec(ph, LINK_FEC_AUTO);
+ break;
+
case MAC_PROP_EN_10GFDX_CAP:
if (lc->pcaps & FW_PORT_CAP32_ANEG &&
lc->pcaps & FW_PORT_CAP32_SPEED_10G)
diff --git a/usr/src/uts/common/io/e1000g/e1000g_alloc.c b/usr/src/uts/common/io/e1000g/e1000g_alloc.c
index c7496cd164..8a460fd45a 100644
--- a/usr/src/uts/common/io/e1000g/e1000g_alloc.c
+++ b/usr/src/uts/common/io/e1000g/e1000g_alloc.c
@@ -830,7 +830,7 @@ e1000g_free_dvma_buffer(dma_buffer_t *buf)
return;
}
- buf->dma_address = NULL;
+ buf->dma_address = 0;
if (buf->address != NULL) {
kmem_free(buf->address, buf->size);
diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c
index d698862d81..4ce359f87b 100644
--- a/usr/src/uts/common/io/mac/mac.c
+++ b/usr/src/uts/common/io/mac/mac.c
@@ -23,6 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2020 Joyent, Inc.
* Copyright 2015 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2020 RackTop Systems, Inc.
*/
/*
@@ -3341,6 +3342,10 @@ mac_prop_check_size(mac_prop_id_t id, uint_t valsize, boolean_t is_range)
case MAC_PROP_FLOWCTRL:
minsize = sizeof (link_flowctrl_t);
break;
+ case MAC_PROP_ADV_FEC_CAP:
+ case MAC_PROP_EN_FEC_CAP:
+ minsize = sizeof (link_fec_t);
+ break;
case MAC_PROP_ADV_5000FDX_CAP:
case MAC_PROP_EN_5000FDX_CAP:
case MAC_PROP_ADV_2500FDX_CAP:
@@ -3529,6 +3534,28 @@ mac_set_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val,
break;
}
+ case MAC_PROP_ADV_FEC_CAP:
+ case MAC_PROP_EN_FEC_CAP: {
+ link_fec_t fec;
+
+ ASSERT(valsize >= sizeof (link_fec_t));
+
+ /*
+ * fec cannot be zero, and auto must be set exclusively.
+ */
+ bcopy(val, &fec, sizeof (link_fec_t));
+ if (fec == 0)
+ return (EINVAL);
+ if ((fec & LINK_FEC_AUTO) != 0 && (fec & ~LINK_FEC_AUTO) != 0)
+ return (EINVAL);
+
+ if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) {
+ err = mip->mi_callbacks->mc_setprop(mip->mi_driver,
+ name, id, valsize, val);
+ }
+ break;
+ }
+
default:
/* For other driver properties, call driver's callback */
if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) {
@@ -4741,7 +4768,7 @@ mac_bridge_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp)
* The bridge may place this mblk on a provider's Tx
* path, a mac's Rx path, or both. Since we don't have
* enough information at this point, we can't be sure
- * that the desination(s) are capable of handling the
+ * that the destination(s) are capable of handling the
* hardware offloads requested by the mblk. We emulate
* them here as it is the safest choice. In the
* future, if bridge performance becomes a priority,
diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c
index dcfb4803d6..b166e7987a 100644
--- a/usr/src/uts/common/io/mac/mac_client.c
+++ b/usr/src/uts/common/io/mac/mac_client.c
@@ -4243,7 +4243,7 @@ mac_promisc_dispatch(mac_impl_t *mip, mblk_t *mp_chain,
mpip->mpi_type == MAC_CLIENT_PROMISC_ALL ||
is_mcast) {
mac_promisc_dispatch_one(mpip, mp, is_sender,
- local);
+ local);
}
}
}
@@ -4274,7 +4274,7 @@ mac_promisc_client_dispatch(mac_client_impl_t *mcip, mblk_t *mp_chain)
if (mpip->mpi_type == MAC_CLIENT_PROMISC_FILTERED &&
!is_mcast) {
mac_promisc_dispatch_one(mpip, mp, B_FALSE,
- B_FALSE);
+ B_FALSE);
}
}
}
@@ -4352,12 +4352,27 @@ i_mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
{
mac_impl_t *mip = (mac_impl_t *)mh;
- if (mip->mi_bridge_link != NULL && cap == MAC_CAPAB_NO_ZCOPY)
+ if (mip->mi_bridge_link != NULL && cap == MAC_CAPAB_NO_ZCOPY) {
return (B_TRUE);
- else if (mip->mi_callbacks->mc_callbacks & MC_GETCAPAB)
- return (mip->mi_getcapab(mip->mi_driver, cap, cap_data));
- else
+ } else if (mip->mi_callbacks->mc_callbacks & MC_GETCAPAB) {
+ boolean_t res;
+
+ res = mip->mi_getcapab(mip->mi_driver, cap, cap_data);
+ /*
+ * Until we have support for TSOv6 emulation in the MAC
+ * loopback path, do not allow the TSOv6 capability to be
+ * advertised to consumers.
+ */
+ if (res && cap == MAC_CAPAB_LSO) {
+ mac_capab_lso_t *cap_lso = cap_data;
+
+ cap_lso->lso_flags &= ~LSO_TX_BASIC_TCP_IPV6;
+ cap_lso->lso_basic_tcp_ipv6.lso_max = 0;
+ }
+ return (res);
+ } else {
return (B_FALSE);
+ }
}
/*
diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c
index 7f193f68eb..bcca602589 100644
--- a/usr/src/uts/common/io/mac/mac_provider.c
+++ b/usr/src/uts/common/io/mac/mac_provider.c
@@ -23,6 +23,7 @@
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2020 RackTop Systems, Inc.
*/
#include <sys/types.h>
@@ -1530,6 +1531,22 @@ mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph,
}
void
+mac_prop_info_set_default_fec(mac_prop_info_handle_t ph, link_fec_t val)
+{
+ mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
+
+ /* nothing to do if the caller doesn't want the default value */
+ if (pr->pr_default == NULL)
+ return;
+
+ ASSERT(pr->pr_default_size >= sizeof (link_fec_t));
+
+ bcopy(&val, pr->pr_default, sizeof (val));
+
+ pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
+}
+
+void
mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min,
uint32_t max)
{
diff --git a/usr/src/uts/common/io/mac/mac_sched.c b/usr/src/uts/common/io/mac/mac_sched.c
index 94ec8add16..8f983e50e4 100644
--- a/usr/src/uts/common/io/mac/mac_sched.c
+++ b/usr/src/uts/common/io/mac/mac_sched.c
@@ -4443,9 +4443,9 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain,
mac_hw_emul(&mp, NULL, NULL, MAC_ALL_EMULS);
if (mp != NULL) {
(dst_flow_ent->fe_cb_fn)(
- dst_flow_ent->fe_cb_arg1,
- dst_flow_ent->fe_cb_arg2,
- mp, do_switch);
+ dst_flow_ent->fe_cb_arg1,
+ dst_flow_ent->fe_cb_arg2,
+ mp, do_switch);
}
}
diff --git a/usr/src/uts/common/io/mac/mac_util.c b/usr/src/uts/common/io/mac/mac_util.c
index 6e33fb7f56..03da3a3504 100644
--- a/usr/src/uts/common/io/mac/mac_util.c
+++ b/usr/src/uts/common/io/mac/mac_util.c
@@ -258,7 +258,7 @@ bail:
static boolean_t
mac_sw_cksum_ipv6(mblk_t *mp, uint32_t ip_hdr_offset, const char **err)
{
- ip6_t* ip6h = (ip6_t *)(mp->b_rptr + ip_hdr_offset);
+ ip6_t *ip6h = (ip6_t *)(mp->b_rptr + ip_hdr_offset);
const uint8_t proto = ip6h->ip6_nxt;
const uint16_t *iphs = (uint16_t *)ip6h;
/* ULP offset from start of L2. */
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.c b/usr/src/uts/common/io/mlxcx/mlxcx.c
index c90fa0969b..2aefac33db 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.c
@@ -273,11 +273,16 @@
* before making a WQE for it.
*
* After a completion event occurs, the packet is either discarded (and the
- * buffer_t returned to the free list), or it is readied for loaning to MAC.
+ * buffer_t returned to the free list), or it is readied for loaning to MAC
+ * and placed on the "loaned" list in the mlxcx_buffer_shard_t.
*
* Once MAC and the rest of the system have finished with the packet, they call
- * freemsg() on its mblk, which will call mlxcx_buf_mp_return and return the
- * buffer_t to the free list.
+ * freemsg() on its mblk, which will call mlxcx_buf_mp_return. At this point
+ * the fate of the buffer_t is determined by the state of the
+ * mlxcx_buffer_shard_t. When the shard is in its normal state the buffer_t
+ * will be returned to the free list, potentially to be recycled and used
+ * again. But if the shard is draining (e.g. after a ring stop) there will be
+ * no recycling and the buffer_t is immediately destroyed.
*
* At detach/teardown time, buffers are only every destroyed from the free list.
*
@@ -289,18 +294,18 @@
* v
* +----+----+
* | created |
- * +----+----+
- * |
- * |
- * | mlxcx_buf_return
- * |
- * v
- * mlxcx_buf_destroy +----+----+
- * +---------| free |<---------------+
- * | +----+----+ |
+ * +----+----+ +------+
+ * | | dead |
+ * | +------+
+ * | mlxcx_buf_return ^
+ * | |
+ * v | mlxcx_buf_destroy
+ * mlxcx_buf_destroy +----+----+ +-----------+ |
+ * +---------| free |<------no-| draining? |-yes-+
+ * | +----+----+ +-----------+
+ * | | ^
* | | |
- * | | | mlxcx_buf_return
- * v | mlxcx_buf_take |
+ * v | mlxcx_buf_take | mlxcx_buf_return
* +---+--+ v |
* | dead | +---+---+ |
* +------+ | on WQ |- - - - - - - - >O
@@ -759,13 +764,19 @@ mlxcx_mlbs_teardown(mlxcx_t *mlxp, mlxcx_buf_shard_t *s)
mlxcx_buffer_t *buf;
mutex_enter(&s->mlbs_mtx);
+
while (!list_is_empty(&s->mlbs_busy))
cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
- while ((buf = list_head(&s->mlbs_free)) != NULL) {
+
+ while (!list_is_empty(&s->mlbs_loaned))
+ cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+
+ while ((buf = list_head(&s->mlbs_free)) != NULL)
mlxcx_buf_destroy(mlxp, buf);
- }
+
list_destroy(&s->mlbs_free);
list_destroy(&s->mlbs_busy);
+ list_destroy(&s->mlbs_loaned);
mutex_exit(&s->mlbs_mtx);
cv_destroy(&s->mlbs_free_nonempty);
@@ -1336,6 +1347,8 @@ mlxcx_mlbs_create(mlxcx_t *mlxp)
offsetof(mlxcx_buffer_t, mlb_entry));
list_create(&s->mlbs_free, sizeof (mlxcx_buffer_t),
offsetof(mlxcx_buffer_t, mlb_entry));
+ list_create(&s->mlbs_loaned, sizeof (mlxcx_buffer_t),
+ offsetof(mlxcx_buffer_t, mlb_entry));
cv_init(&s->mlbs_free_nonempty, NULL, CV_DRIVER, NULL);
list_insert_tail(&mlxp->mlx_buf_shards, s);
@@ -1743,6 +1756,11 @@ mlxcx_setup_ports(mlxcx_t *mlxp)
mutex_exit(&p->mlp_mtx);
goto err;
}
+ if (!mlxcx_cmd_query_port_fec(mlxp, p)) {
+ mutex_exit(&p->mlp_mtx);
+ goto err;
+ }
+ p->mlp_fec_requested = LINK_FEC_AUTO;
mutex_exit(&p->mlp_mtx);
}
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.h b/usr/src/uts/common/io/mlxcx/mlxcx.h
index da048b4ac3..06277d033c 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.h
@@ -346,6 +346,8 @@ typedef struct mlxcx_port {
mlxcx_eth_proto_t mlp_max_proto;
mlxcx_eth_proto_t mlp_admin_proto;
mlxcx_eth_proto_t mlp_oper_proto;
+ mlxcx_pplm_fec_active_t mlp_fec_active;
+ link_fec_t mlp_fec_requested;
mlxcx_eth_inline_mode_t mlp_wqe_min_inline;
@@ -424,11 +426,18 @@ typedef enum {
MLXCX_BUFFER_ON_CHAIN,
} mlxcx_buffer_state_t;
+typedef enum {
+ MLXCX_SHARD_READY,
+ MLXCX_SHARD_DRAINING,
+} mlxcx_shard_state_t;
+
typedef struct mlxcx_buf_shard {
+ mlxcx_shard_state_t mlbs_state;
list_node_t mlbs_entry;
kmutex_t mlbs_mtx;
list_t mlbs_busy;
list_t mlbs_free;
+ list_t mlbs_loaned;
kcondvar_t mlbs_free_nonempty;
} mlxcx_buf_shard_t;
@@ -1171,6 +1180,8 @@ extern boolean_t mlxcx_buf_loan(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_buf_return(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_buf_return_chain(mlxcx_t *, mlxcx_buffer_t *, boolean_t);
extern void mlxcx_buf_destroy(mlxcx_t *, mlxcx_buffer_t *);
+extern void mlxcx_shard_ready(mlxcx_buf_shard_t *);
+extern void mlxcx_shard_draining(mlxcx_buf_shard_t *);
extern uint_t mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
mblk_t *, size_t, mlxcx_buffer_t **);
@@ -1311,7 +1322,12 @@ extern boolean_t mlxcx_cmd_access_register(mlxcx_t *, mlxcx_cmd_reg_opmod_t,
mlxcx_register_id_t, mlxcx_register_data_t *);
extern boolean_t mlxcx_cmd_query_port_mtu(mlxcx_t *, mlxcx_port_t *);
extern boolean_t mlxcx_cmd_query_port_status(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_modify_port_status(mlxcx_t *, mlxcx_port_t *,
+ mlxcx_port_status_t);
extern boolean_t mlxcx_cmd_query_port_speed(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_query_port_fec(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_modify_port_fec(mlxcx_t *, mlxcx_port_t *,
+ mlxcx_pplm_fec_caps_t);
extern boolean_t mlxcx_cmd_set_port_mtu(mlxcx_t *, mlxcx_port_t *);
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
index 30fb7ca8ef..f059b856a6 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
@@ -12,6 +12,7 @@
/*
* Copyright 2020, The University of Queensland
* Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 RackTop Systems, Inc.
*/
/*
@@ -1594,6 +1595,8 @@ mlxcx_reg_name(mlxcx_register_id_t rid)
return ("MCIA");
case MLXCX_REG_PPCNT:
return ("PPCNT");
+ case MLXCX_REG_PPLM:
+ return ("PPLM");
default:
return ("???");
}
@@ -1640,6 +1643,9 @@ mlxcx_cmd_access_register(mlxcx_t *mlxp, mlxcx_cmd_reg_opmod_t opmod,
case MLXCX_REG_PPCNT:
dsize = sizeof (mlxcx_reg_ppcnt_t);
break;
+ case MLXCX_REG_PPLM:
+ dsize = sizeof (mlxcx_reg_pplm_t);
+ break;
default:
dsize = 0;
VERIFY(0);
@@ -1776,6 +1782,25 @@ mlxcx_cmd_query_port_status(mlxcx_t *mlxp, mlxcx_port_t *mlp)
}
boolean_t
+mlxcx_cmd_modify_port_status(mlxcx_t *mlxp, mlxcx_port_t *mlp,
+ mlxcx_port_status_t status)
+{
+ mlxcx_register_data_t data;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data, sizeof (data));
+ data.mlrd_paos.mlrd_paos_local_port = mlp->mlp_num + 1;
+ data.mlrd_paos.mlrd_paos_admin_status = status;
+ set_bit32(&data.mlrd_paos.mlrd_paos_flags, MLXCX_PAOS_ADMIN_ST_EN);
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_WRITE,
+ MLXCX_REG_PAOS, &data);
+
+ return (ret);
+}
+
+boolean_t
mlxcx_cmd_query_port_speed(mlxcx_t *mlxp, mlxcx_port_t *mlp)
{
mlxcx_register_data_t data;
@@ -1809,6 +1834,82 @@ mlxcx_cmd_query_port_speed(mlxcx_t *mlxp, mlxcx_port_t *mlp)
}
boolean_t
+mlxcx_cmd_query_port_fec(mlxcx_t *mlxp, mlxcx_port_t *mlp)
+{
+ mlxcx_register_data_t data;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data, sizeof (data));
+ data.mlrd_pplm.mlrd_pplm_local_port = mlp->mlp_num + 1;
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_PPLM, &data);
+
+ if (ret) {
+ mlp->mlp_fec_active =
+ from_be24(data.mlrd_pplm.mlrd_pplm_fec_mode_active);
+ }
+
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_modify_port_fec(mlxcx_t *mlxp, mlxcx_port_t *mlp,
+ mlxcx_pplm_fec_caps_t fec)
+{
+ mlxcx_register_data_t data_in, data_out;
+ mlxcx_pplm_fec_caps_t caps;
+ mlxcx_reg_pplm_t *pplm_in, *pplm_out;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data_in, sizeof (data_in));
+ pplm_in = &data_in.mlrd_pplm;
+ pplm_in->mlrd_pplm_local_port = mlp->mlp_num + 1;
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_PPLM, &data_in);
+
+ if (!ret)
+ return (B_FALSE);
+
+ bzero(&data_out, sizeof (data_out));
+ pplm_out = &data_out.mlrd_pplm;
+ pplm_out->mlrd_pplm_local_port = mlp->mlp_num + 1;
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_56G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_56G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_100G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_100G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_50G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_50G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_25G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_25G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_10_40G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_10_40G, fec & caps);
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_WRITE,
+ MLXCX_REG_PPLM, &data_out);
+
+ return (ret);
+}
+
+boolean_t
mlxcx_cmd_modify_nic_vport_ctx(mlxcx_t *mlxp, mlxcx_port_t *mlp,
mlxcx_modify_nic_vport_ctx_fields_t fields)
{
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
index a08cec3980..2521641a00 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
@@ -80,6 +80,53 @@ mlxcx_speed_to_bits(mlxcx_eth_proto_t v)
}
}
+static link_fec_t
+mlxcx_fec_to_link_fec(mlxcx_pplm_fec_active_t mlxcx_fec)
+{
+ if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_NONE) != 0)
+ return (LINK_FEC_NONE);
+
+ if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_FIRECODE) != 0)
+ return (LINK_FEC_BASE_R);
+
+ if ((mlxcx_fec & (MLXCX_PPLM_FEC_ACTIVE_RS528 |
+ MLXCX_PPLM_FEC_ACTIVE_RS271 | MLXCX_PPLM_FEC_ACTIVE_RS544 |
+ MLXCX_PPLM_FEC_ACTIVE_RS272)) != 0)
+ return (LINK_FEC_RS);
+
+ return (LINK_FEC_NONE);
+}
+
+static boolean_t
+mlxcx_link_fec_cap(link_fec_t fec, mlxcx_pplm_fec_caps_t *pfecp)
+{
+ mlxcx_pplm_fec_caps_t pplm_fec = 0;
+
+ if ((fec & LINK_FEC_AUTO) != 0) {
+ pplm_fec = MLXCX_PPLM_FEC_CAP_AUTO;
+ fec &= ~LINK_FEC_AUTO;
+ } else if ((fec & LINK_FEC_NONE) != 0) {
+ pplm_fec = MLXCX_PPLM_FEC_CAP_NONE;
+ fec &= ~LINK_FEC_NONE;
+ } else if ((fec & LINK_FEC_RS) != 0) {
+ pplm_fec |= MLXCX_PPLM_FEC_CAP_RS;
+ fec &= ~LINK_FEC_RS;
+ } else if ((fec & LINK_FEC_BASE_R) != 0) {
+ pplm_fec |= MLXCX_PPLM_FEC_CAP_FIRECODE;
+ fec &= ~LINK_FEC_BASE_R;
+ }
+
+ /*
+ * Only one fec option is allowed.
+ */
+ if (fec != 0)
+ return (B_FALSE);
+
+ *pfecp = pplm_fec;
+
+ return (B_TRUE);
+}
+
static int
mlxcx_mac_stat_rfc_2863(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat,
uint64_t *val)
@@ -451,7 +498,8 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
return (NULL);
}
- if (sq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+ if ((sq->mlwq_state & (MLXCX_WQ_TEARDOWN | MLXCX_WQ_STARTED)) !=
+ MLXCX_WQ_STARTED) {
mutex_exit(&sq->mlwq_mtx);
mlxcx_buf_return_chain(mlxp, b, B_FALSE);
return (NULL);
@@ -725,8 +773,28 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
mlxcx_buf_shard_t *s;
mlxcx_buffer_t *buf;
+ /*
+ * To prevent deadlocks and sleeping whilst holding either the
+ * CQ mutex or WQ mutex, we split the stop processing into two
+ * parts.
+ *
+ * With the CQ and WQ mutexes held the appropriate WQ is stopped.
+ * The Q in the HCA is set to Reset state and flagged as no
+ * longer started. Atomic with changing this WQ state, the buffer
+ * shards are flagged as draining.
+ *
+ * Now, any requests for buffers and attempts to submit messages
+ * will fail and once we're in this state it is safe to relinquish
+ * the CQ and WQ mutexes, allowing us to complete the ring stop
+ * by waiting for the buffer lists, with the exception of
+ * the loaned list, to drain. Buffers on the loaned list are
+ * not under our control, we will get them back when the mblk tied
+ * to the buffer is freed.
+ */
+
mutex_enter(&cq->mlcq_mtx);
mutex_enter(&wq->mlwq_mtx);
+
if (wq->mlwq_state & MLXCX_WQ_STARTED) {
if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
!mlxcx_cmd_stop_rq(mlxp, wq)) {
@@ -743,7 +811,15 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
}
ASSERT0(wq->mlwq_state & MLXCX_WQ_STARTED);
+ mlxcx_shard_draining(wq->mlwq_bufs);
+ if (wq->mlwq_foreign_bufs != NULL)
+ mlxcx_shard_draining(wq->mlwq_foreign_bufs);
+
+
if (wq->mlwq_state & MLXCX_WQ_BUFFERS) {
+ mutex_exit(&wq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+
/* Return any outstanding buffers to the free pool. */
while ((buf = list_remove_head(&cq->mlcq_buffers)) != NULL) {
mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
@@ -775,12 +851,13 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
mutex_exit(&s->mlbs_mtx);
}
+ mutex_enter(&wq->mlwq_mtx);
wq->mlwq_state &= ~MLXCX_WQ_BUFFERS;
+ mutex_exit(&wq->mlwq_mtx);
+ } else {
+ mutex_exit(&wq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
}
- ASSERT0(wq->mlwq_state & MLXCX_WQ_BUFFERS);
-
- mutex_exit(&wq->mlwq_mtx);
- mutex_exit(&cq->mlcq_mtx);
}
static int
@@ -1061,6 +1138,14 @@ mlxcx_mac_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
mac_prop_info_set_default_uint8(prh, 1);
break;
+ case MAC_PROP_ADV_FEC_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO);
+ break;
+ case MAC_PROP_EN_FEC_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+ mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO);
+ break;
case MAC_PROP_ADV_100GFDX_CAP:
case MAC_PROP_EN_100GFDX_CAP:
mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
@@ -1120,6 +1205,9 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
uint32_t new_mtu, new_hw_mtu, old_mtu;
mlxcx_buf_shard_t *sh;
boolean_t allocd = B_FALSE;
+ boolean_t relink = B_FALSE;
+ link_fec_t fec;
+ mlxcx_pplm_fec_caps_t cap_fec;
mutex_enter(&port->mlp_mtx);
@@ -1137,7 +1225,8 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
for (; sh != NULL; sh = list_next(&mlxp->mlx_buf_shards, sh)) {
mutex_enter(&sh->mlbs_mtx);
if (!list_is_empty(&sh->mlbs_free) ||
- !list_is_empty(&sh->mlbs_busy)) {
+ !list_is_empty(&sh->mlbs_busy) ||
+ !list_is_empty(&sh->mlbs_loaned)) {
allocd = B_TRUE;
mutex_exit(&sh->mlbs_mtx);
break;
@@ -1167,11 +1256,57 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
break;
}
break;
+
+ case MAC_PROP_EN_FEC_CAP:
+ bcopy(pr_val, &fec, sizeof (fec));
+ if (!mlxcx_link_fec_cap(fec, &cap_fec)) {
+ ret = EINVAL;
+ break;
+ }
+
+ /*
+ * Don't change the FEC if it is already at the requested
+ * setting AND the port is up.
+ * When the port is down, always set the FEC and attempt
+ * to retrain the link.
+ */
+ if (fec == port->mlp_fec_requested &&
+ fec == mlxcx_fec_to_link_fec(port->mlp_fec_active) &&
+ port->mlp_oper_status != MLXCX_PORT_STATUS_DOWN)
+ break;
+
+ /*
+ * The most likely cause of this failing is an invalid
+ * or unsupported fec option.
+ */
+ if (!mlxcx_cmd_modify_port_fec(mlxp, port, cap_fec)) {
+ ret = EINVAL;
+ break;
+ }
+
+ port->mlp_fec_requested = fec;
+
+ /*
+ * For FEC to become effective, the link needs to go back
+ * to training and negotiation state. This happens when
+ * the link transitions from down to up, so force a relink.
+ */
+ relink = B_TRUE;
+ break;
+
default:
ret = ENOTSUP;
break;
}
+ if (relink) {
+ if (!mlxcx_cmd_modify_port_status(mlxp, port,
+ MLXCX_PORT_STATUS_DOWN) ||
+ !mlxcx_cmd_modify_port_status(mlxp, port,
+ MLXCX_PORT_STATUS_UP)) {
+ ret = EIO;
+ }
+ }
mutex_exit(&port->mlp_mtx);
return (ret);
@@ -1229,6 +1364,21 @@ mlxcx_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
}
*(uint8_t *)pr_val = port->mlp_autoneg;
break;
+ case MAC_PROP_ADV_FEC_CAP:
+ if (pr_valsize < sizeof (link_fec_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ *(link_fec_t *)pr_val =
+ mlxcx_fec_to_link_fec(port->mlp_fec_active);
+ break;
+ case MAC_PROP_EN_FEC_CAP:
+ if (pr_valsize < sizeof (link_fec_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ *(link_fec_t *)pr_val = port->mlp_fec_requested;
+ break;
case MAC_PROP_MTU:
if (pr_valsize < sizeof (uint32_t)) {
ret = EOVERFLOW;
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
index 4dc4291b08..aed691897b 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
@@ -355,6 +355,7 @@ mlxcx_update_link_state(mlxcx_t *mlxp, mlxcx_port_t *port)
mutex_enter(&port->mlp_mtx);
(void) mlxcx_cmd_query_port_status(mlxp, port);
(void) mlxcx_cmd_query_port_speed(mlxp, port);
+ (void) mlxcx_cmd_query_port_fec(mlxp, port);
switch (port->mlp_oper_status) {
case MLXCX_PORT_STATUS_UP:
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
index 6d09abea5c..abd717842d 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
@@ -2464,6 +2464,59 @@ typedef struct {
} mlxcx_reg_ppcnt_t;
typedef enum {
+ MLXCX_PPLM_FEC_CAP_AUTO = 0,
+ MLXCX_PPLM_FEC_CAP_NONE = (1 << 0),
+ MLXCX_PPLM_FEC_CAP_FIRECODE = (1 << 1),
+ MLXCX_PPLM_FEC_CAP_RS = (1 << 2),
+} mlxcx_pplm_fec_caps_t;
+
+typedef enum {
+ MLXCX_PPLM_FEC_ACTIVE_NONE = (1 << 0),
+ MLXCX_PPLM_FEC_ACTIVE_FIRECODE = (1 << 1),
+ MLXCX_PPLM_FEC_ACTIVE_RS528 = (1 << 2),
+ MLXCX_PPLM_FEC_ACTIVE_RS271 = (1 << 3),
+ MLXCX_PPLM_FEC_ACTIVE_RS544 = (1 << 7),
+ MLXCX_PPLM_FEC_ACTIVE_RS272 = (1 << 9),
+} mlxcx_pplm_fec_active_t;
+
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_56G (bitdef_t){ 16, 0x000f0000 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_100G (bitdef_t){ 12, 0x0000f000 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_50G (bitdef_t){ 8, 0x00000f00 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_25G (bitdef_t){ 4, 0x000000f0 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_10_40G (bitdef_t){ 0, 0x0000000f }
+
+typedef struct {
+ uint8_t mlrd_pplm_rsvd;
+ uint8_t mlrd_pplm_local_port;
+ uint8_t mlrd_pplm_rsvd1[11];
+ uint24be_t mlrd_pplm_fec_mode_active;
+ bits32_t mlrd_pplm_fec_override_cap;
+ bits32_t mlrd_pplm_fec_override_admin;
+ uint16be_t mlrd_pplm_fec_override_cap_400g_8x;
+ uint16be_t mlrd_pplm_fec_override_cap_200g_4x;
+ uint16be_t mlrd_pplm_fec_override_cap_100g_2x;
+ uint16be_t mlrd_pplm_fec_override_cap_50g_1x;
+ uint16be_t mlrd_pplm_fec_override_admin_400g_8x;
+ uint16be_t mlrd_pplm_fec_override_admin_200g_4x;
+ uint16be_t mlrd_pplm_fec_override_admin_100g_2x;
+ uint16be_t mlrd_pplm_fec_override_admin_50g_1x;
+ uint8_t mlrd_pplm_rsvd2[8];
+ uint16be_t mlrd_pplm_fec_override_cap_hdr;
+ uint16be_t mlrd_pplm_fec_override_cap_edr;
+ uint16be_t mlrd_pplm_fec_override_cap_fdr;
+ uint16be_t mlrd_pplm_fec_override_cap_fdr10;
+ uint16be_t mlrd_pplm_fec_override_admin_hdr;
+ uint16be_t mlrd_pplm_fec_override_admin_edr;
+ uint16be_t mlrd_pplm_fec_override_admin_fdr;
+ uint16be_t mlrd_pplm_fec_override_admin_fdr10;
+} mlxcx_reg_pplm_t;
+
+typedef enum {
MLXCX_REG_PMTU = 0x5003,
MLXCX_REG_PTYS = 0x5004,
MLXCX_REG_PAOS = 0x5006,
@@ -2472,6 +2525,7 @@ typedef enum {
MLXCX_REG_MLCR = 0x902B,
MLXCX_REG_MCIA = 0x9014,
MLXCX_REG_PPCNT = 0x5008,
+ MLXCX_REG_PPLM = 0x5023,
} mlxcx_register_id_t;
typedef union {
@@ -2482,6 +2536,7 @@ typedef union {
mlxcx_reg_pmaos_t mlrd_pmaos;
mlxcx_reg_mcia_t mlrd_mcia;
mlxcx_reg_ppcnt_t mlrd_ppcnt;
+ mlxcx_reg_pplm_t mlrd_pplm;
} mlxcx_register_data_t;
typedef enum {
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
index 492f8fd8a5..da98a5cf40 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
@@ -1213,6 +1213,8 @@ mlxcx_rx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
ASSERT0(rq->mlwq_state & MLXCX_WQ_BUFFERS);
rq->mlwq_state |= MLXCX_WQ_BUFFERS;
+ mlxcx_shard_ready(rq->mlwq_bufs);
+
for (j = 0; j < rq->mlwq_nents; ++j) {
if (!mlxcx_buf_create(mlxp, rq->mlwq_bufs, &b))
break;
@@ -1409,6 +1411,9 @@ mlxcx_tx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
}
sq->mlwq_state |= MLXCX_WQ_BUFFERS;
+ mlxcx_shard_ready(sq->mlwq_bufs);
+ mlxcx_shard_ready(sq->mlwq_foreign_bufs);
+
if (!mlxcx_cmd_start_sq(mlxp, sq)) {
mutex_exit(&sq->mlwq_mtx);
mutex_exit(&cq->mlcq_mtx);
@@ -1799,22 +1804,29 @@ mlxcx_rq_refill_task(void *arg)
mlxcx_completion_queue_t *cq = wq->mlwq_cq;
mlxcx_t *mlxp = wq->mlwq_mlx;
mlxcx_buf_shard_t *s = wq->mlwq_bufs;
- boolean_t refill;
+ boolean_t refill, draining;
do {
/*
- * Wait until there are some free buffers.
+ * Wait here until one of 3 conditions:
+ * 1. The shard is draining, or
+ * 2. There are buffers on the free list, or
+ * 3. The CQ is being torn down.
*/
mutex_enter(&s->mlbs_mtx);
- while (list_is_empty(&s->mlbs_free) &&
- (cq->mlcq_state & MLXCX_CQ_TEARDOWN) == 0)
+ while (s->mlbs_state != MLXCX_SHARD_DRAINING &&
+ list_is_empty(&s->mlbs_free) &&
+ (cq->mlcq_state & MLXCX_CQ_TEARDOWN) == 0) {
cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+ }
+
+ draining = (s->mlbs_state == MLXCX_SHARD_DRAINING);
mutex_exit(&s->mlbs_mtx);
mutex_enter(&cq->mlcq_mtx);
mutex_enter(&wq->mlwq_mtx);
- if ((cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0) {
+ if (draining || (cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0) {
refill = B_FALSE;
wq->mlwq_state &= ~MLXCX_WQ_REFILLING;
} else {
@@ -1851,7 +1863,10 @@ mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
target = mlwq->mlwq_nents - MLXCX_RQ_REFILL_STEP;
cq = mlwq->mlwq_cq;
- if (cq->mlcq_state & MLXCX_CQ_TEARDOWN)
+ if ((mlwq->mlwq_state & MLXCX_WQ_STARTED) == 0)
+ return;
+
+ if ((cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0)
return;
current = cq->mlcq_bufcnt;
@@ -1883,7 +1898,7 @@ mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
return;
}
- if (mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+ if ((mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) != 0) {
for (i = 0; i < n; ++i)
mlxcx_buf_return(mlxp, b[i]);
return;
@@ -2058,7 +2073,6 @@ mlxcx_rx_completion(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq,
wqe_index = buf->mlb_wqe_index;
if (!mlxcx_buf_loan(mlxp, buf)) {
- mlxcx_warn(mlxp, "!loan failed, dropping packet");
mlxcx_buf_return(mlxp, buf);
return (NULL);
}
@@ -2101,16 +2115,11 @@ mlxcx_buf_mp_return(caddr_t arg)
mlxcx_buffer_t *b = (mlxcx_buffer_t *)arg;
mlxcx_t *mlxp = b->mlb_mlx;
- if (b->mlb_state != MLXCX_BUFFER_ON_LOAN) {
- b->mlb_mp = NULL;
- return;
- }
- /*
- * The mblk for this buffer_t (in its mlb_mp field) has been used now,
- * so NULL it out.
- */
+ /* The mblk has been used now, so NULL it out. */
b->mlb_mp = NULL;
- mlxcx_buf_return(mlxp, b);
+
+ if (b->mlb_state == MLXCX_BUFFER_ON_LOAN)
+ mlxcx_buf_return(mlxp, b);
}
boolean_t
@@ -2177,6 +2186,11 @@ mlxcx_buf_take_foreign(mlxcx_t *mlxp, mlxcx_work_queue_t *wq)
mlxcx_buf_shard_t *s = wq->mlwq_foreign_bufs;
mutex_enter(&s->mlbs_mtx);
+ if (s->mlbs_state != MLXCX_SHARD_READY) {
+ mutex_exit(&s->mlbs_mtx);
+ return (NULL);
+ }
+
if ((b = list_remove_head(&s->mlbs_free)) != NULL) {
ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
ASSERT(b->mlb_foreign);
@@ -2345,6 +2359,11 @@ mlxcx_buf_take(mlxcx_t *mlxp, mlxcx_work_queue_t *wq)
mlxcx_buf_shard_t *s = wq->mlwq_bufs;
mutex_enter(&s->mlbs_mtx);
+ if (s->mlbs_state != MLXCX_SHARD_READY) {
+ mutex_exit(&s->mlbs_mtx);
+ return (NULL);
+ }
+
if ((b = list_remove_head(&s->mlbs_free)) != NULL) {
ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
b->mlb_state = MLXCX_BUFFER_ON_WQ;
@@ -2366,6 +2385,11 @@ mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
s = wq->mlwq_bufs;
mutex_enter(&s->mlbs_mtx);
+ if (s->mlbs_state != MLXCX_SHARD_READY) {
+ mutex_exit(&s->mlbs_mtx);
+ return (0);
+ }
+
while (done < nbufs && (b = list_remove_head(&s->mlbs_free)) != NULL) {
ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
b->mlb_state = MLXCX_BUFFER_ON_WQ;
@@ -2379,6 +2403,8 @@ mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
boolean_t
mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
{
+ mlxcx_buf_shard_t *s = b->mlb_shard;
+
VERIFY3U(b->mlb_state, ==, MLXCX_BUFFER_ON_WQ);
ASSERT3P(b->mlb_mlx, ==, mlxp);
@@ -2391,6 +2417,12 @@ mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
b->mlb_state = MLXCX_BUFFER_ON_LOAN;
b->mlb_wqe_index = 0;
+
+ mutex_enter(&s->mlbs_mtx);
+ list_remove(&s->mlbs_busy, b);
+ list_insert_tail(&s->mlbs_loaned, b);
+ mutex_exit(&s->mlbs_mtx);
+
return (B_TRUE);
}
@@ -2453,7 +2485,23 @@ mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
break;
case MLXCX_BUFFER_ON_LOAN:
ASSERT(!b->mlb_foreign);
- list_remove(&s->mlbs_busy, b);
+ list_remove(&s->mlbs_loaned, b);
+ if (s->mlbs_state == MLXCX_SHARD_DRAINING) {
+ /*
+ * When we're draining, e.g. during mac_stop(),
+ * we destroy the buffer immediately rather than
+ * recycling it. Otherwise we risk leaving it
+ * on the free list and leaking it.
+ */
+ list_insert_tail(&s->mlbs_free, b);
+ mlxcx_buf_destroy(mlxp, b);
+ /*
+ * Teardown might be waiting for loaned list to empty.
+ */
+ cv_broadcast(&s->mlbs_free_nonempty);
+ mutex_exit(&s->mlbs_mtx);
+ return;
+ }
break;
case MLXCX_BUFFER_FREE:
VERIFY(0);
@@ -2466,7 +2514,7 @@ mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
}
list_insert_tail(&s->mlbs_free, b);
- cv_signal(&s->mlbs_free_nonempty);
+ cv_broadcast(&s->mlbs_free_nonempty);
mutex_exit(&s->mlbs_mtx);
@@ -2484,9 +2532,11 @@ void
mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
{
mlxcx_buf_shard_t *s = b->mlb_shard;
+
VERIFY(b->mlb_state == MLXCX_BUFFER_FREE ||
b->mlb_state == MLXCX_BUFFER_INIT);
ASSERT(mutex_owned(&s->mlbs_mtx));
+
if (b->mlb_state == MLXCX_BUFFER_FREE)
list_remove(&s->mlbs_free, b);
@@ -2506,3 +2556,20 @@ mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
kmem_cache_free(mlxp->mlx_bufs_cache, b);
}
+
+void
+mlxcx_shard_ready(mlxcx_buf_shard_t *s)
+{
+ mutex_enter(&s->mlbs_mtx);
+ s->mlbs_state = MLXCX_SHARD_READY;
+ mutex_exit(&s->mlbs_mtx);
+}
+
+void
+mlxcx_shard_draining(mlxcx_buf_shard_t *s)
+{
+ mutex_enter(&s->mlbs_mtx);
+ s->mlbs_state = MLXCX_SHARD_DRAINING;
+ cv_broadcast(&s->mlbs_free_nonempty);
+ mutex_exit(&s->mlbs_mtx);
+}
diff --git a/usr/src/uts/common/io/stream.c b/usr/src/uts/common/io/stream.c
index 55fd87db45..288f77ae47 100644
--- a/usr/src/uts/common/io/stream.c
+++ b/usr/src/uts/common/io/stream.c
@@ -839,7 +839,7 @@ frnop_func(void *arg)
*/
static mblk_t *
gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp,
- void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
+ void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
{
dblk_t *dbp;
mblk_t *mp;