Diffstat (limited to 'usr/src/uts/common/io/mlxcx')
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx.c        50
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx.h        16
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_cmd.c   101
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_gld.c   162
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_intr.c    1
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_reg.h    55
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_ring.c  105
7 files changed, 449 insertions, 41 deletions
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.c b/usr/src/uts/common/io/mlxcx/mlxcx.c
index c90fa0969b..2aefac33db 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.c
@@ -273,11 +273,16 @@
* before making a WQE for it.
*
* After a completion event occurs, the packet is either discarded (and the
- * buffer_t returned to the free list), or it is readied for loaning to MAC.
+ * buffer_t returned to the free list), or it is readied for loaning to MAC
+ * and placed on the "loaned" list in the mlxcx_buf_shard_t.
*
* Once MAC and the rest of the system have finished with the packet, they call
- * freemsg() on its mblk, which will call mlxcx_buf_mp_return and return the
- * buffer_t to the free list.
+ * freemsg() on its mblk, which will call mlxcx_buf_mp_return. At this point
+ * the fate of the buffer_t is determined by the state of the
+ * mlxcx_buf_shard_t. When the shard is in its normal state the buffer_t
+ * will be returned to the free list, potentially to be recycled and used
+ * again. But if the shard is draining (e.g. after a ring stop) there will be
+ * no recycling and the buffer_t is immediately destroyed.
*
 * At detach/teardown time, buffers are only ever destroyed from the free list.
*
@@ -289,18 +294,18 @@
* v
* +----+----+
* | created |
- * +----+----+
- * |
- * |
- * | mlxcx_buf_return
- * |
- * v
- * mlxcx_buf_destroy +----+----+
- * +---------| free |<---------------+
- * | +----+----+ |
+ * +----+----+ +------+
+ * | | dead |
+ * | +------+
+ * | mlxcx_buf_return ^
+ * | |
+ * v | mlxcx_buf_destroy
+ * mlxcx_buf_destroy +----+----+ +-----------+ |
+ * +---------| free |<------no-| draining? |-yes-+
+ * | +----+----+ +-----------+
+ * | | ^
* | | |
- * | | | mlxcx_buf_return
- * v | mlxcx_buf_take |
+ * v | mlxcx_buf_take | mlxcx_buf_return
* +---+--+ v |
* | dead | +---+---+ |
* +------+ | on WQ |- - - - - - - - >O
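
To make the new draining behaviour concrete, here is a minimal sketch of
the return path for a loaned buffer, condensed from mlxcx_buf_mp_return()
and mlxcx_buf_return() later in this diff (mlbs_mtx locking elided;
loaned_buf_returned() is a hypothetical helper used only for illustration):

	static void
	loaned_buf_returned(mlxcx_t *mlxp, mlxcx_buf_shard_t *s,
	    mlxcx_buffer_t *b)
	{
		/* freemsg() has consumed the mblk; drop our reference. */
		b->mlb_mp = NULL;

		list_remove(&s->mlbs_loaned, b);
		b->mlb_state = MLXCX_BUFFER_FREE;

		if (s->mlbs_state == MLXCX_SHARD_DRAINING) {
			/* No recycling while draining: destroy at once. */
			list_insert_tail(&s->mlbs_free, b);
			mlxcx_buf_destroy(mlxp, b);
		} else {
			/* Normal operation: recycle via the free list. */
			list_insert_tail(&s->mlbs_free, b);
		}

		/* Teardown may be waiting for the loaned list to empty. */
		cv_broadcast(&s->mlbs_free_nonempty);
	}
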
@@ -759,13 +764,19 @@ mlxcx_mlbs_teardown(mlxcx_t *mlxp, mlxcx_buf_shard_t *s)
mlxcx_buffer_t *buf;
mutex_enter(&s->mlbs_mtx);
+
while (!list_is_empty(&s->mlbs_busy))
cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
- while ((buf = list_head(&s->mlbs_free)) != NULL) {
+
+ while (!list_is_empty(&s->mlbs_loaned))
+ cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+
+ while ((buf = list_head(&s->mlbs_free)) != NULL)
mlxcx_buf_destroy(mlxp, buf);
- }
+
list_destroy(&s->mlbs_free);
list_destroy(&s->mlbs_busy);
+ list_destroy(&s->mlbs_loaned);
mutex_exit(&s->mlbs_mtx);
cv_destroy(&s->mlbs_free_nonempty);
@@ -1336,6 +1347,8 @@ mlxcx_mlbs_create(mlxcx_t *mlxp)
offsetof(mlxcx_buffer_t, mlb_entry));
list_create(&s->mlbs_free, sizeof (mlxcx_buffer_t),
offsetof(mlxcx_buffer_t, mlb_entry));
+ list_create(&s->mlbs_loaned, sizeof (mlxcx_buffer_t),
+ offsetof(mlxcx_buffer_t, mlb_entry));
cv_init(&s->mlbs_free_nonempty, NULL, CV_DRIVER, NULL);
list_insert_tail(&mlxp->mlx_buf_shards, s);
@@ -1743,6 +1756,11 @@ mlxcx_setup_ports(mlxcx_t *mlxp)
mutex_exit(&p->mlp_mtx);
goto err;
}
+ if (!mlxcx_cmd_query_port_fec(mlxp, p)) {
+ mutex_exit(&p->mlp_mtx);
+ goto err;
+ }
+ p->mlp_fec_requested = LINK_FEC_AUTO;
mutex_exit(&p->mlp_mtx);
}
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.h b/usr/src/uts/common/io/mlxcx/mlxcx.h
index da048b4ac3..06277d033c 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.h
@@ -346,6 +346,8 @@ typedef struct mlxcx_port {
mlxcx_eth_proto_t mlp_max_proto;
mlxcx_eth_proto_t mlp_admin_proto;
mlxcx_eth_proto_t mlp_oper_proto;
+ mlxcx_pplm_fec_active_t mlp_fec_active;
+ link_fec_t mlp_fec_requested;
mlxcx_eth_inline_mode_t mlp_wqe_min_inline;
@@ -424,11 +426,18 @@ typedef enum {
MLXCX_BUFFER_ON_CHAIN,
} mlxcx_buffer_state_t;
+typedef enum {
+ MLXCX_SHARD_READY,
+ MLXCX_SHARD_DRAINING,
+} mlxcx_shard_state_t;
+
typedef struct mlxcx_buf_shard {
+ mlxcx_shard_state_t mlbs_state;
list_node_t mlbs_entry;
kmutex_t mlbs_mtx;
list_t mlbs_busy;
list_t mlbs_free;
+ list_t mlbs_loaned;
kcondvar_t mlbs_free_nonempty;
} mlxcx_buf_shard_t;
@@ -1171,6 +1180,8 @@ extern boolean_t mlxcx_buf_loan(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_buf_return(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_buf_return_chain(mlxcx_t *, mlxcx_buffer_t *, boolean_t);
extern void mlxcx_buf_destroy(mlxcx_t *, mlxcx_buffer_t *);
+extern void mlxcx_shard_ready(mlxcx_buf_shard_t *);
+extern void mlxcx_shard_draining(mlxcx_buf_shard_t *);
extern uint_t mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
mblk_t *, size_t, mlxcx_buffer_t **);
@@ -1311,7 +1322,12 @@ extern boolean_t mlxcx_cmd_access_register(mlxcx_t *, mlxcx_cmd_reg_opmod_t,
mlxcx_register_id_t, mlxcx_register_data_t *);
extern boolean_t mlxcx_cmd_query_port_mtu(mlxcx_t *, mlxcx_port_t *);
extern boolean_t mlxcx_cmd_query_port_status(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_modify_port_status(mlxcx_t *, mlxcx_port_t *,
+ mlxcx_port_status_t);
extern boolean_t mlxcx_cmd_query_port_speed(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_query_port_fec(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_modify_port_fec(mlxcx_t *, mlxcx_port_t *,
+ mlxcx_pplm_fec_caps_t);
extern boolean_t mlxcx_cmd_set_port_mtu(mlxcx_t *, mlxcx_port_t *);
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
index 30fb7ca8ef..f059b856a6 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
@@ -12,6 +12,7 @@
/*
* Copyright 2020, The University of Queensland
* Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 RackTop Systems, Inc.
*/
/*
@@ -1594,6 +1595,8 @@ mlxcx_reg_name(mlxcx_register_id_t rid)
return ("MCIA");
case MLXCX_REG_PPCNT:
return ("PPCNT");
+ case MLXCX_REG_PPLM:
+ return ("PPLM");
default:
return ("???");
}
@@ -1640,6 +1643,9 @@ mlxcx_cmd_access_register(mlxcx_t *mlxp, mlxcx_cmd_reg_opmod_t opmod,
case MLXCX_REG_PPCNT:
dsize = sizeof (mlxcx_reg_ppcnt_t);
break;
+ case MLXCX_REG_PPLM:
+ dsize = sizeof (mlxcx_reg_pplm_t);
+ break;
default:
dsize = 0;
VERIFY(0);
@@ -1776,6 +1782,25 @@ mlxcx_cmd_query_port_status(mlxcx_t *mlxp, mlxcx_port_t *mlp)
}
boolean_t
+mlxcx_cmd_modify_port_status(mlxcx_t *mlxp, mlxcx_port_t *mlp,
+ mlxcx_port_status_t status)
+{
+ mlxcx_register_data_t data;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data, sizeof (data));
+ data.mlrd_paos.mlrd_paos_local_port = mlp->mlp_num + 1;
+ data.mlrd_paos.mlrd_paos_admin_status = status;
+ set_bit32(&data.mlrd_paos.mlrd_paos_flags, MLXCX_PAOS_ADMIN_ST_EN);
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_WRITE,
+ MLXCX_REG_PAOS, &data);
+
+ return (ret);
+}
+
+boolean_t
mlxcx_cmd_query_port_speed(mlxcx_t *mlxp, mlxcx_port_t *mlp)
{
mlxcx_register_data_t data;
@@ -1809,6 +1834,82 @@ mlxcx_cmd_query_port_speed(mlxcx_t *mlxp, mlxcx_port_t *mlp)
}
boolean_t
+mlxcx_cmd_query_port_fec(mlxcx_t *mlxp, mlxcx_port_t *mlp)
+{
+ mlxcx_register_data_t data;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data, sizeof (data));
+ data.mlrd_pplm.mlrd_pplm_local_port = mlp->mlp_num + 1;
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_PPLM, &data);
+
+ if (ret) {
+ mlp->mlp_fec_active =
+ from_be24(data.mlrd_pplm.mlrd_pplm_fec_mode_active);
+ }
+
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_modify_port_fec(mlxcx_t *mlxp, mlxcx_port_t *mlp,
+ mlxcx_pplm_fec_caps_t fec)
+{
+ mlxcx_register_data_t data_in, data_out;
+ mlxcx_pplm_fec_caps_t caps;
+ mlxcx_reg_pplm_t *pplm_in, *pplm_out;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data_in, sizeof (data_in));
+ pplm_in = &data_in.mlrd_pplm;
+ pplm_in->mlrd_pplm_local_port = mlp->mlp_num + 1;
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_PPLM, &data_in);
+
+ if (!ret)
+ return (B_FALSE);
+
+ bzero(&data_out, sizeof (data_out));
+ pplm_out = &data_out.mlrd_pplm;
+ pplm_out->mlrd_pplm_local_port = mlp->mlp_num + 1;
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_56G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_56G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_100G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_100G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_50G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_50G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_25G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_25G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_10_40G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_10_40G, fec & caps);
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_WRITE,
+ MLXCX_REG_PPLM, &data_out);
+
+ return (ret);
+}
+
+boolean_t
mlxcx_cmd_modify_nic_vport_ctx(mlxcx_t *mlxp, mlxcx_port_t *mlp,
mlxcx_modify_nic_vport_ctx_fields_t fields)
{
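
Taken together, the new PPLM accessors are driven under the port mutex, as
both functions assert. A hypothetical call sequence (a sketch only;
mlxcx_gld.c below shows the real caller):

	boolean_t ok;

	mutex_enter(&port->mlp_mtx);

	/* Request RS-FEC on every speed group whose cap bits allow it. */
	ok = mlxcx_cmd_modify_port_fec(mlxp, port, MLXCX_PPLM_FEC_CAP_RS);

	/*
	 * The new setting only takes effect once the link retrains, so a
	 * query here may still report the previous active FEC mode.
	 */
	if (ok)
		ok = mlxcx_cmd_query_port_fec(mlxp, port);

	mutex_exit(&port->mlp_mtx);
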
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
index a08cec3980..2521641a00 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
@@ -80,6 +80,53 @@ mlxcx_speed_to_bits(mlxcx_eth_proto_t v)
}
}
+static link_fec_t
+mlxcx_fec_to_link_fec(mlxcx_pplm_fec_active_t mlxcx_fec)
+{
+ if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_NONE) != 0)
+ return (LINK_FEC_NONE);
+
+ if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_FIRECODE) != 0)
+ return (LINK_FEC_BASE_R);
+
+ if ((mlxcx_fec & (MLXCX_PPLM_FEC_ACTIVE_RS528 |
+ MLXCX_PPLM_FEC_ACTIVE_RS271 | MLXCX_PPLM_FEC_ACTIVE_RS544 |
+ MLXCX_PPLM_FEC_ACTIVE_RS272)) != 0)
+ return (LINK_FEC_RS);
+
+ return (LINK_FEC_NONE);
+}
+
+static boolean_t
+mlxcx_link_fec_cap(link_fec_t fec, mlxcx_pplm_fec_caps_t *pfecp)
+{
+ mlxcx_pplm_fec_caps_t pplm_fec = 0;
+
+ if ((fec & LINK_FEC_AUTO) != 0) {
+ pplm_fec = MLXCX_PPLM_FEC_CAP_AUTO;
+ fec &= ~LINK_FEC_AUTO;
+ } else if ((fec & LINK_FEC_NONE) != 0) {
+ pplm_fec = MLXCX_PPLM_FEC_CAP_NONE;
+ fec &= ~LINK_FEC_NONE;
+ } else if ((fec & LINK_FEC_RS) != 0) {
+ pplm_fec |= MLXCX_PPLM_FEC_CAP_RS;
+ fec &= ~LINK_FEC_RS;
+ } else if ((fec & LINK_FEC_BASE_R) != 0) {
+ pplm_fec |= MLXCX_PPLM_FEC_CAP_FIRECODE;
+ fec &= ~LINK_FEC_BASE_R;
+ }
+
+ /*
+	 * Only one FEC option is allowed.
+ */
+ if (fec != 0)
+ return (B_FALSE);
+
+ *pfecp = pplm_fec;
+
+ return (B_TRUE);
+}
+
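
As an illustration of the mapping above (a hedged sketch, not driver
code): mlxcx_link_fec_cap() accepts exactly one link_fec_t option, with
LINK_FEC_AUTO translating to the all-clear PPLM value:

	mlxcx_pplm_fec_caps_t cap;

	(void) mlxcx_link_fec_cap(LINK_FEC_AUTO, &cap);
	/* cap == MLXCX_PPLM_FEC_CAP_AUTO (0): let the HCA negotiate. */

	(void) mlxcx_link_fec_cap(LINK_FEC_RS, &cap);
	/* cap == MLXCX_PPLM_FEC_CAP_RS */

	/* Combinations are rejected: */
	VERIFY(!mlxcx_link_fec_cap(LINK_FEC_RS | LINK_FEC_BASE_R, &cap));
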
static int
mlxcx_mac_stat_rfc_2863(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat,
uint64_t *val)
@@ -451,7 +498,8 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
return (NULL);
}
- if (sq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+ if ((sq->mlwq_state & (MLXCX_WQ_TEARDOWN | MLXCX_WQ_STARTED)) !=
+ MLXCX_WQ_STARTED) {
mutex_exit(&sq->mlwq_mtx);
mlxcx_buf_return_chain(mlxp, b, B_FALSE);
return (NULL);
@@ -725,8 +773,28 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
mlxcx_buf_shard_t *s;
mlxcx_buffer_t *buf;
+ /*
+ * To prevent deadlocks and sleeping whilst holding either the
+ * CQ mutex or WQ mutex, we split the stop processing into two
+ * parts.
+ *
+	 * With the CQ and WQ mutexes held, the appropriate WQ is stopped.
+	 * The Q in the HCA is set to Reset state and flagged as no
+	 * longer started. Atomically with changing this WQ state, the
+	 * buffer shards are flagged as draining.
+ *
+	 * Now, any requests for buffers and attempts to submit messages
+	 * will fail, and once we're in this state it is safe to
+	 * relinquish the CQ and WQ mutexes. We can then complete the
+	 * ring stop by waiting for the buffer lists, with the exception
+	 * of the loaned list, to drain. Buffers on the loaned list are
+	 * not under our control; we will get them back when the mblk
+	 * tied to the buffer is freed.
+ */
+
mutex_enter(&cq->mlcq_mtx);
mutex_enter(&wq->mlwq_mtx);
+
if (wq->mlwq_state & MLXCX_WQ_STARTED) {
if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
!mlxcx_cmd_stop_rq(mlxp, wq)) {
@@ -743,7 +811,15 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
}
ASSERT0(wq->mlwq_state & MLXCX_WQ_STARTED);
+ mlxcx_shard_draining(wq->mlwq_bufs);
+ if (wq->mlwq_foreign_bufs != NULL)
+ mlxcx_shard_draining(wq->mlwq_foreign_bufs);
+
+
if (wq->mlwq_state & MLXCX_WQ_BUFFERS) {
+ mutex_exit(&wq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+
/* Return any outstanding buffers to the free pool. */
while ((buf = list_remove_head(&cq->mlcq_buffers)) != NULL) {
mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
@@ -775,12 +851,13 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
mutex_exit(&s->mlbs_mtx);
}
+ mutex_enter(&wq->mlwq_mtx);
wq->mlwq_state &= ~MLXCX_WQ_BUFFERS;
+ mutex_exit(&wq->mlwq_mtx);
+ } else {
+ mutex_exit(&wq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
}
- ASSERT0(wq->mlwq_state & MLXCX_WQ_BUFFERS);
-
- mutex_exit(&wq->mlwq_mtx);
- mutex_exit(&cq->mlcq_mtx);
}
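
In outline, the two phases described in the comment above (a simplified
control-flow sketch of mlxcx_mac_ring_stop(); error handling and the
RQ/SQ distinction omitted):

	mutex_enter(&cq->mlcq_mtx);
	mutex_enter(&wq->mlwq_mtx);
	/* Phase 1: stop the queue in the HCA and flag the shards. */
	(void) mlxcx_cmd_stop_rq(mlxp, wq);	/* or mlxcx_cmd_stop_sq() */
	mlxcx_shard_draining(wq->mlwq_bufs);	/* new takes and loans fail */
	mutex_exit(&wq->mlwq_mtx);
	mutex_exit(&cq->mlcq_mtx);

	/* Phase 2: with the locks dropped we may sleep while it drains. */
	mutex_enter(&s->mlbs_mtx);
	while (!list_is_empty(&s->mlbs_busy))
		cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
	mutex_exit(&s->mlbs_mtx);
	/* Loaned buffers are not waited on; they self-destroy on return. */
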
static int
@@ -1061,6 +1138,14 @@ mlxcx_mac_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
mac_prop_info_set_default_uint8(prh, 1);
break;
+ case MAC_PROP_ADV_FEC_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO);
+ break;
+ case MAC_PROP_EN_FEC_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+ mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO);
+ break;
case MAC_PROP_ADV_100GFDX_CAP:
case MAC_PROP_EN_100GFDX_CAP:
mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
@@ -1120,6 +1205,9 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
uint32_t new_mtu, new_hw_mtu, old_mtu;
mlxcx_buf_shard_t *sh;
boolean_t allocd = B_FALSE;
+ boolean_t relink = B_FALSE;
+ link_fec_t fec;
+ mlxcx_pplm_fec_caps_t cap_fec;
mutex_enter(&port->mlp_mtx);
@@ -1137,7 +1225,8 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
for (; sh != NULL; sh = list_next(&mlxp->mlx_buf_shards, sh)) {
mutex_enter(&sh->mlbs_mtx);
if (!list_is_empty(&sh->mlbs_free) ||
- !list_is_empty(&sh->mlbs_busy)) {
+ !list_is_empty(&sh->mlbs_busy) ||
+ !list_is_empty(&sh->mlbs_loaned)) {
allocd = B_TRUE;
mutex_exit(&sh->mlbs_mtx);
break;
@@ -1167,11 +1256,57 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
break;
}
break;
+
+ case MAC_PROP_EN_FEC_CAP:
+ bcopy(pr_val, &fec, sizeof (fec));
+ if (!mlxcx_link_fec_cap(fec, &cap_fec)) {
+ ret = EINVAL;
+ break;
+ }
+
+ /*
+ * Don't change the FEC if it is already at the requested
+ * setting AND the port is up.
+ * When the port is down, always set the FEC and attempt
+ * to retrain the link.
+ */
+ if (fec == port->mlp_fec_requested &&
+ fec == mlxcx_fec_to_link_fec(port->mlp_fec_active) &&
+ port->mlp_oper_status != MLXCX_PORT_STATUS_DOWN)
+ break;
+
+ /*
+		 * The most likely cause of this failing is an invalid
+		 * or unsupported FEC option.
+ */
+ if (!mlxcx_cmd_modify_port_fec(mlxp, port, cap_fec)) {
+ ret = EINVAL;
+ break;
+ }
+
+ port->mlp_fec_requested = fec;
+
+ /*
+ * For FEC to become effective, the link needs to go back
+ * to training and negotiation state. This happens when
+		 * the link transitions from down to up, so force a relink.
+ */
+ relink = B_TRUE;
+ break;
+
default:
ret = ENOTSUP;
break;
}
+ if (relink) {
+ if (!mlxcx_cmd_modify_port_status(mlxp, port,
+ MLXCX_PORT_STATUS_DOWN) ||
+ !mlxcx_cmd_modify_port_status(mlxp, port,
+ MLXCX_PORT_STATUS_UP)) {
+ ret = EIO;
+ }
+ }
mutex_exit(&port->mlp_mtx);
return (ret);
@@ -1229,6 +1364,21 @@ mlxcx_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
}
*(uint8_t *)pr_val = port->mlp_autoneg;
break;
+ case MAC_PROP_ADV_FEC_CAP:
+ if (pr_valsize < sizeof (link_fec_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ *(link_fec_t *)pr_val =
+ mlxcx_fec_to_link_fec(port->mlp_fec_active);
+ break;
+ case MAC_PROP_EN_FEC_CAP:
+ if (pr_valsize < sizeof (link_fec_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ *(link_fec_t *)pr_val = port->mlp_fec_requested;
+ break;
case MAC_PROP_MTU:
if (pr_valsize < sizeof (uint32_t)) {
ret = EOVERFLOW;
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
index 4dc4291b08..aed691897b 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
@@ -355,6 +355,7 @@ mlxcx_update_link_state(mlxcx_t *mlxp, mlxcx_port_t *port)
mutex_enter(&port->mlp_mtx);
(void) mlxcx_cmd_query_port_status(mlxp, port);
(void) mlxcx_cmd_query_port_speed(mlxp, port);
+ (void) mlxcx_cmd_query_port_fec(mlxp, port);
switch (port->mlp_oper_status) {
case MLXCX_PORT_STATUS_UP:
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
index 6d09abea5c..abd717842d 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
@@ -2464,6 +2464,59 @@ typedef struct {
} mlxcx_reg_ppcnt_t;
typedef enum {
+ MLXCX_PPLM_FEC_CAP_AUTO = 0,
+ MLXCX_PPLM_FEC_CAP_NONE = (1 << 0),
+ MLXCX_PPLM_FEC_CAP_FIRECODE = (1 << 1),
+ MLXCX_PPLM_FEC_CAP_RS = (1 << 2),
+} mlxcx_pplm_fec_caps_t;
+
+typedef enum {
+ MLXCX_PPLM_FEC_ACTIVE_NONE = (1 << 0),
+ MLXCX_PPLM_FEC_ACTIVE_FIRECODE = (1 << 1),
+ MLXCX_PPLM_FEC_ACTIVE_RS528 = (1 << 2),
+ MLXCX_PPLM_FEC_ACTIVE_RS271 = (1 << 3),
+ MLXCX_PPLM_FEC_ACTIVE_RS544 = (1 << 7),
+ MLXCX_PPLM_FEC_ACTIVE_RS272 = (1 << 9),
+} mlxcx_pplm_fec_active_t;
+
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_56G (bitdef_t){ 16, 0x000f0000 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_100G (bitdef_t){ 12, 0x0000f000 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_50G (bitdef_t){ 8, 0x00000f00 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_25G (bitdef_t){ 4, 0x000000f0 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_10_40G (bitdef_t){ 0, 0x0000000f }
+
+typedef struct {
+ uint8_t mlrd_pplm_rsvd;
+ uint8_t mlrd_pplm_local_port;
+ uint8_t mlrd_pplm_rsvd1[11];
+ uint24be_t mlrd_pplm_fec_mode_active;
+ bits32_t mlrd_pplm_fec_override_cap;
+ bits32_t mlrd_pplm_fec_override_admin;
+ uint16be_t mlrd_pplm_fec_override_cap_400g_8x;
+ uint16be_t mlrd_pplm_fec_override_cap_200g_4x;
+ uint16be_t mlrd_pplm_fec_override_cap_100g_2x;
+ uint16be_t mlrd_pplm_fec_override_cap_50g_1x;
+ uint16be_t mlrd_pplm_fec_override_admin_400g_8x;
+ uint16be_t mlrd_pplm_fec_override_admin_200g_4x;
+ uint16be_t mlrd_pplm_fec_override_admin_100g_2x;
+ uint16be_t mlrd_pplm_fec_override_admin_50g_1x;
+ uint8_t mlrd_pplm_rsvd2[8];
+ uint16be_t mlrd_pplm_fec_override_cap_hdr;
+ uint16be_t mlrd_pplm_fec_override_cap_edr;
+ uint16be_t mlrd_pplm_fec_override_cap_fdr;
+ uint16be_t mlrd_pplm_fec_override_cap_fdr10;
+ uint16be_t mlrd_pplm_fec_override_admin_hdr;
+ uint16be_t mlrd_pplm_fec_override_admin_edr;
+ uint16be_t mlrd_pplm_fec_override_admin_fdr;
+ uint16be_t mlrd_pplm_fec_override_admin_fdr10;
+} mlxcx_reg_pplm_t;
+
+typedef enum {
MLXCX_REG_PMTU = 0x5003,
MLXCX_REG_PTYS = 0x5004,
MLXCX_REG_PAOS = 0x5006,
@@ -2472,6 +2525,7 @@ typedef enum {
MLXCX_REG_MLCR = 0x902B,
MLXCX_REG_MCIA = 0x9014,
MLXCX_REG_PPCNT = 0x5008,
+ MLXCX_REG_PPLM = 0x5023,
} mlxcx_register_id_t;
typedef union {
@@ -2482,6 +2536,7 @@ typedef union {
mlxcx_reg_pmaos_t mlrd_pmaos;
mlxcx_reg_mcia_t mlrd_mcia;
mlxcx_reg_ppcnt_t mlrd_ppcnt;
+ mlxcx_reg_pplm_t mlrd_pplm;
} mlxcx_register_data_t;
typedef enum {
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
index 492f8fd8a5..da98a5cf40 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
@@ -1213,6 +1213,8 @@ mlxcx_rx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
ASSERT0(rq->mlwq_state & MLXCX_WQ_BUFFERS);
rq->mlwq_state |= MLXCX_WQ_BUFFERS;
+ mlxcx_shard_ready(rq->mlwq_bufs);
+
for (j = 0; j < rq->mlwq_nents; ++j) {
if (!mlxcx_buf_create(mlxp, rq->mlwq_bufs, &b))
break;
@@ -1409,6 +1411,9 @@ mlxcx_tx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
}
sq->mlwq_state |= MLXCX_WQ_BUFFERS;
+ mlxcx_shard_ready(sq->mlwq_bufs);
+ mlxcx_shard_ready(sq->mlwq_foreign_bufs);
+
if (!mlxcx_cmd_start_sq(mlxp, sq)) {
mutex_exit(&sq->mlwq_mtx);
mutex_exit(&cq->mlcq_mtx);
@@ -1799,22 +1804,29 @@ mlxcx_rq_refill_task(void *arg)
mlxcx_completion_queue_t *cq = wq->mlwq_cq;
mlxcx_t *mlxp = wq->mlwq_mlx;
mlxcx_buf_shard_t *s = wq->mlwq_bufs;
- boolean_t refill;
+ boolean_t refill, draining;
do {
/*
- * Wait until there are some free buffers.
+ * Wait here until one of 3 conditions:
+ * 1. The shard is draining, or
+ * 2. There are buffers on the free list, or
+ * 3. The WQ is being shut down.
*/
mutex_enter(&s->mlbs_mtx);
- while (list_is_empty(&s->mlbs_free) &&
- (cq->mlcq_state & MLXCX_CQ_TEARDOWN) == 0)
+ while (s->mlbs_state != MLXCX_SHARD_DRAINING &&
+ list_is_empty(&s->mlbs_free) &&
+ (cq->mlcq_state & MLXCX_CQ_TEARDOWN) == 0) {
cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+ }
+
+ draining = (s->mlbs_state == MLXCX_SHARD_DRAINING);
mutex_exit(&s->mlbs_mtx);
mutex_enter(&cq->mlcq_mtx);
mutex_enter(&wq->mlwq_mtx);
- if ((cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0) {
+ if (draining || (cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0) {
refill = B_FALSE;
wq->mlwq_state &= ~MLXCX_WQ_REFILLING;
} else {
@@ -1851,7 +1863,10 @@ mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
target = mlwq->mlwq_nents - MLXCX_RQ_REFILL_STEP;
cq = mlwq->mlwq_cq;
- if (cq->mlcq_state & MLXCX_CQ_TEARDOWN)
+ if ((mlwq->mlwq_state & MLXCX_WQ_STARTED) == 0)
+ return;
+
+ if ((cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0)
return;
current = cq->mlcq_bufcnt;
@@ -1883,7 +1898,7 @@ mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
return;
}
- if (mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+ if ((mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) != 0) {
for (i = 0; i < n; ++i)
mlxcx_buf_return(mlxp, b[i]);
return;
@@ -2058,7 +2073,6 @@ mlxcx_rx_completion(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq,
wqe_index = buf->mlb_wqe_index;
if (!mlxcx_buf_loan(mlxp, buf)) {
- mlxcx_warn(mlxp, "!loan failed, dropping packet");
mlxcx_buf_return(mlxp, buf);
return (NULL);
}
@@ -2101,16 +2115,11 @@ mlxcx_buf_mp_return(caddr_t arg)
mlxcx_buffer_t *b = (mlxcx_buffer_t *)arg;
mlxcx_t *mlxp = b->mlb_mlx;
- if (b->mlb_state != MLXCX_BUFFER_ON_LOAN) {
- b->mlb_mp = NULL;
- return;
- }
- /*
- * The mblk for this buffer_t (in its mlb_mp field) has been used now,
- * so NULL it out.
- */
+ /* The mblk has been used now, so NULL it out. */
b->mlb_mp = NULL;
- mlxcx_buf_return(mlxp, b);
+
+ if (b->mlb_state == MLXCX_BUFFER_ON_LOAN)
+ mlxcx_buf_return(mlxp, b);
}
boolean_t
@@ -2177,6 +2186,11 @@ mlxcx_buf_take_foreign(mlxcx_t *mlxp, mlxcx_work_queue_t *wq)
mlxcx_buf_shard_t *s = wq->mlwq_foreign_bufs;
mutex_enter(&s->mlbs_mtx);
+ if (s->mlbs_state != MLXCX_SHARD_READY) {
+ mutex_exit(&s->mlbs_mtx);
+ return (NULL);
+ }
+
if ((b = list_remove_head(&s->mlbs_free)) != NULL) {
ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
ASSERT(b->mlb_foreign);
@@ -2345,6 +2359,11 @@ mlxcx_buf_take(mlxcx_t *mlxp, mlxcx_work_queue_t *wq)
mlxcx_buf_shard_t *s = wq->mlwq_bufs;
mutex_enter(&s->mlbs_mtx);
+ if (s->mlbs_state != MLXCX_SHARD_READY) {
+ mutex_exit(&s->mlbs_mtx);
+ return (NULL);
+ }
+
if ((b = list_remove_head(&s->mlbs_free)) != NULL) {
ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
b->mlb_state = MLXCX_BUFFER_ON_WQ;
@@ -2366,6 +2385,11 @@ mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
s = wq->mlwq_bufs;
mutex_enter(&s->mlbs_mtx);
+ if (s->mlbs_state != MLXCX_SHARD_READY) {
+ mutex_exit(&s->mlbs_mtx);
+ return (0);
+ }
+
while (done < nbufs && (b = list_remove_head(&s->mlbs_free)) != NULL) {
ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
b->mlb_state = MLXCX_BUFFER_ON_WQ;
@@ -2379,6 +2403,8 @@ mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
boolean_t
mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
{
+ mlxcx_buf_shard_t *s = b->mlb_shard;
+
VERIFY3U(b->mlb_state, ==, MLXCX_BUFFER_ON_WQ);
ASSERT3P(b->mlb_mlx, ==, mlxp);
@@ -2391,6 +2417,12 @@ mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
b->mlb_state = MLXCX_BUFFER_ON_LOAN;
b->mlb_wqe_index = 0;
+
+ mutex_enter(&s->mlbs_mtx);
+ list_remove(&s->mlbs_busy, b);
+ list_insert_tail(&s->mlbs_loaned, b);
+ mutex_exit(&s->mlbs_mtx);
+
return (B_TRUE);
}
@@ -2453,7 +2485,23 @@ mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
break;
case MLXCX_BUFFER_ON_LOAN:
ASSERT(!b->mlb_foreign);
- list_remove(&s->mlbs_busy, b);
+ list_remove(&s->mlbs_loaned, b);
+ if (s->mlbs_state == MLXCX_SHARD_DRAINING) {
+ /*
+			 * When we're draining, e.g. during mac_stop(),
+ * we destroy the buffer immediately rather than
+ * recycling it. Otherwise we risk leaving it
+ * on the free list and leaking it.
+ */
+ list_insert_tail(&s->mlbs_free, b);
+ mlxcx_buf_destroy(mlxp, b);
+ /*
+ * Teardown might be waiting for loaned list to empty.
+ */
+ cv_broadcast(&s->mlbs_free_nonempty);
+ mutex_exit(&s->mlbs_mtx);
+ return;
+ }
break;
case MLXCX_BUFFER_FREE:
VERIFY(0);
@@ -2466,7 +2514,7 @@ mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
}
list_insert_tail(&s->mlbs_free, b);
- cv_signal(&s->mlbs_free_nonempty);
+ cv_broadcast(&s->mlbs_free_nonempty);
mutex_exit(&s->mlbs_mtx);
@@ -2484,9 +2532,11 @@ void
mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
{
mlxcx_buf_shard_t *s = b->mlb_shard;
+
VERIFY(b->mlb_state == MLXCX_BUFFER_FREE ||
b->mlb_state == MLXCX_BUFFER_INIT);
ASSERT(mutex_owned(&s->mlbs_mtx));
+
if (b->mlb_state == MLXCX_BUFFER_FREE)
list_remove(&s->mlbs_free, b);
@@ -2506,3 +2556,20 @@ mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
kmem_cache_free(mlxp->mlx_bufs_cache, b);
}
+
+void
+mlxcx_shard_ready(mlxcx_buf_shard_t *s)
+{
+ mutex_enter(&s->mlbs_mtx);
+ s->mlbs_state = MLXCX_SHARD_READY;
+ mutex_exit(&s->mlbs_mtx);
+}
+
+void
+mlxcx_shard_draining(mlxcx_buf_shard_t *s)
+{
+ mutex_enter(&s->mlbs_mtx);
+ s->mlbs_state = MLXCX_SHARD_DRAINING;
+ cv_broadcast(&s->mlbs_free_nonempty);
+ mutex_exit(&s->mlbs_mtx);
+}
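
Finally, the shard state helpers are intended to bracket ring lifetime.
Schematically (following the caller patterns in mlxcx_ring.c and
mlxcx_gld.c above):

	/* Ring start: buffers may be taken and loaned again. */
	mlxcx_shard_ready(wq->mlwq_bufs);

	/*
	 * Ring stop: new takes fail, refill and teardown waiters wake,
	 * and loaned buffers are destroyed as MAC returns them.
	 */
	mlxcx_shard_draining(wq->mlwq_bufs);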