Diffstat (limited to 'usr/src/uts/common/io/mlxcx')
-rw-r--r--   usr/src/uts/common/io/mlxcx/mlxcx.c      |  50
-rw-r--r--   usr/src/uts/common/io/mlxcx/mlxcx.h      |  16
-rw-r--r--   usr/src/uts/common/io/mlxcx/mlxcx_cmd.c  | 101
-rw-r--r--   usr/src/uts/common/io/mlxcx/mlxcx_gld.c  | 162
-rw-r--r--   usr/src/uts/common/io/mlxcx/mlxcx_intr.c |   1
-rw-r--r--   usr/src/uts/common/io/mlxcx/mlxcx_reg.h  |  55
-rw-r--r--   usr/src/uts/common/io/mlxcx/mlxcx_ring.c | 105
7 files changed, 449 insertions, 41 deletions
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.c b/usr/src/uts/common/io/mlxcx/mlxcx.c
index c90fa0969b..2aefac33db 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.c
@@ -273,11 +273,16 @@
  * before making a WQE for it.
  *
  * After a completion event occurs, the packet is either discarded (and the
- * buffer_t returned to the free list), or it is readied for loaning to MAC.
+ * buffer_t returned to the free list), or it is readied for loaning to MAC
+ * and placed on the "loaned" list in the mlxcx_buf_shard_t.
  *
  * Once MAC and the rest of the system have finished with the packet, they call
- * freemsg() on its mblk, which will call mlxcx_buf_mp_return and return the
- * buffer_t to the free list.
+ * freemsg() on its mblk, which will call mlxcx_buf_mp_return. At this point
+ * the fate of the buffer_t is determined by the state of the
+ * mlxcx_buf_shard_t. When the shard is in its normal state the buffer_t
+ * will be returned to the free list, potentially to be recycled and used
+ * again. But if the shard is draining (e.g. after a ring stop) there will be
+ * no recycling and the buffer_t is immediately destroyed.
  *
  * At detach/teardown time, buffers are only ever destroyed from the free list.
  *
@@ -289,18 +294,18 @@
  *                          v
  *                     +----+----+
  *                     | created |
- *                     +----+----+
- *                          |
- *                          |
- *                          | mlxcx_buf_return
- *                          |
- *                          v
- * mlxcx_buf_destroy   +----+----+
- *           +---------|  free   |<---------------+
- *           |         +----+----+                |
+ *                     +----+----+                        +------+
+ *                          |                             | dead |
+ *                          |                             +------+
+ *                          | mlxcx_buf_return                ^
+ *                          |                                 |
+ *                          v                                 | mlxcx_buf_destroy
+ * mlxcx_buf_destroy   +----+----+          +-----------+     |
+ *           +---------|  free   |<------no-| draining? |-yes-+
+ *           |         +----+----+          +-----------+
+ *           |              |                    ^
  *           |              |                    |
- *           |              |                    | mlxcx_buf_return
- *           v              | mlxcx_buf_take     |
+ *           v              | mlxcx_buf_take     | mlxcx_buf_return
 *        +---+--+           v                    |
 *        | dead |       +---+---+                |
 *        +------+       | on WQ |- - - - - - - - >O
@@ -759,13 +764,19 @@ mlxcx_mlbs_teardown(mlxcx_t *mlxp, mlxcx_buf_shard_t *s)
 	mlxcx_buffer_t *buf;
 
 	mutex_enter(&s->mlbs_mtx);
+
 	while (!list_is_empty(&s->mlbs_busy))
 		cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
-	while ((buf = list_head(&s->mlbs_free)) != NULL) {
+
+	while (!list_is_empty(&s->mlbs_loaned))
+		cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+
+	while ((buf = list_head(&s->mlbs_free)) != NULL)
 		mlxcx_buf_destroy(mlxp, buf);
-	}
+
 	list_destroy(&s->mlbs_free);
 	list_destroy(&s->mlbs_busy);
+	list_destroy(&s->mlbs_loaned);
 	mutex_exit(&s->mlbs_mtx);
 
 	cv_destroy(&s->mlbs_free_nonempty);
@@ -1336,6 +1347,8 @@ mlxcx_mlbs_create(mlxcx_t *mlxp)
 	    offsetof(mlxcx_buffer_t, mlb_entry));
 	list_create(&s->mlbs_free, sizeof (mlxcx_buffer_t),
 	    offsetof(mlxcx_buffer_t, mlb_entry));
+	list_create(&s->mlbs_loaned, sizeof (mlxcx_buffer_t),
+	    offsetof(mlxcx_buffer_t, mlb_entry));
 	cv_init(&s->mlbs_free_nonempty, NULL, CV_DRIVER, NULL);
 
 	list_insert_tail(&mlxp->mlx_buf_shards, s);
@@ -1743,6 +1756,11 @@ mlxcx_setup_ports(mlxcx_t *mlxp)
 			mutex_exit(&p->mlp_mtx);
 			goto err;
 		}
+		if (!mlxcx_cmd_query_port_fec(mlxp, p)) {
+			mutex_exit(&p->mlp_mtx);
+			goto err;
+		}
+		p->mlp_fec_requested = LINK_FEC_AUTO;
 		mutex_exit(&p->mlp_mtx);
 	}
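The buffer lifecycle that the comment block above describes can be modelled compactly outside the kernel. Below is a minimal user-space sketch, not driver code: shard_t, buf_take() and buf_return() are hypothetical stand-ins for mlxcx_buf_shard_t and its accessors, showing how a draining shard destroys a returned buffer instead of recycling it onto the free list.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical user-space model of an mlxcx buffer shard. */
typedef enum { SHARD_READY, SHARD_DRAINING } shard_state_t;
typedef enum { BUF_FREE, BUF_ON_WQ, BUF_ON_LOAN } buf_state_t;

typedef struct buf {
        struct buf *next;
        buf_state_t state;
} buf_t;

typedef struct {
        shard_state_t state;
        buf_t *free;            /* singly-linked free list */
} shard_t;

/* Take a buffer for a WQE; fails when the shard is not ready. */
static buf_t *
buf_take(shard_t *s)
{
        buf_t *b;

        if (s->state != SHARD_READY || (b = s->free) == NULL)
                return (NULL);
        s->free = b->next;
        b->state = BUF_ON_WQ;
        return (b);
}

/* Return a buffer: recycle when ready, destroy when draining. */
static void
buf_return(shard_t *s, buf_t *b)
{
        if (s->state == SHARD_DRAINING) {
                free(b);        /* no recycling during a drain */
                return;
        }
        b->state = BUF_FREE;
        b->next = s->free;
        s->free = b;
}

int
main(void)
{
        shard_t s = { SHARD_READY, NULL };
        buf_t *b = calloc(1, sizeof (buf_t));

        buf_return(&s, b);              /* seed the free list */
        b = buf_take(&s);               /* as mlxcx_buf_take would */
        s.state = SHARD_DRAINING;       /* a ring stop flips the shard */
        buf_return(&s, b);              /* destroyed, not recycled */
        printf("free list empty: %s\n", s.free == NULL ? "yes" : "no");
        return (0);
}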
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.h b/usr/src/uts/common/io/mlxcx/mlxcx.h
index da048b4ac3..06277d033c 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.h
@@ -346,6 +346,8 @@ typedef struct mlxcx_port {
 	mlxcx_eth_proto_t	mlp_max_proto;
 	mlxcx_eth_proto_t	mlp_admin_proto;
 	mlxcx_eth_proto_t	mlp_oper_proto;
+	mlxcx_pplm_fec_active_t	mlp_fec_active;
+	link_fec_t		mlp_fec_requested;
 
 	mlxcx_eth_inline_mode_t	mlp_wqe_min_inline;
 
@@ -424,11 +426,18 @@ typedef enum {
 	MLXCX_BUFFER_ON_CHAIN,
 } mlxcx_buffer_state_t;
 
+typedef enum {
+	MLXCX_SHARD_READY,
+	MLXCX_SHARD_DRAINING,
+} mlxcx_shard_state_t;
+
 typedef struct mlxcx_buf_shard {
+	mlxcx_shard_state_t	mlbs_state;
 	list_node_t		mlbs_entry;
 	kmutex_t		mlbs_mtx;
 	list_t			mlbs_busy;
 	list_t			mlbs_free;
+	list_t			mlbs_loaned;
 	kcondvar_t		mlbs_free_nonempty;
 } mlxcx_buf_shard_t;
 
@@ -1171,6 +1180,8 @@ extern boolean_t mlxcx_buf_loan(mlxcx_t *, mlxcx_buffer_t *);
 extern void mlxcx_buf_return(mlxcx_t *, mlxcx_buffer_t *);
 extern void mlxcx_buf_return_chain(mlxcx_t *, mlxcx_buffer_t *, boolean_t);
 extern void mlxcx_buf_destroy(mlxcx_t *, mlxcx_buffer_t *);
+extern void mlxcx_shard_ready(mlxcx_buf_shard_t *);
+extern void mlxcx_shard_draining(mlxcx_buf_shard_t *);
 
 extern uint_t mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
     mblk_t *, size_t, mlxcx_buffer_t **);
@@ -1311,7 +1322,12 @@ extern boolean_t mlxcx_cmd_access_register(mlxcx_t *, mlxcx_cmd_reg_opmod_t,
     mlxcx_register_id_t, mlxcx_register_data_t *);
 extern boolean_t mlxcx_cmd_query_port_mtu(mlxcx_t *, mlxcx_port_t *);
 extern boolean_t mlxcx_cmd_query_port_status(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_modify_port_status(mlxcx_t *, mlxcx_port_t *,
+    mlxcx_port_status_t);
 extern boolean_t mlxcx_cmd_query_port_speed(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_query_port_fec(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_modify_port_fec(mlxcx_t *, mlxcx_port_t *,
+    mlxcx_pplm_fec_caps_t);
 extern boolean_t mlxcx_cmd_set_port_mtu(mlxcx_t *, mlxcx_port_t *);
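mlxcx.h above adds the mlbs_loaned list alongside mlbs_busy and mlbs_free, and mlxcx_mlbs_teardown() in mlxcx.c now waits for both the busy and loaned lists to empty before destroying the shard. A hedged pthreads model of that wait follows; the names are hypothetical and plain counters stand in for the kernel list_t objects.

#include <pthread.h>
#include <stdbool.h>

/*
 * Hypothetical model of the mlxcx_mlbs_teardown() wait: one condvar
 * covers both the busy and loaned counts, so the waiter loops on each
 * condition in turn.
 */
typedef struct {
        pthread_mutex_t mtx;
        pthread_cond_t nonempty;        /* models mlbs_free_nonempty */
        int nbusy;                      /* models mlbs_busy length */
        int nloaned;                    /* models mlbs_loaned length */
} shard_t;

void
shard_teardown_wait(shard_t *s)
{
        pthread_mutex_lock(&s->mtx);
        while (s->nbusy > 0)
                pthread_cond_wait(&s->nonempty, &s->mtx);
        while (s->nloaned > 0)
                pthread_cond_wait(&s->nonempty, &s->mtx);
        pthread_mutex_unlock(&s->mtx);
}

/* Returning threads broadcast so that every waiter re-evaluates. */
void
shard_buf_returned(shard_t *s, bool was_loaned)
{
        pthread_mutex_lock(&s->mtx);
        if (was_loaned)
                s->nloaned--;
        else
                s->nbusy--;
        pthread_cond_broadcast(&s->nonempty);
        pthread_mutex_unlock(&s->mtx);
}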
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
index 30fb7ca8ef..f059b856a6 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
@@ -12,6 +12,7 @@
 /*
  * Copyright 2020, The University of Queensland
  * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 RackTop Systems, Inc.
  */
 
 /*
@@ -1594,6 +1595,8 @@ mlxcx_reg_name(mlxcx_register_id_t rid)
 		return ("MCIA");
 	case MLXCX_REG_PPCNT:
 		return ("PPCNT");
+	case MLXCX_REG_PPLM:
+		return ("PPLM");
 	default:
 		return ("???");
 	}
@@ -1640,6 +1643,9 @@ mlxcx_cmd_access_register(mlxcx_t *mlxp, mlxcx_cmd_reg_opmod_t opmod,
 	case MLXCX_REG_PPCNT:
 		dsize = sizeof (mlxcx_reg_ppcnt_t);
 		break;
+	case MLXCX_REG_PPLM:
+		dsize = sizeof (mlxcx_reg_pplm_t);
+		break;
 	default:
 		dsize = 0;
 		VERIFY(0);
@@ -1776,6 +1782,25 @@ mlxcx_cmd_query_port_status(mlxcx_t *mlxp, mlxcx_port_t *mlp)
 }
 
 boolean_t
+mlxcx_cmd_modify_port_status(mlxcx_t *mlxp, mlxcx_port_t *mlp,
+    mlxcx_port_status_t status)
+{
+	mlxcx_register_data_t data;
+	boolean_t ret;
+
+	ASSERT(mutex_owned(&mlp->mlp_mtx));
+	bzero(&data, sizeof (data));
+	data.mlrd_paos.mlrd_paos_local_port = mlp->mlp_num + 1;
+	data.mlrd_paos.mlrd_paos_admin_status = status;
+	set_bit32(&data.mlrd_paos.mlrd_paos_flags, MLXCX_PAOS_ADMIN_ST_EN);
+
+	ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_WRITE,
+	    MLXCX_REG_PAOS, &data);
+
+	return (ret);
+}
+
+boolean_t
 mlxcx_cmd_query_port_speed(mlxcx_t *mlxp, mlxcx_port_t *mlp)
 {
 	mlxcx_register_data_t data;
@@ -1809,6 +1834,82 @@ mlxcx_cmd_query_port_speed(mlxcx_t *mlxp, mlxcx_port_t *mlp)
 }
 
 boolean_t
+mlxcx_cmd_query_port_fec(mlxcx_t *mlxp, mlxcx_port_t *mlp)
+{
+	mlxcx_register_data_t data;
+	boolean_t ret;
+
+	ASSERT(mutex_owned(&mlp->mlp_mtx));
+	bzero(&data, sizeof (data));
+	data.mlrd_pplm.mlrd_pplm_local_port = mlp->mlp_num + 1;
+
+	ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+	    MLXCX_REG_PPLM, &data);
+
+	if (ret) {
+		mlp->mlp_fec_active =
+		    from_be24(data.mlrd_pplm.mlrd_pplm_fec_mode_active);
+	}
+
+	return (ret);
+}
+
+boolean_t
+mlxcx_cmd_modify_port_fec(mlxcx_t *mlxp, mlxcx_port_t *mlp,
+    mlxcx_pplm_fec_caps_t fec)
+{
+	mlxcx_register_data_t data_in, data_out;
+	mlxcx_pplm_fec_caps_t caps;
+	mlxcx_reg_pplm_t *pplm_in, *pplm_out;
+	boolean_t ret;
+
+	ASSERT(mutex_owned(&mlp->mlp_mtx));
+	bzero(&data_in, sizeof (data_in));
+	pplm_in = &data_in.mlrd_pplm;
+	pplm_in->mlrd_pplm_local_port = mlp->mlp_num + 1;
+
+	ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+	    MLXCX_REG_PPLM, &data_in);
+
+	if (!ret)
+		return (B_FALSE);
+
+	bzero(&data_out, sizeof (data_out));
+	pplm_out = &data_out.mlrd_pplm;
+	pplm_out->mlrd_pplm_local_port = mlp->mlp_num + 1;
+
+	caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+	    MLXCX_PPLM_CAP_56G);
+	set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+	    MLXCX_PPLM_CAP_56G, fec & caps);
+
+	caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+	    MLXCX_PPLM_CAP_100G);
+	set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+	    MLXCX_PPLM_CAP_100G, fec & caps);
+
+	caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+	    MLXCX_PPLM_CAP_50G);
+	set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+	    MLXCX_PPLM_CAP_50G, fec & caps);
+
+	caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+	    MLXCX_PPLM_CAP_25G);
+	set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+	    MLXCX_PPLM_CAP_25G, fec & caps);
+
+	caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+	    MLXCX_PPLM_CAP_10_40G);
+	set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+	    MLXCX_PPLM_CAP_10_40G, fec & caps);
+
+	ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_WRITE,
+	    MLXCX_REG_PPLM, &data_out);
+
+	return (ret);
+}
+
+boolean_t
 mlxcx_cmd_modify_nic_vport_ctx(mlxcx_t *mlxp, mlxcx_port_t *mlp,
     mlxcx_modify_nic_vport_ctx_fields_t fields)
 {
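mlxcx_cmd_modify_port_fec() performs a read-modify-write of the PPLM register: it reads the per-speed FEC override capabilities, masks the requested bits against them, and writes the result back. The sketch below restates that pattern in standalone C; bitdef_t, get_bits32() and set_bits32() are simplified stand-ins modelled on the driver's helpers, and the register values are invented.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the driver's bitdef_t helpers. */
typedef struct { unsigned shift; uint32_t mask; } bitdef_t;

#define PPLM_CAP_100G   (bitdef_t){ 12, 0x0000f000 }
#define PPLM_CAP_25G    (bitdef_t){ 4, 0x000000f0 }

static uint32_t
get_bits32(uint32_t val, bitdef_t d)
{
        return ((val & d.mask) >> d.shift);
}

static void
set_bits32(uint32_t *val, bitdef_t d, uint32_t field)
{
        *val = (*val & ~d.mask) | ((field << d.shift) & d.mask);
}

int
main(void)
{
        uint32_t cap = 0x000070f0;      /* pretend override_cap readback */
        uint32_t admin = 0;
        uint32_t fec = 0x4;             /* e.g. one requested FEC bit */

        /* Read-modify-write: request only what the port can do. */
        set_bits32(&admin, PPLM_CAP_100G, fec & get_bits32(cap, PPLM_CAP_100G));
        set_bits32(&admin, PPLM_CAP_25G, fec & get_bits32(cap, PPLM_CAP_25G));
        printf("admin = 0x%08x\n", (unsigned)admin);
        return (0);
}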
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
index a08cec3980..2521641a00 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
@@ -80,6 +80,53 @@ mlxcx_speed_to_bits(mlxcx_eth_proto_t v)
 	}
 }
 
+static link_fec_t
+mlxcx_fec_to_link_fec(mlxcx_pplm_fec_active_t mlxcx_fec)
+{
+	if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_NONE) != 0)
+		return (LINK_FEC_NONE);
+
+	if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_FIRECODE) != 0)
+		return (LINK_FEC_BASE_R);
+
+	if ((mlxcx_fec & (MLXCX_PPLM_FEC_ACTIVE_RS528 |
+	    MLXCX_PPLM_FEC_ACTIVE_RS271 | MLXCX_PPLM_FEC_ACTIVE_RS544 |
+	    MLXCX_PPLM_FEC_ACTIVE_RS272)) != 0)
+		return (LINK_FEC_RS);
+
+	return (LINK_FEC_NONE);
+}
+
+static boolean_t
+mlxcx_link_fec_cap(link_fec_t fec, mlxcx_pplm_fec_caps_t *pfecp)
+{
+	mlxcx_pplm_fec_caps_t pplm_fec = 0;
+
+	if ((fec & LINK_FEC_AUTO) != 0) {
+		pplm_fec = MLXCX_PPLM_FEC_CAP_AUTO;
+		fec &= ~LINK_FEC_AUTO;
+	} else if ((fec & LINK_FEC_NONE) != 0) {
+		pplm_fec = MLXCX_PPLM_FEC_CAP_NONE;
+		fec &= ~LINK_FEC_NONE;
+	} else if ((fec & LINK_FEC_RS) != 0) {
+		pplm_fec |= MLXCX_PPLM_FEC_CAP_RS;
+		fec &= ~LINK_FEC_RS;
+	} else if ((fec & LINK_FEC_BASE_R) != 0) {
+		pplm_fec |= MLXCX_PPLM_FEC_CAP_FIRECODE;
+		fec &= ~LINK_FEC_BASE_R;
+	}
+
+	/*
+	 * Only one FEC option is allowed.
+	 */
+	if (fec != 0)
+		return (B_FALSE);
+
+	*pfecp = pplm_fec;
+
+	return (B_TRUE);
+}
+
 static int
 mlxcx_mac_stat_rfc_2863(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat,
     uint64_t *val)
@@ -451,7 +498,8 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
 		return (NULL);
 	}
 
-	if (sq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+	if ((sq->mlwq_state & (MLXCX_WQ_TEARDOWN | MLXCX_WQ_STARTED)) !=
+	    MLXCX_WQ_STARTED) {
 		mutex_exit(&sq->mlwq_mtx);
 		mlxcx_buf_return_chain(mlxp, b, B_FALSE);
 		return (NULL);
@@ -725,8 +773,28 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
 	mlxcx_buf_shard_t *s;
 	mlxcx_buffer_t *buf;
 
+	/*
+	 * To prevent deadlocks and sleeping whilst holding either the
+	 * CQ mutex or WQ mutex, we split the stop processing into two
+	 * parts.
+	 *
+	 * With the CQ and WQ mutexes held the appropriate WQ is stopped.
+	 * The Q in the HCA is set to Reset state and flagged as no
+	 * longer started. Atomically with changing this WQ state, the
+	 * buffer shards are flagged as draining.
+	 *
+	 * Now, any requests for buffers and attempts to submit messages
+	 * will fail and once we're in this state it is safe to relinquish
+	 * the CQ and WQ mutexes, allowing us to complete the ring stop
+	 * by waiting for the buffer lists, with the exception of
+	 * the loaned list, to drain. Buffers on the loaned list are
+	 * not under our control; we will get them back when the mblk tied
+	 * to the buffer is freed.
+	 */
+
 	mutex_enter(&cq->mlcq_mtx);
 	mutex_enter(&wq->mlwq_mtx);
+
 	if (wq->mlwq_state & MLXCX_WQ_STARTED) {
 		if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
 		    !mlxcx_cmd_stop_rq(mlxp, wq)) {
@@ -743,7 +811,15 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
 	}
 	ASSERT0(wq->mlwq_state & MLXCX_WQ_STARTED);
 
+	mlxcx_shard_draining(wq->mlwq_bufs);
+	if (wq->mlwq_foreign_bufs != NULL)
+		mlxcx_shard_draining(wq->mlwq_foreign_bufs);
+
+
 	if (wq->mlwq_state & MLXCX_WQ_BUFFERS) {
+		mutex_exit(&wq->mlwq_mtx);
+		mutex_exit(&cq->mlcq_mtx);
+
 		/* Return any outstanding buffers to the free pool. */
 		while ((buf = list_remove_head(&cq->mlcq_buffers)) != NULL) {
 			mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
@@ -775,12 +851,13 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
 			mutex_exit(&s->mlbs_mtx);
 		}
 
+		mutex_enter(&wq->mlwq_mtx);
 		wq->mlwq_state &= ~MLXCX_WQ_BUFFERS;
+		mutex_exit(&wq->mlwq_mtx);
+	} else {
+		mutex_exit(&wq->mlwq_mtx);
+		mutex_exit(&cq->mlcq_mtx);
 	}
-	ASSERT0(wq->mlwq_state & MLXCX_WQ_BUFFERS);
-
-	mutex_exit(&wq->mlwq_mtx);
-	mutex_exit(&cq->mlcq_mtx);
 }
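The two-phase stop described in the mlxcx_mac_ring_stop() comment generalises to any teardown that must not sleep while holding locks the data path needs. A small pthreads model of the idea, with all names hypothetical:

#include <pthread.h>
#include <stdbool.h>

/*
 * Hypothetical model of the two-phase ring stop: phase one flags the
 * queue stopped and the shard draining while both ring locks are held;
 * phase two drops those locks before any blocking wait, so a thread
 * that needs them to return a buffer can always make progress.
 */
typedef struct {
        pthread_mutex_t cq_mtx, wq_mtx;
        bool started;
        pthread_mutex_t shard_mtx;
        pthread_cond_t shard_cv;
        bool draining;
        int busy;                       /* buffers not yet returned */
} ring_t;

void
ring_stop(ring_t *r)
{
        /* Phase 1: flag the state changes with the ring locks held. */
        pthread_mutex_lock(&r->cq_mtx);
        pthread_mutex_lock(&r->wq_mtx);
        r->started = false;
        pthread_mutex_lock(&r->shard_mtx);
        r->draining = true;
        pthread_cond_broadcast(&r->shard_cv);
        pthread_mutex_unlock(&r->shard_mtx);
        pthread_mutex_unlock(&r->wq_mtx);
        pthread_mutex_unlock(&r->cq_mtx);

        /* Phase 2: sleep only under the shard lock, never the ring locks. */
        pthread_mutex_lock(&r->shard_mtx);
        while (r->busy > 0)
                pthread_cond_wait(&r->shard_cv, &r->shard_mtx);
        pthread_mutex_unlock(&r->shard_mtx);
}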
@@ -1061,6 +1138,14 @@ mlxcx_mac_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
 		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
 		mac_prop_info_set_default_uint8(prh, 1);
 		break;
+	case MAC_PROP_ADV_FEC_CAP:
+		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+		mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO);
+		break;
+	case MAC_PROP_EN_FEC_CAP:
+		mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+		mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO);
+		break;
 	case MAC_PROP_ADV_100GFDX_CAP:
 	case MAC_PROP_EN_100GFDX_CAP:
 		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
@@ -1120,6 +1205,9 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
 	uint32_t new_mtu, new_hw_mtu, old_mtu;
 	mlxcx_buf_shard_t *sh;
 	boolean_t allocd = B_FALSE;
+	boolean_t relink = B_FALSE;
+	link_fec_t fec;
+	mlxcx_pplm_fec_caps_t cap_fec;
 
 	mutex_enter(&port->mlp_mtx);
@@ -1137,7 +1225,8 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
 		for (; sh != NULL; sh = list_next(&mlxp->mlx_buf_shards, sh)) {
 			mutex_enter(&sh->mlbs_mtx);
 			if (!list_is_empty(&sh->mlbs_free) ||
-			    !list_is_empty(&sh->mlbs_busy)) {
+			    !list_is_empty(&sh->mlbs_busy) ||
+			    !list_is_empty(&sh->mlbs_loaned)) {
 				allocd = B_TRUE;
 				mutex_exit(&sh->mlbs_mtx);
 				break;
@@ -1167,11 +1256,57 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
 			break;
 		}
 		break;
+
+	case MAC_PROP_EN_FEC_CAP:
+		bcopy(pr_val, &fec, sizeof (fec));
+		if (!mlxcx_link_fec_cap(fec, &cap_fec)) {
+			ret = EINVAL;
+			break;
+		}
+
+		/*
+		 * Don't change the FEC if it is already at the requested
+		 * setting AND the port is up.
+		 * When the port is down, always set the FEC and attempt
+		 * to retrain the link.
+		 */
+		if (fec == port->mlp_fec_requested &&
+		    fec == mlxcx_fec_to_link_fec(port->mlp_fec_active) &&
+		    port->mlp_oper_status != MLXCX_PORT_STATUS_DOWN)
+			break;
+
+		/*
+		 * The most likely cause of this failing is an invalid
+		 * or unsupported FEC option.
+		 */
+		if (!mlxcx_cmd_modify_port_fec(mlxp, port, cap_fec)) {
+			ret = EINVAL;
+			break;
+		}
+
+		port->mlp_fec_requested = fec;
+
+		/*
+		 * For FEC to become effective, the link needs to go back
+		 * to training and negotiation state. This happens when
+		 * the link transitions from down to up; force a relink.
+		 */
+		relink = B_TRUE;
+		break;
+
 	default:
 		ret = ENOTSUP;
 		break;
 	}
 
+	if (relink) {
+		if (!mlxcx_cmd_modify_port_status(mlxp, port,
+		    MLXCX_PORT_STATUS_DOWN) ||
+		    !mlxcx_cmd_modify_port_status(mlxp, port,
+		    MLXCX_PORT_STATUS_UP)) {
+			ret = EIO;
+		}
+	}
 	mutex_exit(&port->mlp_mtx);
 
 	return (ret);
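The EN_FEC_CAP setprop path above relies on mlxcx_link_fec_cap() rejecting requests with more than one FEC selection set. A standalone restatement of that rule, using invented flag values rather than the real link_fec_t constants:

#include <stdbool.h>
#include <stdint.h>

/*
 * Hypothetical re-statement of the mlxcx_link_fec_cap() rule: exactly
 * one FEC selection may be set (auto, none, RS or Base-R); anything
 * left over after clearing the first match is rejected.
 */
enum { FEC_AUTO = 1 << 0, FEC_NONE = 1 << 1, FEC_RS = 1 << 2,
    FEC_BASE_R = 1 << 3 };
enum { CAP_AUTO = 0, CAP_NONE = 1 << 0, CAP_FIRECODE = 1 << 1,
    CAP_RS = 1 << 2 };

static bool
fec_to_cap(uint32_t fec, uint32_t *capp)
{
        uint32_t cap = 0;

        if (fec & FEC_AUTO) {
                cap = CAP_AUTO;
                fec &= ~FEC_AUTO;
        } else if (fec & FEC_NONE) {
                cap = CAP_NONE;
                fec &= ~FEC_NONE;
        } else if (fec & FEC_RS) {
                cap = CAP_RS;
                fec &= ~FEC_RS;
        } else if (fec & FEC_BASE_R) {
                cap = CAP_FIRECODE;
                fec &= ~FEC_BASE_R;
        }
        if (fec != 0)           /* more than one option requested */
                return (false);
        *capp = cap;
        return (true);
}

When validation and the PPLM write succeed, the driver then forces the link to retrain by toggling the PAOS admin status down and back up, which is what the relink block at the end of setprop implements.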
@@ -1229,6 +1364,21 @@ mlxcx_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
 		}
 		*(uint8_t *)pr_val = port->mlp_autoneg;
 		break;
+	case MAC_PROP_ADV_FEC_CAP:
+		if (pr_valsize < sizeof (link_fec_t)) {
+			ret = EOVERFLOW;
+			break;
+		}
+		*(link_fec_t *)pr_val =
+		    mlxcx_fec_to_link_fec(port->mlp_fec_active);
+		break;
+	case MAC_PROP_EN_FEC_CAP:
+		if (pr_valsize < sizeof (link_fec_t)) {
+			ret = EOVERFLOW;
+			break;
+		}
+		*(link_fec_t *)pr_val = port->mlp_fec_requested;
+		break;
 	case MAC_PROP_MTU:
 		if (pr_valsize < sizeof (uint32_t)) {
 			ret = EOVERFLOW;
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
index 4dc4291b08..aed691897b 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
@@ -355,6 +355,7 @@ mlxcx_update_link_state(mlxcx_t *mlxp, mlxcx_port_t *port)
 	mutex_enter(&port->mlp_mtx);
 	(void) mlxcx_cmd_query_port_status(mlxp, port);
 	(void) mlxcx_cmd_query_port_speed(mlxp, port);
+	(void) mlxcx_cmd_query_port_fec(mlxp, port);
 
 	switch (port->mlp_oper_status) {
 	case MLXCX_PORT_STATUS_UP:
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
index 6d09abea5c..abd717842d 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
@@ -2464,6 +2464,59 @@ typedef struct {
 } mlxcx_reg_ppcnt_t;
 
 typedef enum {
+	MLXCX_PPLM_FEC_CAP_AUTO = 0,
+	MLXCX_PPLM_FEC_CAP_NONE = (1 << 0),
+	MLXCX_PPLM_FEC_CAP_FIRECODE = (1 << 1),
+	MLXCX_PPLM_FEC_CAP_RS = (1 << 2),
+} mlxcx_pplm_fec_caps_t;
+
+typedef enum {
+	MLXCX_PPLM_FEC_ACTIVE_NONE = (1 << 0),
+	MLXCX_PPLM_FEC_ACTIVE_FIRECODE = (1 << 1),
+	MLXCX_PPLM_FEC_ACTIVE_RS528 = (1 << 2),
+	MLXCX_PPLM_FEC_ACTIVE_RS271 = (1 << 3),
+	MLXCX_PPLM_FEC_ACTIVE_RS544 = (1 << 7),
+	MLXCX_PPLM_FEC_ACTIVE_RS272 = (1 << 9),
+} mlxcx_pplm_fec_active_t;
+
+/* CSTYLED */
+#define	MLXCX_PPLM_CAP_56G	(bitdef_t){ 16, 0x000f0000 }
+/* CSTYLED */
+#define	MLXCX_PPLM_CAP_100G	(bitdef_t){ 12, 0x0000f000 }
+/* CSTYLED */
+#define	MLXCX_PPLM_CAP_50G	(bitdef_t){ 8, 0x00000f00 }
+/* CSTYLED */
+#define	MLXCX_PPLM_CAP_25G	(bitdef_t){ 4, 0x000000f0 }
+/* CSTYLED */
+#define	MLXCX_PPLM_CAP_10_40G	(bitdef_t){ 0, 0x0000000f }
+
+typedef struct {
+	uint8_t		mlrd_pplm_rsvd;
+	uint8_t		mlrd_pplm_local_port;
+	uint8_t		mlrd_pplm_rsvd1[11];
+	uint24be_t	mlrd_pplm_fec_mode_active;
+	bits32_t	mlrd_pplm_fec_override_cap;
+	bits32_t	mlrd_pplm_fec_override_admin;
+	uint16be_t	mlrd_pplm_fec_override_cap_400g_8x;
+	uint16be_t	mlrd_pplm_fec_override_cap_200g_4x;
+	uint16be_t	mlrd_pplm_fec_override_cap_100g_2x;
+	uint16be_t	mlrd_pplm_fec_override_cap_50g_1x;
+	uint16be_t	mlrd_pplm_fec_override_admin_400g_8x;
+	uint16be_t	mlrd_pplm_fec_override_admin_200g_4x;
+	uint16be_t	mlrd_pplm_fec_override_admin_100g_2x;
+	uint16be_t	mlrd_pplm_fec_override_admin_50g_1x;
+	uint8_t		mlrd_pplm_rsvd2[8];
+	uint16be_t	mlrd_pplm_fec_override_cap_hdr;
+	uint16be_t	mlrd_pplm_fec_override_cap_edr;
+	uint16be_t	mlrd_pplm_fec_override_cap_fdr;
+	uint16be_t	mlrd_pplm_fec_override_cap_fdr10;
+	uint16be_t	mlrd_pplm_fec_override_admin_hdr;
+	uint16be_t	mlrd_pplm_fec_override_admin_edr;
+	uint16be_t	mlrd_pplm_fec_override_admin_fdr;
+	uint16be_t	mlrd_pplm_fec_override_admin_fdr10;
+} mlxcx_reg_pplm_t;
+
+typedef enum {
 	MLXCX_REG_PMTU		= 0x5003,
 	MLXCX_REG_PTYS		= 0x5004,
 	MLXCX_REG_PAOS		= 0x5006,
@@ -2472,6 +2525,7 @@ typedef enum {
 	MLXCX_REG_MLCR		= 0x902B,
 	MLXCX_REG_MCIA		= 0x9014,
 	MLXCX_REG_PPCNT		= 0x5008,
+	MLXCX_REG_PPLM		= 0x5023,
 } mlxcx_register_id_t;
 
 typedef union {
@@ -2482,6 +2536,7 @@ typedef union {
 	mlxcx_reg_pmaos_t	mlrd_pmaos;
 	mlxcx_reg_mcia_t	mlrd_mcia;
 	mlxcx_reg_ppcnt_t	mlrd_ppcnt;
+	mlxcx_reg_pplm_t	mlrd_pplm;
 } mlxcx_register_data_t;
 
 typedef enum {
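mlrd_pplm_fec_mode_active is declared as uint24be_t and decoded with from_be24() in mlxcx_cmd.c. Assuming a three-byte big-endian representation (the driver's actual definition may differ), a decoder could look like this:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical 3-byte big-endian type, as the name uint24be_t suggests. */
typedef struct { uint8_t b[3]; } uint24be_t;

static uint32_t
from_be24(uint24be_t v)
{
        /* Most significant byte first. */
        return ((uint32_t)v.b[0] << 16 | (uint32_t)v.b[1] << 8 | v.b[2]);
}

int
main(void)
{
        uint24be_t active = { { 0x00, 0x00, 0x04 } };   /* e.g. the RS528 bit */

        printf("fec_mode_active = 0x%x\n", (unsigned)from_be24(active));
        return (0);
}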
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
index 492f8fd8a5..da98a5cf40 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
@@ -1213,6 +1213,8 @@ mlxcx_rx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
 	ASSERT0(rq->mlwq_state & MLXCX_WQ_BUFFERS);
 	rq->mlwq_state |= MLXCX_WQ_BUFFERS;
 
+	mlxcx_shard_ready(rq->mlwq_bufs);
+
 	for (j = 0; j < rq->mlwq_nents; ++j) {
 		if (!mlxcx_buf_create(mlxp, rq->mlwq_bufs, &b))
 			break;
@@ -1409,6 +1411,9 @@ mlxcx_tx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
 	}
 	sq->mlwq_state |= MLXCX_WQ_BUFFERS;
 
+	mlxcx_shard_ready(sq->mlwq_bufs);
+	mlxcx_shard_ready(sq->mlwq_foreign_bufs);
+
 	if (!mlxcx_cmd_start_sq(mlxp, sq)) {
 		mutex_exit(&sq->mlwq_mtx);
 		mutex_exit(&cq->mlcq_mtx);
@@ -1799,22 +1804,29 @@ mlxcx_rq_refill_task(void *arg)
 	mlxcx_completion_queue_t *cq = wq->mlwq_cq;
 	mlxcx_t *mlxp = wq->mlwq_mlx;
 	mlxcx_buf_shard_t *s = wq->mlwq_bufs;
-	boolean_t refill;
+	boolean_t refill, draining;
 
 	do {
 		/*
-		 * Wait until there are some free buffers.
+		 * Wait here until one of 3 conditions:
+		 * 1. The shard is draining, or
+		 * 2. There are buffers on the free list, or
+		 * 3. The WQ is being shut down.
 		 */
 		mutex_enter(&s->mlbs_mtx);
-		while (list_is_empty(&s->mlbs_free) &&
-		    (cq->mlcq_state & MLXCX_CQ_TEARDOWN) == 0)
+		while (s->mlbs_state != MLXCX_SHARD_DRAINING &&
+		    list_is_empty(&s->mlbs_free) &&
+		    (cq->mlcq_state & MLXCX_CQ_TEARDOWN) == 0) {
 			cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+		}
+
+		draining = (s->mlbs_state == MLXCX_SHARD_DRAINING);
 		mutex_exit(&s->mlbs_mtx);
 
 		mutex_enter(&cq->mlcq_mtx);
 		mutex_enter(&wq->mlwq_mtx);
 
-		if ((cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0) {
+		if (draining || (cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0) {
 			refill = B_FALSE;
 			wq->mlwq_state &= ~MLXCX_WQ_REFILLING;
 		} else {
@@ -1851,7 +1863,10 @@ mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
 	target = mlwq->mlwq_nents - MLXCX_RQ_REFILL_STEP;
 	cq = mlwq->mlwq_cq;
 
-	if (cq->mlcq_state & MLXCX_CQ_TEARDOWN)
+	if ((mlwq->mlwq_state & MLXCX_WQ_STARTED) == 0)
+		return;
+
+	if ((cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0)
 		return;
 
 	current = cq->mlcq_bufcnt;
@@ -1883,7 +1898,7 @@ mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
 			return;
 		}
 
-	if (mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+	if ((mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) != 0) {
 		for (i = 0; i < n; ++i)
 			mlxcx_buf_return(mlxp, b[i]);
 		return;
@@ -2058,7 +2073,6 @@ mlxcx_rx_completion(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq,
 	wqe_index = buf->mlb_wqe_index;
 
 	if (!mlxcx_buf_loan(mlxp, buf)) {
-		mlxcx_warn(mlxp, "!loan failed, dropping packet");
 		mlxcx_buf_return(mlxp, buf);
 		return (NULL);
 	}
@@ -2101,16 +2115,11 @@ mlxcx_buf_mp_return(caddr_t arg)
 	mlxcx_buffer_t *b = (mlxcx_buffer_t *)arg;
 	mlxcx_t *mlxp = b->mlb_mlx;
 
-	if (b->mlb_state != MLXCX_BUFFER_ON_LOAN) {
-		b->mlb_mp = NULL;
-		return;
-	}
-
-	/*
-	 * The mblk for this buffer_t (in its mlb_mp field) has been used now,
-	 * so NULL it out.
-	 */
+	/* The mblk has been used now, so NULL it out. */
 	b->mlb_mp = NULL;
-	mlxcx_buf_return(mlxp, b);
+
+	if (b->mlb_state == MLXCX_BUFFER_ON_LOAN)
+		mlxcx_buf_return(mlxp, b);
 }
 
 boolean_t
@@ -2177,6 +2186,11 @@ mlxcx_buf_take_foreign(mlxcx_t *mlxp, mlxcx_work_queue_t *wq)
 	mlxcx_buf_shard_t *s = wq->mlwq_foreign_bufs;
 
 	mutex_enter(&s->mlbs_mtx);
+	if (s->mlbs_state != MLXCX_SHARD_READY) {
+		mutex_exit(&s->mlbs_mtx);
+		return (NULL);
+	}
+
 	if ((b = list_remove_head(&s->mlbs_free)) != NULL) {
 		ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
 		ASSERT(b->mlb_foreign);
@@ -2345,6 +2359,11 @@ mlxcx_buf_take(mlxcx_t *mlxp, mlxcx_work_queue_t *wq)
 	mlxcx_buf_shard_t *s = wq->mlwq_bufs;
 
 	mutex_enter(&s->mlbs_mtx);
+	if (s->mlbs_state != MLXCX_SHARD_READY) {
+		mutex_exit(&s->mlbs_mtx);
+		return (NULL);
+	}
+
 	if ((b = list_remove_head(&s->mlbs_free)) != NULL) {
 		ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
 		b->mlb_state = MLXCX_BUFFER_ON_WQ;
@@ -2366,6 +2385,11 @@ mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
 	s = wq->mlwq_bufs;
 
 	mutex_enter(&s->mlbs_mtx);
+	if (s->mlbs_state != MLXCX_SHARD_READY) {
+		mutex_exit(&s->mlbs_mtx);
+		return (0);
+	}
+
 	while (done < nbufs && (b = list_remove_head(&s->mlbs_free)) != NULL) {
 		ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
 		b->mlb_state = MLXCX_BUFFER_ON_WQ;
@@ -2379,6 +2403,8 @@ mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
 boolean_t
 mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
 {
+	mlxcx_buf_shard_t *s = b->mlb_shard;
+
 	VERIFY3U(b->mlb_state, ==, MLXCX_BUFFER_ON_WQ);
 	ASSERT3P(b->mlb_mlx, ==, mlxp);
@@ -2391,6 +2417,12 @@ mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
 
 	b->mlb_state = MLXCX_BUFFER_ON_LOAN;
 	b->mlb_wqe_index = 0;
+
+	mutex_enter(&s->mlbs_mtx);
+	list_remove(&s->mlbs_busy, b);
+	list_insert_tail(&s->mlbs_loaned, b);
+	mutex_exit(&s->mlbs_mtx);
+
 	return (B_TRUE);
 }
@@ -2453,7 +2485,23 @@ mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
 		break;
 	case MLXCX_BUFFER_ON_LOAN:
 		ASSERT(!b->mlb_foreign);
-		list_remove(&s->mlbs_busy, b);
+		list_remove(&s->mlbs_loaned, b);
+		if (s->mlbs_state == MLXCX_SHARD_DRAINING) {
+			/*
+			 * When we're draining, e.g. during mac_stop(),
+			 * we destroy the buffer immediately rather than
+			 * recycling it. Otherwise we risk leaving it
+			 * on the free list and leaking it.
+			 */
+			list_insert_tail(&s->mlbs_free, b);
+			mlxcx_buf_destroy(mlxp, b);
+			/*
+			 * Teardown might be waiting for loaned list to empty.
+			 */
+			cv_broadcast(&s->mlbs_free_nonempty);
+			mutex_exit(&s->mlbs_mtx);
+			return;
+		}
 		break;
 	case MLXCX_BUFFER_FREE:
 		VERIFY(0);
@@ -2466,7 +2514,7 @@ mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
 	}
 
 	list_insert_tail(&s->mlbs_free, b);
-	cv_signal(&s->mlbs_free_nonempty);
+	cv_broadcast(&s->mlbs_free_nonempty);
 
 	mutex_exit(&s->mlbs_mtx);
@@ -2484,9 +2532,11 @@ void
 mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
 {
 	mlxcx_buf_shard_t *s = b->mlb_shard;
+
 	VERIFY(b->mlb_state == MLXCX_BUFFER_FREE ||
 	    b->mlb_state == MLXCX_BUFFER_INIT);
 	ASSERT(mutex_owned(&s->mlbs_mtx));
+
 	if (b->mlb_state == MLXCX_BUFFER_FREE)
 		list_remove(&s->mlbs_free, b);
@@ -2506,3 +2556,20 @@ mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
 
 	kmem_cache_free(mlxp->mlx_bufs_cache, b);
 }
+
+void
+mlxcx_shard_ready(mlxcx_buf_shard_t *s)
+{
+	mutex_enter(&s->mlbs_mtx);
+	s->mlbs_state = MLXCX_SHARD_READY;
+	mutex_exit(&s->mlbs_mtx);
+}
+
+void
+mlxcx_shard_draining(mlxcx_buf_shard_t *s)
+{
+	mutex_enter(&s->mlbs_mtx);
+	s->mlbs_state = MLXCX_SHARD_DRAINING;
+	cv_broadcast(&s->mlbs_free_nonempty);
+	mutex_exit(&s->mlbs_mtx);
+}
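The reworked mlxcx_rq_refill_task() loop waits on one condvar for three distinct conditions, which is also why the driver switches from cv_signal() to cv_broadcast(): with several conditions multiplexed onto mlbs_free_nonempty, a single wakeup could be consumed by a waiter whose own condition is still false. A pthreads model of the wait, with hypothetical names:

#include <pthread.h>
#include <stdbool.h>

/*
 * Hypothetical model of the mlxcx_rq_refill_task() wait: sleep until
 * the shard is draining, the free list is non-empty, or teardown has
 * begun.
 */
typedef struct {
        pthread_mutex_t mtx;
        pthread_cond_t cv;              /* models mlbs_free_nonempty */
        bool draining;                  /* models MLXCX_SHARD_DRAINING */
        bool teardown;                  /* models MLXCX_CQ_TEARDOWN */
        int nfree;                      /* models mlbs_free length */
} refill_state_t;

/* Returns true when a refill should be attempted. */
bool
refill_wait(refill_state_t *r)
{
        bool refill;

        pthread_mutex_lock(&r->mtx);
        while (!r->draining && r->nfree == 0 && !r->teardown)
                pthread_cond_wait(&r->cv, &r->mtx);
        refill = !r->draining && !r->teardown;
        pthread_mutex_unlock(&r->mtx);
        return (refill);
}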