summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorPaul Winder <pwinder@racktopsystems.com>2020-03-31 11:22:25 +0100
committerPaul Winder <paul@winders.demon.co.uk>2020-04-22 16:28:03 +0100
commit82b4190e0f86654c179e1dad46c51c6f999464ec (patch)
treee8bc5b6e14a02722c7185ebba509e336f5f6b4bf /usr/src
parent865498e43471404cd766389d4b8e045ed6ef3be1 (diff)
downloadillumos-joyent-82b4190e0f86654c179e1dad46c51c6f999464ec.tar.gz
12480 long mblk chain will cause mlxcx to stop sending
Reviewed by: Garrett D'Amore <garrett@damore.org> Reviewed by: Andy Stormont <astormont@racktopsystems.com> Reviewed by: Robert Mustacchi <rm@fingolfin.org> Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx.h4
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_gld.c13
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_reg.h12
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_ring.c146
4 files changed, 120 insertions, 55 deletions
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.h b/usr/src/uts/common/io/mlxcx/mlxcx.h
index bf07691095..da048b4ac3 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.h
@@ -1172,8 +1172,8 @@ extern void mlxcx_buf_return(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_buf_return_chain(mlxcx_t *, mlxcx_buffer_t *, boolean_t);
extern void mlxcx_buf_destroy(mlxcx_t *, mlxcx_buffer_t *);
-extern mlxcx_buffer_t *mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
- mblk_t *, size_t);
+extern uint_t mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
+ mblk_t *, size_t, mlxcx_buffer_t **);
extern boolean_t mlxcx_rx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);
extern boolean_t mlxcx_tx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
index a1d50659c1..a08cec3980 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
@@ -395,6 +395,7 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
uint32_t chkflags = 0;
boolean_t ok;
size_t take = 0;
+ uint_t bcount;
VERIFY(mp->b_next == NULL);
@@ -430,8 +431,8 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
}
}
- b = mlxcx_buf_bind_or_copy(mlxp, sq, kmp, take);
- if (b == NULL) {
+ bcount = mlxcx_buf_bind_or_copy(mlxp, sq, kmp, take, &b);
+ if (bcount == 0) {
atomic_or_uint(&sq->mlwq_state, MLXCX_WQ_BLOCKED_MAC);
return (mp);
}
@@ -457,10 +458,12 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
}
/*
- * Similar logic here: bufcnt is only manipulated atomically, and
- * bufhwm is set at startup.
+ * If the completion queue buffer count is already at or above
+ * the high water mark, or the addition of this new chain will
+ * exceed the CQ ring size, then indicate we are blocked.
*/
- if (cq->mlcq_bufcnt >= cq->mlcq_bufhwm) {
+ if (cq->mlcq_bufcnt >= cq->mlcq_bufhwm ||
+ (cq->mlcq_bufcnt + bcount) > cq->mlcq_nents) {
atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_BLOCKED_MAC);
goto blocked;
}
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
index f65280d41d..6d09abea5c 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
@@ -390,8 +390,16 @@ typedef enum {
MLXCX_WQE_OP_RDMA_R = 0x10,
} mlxcx_wqe_opcode_t;
+#define MLXCX_WQE_OCTOWORD 16
#define MLXCX_SQE_MAX_DS ((1 << 6) - 1)
-#define MLXCX_SQE_MAX_PTRS 61
+/*
+ * Calculate the max number of address pointers in a single ethernet
+ * send message. This is the remainder from MLXCX_SQE_MAX_DS
+ * after accounting for the Control and Ethernet segments.
+ */
+#define MLXCX_SQE_MAX_PTRS (MLXCX_SQE_MAX_DS - \
+ (sizeof (mlxcx_wqe_eth_seg_t) + sizeof (mlxcx_wqe_control_seg_t)) / \
+ MLXCX_WQE_OCTOWORD)
typedef enum {
MLXCX_SQE_FENCE_NONE = 0x0,
@@ -2497,6 +2505,8 @@ typedef struct {
#pragma pack()
+CTASSERT(MLXCX_SQE_MAX_PTRS > 0);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
index 2305f943a7..492f8fd8a5 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
@@ -25,6 +25,7 @@
#include <sys/sysmacros.h>
#include <sys/atomic.h>
#include <sys/cpuvar.h>
+#include <sys/sdt.h>
#include <sys/pattr.h>
#include <sys/dlpi.h>
@@ -1567,8 +1568,8 @@ mlxcx_sq_add_buffer(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq,
inlinelen);
}
- ent0->mlsqe_control.mlcs_ds =
- offsetof(mlxcx_sendq_ent_t, mlsqe_data) / 16;
+ ent0->mlsqe_control.mlcs_ds = offsetof(mlxcx_sendq_ent_t, mlsqe_data) /
+ MLXCX_WQE_OCTOWORD;
if (chkflags & HCK_IPV4_HDRCKSUM) {
ASSERT(mlxp->mlx_caps->mlc_checksum);
@@ -1653,7 +1654,20 @@ mlxcx_sq_add_buffer(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq,
/*
* Make sure the workqueue entry is flushed out before updating
* the doorbell.
+ * If the ring has wrapped, we need to flush the front and back.
*/
+ if ((first + ents) > mlwq->mlwq_nents) {
+ uint_t sync_cnt = mlwq->mlwq_nents - first;
+
+ VERIFY0(ddi_dma_sync(mlwq->mlwq_dma.mxdb_dma_handle,
+ (uintptr_t)ent0 - (uintptr_t)mlwq->mlwq_send_ent,
+ sync_cnt * sizeof (mlxcx_sendq_ent_t),
+ DDI_DMA_SYNC_FORDEV));
+
+ ent0 = &mlwq->mlwq_send_ent[0];
+ ents -= sync_cnt;
+ }
+
VERIFY0(ddi_dma_sync(mlwq->mlwq_dma.mxdb_dma_handle,
(uintptr_t)ent0 - (uintptr_t)mlwq->mlwq_send_ent,
ents * sizeof (mlxcx_sendq_ent_t), DDI_DMA_SYNC_FORDEV));
@@ -2205,58 +2219,64 @@ copyb:
return (b);
}
-mlxcx_buffer_t *
-mlxcx_buf_bind_or_copy(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
- mblk_t *mpb, size_t off)
+static mlxcx_buffer_t *
+mlxcx_bind_or_copy_mblk(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
+ mblk_t *mp, size_t off)
{
- mlxcx_buffer_t *b, *b0 = NULL;
- boolean_t first = B_TRUE;
- mblk_t *mp;
+ mlxcx_buffer_t *b;
uint8_t *rptr;
size_t sz;
- size_t ncookies = 0;
boolean_t ret;
- for (mp = mpb; mp != NULL; mp = mp->b_cont) {
- rptr = mp->b_rptr;
- sz = MBLKL(mp);
+ rptr = mp->b_rptr;
+ sz = MBLKL(mp);
- if (off > 0)
- ASSERT3U(off, <, sz);
- rptr += off;
- sz -= off;
+#ifdef DEBUG
+ if (off > 0) {
+ ASSERT3U(off, <, sz);
+ }
+#endif
- if (sz < mlxp->mlx_props.mldp_tx_bind_threshold) {
- b = mlxcx_copy_data(mlxp, wq, rptr, sz);
- if (b == NULL)
- goto failed;
- } else {
- b = mlxcx_buf_take_foreign(mlxp, wq);
- if (b == NULL)
- goto failed;
+ rptr += off;
+ sz -= off;
- ret = mlxcx_dma_bind_mblk(mlxp, &b->mlb_dma, mp, off,
- B_FALSE);
+ if (sz < mlxp->mlx_props.mldp_tx_bind_threshold) {
+ b = mlxcx_copy_data(mlxp, wq, rptr, sz);
+ } else {
+ b = mlxcx_buf_take_foreign(mlxp, wq);
+ if (b == NULL)
+ return (NULL);
- if (!ret) {
- mlxcx_buf_return(mlxp, b);
+ ret = mlxcx_dma_bind_mblk(mlxp, &b->mlb_dma, mp, off,
+ B_FALSE);
- b = mlxcx_copy_data(mlxp, wq, rptr, sz);
- if (b == NULL)
- goto failed;
- }
+ if (!ret) {
+ mlxcx_buf_return(mlxp, b);
+
+ b = mlxcx_copy_data(mlxp, wq, rptr, sz);
}
+ }
+
+ return (b);
+}
+
+uint_t
+mlxcx_buf_bind_or_copy(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
+ mblk_t *mpb, size_t off, mlxcx_buffer_t **bp)
+{
+ mlxcx_buffer_t *b, *b0 = NULL;
+ boolean_t first = B_TRUE;
+ mblk_t *mp;
+ size_t offset = off;
+ size_t ncookies = 0;
+ uint_t count = 0;
+
+ for (mp = mpb; mp != NULL && ncookies <= MLXCX_SQE_MAX_PTRS;
+ mp = mp->b_cont) {
+ b = mlxcx_bind_or_copy_mblk(mlxp, wq, mp, offset);
+ if (b == NULL)
+ goto failed;
- /*
- * We might overestimate here when we've copied data, since
- * the buffer might be longer than what we copied into it. This
- * is safe since it's always wrong in the conservative
- * direction (and we will blow up later when we actually
- * generate the WQE anyway).
- *
- * If the assert below ever blows, we'll have to come and fix
- * this up so we can transmit these packets.
- */
ncookies += b->mlb_dma.mxdb_ncookies;
if (first)
@@ -2267,23 +2287,55 @@ mlxcx_buf_bind_or_copy(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
b->mlb_tx_mp = mp;
b->mlb_tx_head = b0;
- b->mlb_used = sz;
+ b->mlb_used = MBLKL(mp) - offset;
if (!first)
list_insert_tail(&b0->mlb_tx_chain, b);
first = B_FALSE;
- off = 0;
+ offset = 0;
+
+ count++;
+ }
+
+ /*
+ * The chain of mblks has resulted in too many cookies for
+ * a single message. This is unusual, so take the hit to tidy
+ * up, do a pullup to a single mblk and allocate the requisite
+ * buf.
+ */
+ if (ncookies > MLXCX_SQE_MAX_PTRS) {
+ DTRACE_PROBE4(pullup, mlxcx_t *, mlxp, mlxcx_work_queue_t *, wq,
+ mblk_t *, mpb, size_t, ncookies);
+
+ if (b0 != NULL)
+ mlxcx_buf_return_chain(mlxp, b0, B_TRUE);
+
+ if ((mp = msgpullup(mpb, -1)) == NULL)
+ return (0);
+
+ b0 = mlxcx_bind_or_copy_mblk(mlxp, wq, mp, off);
+ if (b0 == NULL) {
+ freemsg(mp);
+ return (0);
+ }
+ freemsg(mpb);
+
+ b0->mlb_tx_mp = mp;
+ b0->mlb_tx_head = b0;
+ b0->mlb_used = MBLKL(mp) - off;
+
+ count = 1;
}
- ASSERT3U(ncookies, <=, MLXCX_SQE_MAX_PTRS);
+ *bp = b0;
- return (b0);
+ return (count);
failed:
if (b0 != NULL)
mlxcx_buf_return_chain(mlxp, b0, B_TRUE);
- return (NULL);
+ return (0);
}
mlxcx_buffer_t *