author    Paul Winder <paul.winder@tegile.com>	2017-02-20 14:00:11 +0000
committer Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>	2017-02-23 15:46:40 +0100
commit    396505af9432aab52f4853cfde77ca834a9cce76
tree      962e0bdc3a43923f1196ed99e27c75956756c2c2
parent    1951a93366b3e52f7c49ec9b2dbfb21bccc56a3e
7865 Allow i40e to use multiple rings
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dale Ghent <daleg@omniti.com>
Approved by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
-rw-r--r--   usr/src/uts/common/io/i40e/core/i40e_adminq_cmd.h |  18
-rw-r--r--   usr/src/uts/common/io/i40e/core/i40e_common.c     | 122
-rw-r--r--   usr/src/uts/common/io/i40e/core/i40e_prototype.h  |   8
-rw-r--r--   usr/src/uts/common/io/i40e/i40e_gld.c              |  15
-rw-r--r--   usr/src/uts/common/io/i40e/i40e_intr.c             |  97
-rw-r--r--   usr/src/uts/common/io/i40e/i40e_main.c             | 157
-rw-r--r--   usr/src/uts/common/io/i40e/i40e_sw.h               |   9
7 files changed, 338 insertions, 88 deletions
diff --git a/usr/src/uts/common/io/i40e/core/i40e_adminq_cmd.h b/usr/src/uts/common/io/i40e/core/i40e_adminq_cmd.h
index af9f107597..806fe0e97b 100644
--- a/usr/src/uts/common/io/i40e/core/i40e_adminq_cmd.h
+++ b/usr/src/uts/common/io/i40e/core/i40e_adminq_cmd.h
@@ -158,6 +158,9 @@ enum i40e_admin_queue_opc {
i40e_aqc_opc_set_port_parameters = 0x0203,
i40e_aqc_opc_get_switch_resource_alloc = 0x0204,
+ i40e_aqc_opc_rx_ctl_reg_read = 0x0206,
+ i40e_aqc_opc_rx_ctl_reg_write = 0x0207,
+
i40e_aqc_opc_add_vsi = 0x0210,
i40e_aqc_opc_update_vsi_parameters = 0x0211,
i40e_aqc_opc_get_vsi_parameters = 0x0212,
@@ -694,6 +697,20 @@ struct i40e_aqc_switch_resource_alloc_element_resp {
I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp);
+/* Read Receive control registers (direct 0x0206)
+ * Write Receive control registers (direct 0x0207)
+ * used for accessing Rx control registers that can be
+ * slow and need special handling when under high Rx load
+ */
+struct i40e_aqc_rx_ctl_reg_read_write {
+ __le32 reserved1;
+ __le32 address;
+ __le32 reserved2;
+ __le32 value;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_rx_ctl_reg_read_write);
+
/* Add VSI (indirect 0x0210)
* this indirect command uses struct i40e_aqc_vsi_properties_data
* as the indirect buffer (128 bytes)
@@ -838,6 +855,7 @@ struct i40e_aqc_vsi_properties_data {
#define I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT 9
#define I40E_AQ_VSI_TC_QUE_NUMBER_MASK (0x7 << \
I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT)
+#define I40E_AQ_VSI_TC_QUE_SIZE_MAX (1 << 0x6)
/* queueing option section */
u8 queueing_opt_flags;
#ifdef X722_SUPPORT
diff --git a/usr/src/uts/common/io/i40e/core/i40e_common.c b/usr/src/uts/common/io/i40e/core/i40e_common.c
index c58eb9de1e..257c485b04 100644
--- a/usr/src/uts/common/io/i40e/core/i40e_common.c
+++ b/usr/src/uts/common/io/i40e/core/i40e_common.c
@@ -5610,6 +5610,128 @@ enum i40e_status_code i40e_aq_configure_partition_bw(struct i40e_hw *hw,
}
/**
+ * i40e_aq_rx_ctl_read_register - use FW to read from an Rx control register
+ * @hw: pointer to the hw struct
+ * @reg_addr: register address
+ * @reg_val: ptr to register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Use the firmware to read the Rx control register,
+ * especially useful if the Rx unit is under heavy pressure
+ **/
+enum i40e_status_code i40e_aq_rx_ctl_read_register(struct i40e_hw *hw,
+ u32 reg_addr, u32 *reg_val,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_rx_ctl_reg_read_write *cmd_resp =
+ (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (reg_val == NULL)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_rx_ctl_reg_read);
+
+ cmd_resp->address = CPU_TO_LE32(reg_addr);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ if (status == I40E_SUCCESS)
+ *reg_val = LE32_TO_CPU(cmd_resp->value);
+
+ return status;
+}
+
+/**
+ * i40e_read_rx_ctl - read from an Rx control register
+ * @hw: pointer to the hw struct
+ * @reg_addr: register address
+ **/
+u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr)
+{
+ enum i40e_status_code status = I40E_SUCCESS;
+ bool use_register;
+ int retry = 5;
+ u32 val = 0;
+
+ use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5);
+ if (!use_register) {
+do_retry:
+ status = i40e_aq_rx_ctl_read_register(hw, reg_addr, &val, NULL);
+ if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) {
+ i40e_msec_delay(1);
+ retry--;
+ goto do_retry;
+ }
+ }
+
+ /* if the AQ access failed, try the old-fashioned way */
+ if (status || use_register)
+ val = rd32(hw, reg_addr);
+
+ return val;
+}
+
+/**
+ * i40e_aq_rx_ctl_write_register
+ * @hw: pointer to the hw struct
+ * @reg_addr: register address
+ * @reg_val: register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Use the firmware to write to an Rx control register,
+ * especially useful if the Rx unit is under heavy pressure
+ **/
+enum i40e_status_code i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
+ u32 reg_addr, u32 reg_val,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_rx_ctl_reg_read_write *cmd =
+ (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_rx_ctl_reg_write);
+
+ cmd->address = CPU_TO_LE32(reg_addr);
+ cmd->value = CPU_TO_LE32(reg_val);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_write_rx_ctl - write to an Rx control register
+ * @hw: pointer to the hw struct
+ * @reg_addr: register address
+ * @reg_val: register value
+ **/
+void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val)
+{
+ enum i40e_status_code status = I40E_SUCCESS;
+ bool use_register;
+ int retry = 5;
+
+ use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5);
+ if (!use_register) {
+do_retry:
+ status = i40e_aq_rx_ctl_write_register(hw, reg_addr,
+ reg_val, NULL);
+ if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) {
+ i40e_msec_delay(1);
+ retry--;
+ goto do_retry;
+ }
+ }
+
+ /* if the AQ access failed, try the old-fashioned way */
+ if (status || use_register)
+ wr32(hw, reg_addr, reg_val);
+}
+
+/**
* i40e_aq_send_msg_to_pf
* @hw: pointer to the hardware structure
* @v_opcode: opcodes for VF-PF communication
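
Taken together, i40e_read_rx_ctl() and i40e_write_rx_ctl() give callers the usual register read/write shape while hiding the firmware round-trip, the EAGAIN retry, and the fallback to rd32()/wr32() on pre-1.5 firmware. A hedged usage sketch (a hypothetical caller, not part of this patch), doing a read-modify-write of the RSS hash-enable register pair through the new accessors:

/*
 * Hypothetical caller, for illustration only: enable RSS hashing for one
 * additional packet classification type by read-modify-writing the 64-bit
 * PFQF_HENA register pair via the new Rx control register accessors.
 */
static void
i40e_example_enable_pctype(struct i40e_hw *hw, uint_t pctype)
{
	uint64_t hena;

	hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0));
	hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32;

	hena |= 1ULL << pctype;

	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (uint32_t)hena);
	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (uint32_t)(hena >> 32));
}
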
diff --git a/usr/src/uts/common/io/i40e/core/i40e_prototype.h b/usr/src/uts/common/io/i40e/core/i40e_prototype.h
index 6f1cfc3afe..5516642273 100644
--- a/usr/src/uts/common/io/i40e/core/i40e_prototype.h
+++ b/usr/src/uts/common/io/i40e/core/i40e_prototype.h
@@ -475,4 +475,12 @@ enum i40e_status_code i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id,
void *buff, u16 *ret_buff_size,
u8 *ret_next_table, u32 *ret_next_index,
struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_rx_ctl_read_register(struct i40e_hw *hw,
+ u32 reg_addr, u32 *reg_val,
+ struct i40e_asq_cmd_details *cmd_details);
+u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr);
+enum i40e_status_code i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
+ u32 reg_addr, u32 reg_val,
+ struct i40e_asq_cmd_details *cmd_details);
+void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val);
#endif /* _I40E_PROTOTYPE_H_ */
diff --git a/usr/src/uts/common/io/i40e/i40e_gld.c b/usr/src/uts/common/io/i40e/i40e_gld.c
index b599a051f5..f12968a510 100644
--- a/usr/src/uts/common/io/i40e/i40e_gld.c
+++ b/usr/src/uts/common/io/i40e/i40e_gld.c
@@ -12,6 +12,7 @@
/*
* Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright 2016 Joyent, Inc.
+ * Copyright 2017 Tegile Systems, Inc. All rights reserved.
*/
/*
@@ -453,11 +454,11 @@ i40e_rx_ring_intr_enable(mac_intr_handle_t intrh)
i40e_trqpair_t *itrq = (i40e_trqpair_t *)intrh;
i40e_t *i40e = itrq->itrq_i40e;
- mutex_enter(&i40e->i40e_general_lock);
- ASSERT(i40e->i40e_intr_poll == B_TRUE);
+ mutex_enter(&itrq->itrq_rx_lock);
+ ASSERT(itrq->itrq_intr_poll == B_TRUE);
i40e_intr_rx_queue_enable(i40e, itrq->itrq_index);
- i40e->i40e_intr_poll = B_FALSE;
- mutex_exit(&i40e->i40e_general_lock);
+ itrq->itrq_intr_poll = B_FALSE;
+ mutex_exit(&itrq->itrq_rx_lock);
return (0);
}
@@ -469,10 +470,10 @@ i40e_rx_ring_intr_disable(mac_intr_handle_t intrh)
i40e_trqpair_t *itrq = (i40e_trqpair_t *)intrh;
i40e_t *i40e = itrq->itrq_i40e;
- mutex_enter(&i40e->i40e_general_lock);
+ mutex_enter(&itrq->itrq_rx_lock);
i40e_intr_rx_queue_disable(i40e, itrq->itrq_index);
- i40e->i40e_intr_poll = B_TRUE;
- mutex_exit(&i40e->i40e_general_lock);
+ itrq->itrq_intr_poll = B_TRUE;
+ mutex_exit(&itrq->itrq_rx_lock);
return (0);
}
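
The switch from the driver-wide i40e_intr_poll flag to a per-ring itrq_intr_poll means MAC can park one ring in polling mode without affecting the others; both the flag and the queue's interrupt enable/disable are serialized by itrq_rx_lock. A simplified sketch of the matching MAC polling entry point (illustrative only; the driver's actual poll function may differ in detail):

/*
 * Simplified sketch of a per-ring MAC polling entry point. MAC only calls
 * this between i40e_rx_ring_intr_disable() and i40e_rx_ring_intr_enable(),
 * i.e. while itrq_intr_poll is B_TRUE, and the interrupt path checks that
 * flag under the same lock so it will not also pull frames from this ring.
 */
static mblk_t *
i40e_example_ring_rx_poll(void *arg, int poll_bytes)
{
	i40e_trqpair_t *itrq = arg;
	mblk_t *mp;

	mutex_enter(&itrq->itrq_rx_lock);
	mp = i40e_ring_rx(itrq, poll_bytes);
	mutex_exit(&itrq->itrq_rx_lock);

	return (mp);
}
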
diff --git a/usr/src/uts/common/io/i40e/i40e_intr.c b/usr/src/uts/common/io/i40e/i40e_intr.c
index ba9bea7b20..41387dbd3a 100644
--- a/usr/src/uts/common/io/i40e/i40e_intr.c
+++ b/usr/src/uts/common/io/i40e/i40e_intr.c
@@ -11,6 +11,7 @@
/*
* Copyright 2016 Joyent, Inc.
+ * Copyright 2017 Tegile Systems, Inc. All rights reserved.
*/
/*
@@ -76,11 +77,6 @@
* queue defines the next one in either the I40E_QINT_RQCTL or I40E_QINT_TQCTL
* register.
*
- * Because we only have a single queue enabled at the moment and we always have
- * two interrupts, we do something pretty simple and just know that there's one
- * data queue in the interrupt handler. Longer term, we'll need to think harder
- * about this, but for the moment it'll have to suffice.
- *
* Finally, the individual interrupt vector itself has the ability to be enabled
* and disabled. The overall interrupt is controlled through the
* I40E_PFINT_DYN_CTLN() register. This is used to turn on and off the interrupt
@@ -199,8 +195,8 @@ i40e_intr_set_itr(i40e_t *i40e, i40e_itr_index_t itr, uint_t val)
return;
}
- for (i = 1; i < i40e->i40e_intr_count; i++) {
- I40E_WRITE_REG(hw, I40E_PFINT_ITRN(itr, i - 1), val);
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ I40E_WRITE_REG(hw, I40E_PFINT_ITRN(itr, i), val);
}
}
@@ -331,18 +327,18 @@ i40e_intr_io_clear_cause(i40e_t *i40e)
return;
}
- for (i = 1; i < i40e->i40e_intr_count; i++) {
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
uint32_t reg;
#ifdef DEBUG
/*
* Verify that the interrupt in question is disabled. This is a
* prerequisite of modifying the data in question.
*/
- reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i - 1));
+ reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i));
VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK);
#endif
reg = I40E_QUEUE_TYPE_EOL;
- I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i - 1), reg);
+ I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i), reg);
}
i40e_flush(hw);
@@ -365,11 +361,11 @@ i40e_intr_chip_fini(i40e_t *i40e)
* and the interrupt linked lists have been zeroed.
*/
if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
- for (i = 1; i < i40e->i40e_intr_count; i++) {
- reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i - 1));
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i));
VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK);
- reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i - 1));
+ reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i));
VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
}
}
@@ -388,32 +384,39 @@ i40e_intr_init_queue_msix(i40e_t *i40e)
{
i40e_hw_t *hw = &i40e->i40e_hw_space;
uint32_t reg;
+ int i;
/*
- * Because we only have a single queue, just do something simple now.
- * How this all works will need to really be properly redone based on
- * the bit maps, etc. Note that we skip the ITR logic for the moment,
- * just to make our lives as explicit and simple as possible.
+ * Map queues to MSI-X interrupts. Queue i is mapped to vector i + 1.
+ * Note that we skip the ITR logic for the moment, just to make our
+ * lives as explicit and simple as possible.
*/
- reg = (0 << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) |
- (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
- I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(0), reg);
-
- reg = (1 << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
- (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
- (0 << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
- (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
- I40E_QINT_RQCTL_CAUSE_ENA_MASK;
-
- I40E_WRITE_REG(hw, I40E_QINT_RQCTL(0), reg);
-
- reg = (1 << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
- (I40E_ITR_INDEX_TX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
- (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
- (I40E_QUEUE_TYPE_RX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
- I40E_QINT_TQCTL_CAUSE_ENA_MASK;
-
- I40E_WRITE_REG(hw, I40E_QINT_TQCTL(0), reg);
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];
+
+ reg = (i << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) |
+ (I40E_QUEUE_TYPE_RX <<
+ I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+ I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i), reg);
+
+ reg =
+ (itrq->itrq_rx_intrvec << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
+ (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
+ (i << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
+ (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
+ I40E_QINT_RQCTL_CAUSE_ENA_MASK;
+
+ I40E_WRITE_REG(hw, I40E_QINT_RQCTL(i), reg);
+
+ reg =
+ (itrq->itrq_tx_intrvec << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
+ (I40E_ITR_INDEX_TX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
+ (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
+ (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT) |
+ I40E_QINT_TQCTL_CAUSE_ENA_MASK;
+
+ I40E_WRITE_REG(hw, I40E_QINT_TQCTL(i), reg);
+ }
}
@@ -601,14 +604,15 @@ i40e_intr_adminq_work(i40e_t *i40e)
static void
i40e_intr_rx_work(i40e_t *i40e, int queue)
{
- mblk_t *mp;
+ mblk_t *mp = NULL;
i40e_trqpair_t *itrq;
ASSERT(queue < i40e->i40e_num_trqpairs);
itrq = &i40e->i40e_trqpairs[queue];
mutex_enter(&itrq->itrq_rx_lock);
- mp = i40e_ring_rx(itrq, I40E_POLL_NULL);
+ if (!itrq->itrq_intr_poll)
+ mp = i40e_ring_rx(itrq, I40E_POLL_NULL);
mutex_exit(&itrq->itrq_rx_lock);
if (mp != NULL) {
@@ -675,20 +679,9 @@ i40e_intr_msix(void *arg1, void *arg2)
return (DDI_INTR_CLAIMED);
}
- VERIFY(vector_idx == 1);
-
- /*
- * Note that we explicitly do not check this value under the lock even
- * though assignments to it are done so. In this case, the cost of
- * getting this wrong is at worst a bit of additional contention and
- * even more rarely, a duplicated packet. However, the cost on the other
- * hand is a lot more. This is something that as we more generally
- * implement ring support we should revisit.
- */
- if (i40e->i40e_intr_poll != B_TRUE)
- i40e_intr_rx_work(i40e, 0);
- i40e_intr_tx_work(i40e, 0);
- i40e_intr_io_enable(i40e, 1);
+ i40e_intr_rx_work(i40e, vector_idx - 1);
+ i40e_intr_tx_work(i40e, vector_idx - 1);
+ i40e_intr_io_enable(i40e, vector_idx);
return (DDI_INTR_CLAIMED);
}
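
With one link list and cause register programmed per queue, the handler above no longer special-cases a single data queue: vector 0 remains the admin queue, and data vector n services queue pair n - 1. A hedged sketch of that mapping (a hypothetical helper, not in the patch):

/*
 * Sketch, not part of the patch: recover the queue pair serviced by a given
 * MSI-X data vector. Vector 0 is the admin queue; data vectors start at 1,
 * so vector n drives i40e_trqpairs[n - 1].
 */
static i40e_trqpair_t *
i40e_example_vec_to_trqpair(i40e_t *i40e, uint_t vector_idx)
{
	ASSERT3U(vector_idx, >=, 1);
	ASSERT3U(vector_idx, <=, i40e->i40e_num_trqpairs);

	return (&i40e->i40e_trqpairs[vector_idx - 1]);
}
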
diff --git a/usr/src/uts/common/io/i40e/i40e_main.c b/usr/src/uts/common/io/i40e/i40e_main.c
index d765174387..690ddc5a8d 100644
--- a/usr/src/uts/common/io/i40e/i40e_main.c
+++ b/usr/src/uts/common/io/i40e/i40e_main.c
@@ -12,6 +12,7 @@
/*
* Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright 2016 Joyent, Inc.
+ * Copyright 2017 Tegile Systems, Inc. All rights reserved.
*/
/*
@@ -359,7 +360,6 @@
* overview of expected work:
*
* o TSO support
- * o RSS / multiple ring support
* o Multiple group support
* o DMA binding and breaking up the locking in ring recycling.
* o Enhanced detection of device errors
@@ -371,7 +371,7 @@
#include "i40e_sw.h"
-static char i40e_ident[] = "Intel 10/40Gb Ethernet v1.0.0";
+static char i40e_ident[] = "Intel 10/40Gb Ethernet v1.0.1";
/*
* The i40e_glock primarily protects the lists below and the i40e_device_t
@@ -1594,7 +1594,10 @@ i40e_init_properties(i40e_t *i40e)
static boolean_t
i40e_alloc_intr_handles(i40e_t *i40e, dev_info_t *devinfo, int intr_type)
{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ ddi_acc_handle_t rh = i40e->i40e_osdep_space.ios_reg_handle;
int request, count, actual, rc, min;
+ uint32_t reg;
switch (intr_type) {
case DDI_INTR_TYPE_FIXED:
@@ -1603,16 +1606,24 @@ i40e_alloc_intr_handles(i40e_t *i40e, dev_info_t *devinfo, int intr_type)
min = 1;
break;
case DDI_INTR_TYPE_MSIX:
+ min = 2;
+ if (!i40e->i40e_mr_enable) {
+ request = 2;
+ break;
+ }
+ reg = I40E_READ_REG(hw, I40E_GLPCI_CNF2);
/*
- * At the moment, we always request two MSI-X while we still
- * only support a single interrupt. The upper bound on what's
- * supported by a given device is defined by MSI_X_PF_N in
- * GLPCI_CNF2. When we evolve, we should read it to determine
- * what the real max is.
+ * Should this read fail, we will drop back to using
+ * MSI or fixed interrupts.
*/
- ASSERT(i40e->i40e_num_trqpairs == 1);
- request = 2;
- min = 2;
+ if (i40e_check_acc_handle(rh) != DDI_FM_OK) {
+ ddi_fm_service_impact(i40e->i40e_dip,
+ DDI_SERVICE_DEGRADED);
+ return (B_FALSE);
+ }
+ request = (reg & I40E_GLPCI_CNF2_MSI_X_PF_N_MASK) >>
+ I40E_GLPCI_CNF2_MSI_X_PF_N_SHIFT;
+ request++; /* the register value is n - 1 */
break;
default:
panic("bad interrupt type passed to i40e_alloc_intr_handles: "
@@ -1704,8 +1715,13 @@ i40e_alloc_intrs(i40e_t *i40e, dev_info_t *devinfo)
if ((intr_types & DDI_INTR_TYPE_MSIX) &&
i40e->i40e_intr_force <= I40E_INTR_MSIX) {
- if (i40e_alloc_intr_handles(i40e, devinfo, DDI_INTR_TYPE_MSIX))
+ if (i40e_alloc_intr_handles(i40e, devinfo,
+ DDI_INTR_TYPE_MSIX)) {
+ i40e->i40e_num_trqpairs =
+ MIN(i40e->i40e_intr_count - 1,
+ I40E_AQ_VSI_TC_QUE_SIZE_MAX);
return (B_TRUE);
+ }
}
/*
@@ -1736,23 +1752,25 @@ i40e_alloc_intrs(i40e_t *i40e, dev_info_t *devinfo)
static boolean_t
i40e_map_intrs_to_vectors(i40e_t *i40e)
{
+ int i;
+
if (i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
return (B_TRUE);
}
/*
- * At the moment, we only have one queue and one interrupt thus both are
- * on that one interrupt. However, longer term we need to go back to
- * using the ixgbe style map of queues to vectors or walk the linked
- * list from the device to know what to go handle. Therefore for the
- * moment, since we need to map our single set of rings to the one
- * I/O interrupt that exists for MSI-X.
+ * Each queue pair is mapped to a single interrupt, so transmit
+ * and receive interrupts for a given queue share the same vector.
+ * The number of queue pairs is one less than the number of interrupt
+ * vectors and is assigned the vector one higher than its index.
+ * Vector zero is reserved for the admin queue.
*/
- ASSERT(i40e->i40e_intr_count == 2);
- ASSERT(i40e->i40e_num_trqpairs == 1);
+ ASSERT(i40e->i40e_intr_count == i40e->i40e_num_trqpairs + 1);
- i40e->i40e_trqpairs[0].itrq_rx_intrvec = 1;
- i40e->i40e_trqpairs[0].itrq_tx_intrvec = 1;
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ i40e->i40e_trqpairs[i].itrq_rx_intrvec = i + 1;
+ i40e->i40e_trqpairs[i].itrq_tx_intrvec = i + 1;
+ }
return (B_TRUE);
}
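
Putting the two hunks above together: the MSI-X request is sized from the MSI_X_PF_N field of GLPCI_CNF2 (which holds the vector count minus one), and the queue-pair count then follows from whatever the DDI actually grants. A hedged sketch of that arithmetic with made-up numbers (not driver code):

/*
 * Illustrative sketch, not part of the patch. If GLPCI_CNF2 reports
 * MSI_X_PF_N == 0x3f the PF may request 64 vectors; should the DDI grant
 * all 64, this yields MIN(64 - 1, 64) == 63 queue pairs on vectors 1..63,
 * with vector 0 reserved for the admin queue. If only 9 vectors are
 * granted, the same arithmetic yields 8 queue pairs on vectors 1..8.
 */
static uint_t
i40e_example_trqpairs_from_vectors(uint_t granted_vectors)
{
	ASSERT3U(granted_vectors, >=, 2);
	return (MIN(granted_vectors - 1, I40E_AQ_VSI_TC_QUE_SIZE_MAX));
}
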
@@ -1898,7 +1916,7 @@ static boolean_t
i40e_config_vsi(i40e_t *i40e, i40e_hw_t *hw)
{
struct i40e_vsi_context context;
- int err;
+ int err, tc_queues;
bzero(&context, sizeof (struct i40e_vsi_context));
context.seid = i40e->i40e_vsi_id;
@@ -1909,13 +1927,32 @@ i40e_config_vsi(i40e_t *i40e, i40e_hw_t *hw)
return (B_FALSE);
}
+ i40e->i40e_vsi_num = context.vsi_number;
+
/*
* Set the queue and traffic class bits. Keep it simple for now.
*/
context.info.valid_sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
context.info.mapping_flags = I40E_AQ_VSI_QUE_MAP_CONTIG;
context.info.queue_mapping[0] = I40E_ASSIGN_ALL_QUEUES;
- context.info.tc_mapping[0] = I40E_TRAFFIC_CLASS_NO_QUEUES;
+
+ /*
+ * tc_queues determines the size of the traffic class, where the
+ * size is 2^^tc_queues to a maximum of 64.
+ * Some examples:
+ * i40e_num_trqpairs == 1 => tc_queues = 0, 2^^0 = 1.
+ * i40e_num_trqpairs == 7 => tc_queues = 3, 2^^3 = 8.
+ * i40e_num_trqpairs == 8 => tc_queues = 3, 2^^3 = 8.
+ * i40e_num_trqpairs == 9 => tc_queues = 4, 2^^4 = 16.
+ * i40e_num_trqpairs == 17 => tc_queues = 5, 2^^5 = 32.
+ * i40e_num_trqpairs == 64 => tc_queues = 6, 2^^6 = 64.
+ */
+ tc_queues = ddi_fls(i40e->i40e_num_trqpairs - 1);
+
+ context.info.tc_mapping[0] = ((0 << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) &
+ I40E_AQ_VSI_TC_QUE_OFFSET_MASK) |
+ ((tc_queues << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT) &
+ I40E_AQ_VSI_TC_QUE_NUMBER_MASK);
context.info.valid_sections |= I40E_AQ_VSI_PROP_VLAN_VALID;
context.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL |
@@ -1938,6 +1975,76 @@ i40e_config_vsi(i40e_t *i40e, i40e_hw_t *hw)
}
/*
+ * Set up RSS.
+ * 1. Seed the hash key.
+ * 2. Enable PCTYPEs for the hash filter.
+ * 3. Populate the LUT.
+ *
+ * Note: When/if X722 support is added the hash key is seeded via a call
+ * to i40e_aq_set_rss_key(), and the LUT is populated using
+ * i40e_aq_set_rss_lut().
+ */
+static boolean_t
+i40e_config_rss(i40e_t *i40e, i40e_hw_t *hw)
+{
+ int i;
+ uint8_t lut_mask;
+ uint32_t *hlut;
+ uint64_t hena;
+ boolean_t rv = B_TRUE;
+ uint32_t seed[I40E_PFQF_HKEY_MAX_INDEX + 1];
+
+ /*
+ * 1. Seed the hash key
+ */
+ (void) random_get_pseudo_bytes((uint8_t *)seed, sizeof (seed));
+
+ for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
+ i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), seed[i]);
+
+ /*
+ * 2. Configure PCTYPES
+ */
+ hena = (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) |
+ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) |
+ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
+ (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV4) |
+ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) |
+ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) |
+ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) |
+ (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV6) |
+ (1ULL << I40E_FILTER_PCTYPE_L2_PAYLOAD);
+
+ i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (uint32_t)hena);
+ i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (uint32_t)(hena >> 32));
+
+ /*
+ * 3. Populate LUT
+ *
+ * Each entry in the LUT is 8 bits and is used to index
+ * the rx queue. Populate the LUT in a round robin fashion
+ * with rx queue indices from 0 to i40e_num_trqpairs - 1.
+ */
+ hlut = kmem_alloc(hw->func_caps.rss_table_size, KM_NOSLEEP);
+ if (hlut == NULL) {
+ i40e_error(i40e, "i40e_config_rss() buffer allocation failed");
+ return (B_FALSE);
+ }
+
+ lut_mask = (1 << hw->func_caps.rss_table_entry_width) - 1;
+
+ for (i = 0; i < hw->func_caps.rss_table_size; i++)
+ ((uint8_t *)hlut)[i] = (i % i40e->i40e_num_trqpairs) & lut_mask;
+
+ for (i = 0; i < hw->func_caps.rss_table_size >> 2; i++)
+ I40E_WRITE_REG(hw, I40E_PFQF_HLUT(i), hlut[i]);
+
+ kmem_free(hlut, hw->func_caps.rss_table_size);
+
+ return (rv);
+}
+
+/*
* Wrapper to kick the chipset on.
*/
static boolean_t
@@ -1970,6 +2077,7 @@ i40e_chip_start(i40e_t *i40e)
bzero(&filter, sizeof (filter));
filter.enable_ethtype = TRUE;
filter.enable_macvlan = TRUE;
+ filter.hash_lut_size = I40E_HASH_LUT_SIZE_512;
rc = i40e_set_filter_control(hw, &filter);
if (rc != I40E_SUCCESS) {
@@ -1982,6 +2090,9 @@ i40e_chip_start(i40e_t *i40e)
if (!i40e_config_vsi(i40e, hw))
return (B_FALSE);
+ if (!i40e_config_rss(i40e, hw))
+ return (B_FALSE);
+
i40e_flush(hw);
return (B_TRUE);
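
The VSI traffic-class sizing above leans on ddi_fls() returning the 1-based index of the highest set bit, so ddi_fls(n - 1) is the exponent of the smallest power of two that covers n queue pairs. A hedged sketch of the same computation (a hypothetical helper mirroring the comment table in the patch):

/*
 * Sketch, not part of the patch: tc_queues is the exponent of the smallest
 * power of two that is >= the number of queue pairs. ddi_fls(0) == 0,
 * ddi_fls(7) == 3, ddi_fls(8) == 4, ddi_fls(63) == 6. The driver caps
 * i40e_num_trqpairs at 64, so the result never exceeds 6.
 */
static int
i40e_example_tc_queues(uint_t num_trqpairs)
{
	int tc_queues = ddi_fls(num_trqpairs - 1);

	ASSERT3S(tc_queues, <=, 6);	/* 2^6 == 64, the hardware maximum */
	return (tc_queues);
}
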
diff --git a/usr/src/uts/common/io/i40e/i40e_sw.h b/usr/src/uts/common/io/i40e/i40e_sw.h
index 4995f53107..69348687f6 100644
--- a/usr/src/uts/common/io/i40e/i40e_sw.h
+++ b/usr/src/uts/common/io/i40e/i40e_sw.h
@@ -12,6 +12,7 @@
/*
* Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright 2016 Joyent, Inc.
+ * Copyright 2017 Tegile Systems, Inc. All rights reserved.
*/
/*
@@ -526,6 +527,7 @@ typedef struct i40e_trqpair {
uint64_t itrq_rxgen; /* Generation number for mac/GLDv3. */
uint32_t itrq_index; /* Queue index in the PF */
uint32_t itrq_rx_intrvec; /* Receive interrupt vector. */
+ boolean_t itrq_intr_poll; /* True when polling */
/* Receive-side stats. */
i40e_rxq_stat_t itrq_rxstat;
@@ -768,6 +770,7 @@ typedef struct i40e {
* Device state, switch information, and resources.
*/
int i40e_vsi_id;
+ uint16_t i40e_vsi_num;
struct i40e_device *i40e_device;
i40e_func_rsrc_t i40e_resources;
uint16_t i40e_switch_rsrc_alloc;
@@ -811,11 +814,6 @@ typedef struct i40e {
/*
* Interrupt state
- *
- * Note that the use of a single boolean_t for i40e_intr_poll isn't
- * really the best design. When we have more than a single ring on the
- * device working, we'll transition to using something more
- * sophisticated.
*/
uint_t i40e_intr_pri;
uint_t i40e_intr_force;
@@ -827,7 +825,6 @@ typedef struct i40e {
size_t i40e_intr_size;
ddi_intr_handle_t *i40e_intr_handles;
ddi_cb_handle_t i40e_callback_handle;
- boolean_t i40e_intr_poll;
/*
* DMA attributes. See i40e_transceiver.c for why we have copies of them