diff options
author | Paul Winder <paul.winder@tegile.com> | 2017-02-20 14:00:11 +0000 |
---|---|---|
committer | Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org> | 2017-02-23 15:46:40 +0100 |
commit | 396505af9432aab52f4853cfde77ca834a9cce76 (patch) | |
tree | 962e0bdc3a43923f1196ed99e27c75956756c2c2 | |
parent | 1951a93366b3e52f7c49ec9b2dbfb21bccc56a3e (diff) | |
download | illumos-joyent-396505af9432aab52f4853cfde77ca834a9cce76.tar.gz |
7865 Allow i40e to use multiple rings
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dale Ghent <daleg@omniti.com>
Approved by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
-rw-r--r-- | usr/src/uts/common/io/i40e/core/i40e_adminq_cmd.h | 18 | ||||
-rw-r--r-- | usr/src/uts/common/io/i40e/core/i40e_common.c | 122 | ||||
-rw-r--r-- | usr/src/uts/common/io/i40e/core/i40e_prototype.h | 8 | ||||
-rw-r--r-- | usr/src/uts/common/io/i40e/i40e_gld.c | 15 | ||||
-rw-r--r-- | usr/src/uts/common/io/i40e/i40e_intr.c | 97 | ||||
-rw-r--r-- | usr/src/uts/common/io/i40e/i40e_main.c | 157 | ||||
-rw-r--r-- | usr/src/uts/common/io/i40e/i40e_sw.h | 9 |
7 files changed, 338 insertions, 88 deletions
diff --git a/usr/src/uts/common/io/i40e/core/i40e_adminq_cmd.h b/usr/src/uts/common/io/i40e/core/i40e_adminq_cmd.h index af9f107597..806fe0e97b 100644 --- a/usr/src/uts/common/io/i40e/core/i40e_adminq_cmd.h +++ b/usr/src/uts/common/io/i40e/core/i40e_adminq_cmd.h @@ -158,6 +158,9 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_set_port_parameters = 0x0203, i40e_aqc_opc_get_switch_resource_alloc = 0x0204, + i40e_aqc_opc_rx_ctl_reg_read = 0x0206, + i40e_aqc_opc_rx_ctl_reg_write = 0x0207, + i40e_aqc_opc_add_vsi = 0x0210, i40e_aqc_opc_update_vsi_parameters = 0x0211, i40e_aqc_opc_get_vsi_parameters = 0x0212, @@ -694,6 +697,20 @@ struct i40e_aqc_switch_resource_alloc_element_resp { I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp); +/* Read Receive control registers (direct 0x0206) + * Write Receive control registers (direct 0x0207) + * used for accessing Rx control registers that can be + * slow and need special handling when under high Rx load + */ +struct i40e_aqc_rx_ctl_reg_read_write { + __le32 reserved1; + __le32 address; + __le32 reserved2; + __le32 value; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_rx_ctl_reg_read_write); + /* Add VSI (indirect 0x0210) * this indirect command uses struct i40e_aqc_vsi_properties_data * as the indirect buffer (128 bytes) @@ -838,6 +855,7 @@ struct i40e_aqc_vsi_properties_data { #define I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT 9 #define I40E_AQ_VSI_TC_QUE_NUMBER_MASK (0x7 << \ I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT) +#define I40E_AQ_VSI_TC_QUE_SIZE_MAX (1 << 0x6) /* queueing option section */ u8 queueing_opt_flags; #ifdef X722_SUPPORT diff --git a/usr/src/uts/common/io/i40e/core/i40e_common.c b/usr/src/uts/common/io/i40e/core/i40e_common.c index c58eb9de1e..257c485b04 100644 --- a/usr/src/uts/common/io/i40e/core/i40e_common.c +++ b/usr/src/uts/common/io/i40e/core/i40e_common.c @@ -5610,6 +5610,128 @@ enum i40e_status_code i40e_aq_configure_partition_bw(struct i40e_hw *hw, } /** + * i40e_aq_rx_ctl_read_register - use FW to read from an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: ptr to register value + * @cmd_details: pointer to command details structure or NULL + * + * Use the firmware to read the Rx control register, + * especially useful if the Rx unit is under heavy pressure + **/ +enum i40e_status_code i40e_aq_rx_ctl_read_register(struct i40e_hw *hw, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_rx_ctl_reg_read_write *cmd_resp = + (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw; + enum i40e_status_code status; + + if (reg_val == NULL) + return I40E_ERR_PARAM; + + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_rx_ctl_reg_read); + + cmd_resp->address = CPU_TO_LE32(reg_addr); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + if (status == I40E_SUCCESS) + *reg_val = LE32_TO_CPU(cmd_resp->value); + + return status; +} + +/** + * i40e_read_rx_ctl - read from an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + **/ +u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr) +{ + enum i40e_status_code status = I40E_SUCCESS; + bool use_register; + int retry = 5; + u32 val = 0; + + use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5); + if (!use_register) { +do_retry: + status = i40e_aq_rx_ctl_read_register(hw, reg_addr, &val, NULL); + if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) { + i40e_msec_delay(1); + retry--; + goto do_retry; + } + } + + /* if the AQ access failed, try the old-fashioned way */ + if (status || use_register) + val = rd32(hw, reg_addr); + + return val; +} + +/** + * i40e_aq_rx_ctl_write_register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: register value + * @cmd_details: pointer to command details structure or NULL + * + * Use the firmware to write to an Rx control register, + * especially useful if the Rx unit is under heavy pressure + **/ +enum i40e_status_code i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_rx_ctl_reg_read_write *cmd = + (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw; + enum i40e_status_code status; + + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_rx_ctl_reg_write); + + cmd->address = CPU_TO_LE32(reg_addr); + cmd->value = CPU_TO_LE32(reg_val); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** + * i40e_write_rx_ctl - write to an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: register value + **/ +void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val) +{ + enum i40e_status_code status = I40E_SUCCESS; + bool use_register; + int retry = 5; + + use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5); + if (!use_register) { +do_retry: + status = i40e_aq_rx_ctl_write_register(hw, reg_addr, + reg_val, NULL); + if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) { + i40e_msec_delay(1); + retry--; + goto do_retry; + } + } + + /* if the AQ access failed, try the old-fashioned way */ + if (status || use_register) + wr32(hw, reg_addr, reg_val); +} + +/** * i40e_aq_send_msg_to_pf * @hw: pointer to the hardware structure * @v_opcode: opcodes for VF-PF communication diff --git a/usr/src/uts/common/io/i40e/core/i40e_prototype.h b/usr/src/uts/common/io/i40e/core/i40e_prototype.h index 6f1cfc3afe..5516642273 100644 --- a/usr/src/uts/common/io/i40e/core/i40e_prototype.h +++ b/usr/src/uts/common/io/i40e/core/i40e_prototype.h @@ -475,4 +475,12 @@ enum i40e_status_code i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id, void *buff, u16 *ret_buff_size, u8 *ret_next_table, u32 *ret_next_index, struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code i40e_aq_rx_ctl_read_register(struct i40e_hw *hw, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details); +u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr); +enum i40e_status_code i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details); +void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); #endif /* _I40E_PROTOTYPE_H_ */ diff --git a/usr/src/uts/common/io/i40e/i40e_gld.c b/usr/src/uts/common/io/i40e/i40e_gld.c index b599a051f5..f12968a510 100644 --- a/usr/src/uts/common/io/i40e/i40e_gld.c +++ b/usr/src/uts/common/io/i40e/i40e_gld.c @@ -12,6 +12,7 @@ /* * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved. * Copyright 2016 Joyent, Inc. + * Copyright 2017 Tegile Systems, Inc. All rights reserved. */ /* @@ -453,11 +454,11 @@ i40e_rx_ring_intr_enable(mac_intr_handle_t intrh) i40e_trqpair_t *itrq = (i40e_trqpair_t *)intrh; i40e_t *i40e = itrq->itrq_i40e; - mutex_enter(&i40e->i40e_general_lock); - ASSERT(i40e->i40e_intr_poll == B_TRUE); + mutex_enter(&itrq->itrq_rx_lock); + ASSERT(itrq->itrq_intr_poll == B_TRUE); i40e_intr_rx_queue_enable(i40e, itrq->itrq_index); - i40e->i40e_intr_poll = B_FALSE; - mutex_exit(&i40e->i40e_general_lock); + itrq->itrq_intr_poll = B_FALSE; + mutex_exit(&itrq->itrq_rx_lock); return (0); } @@ -469,10 +470,10 @@ i40e_rx_ring_intr_disable(mac_intr_handle_t intrh) i40e_trqpair_t *itrq = (i40e_trqpair_t *)intrh; i40e_t *i40e = itrq->itrq_i40e; - mutex_enter(&i40e->i40e_general_lock); + mutex_enter(&itrq->itrq_rx_lock); i40e_intr_rx_queue_disable(i40e, itrq->itrq_index); - i40e->i40e_intr_poll = B_TRUE; - mutex_exit(&i40e->i40e_general_lock); + itrq->itrq_intr_poll = B_TRUE; + mutex_exit(&itrq->itrq_rx_lock); return (0); } diff --git a/usr/src/uts/common/io/i40e/i40e_intr.c b/usr/src/uts/common/io/i40e/i40e_intr.c index ba9bea7b20..41387dbd3a 100644 --- a/usr/src/uts/common/io/i40e/i40e_intr.c +++ b/usr/src/uts/common/io/i40e/i40e_intr.c @@ -11,6 +11,7 @@ /* * Copyright 2016 Joyent, Inc. + * Copyright 2017 Tegile Systems, Inc. All rights reserved. */ /* @@ -76,11 +77,6 @@ * queue defines the next one in either the I40E_QINT_RQCTL or I40E_QINT_TQCTL * register. * - * Because we only have a single queue enabled at the moment and we always have - * two interrupts, we do something pretty simple and just know that there's one - * data queue in the interrupt handler. Longer term, we'll need to think harder - * about this, but for the moment it'll have to suffice. - * * Finally, the individual interrupt vector itself has the ability to be enabled * and disabled. The overall interrupt is controlled through the * I40E_PFINT_DYN_CTLN() register. This is used to turn on and off the interrupt @@ -199,8 +195,8 @@ i40e_intr_set_itr(i40e_t *i40e, i40e_itr_index_t itr, uint_t val) return; } - for (i = 1; i < i40e->i40e_intr_count; i++) { - I40E_WRITE_REG(hw, I40E_PFINT_ITRN(itr, i - 1), val); + for (i = 0; i < i40e->i40e_num_trqpairs; i++) { + I40E_WRITE_REG(hw, I40E_PFINT_ITRN(itr, i), val); } } @@ -331,18 +327,18 @@ i40e_intr_io_clear_cause(i40e_t *i40e) return; } - for (i = 1; i < i40e->i40e_intr_count; i++) { + for (i = 0; i < i40e->i40e_num_trqpairs; i++) { uint32_t reg; #ifdef DEBUG /* * Verify that the interrupt in question is disabled. This is a * prerequisite of modifying the data in question. */ - reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i - 1)); + reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i)); VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK); #endif reg = I40E_QUEUE_TYPE_EOL; - I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i - 1), reg); + I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i), reg); } i40e_flush(hw); @@ -365,11 +361,11 @@ i40e_intr_chip_fini(i40e_t *i40e) * and the interrupt linked lists have been zeroed. */ if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { - for (i = 1; i < i40e->i40e_intr_count; i++) { - reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i - 1)); + for (i = 0; i < i40e->i40e_num_trqpairs; i++) { + reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i)); VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK); - reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i - 1)); + reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i)); VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL); } } @@ -388,32 +384,39 @@ i40e_intr_init_queue_msix(i40e_t *i40e) { i40e_hw_t *hw = &i40e->i40e_hw_space; uint32_t reg; + int i; /* - * Because we only have a single queue, just do something simple now. - * How this all works will need to really be properly redone based on - * the bit maps, etc. Note that we skip the ITR logic for the moment, - * just to make our lives as explicit and simple as possible. + * Map queues to MSI-X interrupts. Queue i is mapped to vector i + 1. + * Note that we skip the ITR logic for the moment, just to make our + * lives as explicit and simple as possible. */ - reg = (0 << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) | - (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); - I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(0), reg); - - reg = (1 << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | - (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | - (0 << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | - (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | - I40E_QINT_RQCTL_CAUSE_ENA_MASK; - - I40E_WRITE_REG(hw, I40E_QINT_RQCTL(0), reg); - - reg = (1 << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | - (I40E_ITR_INDEX_TX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | - (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | - (I40E_QUEUE_TYPE_RX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | - I40E_QINT_TQCTL_CAUSE_ENA_MASK; - - I40E_WRITE_REG(hw, I40E_QINT_TQCTL(0), reg); + for (i = 0; i < i40e->i40e_num_trqpairs; i++) { + i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i]; + + reg = (i << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) | + (I40E_QUEUE_TYPE_RX << + I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); + I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i), reg); + + reg = + (itrq->itrq_rx_intrvec << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | + (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | + (i << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | + (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | + I40E_QINT_RQCTL_CAUSE_ENA_MASK; + + I40E_WRITE_REG(hw, I40E_QINT_RQCTL(i), reg); + + reg = + (itrq->itrq_tx_intrvec << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | + (I40E_ITR_INDEX_TX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | + (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | + (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT) | + I40E_QINT_TQCTL_CAUSE_ENA_MASK; + + I40E_WRITE_REG(hw, I40E_QINT_TQCTL(i), reg); + } } @@ -601,14 +604,15 @@ i40e_intr_adminq_work(i40e_t *i40e) static void i40e_intr_rx_work(i40e_t *i40e, int queue) { - mblk_t *mp; + mblk_t *mp = NULL; i40e_trqpair_t *itrq; ASSERT(queue < i40e->i40e_num_trqpairs); itrq = &i40e->i40e_trqpairs[queue]; mutex_enter(&itrq->itrq_rx_lock); - mp = i40e_ring_rx(itrq, I40E_POLL_NULL); + if (!itrq->itrq_intr_poll) + mp = i40e_ring_rx(itrq, I40E_POLL_NULL); mutex_exit(&itrq->itrq_rx_lock); if (mp != NULL) { @@ -675,20 +679,9 @@ i40e_intr_msix(void *arg1, void *arg2) return (DDI_INTR_CLAIMED); } - VERIFY(vector_idx == 1); - - /* - * Note that we explicitly do not check this value under the lock even - * though assignments to it are done so. In this case, the cost of - * getting this wrong is at worst a bit of additional contention and - * even more rarely, a duplicated packet. However, the cost on the other - * hand is a lot more. This is something that as we more generally - * implement ring support we should revisit. - */ - if (i40e->i40e_intr_poll != B_TRUE) - i40e_intr_rx_work(i40e, 0); - i40e_intr_tx_work(i40e, 0); - i40e_intr_io_enable(i40e, 1); + i40e_intr_rx_work(i40e, vector_idx - 1); + i40e_intr_tx_work(i40e, vector_idx - 1); + i40e_intr_io_enable(i40e, vector_idx); return (DDI_INTR_CLAIMED); } diff --git a/usr/src/uts/common/io/i40e/i40e_main.c b/usr/src/uts/common/io/i40e/i40e_main.c index d765174387..690ddc5a8d 100644 --- a/usr/src/uts/common/io/i40e/i40e_main.c +++ b/usr/src/uts/common/io/i40e/i40e_main.c @@ -12,6 +12,7 @@ /* * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved. * Copyright 2016 Joyent, Inc. + * Copyright 2017 Tegile Systems, Inc. All rights reserved. */ /* @@ -359,7 +360,6 @@ * overview of expected work: * * o TSO support - * o RSS / multiple ring support * o Multiple group support * o DMA binding and breaking up the locking in ring recycling. * o Enhanced detection of device errors @@ -371,7 +371,7 @@ #include "i40e_sw.h" -static char i40e_ident[] = "Intel 10/40Gb Ethernet v1.0.0"; +static char i40e_ident[] = "Intel 10/40Gb Ethernet v1.0.1"; /* * The i40e_glock primarily protects the lists below and the i40e_device_t @@ -1594,7 +1594,10 @@ i40e_init_properties(i40e_t *i40e) static boolean_t i40e_alloc_intr_handles(i40e_t *i40e, dev_info_t *devinfo, int intr_type) { + i40e_hw_t *hw = &i40e->i40e_hw_space; + ddi_acc_handle_t rh = i40e->i40e_osdep_space.ios_reg_handle; int request, count, actual, rc, min; + uint32_t reg; switch (intr_type) { case DDI_INTR_TYPE_FIXED: @@ -1603,16 +1606,24 @@ i40e_alloc_intr_handles(i40e_t *i40e, dev_info_t *devinfo, int intr_type) min = 1; break; case DDI_INTR_TYPE_MSIX: + min = 2; + if (!i40e->i40e_mr_enable) { + request = 2; + break; + } + reg = I40E_READ_REG(hw, I40E_GLPCI_CNF2); /* - * At the moment, we always request two MSI-X while we still - * only support a single interrupt. The upper bound on what's - * supported by a given device is defined by MSI_X_PF_N in - * GLPCI_CNF2. When we evolve, we should read it to determine - * what the real max is. + * Should this read fail, we will drop back to using + * MSI or fixed interrupts. */ - ASSERT(i40e->i40e_num_trqpairs == 1); - request = 2; - min = 2; + if (i40e_check_acc_handle(rh) != DDI_FM_OK) { + ddi_fm_service_impact(i40e->i40e_dip, + DDI_SERVICE_DEGRADED); + return (B_FALSE); + } + request = (reg & I40E_GLPCI_CNF2_MSI_X_PF_N_MASK) >> + I40E_GLPCI_CNF2_MSI_X_PF_N_SHIFT; + request++; /* the register value is n - 1 */ break; default: panic("bad interrupt type passed to i40e_alloc_intr_handles: " @@ -1704,8 +1715,13 @@ i40e_alloc_intrs(i40e_t *i40e, dev_info_t *devinfo) if ((intr_types & DDI_INTR_TYPE_MSIX) && i40e->i40e_intr_force <= I40E_INTR_MSIX) { - if (i40e_alloc_intr_handles(i40e, devinfo, DDI_INTR_TYPE_MSIX)) + if (i40e_alloc_intr_handles(i40e, devinfo, + DDI_INTR_TYPE_MSIX)) { + i40e->i40e_num_trqpairs = + MIN(i40e->i40e_intr_count - 1, + I40E_AQ_VSI_TC_QUE_SIZE_MAX); return (B_TRUE); + } } /* @@ -1736,23 +1752,25 @@ i40e_alloc_intrs(i40e_t *i40e, dev_info_t *devinfo) static boolean_t i40e_map_intrs_to_vectors(i40e_t *i40e) { + int i; + if (i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) { return (B_TRUE); } /* - * At the moment, we only have one queue and one interrupt thus both are - * on that one interrupt. However, longer term we need to go back to - * using the ixgbe style map of queues to vectors or walk the linked - * list from the device to know what to go handle. Therefore for the - * moment, since we need to map our single set of rings to the one - * I/O interrupt that exists for MSI-X. + * Each queue pair is mapped to a single interrupt, so transmit + * and receive interrupts for a given queue share the same vector. + * The number of queue pairs is one less than the number of interrupt + * vectors and is assigned the vector one higher than its index. + * Vector zero is reserved for the admin queue. */ - ASSERT(i40e->i40e_intr_count == 2); - ASSERT(i40e->i40e_num_trqpairs == 1); + ASSERT(i40e->i40e_intr_count == i40e->i40e_num_trqpairs + 1); - i40e->i40e_trqpairs[0].itrq_rx_intrvec = 1; - i40e->i40e_trqpairs[0].itrq_tx_intrvec = 1; + for (i = 0; i < i40e->i40e_num_trqpairs; i++) { + i40e->i40e_trqpairs[i].itrq_rx_intrvec = i + 1; + i40e->i40e_trqpairs[i].itrq_tx_intrvec = i + 1; + } return (B_TRUE); } @@ -1898,7 +1916,7 @@ static boolean_t i40e_config_vsi(i40e_t *i40e, i40e_hw_t *hw) { struct i40e_vsi_context context; - int err; + int err, tc_queues; bzero(&context, sizeof (struct i40e_vsi_context)); context.seid = i40e->i40e_vsi_id; @@ -1909,13 +1927,32 @@ i40e_config_vsi(i40e_t *i40e, i40e_hw_t *hw) return (B_FALSE); } + i40e->i40e_vsi_num = context.vsi_number; + /* * Set the queue and traffic class bits. Keep it simple for now. */ context.info.valid_sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; context.info.mapping_flags = I40E_AQ_VSI_QUE_MAP_CONTIG; context.info.queue_mapping[0] = I40E_ASSIGN_ALL_QUEUES; - context.info.tc_mapping[0] = I40E_TRAFFIC_CLASS_NO_QUEUES; + + /* + * tc_queues determines the size of the traffic class, where the + * size is 2^^tc_queues to a maximum of 64. + * Some examples: + * i40e_num_trqpairs == 1 => tc_queues = 0, 2^^0 = 1. + * i40e_num_trqpairs == 7 => tc_queues = 3, 2^^3 = 8. + * i40e_num_trqpairs == 8 => tc_queues = 3, 2^^3 = 8. + * i40e_num_trqpairs == 9 => tc_queues = 4, 2^^4 = 16. + * i40e_num_trqpairs == 17 => tc_queues = 5, 2^^5 = 32. + * i40e_num_trqpairs == 64 => tc_queues = 6, 2^^6 = 64. + */ + tc_queues = ddi_fls(i40e->i40e_num_trqpairs - 1); + + context.info.tc_mapping[0] = ((0 << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) & + I40E_AQ_VSI_TC_QUE_OFFSET_MASK) | + ((tc_queues << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT) & + I40E_AQ_VSI_TC_QUE_NUMBER_MASK); context.info.valid_sections |= I40E_AQ_VSI_PROP_VLAN_VALID; context.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL | @@ -1938,6 +1975,76 @@ i40e_config_vsi(i40e_t *i40e, i40e_hw_t *hw) } /* + * Set up RSS. + * 1. Seed the hash key. + * 2. Enable PCTYPEs for the hash filter. + * 3. Populate the LUT. + * + * Note: When/if X722 support is added the hash key is seeded via a call + * to i40e_aq_set_rss_key(), and the LUT is populated using + * i40e_aq_set_rss_lut(). + */ +static boolean_t +i40e_config_rss(i40e_t *i40e, i40e_hw_t *hw) +{ + int i; + uint8_t lut_mask; + uint32_t *hlut; + uint64_t hena; + boolean_t rv = B_TRUE; + uint32_t seed[I40E_PFQF_HKEY_MAX_INDEX + 1]; + + /* + * 1. Seed the hash key + */ + (void) random_get_pseudo_bytes((uint8_t *)seed, sizeof (seed)); + + for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) + i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), seed[i]); + + /* + * 2. Configure PCTYPES + */ + hena = (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) | + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) | + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | + (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV4) | + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) | + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) | + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | + (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV6) | + (1ULL << I40E_FILTER_PCTYPE_L2_PAYLOAD); + + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (uint32_t)hena); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (uint32_t)(hena >> 32)); + + /* + * 3. Populate LUT + * + * Each entry in the LUT is 8 bits and is used to index + * the rx queue. Populate the LUT in a round robin fashion + * with rx queue indices from 0 to i40e_num_trqpairs - 1. + */ + hlut = kmem_alloc(hw->func_caps.rss_table_size, KM_NOSLEEP); + if (hlut == NULL) { + i40e_error(i40e, "i40e_config_rss() buffer allocation failed"); + return (B_FALSE); + } + + lut_mask = (1 << hw->func_caps.rss_table_entry_width) - 1; + + for (i = 0; i < hw->func_caps.rss_table_size; i++) + ((uint8_t *)hlut)[i] = (i % i40e->i40e_num_trqpairs) & lut_mask; + + for (i = 0; i < hw->func_caps.rss_table_size >> 2; i++) + I40E_WRITE_REG(hw, I40E_PFQF_HLUT(i), hlut[i]); + + kmem_free(hlut, hw->func_caps.rss_table_size); + + return (rv); +} + +/* * Wrapper to kick the chipset on. */ static boolean_t @@ -1970,6 +2077,7 @@ i40e_chip_start(i40e_t *i40e) bzero(&filter, sizeof (filter)); filter.enable_ethtype = TRUE; filter.enable_macvlan = TRUE; + filter.hash_lut_size = I40E_HASH_LUT_SIZE_512; rc = i40e_set_filter_control(hw, &filter); if (rc != I40E_SUCCESS) { @@ -1982,6 +2090,9 @@ i40e_chip_start(i40e_t *i40e) if (!i40e_config_vsi(i40e, hw)) return (B_FALSE); + if (!i40e_config_rss(i40e, hw)) + return (B_FALSE); + i40e_flush(hw); return (B_TRUE); diff --git a/usr/src/uts/common/io/i40e/i40e_sw.h b/usr/src/uts/common/io/i40e/i40e_sw.h index 4995f53107..69348687f6 100644 --- a/usr/src/uts/common/io/i40e/i40e_sw.h +++ b/usr/src/uts/common/io/i40e/i40e_sw.h @@ -12,6 +12,7 @@ /* * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved. * Copyright 2016 Joyent, Inc. + * Copyright 2017 Tegile Systems, Inc. All rights reserved. */ /* @@ -526,6 +527,7 @@ typedef struct i40e_trqpair { uint64_t itrq_rxgen; /* Generation number for mac/GLDv3. */ uint32_t itrq_index; /* Queue index in the PF */ uint32_t itrq_rx_intrvec; /* Receive interrupt vector. */ + boolean_t itrq_intr_poll; /* True when polling */ /* Receive-side stats. */ i40e_rxq_stat_t itrq_rxstat; @@ -768,6 +770,7 @@ typedef struct i40e { * Device state, switch information, and resources. */ int i40e_vsi_id; + uint16_t i40e_vsi_num; struct i40e_device *i40e_device; i40e_func_rsrc_t i40e_resources; uint16_t i40e_switch_rsrc_alloc; @@ -811,11 +814,6 @@ typedef struct i40e { /* * Interrupt state - * - * Note that the use of a single boolean_t for i40e_intr_poll isn't - * really the best design. When we have more than a single ring on the - * device working, we'll transition to using something more - * sophisticated. */ uint_t i40e_intr_pri; uint_t i40e_intr_force; @@ -827,7 +825,6 @@ typedef struct i40e { size_t i40e_intr_size; ddi_intr_handle_t *i40e_intr_handles; ddi_cb_handle_t i40e_callback_handle; - boolean_t i40e_intr_poll; /* * DMA attributes. See i40e_transceiver.c for why we have copies of them |