-rw-r--r--  usr/src/uts/common/io/aggr/aggr_grp.c                6
-rw-r--r--  usr/src/uts/common/io/mac/mac.c                     95
-rw-r--r--  usr/src/uts/common/io/mac/mac_datapath_setup.c      81
-rw-r--r--  usr/src/uts/common/io/nxge/nxge_hio.c               71
-rw-r--r--  usr/src/uts/common/io/nxge/nxge_hio_guest.c        116
-rw-r--r--  usr/src/uts/common/io/nxge/nxge_main.c              120
-rw-r--r--  usr/src/uts/common/io/nxge/nxge_rxdma.c              76
-rw-r--r--  usr/src/uts/common/io/nxge/nxge_send.c               72
-rw-r--r--  usr/src/uts/common/io/nxge/nxge_virtual.c             3
-rw-r--r--  usr/src/uts/common/sys/mac_client_priv.h              2
-rw-r--r--  usr/src/uts/common/sys/mac_impl.h                     6
-rw-r--r--  usr/src/uts/common/sys/mac_soft_ring.h                3
-rw-r--r--  usr/src/uts/common/sys/nxge/nxge_hio.h               34
-rw-r--r--  usr/src/uts/sun4v/io/vnet.c                         1408
-rw-r--r--  usr/src/uts/sun4v/io/vnet_dds.c                       30
-rw-r--r--  usr/src/uts/sun4v/io/vnet_gen.c                      236
-rw-r--r--  usr/src/uts/sun4v/sys/vnet.h                         115
-rw-r--r--  usr/src/uts/sun4v/sys/vnet_gen.h                       6
18 files changed, 1998 insertions, 482 deletions
diff --git a/usr/src/uts/common/io/aggr/aggr_grp.c b/usr/src/uts/common/io/aggr/aggr_grp.c
index 8e080da083..c619144958 100644
--- a/usr/src/uts/common/io/aggr/aggr_grp.c
+++ b/usr/src/uts/common/io/aggr/aggr_grp.c
@@ -623,7 +623,8 @@ aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
/*
* Get the list of the underlying HW rings.
*/
- hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh);
+ hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh,
+ MAC_RING_TYPE_RX);
if (port->lp_hwgh != NULL) {
/*
@@ -689,7 +690,8 @@ aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
goto done;
ASSERT(rx_grp->arg_gh != NULL);
- hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh);
+ hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh,
+ MAC_RING_TYPE_RX);
/*
* If hw_rh_cnt is 0, it means that the underlying port does not
diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c
index 91d7aab331..21982219b9 100644
--- a/usr/src/uts/common/io/mac/mac.c
+++ b/usr/src/uts/common/io/mac/mac.c
@@ -1426,35 +1426,54 @@ mac_hwrings_rx_process(void *arg, mac_resource_handle_t srs,
*/
int
mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh,
- mac_ring_handle_t *hwrh)
+ mac_ring_handle_t *hwrh, mac_ring_type_t rtype)
{
mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
- flow_entry_t *flent = mcip->mci_flent;
- mac_group_t *grp = flent->fe_rx_ring_group;
- mac_ring_t *ring;
int cnt = 0;
- /*
- * The mac client did not reserve any RX group, return directly.
- * This is probably because the underlying MAC does not support
- * any RX groups.
- */
- *hwgh = NULL;
- if (grp == NULL)
- return (0);
+ switch (rtype) {
+ case MAC_RING_TYPE_RX: {
+ flow_entry_t *flent = mcip->mci_flent;
+ mac_group_t *grp;
+ mac_ring_t *ring;
- /*
- * This RX group must be reserved by this mac client.
- */
- ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) &&
- (mch == (mac_client_handle_t)(MAC_RX_GROUP_ONLY_CLIENT(grp))));
+ grp = flent->fe_rx_ring_group;
+ /*
+ * The mac client did not reserve any RX group, return directly.
+ * This is probably because the underlying MAC does not support
+ * any groups.
+ */
+ *hwgh = NULL;
+ if (grp == NULL)
+ return (0);
+ /*
+ * This group must be reserved by this mac client.
+ */
+ ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) &&
+ (mch == (mac_client_handle_t)
+ (MAC_RX_GROUP_ONLY_CLIENT(grp))));
+ for (ring = grp->mrg_rings;
+ ring != NULL; ring = ring->mr_next, cnt++) {
+ ASSERT(cnt < MAX_RINGS_PER_GROUP);
+ hwrh[cnt] = (mac_ring_handle_t)ring;
+ }
+ *hwgh = (mac_group_handle_t)grp;
+ return (cnt);
+ }
+ case MAC_RING_TYPE_TX: {
+ mac_soft_ring_set_t *tx_srs;
+ mac_srs_tx_t *tx;
- for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next) {
- ASSERT(cnt < MAX_RINGS_PER_GROUP);
- hwrh[cnt++] = (mac_ring_handle_t)ring;
+ tx_srs = MCIP_TX_SRS(mcip);
+ tx = &tx_srs->srs_tx;
+ for (; cnt < tx->st_ring_count; cnt++)
+ hwrh[cnt] = tx->st_rings[cnt];
+ return (cnt);
+ }
+ default:
+ ASSERT(B_FALSE);
+ return (-1);
}
- *hwgh = (mac_group_handle_t)grp;
- return (cnt);
}
/*
@@ -1524,6 +1543,22 @@ mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup)
return (info->mri_poll(info->mri_driver, bytes_to_pickup));
}
+/*
+ * Send packets through the selected tx ring.
+ */
+mblk_t *
+mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp)
+{
+ mac_ring_t *ring = (mac_ring_t *)rh;
+ mac_ring_info_t *info = &ring->mr_info;
+
+ ASSERT(ring->mr_type == MAC_RING_TYPE_TX);
+ ASSERT(ring->mr_state >= MR_INUSE);
+ ASSERT(info->mri_tx != NULL);
+
+ return (info->mri_tx(info->mri_driver, mp));
+}
+
int
mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr)
{
@@ -3429,22 +3464,6 @@ mac_release_tx_ring(mac_ring_handle_t rh)
}
/*
- * Send packets through a selected tx ring.
- */
-mblk_t *
-mac_ring_tx(mac_ring_handle_t rh, mblk_t *mp)
-{
- mac_ring_t *ring = (mac_ring_t *)rh;
- mac_ring_info_t *info = &ring->mr_info;
-
- ASSERT(ring->mr_type == MAC_RING_TYPE_TX);
- ASSERT(ring->mr_state >= MR_INUSE);
- ASSERT(info->mri_tx != NULL);
-
- return (info->mri_tx(info->mri_driver, mp));
-}
-
-/*
* Find a ring from its index.
*/
mac_ring_t *
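
The TX case added to mac_hwrings_get() above hands back the ring handles saved in the client's TX SRS, and the relocated mac_hwring_tx() entry point lets a privileged consumer (vnet, later in this patch) transmit directly on one of them. A minimal caller-side sketch using only the interfaces visible in this patch; the group handle argument is filled in only for the RX case, so a TX caller can pass NULL:

	mac_ring_handle_t	hwrh[MAX_RINGS_PER_GROUP];
	mblk_t			*resid;
	int			cnt;

	/* Look up the TX hardware rings backing this mac client. */
	cnt = mac_hwrings_get(mch, NULL, hwrh, MAC_RING_TYPE_TX);
	if (cnt > 0) {
		/* Packets the driver could not send are handed back. */
		resid = mac_hwring_tx(hwrh[0], mp);
	}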
diff --git a/usr/src/uts/common/io/mac/mac_datapath_setup.c b/usr/src/uts/common/io/mac/mac_datapath_setup.c
index 7b8c4c6567..dc5b51cb80 100644
--- a/usr/src/uts/common/io/mac/mac_datapath_setup.c
+++ b/usr/src/uts/common/io/mac/mac_datapath_setup.c
@@ -2235,6 +2235,10 @@ mac_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
tx->st_group);
tx->st_group = NULL;
}
+ if (tx->st_ring_count != 0) {
+ kmem_free(tx->st_rings,
+ sizeof (mac_ring_handle_t) * tx->st_ring_count);
+ }
if (tx->st_arg2 != NULL) {
ASSERT(tx_srs->srs_type & SRST_TX);
mac_release_tx_ring(tx->st_arg2);
@@ -3203,7 +3207,7 @@ mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
mac_impl_t *mip = mcip->mci_mip;
mac_soft_ring_set_t *tx_srs;
int i, tx_ring_count = 0, tx_rings_reserved = 0;
- mac_ring_handle_t *tx_ring = NULL;
+ mac_ring_handle_t *tx_rings = NULL;
uint32_t soft_ring_type;
mac_group_t *grp = NULL;
mac_ring_t *ring;
@@ -3221,7 +3225,7 @@ mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
}
if (tx_ring_count != 0) {
- tx_ring = kmem_zalloc(sizeof (mac_ring_handle_t) *
+ tx_rings = kmem_zalloc(sizeof (mac_ring_handle_t) *
tx_ring_count, KM_SLEEP);
}
@@ -3231,8 +3235,12 @@ mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
* NIC's.
*/
if (srs_type == SRST_FLOW ||
- (mcip->mci_state_flags & MCIS_NO_HWRINGS) != 0)
- goto use_default_ring;
+ (mcip->mci_state_flags & MCIS_NO_HWRINGS) != 0) {
+ /* use default ring */
+ tx_rings[0] = (void *)mip->mi_default_tx_ring;
+ tx_rings_reserved++;
+ goto rings_assigned;
+ }
if (mcip->mci_share != NULL)
ring = grp->mrg_rings;
@@ -3245,8 +3253,7 @@ mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
* then each Tx ring will have a Tx-side soft ring. All
* these soft rings will hang off the Tx SRS.
*/
- for (i = 0, tx_rings_reserved = 0;
- i < tx_ring_count; i++, tx_rings_reserved++) {
+ for (i = 0; i < tx_ring_count; i++) {
if (mcip->mci_share != NULL) {
/*
* The ring was already chosen and associated
@@ -3255,42 +3262,39 @@ mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
* between the share and non-share cases.
*/
ASSERT(ring != NULL);
- tx_ring[i] = (mac_ring_handle_t)ring;
+ tx_rings[i] = (mac_ring_handle_t)ring;
ring = ring->mr_next;
} else {
- tx_ring[i] =
+ tx_rings[i] =
(mac_ring_handle_t)mac_reserve_tx_ring(mip, NULL);
- if (tx_ring[i] == NULL)
+ if (tx_rings[i] == NULL) {
+ /*
+ * We have run out of Tx rings. So
+ * give the default ring too.
+ */
+ tx_rings[i] = (void *)mip->mi_default_tx_ring;
+ tx_rings_reserved++;
break;
+ }
}
+ tx_rings_reserved++;
}
+
+rings_assigned:
if (mac_tx_serialize || (mip->mi_v12n_level & MAC_VIRT_SERIALIZE))
serialize = B_TRUE;
/*
* Did we get the requested number of tx rings?
- * There are 3 actions we can take depending upon the number
+ * There are 2 actions we can take depending upon the number
* of tx_rings we got.
- * 1) If we got none, then hook up the tx_srs with the
- * default ring.
- * 2) If we got one, then get the tx_ring from the soft ring,
+ * 1) If we got one, then get the tx_ring from the soft ring,
* save it in SRS and free up the soft ring.
- * 3) If we got more than 1, then do the tx fanout among the
+ * 2) If we got more than 1, then do the tx fanout among the
* rings we obtained.
*/
- switch (tx_rings_reserved) {
- case 1:
- /*
- * No need to allocate Tx soft rings. Tx-side soft
- * rings are for Tx fanout case. Just use Tx SRS.
- */
- /* FALLTHRU */
-
- case 0:
-use_default_ring:
- if (tx_rings_reserved == 0)
- tx->st_arg2 = (void *)mip->mi_default_tx_ring;
- else
- tx->st_arg2 = (void *)tx_ring[0];
+ ASSERT(tx_rings_reserved != 0);
+ if (tx_rings_reserved == 1) {
+ tx->st_arg2 = (void *)tx_rings[0];
/* For ring_count of 0 or 1, set the tx_mode and return */
if (tx_srs->srs_type & SRST_BW_CONTROL)
tx->st_mode = SRS_TX_BW;
@@ -3298,18 +3302,9 @@ use_default_ring:
tx->st_mode = SRS_TX_SERIALIZE;
else
tx->st_mode = SRS_TX_DEFAULT;
- break;
-
- default:
+ } else {
/*
* We got multiple Tx rings for Tx fanout.
- *
- * cpuid of -1 is passed. This creates an unbound
- * worker thread. Instead the code should get CPU
- * binding information and pass that to
- * mac_soft_ring_create(). This needs to be done
- * in conjunction with Rx-side soft ring
- * bindings.
*/
soft_ring_type = ST_RING_OTH | ST_RING_TX;
if (tx_srs->srs_type & SRST_BW_CONTROL) {
@@ -3322,7 +3317,7 @@ use_default_ring:
for (i = 0; i < tx_rings_reserved; i++) {
(void) mac_soft_ring_create(i, 0, NULL, soft_ring_type,
maxclsyspri, mcip, tx_srs, -1, NULL, mcip,
- (mac_resource_handle_t)tx_ring[i]);
+ (mac_resource_handle_t)tx_rings[i]);
}
mac_srs_update_fanout_list(tx_srs);
}
@@ -3332,8 +3327,12 @@ use_default_ring:
int, tx->st_mode, int, tx_srs->srs_oth_ring_count);
if (tx_ring_count != 0) {
- kmem_free(tx_ring,
- sizeof (mac_ring_handle_t) * tx_ring_count);
+ tx->st_ring_count = tx_rings_reserved;
+ tx->st_rings = kmem_zalloc(sizeof (mac_ring_handle_t) *
+ tx_rings_reserved, KM_SLEEP);
+ for (i = 0; i < tx->st_ring_count; i++)
+ tx->st_rings[i] = tx_rings[i];
+ kmem_free(tx_rings, sizeof (mac_ring_handle_t) * tx_ring_count);
}
}
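
The net effect of the setup changes above is that the TX SRS now always ends up with at least one ring handle (falling back to the default ring when nothing else could be reserved), recorded in the st_rings/st_ring_count fields added to mac_srs_tx_t later in this patch; the teardown hunk frees the array with the same count. A condensed sketch of that pairing, restating the invariant rather than adding anything new:

	/* setup: tx_rings_reserved >= 1 is now guaranteed */
	tx->st_ring_count = tx_rings_reserved;
	tx->st_rings = kmem_zalloc(sizeof (mac_ring_handle_t) *
	    tx_rings_reserved, KM_SLEEP);

	/* teardown: released with the matching count */
	if (tx->st_ring_count != 0) {
		kmem_free(tx->st_rings,
		    sizeof (mac_ring_handle_t) * tx->st_ring_count);
	}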
diff --git a/usr/src/uts/common/io/nxge/nxge_hio.c b/usr/src/uts/common/io/nxge/nxge_hio.c
index 827553301c..b58acde5e8 100644
--- a/usr/src/uts/common/io/nxge/nxge_hio.c
+++ b/usr/src/uts/common/io/nxge/nxge_hio.c
@@ -41,9 +41,6 @@
#include <sys/nxge/nxge_txdma.h>
#include <sys/nxge/nxge_hio.h>
-#define NXGE_HIO_SHARE_MIN_CHANNELS 2
-#define NXGE_HIO_SHARE_MAX_CHANNELS 2
-
/*
* External prototypes
*/
@@ -1057,23 +1054,6 @@ nxge_hio_init(
NXGE_DEBUG_MSG((nxge, HIO_CTL,
"Hybrid IO-capable service domain"));
return (NXGE_OK);
- } else {
- /*
- * isLDOMguest(nxge) == B_TRUE
- */
- nx_vio_fp_t *vio;
- nhd->type = NXGE_HIO_TYPE_GUEST;
-
- vio = &nhd->hio.vio;
- vio->__register = (vio_net_resource_reg_t)
- modgetsymvalue("vio_net_resource_reg", 0);
- vio->unregister = (vio_net_resource_unreg_t)
- modgetsymvalue("vio_net_resource_unreg", 0);
-
- if (vio->__register == 0 || vio->unregister == 0) {
- NXGE_ERROR_MSG((nxge, VIR_CTL, "vio_net is absent!"));
- return (NXGE_ERROR);
- }
}
return (0);
@@ -1144,12 +1124,16 @@ nxge_hio_clear_unicst(p_nxge_t nxgep, const uint8_t *mac_addr)
static int
nxge_hio_add_mac(void *arg, const uint8_t *mac_addr)
{
- nxge_ring_group_t *group = (nxge_ring_group_t *)arg;
- p_nxge_t nxge = group->nxgep;
- int rv;
- nxge_hio_vr_t *vr; /* The Virtualization Region */
+ nxge_ring_group_t *group = (nxge_ring_group_t *)arg;
+ p_nxge_t nxge = group->nxgep;
+ int rv;
+ nxge_hio_vr_t *vr; /* The Virtualization Region */
ASSERT(group->type == MAC_RING_TYPE_RX);
+ ASSERT(group->nxgep != NULL);
+
+ if (isLDOMguest(group->nxgep))
+ return (0);
mutex_enter(nxge->genlock);
@@ -1174,8 +1158,7 @@ nxge_hio_add_mac(void *arg, const uint8_t *mac_addr)
/*
* Program the mac address for the group.
*/
- if ((rv = nxge_hio_group_mac_add(nxge, group,
- mac_addr)) != 0) {
+ if ((rv = nxge_hio_group_mac_add(nxge, group, mac_addr)) != 0) {
return (rv);
}
@@ -1206,6 +1189,10 @@ nxge_hio_rem_mac(void *arg, const uint8_t *mac_addr)
int rv, slot;
ASSERT(group->type == MAC_RING_TYPE_RX);
+ ASSERT(group->nxgep != NULL);
+
+ if (isLDOMguest(group->nxgep))
+ return (0);
mutex_enter(nxge->genlock);
@@ -1253,14 +1240,16 @@ nxge_hio_group_start(mac_group_driver_t gdriver)
int dev_gindex;
ASSERT(group->type == MAC_RING_TYPE_RX);
+ ASSERT(group->nxgep != NULL);
-#ifdef later
ASSERT(group->nxgep->nxge_mac_state == NXGE_MAC_STARTED);
-#endif
if (group->nxgep->nxge_mac_state != NXGE_MAC_STARTED)
return (ENXIO);
mutex_enter(group->nxgep->genlock);
+ if (isLDOMguest(group->nxgep))
+ goto nxge_hio_group_start_exit;
+
dev_gindex = group->nxgep->pt_config.hw_config.def_mac_rxdma_grpid +
group->gindex;
rdc_grp_p = &group->nxgep->pt_config.rdc_grps[dev_gindex];
@@ -1289,9 +1278,9 @@ nxge_hio_group_start(mac_group_driver_t gdriver)
(void) nxge_init_fzc_rdc_tbl(group->nxgep, rdc_grp_p, rdctbl);
+nxge_hio_group_start_exit:
group->started = B_TRUE;
mutex_exit(group->nxgep->genlock);
-
return (0);
}
@@ -1305,6 +1294,9 @@ nxge_hio_group_stop(mac_group_driver_t gdriver)
mutex_enter(group->nxgep->genlock);
group->started = B_FALSE;
+ if (isLDOMguest(group->nxgep))
+ goto nxge_hio_group_stop_exit;
+
/*
* Unbind the RDC table previously bound for this group.
*
@@ -1314,6 +1306,7 @@ nxge_hio_group_stop(mac_group_driver_t gdriver)
if (group->gindex != 0)
(void) nxge_fzc_rdc_tbl_unbind(group->nxgep, group->rdctbl);
+nxge_hio_group_stop_exit:
mutex_exit(group->nxgep->genlock);
}
@@ -1334,20 +1327,26 @@ nxge_hio_group_get(void *arg, mac_ring_type_t type, int groupid,
group->gindex = groupid;
group->sindex = 0; /* not yet bound to a share */
- dev_gindex = nxgep->pt_config.hw_config.def_mac_rxdma_grpid +
- groupid;
+ if (!isLDOMguest(nxgep)) {
+ dev_gindex =
+ nxgep->pt_config.hw_config.def_mac_rxdma_grpid +
+ groupid;
- if (nxgep->pt_config.hw_config.def_mac_rxdma_grpid ==
- dev_gindex)
- group->port_default_grp = B_TRUE;
+ if (nxgep->pt_config.hw_config.def_mac_rxdma_grpid ==
+ dev_gindex)
+ group->port_default_grp = B_TRUE;
+
+ infop->mgi_count =
+ nxgep->pt_config.rdc_grps[dev_gindex].max_rdcs;
+ } else {
+ infop->mgi_count = NXGE_HIO_SHARE_MAX_CHANNELS;
+ }
infop->mgi_driver = (mac_group_driver_t)group;
infop->mgi_start = nxge_hio_group_start;
infop->mgi_stop = nxge_hio_group_stop;
infop->mgi_addmac = nxge_hio_add_mac;
infop->mgi_remmac = nxge_hio_rem_mac;
- infop->mgi_count =
- nxgep->pt_config.rdc_grps[dev_gindex].max_rdcs;
break;
case MAC_RING_TYPE_TX:
diff --git a/usr/src/uts/common/io/nxge/nxge_hio_guest.c b/usr/src/uts/common/io/nxge/nxge_hio_guest.c
index eb05298299..78c1bb53a6 100644
--- a/usr/src/uts/common/io/nxge/nxge_hio_guest.c
+++ b/usr/src/uts/common/io/nxge/nxge_hio_guest.c
@@ -35,46 +35,9 @@
#include <sys/nxge/nxge_fzc.h>
#include <sys/nxge/nxge_rxdma.h>
#include <sys/nxge/nxge_txdma.h>
-
#include <sys/nxge/nxge_hio.h>
/*
- * nxge_hio_unregister
- *
- * Unregister with the VNET module.
- *
- * Arguments:
- * nxge
- *
- * Notes:
- * We must uninitialize all DMA channels associated with the VR, too.
- *
- * We're assuming that the channels will be disabled & unassigned
- * in the service domain, after we're done here.
- *
- * Context:
- * Guest domain
- */
-void
-nxge_hio_unregister(
- nxge_t *nxge)
-{
- nxge_hio_data_t *nhd = (nxge_hio_data_t *)nxge->nxge_hw_p->hio;
-
- if (nhd == 0) {
- return;
- }
-
-#if defined(sun4v)
- /* Unregister with vNet. */
- if (nhd->hio.vio.unregister) {
- if (nxge->hio_vr)
- (*nhd->hio.vio.unregister)(nxge->hio_vr->vhp);
- }
-#endif
-}
-
-/*
* nxge_guest_regs_map
*
* Map in a guest domain's register set(s).
@@ -95,8 +58,7 @@ static ddi_device_acc_attr_t nxge_guest_register_access_attributes = {
};
int
-nxge_guest_regs_map(
- nxge_t *nxge)
+nxge_guest_regs_map(nxge_t *nxge)
{
dev_regs_t *regs;
off_t regsize;
@@ -211,31 +173,22 @@ static void nxge_check_guest_state(nxge_hio_vr_t *);
int
nxge_hio_vr_add(nxge_t *nxge)
{
- extern mac_callbacks_t nxge_m_callbacks;
-
- nxge_hio_data_t *nhd = (nxge_hio_data_t *)nxge->nxge_hw_p->hio;
- nxge_hio_vr_t *vr;
- nxge_hio_dc_t *dc;
-
- int *reg_val;
- uint_t reg_len;
- uint8_t vr_index;
-
- nxhv_vr_fp_t *fp;
- uint64_t vr_address, vr_size;
- uint32_t cookie;
+ extern nxge_status_t nxge_mac_register(p_nxge_t);
- nxhv_dc_fp_t *tx, *rx;
- uint64_t tx_map, rx_map;
-
- uint64_t hv_rv;
-
- /* Variables needed to register with vnet. */
- mac_register_t *mac_info;
- ether_addr_t mac_addr;
- nx_vio_fp_t *vio;
-
- int i;
+ nxge_hio_data_t *nhd = (nxge_hio_data_t *)nxge->nxge_hw_p->hio;
+ nxge_hio_vr_t *vr;
+ nxge_hio_dc_t *dc;
+ int *reg_val;
+ uint_t reg_len;
+ uint8_t vr_index;
+ nxhv_vr_fp_t *fp;
+ uint64_t vr_address, vr_size;
+ uint32_t cookie;
+ nxhv_dc_fp_t *tx, *rx;
+ uint64_t tx_map, rx_map;
+ uint64_t hv_rv;
+ int i;
+ nxge_status_t status;
NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_vr_add"));
@@ -384,40 +337,13 @@ nxge_hio_vr_add(nxge_t *nxge)
}
}
- /*
- * Register with vnet.
- */
- if ((mac_info = mac_alloc(MAC_VERSION)) == NULL)
- return (NXGE_ERROR);
-
- mac_info->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
- mac_info->m_driver = nxge;
- mac_info->m_dip = nxge->dip;
- mac_info->m_src_addr = KMEM_ZALLOC(MAXMACADDRLEN, KM_SLEEP);
- mac_info->m_dst_addr = KMEM_ZALLOC(MAXMACADDRLEN, KM_SLEEP);
- (void) memset(mac_info->m_src_addr, 0xff, sizeof (MAXMACADDRLEN));
- mac_info->m_callbacks = &nxge_m_callbacks;
- mac_info->m_min_sdu = 0;
- mac_info->m_max_sdu = NXGE_MTU_DEFAULT_MAX -
- sizeof (struct ether_header) - ETHERFCSL - 4;
-
- (void) memset(&mac_addr, 0xff, sizeof (mac_addr));
-
- /* Register with vio_net. */
- vio = &nhd->hio.vio;
- if ((*vio->__register)(mac_info, VIO_NET_RES_HYBRID,
- nxge->hio_mac_addr, mac_addr, &vr->vhp, &vio->cb)) {
- NXGE_DEBUG_MSG((nxge, HIO_CTL, "HIO registration() failed"));
- KMEM_FREE(mac_info->m_src_addr, MAXMACADDRLEN);
- KMEM_FREE(mac_info->m_dst_addr, MAXMACADDRLEN);
- mac_free(mac_info);
- return (NXGE_ERROR);
+ status = nxge_mac_register(nxge);
+ if (status != NXGE_OK) {
+ cmn_err(CE_WARN, "nxge(%d): nxge_mac_register failed\n",
+ nxge->instance);
+ return (status);
}
- KMEM_FREE(mac_info->m_src_addr, MAXMACADDRLEN);
- KMEM_FREE(mac_info->m_dst_addr, MAXMACADDRLEN);
- mac_free(mac_info);
-
nxge->hio_vr = vr; /* For faster lookups. */
NXGE_DEBUG_MSG((nxge, HIO_CTL, "<== nxge_hio_vr_add"));
diff --git a/usr/src/uts/common/io/nxge/nxge_main.c b/usr/src/uts/common/io/nxge/nxge_main.c
index a1ab453851..c0020bdac4 100644
--- a/usr/src/uts/common/io/nxge/nxge_main.c
+++ b/usr/src/uts/common/io/nxge/nxge_main.c
@@ -272,14 +272,11 @@ static void nxge_m_stop(void *);
static int nxge_m_multicst(void *, boolean_t, const uint8_t *);
static int nxge_m_promisc(void *, boolean_t);
static void nxge_m_ioctl(void *, queue_t *, mblk_t *);
-static nxge_status_t nxge_mac_register(p_nxge_t);
+nxge_status_t nxge_mac_register(p_nxge_t);
static int nxge_altmac_set(p_nxge_t nxgep, uint8_t *mac_addr,
int slot, int rdctbl, boolean_t usetbl);
void nxge_mmac_kstat_update(p_nxge_t nxgep, int slot,
boolean_t factory);
-#if defined(sun4v)
-extern mblk_t *nxge_m_tx(void *arg, mblk_t *mp);
-#endif
static void nxge_m_getfactaddr(void *, uint_t, uint8_t *);
static boolean_t nxge_m_getcapab(void *, mac_capab_t, void *);
@@ -630,11 +627,6 @@ nxge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
if (nxgep->niu_type != N2_NIU) {
nxge_set_pci_replay_timeout(nxgep);
}
-#if defined(sun4v)
- if (isLDOMguest(nxgep)) {
- nxge_m_callbacks.mc_tx = nxge_m_tx;
- }
-#endif
#if defined(sun4v)
/* This is required by nxge_hio_init(), which follows. */
@@ -961,11 +953,7 @@ nxge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
(void) nxge_link_monitor(nxgep, LINK_MONITOR_STOP);
- if (isLDOMguest(nxgep)) {
- if (nxgep->nxge_mac_state == NXGE_MAC_STARTED)
- nxge_m_stop((void *)nxgep);
- nxge_hio_unregister(nxgep);
- } else if (nxgep->mach && (status = mac_unregister(nxgep->mach)) != 0) {
+ if (nxgep->mach && (status = mac_unregister(nxgep->mach)) != 0) {
NXGE_ERROR_MSG((nxgep, NXGE_ERR_CTL,
"<== nxge_detach status = 0x%08X", status));
return (DDI_FAILURE);
@@ -4294,10 +4282,13 @@ nxge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
case MAC_CAPAB_MULTIFACTADDR: {
mac_capab_multifactaddr_t *mfacp = cap_data;
- mutex_enter(nxgep->genlock);
- mfacp->mcm_naddr = nxgep->nxge_mmac_info.num_factory_mmac;
- mfacp->mcm_getaddr = nxge_m_getfactaddr;
- mutex_exit(nxgep->genlock);
+ if (!isLDOMguest(nxgep)) {
+ mutex_enter(nxgep->genlock);
+ mfacp->mcm_naddr =
+ nxgep->nxge_mmac_info.num_factory_mmac;
+ mfacp->mcm_getaddr = nxge_m_getfactaddr;
+ mutex_exit(nxgep->genlock);
+ }
break;
}
@@ -4325,34 +4316,68 @@ nxge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
mutex_enter(nxgep->genlock);
if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
- cap_rings->mr_group_type = MAC_GROUP_TYPE_DYNAMIC;
- cap_rings->mr_rnum = p_cfgp->max_rdcs;
- cap_rings->mr_rget = nxge_fill_ring;
- cap_rings->mr_gnum = p_cfgp->max_rdc_grpids;
- cap_rings->mr_gget = nxge_hio_group_get;
- cap_rings->mr_gaddring = nxge_group_add_ring;
- cap_rings->mr_gremring = nxge_group_rem_ring;
+ if (isLDOMguest(nxgep)) {
+ cap_rings->mr_group_type =
+ MAC_GROUP_TYPE_STATIC;
+ cap_rings->mr_rnum =
+ NXGE_HIO_SHARE_MAX_CHANNELS;
+ cap_rings->mr_rget = nxge_fill_ring;
+ cap_rings->mr_gnum = 1;
+ cap_rings->mr_gget = nxge_hio_group_get;
+ cap_rings->mr_gaddring = NULL;
+ cap_rings->mr_gremring = NULL;
+ } else {
+ /*
+ * Service Domain.
+ */
+ cap_rings->mr_group_type =
+ MAC_GROUP_TYPE_DYNAMIC;
+ cap_rings->mr_rnum = p_cfgp->max_rdcs;
+ cap_rings->mr_rget = nxge_fill_ring;
+ cap_rings->mr_gnum = p_cfgp->max_rdc_grpids;
+ cap_rings->mr_gget = nxge_hio_group_get;
+ cap_rings->mr_gaddring = nxge_group_add_ring;
+ cap_rings->mr_gremring = nxge_group_rem_ring;
+ }
NXGE_DEBUG_MSG((nxgep, RX_CTL,
"==> nxge_m_getcapab: rx nrings[%d] ngroups[%d]",
p_cfgp->max_rdcs, p_cfgp->max_rdc_grpids));
} else {
- cap_rings->mr_group_type = MAC_GROUP_TYPE_DYNAMIC;
- cap_rings->mr_rnum = p_cfgp->tdc.count;
- cap_rings->mr_rget = nxge_fill_ring;
- if (isLDOMservice(nxgep)) {
- /* share capable */
- /* Do not report the default ring: hence -1 */
+ /*
+ * TX Rings.
+ */
+ if (isLDOMguest(nxgep)) {
+ cap_rings->mr_group_type =
+ MAC_GROUP_TYPE_STATIC;
+ cap_rings->mr_rnum =
+ NXGE_HIO_SHARE_MAX_CHANNELS;
+ cap_rings->mr_rget = nxge_fill_ring;
+ cap_rings->mr_gnum = 0;
+ cap_rings->mr_gget = NULL;
+ cap_rings->mr_gaddring = NULL;
+ cap_rings->mr_gremring = NULL;
+ } else {
+ /*
+ * Service Domain.
+ */
+ cap_rings->mr_group_type =
+ MAC_GROUP_TYPE_DYNAMIC;
+ cap_rings->mr_rnum = p_cfgp->tdc.count;
+ cap_rings->mr_rget = nxge_fill_ring;
+
+ /*
+ * Share capable.
+ *
+ * Do not report the default group: hence -1
+ */
cap_rings->mr_gnum =
NXGE_MAX_TDC_GROUPS / nxgep->nports - 1;
- } else {
- cap_rings->mr_gnum = 0;
+ cap_rings->mr_gget = nxge_hio_group_get;
+ cap_rings->mr_gaddring = nxge_group_add_ring;
+ cap_rings->mr_gremring = nxge_group_rem_ring;
}
- cap_rings->mr_gget = nxge_hio_group_get;
- cap_rings->mr_gaddring = nxge_group_add_ring;
- cap_rings->mr_gremring = nxge_group_rem_ring;
-
NXGE_DEBUG_MSG((nxgep, TX_CTL,
"==> nxge_m_getcapab: tx rings # of rings %d",
p_cfgp->tdc.count));
@@ -6372,7 +6397,7 @@ nxge_intrs_disable(p_nxge_t nxgep)
NXGE_DEBUG_MSG((nxgep, INT_CTL, "<== nxge_intrs_disable"));
}
-static nxge_status_t
+nxge_status_t
nxge_mac_register(p_nxge_t nxgep)
{
mac_register_t *macp;
@@ -6386,7 +6411,13 @@ nxge_mac_register(p_nxge_t nxgep)
macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
macp->m_driver = nxgep;
macp->m_dip = nxgep->dip;
- macp->m_src_addr = nxgep->ouraddr.ether_addr_octet;
+ if (!isLDOMguest(nxgep)) {
+ macp->m_src_addr = nxgep->ouraddr.ether_addr_octet;
+ } else {
+ macp->m_src_addr = KMEM_ZALLOC(MAXMACADDRLEN, KM_SLEEP);
+ macp->m_dst_addr = KMEM_ZALLOC(MAXMACADDRLEN, KM_SLEEP);
+ (void) memset(macp->m_src_addr, 0xff, sizeof (MAXMACADDRLEN));
+ }
macp->m_callbacks = &nxge_m_callbacks;
macp->m_min_sdu = 0;
nxgep->mac.default_mtu = nxgep->mac.maxframesize -
@@ -6395,7 +6426,12 @@ nxge_mac_register(p_nxge_t nxgep)
macp->m_margin = VLAN_TAGSZ;
macp->m_priv_props = nxge_priv_props;
macp->m_priv_prop_count = NXGE_MAX_PRIV_PROPS;
- macp->m_v12n = MAC_VIRT_HIO | MAC_VIRT_LEVEL1 | MAC_VIRT_SERIALIZE;
+ if (isLDOMguest(nxgep)) {
+ macp->m_v12n = MAC_VIRT_LEVEL1 | MAC_VIRT_SERIALIZE;
+ } else {
+ macp->m_v12n = MAC_VIRT_HIO | MAC_VIRT_LEVEL1 | \
+ MAC_VIRT_SERIALIZE;
+ }
NXGE_DEBUG_MSG((nxgep, MAC_CTL,
"==> nxge_mac_register: instance %d "
@@ -6406,6 +6442,10 @@ nxge_mac_register(p_nxge_t nxgep)
NXGE_EHEADER_VLAN_CRC));
status = mac_register(macp, &nxgep->mach);
+ if (isLDOMguest(nxgep)) {
+ KMEM_FREE(macp->m_src_addr, MAXMACADDRLEN);
+ KMEM_FREE(macp->m_dst_addr, MAXMACADDRLEN);
+ }
mac_free(macp);
if (status != 0) {
diff --git a/usr/src/uts/common/io/nxge/nxge_rxdma.c b/usr/src/uts/common/io/nxge/nxge_rxdma.c
index 313e76c8f0..4b427d1a8d 100644
--- a/usr/src/uts/common/io/nxge/nxge_rxdma.c
+++ b/usr/src/uts/common/io/nxge/nxge_rxdma.c
@@ -1756,7 +1756,7 @@ nxge_rx_intr(void *arg1, void *arg2)
uint8_t channel;
npi_handle_t handle;
rx_dma_ctl_stat_t cs;
- p_rx_rcr_ring_t rcr_ring;
+ p_rx_rcr_ring_t rcrp;
mblk_t *mp = NULL;
if (ldvp == NULL) {
@@ -1789,7 +1789,7 @@ nxge_rx_intr(void *arg1, void *arg2)
/*
* Get the ring to enable us to process packets.
*/
- rcr_ring = nxgep->rx_rcr_rings->rcr_rings[ldvp->vdma_index];
+ rcrp = nxgep->rx_rcr_rings->rcr_rings[ldvp->vdma_index];
/*
* The RCR ring lock must be held when packets
@@ -1799,7 +1799,7 @@ nxge_rx_intr(void *arg1, void *arg2)
* (will cause fatal errors such as rcrincon bit set)
* and the setting of the poll_flag.
*/
- MUTEX_ENTER(&rcr_ring->lock);
+ MUTEX_ENTER(&rcrp->lock);
/*
* Get the control and status for this channel.
@@ -1840,12 +1840,12 @@ nxge_rx_intr(void *arg1, void *arg2)
mgm.value);
}
}
- MUTEX_EXIT(&rcr_ring->lock);
+ MUTEX_EXIT(&rcrp->lock);
return (DDI_INTR_CLAIMED);
}
- ASSERT(rcr_ring->ldgp == ldgp);
- ASSERT(rcr_ring->ldvp == ldvp);
+ ASSERT(rcrp->ldgp == ldgp);
+ ASSERT(rcrp->ldvp == ldvp);
RXDMA_REG_READ64(handle, RX_DMA_CTL_STAT_REG, channel, &cs.value);
@@ -1856,8 +1856,8 @@ nxge_rx_intr(void *arg1, void *arg2)
cs.bits.hdw.rcrto,
cs.bits.hdw.rcrthres));
- if (rcr_ring->poll_flag == 0) {
- mp = nxge_rx_pkts(nxgep, rcr_ring, cs, -1);
+ if (!rcrp->poll_flag) {
+ mp = nxge_rx_pkts(nxgep, rcrp, cs, -1);
}
/* error events. */
@@ -1873,27 +1873,34 @@ nxge_rx_intr(void *arg1, void *arg2)
* these two edge triggered bits.
*/
cs.value &= RX_DMA_CTL_STAT_WR1C;
- cs.bits.hdw.mex = rcr_ring->poll_flag ? 0 : 1;
+ cs.bits.hdw.mex = rcrp->poll_flag ? 0 : 1;
RXDMA_REG_WRITE64(handle, RX_DMA_CTL_STAT_REG, channel,
cs.value);
/*
* If the polling mode is enabled, disable the interrupt.
*/
- if (rcr_ring->poll_flag) {
+ if (rcrp->poll_flag) {
NXGE_DEBUG_MSG((nxgep, NXGE_ERR_CTL,
"==> nxge_rx_intr: rdc %d ldgp $%p ldvp $%p "
"(disabling interrupts)", channel, ldgp, ldvp));
+
/*
* Disarm this logical group if this is a single device
* group.
*/
if (ldgp->nldvs == 1) {
- ldgimgm_t mgm;
- mgm.value = 0;
- mgm.bits.ldw.arm = 0;
- NXGE_REG_WR64(handle,
- LDGIMGN_REG + LDSV_OFFSET(ldgp->ldg), mgm.value);
+ if (isLDOMguest(nxgep)) {
+ ldgp->arm = B_FALSE;
+ nxge_hio_ldgimgn(nxgep, ldgp);
+ } else {
+ ldgimgm_t mgm;
+ mgm.value = 0;
+ mgm.bits.ldw.arm = 0;
+ NXGE_REG_WR64(handle,
+ LDGIMGN_REG + LDSV_OFFSET(ldgp->ldg),
+ mgm.value);
+ }
}
} else {
/*
@@ -1920,24 +1927,11 @@ nxge_rx_intr(void *arg1, void *arg2)
"==> nxge_rx_intr: rdc %d ldgp $%p "
"exiting ISR (and call mac_rx_ring)", channel, ldgp));
}
- MUTEX_EXIT(&rcr_ring->lock);
+ MUTEX_EXIT(&rcrp->lock);
if (mp != NULL) {
- if (!isLDOMguest(nxgep))
- mac_rx_ring(nxgep->mach, rcr_ring->rcr_mac_handle, mp,
- rcr_ring->rcr_gen_num);
-#if defined(sun4v)
- else { /* isLDOMguest(nxgep) */
- nxge_hio_data_t *nhd = (nxge_hio_data_t *)
- nxgep->nxge_hw_p->hio;
- nx_vio_fp_t *vio = &nhd->hio.vio;
-
- if (vio->cb.vio_net_rx_cb) {
- (*vio->cb.vio_net_rx_cb)
- (nxgep->hio_vr->vhp, mp);
- }
- }
-#endif
+ mac_rx_ring(nxgep->mach, rcrp->rcr_mac_handle, mp,
+ rcrp->rcr_gen_num);
}
NXGE_DEBUG_MSG((nxgep, RX_CTL, "<== nxge_rx_intr: DDI_INTR_CLAIMED"));
return (DDI_INTR_CLAIMED);
@@ -2720,6 +2714,7 @@ nxge_enable_poll(void *arg)
uint32_t channel;
if (ring_handle == NULL) {
+ ASSERT(ring_handle != NULL);
return (0);
}
@@ -2760,6 +2755,7 @@ nxge_disable_poll(void *arg)
uint32_t channel;
if (ring_handle == NULL) {
+ ASSERT(ring_handle != NULL);
return (0);
}
@@ -2816,12 +2812,18 @@ nxge_disable_poll(void *arg)
"==> nxge_disable_poll: rdc %d ldgp $%p (enable intr)",
ringp->rdc, ldgp));
if (ldgp->nldvs == 1) {
- ldgimgm_t mgm;
- mgm.value = 0;
- mgm.bits.ldw.arm = 1;
- mgm.bits.ldw.timer = ldgp->ldg_timer;
- NXGE_REG_WR64(handle,
- LDGIMGN_REG + LDSV_OFFSET(ldgp->ldg), mgm.value);
+ if (isLDOMguest(nxgep)) {
+ ldgp->arm = B_TRUE;
+ nxge_hio_ldgimgn(nxgep, ldgp);
+ } else {
+ ldgimgm_t mgm;
+ mgm.value = 0;
+ mgm.bits.ldw.arm = 1;
+ mgm.bits.ldw.timer = ldgp->ldg_timer;
+ NXGE_REG_WR64(handle,
+ LDGIMGN_REG + LDSV_OFFSET(ldgp->ldg),
+ mgm.value);
+ }
}
ringp->poll_flag = 0;
}
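
Both the interrupt path above (nxge_rx_intr()) and the poll-disable path (nxge_disable_poll()) now make the same split: a guest domain asks the service domain to (dis)arm the logical device group via nxge_hio_ldgimgn(), while the service domain programs the LDGIMGN register directly. A sketch of that shared pattern as a hypothetical helper (the driver open-codes it in both places; the helper name, the ldg pointer type, and the npi handle parameter are assumptions):

	static void
	nxge_ldg_set_arm(p_nxge_t nxgep, npi_handle_t handle,
	    p_nxge_ldg_t ldgp, boolean_t arm)
	{
		if (isLDOMguest(nxgep)) {
			/* Proxy the (dis)arm request through Hybrid I/O. */
			ldgp->arm = arm;
			nxge_hio_ldgimgn(nxgep, ldgp);
		} else {
			ldgimgm_t mgm;

			mgm.value = 0;
			mgm.bits.ldw.arm = arm ? 1 : 0;
			if (arm)
				mgm.bits.ldw.timer = ldgp->ldg_timer;
			NXGE_REG_WR64(handle,
			    LDGIMGN_REG + LDSV_OFFSET(ldgp->ldg), mgm.value);
		}
	}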
diff --git a/usr/src/uts/common/io/nxge/nxge_send.c b/usr/src/uts/common/io/nxge/nxge_send.c
index 16ce76ccad..4f7edf292a 100644
--- a/usr/src/uts/common/io/nxge/nxge_send.c
+++ b/usr/src/uts/common/io/nxge/nxge_send.c
@@ -66,20 +66,9 @@ nxge_tx_ring_task(void *arg)
(void) nxge_txdma_reclaim(ring->nxgep, ring, 0);
MUTEX_EXIT(&ring->lock);
- if (!isLDOMguest(ring->nxgep) && !ring->tx_ring_offline)
+ if (!ring->tx_ring_offline) {
mac_tx_ring_update(ring->nxgep->mach, ring->tx_ring_handle);
-#if defined(sun4v)
- else {
- nxge_hio_data_t *nhd =
- (nxge_hio_data_t *)ring->nxgep->nxge_hw_p->hio;
- nx_vio_fp_t *vio = &nhd->hio.vio;
-
- /* Call back vnet. */
- if (vio->cb.vio_net_tx_update) {
- (*vio->cb.vio_net_tx_update)(ring->nxgep->hio_vr->vhp);
- }
}
-#endif
}
static void
@@ -141,65 +130,6 @@ nxge_tx_ring_send(void *arg, mblk_t *mp)
return ((mblk_t *)NULL);
}
-#if defined(sun4v)
-
-/*
- * Hashing policy for load balancing over the set of TX rings
- * available to the driver.
- */
-static uint8_t nxge_tx_hash_policy = MAC_PKT_HASH_L4;
-
-/*
- * nxge_m_tx() is needed for Hybrid I/O operation of the vnet in
- * the guest domain. See CR 6778758 for long term solution.
- *
- * The guest domain driver will for now hash the packet
- * to pick a DMA channel from the only group it has group 0.
- */
-
-mblk_t *
-nxge_m_tx(void *arg, mblk_t *mp)
-{
- p_nxge_t nxgep = (p_nxge_t)arg;
- mblk_t *next;
- uint64_t rindex;
- p_tx_ring_t tx_ring_p;
- int status;
-
- NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_m_tx"));
-
- /*
- * Hash to pick a ring from Group 0, the only TX group
- * for a guest domain driver.
- */
- rindex = mac_pkt_hash(DL_ETHER, mp, nxge_tx_hash_policy, B_TRUE);
- rindex = rindex % nxgep->pt_config.tdc_grps[0].max_tdcs;
-
- /*
- * Get the ring handle.
- */
- tx_ring_p = nxgep->tx_rings->rings[rindex];
-
- while (mp != NULL) {
- next = mp->b_next;
- mp->b_next = NULL;
-
- status = nxge_start(nxgep, tx_ring_p, mp);
- if (status != 0) {
- mp->b_next = next;
- nxge_tx_ring_dispatch(tx_ring_p);
- return (mp);
- }
-
- mp = next;
- }
-
- NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_m_tx"));
- return ((mblk_t *)NULL);
-}
-
-#endif
-
int
nxge_start(p_nxge_t nxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp)
{
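
With nxge_m_tx() removed, a guest-domain nxge no longer hashes packets onto a TDC itself; the mac layer instead fans traffic out over the pseudo TX rings that vnet exports, and each pseudo ring forwards onto the Hybrid hardware ring bound to it. A hedged sketch of that forwarding step (vnet_hio_tx() is only declared in this patch excerpt, so the hw_rh field name is taken from the ring-capability comments and the body is an assumption):

	mblk_t *
	vnet_hio_tx(void *arg, mblk_t *mp)
	{
		vnet_pseudo_tx_ring_t	*tx_ringp = arg;

		/*
		 * hw_rh holds the Hybrid (nxge) TX ring handle bound to
		 * this pseudo ring; unsent packets are handed back.
		 */
		return (mac_hwring_tx(tx_ringp->hw_rh, mp));
	}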
diff --git a/usr/src/uts/common/io/nxge/nxge_virtual.c b/usr/src/uts/common/io/nxge/nxge_virtual.c
index ff78d828d6..c0468f8fed 100644
--- a/usr/src/uts/common/io/nxge/nxge_virtual.c
+++ b/usr/src/uts/common/io/nxge/nxge_virtual.c
@@ -3994,6 +3994,9 @@ nxge_get_rxring_index(p_nxge_t nxgep, int groupid, int ringidx)
p_dma_cfgp = &nxgep->pt_config;
p_cfgp = &p_dma_cfgp->hw_config;
+ if (isLDOMguest(nxgep))
+ return (ringidx);
+
for (i = 0; i < groupid; i++) {
rdc_grp_p =
&p_dma_cfgp->rdc_grps[p_cfgp->def_mac_rxdma_grpid + i];
diff --git a/usr/src/uts/common/sys/mac_client_priv.h b/usr/src/uts/common/sys/mac_client_priv.h
index 4acc126a8f..6174dd1a72 100644
--- a/usr/src/uts/common/sys/mac_client_priv.h
+++ b/usr/src/uts/common/sys/mac_client_priv.h
@@ -120,7 +120,7 @@ extern void mac_rx_client_quiesce(mac_client_handle_t);
extern void mac_rx_client_restart(mac_client_handle_t);
extern void mac_srs_perm_quiesce(mac_client_handle_t, boolean_t);
extern int mac_hwrings_get(mac_client_handle_t, mac_group_handle_t *,
- mac_ring_handle_t *);
+ mac_ring_handle_t *, mac_ring_type_t);
extern void mac_hwring_setup(mac_ring_handle_t, mac_resource_handle_t);
extern void mac_hwring_teardown(mac_ring_handle_t);
extern int mac_hwring_disable_intr(mac_ring_handle_t);
diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h
index 3c2e30f37e..a93335606f 100644
--- a/usr/src/uts/common/sys/mac_impl.h
+++ b/usr/src/uts/common/sys/mac_impl.h
@@ -262,7 +262,7 @@ struct mac_group_s {
#define MAC_RING_TX_DEFAULT(mip, mp) \
((mip->mi_default_tx_ring == NULL) ? \
mip->mi_tx(mip->mi_driver, mp) : \
- mac_ring_tx(mip->mi_default_tx_ring, mp))
+ mac_hwring_tx(mip->mi_default_tx_ring, mp))
#define MAC_TX(mip, ring, mp, mcip) { \
/* \
@@ -275,7 +275,7 @@ struct mac_group_s {
(ring == NULL)) \
mp = MAC_RING_TX_DEFAULT(mip, mp); \
else \
- mp = mac_ring_tx(ring, mp); \
+ mp = mac_hwring_tx(ring, mp); \
}
/* mci_tx_flag */
@@ -585,7 +585,7 @@ extern int mac_group_addmac(mac_group_t *, const uint8_t *);
extern int mac_group_remmac(mac_group_t *, const uint8_t *);
extern int mac_rx_group_add_flow(mac_client_impl_t *, flow_entry_t *,
mac_group_t *);
-extern mblk_t *mac_ring_tx(mac_ring_handle_t, mblk_t *);
+extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *);
extern mac_ring_t *mac_reserve_tx_ring(mac_impl_t *, mac_ring_t *);
extern void mac_release_tx_ring(mac_ring_handle_t);
extern mac_group_t *mac_reserve_tx_group(mac_impl_t *, mac_share_handle_t);
diff --git a/usr/src/uts/common/sys/mac_soft_ring.h b/usr/src/uts/common/sys/mac_soft_ring.h
index 4973b84215..4b07fb4e9f 100644
--- a/usr/src/uts/common/sys/mac_soft_ring.h
+++ b/usr/src/uts/common/sys/mac_soft_ring.h
@@ -131,6 +131,9 @@ typedef struct mac_srs_tx_s {
void *st_arg1;
void *st_arg2;
mac_group_t *st_group; /* TX group for share */
+ uint32_t st_ring_count; /* no. of tx rings */
+ mac_ring_handle_t *st_rings;
+
boolean_t st_woken_up;
/*
diff --git a/usr/src/uts/common/sys/nxge/nxge_hio.h b/usr/src/uts/common/sys/nxge/nxge_hio.h
index d57a5424eb..b18f32e346 100644
--- a/usr/src/uts/common/sys/nxge/nxge_hio.h
+++ b/usr/src/uts/common/sys/nxge/nxge_hio.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -35,9 +35,6 @@ extern "C" {
#include <nxge_ipp.h>
#include <nxge_fflp.h>
#include <sys/mac_provider.h>
-#if defined(sun4v)
-#include <sys/vnet_res.h>
-#endif
#define isLDOMservice(nxge) \
(nxge->environs == SOLARIS_SERVICE_DOMAIN)
@@ -46,6 +43,9 @@ extern "C" {
#define isLDOMs(nxge) \
(isLDOMservice(nxge) || isLDOMguest(nxge))
+#define NXGE_HIO_SHARE_MIN_CHANNELS 2
+#define NXGE_HIO_SHARE_MAX_CHANNELS 2
+
/* ------------------------------------------------------------------ */
typedef uint8_t nx_rdc_t;
typedef uint8_t nx_tdc_t;
@@ -88,37 +88,19 @@ typedef struct {
dc_getinfo getinfo;
} nxhv_dc_fp_t;
-#if defined(sun4v)
-typedef struct {
- vio_net_resource_reg_t __register;
- vio_net_resource_unreg_t unregister;
-
- vio_net_callbacks_t cb;
-
-} nx_vio_fp_t;
-#endif
-
typedef struct {
boolean_t ldoms;
-
nxhv_vr_fp_t vr;
nxhv_dc_fp_t tx;
nxhv_dc_fp_t rx;
-
-#if defined(sun4v)
- nx_vio_fp_t vio;
-#endif
-
} nxhv_fp_t;
/* ------------------------------------------------------------------ */
#define NXGE_VR_SR_MAX 8 /* There are 8 subregions (SR). */
typedef enum {
-
NXGE_HIO_TYPE_SERVICE, /* We are a service domain driver. */
NXGE_HIO_TYPE_GUEST /* We are a guest domain driver. */
-
} nxge_hio_type_t;
typedef enum {
@@ -130,7 +112,6 @@ typedef enum {
FUNC2_VIR = 0x5000000,
FUNC3_MNT = 0x6000000,
FUNC3_VIR = 0x7000000
-
} vr_base_address_t;
#define VR_STEP 0x2000000
@@ -146,7 +127,6 @@ typedef enum { /* 0-8 */
FUNC3_VIR0,
FUNC3_VIR1,
FUNC_VIR_MAX
-
} vr_region_t;
typedef enum {
@@ -159,13 +139,11 @@ typedef enum {
VP_CHANNEL_6,
VP_CHANNEL_7,
VP_CHANNEL_MAX
-
} vp_channel_t;
typedef enum {
VP_BOUND_TX = 1,
VP_BOUND_RX
-
} vpc_type_t;
#define VP_VC_OFFSET(channel) (channel << 10)
@@ -254,9 +232,6 @@ typedef struct nxge_hio_vr {
ether_addr_t altmac; /* The alternate MAC address. */
int slot; /* According to nxge_m_mmac_add(). */
-#if defined(sun4v)
- vio_net_handle_t vhp; /* The handle given to us by the vnet. */
-#endif
nxge_grp_t rx_group;
nxge_grp_t tx_group;
@@ -273,7 +248,6 @@ typedef struct {
uint64_t map; /* Currently unused */
int vector; /* The DDI vector number (index) */
-
} hio_ldg_t;
/*
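
The vnet changes that follow are the consumer side of the mac_client_priv.h additions earlier in this patch: when a Hybrid (nxge) resource is assigned, vnet looks up the device's hardware rings with mac_hwrings_get() and wires each one to a reserved pseudo ring with mac_hwring_setup(). A compressed sketch of the RX half of that binding; the hio_mch, rx_hwgh and hw_rh names are assumptions, since vnet_bind_hwrings() itself is not shown in this excerpt:

	vnet_pseudo_rx_ring_t	*rx_ringp;
	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP];
	int			i, cnt;

	cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
	    MAC_RING_TYPE_RX);
	for (i = 0; i < cnt; i++) {
		/* Pseudo RX rings 1 and 2 are reserved for the Hybrid rings */
		rx_ringp = &vnetp->rx_grp[0].rings[1 + i];
		rx_ringp->hw_rh = hw_rh[i];
		mac_hwring_setup(hw_rh[i], (mac_resource_handle_t)rx_ringp);
	}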
diff --git a/usr/src/uts/sun4v/io/vnet.c b/usr/src/uts/sun4v/io/vnet.c
index 32b67b2588..884665b77f 100644
--- a/usr/src/uts/sun4v/io/vnet.c
+++ b/usr/src/uts/sun4v/io/vnet.c
@@ -40,6 +40,8 @@
#include <sys/dlpi.h>
#include <net/if.h>
#include <sys/mac_provider.h>
+#include <sys/mac_client.h>
+#include <sys/mac_client_priv.h>
#include <sys/mac_ether.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
@@ -75,11 +77,38 @@ static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
#ifdef VNET_IOC_DEBUG
static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
#endif
+static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
+static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
+ const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
+static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
+ mac_group_info_t *infop, mac_group_handle_t handle);
+static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
+static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
+static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
+static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
+static int vnet_ring_enable_intr(void *arg);
+static int vnet_ring_disable_intr(void *arg);
+static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
+static int vnet_addmac(void *arg, const uint8_t *mac_addr);
+static int vnet_remmac(void *arg, const uint8_t *mac_addr);
/* vnet internal functions */
static int vnet_unattach(vnet_t *vnetp);
+static void vnet_ring_grp_init(vnet_t *vnetp);
+static void vnet_ring_grp_uninit(vnet_t *vnetp);
static int vnet_mac_register(vnet_t *);
static int vnet_read_mac_address(vnet_t *vnetp);
+static int vnet_bind_vgenring(vnet_res_t *vresp);
+static void vnet_unbind_vgenring(vnet_res_t *vresp);
+static int vnet_bind_hwrings(vnet_t *vnetp);
+static void vnet_unbind_hwrings(vnet_t *vnetp);
+static int vnet_bind_rings(vnet_res_t *vresp);
+static void vnet_unbind_rings(vnet_res_t *vresp);
+static int vnet_hio_stat(void *, uint_t, uint64_t *);
+static int vnet_hio_start(void *);
+static void vnet_hio_stop(void *);
+static void vnet_hio_notify_cb(void *arg, mac_notify_type_t type);
+mblk_t *vnet_hio_tx(void *, mblk_t *);
/* Forwarding database (FDB) routines */
static void vnet_fdb_create(vnet_t *vnetp);
@@ -98,6 +127,8 @@ static void vnet_stop_resources(vnet_t *vnetp);
static void vnet_dispatch_res_task(vnet_t *vnetp);
static void vnet_res_start_task(void *arg);
static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
+static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
+static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
/* Exported to vnet_gen */
int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
@@ -112,15 +143,21 @@ static void vnet_hio_destroy_kstats(kstat_t *ksp);
/* Exported to to vnet_dds */
int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
+int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
+void vnet_hio_mac_cleanup(vnet_t *vnetp);
/* Externs that are imported from vnet_gen */
extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
const uint8_t *macaddr, void **vgenhdl);
+extern int vgen_init_mdeg(void *arg);
extern void vgen_uninit(void *arg);
extern int vgen_dds_tx(void *arg, void *dmsg);
extern void vgen_mod_init(void);
extern int vgen_mod_cleanup(void);
extern void vgen_mod_fini(void);
+extern int vgen_enable_intr(void *arg);
+extern int vgen_disable_intr(void *arg);
+extern mblk_t *vgen_poll(void *arg, int bytes_to_pickup);
/* Externs that are imported from vnet_dds */
extern void vdds_mod_init(void);
@@ -131,6 +168,9 @@ extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
extern void vdds_cleanup_hybrid_res(void *arg);
extern void vdds_cleanup_hio(vnet_t *vnetp);
+/* Externs imported from mac_impl */
+extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *);
+
#define DRV_NAME "vnet"
#define VNET_FDBE_REFHOLD(p) \
{ \
@@ -145,9 +185,9 @@ extern void vdds_cleanup_hio(vnet_t *vnetp);
}
#ifdef VNET_IOC_DEBUG
-#define VNET_M_CALLBACK_FLAGS (MC_IOCTL)
+#define VNET_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB)
#else
-#define VNET_M_CALLBACK_FLAGS (0)
+#define VNET_M_CALLBACK_FLAGS (MC_GETCAPAB)
#endif
static mac_callbacks_t vnet_m_callbacks = {
@@ -157,9 +197,23 @@ static mac_callbacks_t vnet_m_callbacks = {
vnet_m_stop,
vnet_m_promisc,
vnet_m_multicst,
- vnet_m_unicst,
- vnet_m_tx,
+ NULL, /* m_unicst entry must be NULL while rx rings are exposed */
+ NULL, /* m_tx entry must be NULL while tx rings are exposed */
vnet_m_ioctl,
+ vnet_m_capab,
+ NULL
+};
+
+static mac_callbacks_t vnet_hio_res_callbacks = {
+ 0,
+ vnet_hio_stat,
+ vnet_hio_start,
+ vnet_hio_stop,
+ NULL,
+ NULL,
+ NULL,
+ vnet_hio_tx,
+ NULL,
NULL,
NULL
};
@@ -176,6 +230,9 @@ uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT; /* tx timeout in msec */
uint32_t vnet_ldc_mtu = VNET_LDC_MTU; /* ldc mtu */
+/* Configure tx serialization in mac layer for the vnet device */
+boolean_t vnet_mac_tx_serialize = B_TRUE;
+
/*
* Set this to non-zero to enable additional internal receive buffer pools
* based on the MTU of the device for better performance at the cost of more
@@ -206,6 +263,11 @@ static struct ether_addr etherbroadcastaddr = {
0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
+/* mac_open() retry delay in usec */
+uint32_t vnet_mac_open_delay = 100; /* 0.1 ms */
+
+/* max # of mac_open() retries */
+uint32_t vnet_mac_open_retries = 100;
/*
* Property names
@@ -375,6 +437,9 @@ vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
attach_progress |= AST_vnet_alloc;
+ vnet_ring_grp_init(vnetp);
+ attach_progress |= AST_ring_init;
+
status = vdds_init(vnetp);
if (status != 0) {
goto vnet_attach_fail;
@@ -419,10 +484,19 @@ vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
attach_progress |= AST_vnet_list;
/*
- * Initialize the generic vnet plugin which provides
- * communication via sun4v LDC (logical domain channel) based
- * resources. It will register the LDC resources as and when
- * they become available.
+ * Initialize the generic vnet plugin which provides communication via
+ * sun4v LDC (logical domain channel) based resources. This involves 2
+ * steps; first, vgen_init() is invoked to read the various properties
+ * of the vnet device from its MD node (including its mtu which is
+ * needed to mac_register()) and obtain a handle to the vgen layer.
+ * After mac_register() is done and we have a mac handle, we then
+ * invoke vgen_init_mdeg() which registers with the MD event
+ * generator (mdeg) framework to allow LDC resource notifications.
+ * Note: this sequence also allows us to report the correct default #
+ * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked
+ * in the context of mac_register(); and avoids conflicting with
+ * dynamic pseudo rx rings which get added/removed as a result of mdeg
+ * events in vgen.
*/
status = vgen_init(vnetp, reg, vnetp->dip,
(uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
@@ -432,15 +506,19 @@ vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
}
attach_progress |= AST_vgen_init;
- /* register with MAC layer */
status = vnet_mac_register(vnetp);
if (status != DDI_SUCCESS) {
goto vnet_attach_fail;
}
vnetp->link_state = LINK_STATE_UNKNOWN;
-
attach_progress |= AST_macreg;
+ status = vgen_init_mdeg(vnetp->vgenhdl);
+ if (status != DDI_SUCCESS) {
+ goto vnet_attach_fail;
+ }
+ attach_progress |= AST_init_mdeg;
+
vnetp->attach_progress = attach_progress;
DBG1(NULL, "instance(%d) exit\n", instance);
@@ -503,21 +581,25 @@ vnet_unattach(vnet_t *vnetp)
attach_progress = vnetp->attach_progress;
/*
- * Unregister from the gldv3 subsystem. This can fail, in particular
- * if there are still any open references to this mac device; in which
- * case we just return failure without continuing to detach further.
+ * Disable the mac device in the gldv3 subsystem. This can fail, in
+ * particular if there are still any open references to this mac
+ * device; in which case we just return failure without continuing to
+ * detach further.
+ * If it succeeds, we then invoke vgen_uninit() which should unregister
+ * any pseudo rings registered with the mac layer. Note we keep the
+ * AST_macreg flag on, so we can unregister with the mac layer at
+ * the end of this routine.
*/
if (attach_progress & AST_macreg) {
- if (mac_unregister(vnetp->mh) != 0) {
+ if (mac_disable(vnetp->mh) != 0) {
return (1);
}
- attach_progress &= ~AST_macreg;
}
/*
- * Now that we have unregistered from gldv3, we must finish all other
- * steps and successfully return from this function; otherwise we will
- * end up leaving the device in a broken/unusable state.
+ * Now that we have disabled the device, we must finish all other steps
+ * and successfully return from this function; otherwise we will end up
+ * leaving the device in a broken/unusable state.
*
* First, release any hybrid resources assigned to this vnet device.
*/
@@ -530,9 +612,10 @@ vnet_unattach(vnet_t *vnetp)
* Uninit vgen. This stops further mdeg callbacks to this vnet
* device and/or its ports; and detaches any existing ports.
*/
- if (attach_progress & AST_vgen_init) {
+ if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
vgen_uninit(vnetp->vgenhdl);
attach_progress &= ~AST_vgen_init;
+ attach_progress &= ~AST_init_mdeg;
}
/* Destroy the taskq. */
@@ -563,6 +646,17 @@ vnet_unattach(vnet_t *vnetp)
attach_progress &= ~AST_vnet_list;
}
+ if (attach_progress & AST_ring_init) {
+ vnet_ring_grp_uninit(vnetp);
+ attach_progress &= ~AST_ring_init;
+ }
+
+ if (attach_progress & AST_macreg) {
+ VERIFY(mac_unregister(vnetp->mh) == 0);
+ vnetp->mh = NULL;
+ attach_progress &= ~AST_macreg;
+ }
+
if (attach_progress & AST_vnet_alloc) {
rw_destroy(&vnetp->vrwlock);
rw_destroy(&vnetp->vsw_fp_rw);
@@ -683,8 +777,9 @@ vnet_m_promisc(void *arg, boolean_t on)
* external hosts.
*/
mblk_t *
-vnet_m_tx(void *arg, mblk_t *mp)
+vnet_tx_ring_send(void *arg, mblk_t *mp)
{
+ vnet_pseudo_tx_ring_t *tx_ringp;
vnet_t *vnetp;
vnet_res_t *vresp;
mblk_t *next;
@@ -694,8 +789,10 @@ vnet_m_tx(void *arg, mblk_t *mp)
boolean_t is_unicast;
boolean_t is_pvid; /* non-default pvid ? */
boolean_t hres; /* Hybrid resource ? */
+ void *tx_arg;
- vnetp = (vnet_t *)arg;
+ tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
+ vnetp = (vnet_t *)tx_ringp->vnetp;
DBG1(vnetp, "enter\n");
ASSERT(mp != NULL);
@@ -790,10 +887,14 @@ vnet_m_tx(void *arg, mblk_t *mp)
}
}
- }
- macp = &vresp->macreg;
- resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
+ macp = &vresp->macreg;
+ tx_arg = tx_ringp;
+ } else {
+ macp = &vresp->macreg;
+ tx_arg = macp->m_driver;
+ }
+ resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);
/* tx done; now release ref on fdb entry */
VNET_FDBE_REFRELE(vresp);
@@ -848,6 +949,124 @@ vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
return (0);
}
+static void
+vnet_ring_grp_init(vnet_t *vnetp)
+{
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_rx_ring_t *rx_ringp;
+ vnet_pseudo_tx_group_t *tx_grp;
+ vnet_pseudo_tx_ring_t *tx_ringp;
+ int i;
+
+ tx_grp = &vnetp->tx_grp[0];
+ tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
+ VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
+ for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
+ tx_ringp[i].state |= VNET_TXRING_SHARED;
+ }
+ tx_grp->rings = tx_ringp;
+ tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;
+
+ rx_grp = &vnetp->rx_grp[0];
+ rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
+ rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
+ rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
+ rx_grp->max_ring_cnt, KM_SLEEP);
+
+ /*
+ * Setup the first 3 Pseudo RX Rings that are reserved;
+ * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
+ */
+ rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
+ rx_ringp[0].index = 0;
+ rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
+ rx_ringp[1].index = 1;
+ rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
+ rx_ringp[2].index = 2;
+
+ rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
+ rx_grp->rings = rx_ringp;
+
+ for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
+ i < rx_grp->max_ring_cnt; i++) {
+ rx_ringp = &rx_grp->rings[i];
+ rx_ringp->state = VNET_RXRING_FREE;
+ rx_ringp->index = i;
+ }
+}
+
+static void
+vnet_ring_grp_uninit(vnet_t *vnetp)
+{
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_tx_group_t *tx_grp;
+
+ tx_grp = &vnetp->tx_grp[0];
+ if (tx_grp->rings != NULL) {
+ ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
+ kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
+ tx_grp->ring_cnt);
+ tx_grp->rings = NULL;
+ }
+
+ rx_grp = &vnetp->rx_grp[0];
+ if (rx_grp->rings != NULL) {
+ ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
+ ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
+ kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
+ rx_grp->max_ring_cnt);
+ rx_grp->rings = NULL;
+ }
+}
+
+static vnet_pseudo_rx_ring_t *
+vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
+{
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_rx_ring_t *rx_ringp;
+ int index;
+
+ rx_grp = &vnetp->rx_grp[0];
+ WRITE_ENTER(&rx_grp->lock);
+
+ if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
+ /* no rings available */
+ RW_EXIT(&rx_grp->lock);
+ return (NULL);
+ }
+
+ for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
+ index < rx_grp->max_ring_cnt; index++) {
+ rx_ringp = &rx_grp->rings[index];
+ if (rx_ringp->state == VNET_RXRING_FREE) {
+ rx_ringp->state |= VNET_RXRING_INUSE;
+ rx_grp->ring_cnt++;
+ break;
+ }
+ }
+
+ RW_EXIT(&rx_grp->lock);
+ return (rx_ringp);
+}
+
+static void
+vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
+{
+ vnet_pseudo_rx_group_t *rx_grp;
+
+ ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
+ rx_grp = &vnetp->rx_grp[0];
+ WRITE_ENTER(&rx_grp->lock);
+
+ if (ringp->state != VNET_RXRING_FREE) {
+ ringp->state = VNET_RXRING_FREE;
+ ringp->handle = NULL;
+ rx_grp->ring_cnt--;
+ }
+
+ RW_EXIT(&rx_grp->lock);
+}
+
/* wrapper function for mac_register() */
static int
vnet_mac_register(vnet_t *vnetp)
@@ -867,6 +1086,15 @@ vnet_mac_register(vnet_t *vnetp)
macp->m_margin = VLAN_TAGSZ;
/*
+ * MAC_VIRT_SERIALIZE flag is needed while hybridIO is enabled to
+ * workaround tx lock contention issues in nxge.
+ */
+ macp->m_v12n = MAC_VIRT_LEVEL1;
+ if (vnet_mac_tx_serialize == B_TRUE) {
+ macp->m_v12n |= MAC_VIRT_SERIALIZE;
+ }
+
+ /*
* Finally, we're ready to register ourselves with the MAC layer
* interface; if this succeeds, we're all ready to start()
*/
@@ -1116,42 +1344,57 @@ vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
static void
vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
{
- vnet_res_t *vresp = (vnet_res_t *)vrh;
- vnet_t *vnetp = vresp->vnetp;
+ vnet_res_t *vresp = (vnet_res_t *)vrh;
+ vnet_t *vnetp = vresp->vnetp;
+ vnet_pseudo_rx_ring_t *ringp;
if ((vnetp == NULL) || (vnetp->mh == 0)) {
freemsgchain(mp);
return;
}
- /*
- * Packets received over a hybrid resource need additional processing
- * to remove the tag, for the pvid case. The underlying resource is
- * not aware of the vnet's pvid and thus packets are received with the
- * vlan tag in the header; unlike packets that are received over a ldc
- * channel in which case the peer vnet/vsw would have already removed
- * the tag.
- */
- if (vresp->type == VIO_NET_RES_HYBRID &&
- vnetp->pvid != vnetp->default_vlan_id) {
-
- vnet_rx_frames_untag(vnetp->pvid, &mp);
- if (mp == NULL) {
- return;
- }
- }
-
- mac_rx(vnetp->mh, NULL, mp);
+ ringp = vresp->rx_ringp;
+ mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
}
void
vnet_tx_update(vio_net_handle_t vrh)
{
- vnet_res_t *vresp = (vnet_res_t *)vrh;
- vnet_t *vnetp = vresp->vnetp;
+ vnet_res_t *vresp = (vnet_res_t *)vrh;
+ vnet_t *vnetp = vresp->vnetp;
+ vnet_pseudo_tx_ring_t *tx_ringp;
+ vnet_pseudo_tx_group_t *tx_grp;
+ int i;
+
+ if (vnetp == NULL || vnetp->mh == NULL) {
+ return;
+ }
- if ((vnetp != NULL) && (vnetp->mh != NULL)) {
- mac_tx_update(vnetp->mh);
+ /*
+ * Currently, the tx hwring API (used to access rings that belong to
+ * a Hybrid IO resource) does not provide us a per ring flow ctrl
+ * update; also the pseudo rings are shared by the ports/ldcs in the
+ * vgen layer. Thus we can't figure out which pseudo ring is being
+ * re-enabled for transmits. To work around this, when we get a tx
+ * restart notification from below, we simply propagate that to all
+ * the tx pseudo rings registered with the mac layer above.
+ *
+ * There are a couple of side effects with this approach, but they are
+ * not harmful, as outlined below:
+ *
+ * A) We might send an invalid ring_update() for a ring that is not
+ * really flow controlled. This will not have any effect in the mac
+ * layer and packets will continue to be transmitted on that ring.
+ *
+ * B) We might end up clearing the flow control in the mac layer for
+ * a ring that is still flow controlled in the underlying resource.
+ * This will result in the mac layer restarting transmit, only to be
+ * flow controlled again on that ring.
+ */
+ tx_grp = &vnetp->tx_grp[0];
+ for (i = 0; i < tx_grp->ring_cnt; i++) {
+ tx_ringp = &tx_grp->rings[i];
+ mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
}
}
@@ -1233,8 +1476,8 @@ int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
vio_net_callbacks_t *vcb)
{
- vnet_t *vnetp;
- vnet_res_t *vresp;
+ vnet_t *vnetp;
+ vnet_res_t *vresp;
vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
ether_copy(local_macaddr, vresp->local_macaddr);
@@ -1260,11 +1503,7 @@ int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
vnetp->instance);
}
}
-
- WRITE_ENTER(&vnetp->vrwlock);
- vresp->nextp = vnetp->vres_list;
- vnetp->vres_list = vresp;
- RW_EXIT(&vnetp->vrwlock);
+ vnet_add_resource(vnetp, vresp);
break;
}
vnetp = vnetp->nextp;
@@ -1281,6 +1520,14 @@ int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
vcb->vio_net_tx_update = vnet_tx_update;
vcb->vio_net_report_err = vnet_handle_res_err;
+ /* Bind the resource to pseudo ring(s) */
+ if (vnet_bind_rings(vresp) != 0) {
+ (void) vnet_rem_resource(vnetp, vresp);
+ vnet_hio_destroy_kstats(vresp->ksp);
+ KMEM_FREE(vresp);
+ return (1);
+ }
+
/* Dispatch a task to start resources */
vnet_dispatch_res_task(vnetp);
return (0);
@@ -1294,8 +1541,6 @@ vio_net_resource_unreg(vio_net_handle_t vhp)
{
vnet_res_t *vresp = (vnet_res_t *)vhp;
vnet_t *vnetp = vresp->vnetp;
- vnet_res_t *vrp;
- kstat_t *ksp = NULL;
DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
@@ -1306,7 +1551,29 @@ vio_net_resource_unreg(vio_net_handle_t vhp)
*/
vnet_fdbe_del(vnetp, vresp);
+ vnet_unbind_rings(vresp);
+
/* Now remove the resource from the list */
+ (void) vnet_rem_resource(vnetp, vresp);
+
+ vnet_hio_destroy_kstats(vresp->ksp);
+ KMEM_FREE(vresp);
+}
+
+static void
+vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
+{
+ WRITE_ENTER(&vnetp->vrwlock);
+ vresp->nextp = vnetp->vres_list;
+ vnetp->vres_list = vresp;
+ RW_EXIT(&vnetp->vrwlock);
+}
+
+static vnet_res_t *
+vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
+{
+ vnet_res_t *vrp;
+
WRITE_ENTER(&vnetp->vrwlock);
if (vresp == vnetp->vres_list) {
vnetp->vres_list = vresp->nextp;
@@ -1320,15 +1587,12 @@ vio_net_resource_unreg(vio_net_handle_t vhp)
vrp = vrp->nextp;
}
}
-
- ksp = vresp->ksp;
- vresp->ksp = NULL;
-
vresp->vnetp = NULL;
vresp->nextp = NULL;
+
RW_EXIT(&vnetp->vrwlock);
- vnet_hio_destroy_kstats(ksp);
- KMEM_FREE(vresp);
+
+ return (vresp);
}
/*
@@ -1710,6 +1974,1024 @@ vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
}
}
+static boolean_t
+vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
+{
+ vnet_t *vnetp = (vnet_t *)arg;
+
+ if (vnetp == NULL) {
+ return (0);
+ }
+
+ switch (cap) {
+
+ case MAC_CAPAB_RINGS: {
+
+ mac_capab_rings_t *cap_rings = cap_data;
+ /*
+ * Rings Capability Notes:
+	 * We advertise rings to make use of the rings framework in the
+	 * gldv3 mac layer, to improve performance. This is
+ * specifically needed when a Hybrid resource (with multiple
+ * tx/rx hardware rings) is assigned to a vnet device. We also
+ * leverage this for the normal case when no Hybrid resource is
+ * assigned.
+ *
+ * Ring Allocation:
+ * - TX path:
+ * We expose a pseudo ring group with 2 pseudo tx rings (as
+	 * currently HybridIO exports only 2 rings). In the normal case,
+ * transmit traffic that comes down to the driver through the
+ * mri_tx (vnet_tx_ring_send()) entry point goes through the
+ * distributed switching algorithm in vnet and gets transmitted
+ * over a port/LDC in the vgen layer to either the vswitch or a
+ * peer vnet. If and when a Hybrid resource is assigned to the
+ * vnet, we obtain the tx ring information of the Hybrid device
+ * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings.
+ * Traffic being sent over the Hybrid resource by the mac layer
+ * gets spread across both hw rings, as they are mapped to the
+ * 2 pseudo tx rings in vnet.
+ *
+ * - RX path:
+ * We expose a pseudo ring group with 3 pseudo rx rings (static
+ * rings) initially. The first (default) pseudo rx ring is
+ * reserved for the resource that connects to the vswitch
+ * service. The next 2 rings are reserved for a Hybrid resource
+ * that may be assigned to the vnet device. If and when a
+ * Hybrid resource is assigned to the vnet, we obtain the rx
+ * ring information of the Hybrid device (nxge) and map these
+ * pseudo rings 1:1 to the 2 hw rx rings. For each additional
+ * resource that connects to a peer vnet, we dynamically
+ * allocate a pseudo rx ring and map it to that resource, when
+ * the resource gets added; and the pseudo rx ring is
+ * dynamically registered with the upper mac layer. We do the
+ * reverse and unregister the ring with the mac layer when
+ * the resource gets removed.
+ *
+ * Synchronization notes:
+ * We don't need any lock to protect members of ring structure,
+ * specifically ringp->hw_rh, in either the TX or the RX ring,
+ * as explained below.
+ * - TX ring:
+ * ring->hw_rh is initialized only when a Hybrid resource is
+ * associated; and gets referenced only in vnet_hio_tx(). The
+ * Hybrid resource itself is available in fdb only after tx
+	 * hwrings are found and mapped; i.e., in vio_net_resource_reg()
+ * we call vnet_bind_rings() first and then call
+ * vnet_start_resources() which adds an entry to fdb. For
+ * traffic going over LDC resources, we don't reference
+ * ring->hw_rh at all.
+ * - RX ring:
+ * For rings mapped to Hybrid resource ring->hw_rh is
+ * initialized and only then do we add the rx callback for
+ * the underlying Hybrid resource; we disable callbacks before
+ * we unmap ring->hw_rh. For rings mapped to LDC resources, we
+ * stop the rx callbacks (in vgen) before we remove ring->hw_rh
+ * (vio_net_resource_unreg()).
+ */
+
+ if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
+ cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
+
+ /*
+ * The ring_cnt for rx grp is initialized in
+ * vnet_ring_grp_init(). Later, the ring_cnt gets
+ * updated dynamically whenever LDC resources are added
+ * or removed.
+ */
+ cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
+ cap_rings->mr_rget = vnet_get_ring;
+
+ cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
+ cap_rings->mr_gget = vnet_get_group;
+ cap_rings->mr_gaddring = NULL;
+ cap_rings->mr_gremring = NULL;
+ } else {
+ cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
+
+ /*
+ * The ring_cnt for tx grp is initialized in
+ * vnet_ring_grp_init() and remains constant, as we
+			 * do not support dynamic tx rings for now.
+ */
+ cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
+ cap_rings->mr_rget = vnet_get_ring;
+
+ /*
+			 * Transmit rings are not grouped; i.e., the number of
+ * transmit ring groups advertised should be set to 0.
+ */
+ cap_rings->mr_gnum = 0;
+
+ cap_rings->mr_gget = vnet_get_group;
+ cap_rings->mr_gaddring = NULL;
+ cap_rings->mr_gremring = NULL;
+ }
+ return (B_TRUE);
+
+ }
+
+ default:
+ break;
+
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * Callback function for the MAC layer to get ring information.
+ */
+static void
+vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
+ const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
+{
+ vnet_t *vnetp = arg;
+
+ switch (rtype) {
+
+ case MAC_RING_TYPE_RX: {
+
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_rx_ring_t *rx_ringp;
+ mac_intr_t *mintr;
+
+ /* We advertised only one RX group */
+ ASSERT(g_index == 0);
+ rx_grp = &vnetp->rx_grp[g_index];
+
+ /* Check the current # of rings in the rx group */
+ ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));
+
+ /* Get the ring based on the index */
+ rx_ringp = &rx_grp->rings[r_index];
+
+ rx_ringp->handle = r_handle;
+ /*
+ * Note: we don't need to save the incoming r_index in rx_ring,
+ * as vnet_ring_grp_init() would have initialized the index for
+ * each ring in the array.
+ */
+ rx_ringp->grp = rx_grp;
+ rx_ringp->vnetp = vnetp;
+
+ mintr = &infop->mri_intr;
+ mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
+ mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
+ mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;
+
+ infop->mri_driver = (mac_ring_driver_t)rx_ringp;
+ infop->mri_start = vnet_rx_ring_start;
+ infop->mri_stop = vnet_rx_ring_stop;
+
+ /* Set the poll function, as this is an rx ring */
+ infop->mri_poll = vnet_rx_poll;
+
+ break;
+ }
+
+ case MAC_RING_TYPE_TX: {
+ vnet_pseudo_tx_group_t *tx_grp;
+ vnet_pseudo_tx_ring_t *tx_ringp;
+
+ /*
+ * No need to check grp index; mac layer passes -1 for it.
+ */
+ tx_grp = &vnetp->tx_grp[0];
+
+ /* Check the # of rings in the tx group */
+ ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));
+
+ /* Get the ring based on the index */
+ tx_ringp = &tx_grp->rings[r_index];
+
+ tx_ringp->handle = r_handle;
+ tx_ringp->index = r_index;
+ tx_ringp->grp = tx_grp;
+ tx_ringp->vnetp = vnetp;
+
+ infop->mri_driver = (mac_ring_driver_t)tx_ringp;
+ infop->mri_start = vnet_tx_ring_start;
+ infop->mri_stop = vnet_tx_ring_stop;
+
+ /* Set the transmit function, as this is a tx ring */
+ infop->mri_tx = vnet_tx_ring_send;
+
+ break;
+ }
+
+ default:
+ break;
+ }
+}
+
+/*
+ * Callback function for the MAC layer to get group information.
+ */
+static void
+vnet_get_group(void *arg, mac_ring_type_t type, const int index,
+ mac_group_info_t *infop, mac_group_handle_t handle)
+{
+ vnet_t *vnetp = (vnet_t *)arg;
+
+ switch (type) {
+
+ case MAC_RING_TYPE_RX:
+ {
+ vnet_pseudo_rx_group_t *rx_grp;
+
+ /* We advertised only one RX group */
+ ASSERT(index == 0);
+
+ rx_grp = &vnetp->rx_grp[index];
+ rx_grp->handle = handle;
+ rx_grp->index = index;
+ rx_grp->vnetp = vnetp;
+
+ infop->mgi_driver = (mac_group_driver_t)rx_grp;
+ infop->mgi_start = NULL;
+ infop->mgi_stop = NULL;
+ infop->mgi_addmac = vnet_addmac;
+ infop->mgi_remmac = vnet_remmac;
+ infop->mgi_count = rx_grp->ring_cnt;
+
+ break;
+ }
+
+ case MAC_RING_TYPE_TX:
+ {
+ vnet_pseudo_tx_group_t *tx_grp;
+
+ /* We advertised only one TX group */
+ ASSERT(index == 0);
+
+ tx_grp = &vnetp->tx_grp[index];
+ tx_grp->handle = handle;
+ tx_grp->index = index;
+ tx_grp->vnetp = vnetp;
+
+ infop->mgi_driver = (mac_group_driver_t)tx_grp;
+ infop->mgi_start = NULL;
+ infop->mgi_stop = NULL;
+ infop->mgi_addmac = NULL;
+ infop->mgi_remmac = NULL;
+ infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;
+
+ break;
+ }
+
+ default:
+ break;
+
+ }
+}
+
+static int
+vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
+{
+ vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
+ int err;
+
+ /*
+ * If this ring is mapped to a LDC resource, simply mark the state to
+ * indicate the ring is started and return.
+ */
+ if ((rx_ringp->state &
+ (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
+ rx_ringp->gen_num = mr_gen_num;
+ rx_ringp->state |= VNET_RXRING_STARTED;
+ return (0);
+ }
+
+ ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
+
+ /*
+ * This must be a ring reserved for a hwring. If the hwring is not
+ * bound yet, simply mark the state to indicate the ring is started and
+ * return. If and when a hybrid resource is activated for this vnet
+ * device, we will bind the hwring and start it then. If a hwring is
+ * already bound, start it now.
+ */
+ if (rx_ringp->hw_rh == NULL) {
+ rx_ringp->gen_num = mr_gen_num;
+ rx_ringp->state |= VNET_RXRING_STARTED;
+ return (0);
+ }
+
+ err = mac_hwring_start(rx_ringp->hw_rh);
+ if (err == 0) {
+ rx_ringp->gen_num = mr_gen_num;
+ rx_ringp->state |= VNET_RXRING_STARTED;
+ } else {
+ err = ENXIO;
+ }
+
+ return (err);
+}
+
+static void
+vnet_rx_ring_stop(mac_ring_driver_t arg)
+{
+ vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
+
+ /*
+ * If this ring is mapped to a LDC resource, simply mark the state to
+ * indicate the ring is now stopped and return.
+ */
+ if ((rx_ringp->state &
+ (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
+ rx_ringp->state &= ~VNET_RXRING_STARTED;
+		return;
+	}
+
+ ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
+
+ /*
+ * This must be a ring reserved for a hwring. If the hwring is not
+ * bound yet, simply mark the state to indicate the ring is stopped and
+ * return. If a hwring is already bound, stop it now.
+ */
+ if (rx_ringp->hw_rh == NULL) {
+ rx_ringp->state &= ~VNET_RXRING_STARTED;
+ return;
+ }
+
+ mac_hwring_stop(rx_ringp->hw_rh);
+ rx_ringp->state &= ~VNET_RXRING_STARTED;
+}
+
+/* ARGSUSED */
+static int
+vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
+{
+ vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
+
+ tx_ringp->state |= VNET_TXRING_STARTED;
+ return (0);
+}
+
+static void
+vnet_tx_ring_stop(mac_ring_driver_t arg)
+{
+ vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
+
+ tx_ringp->state &= ~VNET_TXRING_STARTED;
+}
+
+/*
+ * Disable polling for a ring and enable its interrupt.
+ */
+static int
+vnet_ring_enable_intr(void *arg)
+{
+ vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
+ vnet_res_t *vresp;
+
+ if (rx_ringp->hw_rh == NULL) {
+ /*
+ * Ring enable intr func is being invoked, but the ring is
+		 * not bound to any underlying resource? This must be a ring
+		 * reserved for a Hybrid resource and no such resource has been
+ * assigned to this vnet device yet. We simply return success.
+ */
+ ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
+ return (0);
+ }
+
+ /*
+ * The rx ring has been bound to either a LDC or a Hybrid resource.
+ * Call the appropriate function to enable interrupts for the ring.
+ */
+ if (rx_ringp->state & VNET_RXRING_HYBRID) {
+ return (mac_hwring_enable_intr(rx_ringp->hw_rh));
+ } else {
+ vresp = (vnet_res_t *)rx_ringp->hw_rh;
+ return (vgen_enable_intr(vresp->macreg.m_driver));
+ }
+}
+
+/*
+ * Enable polling for a ring and disable its interrupt.
+ */
+static int
+vnet_ring_disable_intr(void *arg)
+{
+ vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
+ vnet_res_t *vresp;
+
+ if (rx_ringp->hw_rh == NULL) {
+ /*
+ * Ring disable intr func is being invoked, but the ring is
+		 * not bound to any underlying resource? This must be a ring
+		 * reserved for a Hybrid resource and no such resource has been
+ * assigned to this vnet device yet. We simply return success.
+ */
+ ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
+ return (0);
+ }
+
+ /*
+ * The rx ring has been bound to either a LDC or a Hybrid resource.
+ * Call the appropriate function to disable interrupts for the ring.
+ */
+ if (rx_ringp->state & VNET_RXRING_HYBRID) {
+ return (mac_hwring_disable_intr(rx_ringp->hw_rh));
+ } else {
+ vresp = (vnet_res_t *)rx_ringp->hw_rh;
+ return (vgen_disable_intr(vresp->macreg.m_driver));
+ }
+}
+
+/*
+ * Poll 'bytes_to_pickup' bytes of message from the rx ring.
+ */
+static mblk_t *
+vnet_rx_poll(void *arg, int bytes_to_pickup)
+{
+ vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
+ mblk_t *mp = NULL;
+ vnet_res_t *vresp;
+ vnet_t *vnetp = rx_ringp->vnetp;
+
+ if (rx_ringp->hw_rh == NULL) {
+ return (NULL);
+ }
+
+ if (rx_ringp->state & VNET_RXRING_HYBRID) {
+ mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
+ /*
+ * Packets received over a hybrid resource need additional
+ * processing to remove the tag, for the pvid case. The
+ * underlying resource is not aware of the vnet's pvid and thus
+ * packets are received with the vlan tag in the header; unlike
+ * packets that are received over a ldc channel in which case
+ * the peer vnet/vsw would have already removed the tag.
+ */
+ if (vnetp->pvid != vnetp->default_vlan_id) {
+ vnet_rx_frames_untag(vnetp->pvid, &mp);
+ }
+ } else {
+ vresp = (vnet_res_t *)rx_ringp->hw_rh;
+ mp = vgen_poll(vresp->macreg.m_driver, bytes_to_pickup);
+ }
+ return (mp);
+}
+
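+/*
+ * Receive callback registered with the mac client of the Hybrid resource.
+ * Strips the vlan tag for the pvid case and passes the packets up to the
+ * mac layer over the pseudo rx ring mapped to the hardware ring.
+ */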
+/* ARGSUSED */
+void
+vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
+ boolean_t loopback)
+{
+ vnet_t *vnetp = (vnet_t *)arg;
+ vnet_pseudo_rx_ring_t *ringp = (vnet_pseudo_rx_ring_t *)mrh;
+
+ /*
+ * Packets received over a hybrid resource need additional processing
+ * to remove the tag, for the pvid case. The underlying resource is
+ * not aware of the vnet's pvid and thus packets are received with the
+ * vlan tag in the header; unlike packets that are received over a ldc
+ * channel in which case the peer vnet/vsw would have already removed
+ * the tag.
+ */
+ if (vnetp->pvid != vnetp->default_vlan_id) {
+ vnet_rx_frames_untag(vnetp->pvid, &mp);
+ if (mp == NULL) {
+ return;
+ }
+ }
+ mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
+}
+
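+/*
+ * mgi_addmac entry point for the pseudo rx group; only the vnet device's
+ * own (primary) unicast address is supported.
+ */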
+static int
+vnet_addmac(void *arg, const uint8_t *mac_addr)
+{
+ vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg;
+ vnet_t *vnetp;
+
+ vnetp = rx_grp->vnetp;
+
+ if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
+ return (0);
+ }
+
+ cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
+ vnetp->instance, __func__);
+ return (EINVAL);
+}
+
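+/*
+ * mgi_remmac entry point for the pseudo rx group; only the vnet device's
+ * own unicast address is valid.
+ */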
+static int
+vnet_remmac(void *arg, const uint8_t *mac_addr)
+{
+ vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg;
+ vnet_t *vnetp;
+
+ vnetp = rx_grp->vnetp;
+
+ if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
+ return (0);
+ }
+
+ cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
+ vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
+ return (EINVAL);
+}
+
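+/*
+ * Open the Hybrid device by its interface name, create a mac client over it
+ * with the vnet's unicast address and register it as a Hybrid resource with
+ * vnet; also set up the receive and notify (tx update) callbacks.
+ */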
+int
+vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
+{
+ mac_handle_t mh;
+ mac_client_handle_t mch = NULL;
+ mac_unicast_handle_t muh = NULL;
+ mac_diag_t diag;
+ mac_register_t *macp;
+ char client_name[MAXNAMELEN];
+ int rv;
+ uint16_t mac_flags = MAC_UNICAST_TAG_DISABLE |
+ MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
+ vio_net_callbacks_t vcb;
+ ether_addr_t rem_addr =
+ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+ uint32_t retries = 0;
+
+ if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
+ return (EAGAIN);
+ }
+
+ do {
+ rv = mac_open_by_linkname(ifname, &mh);
+ if (rv == 0) {
+ break;
+ }
+ if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
+ mac_free(macp);
+ return (rv);
+ }
+ drv_usecwait(vnet_mac_open_delay);
+ } while (rv == ENOENT);
+
+ vnetp->hio_mh = mh;
+
+ (void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
+ ifname);
+ rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
+ if (rv != 0) {
+ goto fail;
+ }
+ vnetp->hio_mch = mch;
+
+ rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
+ &diag);
+ if (rv != 0) {
+ goto fail;
+ }
+ vnetp->hio_muh = muh;
+
+ macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
+ macp->m_driver = vnetp;
+ macp->m_dip = NULL;
+ macp->m_src_addr = NULL;
+ macp->m_callbacks = &vnet_hio_res_callbacks;
+ macp->m_min_sdu = 0;
+ macp->m_max_sdu = ETHERMTU;
+
+ rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
+ vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
+ if (rv != 0) {
+ goto fail;
+ }
+ mac_free(macp);
+
+ /* add the recv callback */
+ mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);
+
+ /* add the notify callback - only tx updates for now */
+ vnetp->hio_mnh = mac_notify_add(vnetp->hio_mh, vnet_hio_notify_cb,
+ vnetp);
+
+ return (0);
+
+fail:
+ mac_free(macp);
+ vnet_hio_mac_cleanup(vnetp);
+ return (1);
+}
+
+void
+vnet_hio_mac_cleanup(vnet_t *vnetp)
+{
+ if (vnetp->hio_mnh != NULL) {
+ (void) mac_notify_remove(vnetp->hio_mnh, B_TRUE);
+ vnetp->hio_mnh = NULL;
+ }
+
+ if (vnetp->hio_vhp != NULL) {
+ vio_net_resource_unreg(vnetp->hio_vhp);
+ vnetp->hio_vhp = NULL;
+ }
+
+ if (vnetp->hio_muh != NULL) {
+ mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
+ vnetp->hio_muh = NULL;
+ }
+
+ if (vnetp->hio_mch != NULL) {
+ mac_client_close(vnetp->hio_mch, 0);
+ vnetp->hio_mch = NULL;
+ }
+
+ if (vnetp->hio_mh != NULL) {
+ mac_close(vnetp->hio_mh);
+ vnetp->hio_mh = NULL;
+ }
+}
+
+/* Bind pseudo rings to hwrings */
+static int
+vnet_bind_hwrings(vnet_t *vnetp)
+{
+ mac_ring_handle_t hw_rh[VNET_NUM_HYBRID_RINGS];
+ mac_perim_handle_t mph1;
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_rx_ring_t *rx_ringp;
+ vnet_pseudo_tx_group_t *tx_grp;
+ vnet_pseudo_tx_ring_t *tx_ringp;
+ int hw_ring_cnt;
+ int i;
+ int rv;
+
+ mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
+
+ /* Get the list of the underlying RX rings. */
+ hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
+ MAC_RING_TYPE_RX);
+
+	/* We expect the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
+ if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
+ cmn_err(CE_WARN,
+ "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
+ vnetp->instance, hw_ring_cnt);
+ goto fail;
+ }
+
+ if (vnetp->rx_hwgh != NULL) {
+ /*
+ * Quiesce the HW ring and the mac srs on the ring. Note
+ * that the HW ring will be restarted when the pseudo ring
+ * is started. At that time all the packets will be
+ * directly passed up to the pseudo RX ring and handled
+ * by mac srs created over the pseudo RX ring.
+ */
+ mac_rx_client_quiesce(vnetp->hio_mch);
+ mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
+ }
+
+ /*
+ * Bind the pseudo rings to the hwrings and start the hwrings.
+ * Note we don't need to register these with the upper mac, as we have
+ * statically exported these pseudo rxrings which are reserved for
+ * rxrings of Hybrid resource.
+ */
+ rx_grp = &vnetp->rx_grp[0];
+ for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
+ /* Pick the rxrings reserved for Hybrid resource */
+ rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
+
+ /* Store the hw ring handle */
+ rx_ringp->hw_rh = hw_rh[i];
+
+ /* Bind the pseudo ring to the underlying hwring */
+ mac_hwring_setup(rx_ringp->hw_rh,
+ (mac_resource_handle_t)rx_ringp);
+
+ /* Start the hwring if needed */
+ if (rx_ringp->state & VNET_RXRING_STARTED) {
+ rv = mac_hwring_start(rx_ringp->hw_rh);
+ if (rv != 0) {
+ mac_hwring_teardown(rx_ringp->hw_rh);
+ rx_ringp->hw_rh = NULL;
+ goto fail;
+ }
+ }
+ }
+
+ /* Get the list of the underlying TX rings. */
+ hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
+ MAC_RING_TYPE_TX);
+
+ /* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
+ if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
+ cmn_err(CE_WARN,
+ "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
+ vnetp->instance, hw_ring_cnt);
+ goto fail;
+ }
+
+ /*
+ * Now map the pseudo txrings to the hw txrings. Note we don't need
+ * to register these with the upper mac, as we have statically exported
+ * these rings. Note that these rings will continue to be used for LDC
+ * resources to peer vnets and vswitch (shared ring).
+ */
+ tx_grp = &vnetp->tx_grp[0];
+ for (i = 0; i < tx_grp->ring_cnt; i++) {
+ tx_ringp = &tx_grp->rings[i];
+ tx_ringp->hw_rh = hw_rh[i];
+ tx_ringp->state |= VNET_TXRING_HYBRID;
+ }
+
+ mac_perim_exit(mph1);
+ return (0);
+
+fail:
+ mac_perim_exit(mph1);
+ vnet_unbind_hwrings(vnetp);
+ return (1);
+}
+
+/* Unbind pseudo rings from hwrings */
+static void
+vnet_unbind_hwrings(vnet_t *vnetp)
+{
+ mac_perim_handle_t mph1;
+ vnet_pseudo_rx_ring_t *rx_ringp;
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_tx_group_t *tx_grp;
+ vnet_pseudo_tx_ring_t *tx_ringp;
+ int i;
+
+ mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
+
+ tx_grp = &vnetp->tx_grp[0];
+ for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
+ tx_ringp = &tx_grp->rings[i];
+ if (tx_ringp->state & VNET_TXRING_HYBRID) {
+ tx_ringp->state &= ~VNET_TXRING_HYBRID;
+ tx_ringp->hw_rh = NULL;
+ }
+ }
+
+ rx_grp = &vnetp->rx_grp[0];
+ for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
+ rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
+ if (rx_ringp->hw_rh != NULL) {
+ /* Stop the hwring */
+ mac_hwring_stop(rx_ringp->hw_rh);
+
+ /* Teardown the hwring */
+ mac_hwring_teardown(rx_ringp->hw_rh);
+ rx_ringp->hw_rh = NULL;
+ }
+ }
+
+ if (vnetp->rx_hwgh != NULL) {
+ vnetp->rx_hwgh = NULL;
+ /*
+ * First clear the permanent-quiesced flag of the RX srs then
+ * restart the HW ring and the mac srs on the ring.
+ */
+ mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
+ mac_rx_client_restart(vnetp->hio_mch);
+ }
+
+ mac_perim_exit(mph1);
+}
+
+/* Bind pseudo ring to a LDC resource */
+static int
+vnet_bind_vgenring(vnet_res_t *vresp)
+{
+ vnet_t *vnetp;
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_rx_ring_t *rx_ringp;
+ mac_perim_handle_t mph1;
+ int rv;
+ int type;
+
+ vnetp = vresp->vnetp;
+ type = vresp->type;
+ rx_grp = &vnetp->rx_grp[0];
+
+ if (type == VIO_NET_RES_LDC_SERVICE) {
+ /*
+ * Ring Index 0 is the default ring in the group and is
+ * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
+ * is allocated statically and is reported to the mac layer
+		 * in vnet_m_capab(). So all we need to do here is save a
+ * reference to the associated vresp.
+ */
+ rx_ringp = &rx_grp->rings[0];
+ rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
+ vresp->rx_ringp = (void *)rx_ringp;
+ return (0);
+ }
+ ASSERT(type == VIO_NET_RES_LDC_GUEST);
+
+ mac_perim_enter_by_mh(vnetp->mh, &mph1);
+
+ rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
+ if (rx_ringp == NULL) {
+ cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
+ vnetp->instance);
+ goto fail;
+ }
+
+ /* Store the LDC resource itself as the ring handle */
+ rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
+
+ /*
+ * Save a reference to the ring in the resource for lookup during
+ * unbind. Note this is only done for LDC resources. We don't need this
+ * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its
+ * rx rings are mapped to reserved pseudo rx rings (index 1 and 2).
+ */
+ vresp->rx_ringp = (void *)rx_ringp;
+ rx_ringp->state |= VNET_RXRING_LDC_GUEST;
+
+ /* Register the pseudo ring with upper-mac */
+ rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
+ if (rv != 0) {
+ rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
+ rx_ringp->hw_rh = NULL;
+ vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
+ goto fail;
+ }
+
+ mac_perim_exit(mph1);
+ return (0);
+fail:
+ mac_perim_exit(mph1);
+ return (1);
+}
+
+/* Unbind pseudo ring from a LDC resource */
+static void
+vnet_unbind_vgenring(vnet_res_t *vresp)
+{
+ vnet_t *vnetp;
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_rx_ring_t *rx_ringp;
+ mac_perim_handle_t mph1;
+ int type;
+
+ vnetp = vresp->vnetp;
+ type = vresp->type;
+ rx_grp = &vnetp->rx_grp[0];
+
+ if (vresp->rx_ringp == NULL) {
+ return;
+ }
+
+ if (type == VIO_NET_RES_LDC_SERVICE) {
+ /*
+ * Ring Index 0 is the default ring in the group and is
+ * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
+ * is allocated statically and is reported to the mac layer
+		 * in vnet_m_capab(). So all we need to do here is remove its
+ * reference to the associated vresp.
+ */
+ rx_ringp = &rx_grp->rings[0];
+ rx_ringp->hw_rh = NULL;
+ vresp->rx_ringp = NULL;
+ return;
+ }
+ ASSERT(type == VIO_NET_RES_LDC_GUEST);
+
+ mac_perim_enter_by_mh(vnetp->mh, &mph1);
+
+ rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
+ vresp->rx_ringp = NULL;
+
+ if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
+ /* Unregister the pseudo ring with upper-mac */
+ mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);
+
+ rx_ringp->hw_rh = NULL;
+ rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
+
+ /* Free the pseudo rx ring */
+ vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
+ }
+
+ mac_perim_exit(mph1);
+}
+
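+/* Unbind the pseudo ring(s) from the given resource, based on its type */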
+static void
+vnet_unbind_rings(vnet_res_t *vresp)
+{
+ switch (vresp->type) {
+
+ case VIO_NET_RES_LDC_SERVICE:
+ case VIO_NET_RES_LDC_GUEST:
+ vnet_unbind_vgenring(vresp);
+ break;
+
+ case VIO_NET_RES_HYBRID:
+ vnet_unbind_hwrings(vresp->vnetp);
+ break;
+
+ default:
+ break;
+
+ }
+}
+
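+/* Bind the given resource to its pseudo ring(s), based on its type */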
+static int
+vnet_bind_rings(vnet_res_t *vresp)
+{
+ int rv;
+
+ switch (vresp->type) {
+
+ case VIO_NET_RES_LDC_SERVICE:
+ case VIO_NET_RES_LDC_GUEST:
+ rv = vnet_bind_vgenring(vresp);
+ break;
+
+ case VIO_NET_RES_HYBRID:
+ rv = vnet_bind_hwrings(vresp->vnetp);
+ break;
+
+ default:
+ rv = 1;
+ break;
+
+ }
+
+ return (rv);
+}
+
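+/* Get the specified statistic from the underlying Hybrid device */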
+/* ARGSUSED */
+int
+vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
+{
+ vnet_t *vnetp = (vnet_t *)arg;
+
+ *val = mac_stat_get(vnetp->hio_mh, stat);
+ return (0);
+}
+
+/*
+ * The start() and stop() routines for the Hybrid resource below are just
+ * dummy functions. This is provided to avoid resource type specific code in
+ * vnet_start_resources() and vnet_stop_resources(). The starting and stopping
+ * of the Hybrid resource happens in the context of the mac_client interfaces
+ * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup().
+ */
+/* ARGSUSED */
+static int
+vnet_hio_start(void *arg)
+{
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+vnet_hio_stop(void *arg)
+{
+}
+
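+/*
+ * Transmit a chain of packets over the hardware tx ring that is mapped to
+ * this pseudo tx ring. If the hardware ring flow controls, the remainder of
+ * the chain is returned to the caller; transmits resume after a tx ring
+ * update notification (see vnet_hio_notify_cb()).
+ */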
+mblk_t *
+vnet_hio_tx(void *arg, mblk_t *mp)
+{
+ vnet_pseudo_tx_ring_t *tx_ringp;
+ mblk_t *nextp;
+ mblk_t *ret_mp;
+
+ tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
+ for (;;) {
+ nextp = mp->b_next;
+ mp->b_next = NULL;
+
+ ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
+ if (ret_mp != NULL) {
+ ret_mp->b_next = nextp;
+ mp = ret_mp;
+ break;
+ }
+
+ if ((mp = nextp) == NULL)
+ break;
+ }
+ return (mp);
+}
+
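+/*
+ * Notify callback registered with the Hybrid mac. On a MAC_NOTE_TX
+ * notification (transmit resources available again), call vnet_tx_update()
+ * to restart transmits on the pseudo tx rings.
+ */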
+static void
+vnet_hio_notify_cb(void *arg, mac_notify_type_t type)
+{
+ vnet_t *vnetp = (vnet_t *)arg;
+ mac_perim_handle_t mph;
+
+ mac_perim_enter_by_mh(vnetp->hio_mh, &mph);
+ switch (type) {
+ case MAC_NOTE_TX:
+ vnet_tx_update(vnetp->hio_vhp);
+ break;
+
+ default:
+ break;
+ }
+ mac_perim_exit(mph);
+}
+
#ifdef VNET_IOC_DEBUG
/*
diff --git a/usr/src/uts/sun4v/io/vnet_dds.c b/usr/src/uts/sun4v/io/vnet_dds.c
index c3548db771..b6b6cbea13 100644
--- a/usr/src/uts/sun4v/io/vnet_dds.c
+++ b/usr/src/uts/sun4v/io/vnet_dds.c
@@ -113,6 +113,8 @@ static void vdds_release_range_prop(dev_info_t *nexus_dip, uint64_t cookie);
/* Functions imported from other files */
extern int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
+extern int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
+extern void vnet_hio_mac_cleanup(vnet_t *vnetp);
/* HV functions that are used in this file */
extern uint64_t vdds_hv_niu_vr_getinfo(uint32_t hvcookie,
@@ -412,7 +414,31 @@ vdds_process_dds_msg_task(void *arg)
} else {
vdds->hio_dip = dip;
vdds->hio_cookie = hio_cookie;
- (void) vdds_send_dds_resp_msg(vnetp, dmsg, B_TRUE);
+			(void) snprintf(vdds->hio_ifname,
+			    sizeof (vdds->hio_ifname), "%s%d",
+			    ddi_driver_name(dip), ddi_get_instance(dip));
+
+ rv = vnet_hio_mac_init(vnetp, vdds->hio_ifname);
+ if (rv != 0) {
+ /* failed - cleanup, send failed DDS message */
+ DERR(vdds, "HIO mac init failed, cleaning up");
+ rv = vdds_destroy_niu_node(dip, hio_cookie);
+ if (rv == 0) {
+ /* use DERR to print by default */
+ DERR(vdds, "Successfully destroyed"
+ " Hybrid node");
+ } else {
+				cmn_err(CE_WARN, "vnet%d: Failed to "
+ "destroy Hybrid node",
+ vnetp->instance);
+ }
+ vdds->hio_dip = NULL;
+ vdds->hio_cookie = 0;
+ (void) vdds_send_dds_resp_msg(vnetp,
+ dmsg, B_FALSE);
+ } else {
+ (void) vdds_send_dds_resp_msg(vnetp,
+ dmsg, B_TRUE);
+ }
			/* use DERR to print by default */
DERR(vdds, "Successfully created HIO node");
}
@@ -424,6 +450,7 @@ vdds_process_dds_msg_task(void *arg)
DBG2(vdds, "NACK: No HIO device destroy");
(void) vdds_send_dds_resp_msg(vnetp, dmsg, B_FALSE);
} else {
+ vnet_hio_mac_cleanup(vnetp);
rv = vdds_destroy_niu_node(vnetp->vdds_info.hio_dip,
vdds->hio_cookie);
if (rv == 0) {
@@ -444,6 +471,7 @@ vdds_process_dds_msg_task(void *arg)
case VNET_DDS_TASK_REL_SHARE:
DBG2(vdds, "REL_SHARE task...");
if (vnetp->vdds_info.hio_dip != NULL) {
+ vnet_hio_mac_cleanup(vnetp);
rv = vdds_destroy_niu_node(vnetp->vdds_info.hio_dip,
vdds->hio_cookie);
if (rv == 0) {
diff --git a/usr/src/uts/sun4v/io/vnet_gen.c b/usr/src/uts/sun4v/io/vnet_gen.c
index bbf5e32cd3..f83c3a13d0 100644
--- a/usr/src/uts/sun4v/io/vnet_gen.c
+++ b/usr/src/uts/sun4v/io/vnet_gen.c
@@ -73,11 +73,15 @@
/* vgen proxy entry points */
int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
const uint8_t *macaddr, void **vgenhdl);
+int vgen_init_mdeg(void *arg);
void vgen_uninit(void *arg);
int vgen_dds_tx(void *arg, void *dmsg);
void vgen_mod_init(void);
int vgen_mod_cleanup(void);
void vgen_mod_fini(void);
+int vgen_enable_intr(void *arg);
+int vgen_disable_intr(void *arg);
+mblk_t *vgen_poll(void *arg, int bytes_to_pickup);
static int vgen_start(void *arg);
static void vgen_stop(void *arg);
static mblk_t *vgen_tx(void *arg, mblk_t *mp);
@@ -151,6 +155,7 @@ static int vgen_num_txpending(vgen_ldc_t *ldcp);
static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
static void vgen_ldc_watchdog(void *arg);
+static mblk_t *vgen_ldc_poll(vgen_ldc_t *ldcp, int bytes_to_pickup);
/* vgen handshake functions */
static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
@@ -200,7 +205,7 @@ static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
static void vgen_drain_rcv_thread(vgen_ldc_t *ldcp);
static void vgen_ldc_rcv_worker(void *arg);
static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
-static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp);
+static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt);
static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
@@ -536,13 +541,6 @@ vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
if (rv != 0) {
goto vgen_init_fail;
}
-
- /* register with MD event generator */
- rv = vgen_mdeg_reg(vgenp);
- if (rv != DDI_SUCCESS) {
- goto vgen_init_fail;
- }
-
*vgenhdl = (void *)vgenp;
DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
@@ -562,6 +560,15 @@ vgen_init_fail:
return (DDI_FAILURE);
}
+int
+vgen_init_mdeg(void *arg)
+{
+ vgen_t *vgenp = (vgen_t *)arg;
+
+ /* register with MD event generator */
+ return (vgen_mdeg_reg(vgenp));
+}
+
/*
* Called by vnet to undo the initializations done by vgen_init().
* The handle provided by generic transport during vgen_init() is the argument.
@@ -2094,13 +2101,21 @@ mdeg_reg_fail:
static void
vgen_mdeg_unreg(vgen_t *vgenp)
{
- (void) mdeg_unregister(vgenp->mdeg_dev_hdl);
- (void) mdeg_unregister(vgenp->mdeg_port_hdl);
- kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
- KMEM_FREE(vgenp->mdeg_parentp);
- vgenp->mdeg_parentp = NULL;
- vgenp->mdeg_dev_hdl = NULL;
- vgenp->mdeg_port_hdl = NULL;
+ if (vgenp->mdeg_dev_hdl != NULL) {
+ (void) mdeg_unregister(vgenp->mdeg_dev_hdl);
+ vgenp->mdeg_dev_hdl = NULL;
+ }
+ if (vgenp->mdeg_port_hdl != NULL) {
+ (void) mdeg_unregister(vgenp->mdeg_port_hdl);
+ vgenp->mdeg_port_hdl = NULL;
+ }
+
+ if (vgenp->mdeg_parentp != NULL) {
+ kmem_free(vgenp->mdeg_parentp->specp,
+ sizeof (vgen_prop_template));
+ KMEM_FREE(vgenp->mdeg_parentp);
+ vgenp->mdeg_parentp = NULL;
+ }
}
/* mdeg callback function for the port node */
@@ -2907,6 +2922,7 @@ vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL);
attach_state |= AST_mutex_init;
@@ -3032,6 +3048,7 @@ ldc_attach_failed:
mutex_destroy(&ldcp->cblock);
mutex_destroy(&ldcp->wrlock);
mutex_destroy(&ldcp->rxlock);
+ mutex_destroy(&ldcp->pollq_lock);
}
if (attach_state & AST_ldc_alloc) {
KMEM_FREE(ldcp);
@@ -3100,6 +3117,7 @@ vgen_ldc_detach(vgen_ldc_t *ldcp)
mutex_destroy(&ldcp->cblock);
mutex_destroy(&ldcp->wrlock);
mutex_destroy(&ldcp->rxlock);
+ mutex_destroy(&ldcp->pollq_lock);
/* unlink it from the list */
*prev_ldcp = ldcp->nextp;
@@ -6278,7 +6296,7 @@ vgen_recv_retry:
*/
if (bp != NULL) {
DTRACE_PROBE1(vgen_rcv_msgs, int, count);
- vgen_rx(ldcp, bp);
+ vgen_rx(ldcp, bp, bpt);
count = 0;
bp = bpt = NULL;
}
@@ -6459,7 +6477,7 @@ vgen_recv_retry:
if (count++ > vgen_chain_len) {
DTRACE_PROBE1(vgen_rcv_msgs, int, count);
- vgen_rx(ldcp, bp);
+ vgen_rx(ldcp, bp, bpt);
count = 0;
bp = bpt = NULL;
}
@@ -6512,7 +6530,7 @@ error_ret:
/* send up packets received so far */
if (bp != NULL) {
DTRACE_PROBE1(vgen_rcv_msgs, int, count);
- vgen_rx(ldcp, bp);
+ vgen_rx(ldcp, bp, bpt);
bp = bpt = NULL;
}
DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
@@ -6996,18 +7014,57 @@ vgen_print_ldcinfo(vgen_ldc_t *ldcp)
* Send received packets up the stack.
*/
static void
-vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp)
+vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt)
{
vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
+ vgen_t *vgenp = LDC_TO_VGEN(ldcp);
if (ldcp->rcv_thread != NULL) {
ASSERT(MUTEX_HELD(&ldcp->rxlock));
- mutex_exit(&ldcp->rxlock);
} else {
ASSERT(MUTEX_HELD(&ldcp->cblock));
+ }
+
+ mutex_enter(&ldcp->pollq_lock);
+
+ if (ldcp->polling_on == B_TRUE) {
+ /*
+ * If we are in polling mode, simply queue
+ * the packets onto the poll queue and return.
+ */
+ if (ldcp->pollq_headp == NULL) {
+ ldcp->pollq_headp = bp;
+ ldcp->pollq_tailp = bpt;
+ } else {
+ ldcp->pollq_tailp->b_next = bp;
+ ldcp->pollq_tailp = bpt;
+ }
+
+ mutex_exit(&ldcp->pollq_lock);
+ return;
+ }
+
+ /*
+ * Prepend any pending mblks in the poll queue, now that we
+ * are in interrupt mode, before sending up the chain of pkts.
+ */
+ if (ldcp->pollq_headp != NULL) {
+ DBG2(vgenp, ldcp, "vgen_rx(%lx), pending pollq_headp\n",
+ (uintptr_t)ldcp);
+ ldcp->pollq_tailp->b_next = bp;
+ bp = ldcp->pollq_headp;
+ ldcp->pollq_headp = ldcp->pollq_tailp = NULL;
+ }
+
+ mutex_exit(&ldcp->pollq_lock);
+
+ if (ldcp->rcv_thread != NULL) {
+ mutex_exit(&ldcp->rxlock);
+ } else {
mutex_exit(&ldcp->cblock);
}
+ /* Send up the packets */
vrx_cb(ldcp->portp->vhp, bp);
if (ldcp->rcv_thread != NULL) {
@@ -7233,6 +7290,145 @@ vgen_ldc_reset(vgen_ldc_t *ldcp)
vgen_handshake_retry(ldcp);
}
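+
+/*
+ * Exit polling mode for the channel: clear polling_on so that vgen_rx()
+ * resumes delivering received packets directly up the stack; any packets
+ * pending on the poll queue are prepended on the next receive.
+ */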
+int
+vgen_enable_intr(void *arg)
+{
+ vgen_port_t *portp = (vgen_port_t *)arg;
+ vgen_ldclist_t *ldclp;
+ vgen_ldc_t *ldcp;
+
+ ldclp = &portp->ldclist;
+ READ_ENTER(&ldclp->rwlock);
+ /*
+ * NOTE: for now, we will assume we have a single channel.
+ */
+ if (ldclp->headp == NULL) {
+ RW_EXIT(&ldclp->rwlock);
+ return (1);
+ }
+ ldcp = ldclp->headp;
+
+ mutex_enter(&ldcp->pollq_lock);
+ ldcp->polling_on = B_FALSE;
+ mutex_exit(&ldcp->pollq_lock);
+
+ RW_EXIT(&ldclp->rwlock);
+
+ return (0);
+}
+
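+/*
+ * Enter polling mode for the channel: set polling_on so that vgen_rx()
+ * queues received packets on the poll queue, to be picked up by the mac
+ * layer via vgen_poll().
+ */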
+int
+vgen_disable_intr(void *arg)
+{
+ vgen_port_t *portp = (vgen_port_t *)arg;
+ vgen_ldclist_t *ldclp;
+ vgen_ldc_t *ldcp;
+
+ ldclp = &portp->ldclist;
+ READ_ENTER(&ldclp->rwlock);
+ /*
+ * NOTE: for now, we will assume we have a single channel.
+ */
+ if (ldclp->headp == NULL) {
+ RW_EXIT(&ldclp->rwlock);
+ return (1);
+ }
+ ldcp = ldclp->headp;
+
+ mutex_enter(&ldcp->pollq_lock);
+ ldcp->polling_on = B_TRUE;
+ mutex_exit(&ldcp->pollq_lock);
+
+ RW_EXIT(&ldclp->rwlock);
+
+ return (0);
+}
+
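+/*
+ * Poll entry point invoked from vnet_rx_poll(); picks up packets queued on
+ * the channel's poll queue, up to 'bytes_to_pickup' bytes.
+ */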
+mblk_t *
+vgen_poll(void *arg, int bytes_to_pickup)
+{
+ vgen_port_t *portp = (vgen_port_t *)arg;
+ vgen_ldclist_t *ldclp;
+ vgen_ldc_t *ldcp;
+ mblk_t *mp = NULL;
+
+ ldclp = &portp->ldclist;
+ READ_ENTER(&ldclp->rwlock);
+ /*
+ * NOTE: for now, we will assume we have a single channel.
+ */
+ if (ldclp->headp == NULL) {
+ RW_EXIT(&ldclp->rwlock);
+ return (NULL);
+ }
+ ldcp = ldclp->headp;
+
+ mp = vgen_ldc_poll(ldcp, bytes_to_pickup);
+
+ RW_EXIT(&ldclp->rwlock);
+ return (mp);
+}
+
+static mblk_t *
+vgen_ldc_poll(vgen_ldc_t *ldcp, int bytes_to_pickup)
+{
+ mblk_t *bp = NULL;
+ mblk_t *bpt = NULL;
+ mblk_t *mp = NULL;
+ size_t mblk_sz = 0;
+ size_t sz = 0;
+ uint_t count = 0;
+
+ mutex_enter(&ldcp->pollq_lock);
+
+ bp = ldcp->pollq_headp;
+ while (bp != NULL) {
+ /* get the size of this packet */
+ mblk_sz = msgdsize(bp);
+
+		/* if adding this pkt exceeds the size limit, we are done */
+ if (sz + mblk_sz > bytes_to_pickup) {
+ break;
+ }
+
+ /* we have room for this packet */
+ sz += mblk_sz;
+
+ /* increment the # of packets being sent up */
+ count++;
+
+ /* track the last processed pkt */
+ bpt = bp;
+
+ /* get the next pkt */
+ bp = bp->b_next;
+ }
+
+ if (count != 0) {
+ /*
+ * picked up some packets; save the head of pkts to be sent up.
+ */
+ mp = ldcp->pollq_headp;
+
+ /* move the pollq_headp to skip over the pkts being sent up */
+ ldcp->pollq_headp = bp;
+
+ /* picked up all pending pkts in the queue; reset tail also */
+ if (ldcp->pollq_headp == NULL) {
+ ldcp->pollq_tailp = NULL;
+ }
+
+ /* terminate the tail of pkts to be sent up */
+ bpt->b_next = NULL;
+ }
+
+ mutex_exit(&ldcp->pollq_lock);
+
+ DTRACE_PROBE1(vgen_poll_pkts, uint_t, count);
+ return (mp);
+}
+
#if DEBUG
/*
diff --git a/usr/src/uts/sun4v/sys/vnet.h b/usr/src/uts/sun4v/sys/vnet.h
index 1e2f88aeb8..21fb92852b 100644
--- a/usr/src/uts/sun4v/sys/vnet.h
+++ b/usr/src/uts/sun4v/sys/vnet.h
@@ -34,6 +34,8 @@ extern "C" {
#include <sys/vnet_res.h>
#include <sys/vnet_mailbox.h>
#include <sys/modhash.h>
+#include <net/if.h>
+#include <sys/mac_client.h>
#define VNET_SUCCESS (0) /* successful return */
#define VNET_FAILURE (-1) /* unsuccessful return */
@@ -117,6 +119,7 @@ typedef struct vnet_res {
uint32_t refcnt; /* reference count */
struct vnet *vnetp; /* back pointer to vnet */
kstat_t *ksp; /* hio kstats */
+ void *rx_ringp; /* assoc pseudo rx ring */
} vnet_res_t;
#define VNET_DDS_TASK_ADD_SHARE 0x01
@@ -131,6 +134,7 @@ typedef struct vnet_dds_info {
vio_dds_msg_t dmsg; /* Pending DDS message */
dev_info_t *hio_dip; /* Hybrid device's dip */
uint64_t hio_cookie; /* Hybrid device's cookie */
+ char hio_ifname[LIFNAMSIZ]; /* Hybrid interface name */
ddi_taskq_t *dds_taskqp; /* Taskq's used for DDS */
struct vnet *vnetp; /* Back pointer to vnetp */
} vnet_dds_info_t;
@@ -155,12 +159,103 @@ typedef struct vnet_dds_info {
typedef enum {
AST_init = 0x0, AST_vnet_alloc = 0x1,
- AST_mac_alloc = 0x2, AST_read_macaddr = 0x4,
- AST_vgen_init = 0x8, AST_fdbh_alloc = 0x10,
- AST_vdds_init = 0x20, AST_taskq_create = 0x40,
- AST_vnet_list = 0x80, AST_macreg = 0x100
+ AST_ring_init = 0x2, AST_vdds_init = 0x4,
+ AST_read_macaddr = 0x8, AST_fdbh_alloc = 0x10,
+ AST_taskq_create = 0x20, AST_vnet_list = 0x40,
+ AST_vgen_init = 0x80, AST_macreg = 0x100,
+ AST_init_mdeg = 0x200
} vnet_attach_progress_t;
+#define VNET_NUM_PSEUDO_GROUPS 1 /* # of pseudo ring grps */
+#define VNET_NUM_HYBRID_RINGS 2 /* # of Hybrid tx/rx rings */
+#define VNET_HYBRID_RXRING_INDEX 1 /* Hybrid rx ring start index */
+
+/*
+ * # of Pseudo TX Rings is defined based on the possible
+ * # of TX Hardware Rings from a Hybrid resource.
+ */
+#define VNET_NUM_PSEUDO_TXRINGS VNET_NUM_HYBRID_RINGS
+
+/*
+ * # of Pseudo RX Rings that are reserved and exposed by default.
+ * 1 for LDC resource to vsw + 2 for RX rings of Hybrid resource.
+ */
+#define VNET_NUM_PSEUDO_RXRINGS_DEFAULT (VNET_NUM_HYBRID_RINGS + 1)
+
+/* Pseudo RX Ring States */
+typedef enum {
+ VNET_RXRING_FREE = 0x0, /* Free */
+ VNET_RXRING_INUSE = 0x1, /* In use */
+ VNET_RXRING_LDC_SERVICE = 0x2, /* Mapped to vswitch */
+ VNET_RXRING_LDC_GUEST = 0x4, /* Mapped to a peer vnet */
+ VNET_RXRING_HYBRID = 0x8, /* Mapped to Hybrid resource */
+ VNET_RXRING_STARTED = 0x10 /* Started */
+} vnet_rxring_state_t;
+
+/* Pseudo TX Ring States */
+typedef enum {
+ VNET_TXRING_FREE = 0x0, /* Free */
+ VNET_TXRING_INUSE = 0x1, /* In use */
+ VNET_TXRING_SHARED = 0x2, /* Shared among LDCs */
+ VNET_TXRING_HYBRID = 0x4, /* Shared among LDCs, Hybrid resource */
+ VNET_TXRING_STARTED = 0x8 /* Started */
+} vnet_txring_state_t;
+
+/*
+ * Pseudo TX Ring
+ */
+typedef struct vnet_pseudo_tx_ring {
+ uint_t index; /* ring index */
+ vnet_txring_state_t state; /* ring state */
+ void *grp; /* grp associated */
+ void *vnetp; /* vnet associated */
+ mac_ring_handle_t handle; /* ring handle in mac layer */
+ mac_ring_handle_t hw_rh; /* Resource type dependent, internal */
+ /* ring handle. Hybrid res: ring hdl */
+					/* of hardware tx ring; otherwise */
+					/* NULL */
+} vnet_pseudo_tx_ring_t;
+
+/*
+ * Pseudo RX Ring
+ */
+typedef struct vnet_pseudo_rx_ring {
+ uint_t index; /* ring index */
+ vnet_rxring_state_t state; /* ring state */
+ void *grp; /* grp associated */
+ void *vnetp; /* vnet associated */
+ mac_ring_handle_t handle; /* ring handle in mac layer */
+ mac_ring_handle_t hw_rh; /* Resource type dependent, internal */
+ /* ring handle. Hybrid res: ring hdl */
+					/* of hardware rx ring; LDC res: hdl */
+					/* to the res itself (vnet_res_t) */
+ uint64_t gen_num; /* Mac layer gen_num */
+} vnet_pseudo_rx_ring_t;
+
+/*
+ * Pseudo TX Ring Group
+ */
+typedef struct vnet_pseudo_tx_group {
+ uint_t index; /* group index */
+ void *vnetp; /* vnet associated */
+ mac_group_handle_t handle; /* grp handle in mac layer */
+ uint_t ring_cnt; /* total # of rings in grp */
+ vnet_pseudo_tx_ring_t *rings; /* array of rings */
+} vnet_pseudo_tx_group_t;
+
+/*
+ * Pseudo RX Ring Group
+ */
+typedef struct vnet_pseudo_rx_group {
+ krwlock_t lock; /* sync rings access in grp */
+ int index; /* group index */
+ void *vnetp; /* vnet this grp belongs to */
+ mac_group_handle_t handle; /* grp handle in mac layer */
+ uint_t max_ring_cnt; /* total # of rings in grp */
+ uint_t ring_cnt; /* # of rings in use */
+ vnet_pseudo_rx_ring_t *rings; /* array of rings */
+} vnet_pseudo_rx_group_t;
+
/*
* vnet instance state information
*/
@@ -194,6 +289,18 @@ typedef struct vnet {
vnet_dds_info_t vdds_info; /* DDS related info */
krwlock_t vrwlock; /* Resource list lock */
ddi_taskq_t *taskqp; /* Resource taskq */
+
+ /* pseudo ring groups */
+ vnet_pseudo_rx_group_t rx_grp[VNET_NUM_PSEUDO_GROUPS];
+ vnet_pseudo_tx_group_t tx_grp[VNET_NUM_PSEUDO_GROUPS];
+
+ vio_net_handle_t hio_vhp; /* HIO resource hdl */
+ mac_handle_t hio_mh; /* HIO mac hdl */
+ mac_client_handle_t hio_mch; /* HIO mac client hdl */
+ mac_unicast_handle_t hio_muh; /* HIO mac unicst hdl */
+ mac_notify_handle_t hio_mnh; /* HIO notify cb hdl */
+ mac_group_handle_t rx_hwgh; /* HIO rx ring-group hdl */
+ mac_group_handle_t tx_hwgh; /* HIO tx ring-group hdl */
} vnet_t;
#ifdef DEBUG
diff --git a/usr/src/uts/sun4v/sys/vnet_gen.h b/usr/src/uts/sun4v/sys/vnet_gen.h
index 6c04c3cfe0..577667762b 100644
--- a/usr/src/uts/sun4v/sys/vnet_gen.h
+++ b/usr/src/uts/sun4v/sys/vnet_gen.h
@@ -180,6 +180,7 @@ typedef struct vgen_ldc {
kmutex_t tclock; /* tx reclaim lock */
kmutex_t wrlock; /* sync transmits */
kmutex_t rxlock; /* sync reception */
+ kmutex_t pollq_lock; /* sync polling and rxworker */
/* channel info from ldc layer */
uint64_t ldc_id; /* channel number */
@@ -248,6 +249,11 @@ typedef struct vgen_ldc {
kmutex_t rcv_thr_lock; /* lock for receive thread */
kcondvar_t rcv_thr_cv; /* cond.var for recv thread */
+ /* receive polling fields */
+ boolean_t polling_on; /* polling enabled ? */
+ mblk_t *pollq_headp; /* head of pkts in pollq */
+ mblk_t *pollq_tailp; /* tail of pkts in pollq */
+
/* channel statistics */
vgen_stats_t stats; /* channel statistics */
kstat_t *ksp; /* channel kstats */