Diffstat (limited to 'usr/src/uts/sun4v/io/vnet.c')
-rw-r--r-- | usr/src/uts/sun4v/io/vnet.c | 1408 |
1 file changed, 1345 insertions(+), 63 deletions(-)
diff --git a/usr/src/uts/sun4v/io/vnet.c b/usr/src/uts/sun4v/io/vnet.c index 32b67b2588..884665b77f 100644 --- a/usr/src/uts/sun4v/io/vnet.c +++ b/usr/src/uts/sun4v/io/vnet.c @@ -40,6 +40,8 @@ #include <sys/dlpi.h> #include <net/if.h> #include <sys/mac_provider.h> +#include <sys/mac_client.h> +#include <sys/mac_client_priv.h> #include <sys/mac_ether.h> #include <sys/ddi.h> #include <sys/sunddi.h> @@ -75,11 +77,38 @@ static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp); #ifdef VNET_IOC_DEBUG static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp); #endif +static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data); +static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, + const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle); +static void vnet_get_group(void *arg, mac_ring_type_t type, const int index, + mac_group_info_t *infop, mac_group_handle_t handle); +static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); +static void vnet_rx_ring_stop(mac_ring_driver_t rdriver); +static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); +static void vnet_tx_ring_stop(mac_ring_driver_t rdriver); +static int vnet_ring_enable_intr(void *arg); +static int vnet_ring_disable_intr(void *arg); +static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup); +static int vnet_addmac(void *arg, const uint8_t *mac_addr); +static int vnet_remmac(void *arg, const uint8_t *mac_addr); /* vnet internal functions */ static int vnet_unattach(vnet_t *vnetp); +static void vnet_ring_grp_init(vnet_t *vnetp); +static void vnet_ring_grp_uninit(vnet_t *vnetp); static int vnet_mac_register(vnet_t *); static int vnet_read_mac_address(vnet_t *vnetp); +static int vnet_bind_vgenring(vnet_res_t *vresp); +static void vnet_unbind_vgenring(vnet_res_t *vresp); +static int vnet_bind_hwrings(vnet_t *vnetp); +static void vnet_unbind_hwrings(vnet_t *vnetp); +static int vnet_bind_rings(vnet_res_t *vresp); +static void vnet_unbind_rings(vnet_res_t *vresp); +static int vnet_hio_stat(void *, uint_t, uint64_t *); +static int vnet_hio_start(void *); +static void vnet_hio_stop(void *); +static void vnet_hio_notify_cb(void *arg, mac_notify_type_t type); +mblk_t *vnet_hio_tx(void *, mblk_t *); /* Forwarding database (FDB) routines */ static void vnet_fdb_create(vnet_t *vnetp); @@ -98,6 +127,8 @@ static void vnet_stop_resources(vnet_t *vnetp); static void vnet_dispatch_res_task(vnet_t *vnetp); static void vnet_res_start_task(void *arg); static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err); +static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp); +static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp); /* Exported to vnet_gen */ int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu); @@ -112,15 +143,21 @@ static void vnet_hio_destroy_kstats(kstat_t *ksp); /* Exported to to vnet_dds */ int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg); +int vnet_hio_mac_init(vnet_t *vnetp, char *ifname); +void vnet_hio_mac_cleanup(vnet_t *vnetp); /* Externs that are imported from vnet_gen */ extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip, const uint8_t *macaddr, void **vgenhdl); +extern int vgen_init_mdeg(void *arg); extern void vgen_uninit(void *arg); extern int vgen_dds_tx(void *arg, void *dmsg); extern void vgen_mod_init(void); extern int vgen_mod_cleanup(void); extern void vgen_mod_fini(void); +extern int vgen_enable_intr(void *arg); +extern int vgen_disable_intr(void *arg); 
+extern mblk_t *vgen_poll(void *arg, int bytes_to_pickup); /* Externs that are imported from vnet_dds */ extern void vdds_mod_init(void); @@ -131,6 +168,9 @@ extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg); extern void vdds_cleanup_hybrid_res(void *arg); extern void vdds_cleanup_hio(vnet_t *vnetp); +/* Externs imported from mac_impl */ +extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *); + #define DRV_NAME "vnet" #define VNET_FDBE_REFHOLD(p) \ { \ @@ -145,9 +185,9 @@ extern void vdds_cleanup_hio(vnet_t *vnetp); } #ifdef VNET_IOC_DEBUG -#define VNET_M_CALLBACK_FLAGS (MC_IOCTL) +#define VNET_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB) #else -#define VNET_M_CALLBACK_FLAGS (0) +#define VNET_M_CALLBACK_FLAGS (MC_GETCAPAB) #endif static mac_callbacks_t vnet_m_callbacks = { @@ -157,9 +197,23 @@ static mac_callbacks_t vnet_m_callbacks = { vnet_m_stop, vnet_m_promisc, vnet_m_multicst, - vnet_m_unicst, - vnet_m_tx, + NULL, /* m_unicst entry must be NULL while rx rings are exposed */ + NULL, /* m_tx entry must be NULL while tx rings are exposed */ vnet_m_ioctl, + vnet_m_capab, + NULL +}; + +static mac_callbacks_t vnet_hio_res_callbacks = { + 0, + vnet_hio_stat, + vnet_hio_start, + vnet_hio_stop, + NULL, + NULL, + NULL, + vnet_hio_tx, + NULL, NULL, NULL }; @@ -176,6 +230,9 @@ uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */ uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT; /* tx timeout in msec */ uint32_t vnet_ldc_mtu = VNET_LDC_MTU; /* ldc mtu */ +/* Configure tx serialization in mac layer for the vnet device */ +boolean_t vnet_mac_tx_serialize = B_TRUE; + /* * Set this to non-zero to enable additional internal receive buffer pools * based on the MTU of the device for better performance at the cost of more @@ -206,6 +263,11 @@ static struct ether_addr etherbroadcastaddr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +/* mac_open() retry delay in usec */ +uint32_t vnet_mac_open_delay = 100; /* 0.1 ms */ + +/* max # of mac_open() retries */ +uint32_t vnet_mac_open_retries = 100; /* * Property names @@ -375,6 +437,9 @@ vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd) rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL); attach_progress |= AST_vnet_alloc; + vnet_ring_grp_init(vnetp); + attach_progress |= AST_ring_init; + status = vdds_init(vnetp); if (status != 0) { goto vnet_attach_fail; @@ -419,10 +484,19 @@ vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd) attach_progress |= AST_vnet_list; /* - * Initialize the generic vnet plugin which provides - * communication via sun4v LDC (logical domain channel) based - * resources. It will register the LDC resources as and when - * they become available. + * Initialize the generic vnet plugin which provides communication via + * sun4v LDC (logical domain channel) based resources. This involves 2 + * steps; first, vgen_init() is invoked to read the various properties + * of the vnet device from its MD node (including its mtu which is + * needed to mac_register()) and obtain a handle to the vgen layer. + * After mac_register() is done and we have a mac handle, we then + * invoke vgen_init_mdeg() which registers with the the MD event + * generator (mdeg) framework to allow LDC resource notifications. + * Note: this sequence also allows us to report the correct default # + * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked + * in the context of mac_register(); and avoids conflicting with + * dynamic pseudo rx rings which get added/removed as a result of mdeg + * events in vgen. 
*/ status = vgen_init(vnetp, reg, vnetp->dip, (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl); @@ -432,15 +506,19 @@ vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd) } attach_progress |= AST_vgen_init; - /* register with MAC layer */ status = vnet_mac_register(vnetp); if (status != DDI_SUCCESS) { goto vnet_attach_fail; } vnetp->link_state = LINK_STATE_UNKNOWN; - attach_progress |= AST_macreg; + status = vgen_init_mdeg(vnetp->vgenhdl); + if (status != DDI_SUCCESS) { + goto vnet_attach_fail; + } + attach_progress |= AST_init_mdeg; + vnetp->attach_progress = attach_progress; DBG1(NULL, "instance(%d) exit\n", instance); @@ -503,21 +581,25 @@ vnet_unattach(vnet_t *vnetp) attach_progress = vnetp->attach_progress; /* - * Unregister from the gldv3 subsystem. This can fail, in particular - * if there are still any open references to this mac device; in which - * case we just return failure without continuing to detach further. + * Disable the mac device in the gldv3 subsystem. This can fail, in + * particular if there are still any open references to this mac + * device; in which case we just return failure without continuing to + * detach further. + * If it succeeds, we then invoke vgen_uninit() which should unregister + * any pseudo rings registered with the mac layer. Note we keep the + * AST_macreg flag on, so we can unregister with the mac layer at + * the end of this routine. */ if (attach_progress & AST_macreg) { - if (mac_unregister(vnetp->mh) != 0) { + if (mac_disable(vnetp->mh) != 0) { return (1); } - attach_progress &= ~AST_macreg; } /* - * Now that we have unregistered from gldv3, we must finish all other - * steps and successfully return from this function; otherwise we will - * end up leaving the device in a broken/unusable state. + * Now that we have disabled the device, we must finish all other steps + * and successfully return from this function; otherwise we will end up + * leaving the device in a broken/unusable state. * * First, release any hybrid resources assigned to this vnet device. */ @@ -530,9 +612,10 @@ vnet_unattach(vnet_t *vnetp) * Uninit vgen. This stops further mdeg callbacks to this vnet * device and/or its ports; and detaches any existing ports. */ - if (attach_progress & AST_vgen_init) { + if (attach_progress & (AST_vgen_init|AST_init_mdeg)) { vgen_uninit(vnetp->vgenhdl); attach_progress &= ~AST_vgen_init; + attach_progress &= ~AST_init_mdeg; } /* Destroy the taskq. */ @@ -563,6 +646,17 @@ vnet_unattach(vnet_t *vnetp) attach_progress &= ~AST_vnet_list; } + if (attach_progress & AST_ring_init) { + vnet_ring_grp_uninit(vnetp); + attach_progress &= ~AST_ring_init; + } + + if (attach_progress & AST_macreg) { + VERIFY(mac_unregister(vnetp->mh) == 0); + vnetp->mh = NULL; + attach_progress &= ~AST_macreg; + } + if (attach_progress & AST_vnet_alloc) { rw_destroy(&vnetp->vrwlock); rw_destroy(&vnetp->vsw_fp_rw); @@ -683,8 +777,9 @@ vnet_m_promisc(void *arg, boolean_t on) * external hosts. */ mblk_t * -vnet_m_tx(void *arg, mblk_t *mp) +vnet_tx_ring_send(void *arg, mblk_t *mp) { + vnet_pseudo_tx_ring_t *tx_ringp; vnet_t *vnetp; vnet_res_t *vresp; mblk_t *next; @@ -694,8 +789,10 @@ vnet_m_tx(void *arg, mblk_t *mp) boolean_t is_unicast; boolean_t is_pvid; /* non-default pvid ? */ boolean_t hres; /* Hybrid resource ? 
*/ + void *tx_arg; - vnetp = (vnet_t *)arg; + tx_ringp = (vnet_pseudo_tx_ring_t *)arg; + vnetp = (vnet_t *)tx_ringp->vnetp; DBG1(vnetp, "enter\n"); ASSERT(mp != NULL); @@ -790,10 +887,14 @@ vnet_m_tx(void *arg, mblk_t *mp) } } - } - macp = &vresp->macreg; - resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp); + macp = &vresp->macreg; + tx_arg = tx_ringp; + } else { + macp = &vresp->macreg; + tx_arg = macp->m_driver; + } + resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp); /* tx done; now release ref on fdb entry */ VNET_FDBE_REFRELE(vresp); @@ -848,6 +949,124 @@ vnet_m_stat(void *arg, uint_t stat, uint64_t *val) return (0); } +static void +vnet_ring_grp_init(vnet_t *vnetp) +{ + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_rx_ring_t *rx_ringp; + vnet_pseudo_tx_group_t *tx_grp; + vnet_pseudo_tx_ring_t *tx_ringp; + int i; + + tx_grp = &vnetp->tx_grp[0]; + tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) * + VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP); + for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) { + tx_ringp[i].state |= VNET_TXRING_SHARED; + } + tx_grp->rings = tx_ringp; + tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS; + + rx_grp = &vnetp->rx_grp[0]; + rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP; + rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL); + rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) * + rx_grp->max_ring_cnt, KM_SLEEP); + + /* + * Setup the first 3 Pseudo RX Rings that are reserved; + * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource. + */ + rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE; + rx_ringp[0].index = 0; + rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID; + rx_ringp[1].index = 1; + rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID; + rx_ringp[2].index = 2; + + rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT; + rx_grp->rings = rx_ringp; + + for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT; + i < rx_grp->max_ring_cnt; i++) { + rx_ringp = &rx_grp->rings[i]; + rx_ringp->state = VNET_RXRING_FREE; + rx_ringp->index = i; + } +} + +static void +vnet_ring_grp_uninit(vnet_t *vnetp) +{ + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_tx_group_t *tx_grp; + + tx_grp = &vnetp->tx_grp[0]; + if (tx_grp->rings != NULL) { + ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS); + kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) * + tx_grp->ring_cnt); + tx_grp->rings = NULL; + } + + rx_grp = &vnetp->rx_grp[0]; + if (rx_grp->rings != NULL) { + ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP); + ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT); + kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) * + rx_grp->max_ring_cnt); + rx_grp->rings = NULL; + } +} + +static vnet_pseudo_rx_ring_t * +vnet_alloc_pseudo_rx_ring(vnet_t *vnetp) +{ + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_rx_ring_t *rx_ringp; + int index; + + rx_grp = &vnetp->rx_grp[0]; + WRITE_ENTER(&rx_grp->lock); + + if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) { + /* no rings available */ + RW_EXIT(&rx_grp->lock); + return (NULL); + } + + for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT; + index < rx_grp->max_ring_cnt; index++) { + rx_ringp = &rx_grp->rings[index]; + if (rx_ringp->state == VNET_RXRING_FREE) { + rx_ringp->state |= VNET_RXRING_INUSE; + rx_grp->ring_cnt++; + break; + } + } + + RW_EXIT(&rx_grp->lock); + return (rx_ringp); +} + +static void +vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp) +{ + vnet_pseudo_rx_group_t *rx_grp; + + ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT); + rx_grp = &vnetp->rx_grp[0]; + 
WRITE_ENTER(&rx_grp->lock); + + if (ringp->state != VNET_RXRING_FREE) { + ringp->state = VNET_RXRING_FREE; + ringp->handle = NULL; + rx_grp->ring_cnt--; + } + + RW_EXIT(&rx_grp->lock); +} + /* wrapper function for mac_register() */ static int vnet_mac_register(vnet_t *vnetp) @@ -867,6 +1086,15 @@ vnet_mac_register(vnet_t *vnetp) macp->m_margin = VLAN_TAGSZ; /* + * MAC_VIRT_SERIALIZE flag is needed while hybridIO is enabled to + * workaround tx lock contention issues in nxge. + */ + macp->m_v12n = MAC_VIRT_LEVEL1; + if (vnet_mac_tx_serialize == B_TRUE) { + macp->m_v12n |= MAC_VIRT_SERIALIZE; + } + + /* * Finally, we're ready to register ourselves with the MAC layer * interface; if this succeeds, we're all ready to start() */ @@ -1116,42 +1344,57 @@ vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp) static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp) { - vnet_res_t *vresp = (vnet_res_t *)vrh; - vnet_t *vnetp = vresp->vnetp; + vnet_res_t *vresp = (vnet_res_t *)vrh; + vnet_t *vnetp = vresp->vnetp; + vnet_pseudo_rx_ring_t *ringp; if ((vnetp == NULL) || (vnetp->mh == 0)) { freemsgchain(mp); return; } - /* - * Packets received over a hybrid resource need additional processing - * to remove the tag, for the pvid case. The underlying resource is - * not aware of the vnet's pvid and thus packets are received with the - * vlan tag in the header; unlike packets that are received over a ldc - * channel in which case the peer vnet/vsw would have already removed - * the tag. - */ - if (vresp->type == VIO_NET_RES_HYBRID && - vnetp->pvid != vnetp->default_vlan_id) { - - vnet_rx_frames_untag(vnetp->pvid, &mp); - if (mp == NULL) { - return; - } - } - - mac_rx(vnetp->mh, NULL, mp); + ringp = vresp->rx_ringp; + mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num); } void vnet_tx_update(vio_net_handle_t vrh) { - vnet_res_t *vresp = (vnet_res_t *)vrh; - vnet_t *vnetp = vresp->vnetp; + vnet_res_t *vresp = (vnet_res_t *)vrh; + vnet_t *vnetp = vresp->vnetp; + vnet_pseudo_tx_ring_t *tx_ringp; + vnet_pseudo_tx_group_t *tx_grp; + int i; + + if (vnetp == NULL || vnetp->mh == NULL) { + return; + } - if ((vnetp != NULL) && (vnetp->mh != NULL)) { - mac_tx_update(vnetp->mh); + /* + * Currently, the tx hwring API (used to access rings that belong to + * a Hybrid IO resource) does not provide us a per ring flow ctrl + * update; also the pseudo rings are shared by the ports/ldcs in the + * vgen layer. Thus we can't figure out which pseudo ring is being + * re-enabled for transmits. To work around this, when we get a tx + * restart notification from below, we simply propagate that to all + * the tx pseudo rings registered with the mac layer above. + * + * There are a couple of side effects with this approach, but they are + * not harmful, as outlined below: + * + * A) We might send an invalid ring_update() for a ring that is not + * really flow controlled. This will not have any effect in the mac + * layer and packets will continue to be transmitted on that ring. + * + * B) We might end up clearing the flow control in the mac layer for + * a ring that is still flow controlled in the underlying resource. + * This will result in the mac layer restarting transmit, only to be + * flow controlled again on that ring. 
+ */ + tx_grp = &vnetp->tx_grp[0]; + for (i = 0; i < tx_grp->ring_cnt; i++) { + tx_ringp = &tx_grp->rings[i]; + mac_tx_ring_update(vnetp->mh, tx_ringp->handle); } } @@ -1233,8 +1476,8 @@ int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type, ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp, vio_net_callbacks_t *vcb) { - vnet_t *vnetp; - vnet_res_t *vresp; + vnet_t *vnetp; + vnet_res_t *vresp; vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP); ether_copy(local_macaddr, vresp->local_macaddr); @@ -1260,11 +1503,7 @@ int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type, vnetp->instance); } } - - WRITE_ENTER(&vnetp->vrwlock); - vresp->nextp = vnetp->vres_list; - vnetp->vres_list = vresp; - RW_EXIT(&vnetp->vrwlock); + vnet_add_resource(vnetp, vresp); break; } vnetp = vnetp->nextp; @@ -1281,6 +1520,14 @@ int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type, vcb->vio_net_tx_update = vnet_tx_update; vcb->vio_net_report_err = vnet_handle_res_err; + /* Bind the resource to pseudo ring(s) */ + if (vnet_bind_rings(vresp) != 0) { + (void) vnet_rem_resource(vnetp, vresp); + vnet_hio_destroy_kstats(vresp->ksp); + KMEM_FREE(vresp); + return (1); + } + /* Dispatch a task to start resources */ vnet_dispatch_res_task(vnetp); return (0); @@ -1294,8 +1541,6 @@ vio_net_resource_unreg(vio_net_handle_t vhp) { vnet_res_t *vresp = (vnet_res_t *)vhp; vnet_t *vnetp = vresp->vnetp; - vnet_res_t *vrp; - kstat_t *ksp = NULL; DBG1(NULL, "Resource Registerig hdl=0x%p", vhp); @@ -1306,7 +1551,29 @@ vio_net_resource_unreg(vio_net_handle_t vhp) */ vnet_fdbe_del(vnetp, vresp); + vnet_unbind_rings(vresp); + /* Now remove the resource from the list */ + (void) vnet_rem_resource(vnetp, vresp); + + vnet_hio_destroy_kstats(vresp->ksp); + KMEM_FREE(vresp); +} + +static void +vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp) +{ + WRITE_ENTER(&vnetp->vrwlock); + vresp->nextp = vnetp->vres_list; + vnetp->vres_list = vresp; + RW_EXIT(&vnetp->vrwlock); +} + +static vnet_res_t * +vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp) +{ + vnet_res_t *vrp; + WRITE_ENTER(&vnetp->vrwlock); if (vresp == vnetp->vres_list) { vnetp->vres_list = vresp->nextp; @@ -1320,15 +1587,12 @@ vio_net_resource_unreg(vio_net_handle_t vhp) vrp = vrp->nextp; } } - - ksp = vresp->ksp; - vresp->ksp = NULL; - vresp->vnetp = NULL; vresp->nextp = NULL; + RW_EXIT(&vnetp->vrwlock); - vnet_hio_destroy_kstats(ksp); - KMEM_FREE(vresp); + + return (vresp); } /* @@ -1710,6 +1974,1024 @@ vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp) } } +static boolean_t +vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data) +{ + vnet_t *vnetp = (vnet_t *)arg; + + if (vnetp == NULL) { + return (0); + } + + switch (cap) { + + case MAC_CAPAB_RINGS: { + + mac_capab_rings_t *cap_rings = cap_data; + /* + * Rings Capability Notes: + * We advertise rings to make use of the rings framework in + * gldv3 mac layer, to improve the performance. This is + * specifically needed when a Hybrid resource (with multiple + * tx/rx hardware rings) is assigned to a vnet device. We also + * leverage this for the normal case when no Hybrid resource is + * assigned. 
+ * + * Ring Allocation: + * - TX path: + * We expose a pseudo ring group with 2 pseudo tx rings (as + * currently HybridIO exports only 2 rings) In the normal case, + * transmit traffic that comes down to the driver through the + * mri_tx (vnet_tx_ring_send()) entry point goes through the + * distributed switching algorithm in vnet and gets transmitted + * over a port/LDC in the vgen layer to either the vswitch or a + * peer vnet. If and when a Hybrid resource is assigned to the + * vnet, we obtain the tx ring information of the Hybrid device + * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings. + * Traffic being sent over the Hybrid resource by the mac layer + * gets spread across both hw rings, as they are mapped to the + * 2 pseudo tx rings in vnet. + * + * - RX path: + * We expose a pseudo ring group with 3 pseudo rx rings (static + * rings) initially. The first (default) pseudo rx ring is + * reserved for the resource that connects to the vswitch + * service. The next 2 rings are reserved for a Hybrid resource + * that may be assigned to the vnet device. If and when a + * Hybrid resource is assigned to the vnet, we obtain the rx + * ring information of the Hybrid device (nxge) and map these + * pseudo rings 1:1 to the 2 hw rx rings. For each additional + * resource that connects to a peer vnet, we dynamically + * allocate a pseudo rx ring and map it to that resource, when + * the resource gets added; and the pseudo rx ring is + * dynamically registered with the upper mac layer. We do the + * reverse and unregister the ring with the mac layer when + * the resource gets removed. + * + * Synchronization notes: + * We don't need any lock to protect members of ring structure, + * specifically ringp->hw_rh, in either the TX or the RX ring, + * as explained below. + * - TX ring: + * ring->hw_rh is initialized only when a Hybrid resource is + * associated; and gets referenced only in vnet_hio_tx(). The + * Hybrid resource itself is available in fdb only after tx + * hwrings are found and mapped; i.e, in vio_net_resource_reg() + * we call vnet_bind_rings() first and then call + * vnet_start_resources() which adds an entry to fdb. For + * traffic going over LDC resources, we don't reference + * ring->hw_rh at all. + * - RX ring: + * For rings mapped to Hybrid resource ring->hw_rh is + * initialized and only then do we add the rx callback for + * the underlying Hybrid resource; we disable callbacks before + * we unmap ring->hw_rh. For rings mapped to LDC resources, we + * stop the rx callbacks (in vgen) before we remove ring->hw_rh + * (vio_net_resource_unreg()). + */ + + if (cap_rings->mr_type == MAC_RING_TYPE_RX) { + cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; + + /* + * The ring_cnt for rx grp is initialized in + * vnet_ring_grp_init(). Later, the ring_cnt gets + * updated dynamically whenever LDC resources are added + * or removed. + */ + cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt; + cap_rings->mr_rget = vnet_get_ring; + + cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS; + cap_rings->mr_gget = vnet_get_group; + cap_rings->mr_gaddring = NULL; + cap_rings->mr_gremring = NULL; + } else { + cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; + + /* + * The ring_cnt for tx grp is initialized in + * vnet_ring_grp_init() and remains constant, as we + * do not support dymanic tx rings for now. 
+ */ + cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt; + cap_rings->mr_rget = vnet_get_ring; + + /* + * Transmit rings are not grouped; i.e, the number of + * transmit ring groups advertised should be set to 0. + */ + cap_rings->mr_gnum = 0; + + cap_rings->mr_gget = vnet_get_group; + cap_rings->mr_gaddring = NULL; + cap_rings->mr_gremring = NULL; + } + return (B_TRUE); + + } + + default: + break; + + } + + return (B_FALSE); +} + +/* + * Callback funtion for MAC layer to get ring information. + */ +static void +vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, + const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle) +{ + vnet_t *vnetp = arg; + + switch (rtype) { + + case MAC_RING_TYPE_RX: { + + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_rx_ring_t *rx_ringp; + mac_intr_t *mintr; + + /* We advertised only one RX group */ + ASSERT(g_index == 0); + rx_grp = &vnetp->rx_grp[g_index]; + + /* Check the current # of rings in the rx group */ + ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt)); + + /* Get the ring based on the index */ + rx_ringp = &rx_grp->rings[r_index]; + + rx_ringp->handle = r_handle; + /* + * Note: we don't need to save the incoming r_index in rx_ring, + * as vnet_ring_grp_init() would have initialized the index for + * each ring in the array. + */ + rx_ringp->grp = rx_grp; + rx_ringp->vnetp = vnetp; + + mintr = &infop->mri_intr; + mintr->mi_handle = (mac_intr_handle_t)rx_ringp; + mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr; + mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr; + + infop->mri_driver = (mac_ring_driver_t)rx_ringp; + infop->mri_start = vnet_rx_ring_start; + infop->mri_stop = vnet_rx_ring_stop; + + /* Set the poll function, as this is an rx ring */ + infop->mri_poll = vnet_rx_poll; + + break; + } + + case MAC_RING_TYPE_TX: { + vnet_pseudo_tx_group_t *tx_grp; + vnet_pseudo_tx_ring_t *tx_ringp; + + /* + * No need to check grp index; mac layer passes -1 for it. + */ + tx_grp = &vnetp->tx_grp[0]; + + /* Check the # of rings in the tx group */ + ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt)); + + /* Get the ring based on the index */ + tx_ringp = &tx_grp->rings[r_index]; + + tx_ringp->handle = r_handle; + tx_ringp->index = r_index; + tx_ringp->grp = tx_grp; + tx_ringp->vnetp = vnetp; + + infop->mri_driver = (mac_ring_driver_t)tx_ringp; + infop->mri_start = vnet_tx_ring_start; + infop->mri_stop = vnet_tx_ring_stop; + + /* Set the transmit function, as this is a tx ring */ + infop->mri_tx = vnet_tx_ring_send; + + break; + } + + default: + break; + } +} + +/* + * Callback funtion for MAC layer to get group information. 
+ */ +static void +vnet_get_group(void *arg, mac_ring_type_t type, const int index, + mac_group_info_t *infop, mac_group_handle_t handle) +{ + vnet_t *vnetp = (vnet_t *)arg; + + switch (type) { + + case MAC_RING_TYPE_RX: + { + vnet_pseudo_rx_group_t *rx_grp; + + /* We advertised only one RX group */ + ASSERT(index == 0); + + rx_grp = &vnetp->rx_grp[index]; + rx_grp->handle = handle; + rx_grp->index = index; + rx_grp->vnetp = vnetp; + + infop->mgi_driver = (mac_group_driver_t)rx_grp; + infop->mgi_start = NULL; + infop->mgi_stop = NULL; + infop->mgi_addmac = vnet_addmac; + infop->mgi_remmac = vnet_remmac; + infop->mgi_count = rx_grp->ring_cnt; + + break; + } + + case MAC_RING_TYPE_TX: + { + vnet_pseudo_tx_group_t *tx_grp; + + /* We advertised only one TX group */ + ASSERT(index == 0); + + tx_grp = &vnetp->tx_grp[index]; + tx_grp->handle = handle; + tx_grp->index = index; + tx_grp->vnetp = vnetp; + + infop->mgi_driver = (mac_group_driver_t)tx_grp; + infop->mgi_start = NULL; + infop->mgi_stop = NULL; + infop->mgi_addmac = NULL; + infop->mgi_remmac = NULL; + infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS; + + break; + } + + default: + break; + + } +} + +static int +vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num) +{ + vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; + int err; + + /* + * If this ring is mapped to a LDC resource, simply mark the state to + * indicate the ring is started and return. + */ + if ((rx_ringp->state & + (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) { + rx_ringp->gen_num = mr_gen_num; + rx_ringp->state |= VNET_RXRING_STARTED; + return (0); + } + + ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); + + /* + * This must be a ring reserved for a hwring. If the hwring is not + * bound yet, simply mark the state to indicate the ring is started and + * return. If and when a hybrid resource is activated for this vnet + * device, we will bind the hwring and start it then. If a hwring is + * already bound, start it now. + */ + if (rx_ringp->hw_rh == NULL) { + rx_ringp->gen_num = mr_gen_num; + rx_ringp->state |= VNET_RXRING_STARTED; + return (0); + } + + err = mac_hwring_start(rx_ringp->hw_rh); + if (err == 0) { + rx_ringp->gen_num = mr_gen_num; + rx_ringp->state |= VNET_RXRING_STARTED; + } else { + err = ENXIO; + } + + return (err); +} + +static void +vnet_rx_ring_stop(mac_ring_driver_t arg) +{ + vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; + + /* + * If this ring is mapped to a LDC resource, simply mark the state to + * indicate the ring is now stopped and return. + */ + if ((rx_ringp->state & + (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) { + rx_ringp->state &= ~VNET_RXRING_STARTED; + } + + ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); + + /* + * This must be a ring reserved for a hwring. If the hwring is not + * bound yet, simply mark the state to indicate the ring is stopped and + * return. If a hwring is already bound, stop it now. 
+ */ + if (rx_ringp->hw_rh == NULL) { + rx_ringp->state &= ~VNET_RXRING_STARTED; + return; + } + + mac_hwring_stop(rx_ringp->hw_rh); + rx_ringp->state &= ~VNET_RXRING_STARTED; +} + +/* ARGSUSED */ +static int +vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num) +{ + vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg; + + tx_ringp->state |= VNET_TXRING_STARTED; + return (0); +} + +static void +vnet_tx_ring_stop(mac_ring_driver_t arg) +{ + vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg; + + tx_ringp->state &= ~VNET_TXRING_STARTED; +} + +/* + * Disable polling for a ring and enable its interrupt. + */ +static int +vnet_ring_enable_intr(void *arg) +{ + vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; + vnet_res_t *vresp; + + if (rx_ringp->hw_rh == NULL) { + /* + * Ring enable intr func is being invoked, but the ring is + * not bound to any underlying resource ? This must be a ring + * reserved for Hybrid resource and no such resource has been + * assigned to this vnet device yet. We simply return success. + */ + ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); + return (0); + } + + /* + * The rx ring has been bound to either a LDC or a Hybrid resource. + * Call the appropriate function to enable interrupts for the ring. + */ + if (rx_ringp->state & VNET_RXRING_HYBRID) { + return (mac_hwring_enable_intr(rx_ringp->hw_rh)); + } else { + vresp = (vnet_res_t *)rx_ringp->hw_rh; + return (vgen_enable_intr(vresp->macreg.m_driver)); + } +} + +/* + * Enable polling for a ring and disable its interrupt. + */ +static int +vnet_ring_disable_intr(void *arg) +{ + vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; + vnet_res_t *vresp; + + if (rx_ringp->hw_rh == NULL) { + /* + * Ring disable intr func is being invoked, but the ring is + * not bound to any underlying resource ? This must be a ring + * reserved for Hybrid resource and no such resource has been + * assigned to this vnet device yet. We simply return success. + */ + ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); + return (0); + } + + /* + * The rx ring has been bound to either a LDC or a Hybrid resource. + * Call the appropriate function to disable interrupts for the ring. + */ + if (rx_ringp->state & VNET_RXRING_HYBRID) { + return (mac_hwring_disable_intr(rx_ringp->hw_rh)); + } else { + vresp = (vnet_res_t *)rx_ringp->hw_rh; + return (vgen_disable_intr(vresp->macreg.m_driver)); + } +} + +/* + * Poll 'bytes_to_pickup' bytes of message from the rx ring. + */ +static mblk_t * +vnet_rx_poll(void *arg, int bytes_to_pickup) +{ + vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; + mblk_t *mp = NULL; + vnet_res_t *vresp; + vnet_t *vnetp = rx_ringp->vnetp; + + if (rx_ringp->hw_rh == NULL) { + return (NULL); + } + + if (rx_ringp->state & VNET_RXRING_HYBRID) { + mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup); + /* + * Packets received over a hybrid resource need additional + * processing to remove the tag, for the pvid case. The + * underlying resource is not aware of the vnet's pvid and thus + * packets are received with the vlan tag in the header; unlike + * packets that are received over a ldc channel in which case + * the peer vnet/vsw would have already removed the tag. 
+ */ + if (vnetp->pvid != vnetp->default_vlan_id) { + vnet_rx_frames_untag(vnetp->pvid, &mp); + } + } else { + vresp = (vnet_res_t *)rx_ringp->hw_rh; + mp = vgen_poll(vresp->macreg.m_driver, bytes_to_pickup); + } + return (mp); +} + +/* ARGSUSED */ +void +vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, + boolean_t loopback) +{ + vnet_t *vnetp = (vnet_t *)arg; + vnet_pseudo_rx_ring_t *ringp = (vnet_pseudo_rx_ring_t *)mrh; + + /* + * Packets received over a hybrid resource need additional processing + * to remove the tag, for the pvid case. The underlying resource is + * not aware of the vnet's pvid and thus packets are received with the + * vlan tag in the header; unlike packets that are received over a ldc + * channel in which case the peer vnet/vsw would have already removed + * the tag. + */ + if (vnetp->pvid != vnetp->default_vlan_id) { + vnet_rx_frames_untag(vnetp->pvid, &mp); + if (mp == NULL) { + return; + } + } + mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num); +} + +static int +vnet_addmac(void *arg, const uint8_t *mac_addr) +{ + vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg; + vnet_t *vnetp; + + vnetp = rx_grp->vnetp; + + if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) { + return (0); + } + + cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n", + vnetp->instance, __func__); + return (EINVAL); +} + +static int +vnet_remmac(void *arg, const uint8_t *mac_addr) +{ + vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg; + vnet_t *vnetp; + + vnetp = rx_grp->vnetp; + + if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) { + return (0); + } + + cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n", + vnetp->instance, __func__, ether_sprintf((void *)mac_addr)); + return (EINVAL); +} + +int +vnet_hio_mac_init(vnet_t *vnetp, char *ifname) +{ + mac_handle_t mh; + mac_client_handle_t mch = NULL; + mac_unicast_handle_t muh = NULL; + mac_diag_t diag; + mac_register_t *macp; + char client_name[MAXNAMELEN]; + int rv; + uint16_t mac_flags = MAC_UNICAST_TAG_DISABLE | + MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY; + vio_net_callbacks_t vcb; + ether_addr_t rem_addr = + { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + uint32_t retries = 0; + + if ((macp = mac_alloc(MAC_VERSION)) == NULL) { + return (EAGAIN); + } + + do { + rv = mac_open_by_linkname(ifname, &mh); + if (rv == 0) { + break; + } + if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) { + mac_free(macp); + return (rv); + } + drv_usecwait(vnet_mac_open_delay); + } while (rv == ENOENT); + + vnetp->hio_mh = mh; + + (void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance, + ifname); + rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE); + if (rv != 0) { + goto fail; + } + vnetp->hio_mch = mch; + + rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0, + &diag); + if (rv != 0) { + goto fail; + } + vnetp->hio_muh = muh; + + macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; + macp->m_driver = vnetp; + macp->m_dip = NULL; + macp->m_src_addr = NULL; + macp->m_callbacks = &vnet_hio_res_callbacks; + macp->m_min_sdu = 0; + macp->m_max_sdu = ETHERMTU; + + rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID, + vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb); + if (rv != 0) { + goto fail; + } + mac_free(macp); + + /* add the recv callback */ + mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp); + + /* add the notify callback - only tx updates for now */ + vnetp->hio_mnh = mac_notify_add(vnetp->hio_mh, vnet_hio_notify_cb, + vnetp); + 
+ return (0); + +fail: + mac_free(macp); + vnet_hio_mac_cleanup(vnetp); + return (1); +} + +void +vnet_hio_mac_cleanup(vnet_t *vnetp) +{ + if (vnetp->hio_mnh != NULL) { + (void) mac_notify_remove(vnetp->hio_mnh, B_TRUE); + vnetp->hio_mnh = NULL; + } + + if (vnetp->hio_vhp != NULL) { + vio_net_resource_unreg(vnetp->hio_vhp); + vnetp->hio_vhp = NULL; + } + + if (vnetp->hio_muh != NULL) { + mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh); + vnetp->hio_muh = NULL; + } + + if (vnetp->hio_mch != NULL) { + mac_client_close(vnetp->hio_mch, 0); + vnetp->hio_mch = NULL; + } + + if (vnetp->hio_mh != NULL) { + mac_close(vnetp->hio_mh); + vnetp->hio_mh = NULL; + } +} + +/* Bind pseudo rings to hwrings */ +static int +vnet_bind_hwrings(vnet_t *vnetp) +{ + mac_ring_handle_t hw_rh[VNET_NUM_HYBRID_RINGS]; + mac_perim_handle_t mph1; + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_rx_ring_t *rx_ringp; + vnet_pseudo_tx_group_t *tx_grp; + vnet_pseudo_tx_ring_t *tx_ringp; + int hw_ring_cnt; + int i; + int rv; + + mac_perim_enter_by_mh(vnetp->hio_mh, &mph1); + + /* Get the list of the underlying RX rings. */ + hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh, + MAC_RING_TYPE_RX); + + /* We expect the the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */ + if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) { + cmn_err(CE_WARN, + "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n", + vnetp->instance, hw_ring_cnt); + goto fail; + } + + if (vnetp->rx_hwgh != NULL) { + /* + * Quiesce the HW ring and the mac srs on the ring. Note + * that the HW ring will be restarted when the pseudo ring + * is started. At that time all the packets will be + * directly passed up to the pseudo RX ring and handled + * by mac srs created over the pseudo RX ring. + */ + mac_rx_client_quiesce(vnetp->hio_mch); + mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE); + } + + /* + * Bind the pseudo rings to the hwrings and start the hwrings. + * Note we don't need to register these with the upper mac, as we have + * statically exported these pseudo rxrings which are reserved for + * rxrings of Hybrid resource. + */ + rx_grp = &vnetp->rx_grp[0]; + for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { + /* Pick the rxrings reserved for Hybrid resource */ + rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX]; + + /* Store the hw ring handle */ + rx_ringp->hw_rh = hw_rh[i]; + + /* Bind the pseudo ring to the underlying hwring */ + mac_hwring_setup(rx_ringp->hw_rh, + (mac_resource_handle_t)rx_ringp); + + /* Start the hwring if needed */ + if (rx_ringp->state & VNET_RXRING_STARTED) { + rv = mac_hwring_start(rx_ringp->hw_rh); + if (rv != 0) { + mac_hwring_teardown(rx_ringp->hw_rh); + rx_ringp->hw_rh = NULL; + goto fail; + } + } + } + + /* Get the list of the underlying TX rings. */ + hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh, + MAC_RING_TYPE_TX); + + /* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */ + if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) { + cmn_err(CE_WARN, + "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n", + vnetp->instance, hw_ring_cnt); + goto fail; + } + + /* + * Now map the pseudo txrings to the hw txrings. Note we don't need + * to register these with the upper mac, as we have statically exported + * these rings. Note that these rings will continue to be used for LDC + * resources to peer vnets and vswitch (shared ring). 
+ */ + tx_grp = &vnetp->tx_grp[0]; + for (i = 0; i < tx_grp->ring_cnt; i++) { + tx_ringp = &tx_grp->rings[i]; + tx_ringp->hw_rh = hw_rh[i]; + tx_ringp->state |= VNET_TXRING_HYBRID; + } + + mac_perim_exit(mph1); + return (0); + +fail: + mac_perim_exit(mph1); + vnet_unbind_hwrings(vnetp); + return (1); +} + +/* Unbind pseudo rings from hwrings */ +static void +vnet_unbind_hwrings(vnet_t *vnetp) +{ + mac_perim_handle_t mph1; + vnet_pseudo_rx_ring_t *rx_ringp; + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_tx_group_t *tx_grp; + vnet_pseudo_tx_ring_t *tx_ringp; + int i; + + mac_perim_enter_by_mh(vnetp->hio_mh, &mph1); + + tx_grp = &vnetp->tx_grp[0]; + for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { + tx_ringp = &tx_grp->rings[i]; + if (tx_ringp->state & VNET_TXRING_HYBRID) { + tx_ringp->state &= ~VNET_TXRING_HYBRID; + tx_ringp->hw_rh = NULL; + } + } + + rx_grp = &vnetp->rx_grp[0]; + for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { + rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX]; + if (rx_ringp->hw_rh != NULL) { + /* Stop the hwring */ + mac_hwring_stop(rx_ringp->hw_rh); + + /* Teardown the hwring */ + mac_hwring_teardown(rx_ringp->hw_rh); + rx_ringp->hw_rh = NULL; + } + } + + if (vnetp->rx_hwgh != NULL) { + vnetp->rx_hwgh = NULL; + /* + * First clear the permanent-quiesced flag of the RX srs then + * restart the HW ring and the mac srs on the ring. + */ + mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE); + mac_rx_client_restart(vnetp->hio_mch); + } + + mac_perim_exit(mph1); +} + +/* Bind pseudo ring to a LDC resource */ +static int +vnet_bind_vgenring(vnet_res_t *vresp) +{ + vnet_t *vnetp; + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_rx_ring_t *rx_ringp; + mac_perim_handle_t mph1; + int rv; + int type; + + vnetp = vresp->vnetp; + type = vresp->type; + rx_grp = &vnetp->rx_grp[0]; + + if (type == VIO_NET_RES_LDC_SERVICE) { + /* + * Ring Index 0 is the default ring in the group and is + * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring + * is allocated statically and is reported to the mac layer + * in vnet_m_capab(). So, all we need to do here, is save a + * reference to the associated vresp. + */ + rx_ringp = &rx_grp->rings[0]; + rx_ringp->hw_rh = (mac_ring_handle_t)vresp; + vresp->rx_ringp = (void *)rx_ringp; + return (0); + } + ASSERT(type == VIO_NET_RES_LDC_GUEST); + + mac_perim_enter_by_mh(vnetp->mh, &mph1); + + rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp); + if (rx_ringp == NULL) { + cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring", + vnetp->instance); + goto fail; + } + + /* Store the LDC resource itself as the ring handle */ + rx_ringp->hw_rh = (mac_ring_handle_t)vresp; + + /* + * Save a reference to the ring in the resource for lookup during + * unbind. Note this is only done for LDC resources. We don't need this + * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its + * rx rings are mapped to reserved pseudo rx rings (index 1 and 2). 
+ */ + vresp->rx_ringp = (void *)rx_ringp; + rx_ringp->state |= VNET_RXRING_LDC_GUEST; + + /* Register the pseudo ring with upper-mac */ + rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index); + if (rv != 0) { + rx_ringp->state &= ~VNET_RXRING_LDC_GUEST; + rx_ringp->hw_rh = NULL; + vnet_free_pseudo_rx_ring(vnetp, rx_ringp); + goto fail; + } + + mac_perim_exit(mph1); + return (0); +fail: + mac_perim_exit(mph1); + return (1); +} + +/* Unbind pseudo ring from a LDC resource */ +static void +vnet_unbind_vgenring(vnet_res_t *vresp) +{ + vnet_t *vnetp; + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_rx_ring_t *rx_ringp; + mac_perim_handle_t mph1; + int type; + + vnetp = vresp->vnetp; + type = vresp->type; + rx_grp = &vnetp->rx_grp[0]; + + if (vresp->rx_ringp == NULL) { + return; + } + + if (type == VIO_NET_RES_LDC_SERVICE) { + /* + * Ring Index 0 is the default ring in the group and is + * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring + * is allocated statically and is reported to the mac layer + * in vnet_m_capab(). So, all we need to do here, is remove its + * reference to the associated vresp. + */ + rx_ringp = &rx_grp->rings[0]; + rx_ringp->hw_rh = NULL; + vresp->rx_ringp = NULL; + return; + } + ASSERT(type == VIO_NET_RES_LDC_GUEST); + + mac_perim_enter_by_mh(vnetp->mh, &mph1); + + rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp; + vresp->rx_ringp = NULL; + + if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) { + /* Unregister the pseudo ring with upper-mac */ + mac_group_rem_ring(rx_grp->handle, rx_ringp->handle); + + rx_ringp->hw_rh = NULL; + rx_ringp->state &= ~VNET_RXRING_LDC_GUEST; + + /* Free the pseudo rx ring */ + vnet_free_pseudo_rx_ring(vnetp, rx_ringp); + } + + mac_perim_exit(mph1); +} + +static void +vnet_unbind_rings(vnet_res_t *vresp) +{ + switch (vresp->type) { + + case VIO_NET_RES_LDC_SERVICE: + case VIO_NET_RES_LDC_GUEST: + vnet_unbind_vgenring(vresp); + break; + + case VIO_NET_RES_HYBRID: + vnet_unbind_hwrings(vresp->vnetp); + break; + + default: + break; + + } +} + +static int +vnet_bind_rings(vnet_res_t *vresp) +{ + int rv; + + switch (vresp->type) { + + case VIO_NET_RES_LDC_SERVICE: + case VIO_NET_RES_LDC_GUEST: + rv = vnet_bind_vgenring(vresp); + break; + + case VIO_NET_RES_HYBRID: + rv = vnet_bind_hwrings(vresp->vnetp); + break; + + default: + rv = 1; + break; + + } + + return (rv); +} + +/* ARGSUSED */ +int +vnet_hio_stat(void *arg, uint_t stat, uint64_t *val) +{ + vnet_t *vnetp = (vnet_t *)arg; + + *val = mac_stat_get(vnetp->hio_mh, stat); + return (0); +} + +/* + * The start() and stop() routines for the Hybrid resource below, are just + * dummy functions. This is provided to avoid resource type specific code in + * vnet_start_resources() and vnet_stop_resources(). The starting and stopping + * of the Hybrid resource happens in the context of the mac_client interfaces + * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup(). 
+ */ +/* ARGSUSED */ +static int +vnet_hio_start(void *arg) +{ + return (0); +} + +/* ARGSUSED */ +static void +vnet_hio_stop(void *arg) +{ +} + +mblk_t * +vnet_hio_tx(void *arg, mblk_t *mp) +{ + vnet_pseudo_tx_ring_t *tx_ringp; + mblk_t *nextp; + mblk_t *ret_mp; + + tx_ringp = (vnet_pseudo_tx_ring_t *)arg; + for (;;) { + nextp = mp->b_next; + mp->b_next = NULL; + + ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp); + if (ret_mp != NULL) { + ret_mp->b_next = nextp; + mp = ret_mp; + break; + } + + if ((mp = nextp) == NULL) + break; + } + return (mp); +} + +static void +vnet_hio_notify_cb(void *arg, mac_notify_type_t type) +{ + vnet_t *vnetp = (vnet_t *)arg; + mac_perim_handle_t mph; + + mac_perim_enter_by_mh(vnetp->hio_mh, &mph); + switch (type) { + case MAC_NOTE_TX: + vnet_tx_update(vnetp->hio_vhp); + break; + + default: + break; + } + mac_perim_exit(mph); +} + #ifdef VNET_IOC_DEBUG /* |