Diffstat (limited to 'usr/src/uts/sun4v/io/vnet.c')
-rw-r--r--  usr/src/uts/sun4v/io/vnet.c  1408
1 file changed, 1345 insertions(+), 63 deletions(-)
diff --git a/usr/src/uts/sun4v/io/vnet.c b/usr/src/uts/sun4v/io/vnet.c
index 32b67b2588..884665b77f 100644
--- a/usr/src/uts/sun4v/io/vnet.c
+++ b/usr/src/uts/sun4v/io/vnet.c
@@ -40,6 +40,8 @@
#include <sys/dlpi.h>
#include <net/if.h>
#include <sys/mac_provider.h>
+#include <sys/mac_client.h>
+#include <sys/mac_client_priv.h>
#include <sys/mac_ether.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
@@ -75,11 +77,38 @@ static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
#ifdef VNET_IOC_DEBUG
static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
#endif
+static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
+static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
+ const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
+static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
+ mac_group_info_t *infop, mac_group_handle_t handle);
+static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
+static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
+static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
+static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
+static int vnet_ring_enable_intr(void *arg);
+static int vnet_ring_disable_intr(void *arg);
+static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
+static int vnet_addmac(void *arg, const uint8_t *mac_addr);
+static int vnet_remmac(void *arg, const uint8_t *mac_addr);
/* vnet internal functions */
static int vnet_unattach(vnet_t *vnetp);
+static void vnet_ring_grp_init(vnet_t *vnetp);
+static void vnet_ring_grp_uninit(vnet_t *vnetp);
static int vnet_mac_register(vnet_t *);
static int vnet_read_mac_address(vnet_t *vnetp);
+static int vnet_bind_vgenring(vnet_res_t *vresp);
+static void vnet_unbind_vgenring(vnet_res_t *vresp);
+static int vnet_bind_hwrings(vnet_t *vnetp);
+static void vnet_unbind_hwrings(vnet_t *vnetp);
+static int vnet_bind_rings(vnet_res_t *vresp);
+static void vnet_unbind_rings(vnet_res_t *vresp);
+static int vnet_hio_stat(void *, uint_t, uint64_t *);
+static int vnet_hio_start(void *);
+static void vnet_hio_stop(void *);
+static void vnet_hio_notify_cb(void *arg, mac_notify_type_t type);
+mblk_t *vnet_hio_tx(void *, mblk_t *);
/* Forwarding database (FDB) routines */
static void vnet_fdb_create(vnet_t *vnetp);
@@ -98,6 +127,8 @@ static void vnet_stop_resources(vnet_t *vnetp);
static void vnet_dispatch_res_task(vnet_t *vnetp);
static void vnet_res_start_task(void *arg);
static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
+static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
+static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
/* Exported to vnet_gen */
int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
@@ -112,15 +143,21 @@ static void vnet_hio_destroy_kstats(kstat_t *ksp);
/* Exported to vnet_dds */
int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
+int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
+void vnet_hio_mac_cleanup(vnet_t *vnetp);
/* Externs that are imported from vnet_gen */
extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
const uint8_t *macaddr, void **vgenhdl);
+extern int vgen_init_mdeg(void *arg);
extern void vgen_uninit(void *arg);
extern int vgen_dds_tx(void *arg, void *dmsg);
extern void vgen_mod_init(void);
extern int vgen_mod_cleanup(void);
extern void vgen_mod_fini(void);
+extern int vgen_enable_intr(void *arg);
+extern int vgen_disable_intr(void *arg);
+extern mblk_t *vgen_poll(void *arg, int bytes_to_pickup);
/* Externs that are imported from vnet_dds */
extern void vdds_mod_init(void);
@@ -131,6 +168,9 @@ extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
extern void vdds_cleanup_hybrid_res(void *arg);
extern void vdds_cleanup_hio(vnet_t *vnetp);
+/* Externs imported from mac_impl */
+extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *);
+
#define DRV_NAME "vnet"
#define VNET_FDBE_REFHOLD(p) \
{ \
@@ -145,9 +185,9 @@ extern void vdds_cleanup_hio(vnet_t *vnetp);
}
#ifdef VNET_IOC_DEBUG
-#define VNET_M_CALLBACK_FLAGS (MC_IOCTL)
+#define VNET_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB)
#else
-#define VNET_M_CALLBACK_FLAGS (0)
+#define VNET_M_CALLBACK_FLAGS (MC_GETCAPAB)
#endif
static mac_callbacks_t vnet_m_callbacks = {
@@ -157,9 +197,23 @@ static mac_callbacks_t vnet_m_callbacks = {
vnet_m_stop,
vnet_m_promisc,
vnet_m_multicst,
- vnet_m_unicst,
- vnet_m_tx,
+ NULL, /* m_unicst entry must be NULL while rx rings are exposed */
+ NULL, /* m_tx entry must be NULL while tx rings are exposed */
vnet_m_ioctl,
+ vnet_m_capab,
+ NULL
+};
+
+static mac_callbacks_t vnet_hio_res_callbacks = {
+ 0,
+ vnet_hio_stat,
+ vnet_hio_start,
+ vnet_hio_stop,
+ NULL,
+ NULL,
+ NULL,
+ vnet_hio_tx,
+ NULL,
NULL,
NULL
};
@@ -176,6 +230,9 @@ uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT; /* tx timeout in msec */
uint32_t vnet_ldc_mtu = VNET_LDC_MTU; /* ldc mtu */
+/* Configure tx serialization in mac layer for the vnet device */
+boolean_t vnet_mac_tx_serialize = B_TRUE;
+
/*
* Set this to non-zero to enable additional internal receive buffer pools
* based on the MTU of the device for better performance at the cost of more
@@ -206,6 +263,11 @@ static struct ether_addr etherbroadcastaddr = {
0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
+/* mac_open() retry delay in usec */
+uint32_t vnet_mac_open_delay = 100; /* 0.1 ms */
+
+/* max # of mac_open() retries */
+uint32_t vnet_mac_open_retries = 100;
/*
* Property names
@@ -375,6 +437,9 @@ vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
attach_progress |= AST_vnet_alloc;
+ vnet_ring_grp_init(vnetp);
+ attach_progress |= AST_ring_init;
+
status = vdds_init(vnetp);
if (status != 0) {
goto vnet_attach_fail;
@@ -419,10 +484,19 @@ vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
attach_progress |= AST_vnet_list;
/*
- * Initialize the generic vnet plugin which provides
- * communication via sun4v LDC (logical domain channel) based
- * resources. It will register the LDC resources as and when
- * they become available.
+ * Initialize the generic vnet plugin which provides communication via
+ * sun4v LDC (logical domain channel) based resources. This involves 2
+ * steps; first, vgen_init() is invoked to read the various properties
+ * of the vnet device from its MD node (including its mtu which is
+ * needed to mac_register()) and obtain a handle to the vgen layer.
+ * After mac_register() is done and we have a mac handle, we then
+ * invoke vgen_init_mdeg() which registers with the MD event
+ * generator (mdeg) framework to allow LDC resource notifications.
+ * Note: this sequence also allows us to report the correct default #
+ * of pseudo rings (2 TX and 3 RX) in vnet_m_capab() which gets invoked
+ * in the context of mac_register(); and avoids conflicting with
+ * dynamic pseudo rx rings which get added/removed as a result of mdeg
+ * events in vgen.
*/
status = vgen_init(vnetp, reg, vnetp->dip,
(uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
@@ -432,15 +506,19 @@ vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
}
attach_progress |= AST_vgen_init;
- /* register with MAC layer */
status = vnet_mac_register(vnetp);
if (status != DDI_SUCCESS) {
goto vnet_attach_fail;
}
vnetp->link_state = LINK_STATE_UNKNOWN;
-
attach_progress |= AST_macreg;
+ status = vgen_init_mdeg(vnetp->vgenhdl);
+ if (status != DDI_SUCCESS) {
+ goto vnet_attach_fail;
+ }
+ attach_progress |= AST_init_mdeg;
+
vnetp->attach_progress = attach_progress;
DBG1(NULL, "instance(%d) exit\n", instance);
@@ -503,21 +581,25 @@ vnet_unattach(vnet_t *vnetp)
attach_progress = vnetp->attach_progress;
/*
- * Unregister from the gldv3 subsystem. This can fail, in particular
- * if there are still any open references to this mac device; in which
- * case we just return failure without continuing to detach further.
+ * Disable the mac device in the gldv3 subsystem. This can fail, in
+ * particular if there are still any open references to this mac
+ * device; in which case we just return failure without continuing to
+ * detach further.
+ * If it succeeds, we then invoke vgen_uninit() which should unregister
+ * any pseudo rings registered with the mac layer. Note we keep the
+ * AST_macreg flag on, so we can unregister with the mac layer at
+ * the end of this routine.
*/
if (attach_progress & AST_macreg) {
- if (mac_unregister(vnetp->mh) != 0) {
+ if (mac_disable(vnetp->mh) != 0) {
return (1);
}
- attach_progress &= ~AST_macreg;
}
/*
- * Now that we have unregistered from gldv3, we must finish all other
- * steps and successfully return from this function; otherwise we will
- * end up leaving the device in a broken/unusable state.
+ * Now that we have disabled the device, we must finish all other steps
+ * and successfully return from this function; otherwise we will end up
+ * leaving the device in a broken/unusable state.
*
* First, release any hybrid resources assigned to this vnet device.
*/
@@ -530,9 +612,10 @@ vnet_unattach(vnet_t *vnetp)
* Uninit vgen. This stops further mdeg callbacks to this vnet
* device and/or its ports; and detaches any existing ports.
*/
- if (attach_progress & AST_vgen_init) {
+ if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
vgen_uninit(vnetp->vgenhdl);
attach_progress &= ~AST_vgen_init;
+ attach_progress &= ~AST_init_mdeg;
}
/* Destroy the taskq. */
@@ -563,6 +646,17 @@ vnet_unattach(vnet_t *vnetp)
attach_progress &= ~AST_vnet_list;
}
+ if (attach_progress & AST_ring_init) {
+ vnet_ring_grp_uninit(vnetp);
+ attach_progress &= ~AST_ring_init;
+ }
+
+ if (attach_progress & AST_macreg) {
+ VERIFY(mac_unregister(vnetp->mh) == 0);
+ vnetp->mh = NULL;
+ attach_progress &= ~AST_macreg;
+ }
+
if (attach_progress & AST_vnet_alloc) {
rw_destroy(&vnetp->vrwlock);
rw_destroy(&vnetp->vsw_fp_rw);
@@ -683,8 +777,9 @@ vnet_m_promisc(void *arg, boolean_t on)
* external hosts.
*/
mblk_t *
-vnet_m_tx(void *arg, mblk_t *mp)
+vnet_tx_ring_send(void *arg, mblk_t *mp)
{
+ vnet_pseudo_tx_ring_t *tx_ringp;
vnet_t *vnetp;
vnet_res_t *vresp;
mblk_t *next;
@@ -694,8 +789,10 @@ vnet_m_tx(void *arg, mblk_t *mp)
boolean_t is_unicast;
boolean_t is_pvid; /* non-default pvid ? */
boolean_t hres; /* Hybrid resource ? */
+ void *tx_arg;
- vnetp = (vnet_t *)arg;
+ tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
+ vnetp = (vnet_t *)tx_ringp->vnetp;
DBG1(vnetp, "enter\n");
ASSERT(mp != NULL);
@@ -790,10 +887,14 @@ vnet_m_tx(void *arg, mblk_t *mp)
}
}
- }
- macp = &vresp->macreg;
- resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
+ macp = &vresp->macreg;
+ tx_arg = tx_ringp;
+ } else {
+ macp = &vresp->macreg;
+ tx_arg = macp->m_driver;
+ }
+ resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);
/* tx done; now release ref on fdb entry */
VNET_FDBE_REFRELE(vresp);
@@ -848,6 +949,124 @@ vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
return (0);
}
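+
+/*
+ * Initialize the pseudo tx and rx ring groups of the vnet device. The tx
+ * group gets a fixed set of shared pseudo tx rings; the rx group reserves
+ * the default pseudo rx rings (LDC service and Hybrid) and marks the rest
+ * of its entries free for dynamically added LDC resources.
+ */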
+static void
+vnet_ring_grp_init(vnet_t *vnetp)
+{
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_rx_ring_t *rx_ringp;
+ vnet_pseudo_tx_group_t *tx_grp;
+ vnet_pseudo_tx_ring_t *tx_ringp;
+ int i;
+
+ tx_grp = &vnetp->tx_grp[0];
+ tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
+ VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
+ for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
+ tx_ringp[i].state |= VNET_TXRING_SHARED;
+ }
+ tx_grp->rings = tx_ringp;
+ tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;
+
+ rx_grp = &vnetp->rx_grp[0];
+ rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
+ rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
+ rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
+ rx_grp->max_ring_cnt, KM_SLEEP);
+
+ /*
+ * Setup the first 3 Pseudo RX Rings that are reserved;
+ * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
+ */
+ rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
+ rx_ringp[0].index = 0;
+ rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
+ rx_ringp[1].index = 1;
+ rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
+ rx_ringp[2].index = 2;
+
+ rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
+ rx_grp->rings = rx_ringp;
+
+ for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
+ i < rx_grp->max_ring_cnt; i++) {
+ rx_ringp = &rx_grp->rings[i];
+ rx_ringp->state = VNET_RXRING_FREE;
+ rx_ringp->index = i;
+ }
+}
+
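+/*
+ * Free the pseudo tx and rx ring tables allocated in vnet_ring_grp_init().
+ */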
+static void
+vnet_ring_grp_uninit(vnet_t *vnetp)
+{
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_tx_group_t *tx_grp;
+
+ tx_grp = &vnetp->tx_grp[0];
+ if (tx_grp->rings != NULL) {
+ ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
+ kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
+ tx_grp->ring_cnt);
+ tx_grp->rings = NULL;
+ }
+
+ rx_grp = &vnetp->rx_grp[0];
+ if (rx_grp->rings != NULL) {
+ ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
+ ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
+ kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
+ rx_grp->max_ring_cnt);
+ rx_grp->rings = NULL;
+ }
+}
+
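+/*
+ * Allocate a free pseudo rx ring from the rx group; returns NULL if all
+ * rings in the group are already in use.
+ */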
+static vnet_pseudo_rx_ring_t *
+vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
+{
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_rx_ring_t *rx_ringp;
+ int index;
+
+ rx_grp = &vnetp->rx_grp[0];
+ WRITE_ENTER(&rx_grp->lock);
+
+ if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
+ /* no rings available */
+ RW_EXIT(&rx_grp->lock);
+ return (NULL);
+ }
+
+ for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
+ index < rx_grp->max_ring_cnt; index++) {
+ rx_ringp = &rx_grp->rings[index];
+ if (rx_ringp->state == VNET_RXRING_FREE) {
+ rx_ringp->state |= VNET_RXRING_INUSE;
+ rx_grp->ring_cnt++;
+ break;
+ }
+ }
+
+ RW_EXIT(&rx_grp->lock);
+ return (rx_ringp);
+}
+
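+/*
+ * Return a dynamically allocated pseudo rx ring to the free pool of the
+ * rx group.
+ */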
+static void
+vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
+{
+ vnet_pseudo_rx_group_t *rx_grp;
+
+ ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
+ rx_grp = &vnetp->rx_grp[0];
+ WRITE_ENTER(&rx_grp->lock);
+
+ if (ringp->state != VNET_RXRING_FREE) {
+ ringp->state = VNET_RXRING_FREE;
+ ringp->handle = NULL;
+ rx_grp->ring_cnt--;
+ }
+
+ RW_EXIT(&rx_grp->lock);
+}
+
/* wrapper function for mac_register() */
static int
vnet_mac_register(vnet_t *vnetp)
@@ -867,6 +1086,15 @@ vnet_mac_register(vnet_t *vnetp)
macp->m_margin = VLAN_TAGSZ;
/*
+ * MAC_VIRT_SERIALIZE flag is needed while hybridIO is enabled to
+ * work around tx lock contention issues in nxge.
+ */
+ macp->m_v12n = MAC_VIRT_LEVEL1;
+ if (vnet_mac_tx_serialize == B_TRUE) {
+ macp->m_v12n |= MAC_VIRT_SERIALIZE;
+ }
+
+ /*
* Finally, we're ready to register ourselves with the MAC layer
* interface; if this succeeds, we're all ready to start()
*/
@@ -1116,42 +1344,57 @@ vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
static void
vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
{
- vnet_res_t *vresp = (vnet_res_t *)vrh;
- vnet_t *vnetp = vresp->vnetp;
+ vnet_res_t *vresp = (vnet_res_t *)vrh;
+ vnet_t *vnetp = vresp->vnetp;
+ vnet_pseudo_rx_ring_t *ringp;
if ((vnetp == NULL) || (vnetp->mh == 0)) {
freemsgchain(mp);
return;
}
- /*
- * Packets received over a hybrid resource need additional processing
- * to remove the tag, for the pvid case. The underlying resource is
- * not aware of the vnet's pvid and thus packets are received with the
- * vlan tag in the header; unlike packets that are received over a ldc
- * channel in which case the peer vnet/vsw would have already removed
- * the tag.
- */
- if (vresp->type == VIO_NET_RES_HYBRID &&
- vnetp->pvid != vnetp->default_vlan_id) {
-
- vnet_rx_frames_untag(vnetp->pvid, &mp);
- if (mp == NULL) {
- return;
- }
- }
-
- mac_rx(vnetp->mh, NULL, mp);
+ ringp = vresp->rx_ringp;
+ mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
}
void
vnet_tx_update(vio_net_handle_t vrh)
{
- vnet_res_t *vresp = (vnet_res_t *)vrh;
- vnet_t *vnetp = vresp->vnetp;
+ vnet_res_t *vresp = (vnet_res_t *)vrh;
+ vnet_t *vnetp = vresp->vnetp;
+ vnet_pseudo_tx_ring_t *tx_ringp;
+ vnet_pseudo_tx_group_t *tx_grp;
+ int i;
+
+ if (vnetp == NULL || vnetp->mh == NULL) {
+ return;
+ }
- if ((vnetp != NULL) && (vnetp->mh != NULL)) {
- mac_tx_update(vnetp->mh);
+ /*
+ * Currently, the tx hwring API (used to access rings that belong to
+ * a Hybrid IO resource) does not provide us a per ring flow ctrl
+ * update; also the pseudo rings are shared by the ports/ldcs in the
+ * vgen layer. Thus we can't figure out which pseudo ring is being
+ * re-enabled for transmits. To work around this, when we get a tx
+ * restart notification from below, we simply propagate that to all
+ * the tx pseudo rings registered with the mac layer above.
+ *
+ * There are a couple of side effects with this approach, but they are
+ * not harmful, as outlined below:
+ *
+ * A) We might send an invalid ring_update() for a ring that is not
+ * really flow controlled. This will not have any effect in the mac
+ * layer and packets will continue to be transmitted on that ring.
+ *
+ * B) We might end up clearing the flow control in the mac layer for
+ * a ring that is still flow controlled in the underlying resource.
+ * This will result in the mac layer restarting transmit, only to be
+ * flow controlled again on that ring.
+ */
+ tx_grp = &vnetp->tx_grp[0];
+ for (i = 0; i < tx_grp->ring_cnt; i++) {
+ tx_ringp = &tx_grp->rings[i];
+ mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
}
}
@@ -1233,8 +1476,8 @@ int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
vio_net_callbacks_t *vcb)
{
- vnet_t *vnetp;
- vnet_res_t *vresp;
+ vnet_t *vnetp;
+ vnet_res_t *vresp;
vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
ether_copy(local_macaddr, vresp->local_macaddr);
@@ -1260,11 +1503,7 @@ int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
vnetp->instance);
}
}
-
- WRITE_ENTER(&vnetp->vrwlock);
- vresp->nextp = vnetp->vres_list;
- vnetp->vres_list = vresp;
- RW_EXIT(&vnetp->vrwlock);
+ vnet_add_resource(vnetp, vresp);
break;
}
vnetp = vnetp->nextp;
@@ -1281,6 +1520,14 @@ int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
vcb->vio_net_tx_update = vnet_tx_update;
vcb->vio_net_report_err = vnet_handle_res_err;
+ /* Bind the resource to pseudo ring(s) */
+ if (vnet_bind_rings(vresp) != 0) {
+ (void) vnet_rem_resource(vnetp, vresp);
+ vnet_hio_destroy_kstats(vresp->ksp);
+ KMEM_FREE(vresp);
+ return (1);
+ }
+
/* Dispatch a task to start resources */
vnet_dispatch_res_task(vnetp);
return (0);
@@ -1294,8 +1541,6 @@ vio_net_resource_unreg(vio_net_handle_t vhp)
{
vnet_res_t *vresp = (vnet_res_t *)vhp;
vnet_t *vnetp = vresp->vnetp;
- vnet_res_t *vrp;
- kstat_t *ksp = NULL;
DBG1(NULL, "Resource Registering hdl=0x%p", vhp);
@@ -1306,7 +1551,29 @@ vio_net_resource_unreg(vio_net_handle_t vhp)
*/
vnet_fdbe_del(vnetp, vresp);
+ vnet_unbind_rings(vresp);
+
/* Now remove the resource from the list */
+ (void) vnet_rem_resource(vnetp, vresp);
+
+ vnet_hio_destroy_kstats(vresp->ksp);
+ KMEM_FREE(vresp);
+}
+
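+/*
+ * Add the given resource to the head of the resource list of the vnet device.
+ */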
+static void
+vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
+{
+ WRITE_ENTER(&vnetp->vrwlock);
+ vresp->nextp = vnetp->vres_list;
+ vnetp->vres_list = vresp;
+ RW_EXIT(&vnetp->vrwlock);
+}
+
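+/*
+ * Remove the given resource from the resource list of the vnet device and
+ * return it to the caller.
+ */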
+static vnet_res_t *
+vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
+{
+ vnet_res_t *vrp;
+
WRITE_ENTER(&vnetp->vrwlock);
if (vresp == vnetp->vres_list) {
vnetp->vres_list = vresp->nextp;
@@ -1320,15 +1587,12 @@ vio_net_resource_unreg(vio_net_handle_t vhp)
vrp = vrp->nextp;
}
}
-
- ksp = vresp->ksp;
- vresp->ksp = NULL;
-
vresp->vnetp = NULL;
vresp->nextp = NULL;
+
RW_EXIT(&vnetp->vrwlock);
- vnet_hio_destroy_kstats(ksp);
- KMEM_FREE(vresp);
+
+ return (vresp);
}
/*
@@ -1710,6 +1974,1024 @@ vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
}
}
+static boolean_t
+vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
+{
+ vnet_t *vnetp = (vnet_t *)arg;
+
+ if (vnetp == NULL) {
+ return (0);
+ }
+
+ switch (cap) {
+
+ case MAC_CAPAB_RINGS: {
+
+ mac_capab_rings_t *cap_rings = cap_data;
+ /*
+ * Rings Capability Notes:
+ * We advertise rings to make use of the rings framework in
+ * gldv3 mac layer, to improve the performance. This is
+ * specifically needed when a Hybrid resource (with multiple
+ * tx/rx hardware rings) is assigned to a vnet device. We also
+ * leverage this for the normal case when no Hybrid resource is
+ * assigned.
+ *
+ * Ring Allocation:
+ * - TX path:
+ * We expose a pseudo ring group with 2 pseudo tx rings (as
+ * currently HybridIO exports only 2 rings). In the normal case,
+ * transmit traffic that comes down to the driver through the
+ * mri_tx (vnet_tx_ring_send()) entry point goes through the
+ * distributed switching algorithm in vnet and gets transmitted
+ * over a port/LDC in the vgen layer to either the vswitch or a
+ * peer vnet. If and when a Hybrid resource is assigned to the
+ * vnet, we obtain the tx ring information of the Hybrid device
+ * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings.
+ * Traffic being sent over the Hybrid resource by the mac layer
+ * gets spread across both hw rings, as they are mapped to the
+ * 2 pseudo tx rings in vnet.
+ *
+ * - RX path:
+ * We expose a pseudo ring group with 3 pseudo rx rings (static
+ * rings) initially. The first (default) pseudo rx ring is
+ * reserved for the resource that connects to the vswitch
+ * service. The next 2 rings are reserved for a Hybrid resource
+ * that may be assigned to the vnet device. If and when a
+ * Hybrid resource is assigned to the vnet, we obtain the rx
+ * ring information of the Hybrid device (nxge) and map these
+ * pseudo rings 1:1 to the 2 hw rx rings. For each additional
+ * resource that connects to a peer vnet, we dynamically
+ * allocate a pseudo rx ring and map it to that resource, when
+ * the resource gets added; and the pseudo rx ring is
+ * dynamically registered with the upper mac layer. We do the
+ * reverse and unregister the ring with the mac layer when
+ * the resource gets removed.
+ *
+ * Synchronization notes:
+ * We don't need any lock to protect members of ring structure,
+ * specifically ringp->hw_rh, in either the TX or the RX ring,
+ * as explained below.
+ * - TX ring:
+ * ring->hw_rh is initialized only when a Hybrid resource is
+ * associated; and gets referenced only in vnet_hio_tx(). The
+ * Hybrid resource itself is available in fdb only after tx
+ * hwrings are found and mapped; i.e, in vio_net_resource_reg()
+ * we call vnet_bind_rings() first and then call
+ * vnet_start_resources() which adds an entry to fdb. For
+ * traffic going over LDC resources, we don't reference
+ * ring->hw_rh at all.
+ * - RX ring:
+ * For rings mapped to Hybrid resource ring->hw_rh is
+ * initialized and only then do we add the rx callback for
+ * the underlying Hybrid resource; we disable callbacks before
+ * we unmap ring->hw_rh. For rings mapped to LDC resources, we
+ * stop the rx callbacks (in vgen) before we remove ring->hw_rh
+ * (vio_net_resource_unreg()).
+ */
+
+ if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
+ cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
+
+ /*
+ * The ring_cnt for rx grp is initialized in
+ * vnet_ring_grp_init(). Later, the ring_cnt gets
+ * updated dynamically whenever LDC resources are added
+ * or removed.
+ */
+ cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
+ cap_rings->mr_rget = vnet_get_ring;
+
+ cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
+ cap_rings->mr_gget = vnet_get_group;
+ cap_rings->mr_gaddring = NULL;
+ cap_rings->mr_gremring = NULL;
+ } else {
+ cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
+
+ /*
+ * The ring_cnt for tx grp is initialized in
+ * vnet_ring_grp_init() and remains constant, as we
+ * do not support dynamic tx rings for now.
+ */
+ cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
+ cap_rings->mr_rget = vnet_get_ring;
+
+ /*
+ * Transmit rings are not grouped; i.e, the number of
+ * transmit ring groups advertised should be set to 0.
+ */
+ cap_rings->mr_gnum = 0;
+
+ cap_rings->mr_gget = vnet_get_group;
+ cap_rings->mr_gaddring = NULL;
+ cap_rings->mr_gremring = NULL;
+ }
+ return (B_TRUE);
+
+ }
+
+ default:
+ break;
+
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * Callback function for MAC layer to get ring information.
+ */
+static void
+vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
+ const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
+{
+ vnet_t *vnetp = arg;
+
+ switch (rtype) {
+
+ case MAC_RING_TYPE_RX: {
+
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_rx_ring_t *rx_ringp;
+ mac_intr_t *mintr;
+
+ /* We advertised only one RX group */
+ ASSERT(g_index == 0);
+ rx_grp = &vnetp->rx_grp[g_index];
+
+ /* Check the current # of rings in the rx group */
+ ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));
+
+ /* Get the ring based on the index */
+ rx_ringp = &rx_grp->rings[r_index];
+
+ rx_ringp->handle = r_handle;
+ /*
+ * Note: we don't need to save the incoming r_index in rx_ring,
+ * as vnet_ring_grp_init() would have initialized the index for
+ * each ring in the array.
+ */
+ rx_ringp->grp = rx_grp;
+ rx_ringp->vnetp = vnetp;
+
+ mintr = &infop->mri_intr;
+ mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
+ mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
+ mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;
+
+ infop->mri_driver = (mac_ring_driver_t)rx_ringp;
+ infop->mri_start = vnet_rx_ring_start;
+ infop->mri_stop = vnet_rx_ring_stop;
+
+ /* Set the poll function, as this is an rx ring */
+ infop->mri_poll = vnet_rx_poll;
+
+ break;
+ }
+
+ case MAC_RING_TYPE_TX: {
+ vnet_pseudo_tx_group_t *tx_grp;
+ vnet_pseudo_tx_ring_t *tx_ringp;
+
+ /*
+ * No need to check grp index; mac layer passes -1 for it.
+ */
+ tx_grp = &vnetp->tx_grp[0];
+
+ /* Check the # of rings in the tx group */
+ ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));
+
+ /* Get the ring based on the index */
+ tx_ringp = &tx_grp->rings[r_index];
+
+ tx_ringp->handle = r_handle;
+ tx_ringp->index = r_index;
+ tx_ringp->grp = tx_grp;
+ tx_ringp->vnetp = vnetp;
+
+ infop->mri_driver = (mac_ring_driver_t)tx_ringp;
+ infop->mri_start = vnet_tx_ring_start;
+ infop->mri_stop = vnet_tx_ring_stop;
+
+ /* Set the transmit function, as this is a tx ring */
+ infop->mri_tx = vnet_tx_ring_send;
+
+ break;
+ }
+
+ default:
+ break;
+ }
+}
+
+/*
+ * Callback function for MAC layer to get group information.
+ */
+static void
+vnet_get_group(void *arg, mac_ring_type_t type, const int index,
+ mac_group_info_t *infop, mac_group_handle_t handle)
+{
+ vnet_t *vnetp = (vnet_t *)arg;
+
+ switch (type) {
+
+ case MAC_RING_TYPE_RX:
+ {
+ vnet_pseudo_rx_group_t *rx_grp;
+
+ /* We advertised only one RX group */
+ ASSERT(index == 0);
+
+ rx_grp = &vnetp->rx_grp[index];
+ rx_grp->handle = handle;
+ rx_grp->index = index;
+ rx_grp->vnetp = vnetp;
+
+ infop->mgi_driver = (mac_group_driver_t)rx_grp;
+ infop->mgi_start = NULL;
+ infop->mgi_stop = NULL;
+ infop->mgi_addmac = vnet_addmac;
+ infop->mgi_remmac = vnet_remmac;
+ infop->mgi_count = rx_grp->ring_cnt;
+
+ break;
+ }
+
+ case MAC_RING_TYPE_TX:
+ {
+ vnet_pseudo_tx_group_t *tx_grp;
+
+ /* We advertised only one TX group */
+ ASSERT(index == 0);
+
+ tx_grp = &vnetp->tx_grp[index];
+ tx_grp->handle = handle;
+ tx_grp->index = index;
+ tx_grp->vnetp = vnetp;
+
+ infop->mgi_driver = (mac_group_driver_t)tx_grp;
+ infop->mgi_start = NULL;
+ infop->mgi_stop = NULL;
+ infop->mgi_addmac = NULL;
+ infop->mgi_remmac = NULL;
+ infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;
+
+ break;
+ }
+
+ default:
+ break;
+
+ }
+}
+
+static int
+vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
+{
+ vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
+ int err;
+
+ /*
+ * If this ring is mapped to a LDC resource, simply mark the state to
+ * indicate the ring is started and return.
+ */
+ if ((rx_ringp->state &
+ (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
+ rx_ringp->gen_num = mr_gen_num;
+ rx_ringp->state |= VNET_RXRING_STARTED;
+ return (0);
+ }
+
+ ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
+
+ /*
+ * This must be a ring reserved for a hwring. If the hwring is not
+ * bound yet, simply mark the state to indicate the ring is started and
+ * return. If and when a hybrid resource is activated for this vnet
+ * device, we will bind the hwring and start it then. If a hwring is
+ * already bound, start it now.
+ */
+ if (rx_ringp->hw_rh == NULL) {
+ rx_ringp->gen_num = mr_gen_num;
+ rx_ringp->state |= VNET_RXRING_STARTED;
+ return (0);
+ }
+
+ err = mac_hwring_start(rx_ringp->hw_rh);
+ if (err == 0) {
+ rx_ringp->gen_num = mr_gen_num;
+ rx_ringp->state |= VNET_RXRING_STARTED;
+ } else {
+ err = ENXIO;
+ }
+
+ return (err);
+}
+
+static void
+vnet_rx_ring_stop(mac_ring_driver_t arg)
+{
+ vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
+
+ /*
+ * If this ring is mapped to a LDC resource, simply mark the state to
+ * indicate the ring is now stopped and return.
+ */
+ if ((rx_ringp->state &
+ (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
+ rx_ringp->state &= ~VNET_RXRING_STARTED;
+ return;
+ }
+
+ ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
+
+ /*
+ * This must be a ring reserved for a hwring. If the hwring is not
+ * bound yet, simply mark the state to indicate the ring is stopped and
+ * return. If a hwring is already bound, stop it now.
+ */
+ if (rx_ringp->hw_rh == NULL) {
+ rx_ringp->state &= ~VNET_RXRING_STARTED;
+ return;
+ }
+
+ mac_hwring_stop(rx_ringp->hw_rh);
+ rx_ringp->state &= ~VNET_RXRING_STARTED;
+}
+
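+/*
+ * The pseudo tx rings are shared by all resources; starting and stopping
+ * them only involves updating the ring state.
+ */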
+/* ARGSUSED */
+static int
+vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
+{
+ vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
+
+ tx_ringp->state |= VNET_TXRING_STARTED;
+ return (0);
+}
+
+static void
+vnet_tx_ring_stop(mac_ring_driver_t arg)
+{
+ vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
+
+ tx_ringp->state &= ~VNET_TXRING_STARTED;
+}
+
+/*
+ * Disable polling for a ring and enable its interrupt.
+ */
+static int
+vnet_ring_enable_intr(void *arg)
+{
+ vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
+ vnet_res_t *vresp;
+
+ if (rx_ringp->hw_rh == NULL) {
+ /*
+ * Ring enable intr func is being invoked, but the ring is
+ * not bound to any underlying resource ? This must be a ring
+ * reserved for Hybrid resource and no such resource has been
+ * assigned to this vnet device yet. We simply return success.
+ */
+ ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
+ return (0);
+ }
+
+ /*
+ * The rx ring has been bound to either a LDC or a Hybrid resource.
+ * Call the appropriate function to enable interrupts for the ring.
+ */
+ if (rx_ringp->state & VNET_RXRING_HYBRID) {
+ return (mac_hwring_enable_intr(rx_ringp->hw_rh));
+ } else {
+ vresp = (vnet_res_t *)rx_ringp->hw_rh;
+ return (vgen_enable_intr(vresp->macreg.m_driver));
+ }
+}
+
+/*
+ * Enable polling for a ring and disable its interrupt.
+ */
+static int
+vnet_ring_disable_intr(void *arg)
+{
+ vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
+ vnet_res_t *vresp;
+
+ if (rx_ringp->hw_rh == NULL) {
+ /*
+ * Ring disable intr func is being invoked, but the ring is
+ * not bound to any underlying resource ? This must be a ring
+ * reserved for Hybrid resource and no such resource has been
+ * assigned to this vnet device yet. We simply return success.
+ */
+ ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
+ return (0);
+ }
+
+ /*
+ * The rx ring has been bound to either a LDC or a Hybrid resource.
+ * Call the appropriate function to disable interrupts for the ring.
+ */
+ if (rx_ringp->state & VNET_RXRING_HYBRID) {
+ return (mac_hwring_disable_intr(rx_ringp->hw_rh));
+ } else {
+ vresp = (vnet_res_t *)rx_ringp->hw_rh;
+ return (vgen_disable_intr(vresp->macreg.m_driver));
+ }
+}
+
+/*
+ * Poll 'bytes_to_pickup' bytes of message from the rx ring.
+ */
+static mblk_t *
+vnet_rx_poll(void *arg, int bytes_to_pickup)
+{
+ vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
+ mblk_t *mp = NULL;
+ vnet_res_t *vresp;
+ vnet_t *vnetp = rx_ringp->vnetp;
+
+ if (rx_ringp->hw_rh == NULL) {
+ return (NULL);
+ }
+
+ if (rx_ringp->state & VNET_RXRING_HYBRID) {
+ mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
+ /*
+ * Packets received over a hybrid resource need additional
+ * processing to remove the tag, for the pvid case. The
+ * underlying resource is not aware of the vnet's pvid and thus
+ * packets are received with the vlan tag in the header; unlike
+ * packets that are received over a ldc channel in which case
+ * the peer vnet/vsw would have already removed the tag.
+ */
+ if (vnetp->pvid != vnetp->default_vlan_id) {
+ vnet_rx_frames_untag(vnetp->pvid, &mp);
+ }
+ } else {
+ vresp = (vnet_res_t *)rx_ringp->hw_rh;
+ mp = vgen_poll(vresp->macreg.m_driver, bytes_to_pickup);
+ }
+ return (mp);
+}
+
+/* ARGSUSED */
+void
+vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
+ boolean_t loopback)
+{
+ vnet_t *vnetp = (vnet_t *)arg;
+ vnet_pseudo_rx_ring_t *ringp = (vnet_pseudo_rx_ring_t *)mrh;
+
+ /*
+ * Packets received over a hybrid resource need additional processing
+ * to remove the tag, for the pvid case. The underlying resource is
+ * not aware of the vnet's pvid and thus packets are received with the
+ * vlan tag in the header; unlike packets that are received over a ldc
+ * channel in which case the peer vnet/vsw would have already removed
+ * the tag.
+ */
+ if (vnetp->pvid != vnetp->default_vlan_id) {
+ vnet_rx_frames_untag(vnetp->pvid, &mp);
+ if (mp == NULL) {
+ return;
+ }
+ }
+ mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
+}
+
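+/*
+ * mgi_addmac entry point of the pseudo rx group. Only the vnet device's
+ * own unicast address is supported; adding any other address fails.
+ */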
+static int
+vnet_addmac(void *arg, const uint8_t *mac_addr)
+{
+ vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg;
+ vnet_t *vnetp;
+
+ vnetp = rx_grp->vnetp;
+
+ if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
+ return (0);
+ }
+
+ cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
+ vnetp->instance, __func__);
+ return (EINVAL);
+}
+
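+/*
+ * mgi_remmac entry point of the pseudo rx group. Only the vnet device's
+ * own unicast address is valid; removing any other address fails.
+ */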
+static int
+vnet_remmac(void *arg, const uint8_t *mac_addr)
+{
+ vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg;
+ vnet_t *vnetp;
+
+ vnetp = rx_grp->vnetp;
+
+ if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
+ return (0);
+ }
+
+ cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
+ vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
+ return (EINVAL);
+}
+
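+/*
+ * Open the Hybrid device (ifname) using the mac client interfaces, add the
+ * vnet's unicast address on it and register it with vnet as a Hybrid
+ * resource; also install the rx callback and the tx-update notify callback.
+ */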
+int
+vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
+{
+ mac_handle_t mh;
+ mac_client_handle_t mch = NULL;
+ mac_unicast_handle_t muh = NULL;
+ mac_diag_t diag;
+ mac_register_t *macp;
+ char client_name[MAXNAMELEN];
+ int rv;
+ uint16_t mac_flags = MAC_UNICAST_TAG_DISABLE |
+ MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
+ vio_net_callbacks_t vcb;
+ ether_addr_t rem_addr =
+ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+ uint32_t retries = 0;
+
+ if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
+ return (EAGAIN);
+ }
+
+ do {
+ rv = mac_open_by_linkname(ifname, &mh);
+ if (rv == 0) {
+ break;
+ }
+ if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
+ mac_free(macp);
+ return (rv);
+ }
+ drv_usecwait(vnet_mac_open_delay);
+ } while (rv == ENOENT);
+
+ vnetp->hio_mh = mh;
+
+ (void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
+ ifname);
+ rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
+ if (rv != 0) {
+ goto fail;
+ }
+ vnetp->hio_mch = mch;
+
+ rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
+ &diag);
+ if (rv != 0) {
+ goto fail;
+ }
+ vnetp->hio_muh = muh;
+
+ macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
+ macp->m_driver = vnetp;
+ macp->m_dip = NULL;
+ macp->m_src_addr = NULL;
+ macp->m_callbacks = &vnet_hio_res_callbacks;
+ macp->m_min_sdu = 0;
+ macp->m_max_sdu = ETHERMTU;
+
+ rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
+ vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
+ if (rv != 0) {
+ goto fail;
+ }
+ mac_free(macp);
+
+ /* add the recv callback */
+ mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);
+
+ /* add the notify callback - only tx updates for now */
+ vnetp->hio_mnh = mac_notify_add(vnetp->hio_mh, vnet_hio_notify_cb,
+ vnetp);
+
+ return (0);
+
+fail:
+ mac_free(macp);
+ vnet_hio_mac_cleanup(vnetp);
+ return (1);
+}
+
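+/*
+ * Undo the steps done in vnet_hio_mac_init(): remove the notify callback,
+ * unregister the Hybrid resource, remove the unicast address and close the
+ * mac client and the underlying mac device.
+ */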
+void
+vnet_hio_mac_cleanup(vnet_t *vnetp)
+{
+ if (vnetp->hio_mnh != NULL) {
+ (void) mac_notify_remove(vnetp->hio_mnh, B_TRUE);
+ vnetp->hio_mnh = NULL;
+ }
+
+ if (vnetp->hio_vhp != NULL) {
+ vio_net_resource_unreg(vnetp->hio_vhp);
+ vnetp->hio_vhp = NULL;
+ }
+
+ if (vnetp->hio_muh != NULL) {
+ mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
+ vnetp->hio_muh = NULL;
+ }
+
+ if (vnetp->hio_mch != NULL) {
+ mac_client_close(vnetp->hio_mch, 0);
+ vnetp->hio_mch = NULL;
+ }
+
+ if (vnetp->hio_mh != NULL) {
+ mac_close(vnetp->hio_mh);
+ vnetp->hio_mh = NULL;
+ }
+}
+
+/* Bind pseudo rings to hwrings */
+static int
+vnet_bind_hwrings(vnet_t *vnetp)
+{
+ mac_ring_handle_t hw_rh[VNET_NUM_HYBRID_RINGS];
+ mac_perim_handle_t mph1;
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_rx_ring_t *rx_ringp;
+ vnet_pseudo_tx_group_t *tx_grp;
+ vnet_pseudo_tx_ring_t *tx_ringp;
+ int hw_ring_cnt;
+ int i;
+ int rv;
+
+ mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
+
+ /* Get the list of the underlying RX rings. */
+ hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
+ MAC_RING_TYPE_RX);
+
+ /* We expect the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
+ if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
+ cmn_err(CE_WARN,
+ "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
+ vnetp->instance, hw_ring_cnt);
+ goto fail;
+ }
+
+ if (vnetp->rx_hwgh != NULL) {
+ /*
+ * Quiesce the HW ring and the mac srs on the ring. Note
+ * that the HW ring will be restarted when the pseudo ring
+ * is started. At that time all the packets will be
+ * directly passed up to the pseudo RX ring and handled
+ * by mac srs created over the pseudo RX ring.
+ */
+ mac_rx_client_quiesce(vnetp->hio_mch);
+ mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
+ }
+
+ /*
+ * Bind the pseudo rings to the hwrings and start the hwrings.
+ * Note we don't need to register these with the upper mac, as we have
+ * statically exported these pseudo rxrings which are reserved for
+ * rxrings of Hybrid resource.
+ */
+ rx_grp = &vnetp->rx_grp[0];
+ for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
+ /* Pick the rxrings reserved for Hybrid resource */
+ rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
+
+ /* Store the hw ring handle */
+ rx_ringp->hw_rh = hw_rh[i];
+
+ /* Bind the pseudo ring to the underlying hwring */
+ mac_hwring_setup(rx_ringp->hw_rh,
+ (mac_resource_handle_t)rx_ringp);
+
+ /* Start the hwring if needed */
+ if (rx_ringp->state & VNET_RXRING_STARTED) {
+ rv = mac_hwring_start(rx_ringp->hw_rh);
+ if (rv != 0) {
+ mac_hwring_teardown(rx_ringp->hw_rh);
+ rx_ringp->hw_rh = NULL;
+ goto fail;
+ }
+ }
+ }
+
+ /* Get the list of the underlying TX rings. */
+ hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
+ MAC_RING_TYPE_TX);
+
+ /* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
+ if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
+ cmn_err(CE_WARN,
+ "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
+ vnetp->instance, hw_ring_cnt);
+ goto fail;
+ }
+
+ /*
+ * Now map the pseudo txrings to the hw txrings. Note we don't need
+ * to register these with the upper mac, as we have statically exported
+ * these rings. Note that these rings will continue to be used for LDC
+ * resources to peer vnets and vswitch (shared ring).
+ */
+ tx_grp = &vnetp->tx_grp[0];
+ for (i = 0; i < tx_grp->ring_cnt; i++) {
+ tx_ringp = &tx_grp->rings[i];
+ tx_ringp->hw_rh = hw_rh[i];
+ tx_ringp->state |= VNET_TXRING_HYBRID;
+ }
+
+ mac_perim_exit(mph1);
+ return (0);
+
+fail:
+ mac_perim_exit(mph1);
+ vnet_unbind_hwrings(vnetp);
+ return (1);
+}
+
+/* Unbind pseudo rings from hwrings */
+static void
+vnet_unbind_hwrings(vnet_t *vnetp)
+{
+ mac_perim_handle_t mph1;
+ vnet_pseudo_rx_ring_t *rx_ringp;
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_tx_group_t *tx_grp;
+ vnet_pseudo_tx_ring_t *tx_ringp;
+ int i;
+
+ mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
+
+ tx_grp = &vnetp->tx_grp[0];
+ for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
+ tx_ringp = &tx_grp->rings[i];
+ if (tx_ringp->state & VNET_TXRING_HYBRID) {
+ tx_ringp->state &= ~VNET_TXRING_HYBRID;
+ tx_ringp->hw_rh = NULL;
+ }
+ }
+
+ rx_grp = &vnetp->rx_grp[0];
+ for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
+ rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
+ if (rx_ringp->hw_rh != NULL) {
+ /* Stop the hwring */
+ mac_hwring_stop(rx_ringp->hw_rh);
+
+ /* Teardown the hwring */
+ mac_hwring_teardown(rx_ringp->hw_rh);
+ rx_ringp->hw_rh = NULL;
+ }
+ }
+
+ if (vnetp->rx_hwgh != NULL) {
+ vnetp->rx_hwgh = NULL;
+ /*
+ * First clear the permanent-quiesced flag of the RX srs then
+ * restart the HW ring and the mac srs on the ring.
+ */
+ mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
+ mac_rx_client_restart(vnetp->hio_mch);
+ }
+
+ mac_perim_exit(mph1);
+}
+
+/* Bind pseudo ring to a LDC resource */
+static int
+vnet_bind_vgenring(vnet_res_t *vresp)
+{
+ vnet_t *vnetp;
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_rx_ring_t *rx_ringp;
+ mac_perim_handle_t mph1;
+ int rv;
+ int type;
+
+ vnetp = vresp->vnetp;
+ type = vresp->type;
+ rx_grp = &vnetp->rx_grp[0];
+
+ if (type == VIO_NET_RES_LDC_SERVICE) {
+ /*
+ * Ring Index 0 is the default ring in the group and is
+ * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
+ * is allocated statically and is reported to the mac layer
+ * in vnet_m_capab(). So, all we need to do here, is save a
+ * reference to the associated vresp.
+ */
+ rx_ringp = &rx_grp->rings[0];
+ rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
+ vresp->rx_ringp = (void *)rx_ringp;
+ return (0);
+ }
+ ASSERT(type == VIO_NET_RES_LDC_GUEST);
+
+ mac_perim_enter_by_mh(vnetp->mh, &mph1);
+
+ rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
+ if (rx_ringp == NULL) {
+ cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
+ vnetp->instance);
+ goto fail;
+ }
+
+ /* Store the LDC resource itself as the ring handle */
+ rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
+
+ /*
+ * Save a reference to the ring in the resource for lookup during
+ * unbind. Note this is only done for LDC resources. We don't need this
+ * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its
+ * rx rings are mapped to reserved pseudo rx rings (index 1 and 2).
+ */
+ vresp->rx_ringp = (void *)rx_ringp;
+ rx_ringp->state |= VNET_RXRING_LDC_GUEST;
+
+ /* Register the pseudo ring with upper-mac */
+ rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
+ if (rv != 0) {
+ rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
+ rx_ringp->hw_rh = NULL;
+ vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
+ goto fail;
+ }
+
+ mac_perim_exit(mph1);
+ return (0);
+fail:
+ mac_perim_exit(mph1);
+ return (1);
+}
+
+/* Unbind pseudo ring from a LDC resource */
+static void
+vnet_unbind_vgenring(vnet_res_t *vresp)
+{
+ vnet_t *vnetp;
+ vnet_pseudo_rx_group_t *rx_grp;
+ vnet_pseudo_rx_ring_t *rx_ringp;
+ mac_perim_handle_t mph1;
+ int type;
+
+ vnetp = vresp->vnetp;
+ type = vresp->type;
+ rx_grp = &vnetp->rx_grp[0];
+
+ if (vresp->rx_ringp == NULL) {
+ return;
+ }
+
+ if (type == VIO_NET_RES_LDC_SERVICE) {
+ /*
+ * Ring Index 0 is the default ring in the group and is
+ * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
+ * is allocated statically and is reported to the mac layer
+ * in vnet_m_capab(). So, all we need to do here is remove its
+ * reference to the associated vresp.
+ */
+ rx_ringp = &rx_grp->rings[0];
+ rx_ringp->hw_rh = NULL;
+ vresp->rx_ringp = NULL;
+ return;
+ }
+ ASSERT(type == VIO_NET_RES_LDC_GUEST);
+
+ mac_perim_enter_by_mh(vnetp->mh, &mph1);
+
+ rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
+ vresp->rx_ringp = NULL;
+
+ if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
+ /* Unregister the pseudo ring with upper-mac */
+ mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);
+
+ rx_ringp->hw_rh = NULL;
+ rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
+
+ /* Free the pseudo rx ring */
+ vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
+ }
+
+ mac_perim_exit(mph1);
+}
+
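+/*
+ * Unbind the pseudo ring(s) of the given resource, based on its type.
+ */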
+static void
+vnet_unbind_rings(vnet_res_t *vresp)
+{
+ switch (vresp->type) {
+
+ case VIO_NET_RES_LDC_SERVICE:
+ case VIO_NET_RES_LDC_GUEST:
+ vnet_unbind_vgenring(vresp);
+ break;
+
+ case VIO_NET_RES_HYBRID:
+ vnet_unbind_hwrings(vresp->vnetp);
+ break;
+
+ default:
+ break;
+
+ }
+}
+
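+/*
+ * Bind the given resource to pseudo ring(s): LDC resources are bound to a
+ * pseudo rx ring and a Hybrid resource is bound to the reserved pseudo
+ * rx/tx rings (hwrings).
+ */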
+static int
+vnet_bind_rings(vnet_res_t *vresp)
+{
+ int rv;
+
+ switch (vresp->type) {
+
+ case VIO_NET_RES_LDC_SERVICE:
+ case VIO_NET_RES_LDC_GUEST:
+ rv = vnet_bind_vgenring(vresp);
+ break;
+
+ case VIO_NET_RES_HYBRID:
+ rv = vnet_bind_hwrings(vresp->vnetp);
+ break;
+
+ default:
+ rv = 1;
+ break;
+
+ }
+
+ return (rv);
+}
+
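+/*
+ * Get the specified statistic of the underlying Hybrid device, on behalf of
+ * the Hybrid resource registered with vnet.
+ */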
+/* ARGSUSED */
+int
+vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
+{
+ vnet_t *vnetp = (vnet_t *)arg;
+
+ *val = mac_stat_get(vnetp->hio_mh, stat);
+ return (0);
+}
+
+/*
+ * The start() and stop() routines for the Hybrid resource below are just
+ * dummy functions. This is provided to avoid resource type specific code in
+ * vnet_start_resources() and vnet_stop_resources(). The starting and stopping
+ * of the Hybrid resource happens in the context of the mac_client interfaces
+ * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup().
+ */
+/* ARGSUSED */
+static int
+vnet_hio_start(void *arg)
+{
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+vnet_hio_stop(void *arg)
+{
+}
+
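+/*
+ * Transmit a chain of packets over a hw tx ring of the Hybrid resource.
+ * Packets that cannot be sent (the ring is flow controlled) are returned
+ * back to the caller.
+ */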
+mblk_t *
+vnet_hio_tx(void *arg, mblk_t *mp)
+{
+ vnet_pseudo_tx_ring_t *tx_ringp;
+ mblk_t *nextp;
+ mblk_t *ret_mp;
+
+ tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
+ for (;;) {
+ nextp = mp->b_next;
+ mp->b_next = NULL;
+
+ ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
+ if (ret_mp != NULL) {
+ ret_mp->b_next = nextp;
+ mp = ret_mp;
+ break;
+ }
+
+ if ((mp = nextp) == NULL)
+ break;
+ }
+ return (mp);
+}
+
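+/*
+ * mac notify callback for the Hybrid device; only tx updates (MAC_NOTE_TX)
+ * are handled for now, by propagating the tx restart to the pseudo tx rings
+ * via vnet_tx_update().
+ */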
+static void
+vnet_hio_notify_cb(void *arg, mac_notify_type_t type)
+{
+ vnet_t *vnetp = (vnet_t *)arg;
+ mac_perim_handle_t mph;
+
+ mac_perim_enter_by_mh(vnetp->hio_mh, &mph);
+ switch (type) {
+ case MAC_NOTE_TX:
+ vnet_tx_update(vnetp->hio_vhp);
+ break;
+
+ default:
+ break;
+ }
+ mac_perim_exit(mph);
+}
+
#ifdef VNET_IOC_DEBUG
/*