author | mrj <none@none> | 2007-12-21 14:13:23 -0800
---|---|---
committer | mrj <none@none> | 2007-12-21 14:13:23 -0800
commit | 551bc2a66868b5cb5be6b70ab9f55515e77a39a9 (patch) |
tree | a01e761c9864ea9483c468ced858a0f67edcbf93 /usr/src/uts/common/xen/io/xnf.c |
parent | 71a79fe7afa36dcf0de6902c2c6ef432980534d3 (diff) |
download | illumos-joyent-551bc2a66868b5cb5be6b70ab9f55515e77a39a9.tar.gz |
PSARC 2007/664 Paravirtualized Drivers for Fully Virtualized xVM Domains
6525093 xnb/xnf should use hypervisor based copy for xnb->xnf data path
6608917 members of struct xnf and xnb need unique names
6609324 deadlock trying to own the HAT migrate lock
6609805 still missing XPV_DISALLOW_MIGRATE/XPV_ALLOW_MIGRATE bracketing in hat_i86.c
6616384 xnb's grant ref unmapping is inefficient
6619947 Solaris should provide a PV network driver for xVM HVM environments
6632774 panic setting up xen console
--HG--
rename : usr/src/uts/i86xpv/os/gnttab.c => usr/src/uts/common/xen/os/gnttab.c
rename : usr/src/uts/i86xpv/os/hypercall.c => usr/src/uts/common/xen/os/hypercall.c
rename : usr/src/uts/i86xpv/sys/gnttab.h => usr/src/uts/common/xen/sys/gnttab.h
rename : usr/src/uts/i86xpv/ml/hypersubr.s => usr/src/uts/intel/ia32/ml/hypersubr.s
rename : usr/src/uts/i86xpv/sys/hypervisor.h => usr/src/uts/intel/sys/hypervisor.h
rename : usr/src/uts/i86xpv/sys/xen_errno.h => usr/src/uts/intel/sys/xen_errno.h
Diffstat (limited to 'usr/src/uts/common/xen/io/xnf.c')
-rw-r--r-- | usr/src/uts/common/xen/io/xnf.c | 1285 |
1 files changed, 791 insertions, 494 deletions
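
The substance of this change (6525093) is a switch in the xnf receive path from page flipping to hypervisor-based copy: in copy mode each receive buffer is offered to the backend with gnttab_grant_foreign_access_ref(), so the backend copies packet data into a page the frontend keeps, rather than with gnttab_grant_foreign_transfer_ref(), which hands ownership of the page to the backend. The sketch below is a minimal user-level model of that decision as it appears in rx_buffer_hang() in the diff that follows; the gnttab_*() functions here are print-only stand-ins for the real Xen grant-table calls, not the driver's code.

```c
/*
 * Simplified model of the rx buffer hand-off added by this change.
 * The gnttab_*() functions are stand-ins for the real grant-table
 * interfaces used in xnf.c; they only describe the request that the
 * driver would make of the hypervisor.
 */
#include <stdio.h>
#include <stdint.h>

typedef uint32_t grant_ref_t;
typedef uint16_t domid_t;
typedef uintptr_t mfn_t;

/* Stub: grant the backend read/write access to a frame (hvcopy path). */
static void
gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t dom, mfn_t mfn,
    int readonly)
{
	printf("grant %u: dom %u may %s mfn 0x%lx (backend copies into it)\n",
	    (unsigned)ref, (unsigned)dom, readonly ? "read" : "read/write",
	    (unsigned long)mfn);
}

/* Stub: offer the frame for a page transfer (legacy page-flip path). */
static void
gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t dom)
{
	printf("grant %u: dom %u takes ownership of the page (flip)\n",
	    (unsigned)ref, (unsigned)dom);
}

/*
 * Mirror of the decision rx_buffer_hang() now makes: with hvcopy the
 * frontend keeps the page and only grants access to it; without it the
 * page itself is handed to the backend.
 */
static void
hang_rx_buffer(int rx_hvcopy, grant_ref_t ref, domid_t oeid, mfn_t mfn)
{
	if (rx_hvcopy)
		gnttab_grant_foreign_access_ref(ref, oeid, mfn, 0);
	else
		gnttab_grant_foreign_transfer_ref(ref, oeid);
}

int
main(void)
{
	hang_rx_buffer(1, 42, 0, 0x1234);	/* copy-based receive */
	hang_rx_buffer(0, 43, 0, 0x5678);	/* page-flip receive */
	return (0);
}
```

Copy mode avoids the per-packet MMU and balloon work (reassign_pfn(), hat_devload(), balloon_drv_added()) that the flip path in xnf_process_recv() still performs, which is presumably why the HVM variant of the driver requires it.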
diff --git a/usr/src/uts/common/xen/io/xnf.c b/usr/src/uts/common/xen/io/xnf.c index 4f457edf00..89a12e4d03 100644 --- a/usr/src/uts/common/xen/io/xnf.c +++ b/usr/src/uts/common/xen/io/xnf.c @@ -63,49 +63,42 @@ */ #include <sys/types.h> -#include <sys/hypervisor.h> -#include <sys/debug.h> #include <sys/errno.h> #include <sys/param.h> #include <sys/sysmacros.h> #include <sys/systm.h> -#include <sys/stropts.h> #include <sys/stream.h> #include <sys/strsubr.h> -#include <sys/kmem.h> #include <sys/conf.h> #include <sys/ddi.h> #include <sys/devops.h> #include <sys/sunddi.h> #include <sys/sunndi.h> -#include <sys/ksynch.h> #include <sys/dlpi.h> #include <sys/ethernet.h> #include <sys/strsun.h> #include <sys/pattr.h> -#include <inet/common.h> #include <inet/ip.h> -#include <sys/stat.h> #include <sys/modctl.h> #include <sys/mac.h> #include <sys/mac_ether.h> -#include <sys/atomic.h> -#include <sys/errno.h> -#include <sys/machsystm.h> -#include <sys/bootconf.h> -#include <sys/bootsvcs.h> #include <sys/bootinfo.h> -#include <sys/promif.h> -#include <sys/archsystm.h> -#include <sys/gnttab.h> #include <sys/mach_mmu.h> -#include <xen/public/memory.h> - -#include "xnf.h" - +#ifdef XPV_HVM_DRIVER +#include <sys/xpv_support.h> +#include <sys/hypervisor.h> +#else +#include <sys/hypervisor.h> #include <sys/evtchn_impl.h> #include <sys/balloon_impl.h> +#endif +#include <xen/public/io/netif.h> +#include <sys/gnttab.h> #include <xen/sys/xendev.h> +#include <sys/sdt.h> + +#include <io/xnf.h> + /* * Declarations and Module Linkage @@ -127,6 +120,10 @@ int xnfdebug = 0; #define xnf_btop(addr) ((addr) >> PAGESHIFT) boolean_t xnf_cksum_offload = B_TRUE; + +/* Default value for hypervisor-based copy operations */ +boolean_t xnf_rx_hvcopy = B_TRUE; + /* * Should pages used for transmit be readonly for the peer? */ @@ -164,17 +161,20 @@ static void xnf_release_dma_resources(xnf_t *); static mblk_t *xnf_process_recv(xnf_t *); static void xnf_rcv_complete(struct xnf_buffer_desc *); static void xnf_release_mblks(xnf_t *); -static struct xnf_buffer_desc *xnf_alloc_xmit_buffer(xnf_t *); +static struct xnf_buffer_desc *xnf_alloc_tx_buffer(xnf_t *); static struct xnf_buffer_desc *xnf_alloc_buffer(xnf_t *); -static struct xnf_buffer_desc *xnf_get_xmit_buffer(xnf_t *); +static struct xnf_buffer_desc *xnf_get_tx_buffer(xnf_t *); static struct xnf_buffer_desc *xnf_get_buffer(xnf_t *); static void xnf_free_buffer(struct xnf_buffer_desc *); -static void xnf_free_xmit_buffer(struct xnf_buffer_desc *); +static void xnf_free_tx_buffer(struct xnf_buffer_desc *); void xnf_send_driver_status(int, int); static void rx_buffer_hang(xnf_t *, struct xnf_buffer_desc *); static int xnf_clean_tx_ring(xnf_t *); static void oe_state_change(dev_info_t *, ddi_eventcookie_t, void *, void *); +static mblk_t *xnf_process_hvcopy_recv(xnf_t *xnfp); +static boolean_t xnf_hvcopy_peer_status(dev_info_t *devinfo); +static boolean_t xnf_kstat_init(xnf_t *xnfp); /* * XXPV dme: remove MC_IOCTL? @@ -194,8 +194,8 @@ static mac_callbacks_t xnf_callbacks = { }; #define GRANT_INVALID_REF 0 -int xnf_recv_bufs_lowat = 4 * NET_RX_RING_SIZE; -int xnf_recv_bufs_hiwat = 8 * NET_RX_RING_SIZE; /* default max */ +const int xnf_rx_bufs_lowat = 4 * NET_RX_RING_SIZE; +const int xnf_rx_bufs_hiwat = 8 * NET_RX_RING_SIZE; /* default max */ /* DMA attributes for network ring buffer */ static ddi_dma_attr_t ringbuf_dma_attr = { @@ -300,134 +300,54 @@ _info(struct modinfo *modinfop) return (mod_info(&modlinkage, modinfop)); } -/* - * Statistics. 
- */ -/* XXPV: most of these names need re-"nice"ing */ -static char *xnf_aux_statistics[] = { - "tx_cksum_deferred", - "rx_cksum_no_need", - "intr", - "xmit_pullup", - "xmit_pagebndry", - "xmit_attempt", - "rx_no_ringbuf", - "mac_rcv_error", - "runt", -}; - -static int -xnf_kstat_aux_update(kstat_t *ksp, int flag) -{ - xnf_t *xnfp; - kstat_named_t *knp; - - if (flag != KSTAT_READ) - return (EACCES); - - xnfp = ksp->ks_private; - knp = ksp->ks_data; - - /* - * Assignment order should match that of the names in - * xnf_aux_statistics. - */ - (knp++)->value.ui64 = xnfp->stat_tx_cksum_deferred; - (knp++)->value.ui64 = xnfp->stat_rx_cksum_no_need; - - (knp++)->value.ui64 = xnfp->stat_intr; - (knp++)->value.ui64 = xnfp->stat_xmit_pullup; - (knp++)->value.ui64 = xnfp->stat_xmit_pagebndry; - (knp++)->value.ui64 = xnfp->stat_xmit_attempt; - (knp++)->value.ui64 = xnfp->stat_rx_no_ringbuf; - (knp++)->value.ui64 = xnfp->stat_mac_rcv_error; - (knp++)->value.ui64 = xnfp->stat_runt; - - return (0); -} - -static boolean_t -xnf_kstat_init(xnf_t *xnfp) -{ - int nstat = sizeof (xnf_aux_statistics) / - sizeof (xnf_aux_statistics[0]); - char **cp = xnf_aux_statistics; - kstat_named_t *knp; - - /* - * Create and initialise kstats. - */ - if ((xnfp->kstat_aux = kstat_create("xnf", - ddi_get_instance(xnfp->devinfo), - "aux_statistics", "net", KSTAT_TYPE_NAMED, - nstat, 0)) == NULL) - return (B_FALSE); - - xnfp->kstat_aux->ks_private = xnfp; - xnfp->kstat_aux->ks_update = xnf_kstat_aux_update; - - knp = xnfp->kstat_aux->ks_data; - while (nstat > 0) { - kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); - - knp++; - cp++; - nstat--; - } - - kstat_install(xnfp->kstat_aux); - - return (B_TRUE); -} - static int xnf_setup_rings(xnf_t *xnfp) { int ix, err; RING_IDX i; - struct xnf_buffer_desc *bdesc, *rbp; - struct xenbus_device *xsd; - domid_t oeid; + struct xnf_buffer_desc *bdesc, *rbp; + struct xenbus_device *xsd; + domid_t oeid; - oeid = xvdi_get_oeid(xnfp->devinfo); - xsd = xvdi_get_xsd(xnfp->devinfo); + oeid = xvdi_get_oeid(xnfp->xnf_devinfo); + xsd = xvdi_get_xsd(xnfp->xnf_devinfo); - if (xnfp->tx_ring_ref != GRANT_INVALID_REF) - gnttab_end_foreign_access(xnfp->tx_ring_ref, 0, 0); + if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF) + gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); err = gnttab_grant_foreign_access(oeid, - xnf_btop(pa_to_ma(xnfp->tx_ring_phys_addr)), 0); + xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0); if (err <= 0) { err = -err; xenbus_dev_error(xsd, err, "granting access to tx ring page"); goto out; } - xnfp->tx_ring_ref = (grant_ref_t)err; + xnfp->xnf_tx_ring_ref = (grant_ref_t)err; - if (xnfp->rx_ring_ref != GRANT_INVALID_REF) - gnttab_end_foreign_access(xnfp->rx_ring_ref, 0, 0); + if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF) + gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); err = gnttab_grant_foreign_access(oeid, - xnf_btop(pa_to_ma(xnfp->rx_ring_phys_addr)), 0); + xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0); if (err <= 0) { err = -err; xenbus_dev_error(xsd, err, "granting access to rx ring page"); goto out; } - xnfp->rx_ring_ref = (grant_ref_t)err; + xnfp->xnf_rx_ring_ref = (grant_ref_t)err; - mutex_enter(&xnfp->intrlock); + mutex_enter(&xnfp->xnf_intrlock); /* * Cleanup the TX ring. We just clean up any valid tx_pktinfo structs * and reset the ring. Note that this can lose packets after a resume, * but we expect to stagger on. 
*/ - mutex_enter(&xnfp->txlock); + mutex_enter(&xnfp->xnf_txlock); - for (i = 0; i < xnfp->n_xmits; i++) { - struct tx_pktinfo *txp = &xnfp->tx_pkt_info[i]; + for (i = 0; i < xnfp->xnf_n_tx; i++) { + struct tx_pktinfo *txp = &xnfp->xnf_tx_pkt_info[i]; txp->id = i + 1; @@ -446,83 +366,105 @@ xnf_setup_rings(xnf_t *xnfp) (void) ddi_dma_unbind_handle(txp->dma_handle); if (txp->bdesc != NULL) { - xnf_free_xmit_buffer(txp->bdesc); + xnf_free_tx_buffer(txp->bdesc); txp->bdesc = NULL; } (void) gnttab_end_foreign_access_ref(txp->grant_ref, - xnfp->tx_pages_readonly); - gnttab_release_grant_reference(&xnfp->gref_tx_head, + xnfp->xnf_tx_pages_readonly); + gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head, txp->grant_ref); txp->grant_ref = GRANT_INVALID_REF; } - xnfp->tx_pkt_id_list = 0; - xnfp->tx_ring.rsp_cons = 0; - xnfp->tx_ring.sring->req_prod = 0; - xnfp->tx_ring.sring->rsp_prod = 0; - xnfp->tx_ring.sring->rsp_event = 1; + xnfp->xnf_tx_pkt_id_list = 0; + xnfp->xnf_tx_ring.rsp_cons = 0; + xnfp->xnf_tx_ring.sring->req_prod = 0; + xnfp->xnf_tx_ring.sring->rsp_prod = 0; + xnfp->xnf_tx_ring.sring->rsp_event = 1; - mutex_exit(&xnfp->txlock); + mutex_exit(&xnfp->xnf_txlock); /* * Rebuild the RX ring. We have to rebuild the RX ring because some of - * our pages are currently flipped out so we can't just free the RX - * buffers. Reclaim any unprocessed recv buffers, they won't be + * our pages are currently flipped out/granted so we can't just free + * the RX buffers. Reclaim any unprocessed recv buffers, they won't be * useable anyway since the mfn's they refer to are no longer valid. * Grant the backend domain access to each hung rx buffer. */ - i = xnfp->rx_ring.rsp_cons; - while (i++ != xnfp->rx_ring.sring->req_prod) { + i = xnfp->xnf_rx_ring.rsp_cons; + while (i++ != xnfp->xnf_rx_ring.sring->req_prod) { volatile netif_rx_request_t *rxrp; - rxrp = RING_GET_REQUEST(&xnfp->rx_ring, i); - ix = rxrp - RING_GET_REQUEST(&xnfp->rx_ring, 0); - rbp = xnfp->rxpkt_bufptr[ix]; + rxrp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, i); + ix = rxrp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0); + rbp = xnfp->xnf_rxpkt_bufptr[ix]; if (rbp != NULL) { - ASSERT(rbp->grant_ref != GRANT_INVALID_REF); - gnttab_grant_foreign_transfer_ref(rbp->grant_ref, - oeid); + grant_ref_t ref = rbp->grant_ref; + + ASSERT(ref != GRANT_INVALID_REF); + if (xnfp->xnf_rx_hvcopy) { + pfn_t pfn = xnf_btop(rbp->buf_phys); + mfn_t mfn = pfn_to_mfn(pfn); + + gnttab_grant_foreign_access_ref(ref, oeid, + mfn, 0); + } else { + gnttab_grant_foreign_transfer_ref(ref, oeid); + } rxrp->id = ix; - rxrp->gref = rbp->grant_ref; + rxrp->gref = ref; } } + /* * Reset the ring pointers to initial state. * Hang buffers for any empty ring slots. 
*/ - xnfp->rx_ring.rsp_cons = 0; - xnfp->rx_ring.sring->req_prod = 0; - xnfp->rx_ring.sring->rsp_prod = 0; - xnfp->rx_ring.sring->rsp_event = 1; + xnfp->xnf_rx_ring.rsp_cons = 0; + xnfp->xnf_rx_ring.sring->req_prod = 0; + xnfp->xnf_rx_ring.sring->rsp_prod = 0; + xnfp->xnf_rx_ring.sring->rsp_event = 1; for (i = 0; i < NET_RX_RING_SIZE; i++) { - xnfp->rx_ring.req_prod_pvt = i; - if (xnfp->rxpkt_bufptr[i] != NULL) + xnfp->xnf_rx_ring.req_prod_pvt = i; + if (xnfp->xnf_rxpkt_bufptr[i] != NULL) continue; if ((bdesc = xnf_get_buffer(xnfp)) == NULL) break; rx_buffer_hang(xnfp, bdesc); } - xnfp->rx_ring.req_prod_pvt = i; + xnfp->xnf_rx_ring.req_prod_pvt = i; /* LINTED: constant in conditional context */ - RING_PUSH_REQUESTS(&xnfp->rx_ring); + RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring); - mutex_exit(&xnfp->intrlock); + mutex_exit(&xnfp->xnf_intrlock); return (0); out: - if (xnfp->tx_ring_ref != GRANT_INVALID_REF) - gnttab_end_foreign_access(xnfp->tx_ring_ref, 0, 0); - xnfp->tx_ring_ref = GRANT_INVALID_REF; + if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF) + gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); + xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF; - if (xnfp->rx_ring_ref != GRANT_INVALID_REF) - gnttab_end_foreign_access(xnfp->rx_ring_ref, 0, 0); - xnfp->rx_ring_ref = GRANT_INVALID_REF; + if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF) + gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); + xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF; return (err); } + +/* Called when the upper layers free a message we passed upstream */ +static void +xnf_copy_rcv_complete(struct xnf_buffer_desc *bdesc) +{ + (void) ddi_dma_unbind_handle(bdesc->dma_handle); + ddi_dma_mem_free(&bdesc->acc_handle); + ddi_dma_free_handle(&bdesc->dma_handle); + kmem_free(bdesc, sizeof (*bdesc)); +} + + /* * Connect driver to back end, called to set up communication with * back end driver both initially and on resume after restore/migrate. @@ -533,16 +475,16 @@ xnf_be_connect(xnf_t *xnfp) char mac[ETHERADDRL * 3]; const char *message; xenbus_transaction_t xbt; - struct xenbus_device *xsd; + struct xenbus_device *xsd; char *xsname; int err, be_no_cksum_offload; - ASSERT(!xnfp->connected); + ASSERT(!xnfp->xnf_connected); - xsd = xvdi_get_xsd(xnfp->devinfo); - xsname = xvdi_get_xsname(xnfp->devinfo); + xsd = xvdi_get_xsd(xnfp->xnf_devinfo); + xsname = xvdi_get_xsname(xnfp->xnf_devinfo); - err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->devinfo), "mac", + err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo), "mac", "%s", (char *)&mac[0]); if (err != 0) { /* @@ -550,12 +492,12 @@ xnf_be_connect(xnf_t *xnfp) * addr. at this point */ cmn_err(CE_WARN, "%s%d: no mac address", - ddi_driver_name(xnfp->devinfo), - ddi_get_instance(xnfp->devinfo)); + ddi_driver_name(xnfp->xnf_devinfo), + ddi_get_instance(xnfp->xnf_devinfo)); return; } - if (ether_aton(mac, xnfp->mac_addr) != ETHERADDRL) { + if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) { err = ENOENT; xenbus_dev_error(xsd, ENOENT, "parsing %s/mac", xsname); return; @@ -568,7 +510,7 @@ xnf_be_connect(xnf_t *xnfp) return; } - err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->devinfo), + err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo), "feature-no-csum-offload", "%d", &be_no_cksum_offload); /* * If we fail to read the store we assume that the key is @@ -581,8 +523,8 @@ xnf_be_connect(xnf_t *xnfp) * If the far end cannot do checksum offload or we do not wish * to do it, disable it. 
*/ - if ((be_no_cksum_offload == 1) || !xnfp->cksum_offload) - xnfp->cksum_offload = B_FALSE; + if ((be_no_cksum_offload == 1) || !xnfp->xnf_cksum_offload) + xnfp->xnf_cksum_offload = B_FALSE; again: err = xenbus_transaction_start(&xbt); @@ -592,20 +534,21 @@ again: } err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u", - xnfp->tx_ring_ref); + xnfp->xnf_tx_ring_ref); if (err != 0) { message = "writing tx ring-ref"; goto abort_transaction; } err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u", - xnfp->rx_ring_ref); + xnfp->xnf_rx_ring_ref); if (err != 0) { message = "writing rx ring-ref"; goto abort_transaction; } - err = xenbus_printf(xbt, xsname, "event-channel", "%u", xnfp->evtchn); + err = xenbus_printf(xbt, xsname, "event-channel", "%u", + xnfp->xnf_evtchn); if (err != 0) { message = "writing event-channel"; goto abort_transaction; @@ -617,7 +560,7 @@ again: goto abort_transaction; } - if (!xnfp->tx_pages_readonly) { + if (!xnfp->xnf_tx_pages_readonly) { err = xenbus_printf(xbt, xsname, "feature-tx-writable", "%d", 1); if (err != 0) { @@ -627,11 +570,17 @@ again: } err = xenbus_printf(xbt, xsname, "feature-no-csum-offload", "%d", - xnfp->cksum_offload ? 0 : 1); + xnfp->xnf_cksum_offload ? 0 : 1); if (err != 0) { message = "writing feature-no-csum-offload"; goto abort_transaction; } + err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d", + xnfp->xnf_rx_hvcopy ? 1 : 0); + if (err != 0) { + message = "writing request-rx-copy"; + goto abort_transaction; + } err = xenbus_printf(xbt, xsname, "state", "%d", XenbusStateConnected); if (err != 0) { @@ -677,20 +626,24 @@ xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) (void) xvdi_resume(devinfo); (void) xvdi_alloc_evtchn(devinfo); + xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); +#ifdef XPV_HVM_DRIVER + ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, + xnfp); +#else (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp); - xnfp->evtchn = xvdi_get_evtchn(devinfo); +#endif xnf_be_connect(xnfp); /* - * Our MAC address didn't necessarily change, but - * given that we may be resuming this OS instance - * on a different machine (or on the same one and got a - * different MAC address because we didn't specify one of - * our own), it's useful to claim that - * it changed in order that IP send out a - * gratuitous ARP. + * Our MAC address may have changed if we're resuming: + * - on a different host + * - on the same one and got a different MAC address + * because we didn't specify one of our own. + * so it's useful to claim that it changed in order that + * IP send out a gratuitous ARP. 
*/ - mac_unicst_update(xnfp->mh, xnfp->mac_addr); + mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr); return (DDI_SUCCESS); case DDI_ATTACH: @@ -710,23 +663,32 @@ xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) macp->m_dip = devinfo; macp->m_driver = xnfp; - xnfp->devinfo = devinfo; + xnfp->xnf_devinfo = devinfo; macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; - macp->m_src_addr = xnfp->mac_addr; + macp->m_src_addr = xnfp->xnf_mac_addr; macp->m_callbacks = &xnf_callbacks; macp->m_min_sdu = 0; macp->m_max_sdu = XNF_MAXPKT; - xnfp->running = B_FALSE; - xnfp->connected = B_FALSE; - xnfp->cksum_offload = xnf_cksum_offload; - xnfp->tx_pages_readonly = xnf_tx_pages_readonly; + xnfp->xnf_running = B_FALSE; + xnfp->xnf_connected = B_FALSE; + xnfp->xnf_cksum_offload = xnf_cksum_offload; + xnfp->xnf_tx_pages_readonly = xnf_tx_pages_readonly; + + xnfp->xnf_rx_hvcopy = xnf_hvcopy_peer_status(devinfo) && xnf_rx_hvcopy; +#ifdef XPV_HVM_DRIVER + if (!xnfp->xnf_rx_hvcopy) { + cmn_err(CE_WARN, "The xnf driver requires a dom0 that " + "supports 'feature-rx-copy'"); + goto failure; + } +#endif /* * Get the iblock cookie with which to initialize the mutexes. */ - if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->icookie) + if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie) != DDI_SUCCESS) goto failure; /* @@ -736,84 +698,94 @@ xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) * affect the operation of any other part of the driver, * it needs to acquire the txlock mutex. */ - mutex_init(&xnfp->tx_buf_mutex, - NULL, MUTEX_DRIVER, xnfp->icookie); - mutex_init(&xnfp->rx_buf_mutex, - NULL, MUTEX_DRIVER, xnfp->icookie); - mutex_init(&xnfp->txlock, - NULL, MUTEX_DRIVER, xnfp->icookie); - mutex_init(&xnfp->intrlock, - NULL, MUTEX_DRIVER, xnfp->icookie); - cv_init(&xnfp->cv, NULL, CV_DEFAULT, NULL); + mutex_init(&xnfp->xnf_tx_buf_mutex, + NULL, MUTEX_DRIVER, xnfp->xnf_icookie); + mutex_init(&xnfp->xnf_rx_buf_mutex, + NULL, MUTEX_DRIVER, xnfp->xnf_icookie); + mutex_init(&xnfp->xnf_txlock, + NULL, MUTEX_DRIVER, xnfp->xnf_icookie); + mutex_init(&xnfp->xnf_intrlock, + NULL, MUTEX_DRIVER, xnfp->xnf_icookie); + cv_init(&xnfp->xnf_cv, NULL, CV_DEFAULT, NULL); if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, - &xnfp->gref_tx_head) < 0) { + &xnfp->xnf_gref_tx_head) < 0) { cmn_err(CE_WARN, "xnf%d: can't alloc tx grant refs", - ddi_get_instance(xnfp->devinfo)); - goto late_failure; + ddi_get_instance(xnfp->xnf_devinfo)); + goto failure_1; } if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, - &xnfp->gref_rx_head) < 0) { + &xnfp->xnf_gref_rx_head) < 0) { cmn_err(CE_WARN, "xnf%d: can't alloc rx grant refs", - ddi_get_instance(xnfp->devinfo)); - goto late_failure; + ddi_get_instance(xnfp->xnf_devinfo)); + goto failure_1; } if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) { cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize " - "driver data structures", ddi_get_instance(xnfp->devinfo)); - goto late_failure; + "driver data structures", + ddi_get_instance(xnfp->xnf_devinfo)); + goto failure_1; } - xnfp->rx_ring.sring->rsp_event = xnfp->tx_ring.sring->rsp_event = 1; + xnfp->xnf_rx_ring.sring->rsp_event = + xnfp->xnf_tx_ring.sring->rsp_event = 1; - xnfp->tx_ring_ref = GRANT_INVALID_REF; - xnfp->rx_ring_ref = GRANT_INVALID_REF; + xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF; + xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF; /* set driver private pointer now */ ddi_set_driver_private(devinfo, xnfp); if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change) != DDI_SUCCESS) - goto late_failure; + goto failure_1; if 
(!xnf_kstat_init(xnfp)) - goto very_late_failure; + goto failure_2; /* * Allocate an event channel, add the interrupt handler and * bind it to the event channel. */ (void) xvdi_alloc_evtchn(devinfo); + xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); +#ifdef XPV_HVM_DRIVER + ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp); +#else (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp); - xnfp->evtchn = xvdi_get_evtchn(devinfo); +#endif /* * connect to the backend */ xnf_be_connect(xnfp); - err = mac_register(macp, &xnfp->mh); + err = mac_register(macp, &xnfp->xnf_mh); mac_free(macp); macp = NULL; if (err != 0) - goto very_very_late_failure; + goto failure_3; return (DDI_SUCCESS); -very_very_late_failure: - kstat_delete(xnfp->kstat_aux); +failure_3: + kstat_delete(xnfp->xnf_kstat_aux); -very_late_failure: +failure_2: xvdi_remove_event_handler(devinfo, XS_OE_STATE); - ddi_remove_intr(devinfo, 0, xnfp->icookie); - xnfp->evtchn = INVALID_EVTCHN; +#ifdef XPV_HVM_DRIVER + ec_unbind_evtchn(xnfp->xnf_evtchn); +#else + ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); +#endif + xnfp->xnf_evtchn = INVALID_EVTCHN; -late_failure: +failure_1: xnf_release_dma_resources(xnfp); - cv_destroy(&xnfp->cv); - mutex_destroy(&xnfp->rx_buf_mutex); - mutex_destroy(&xnfp->txlock); - mutex_destroy(&xnfp->intrlock); + cv_destroy(&xnfp->xnf_cv); + mutex_destroy(&xnfp->xnf_rx_buf_mutex); + mutex_destroy(&xnfp->xnf_txlock); + mutex_destroy(&xnfp->xnf_intrlock); failure: kmem_free(xnfp, sizeof (*xnfp)); @@ -839,17 +811,21 @@ xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) switch (cmd) { case DDI_SUSPEND: - ddi_remove_intr(devinfo, 0, xnfp->icookie); +#ifdef XPV_HVM_DRIVER + ec_unbind_evtchn(xnfp->xnf_evtchn); +#else + ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); +#endif xvdi_suspend(devinfo); - mutex_enter(&xnfp->intrlock); - mutex_enter(&xnfp->txlock); + mutex_enter(&xnfp->xnf_intrlock); + mutex_enter(&xnfp->xnf_txlock); - xnfp->evtchn = INVALID_EVTCHN; - xnfp->connected = B_FALSE; - mutex_exit(&xnfp->txlock); - mutex_exit(&xnfp->intrlock); + xnfp->xnf_evtchn = INVALID_EVTCHN; + xnfp->xnf_connected = B_FALSE; + mutex_exit(&xnfp->xnf_txlock); + mutex_exit(&xnfp->xnf_intrlock); return (DDI_SUCCESS); case DDI_DETACH: @@ -859,32 +835,32 @@ xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) return (DDI_FAILURE); } - if (xnfp->connected) + if (xnfp->xnf_connected) return (DDI_FAILURE); /* Wait for receive buffers to be returned; give up after 5 seconds */ i = 50; - mutex_enter(&xnfp->rx_buf_mutex); - while (xnfp->rx_bufs_outstanding > 0) { - mutex_exit(&xnfp->rx_buf_mutex); + mutex_enter(&xnfp->xnf_rx_buf_mutex); + while (xnfp->xnf_rx_bufs_outstanding > 0) { + mutex_exit(&xnfp->xnf_rx_buf_mutex); delay(drv_usectohz(100000)); if (--i == 0) { cmn_err(CE_WARN, "xnf%d: never reclaimed all the " "receive buffers. 
Still have %d " "buffers outstanding.", - ddi_get_instance(xnfp->devinfo), - xnfp->rx_bufs_outstanding); + ddi_get_instance(xnfp->xnf_devinfo), + xnfp->xnf_rx_bufs_outstanding); return (DDI_FAILURE); } - mutex_enter(&xnfp->rx_buf_mutex); + mutex_enter(&xnfp->xnf_rx_buf_mutex); } - mutex_exit(&xnfp->rx_buf_mutex); + mutex_exit(&xnfp->xnf_rx_buf_mutex); - kstat_delete(xnfp->kstat_aux); + kstat_delete(xnfp->xnf_kstat_aux); - if (mac_unregister(xnfp->mh) != 0) + if (mac_unregister(xnfp->xnf_mh) != 0) return (DDI_FAILURE); /* Stop the receiver */ @@ -893,7 +869,11 @@ xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) xvdi_remove_event_handler(devinfo, XS_OE_STATE); /* Remove the interrupt */ - ddi_remove_intr(devinfo, 0, xnfp->icookie); +#ifdef XPV_HVM_DRIVER + ec_unbind_evtchn(xnfp->xnf_evtchn); +#else + ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); +#endif /* Release any pending xmit mblks */ xnf_release_mblks(xnfp); @@ -901,10 +881,10 @@ xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) /* Release all DMA resources */ xnf_release_dma_resources(xnfp); - cv_destroy(&xnfp->cv); - mutex_destroy(&xnfp->rx_buf_mutex); - mutex_destroy(&xnfp->txlock); - mutex_destroy(&xnfp->intrlock); + cv_destroy(&xnfp->xnf_cv); + mutex_destroy(&xnfp->xnf_rx_buf_mutex); + mutex_destroy(&xnfp->xnf_txlock); + mutex_destroy(&xnfp->xnf_intrlock); kmem_free(xnfp, sizeof (*xnfp)); @@ -924,7 +904,7 @@ xnf_set_mac_addr(void *arg, const uint8_t *macaddr) if (xnfdebug & XNF_DEBUG_TRACE) printf("xnf%d: set_mac_addr(0x%p): " "%02x:%02x:%02x:%02x:%02x:%02x\n", - ddi_get_instance(xnfp->devinfo), + ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp, macaddr[0], macaddr[1], macaddr[2], macaddr[3], macaddr[4], macaddr[5]); #endif @@ -952,7 +932,7 @@ xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca) if (xnfdebug & XNF_DEBUG_TRACE) printf("xnf%d set_multicast(0x%p): " "%02x:%02x:%02x:%02x:%02x:%02x\n", - ddi_get_instance(xnfp->devinfo), + ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp, mca[0], mca[1], mca[2], mca[3], mca[4], mca[5]); #endif @@ -983,7 +963,7 @@ xnf_set_promiscuous(void *arg, boolean_t on) #ifdef XNF_DEBUG if (xnfdebug & XNF_DEBUG_TRACE) printf("xnf%d set_promiscuous(0x%p, %x)\n", - ddi_get_instance(xnfp->devinfo), + ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp, on); #endif /* @@ -1004,45 +984,46 @@ xnf_clean_tx_ring(xnf_t *xnfp) int id; grant_ref_t ref; - ASSERT(MUTEX_HELD(&xnfp->txlock)); + ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); do { /* * index of next transmission ack */ - next_resp = xnfp->tx_ring.sring->rsp_prod; + next_resp = xnfp->xnf_tx_ring.sring->rsp_prod; membar_consumer(); /* * Clean tx packets from ring that we have responses for */ - for (i = xnfp->tx_ring.rsp_cons; i != next_resp; i++) { - id = RING_GET_RESPONSE(&xnfp->tx_ring, i)->id; - reap = &xnfp->tx_pkt_info[id]; + for (i = xnfp->xnf_tx_ring.rsp_cons; i != next_resp; i++) { + id = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i)->id; + reap = &xnfp->xnf_tx_pkt_info[id]; ref = reap->grant_ref; /* * Return id to free list */ - reap->id = xnfp->tx_pkt_id_list; - xnfp->tx_pkt_id_list = id; + reap->id = xnfp->xnf_tx_pkt_id_list; + xnfp->xnf_tx_pkt_id_list = id; if (gnttab_query_foreign_access(ref) != 0) - panic("tx grant still in use" + panic("tx grant still in use " "by backend domain"); (void) ddi_dma_unbind_handle(reap->dma_handle); (void) gnttab_end_foreign_access_ref(ref, - xnfp->tx_pages_readonly); - gnttab_release_grant_reference(&xnfp->gref_tx_head, + xnfp->xnf_tx_pages_readonly); + 
gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head, ref); freemsg(reap->mp); reap->mp = NULL; reap->grant_ref = GRANT_INVALID_REF; if (reap->bdesc != NULL) - xnf_free_xmit_buffer(reap->bdesc); + xnf_free_tx_buffer(reap->bdesc); reap->bdesc = NULL; } - xnfp->tx_ring.rsp_cons = next_resp; + xnfp->xnf_tx_ring.rsp_cons = next_resp; membar_enter(); - } while (next_resp != xnfp->tx_ring.sring->rsp_prod); - return (NET_TX_RING_SIZE - (xnfp->tx_ring.sring->req_prod - next_resp)); + } while (next_resp != xnfp->xnf_tx_ring.sring->rsp_prod); + return (NET_TX_RING_SIZE - (xnfp->xnf_tx_ring.sring->req_prod - + next_resp)); } /* @@ -1062,15 +1043,15 @@ xnf_pullupmsg(xnf_t *xnfp, mblk_t *mp) /* * get a xmit buffer from the xmit buffer pool */ - mutex_enter(&xnfp->rx_buf_mutex); - bdesc = xnf_get_xmit_buffer(xnfp); - mutex_exit(&xnfp->rx_buf_mutex); + mutex_enter(&xnfp->xnf_rx_buf_mutex); + bdesc = xnf_get_tx_buffer(xnfp); + mutex_exit(&xnfp->xnf_rx_buf_mutex); if (bdesc == NULL) return (bdesc); /* * Copy the data into the buffer */ - xnfp->stat_xmit_pullup++; + xnfp->xnf_stat_tx_pullup++; bp = bdesc->buf; for (mptr = mp; mptr != NULL; mptr = mptr->b_cont) { len = mptr->b_wptr - mptr->b_rptr; @@ -1112,28 +1093,28 @@ xnf_send_one(xnf_t *xnfp, mblk_t *mp) #ifdef XNF_DEBUG if (xnfdebug & XNF_DEBUG_SEND) printf("xnf%d send(0x%p, 0x%p)\n", - ddi_get_instance(xnfp->devinfo), + ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp, (void *)mp); #endif ASSERT(mp != NULL); ASSERT(mp->b_next == NULL); - ASSERT(MUTEX_HELD(&xnfp->txlock)); + ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); tx_ring_freespace = xnf_clean_tx_ring(xnfp); ASSERT(tx_ring_freespace >= 0); - oeid = xvdi_get_oeid(xnfp->devinfo); - xnfp->stat_xmit_attempt++; + oeid = xvdi_get_oeid(xnfp->xnf_devinfo); + xnfp->xnf_stat_tx_attempt++; /* * If there are no xmit ring slots available, return. */ if (tx_ring_freespace == 0) { - xnfp->stat_xmit_defer++; + xnfp->xnf_stat_tx_defer++; return (B_FALSE); /* Send should be retried */ } - slot = xnfp->tx_ring.sring->req_prod; + slot = xnfp->xnf_tx_ring.sring->req_prod; /* Count the number of mblks in message and compute packet size */ for (i = 0, mptr = mp; mptr != NULL; mptr = mptr->b_cont, i++) pktlen += (mptr->b_wptr - mptr->b_rptr); @@ -1141,7 +1122,7 @@ xnf_send_one(xnf_t *xnfp, mblk_t *mp) /* Make sure packet isn't too large */ if (pktlen > XNF_FRAMESIZE) { cmn_err(CE_WARN, "xnf%d: large packet %d bytes", - ddi_get_instance(xnfp->devinfo), pktlen); + ddi_get_instance(xnfp->xnf_devinfo), pktlen); freemsg(mp); return (B_FALSE); } @@ -1159,14 +1140,14 @@ xnf_send_one(xnf_t *xnfp, mblk_t *mp) */ if (i > xnf_max_tx_frags || page_oops) { if (page_oops) - xnfp->stat_xmit_pagebndry++; + xnfp->xnf_stat_tx_pagebndry++; if ((xmitbuf = xnf_pullupmsg(xnfp, mp)) == NULL) { /* could not allocate resources? 
*/ #ifdef XNF_DEBUG cmn_err(CE_WARN, "xnf%d: pullupmsg failed", - ddi_get_instance(xnfp->devinfo)); + ddi_get_instance(xnfp->xnf_devinfo)); #endif - xnfp->stat_xmit_defer++; + xnfp->xnf_stat_tx_defer++; return (B_FALSE); /* Retry send */ } bufaddr = xmitbuf->buf; @@ -1181,10 +1162,10 @@ xnf_send_one(xnf_t *xnfp, mblk_t *mp) /* * Get packet id from free list */ - tx_id = xnfp->tx_pkt_id_list; + tx_id = xnfp->xnf_tx_pkt_id_list; ASSERT(tx_id < NET_TX_RING_SIZE); - txp_info = &xnfp->tx_pkt_info[tx_id]; - xnfp->tx_pkt_id_list = txp_info->id; + txp_info = &xnfp->xnf_tx_pkt_info[tx_id]; + xnfp->xnf_tx_pkt_id_list = txp_info->id; txp_info->id = tx_id; /* Prepare for DMA mapping of tx buffer(s) */ @@ -1197,27 +1178,27 @@ xnf_send_one(xnf_t *xnfp, mblk_t *mp) /* * Return id to free list */ - txp_info->id = xnfp->tx_pkt_id_list; - xnfp->tx_pkt_id_list = tx_id; + txp_info->id = xnfp->xnf_tx_pkt_id_list; + xnfp->xnf_tx_pkt_id_list = tx_id; if (rc == DDI_DMA_NORESOURCES) { - xnfp->stat_xmit_defer++; + xnfp->xnf_stat_tx_defer++; return (B_FALSE); /* Retry later */ } #ifdef XNF_DEBUG cmn_err(CE_WARN, "xnf%d: bind_handle failed (%x)", - ddi_get_instance(xnfp->devinfo), rc); + ddi_get_instance(xnfp->xnf_devinfo), rc); #endif return (B_FALSE); } ASSERT(ncookies == 1); - ref = gnttab_claim_grant_reference(&xnfp->gref_tx_head); + ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_tx_head); ASSERT((signed short)ref >= 0); mfn = xnf_btop(pa_to_ma((paddr_t)dma_cookie.dmac_laddress)); gnttab_grant_foreign_access_ref(ref, oeid, mfn, - xnfp->tx_pages_readonly); + xnfp->xnf_tx_pages_readonly); txp_info->grant_ref = ref; - txrp = RING_GET_REQUEST(&xnfp->tx_ring, slot); + txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot); txrp->gref = ref; txrp->size = dma_cookie.dmac_size; txrp->offset = (uintptr_t)bufaddr & PAGEOFFSET; @@ -1225,7 +1206,7 @@ xnf_send_one(xnf_t *xnfp, mblk_t *mp) txrp->flags = 0; hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags); if (pflags != 0) { - ASSERT(xnfp->cksum_offload); + ASSERT(xnfp->xnf_cksum_offload); /* * If the local protocol stack requests checksum * offload we set the 'checksum blank' flag, @@ -1236,27 +1217,28 @@ xnf_send_one(xnf_t *xnfp, mblk_t *mp) * validated that the data and the checksum match. */ txrp->flags |= NETTXF_csum_blank; - xnfp->stat_tx_cksum_deferred++; + xnfp->xnf_stat_tx_cksum_deferred++; } membar_producer(); - xnfp->tx_ring.sring->req_prod = slot + 1; + xnfp->xnf_tx_ring.sring->req_prod = slot + 1; txp_info->mp = mp; txp_info->bdesc = xmitbuf; - txs_out = xnfp->tx_ring.sring->req_prod - xnfp->tx_ring.sring->rsp_prod; - if (xnfp->tx_ring.sring->req_prod - xnfp->tx_ring.rsp_cons < + txs_out = xnfp->xnf_tx_ring.sring->req_prod - + xnfp->xnf_tx_ring.sring->rsp_prod; + if (xnfp->xnf_tx_ring.sring->req_prod - xnfp->xnf_tx_ring.rsp_cons < XNF_TX_FREE_THRESH) { /* * The ring is getting full; Set up this packet * to cause an interrupt. */ - xnfp->tx_ring.sring->rsp_event = - xnfp->tx_ring.sring->rsp_prod + txs_out; + xnfp->xnf_tx_ring.sring->rsp_event = + xnfp->xnf_tx_ring.sring->rsp_prod + txs_out; } - xnfp->stat_opackets++; - xnfp->stat_obytes += pktlen; + xnfp->xnf_stat_opackets++; + xnfp->xnf_stat_obytes += pktlen; return (B_TRUE); /* successful transmit attempt */ } @@ -1268,19 +1250,19 @@ xnf_send(void *arg, mblk_t *mp) mblk_t *next; boolean_t sent_something = B_FALSE; - mutex_enter(&xnfp->txlock); + mutex_enter(&xnfp->xnf_txlock); /* * Transmission attempts should be impossible without having * previously called xnf_start(). 
*/ - ASSERT(xnfp->running); + ASSERT(xnfp->xnf_running); /* * Wait for getting connected to the backend */ - while (!xnfp->connected) { - cv_wait(&xnfp->cv, &xnfp->txlock); + while (!xnfp->xnf_connected) { + cv_wait(&xnfp->xnf_cv, &xnfp->xnf_txlock); } while (mp != NULL) { @@ -1297,9 +1279,9 @@ xnf_send(void *arg, mblk_t *mp) } if (sent_something) - ec_notify_via_evtchn(xnfp->evtchn); + ec_notify_via_evtchn(xnfp->xnf_evtchn); - mutex_exit(&xnfp->txlock); + mutex_exit(&xnfp->xnf_txlock); return (mp); } @@ -1313,27 +1295,33 @@ xnf_intr(caddr_t arg) xnf_t *xnfp = (xnf_t *)arg; int tx_ring_space; - mutex_enter(&xnfp->intrlock); + mutex_enter(&xnfp->xnf_intrlock); /* * If not connected to the peer or not started by the upper * layers we cannot usefully handle interrupts. */ - if (!(xnfp->connected && xnfp->running)) { - mutex_exit(&xnfp->intrlock); + if (!(xnfp->xnf_connected && xnfp->xnf_running)) { + mutex_exit(&xnfp->xnf_intrlock); + xnfp->xnf_stat_unclaimed_interrupts++; return (DDI_INTR_UNCLAIMED); } #ifdef XNF_DEBUG if (xnfdebug & XNF_DEBUG_INT) printf("xnf%d intr(0x%p)\n", - ddi_get_instance(xnfp->devinfo), (void *)xnfp); + ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); #endif - if (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->rx_ring)) { + if (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { mblk_t *mp; - if ((mp = xnf_process_recv(xnfp)) != NULL) - mac_rx(xnfp->mh, xnfp->rx_handle, mp); + if (xnfp->xnf_rx_hvcopy) + mp = xnf_process_hvcopy_recv(xnfp); + else + mp = xnf_process_recv(xnfp); + + if (mp != NULL) + mac_rx(xnfp->xnf_mh, xnfp->xnf_rx_handle, mp); } /* @@ -1341,32 +1329,33 @@ xnf_intr(caddr_t arg) */ #define inuse(r) ((r).sring->req_prod - (r).rsp_cons) - if ((NET_TX_RING_SIZE - inuse(xnfp->tx_ring)) < XNF_TX_FREE_THRESH) { + if ((NET_TX_RING_SIZE - inuse(xnfp->xnf_tx_ring)) < + XNF_TX_FREE_THRESH) { /* * Yes, clean it and try to start any blocked xmit * streams. */ - mutex_enter(&xnfp->txlock); + mutex_enter(&xnfp->xnf_txlock); tx_ring_space = xnf_clean_tx_ring(xnfp); - mutex_exit(&xnfp->txlock); + mutex_exit(&xnfp->xnf_txlock); if (tx_ring_space > XNF_TX_FREE_THRESH) { - mutex_exit(&xnfp->intrlock); - mac_tx_update(xnfp->mh); - mutex_enter(&xnfp->intrlock); + mutex_exit(&xnfp->xnf_intrlock); + mac_tx_update(xnfp->xnf_mh); + mutex_enter(&xnfp->xnf_intrlock); } else { /* * Schedule another tx interrupt when we have * sent enough packets to cross the threshold. */ - xnfp->tx_ring.sring->rsp_event = - xnfp->tx_ring.sring->rsp_prod + + xnfp->xnf_tx_ring.sring->rsp_event = + xnfp->xnf_tx_ring.sring->rsp_prod + XNF_TX_FREE_THRESH - tx_ring_space + 1; } } #undef inuse - xnfp->stat_intr++; - mutex_exit(&xnfp->intrlock); + xnfp->xnf_stat_interrupts++; + mutex_exit(&xnfp->xnf_intrlock); return (DDI_INTR_CLAIMED); /* indicate that the interrupt was for us */ } @@ -1381,17 +1370,17 @@ xnf_start(void *arg) #ifdef XNF_DEBUG if (xnfdebug & XNF_DEBUG_TRACE) printf("xnf%d start(0x%p)\n", - ddi_get_instance(xnfp->devinfo), (void *)xnfp); + ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); #endif - mutex_enter(&xnfp->intrlock); - mutex_enter(&xnfp->txlock); + mutex_enter(&xnfp->xnf_intrlock); + mutex_enter(&xnfp->xnf_txlock); /* Accept packets from above. 
*/ - xnfp->running = B_TRUE; + xnfp->xnf_running = B_TRUE; - mutex_exit(&xnfp->txlock); - mutex_exit(&xnfp->intrlock); + mutex_exit(&xnfp->xnf_txlock); + mutex_exit(&xnfp->xnf_intrlock); return (0); } @@ -1405,16 +1394,16 @@ xnf_stop(void *arg) #ifdef XNF_DEBUG if (xnfdebug & XNF_DEBUG_TRACE) printf("xnf%d stop(0x%p)\n", - ddi_get_instance(xnfp->devinfo), (void *)xnfp); + ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); #endif - mutex_enter(&xnfp->intrlock); - mutex_enter(&xnfp->txlock); + mutex_enter(&xnfp->xnf_intrlock); + mutex_enter(&xnfp->xnf_txlock); - xnfp->running = B_FALSE; + xnfp->xnf_running = B_FALSE; - mutex_exit(&xnfp->txlock); - mutex_exit(&xnfp->intrlock); + mutex_exit(&xnfp->xnf_txlock); + mutex_exit(&xnfp->xnf_intrlock); } /* @@ -1428,30 +1417,203 @@ static void rx_buffer_hang(xnf_t *xnfp, struct xnf_buffer_desc *bdesc) { volatile netif_rx_request_t *reqp; - RING_IDX hang_ix; - grant_ref_t ref; - domid_t oeid; + RING_IDX hang_ix; + grant_ref_t ref; + domid_t oeid; - oeid = xvdi_get_oeid(xnfp->devinfo); + oeid = xvdi_get_oeid(xnfp->xnf_devinfo); - ASSERT(MUTEX_HELD(&xnfp->intrlock)); - reqp = RING_GET_REQUEST(&xnfp->rx_ring, xnfp->rx_ring.req_prod_pvt); - hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->rx_ring, 0)); - ASSERT(xnfp->rxpkt_bufptr[hang_ix] == NULL); + ASSERT(MUTEX_HELD(&xnfp->xnf_intrlock)); + reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, + xnfp->xnf_rx_ring.req_prod_pvt); + hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0)); + ASSERT(xnfp->xnf_rxpkt_bufptr[hang_ix] == NULL); if (bdesc->grant_ref == GRANT_INVALID_REF) { - ref = gnttab_claim_grant_reference(&xnfp->gref_rx_head); + ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_rx_head); ASSERT((signed short)ref >= 0); bdesc->grant_ref = ref; - gnttab_grant_foreign_transfer_ref(ref, oeid); + if (xnfp->xnf_rx_hvcopy) { + pfn_t pfn = xnf_btop(bdesc->buf_phys); + mfn_t mfn = pfn_to_mfn(pfn); + + gnttab_grant_foreign_access_ref(ref, oeid, mfn, 0); + } else { + gnttab_grant_foreign_transfer_ref(ref, oeid); + } } reqp->id = hang_ix; reqp->gref = bdesc->grant_ref; bdesc->id = hang_ix; - xnfp->rxpkt_bufptr[hang_ix] = bdesc; + xnfp->xnf_rxpkt_bufptr[hang_ix] = bdesc; membar_producer(); - xnfp->rx_ring.req_prod_pvt++; + xnfp->xnf_rx_ring.req_prod_pvt++; } +static mblk_t * +xnf_process_hvcopy_recv(xnf_t *xnfp) +{ + netif_rx_response_t *rxpkt; + mblk_t *mp, *head, *tail; + struct xnf_buffer_desc *bdesc; + boolean_t hwcsum = B_FALSE, notify, work_to_do; + size_t len; + + /* + * in loop over unconsumed responses, we do: + * 1. get a response + * 2. take corresponding buffer off recv. ring + * 3. indicate this by setting slot to NULL + * 4. create a new message and + * 5. copy data in, adjust ptr + * + * outside loop: + * 7. make sure no more data has arrived; kick HV + */ + + head = tail = NULL; + +loop: + while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { + + /* 1. */ + rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, + xnfp->xnf_rx_ring.rsp_cons); + + DTRACE_PROBE4(got_PKT, int, (int)rxpkt->id, int, + (int)rxpkt->offset, + int, (int)rxpkt->flags, int, (int)rxpkt->status); + + /* + * 2. 
+ * Take buffer off of receive ring + */ + hwcsum = B_FALSE; + bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id]; + /* 3 */ + xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL; + ASSERT(bdesc->id == rxpkt->id); + if (rxpkt->status <= 0) { + DTRACE_PROBE4(pkt_status_negative, int, rxpkt->status, + char *, bdesc->buf, int, rxpkt->offset, + char *, ((char *)bdesc->buf) + rxpkt->offset); + mp = NULL; + xnfp->xnf_stat_errrx++; + if (rxpkt->status == 0) + xnfp->xnf_stat_runt++; + if (rxpkt->status == NETIF_RSP_ERROR) + xnfp->xnf_stat_mac_rcv_error++; + if (rxpkt->status == NETIF_RSP_DROPPED) + xnfp->xnf_stat_norxbuf++; + /* + * re-hang the buffer + */ + rx_buffer_hang(xnfp, bdesc); + } else { + grant_ref_t ref = bdesc->grant_ref; + struct xnf_buffer_desc *new_bdesc; + unsigned long off = rxpkt->offset; + + DTRACE_PROBE4(pkt_status_ok, int, rxpkt->status, + char *, bdesc->buf, int, rxpkt->offset, + char *, ((char *)bdesc->buf) + rxpkt->offset); + len = rxpkt->status; + ASSERT(off + len <= PAGEOFFSET); + if (ref == GRANT_INVALID_REF) { + mp = NULL; + new_bdesc = bdesc; + cmn_err(CE_WARN, "Bad rx grant reference %d " + "from dom %d", ref, + xvdi_get_oeid(xnfp->xnf_devinfo)); + goto luckless; + } + /* + * Release ref which we'll be re-claiming in + * rx_buffer_hang(). + */ + bdesc->grant_ref = GRANT_INVALID_REF; + (void) gnttab_end_foreign_access_ref(ref, 0); + gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head, + ref); + if (rxpkt->flags & NETRXF_data_validated) + hwcsum = B_TRUE; + + /* + * XXPV for the initial implementation of HVcopy, + * create a new msg and copy in the data + */ + /* 4. */ + if ((mp = allocb(len, BPRI_MED)) == NULL) { + /* + * Couldn't get buffer to copy to, + * drop this data, and re-hang + * the buffer on the ring. + */ + xnfp->xnf_stat_norxbuf++; + DTRACE_PROBE(alloc_nix); + } else { + /* 5. */ + DTRACE_PROBE(alloc_ok); + bcopy(bdesc->buf + off, mp->b_wptr, + len); + mp->b_wptr += len; + } + new_bdesc = bdesc; +luckless: + + /* Re-hang old or hang new buffer. */ + rx_buffer_hang(xnfp, new_bdesc); + } + if (mp) { + if (hwcsum) { + /* + * See comments in xnf_process_recv(). + */ + + (void) hcksum_assoc(mp, NULL, + NULL, 0, 0, 0, 0, + HCK_FULLCKSUM | + HCK_FULLCKSUM_OK, + 0); + xnfp->xnf_stat_rx_cksum_no_need++; + } + if (head == NULL) { + head = tail = mp; + } else { + tail->b_next = mp; + tail = mp; + } + + ASSERT(mp->b_next == NULL); + + xnfp->xnf_stat_ipackets++; + xnfp->xnf_stat_rbytes += len; + } + + xnfp->xnf_rx_ring.rsp_cons++; + + xnfp->xnf_stat_hvcopy_packet_processed++; + } + + /* 7. */ + /* + * Has more data come in since we started? + */ + /* LINTED: constant in conditional context */ + RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do); + if (work_to_do) + goto loop; + + /* + * Indicate to the backend that we have re-filled the receive + * ring. 
+ */ + /* LINTED: constant in conditional context */ + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); + if (notify) + ec_notify_via_evtchn(xnfp->xnf_evtchn); + + return (head); +} /* Process all queued received packets */ static mblk_t * @@ -1468,27 +1630,27 @@ xnf_process_recv(xnf_t *xnfp) head = tail = NULL; loop: - while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->rx_ring)) { + while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { - rxpkt = RING_GET_RESPONSE(&xnfp->rx_ring, - xnfp->rx_ring.rsp_cons); + rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, + xnfp->xnf_rx_ring.rsp_cons); /* * Take buffer off of receive ring */ hwcsum = B_FALSE; - bdesc = xnfp->rxpkt_bufptr[rxpkt->id]; - xnfp->rxpkt_bufptr[rxpkt->id] = NULL; + bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id]; + xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL; ASSERT(bdesc->id == rxpkt->id); if (rxpkt->status <= 0) { mp = NULL; - xnfp->stat_errrcv++; + xnfp->xnf_stat_errrx++; if (rxpkt->status == 0) - xnfp->stat_runt++; + xnfp->xnf_stat_runt++; if (rxpkt->status == NETIF_RSP_ERROR) - xnfp->stat_mac_rcv_error++; + xnfp->xnf_stat_mac_rcv_error++; if (rxpkt->status == NETIF_RSP_DROPPED) - xnfp->stat_norcvbuf++; + xnfp->xnf_stat_norxbuf++; /* * re-hang the buffer */ @@ -1506,7 +1668,7 @@ loop: new_bdesc = bdesc; cmn_err(CE_WARN, "Bad rx grant reference %d " "from dom %d", ref, - xvdi_get_oeid(xnfp->devinfo)); + xvdi_get_oeid(xnfp->xnf_devinfo)); goto luckless; } bdesc->grant_ref = GRANT_INVALID_REF; @@ -1514,13 +1676,15 @@ loop: ASSERT(mfn != MFN_INVALID); ASSERT(hat_getpfnum(kas.a_hat, bdesc->buf) == PFN_INVALID); - gnttab_release_grant_reference(&xnfp->gref_rx_head, + + gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head, ref); reassign_pfn(xnf_btop(bdesc->buf_phys), mfn); hat_devload(kas.a_hat, bdesc->buf, PAGESIZE, xnf_btop(bdesc->buf_phys), PROT_READ | PROT_WRITE, HAT_LOAD); balloon_drv_added(1); + if (rxpkt->flags & NETRXF_data_validated) hwcsum = B_TRUE; if (len <= xnf_rx_bcopy_thresh) { @@ -1534,14 +1698,14 @@ loop: * We send a pointer to this data upstream; * we need a new buffer to replace this one. */ - mutex_enter(&xnfp->rx_buf_mutex); + mutex_enter(&xnfp->xnf_rx_buf_mutex); new_bdesc = xnf_get_buffer(xnfp); if (new_bdesc != NULL) { - xnfp->rx_bufs_outstanding++; + xnfp->xnf_rx_bufs_outstanding++; } else { - xnfp->stat_rx_no_ringbuf++; + xnfp->xnf_stat_rx_no_ringbuf++; } - mutex_exit(&xnfp->rx_buf_mutex); + mutex_exit(&xnfp->xnf_rx_buf_mutex); } if (new_bdesc == NULL) { @@ -1556,7 +1720,7 @@ loop: * drop this data, and re-hang * the buffer on the ring. */ - xnfp->stat_norcvbuf++; + xnfp->xnf_stat_norxbuf++; } else { bcopy(bdesc->buf + off, mp->b_wptr, len); @@ -1579,7 +1743,7 @@ loop: * Couldn't get mblk to pass recv data * up with, free the old ring buffer */ - xnfp->stat_norcvbuf++; + xnfp->xnf_stat_norxbuf++; xnf_rcv_complete(bdesc); goto luckless; } @@ -1624,7 +1788,7 @@ luckless: HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0); - xnfp->stat_rx_cksum_no_need++; + xnfp->xnf_stat_rx_cksum_no_need++; } if (head == NULL) { head = tail = mp; @@ -1635,18 +1799,18 @@ luckless: ASSERT(mp->b_next == NULL); - xnfp->stat_ipackets++; - xnfp->stat_rbytes += len; + xnfp->xnf_stat_ipackets++; + xnfp->xnf_stat_rbytes += len; } - xnfp->rx_ring.rsp_cons++; + xnfp->xnf_rx_ring.rsp_cons++; } /* * Has more data come in since we started? 
*/ /* LINTED: constant in conditional context */ - RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->rx_ring, work_to_do); + RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do); if (work_to_do) goto loop; @@ -1655,9 +1819,9 @@ luckless: * ring. */ /* LINTED: constant in conditional context */ - RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->rx_ring, notify); + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); if (notify) - ec_notify_via_evtchn(xnfp->evtchn); + ec_notify_via_evtchn(xnfp->xnf_evtchn); return (head); } @@ -1671,13 +1835,13 @@ xnf_rcv_complete(struct xnf_buffer_desc *bdesc) long cnt; /* One less outstanding receive buffer */ - mutex_enter(&xnfp->rx_buf_mutex); - --xnfp->rx_bufs_outstanding; + mutex_enter(&xnfp->xnf_rx_buf_mutex); + --xnfp->xnf_rx_bufs_outstanding; /* * Return buffer to the free list, unless the free list is getting - * too large. XXX - this threshold may need tuning. + * too large. XXPV - this threshold may need tuning. */ - if (xnfp->rx_descs_free < xnf_recv_bufs_lowat) { + if (xnfp->xnf_rx_descs_free < xnf_rx_bufs_lowat) { /* * Unmap the page, and hand the machine page back * to xen so it can be re-used as a backend net buffer. @@ -1689,17 +1853,17 @@ xnf_rcv_complete(struct xnf_buffer_desc *bdesc) "hypervisor\n"); } - bdesc->next = xnfp->free_list; - xnfp->free_list = bdesc; - xnfp->rx_descs_free++; - mutex_exit(&xnfp->rx_buf_mutex); + bdesc->next = xnfp->xnf_free_list; + xnfp->xnf_free_list = bdesc; + xnfp->xnf_rx_descs_free++; + mutex_exit(&xnfp->xnf_rx_buf_mutex); } else { /* * We can return everything here since we have a free buffer * that we have not given the backing page for back to xen. */ - --xnfp->recv_buffer_count; - mutex_exit(&xnfp->rx_buf_mutex); + --xnfp->xnf_rx_buffer_count; + mutex_exit(&xnfp->xnf_rx_buf_mutex); (void) ddi_dma_unbind_handle(bdesc->dma_handle); ddi_dma_mem_free(&bdesc->acc_handle); ddi_dma_free_handle(&bdesc->dma_handle); @@ -1713,7 +1877,7 @@ xnf_rcv_complete(struct xnf_buffer_desc *bdesc) static int xnf_alloc_dma_resources(xnf_t *xnfp) { - dev_info_t *devinfo = xnfp->devinfo; + dev_info_t *devinfo = xnfp->xnf_devinfo; int i; size_t len; ddi_dma_cookie_t dma_cookie; @@ -1722,10 +1886,10 @@ xnf_alloc_dma_resources(xnf_t *xnfp) int rc; caddr_t rptr; - xnfp->n_recvs = NET_RX_RING_SIZE; - xnfp->max_recv_bufs = xnf_recv_bufs_hiwat; + xnfp->xnf_n_rx = NET_RX_RING_SIZE; + xnfp->xnf_max_rx_bufs = xnf_rx_bufs_hiwat; - xnfp->n_xmits = NET_TX_RING_SIZE; + xnfp->xnf_n_tx = NET_TX_RING_SIZE; /* * The code below allocates all the DMA data structures that @@ -1734,10 +1898,10 @@ xnf_alloc_dma_resources(xnf_t *xnfp) * First allocate handles for mapping (virtual address) pointers to * transmit data buffers to physical addresses */ - for (i = 0; i < xnfp->n_xmits; i++) { + for (i = 0; i < xnfp->xnf_n_tx; i++) { if ((rc = ddi_dma_alloc_handle(devinfo, &tx_buffer_dma_attr, DDI_DMA_SLEEP, 0, - &xnfp->tx_pkt_info[i].dma_handle)) != DDI_SUCCESS) + &xnfp->xnf_tx_pkt_info[i].dma_handle)) != DDI_SUCCESS) return (DDI_FAILURE); } @@ -1745,25 +1909,25 @@ xnf_alloc_dma_resources(xnf_t *xnfp) * Allocate page for the transmit descriptor ring. 
*/ if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, - DDI_DMA_SLEEP, 0, &xnfp->tx_ring_dma_handle) != DDI_SUCCESS) + DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS) goto alloc_error; - if (ddi_dma_mem_alloc(xnfp->tx_ring_dma_handle, + if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle, PAGESIZE, &accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0, &rptr, &len, - &xnfp->tx_ring_dma_acchandle) != DDI_SUCCESS) { - ddi_dma_free_handle(&xnfp->tx_ring_dma_handle); - xnfp->tx_ring_dma_handle = NULL; + &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) { + ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); + xnfp->xnf_tx_ring_dma_handle = NULL; goto alloc_error; } - if ((rc = ddi_dma_addr_bind_handle(xnfp->tx_ring_dma_handle, NULL, + if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL, rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { - ddi_dma_mem_free(&xnfp->tx_ring_dma_acchandle); - ddi_dma_free_handle(&xnfp->tx_ring_dma_handle); - xnfp->tx_ring_dma_handle = NULL; - xnfp->tx_ring_dma_acchandle = NULL; + ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); + ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); + xnfp->xnf_tx_ring_dma_handle = NULL; + xnfp->xnf_tx_ring_dma_acchandle = NULL; if (rc == DDI_DMA_NORESOURCES) goto alloc_error; else @@ -1775,32 +1939,32 @@ xnf_alloc_dma_resources(xnf_t *xnfp) /* LINTED: constant in conditional context */ SHARED_RING_INIT((netif_tx_sring_t *)rptr); /* LINTED: constant in conditional context */ - FRONT_RING_INIT(&xnfp->tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE); - xnfp->tx_ring_phys_addr = dma_cookie.dmac_laddress; + FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE); + xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress; /* * Allocate page for the receive descriptor ring. 
*/ if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, - DDI_DMA_SLEEP, 0, &xnfp->rx_ring_dma_handle) != DDI_SUCCESS) + DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS) goto alloc_error; - if (ddi_dma_mem_alloc(xnfp->rx_ring_dma_handle, + if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle, PAGESIZE, &accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0, &rptr, &len, - &xnfp->rx_ring_dma_acchandle) != DDI_SUCCESS) { - ddi_dma_free_handle(&xnfp->rx_ring_dma_handle); - xnfp->rx_ring_dma_handle = NULL; + &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) { + ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); + xnfp->xnf_rx_ring_dma_handle = NULL; goto alloc_error; } - if ((rc = ddi_dma_addr_bind_handle(xnfp->rx_ring_dma_handle, NULL, + if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL, rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { - ddi_dma_mem_free(&xnfp->rx_ring_dma_acchandle); - ddi_dma_free_handle(&xnfp->rx_ring_dma_handle); - xnfp->rx_ring_dma_handle = NULL; - xnfp->rx_ring_dma_acchandle = NULL; + ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); + ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); + xnfp->xnf_rx_ring_dma_handle = NULL; + xnfp->xnf_rx_ring_dma_acchandle = NULL; if (rc == DDI_DMA_NORESOURCES) goto alloc_error; else @@ -1812,26 +1976,26 @@ xnf_alloc_dma_resources(xnf_t *xnfp) /* LINTED: constant in conditional context */ SHARED_RING_INIT((netif_rx_sring_t *)rptr); /* LINTED: constant in conditional context */ - FRONT_RING_INIT(&xnfp->rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE); - xnfp->rx_ring_phys_addr = dma_cookie.dmac_laddress; + FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE); + xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress; /* * Preallocate receive buffers for each receive descriptor. 
*/ /* Set up the "free list" of receive buffer descriptors */ - for (i = 0; i < xnfp->n_recvs; i++) { + for (i = 0; i < xnfp->xnf_n_rx; i++) { if ((bdesc = xnf_alloc_buffer(xnfp)) == NULL) goto alloc_error; - bdesc->next = xnfp->free_list; - xnfp->free_list = bdesc; + bdesc->next = xnfp->xnf_free_list; + xnfp->xnf_free_list = bdesc; } return (DDI_SUCCESS); alloc_error: cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory", - ddi_get_instance(xnfp->devinfo)); + ddi_get_instance(xnfp->xnf_devinfo)); error: xnf_release_dma_resources(xnfp); return (DDI_FAILURE); @@ -1851,28 +2015,28 @@ xnf_release_dma_resources(xnf_t *xnfp) * Free receive buffers which are currently associated with * descriptors */ - for (i = 0; i < xnfp->n_recvs; i++) { + for (i = 0; i < xnfp->xnf_n_rx; i++) { struct xnf_buffer_desc *bp; - if ((bp = xnfp->rxpkt_bufptr[i]) == NULL) + if ((bp = xnfp->xnf_rxpkt_bufptr[i]) == NULL) continue; xnf_free_buffer(bp); - xnfp->rxpkt_bufptr[i] = NULL; + xnfp->xnf_rxpkt_bufptr[i] = NULL; } /* Free the receive ring buffer */ - if (xnfp->rx_ring_dma_acchandle != NULL) { - (void) ddi_dma_unbind_handle(xnfp->rx_ring_dma_handle); - ddi_dma_mem_free(&xnfp->rx_ring_dma_acchandle); - ddi_dma_free_handle(&xnfp->rx_ring_dma_handle); - xnfp->rx_ring_dma_acchandle = NULL; + if (xnfp->xnf_rx_ring_dma_acchandle != NULL) { + (void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle); + ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); + ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); + xnfp->xnf_rx_ring_dma_acchandle = NULL; } /* Free the transmit ring buffer */ - if (xnfp->tx_ring_dma_acchandle != NULL) { - (void) ddi_dma_unbind_handle(xnfp->tx_ring_dma_handle); - ddi_dma_mem_free(&xnfp->tx_ring_dma_acchandle); - ddi_dma_free_handle(&xnfp->tx_ring_dma_handle); - xnfp->tx_ring_dma_acchandle = NULL; + if (xnfp->xnf_tx_ring_dma_acchandle != NULL) { + (void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle); + ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); + ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); + xnfp->xnf_tx_ring_dma_acchandle = NULL; } } @@ -1881,12 +2045,13 @@ xnf_release_mblks(xnf_t *xnfp) { int i; - for (i = 0; i < xnfp->n_xmits; i++) { - if (xnfp->tx_pkt_info[i].mp == NULL) + for (i = 0; i < xnfp->xnf_n_tx; i++) { + if (xnfp->xnf_tx_pkt_info[i].mp == NULL) continue; - freemsg(xnfp->tx_pkt_info[i].mp); - xnfp->tx_pkt_info[i].mp = NULL; - (void) ddi_dma_unbind_handle(xnfp->tx_pkt_info[i].dma_handle); + freemsg(xnfp->xnf_tx_pkt_info[i].mp); + xnfp->xnf_tx_pkt_info[i].mp = NULL; + (void) ddi_dma_unbind_handle( + xnfp->xnf_tx_pkt_info[i].dma_handle); } } @@ -1896,15 +2061,15 @@ xnf_release_mblks(xnf_t *xnfp) * Called with the tx_buf_mutex held. 
*/ static struct xnf_buffer_desc * -xnf_get_xmit_buffer(xnf_t *xnfp) +xnf_get_tx_buffer(xnf_t *xnfp) { struct xnf_buffer_desc *bdesc; - bdesc = xnfp->xmit_free_list; + bdesc = xnfp->xnf_tx_free_list; if (bdesc != NULL) { - xnfp->xmit_free_list = bdesc->next; + xnfp->xnf_tx_free_list = bdesc->next; } else { - bdesc = xnf_alloc_xmit_buffer(xnfp); + bdesc = xnf_alloc_tx_buffer(xnfp); } return (bdesc); } @@ -1919,10 +2084,10 @@ xnf_get_buffer(xnf_t *xnfp) { struct xnf_buffer_desc *bdesc; - bdesc = xnfp->free_list; + bdesc = xnfp->xnf_free_list; if (bdesc != NULL) { - xnfp->free_list = bdesc->next; - xnfp->rx_descs_free--; + xnfp->xnf_free_list = bdesc->next; + xnfp->xnf_rx_descs_free--; } else { bdesc = xnf_alloc_buffer(xnfp); } @@ -1933,32 +2098,45 @@ xnf_get_buffer(xnf_t *xnfp) * Free a xmit buffer back to the xmit free list */ static void -xnf_free_xmit_buffer(struct xnf_buffer_desc *bp) +xnf_free_tx_buffer(struct xnf_buffer_desc *bp) { xnf_t *xnfp = bp->xnfp; - mutex_enter(&xnfp->tx_buf_mutex); - bp->next = xnfp->xmit_free_list; - xnfp->xmit_free_list = bp; - mutex_exit(&xnfp->tx_buf_mutex); + mutex_enter(&xnfp->xnf_tx_buf_mutex); + bp->next = xnfp->xnf_tx_free_list; + xnfp->xnf_tx_free_list = bp; + mutex_exit(&xnfp->xnf_tx_buf_mutex); } /* * Put a buffer descriptor onto the head of the free list. + * for page-flip: * We can't really free these buffers back to the kernel * since we have given away their backing page to be used * by the back end net driver. + * for hvcopy: + * release all the memory */ static void -xnf_free_buffer(struct xnf_buffer_desc *bp) +xnf_free_buffer(struct xnf_buffer_desc *bdesc) { - xnf_t *xnfp = bp->xnfp; + xnf_t *xnfp = bdesc->xnfp; - mutex_enter(&xnfp->rx_buf_mutex); - bp->next = xnfp->free_list; - xnfp->free_list = bp; - xnfp->rx_descs_free++; - mutex_exit(&xnfp->rx_buf_mutex); + mutex_enter(&xnfp->xnf_rx_buf_mutex); + if (xnfp->xnf_rx_hvcopy) { + if (ddi_dma_unbind_handle(bdesc->dma_handle) != DDI_SUCCESS) + goto out; + ddi_dma_mem_free(&bdesc->acc_handle); + ddi_dma_free_handle(&bdesc->dma_handle); + kmem_free(bdesc, sizeof (*bdesc)); + xnfp->xnf_rx_buffer_count--; + } else { + bdesc->next = xnfp->xnf_free_list; + xnfp->xnf_free_list = bdesc; + xnfp->xnf_rx_descs_free++; + } +out: + mutex_exit(&xnfp->xnf_rx_buf_mutex); } /* @@ -1966,7 +2144,7 @@ xnf_free_buffer(struct xnf_buffer_desc *bp) * keep track of the buffer. Called with tx_buf_mutex held. 
*/ static struct xnf_buffer_desc * -xnf_alloc_xmit_buffer(xnf_t *xnfp) +xnf_alloc_tx_buffer(xnf_t *xnfp) { struct xnf_buffer_desc *bdesc; size_t len; @@ -1975,7 +2153,7 @@ xnf_alloc_xmit_buffer(xnf_t *xnfp) return (NULL); /* allocate a DMA access handle for receive buffer */ - if (ddi_dma_alloc_handle(xnfp->devinfo, &tx_buffer_dma_attr, + if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &tx_buffer_dma_attr, 0, 0, &bdesc->dma_handle) != DDI_SUCCESS) goto failure; @@ -1983,14 +2161,14 @@ xnf_alloc_xmit_buffer(xnf_t *xnfp) if (ddi_dma_mem_alloc(bdesc->dma_handle, PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0, &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS) - goto late_failure; + goto failure_1; bdesc->xnfp = xnfp; - xnfp->xmit_buffer_count++; + xnfp->xnf_tx_buffer_count++; return (bdesc); -late_failure: +failure_1: ddi_dma_free_handle(&bdesc->dma_handle); failure: @@ -2012,14 +2190,14 @@ xnf_alloc_buffer(xnf_t *xnfp) long cnt; pfn_t pfn; - if (xnfp->recv_buffer_count >= xnfp->max_recv_bufs) + if (xnfp->xnf_rx_buffer_count >= xnfp->xnf_max_rx_bufs) return (NULL); if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL) return (NULL); /* allocate a DMA access handle for receive buffer */ - if (ddi_dma_alloc_handle(xnfp->devinfo, &rx_buffer_dma_attr, + if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &rx_buffer_dma_attr, 0, 0, &bdesc->dma_handle) != DDI_SUCCESS) goto failure; @@ -2027,39 +2205,46 @@ xnf_alloc_buffer(xnf_t *xnfp) if (ddi_dma_mem_alloc(bdesc->dma_handle, PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0, &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS) - goto late_failure; + goto failure_1; /* bind to virtual address of buffer to get physical address */ if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL, bdesc->buf, PAGESIZE, DDI_DMA_READ | DDI_DMA_STREAMING, DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED) - goto late_late_failure; + goto failure_2; bdesc->buf_phys = dma_cookie.dmac_laddress; bdesc->xnfp = xnfp; - bdesc->free_rtn.free_func = xnf_rcv_complete; + if (xnfp->xnf_rx_hvcopy) { + bdesc->free_rtn.free_func = xnf_copy_rcv_complete; + } else { + bdesc->free_rtn.free_func = xnf_rcv_complete; + } bdesc->free_rtn.free_arg = (char *)bdesc; bdesc->grant_ref = GRANT_INVALID_REF; ASSERT(ncookies == 1); - xnfp->recv_buffer_count++; - /* - * Unmap the page, and hand the machine page back - * to xen so it can be used as a backend net buffer. - */ - pfn = xnf_btop(bdesc->buf_phys); - cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn); - if (cnt != 1) { - cmn_err(CE_WARN, "unable to give a page back to the " - "hypervisor\n"); + xnfp->xnf_rx_buffer_count++; + + if (!xnfp->xnf_rx_hvcopy) { + /* + * Unmap the page, and hand the machine page back + * to xen so it can be used as a backend net buffer. + */ + pfn = xnf_btop(bdesc->buf_phys); + cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn); + if (cnt != 1) { + cmn_err(CE_WARN, "unable to give a page back to the " + "hypervisor\n"); + } } return (bdesc); -late_late_failure: +failure_2: ddi_dma_mem_free(&bdesc->acc_handle); -late_failure: +failure_1: ddi_dma_free_handle(&bdesc->dma_handle); failure: @@ -2067,40 +2252,129 @@ failure: return (NULL); } +/* + * Statistics. 
+ */ +static char *xnf_aux_statistics[] = { + "tx_cksum_deferred", + "rx_cksum_no_need", + "interrupts", + "unclaimed_interrupts", + "tx_pullup", + "tx_pagebndry", + "tx_attempt", + "rx_no_ringbuf", + "hvcopy_packet_processed", +}; + +static int +xnf_kstat_aux_update(kstat_t *ksp, int flag) +{ + xnf_t *xnfp; + kstat_named_t *knp; + + if (flag != KSTAT_READ) + return (EACCES); + + xnfp = ksp->ks_private; + knp = ksp->ks_data; + + /* + * Assignment order must match that of the names in + * xnf_aux_statistics. + */ + (knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred; + (knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need; + + (knp++)->value.ui64 = xnfp->xnf_stat_interrupts; + (knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts; + (knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup; + (knp++)->value.ui64 = xnfp->xnf_stat_tx_pagebndry; + (knp++)->value.ui64 = xnfp->xnf_stat_tx_attempt; + (knp++)->value.ui64 = xnfp->xnf_stat_rx_no_ringbuf; + + (knp++)->value.ui64 = xnfp->xnf_stat_hvcopy_packet_processed; + + return (0); +} + +static boolean_t +xnf_kstat_init(xnf_t *xnfp) +{ + int nstat = sizeof (xnf_aux_statistics) / + sizeof (xnf_aux_statistics[0]); + char **cp = xnf_aux_statistics; + kstat_named_t *knp; + + /* + * Create and initialise kstats. + */ + if ((xnfp->xnf_kstat_aux = kstat_create("xnf", + ddi_get_instance(xnfp->xnf_devinfo), + "aux_statistics", "net", KSTAT_TYPE_NAMED, + nstat, 0)) == NULL) + return (B_FALSE); + + xnfp->xnf_kstat_aux->ks_private = xnfp; + xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update; + + knp = xnfp->xnf_kstat_aux->ks_data; + while (nstat > 0) { + kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); + + knp++; + cp++; + nstat--; + } + + kstat_install(xnfp->xnf_kstat_aux); + + return (B_TRUE); +} + static int xnf_stat(void *arg, uint_t stat, uint64_t *val) { xnf_t *xnfp = arg; - mutex_enter(&xnfp->intrlock); - mutex_enter(&xnfp->txlock); + mutex_enter(&xnfp->xnf_intrlock); + mutex_enter(&xnfp->xnf_txlock); -#define map_stat(q, r) \ +#define mac_stat(q, r) \ case (MAC_STAT_##q): \ - *val = xnfp->stat_##r; \ + *val = xnfp->xnf_stat_##r; \ + break + +#define ether_stat(q, r) \ + case (ETHER_STAT_##q): \ + *val = xnfp->xnf_stat_##r; \ break switch (stat) { - map_stat(IPACKETS, ipackets); - map_stat(OPACKETS, opackets); - map_stat(RBYTES, rbytes); - map_stat(OBYTES, obytes); - map_stat(NORCVBUF, norcvbuf); - map_stat(IERRORS, errrcv); - map_stat(NOXMTBUF, xmit_defer); + mac_stat(IPACKETS, ipackets); + mac_stat(OPACKETS, opackets); + mac_stat(RBYTES, rbytes); + mac_stat(OBYTES, obytes); + mac_stat(NORCVBUF, norxbuf); + mac_stat(IERRORS, errrx); + mac_stat(NOXMTBUF, tx_defer); + + ether_stat(MACRCV_ERRORS, mac_rcv_error); + ether_stat(TOOSHORT_ERRORS, runt); default: - mutex_exit(&xnfp->txlock); - mutex_exit(&xnfp->intrlock); + mutex_exit(&xnfp->xnf_txlock); + mutex_exit(&xnfp->xnf_intrlock); return (ENOTSUP); } -#undef map_stat +#undef mac_stat +#undef ether_stat - mutex_exit(&xnfp->txlock); - mutex_exit(&xnfp->intrlock); + mutex_exit(&xnfp->xnf_txlock); + mutex_exit(&xnfp->xnf_intrlock); return (0); } @@ -2134,7 +2408,7 @@ xnf_resources(void *arg) mrf.mrf_normal_blank_time = 128; /* XXPV dme: see xnf_blank() */ mrf.mrf_normal_pkt_count = 8; /* XXPV dme: see xnf_blank() */ - xnfp->rx_handle = mac_resource_add(xnfp->mh, + xnfp->xnf_rx_handle = mac_resource_add(xnfp->xnf_mh, (mac_resource_t *)&mrf); } @@ -2166,7 +2440,7 @@ xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data) * not zero. 
(In fact, a Solaris dom0 is happy to deal * with a checksum of zero, but a Linux dom0 is not.) */ - if (xnfp->cksum_offload) + if (xnfp->xnf_cksum_offload) *capab = HCKSUM_INET_PARTIAL; else *capab = 0; @@ -2196,19 +2470,42 @@ oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, switch (new_state) { case XenbusStateConnected: - mutex_enter(&xnfp->intrlock); - mutex_enter(&xnfp->txlock); + mutex_enter(&xnfp->xnf_intrlock); + mutex_enter(&xnfp->xnf_txlock); - xnfp->connected = B_TRUE; - cv_broadcast(&xnfp->cv); + xnfp->xnf_connected = B_TRUE; + cv_broadcast(&xnfp->xnf_cv); - mutex_exit(&xnfp->txlock); - mutex_exit(&xnfp->intrlock); + mutex_exit(&xnfp->xnf_txlock); + mutex_exit(&xnfp->xnf_intrlock); - ec_notify_via_evtchn(xnfp->evtchn); + ec_notify_via_evtchn(xnfp->xnf_evtchn); break; default: break; } } + +/* + * Check whether backend is capable of and willing to talk + * to us via hypervisor copy, as opposed to page flip. + */ +static boolean_t +xnf_hvcopy_peer_status(dev_info_t *devinfo) +{ + int be_rx_copy; + int err; + + err = xenbus_scanf(XBT_NULL, xvdi_get_oename(devinfo), + "feature-rx-copy", "%d", &be_rx_copy); + /* + * If we fail to read the store we assume that the key is + * absent, implying an older domain at the far end. Older + * domains cannot do HV copy (we assume ..). + */ + if (err != 0) + be_rx_copy = 0; + + return (be_rx_copy?B_TRUE:B_FALSE); +} |
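The teardown hunks in xnf_release_dma_resources() above undo each ring in the reverse order of its setup: the bind is released first, then the DMA memory, then the handle itself. A minimal sketch of that pairing, with placeholder argument names rather than the xnf_t fields:

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

/*
 * Placeholder names; mirrors the unbind/free ordering used for the
 * rx and tx ring buffers in xnf_release_dma_resources().
 */
static void
example_ring_teardown(ddi_dma_handle_t *dhp, ddi_acc_handle_t *ahp)
{
	/* Release the mapping created by ddi_dma_addr_bind_handle(). */
	(void) ddi_dma_unbind_handle(*dhp);

	/* Free the memory from ddi_dma_mem_alloc() ... */
	ddi_dma_mem_free(ahp);

	/* ... and last, the handle from ddi_dma_alloc_handle(). */
	ddi_dma_free_handle(dhp);
}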
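xnf_free_buffer() is normally reached when the network stack finally frees a receive mblk that was loaned up with the buffer descriptor's free_rtn attached: under page-flip the descriptor is recycled onto xnf_free_list (its backing machine page now belongs to the backend), while under hvcopy the DMA resources can be released outright. A sketch, assuming only the struct xnf_buffer_desc fields shown in the patch (buf, free_rtn) and an illustrative wrapper name, of how such a descriptor is typically handed to the stack with desballoc(9F):

#include <sys/types.h>
#include <sys/stream.h>
#include <io/xnf.h>

/*
 * Illustrative wrapper: loan bdesc->buf to the stack as a zero-copy
 * mblk.  When the mblk is freed, STREAMS calls free_rtn.free_func
 * (xnf_rcv_complete or xnf_copy_rcv_complete in the patch), which in
 * turn returns the descriptor through xnf_free_buffer().
 */
static mblk_t *
example_loan_rx_buffer(struct xnf_buffer_desc *bdesc, size_t len)
{
	mblk_t *mp;

	/* free_func and free_arg were set in xnf_alloc_buffer(). */
	mp = desballoc((unsigned char *)bdesc->buf, len, 0,
	    &bdesc->free_rtn);
	if (mp == NULL)
		return (NULL);

	mp->b_wptr = mp->b_rptr + len;
	return (mp);
}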
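The allocation side in xnf_alloc_buffer() follows the usual three-step DDI sequence, with the renamed failure_1/failure_2 labels unwinding exactly what has been set up so far; in page-flip mode the freshly bound page is then handed to the hypervisor through balloon_free_pages(), whereas under hvcopy it stays with the domain. A condensed sketch of the allocate-and-unwind skeleton, with an assumed helper name and caller-supplied attributes:

#include <sys/types.h>
#include <sys/param.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

/*
 * Assumed helper name; the caller supplies DMA/access attributes that
 * restrict the buffer to a single cookie, as the driver's do.
 */
static int
example_alloc_dma_buf(dev_info_t *dip, ddi_dma_attr_t *dma_attr,
    ddi_device_acc_attr_t *acc_attr, ddi_dma_handle_t *dhp,
    ddi_acc_handle_t *ahp, caddr_t *bufp, uint64_t *paddr)
{
	ddi_dma_cookie_t cookie;
	uint_t ncookies;
	size_t len;

	if (ddi_dma_alloc_handle(dip, dma_attr, 0, 0, dhp) != DDI_SUCCESS)
		goto failure;

	if (ddi_dma_mem_alloc(*dhp, PAGESIZE, acc_attr, DDI_DMA_STREAMING,
	    0, 0, bufp, &len, ahp) != DDI_SUCCESS)
		goto failure_1;

	if (ddi_dma_addr_bind_handle(*dhp, NULL, *bufp, PAGESIZE,
	    DDI_DMA_READ | DDI_DMA_STREAMING, DDI_DMA_SLEEP, 0,
	    &cookie, &ncookies) != DDI_DMA_MAPPED)
		goto failure_2;

	/* One cookie by construction of the attributes. */
	*paddr = cookie.dmac_laddress;
	return (DDI_SUCCESS);

failure_2:
	ddi_dma_mem_free(ahp);
failure_1:
	ddi_dma_free_handle(dhp);
failure:
	return (DDI_FAILURE);
}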
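The new aux_statistics kstat relies on a positional contract: xnf_kstat_aux_update() must assign counters in exactly the order the names appear in xnf_aux_statistics[]. A stripped-down sketch of that pattern under assumed example_* names (two counters instead of nine):

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/kstat.h>

typedef struct example_softc {
	kstat_t		*sc_ksp;
	uint64_t	sc_stat_interrupts;
	uint64_t	sc_stat_tx_pullup;
} example_softc_t;

static char *example_stat_names[] = {
	"interrupts",
	"tx_pullup",
};

static int
example_kstat_update(kstat_t *ksp, int flag)
{
	example_softc_t *scp = ksp->ks_private;
	kstat_named_t *knp = ksp->ks_data;

	if (flag != KSTAT_READ)
		return (EACCES);

	/* Assignment order must match example_stat_names[]. */
	(knp++)->value.ui64 = scp->sc_stat_interrupts;
	(knp++)->value.ui64 = scp->sc_stat_tx_pullup;

	return (0);
}

Adding a counter later means appending a name and a matching assignment at the same position; once the kstat is installed, the values should be readable from userland with something like "kstat -m xnf -n aux_statistics" (module and name as passed to kstat_create() above).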
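xnf_hvcopy_peer_status() treats a missing feature-rx-copy key as "no", which keeps older backends on the page-flip path. The same read-with-default shape works for any optional peer feature; a small sketch (helper name assumed; declarations are picked up through the driver's existing includes such as sys/hypervisor.h and xen/sys/xendev.h):

/*
 * Illustrative helper: read an optional integer feature key from the
 * peer's xenstore directory, treating an absent key as 0 (older peer).
 */
static boolean_t
example_peer_feature(dev_info_t *devinfo, const char *key)
{
	int val;

	if (xenbus_scanf(XBT_NULL, xvdi_get_oename(devinfo),
	    key, "%d", &val) != 0)
		val = 0;

	return (val ? B_TRUE : B_FALSE);
}

The other half of the negotiation, in which the frontend writes a matching request-rx-copy node when it sets up the rings (the usual netfront/netback convention), presumably happens outside the hunks shown here.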