diff options
author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2016-07-01 13:32:03 +0000 |
---|---|---|
committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2016-07-01 13:32:03 +0000 |
commit | 05a1c92c75f724c65de83048dd8ecc072c4dcd92 (patch) | |
tree | ebc7e72a4c504a1974d8e0d6710d05e605476fe8 | |
parent | b1a0afdfc97cc5965a2a004b7a8ab98f9ba15c56 (diff) | |
parent | 970db7b7a5b4656c659fc7c5226da7be313dc336 (diff) | |
download | illumos-joyent-05a1c92c75f724c65de83048dd8ecc072c4dcd92.tar.gz |
[illumos-gate merge]
commit 970db7b7a5b4656c659fc7c5226da7be313dc336
7089 MSI-x on KVM prevents VMs from accessing the network
Conflicts:
usr/src/uts/common/io/vioif/vioif.c
-rw-r--r-- | usr/src/uts/common/io/vioif/vioif.c | 228 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/pcplusmp/apic_common.c | 17 |
2 files changed, 142 insertions, 103 deletions
diff --git a/usr/src/uts/common/io/vioif/vioif.c b/usr/src/uts/common/io/vioif/vioif.c index 27241894aa..ef677d791d 100644 --- a/usr/src/uts/common/io/vioif/vioif.c +++ b/usr/src/uts/common/io/vioif/vioif.c @@ -11,8 +11,8 @@ /* * Copyright 2013 Nexenta Inc. All rights reserved. - * Copyright (c) 2014, 2015 by Delphix. All rights reserved. * Copyright 2015 Joyent, Inc. + * Copyright (c) 2014, 2016 by Delphix. All rights reserved. */ /* Based on the NetBSD virtio driver by Minoura Makoto. */ @@ -187,16 +187,15 @@ static int vioif_attach(dev_info_t *, ddi_attach_cmd_t); static int vioif_detach(dev_info_t *, ddi_detach_cmd_t); DDI_DEFINE_STREAM_OPS(vioif_ops, - nulldev, /* identify */ - nulldev, /* probe */ - vioif_attach, /* attach */ - vioif_detach, /* detach */ - nodev, /* reset */ - NULL, /* cb_ops */ - D_MP, /* bus_ops */ - NULL, /* power */ - vioif_quiesce /* quiesce */ -); + nulldev, /* identify */ + nulldev, /* probe */ + vioif_attach, /* attach */ + vioif_detach, /* detach */ + nodev, /* reset */ + NULL, /* cb_ops */ + D_MP, /* bus_ops */ + NULL, /* power */ + vioif_quiesce /* quiesce */); static char vioif_ident[] = "VirtIO ethernet driver"; @@ -729,109 +728,103 @@ vioif_unicst(void *arg, const uint8_t *macaddr) } -static int +static uint_t vioif_add_rx(struct vioif_softc *sc, int kmflag) { - struct vq_entry *ve; - struct vioif_rx_buf *buf; + uint_t num_added = 0; - ve = vq_alloc_entry(sc->sc_rx_vq); - if (ve == NULL) { - /* - * Out of free descriptors - ring already full. - */ - sc->sc_rxfail_no_descriptors++; - sc->sc_norecvbuf++; - goto exit_vq; - } - buf = sc->sc_rxbufs[ve->qe_index]; + for (;;) { + struct vq_entry *ve; + struct vioif_rx_buf *buf; - if (buf == NULL) { - /* First run, allocate the buffer. */ - buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag); - sc->sc_rxbufs[ve->qe_index] = buf; - } + ve = vq_alloc_entry(sc->sc_rx_vq); + if (!ve) { + /* + * Out of free descriptors - ring already full. + * It would be better to update sc_norxdescavail + * but MAC does not ask for this info, hence we + * update sc_norecvbuf. + */ + sc->sc_rxfail_no_descriptors++; + sc->sc_norecvbuf++; + break; + } + buf = sc->sc_rxbufs[ve->qe_index]; - /* Still nothing? Bye. */ - if (buf == NULL) { - sc->sc_norecvbuf++; - goto exit_buf; - } + if (!buf) { + /* First run, allocate the buffer. */ + buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag); + sc->sc_rxbufs[ve->qe_index] = buf; + } - ASSERT(buf->rb_mapping.vbm_ncookies >= 1); + /* Still nothing? Bye. */ + if (buf == NULL) { + sc->sc_norecvbuf++; + vq_free_entry(sc->sc_rx_vq, ve); + break; + } - /* - * For an unknown reason, the virtio_net_hdr must be placed - * as a separate virtio queue entry. - */ - virtio_ve_add_indirect_buf(ve, buf->rb_mapping.vbm_dmac.dmac_laddress, - sizeof (struct virtio_net_hdr), B_FALSE); + ASSERT(buf->rb_mapping.vbm_ncookies >= 1); - /* Add the rest of the first cookie. */ - virtio_ve_add_indirect_buf(ve, - buf->rb_mapping.vbm_dmac.dmac_laddress + - sizeof (struct virtio_net_hdr), - buf->rb_mapping.vbm_dmac.dmac_size - - sizeof (struct virtio_net_hdr), B_FALSE); + /* + * For an unknown reason, the virtio_net_hdr must be placed + * as a separate virtio queue entry. + */ + virtio_ve_add_indirect_buf(ve, + buf->rb_mapping.vbm_dmac.dmac_laddress, + sizeof (struct virtio_net_hdr), B_FALSE); - /* - * If the buffer consists of a single cookie (unlikely for a - * 64-k buffer), we are done. Otherwise, add the rest of the cookies - * using indirect entries. - */ - if (buf->rb_mapping.vbm_ncookies > 1) { - ddi_dma_cookie_t *first_extra_dmac; - ddi_dma_cookie_t dmac; - first_extra_dmac = - vioif_dma_curr_cookie(buf->rb_mapping.vbm_dmah); - - ddi_dma_nextcookie(buf->rb_mapping.vbm_dmah, &dmac); - virtio_ve_add_cookie(ve, buf->rb_mapping.vbm_dmah, - dmac, buf->rb_mapping.vbm_ncookies - 1, B_FALSE); - vioif_dma_reset_cookie(buf->rb_mapping.vbm_dmah, - first_extra_dmac); - } + /* Add the rest of the first cookie. */ + virtio_ve_add_indirect_buf(ve, + buf->rb_mapping.vbm_dmac.dmac_laddress + + sizeof (struct virtio_net_hdr), + buf->rb_mapping.vbm_dmac.dmac_size - + sizeof (struct virtio_net_hdr), B_FALSE); - virtio_push_chain(ve, B_FALSE); + /* + * If the buffer consists of a single cookie (unlikely for a + * 64-k buffer), we are done. Otherwise, add the rest of the + * cookies using indirect entries. + */ + if (buf->rb_mapping.vbm_ncookies > 1) { + ddi_dma_cookie_t *first_extra_dmac; + ddi_dma_cookie_t dmac; + first_extra_dmac = + vioif_dma_curr_cookie(buf->rb_mapping.vbm_dmah); + + ddi_dma_nextcookie(buf->rb_mapping.vbm_dmah, &dmac); + virtio_ve_add_cookie(ve, buf->rb_mapping.vbm_dmah, + dmac, buf->rb_mapping.vbm_ncookies - 1, B_FALSE); + vioif_dma_reset_cookie(buf->rb_mapping.vbm_dmah, + first_extra_dmac); + } - return (DDI_SUCCESS); + virtio_push_chain(ve, B_FALSE); + num_added++; + } -exit_buf: - vq_free_entry(sc->sc_rx_vq, ve); -exit_vq: - return (DDI_FAILURE); + return (num_added); } -static int +static uint_t vioif_populate_rx(struct vioif_softc *sc, int kmflag) { - int i = 0; + uint_t num_added = vioif_add_rx(sc, kmflag); - for (;;) { - if (vioif_add_rx(sc, kmflag) != DDI_SUCCESS) { - /* - * We could not allocate some memory. Try to work with - * what we've got. - */ - break; - } - i++; - } - - if (i != 0) + if (num_added > 0) virtio_sync_vq(sc->sc_rx_vq); - return (i); + return (num_added); } -static int +static uint_t vioif_process_rx(struct vioif_softc *sc) { struct vq_entry *ve; struct vioif_rx_buf *buf; - mblk_t *mp; + mblk_t *mphead = NULL, *lastmp = NULL, *mp; uint32_t len; - int i = 0; + uint_t num_processed = 0; while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) { @@ -847,7 +840,7 @@ vioif_process_rx(struct vioif_softc *sc) len -= sizeof (struct virtio_net_hdr); /* - * We copy small packets that happenned to fit into a single + * We copy small packets that happen to fit into a single * cookie and reuse the buffers. For bigger ones, we loan * the buffers upstream. */ @@ -902,21 +895,31 @@ vioif_process_rx(struct vioif_softc *sc) sc->sc_ipackets++; virtio_free_chain(ve); - mac_rx(sc->sc_mac_handle, NULL, mp); - i++; + + if (lastmp == NULL) { + mphead = mp; + } else { + lastmp->b_next = mp; + } + lastmp = mp; + num_processed++; } - return (i); + if (mphead != NULL) { + mac_rx(sc->sc_mac_handle, NULL, mphead); + } + + return (num_processed); } -static void +static uint_t vioif_reclaim_used_tx(struct vioif_softc *sc) { struct vq_entry *ve; struct vioif_tx_buf *buf; uint32_t len; mblk_t *mp; - unsigned chains = 0; + uint_t num_reclaimed = 0; while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) { /* We don't chain descriptors for tx, so don't expect any. */ @@ -938,13 +941,15 @@ vioif_reclaim_used_tx(struct vioif_softc *sc) /* External mapping used, mp was not freed in vioif_send() */ if (mp != NULL) freemsg(mp); - chains++; + num_reclaimed++; } - if (sc->sc_tx_stopped != 0 && chains > 0) { + if (sc->sc_tx_stopped && num_reclaimed > 0) { sc->sc_tx_stopped = 0; mac_tx_update(sc->sc_mac_handle); } + + return (num_reclaimed); } /* sc will be used to update stat counters. */ @@ -1213,11 +1218,28 @@ int vioif_start(void *arg) { struct vioif_softc *sc = arg; + struct vq_entry *ve; + uint32_t len; mac_link_update(sc->sc_mac_handle, vioif_link_state(sc)); virtio_start_vq_intr(sc->sc_rx_vq); + /* + * Don't start interrupts on sc_tx_vq. We use VIRTIO_F_NOTIFY_ON_EMPTY, + * so the device will send a transmit interrupt when the queue is empty + * and we can reclaim it in one sweep. + */ + + /* + * Clear any data that arrived early on the receive queue and populate + * it with free buffers that the device can use moving forward. + */ + while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len)) != NULL) { + virtio_free_chain(ve); + } + (void) vioif_populate_rx(sc, KM_SLEEP); + return (DDI_SUCCESS); } @@ -1585,8 +1607,12 @@ vioif_rx_handler(caddr_t arg1, caddr_t arg2) struct vioif_softc *sc = container_of(vsc, struct vioif_softc, sc_virtio); + /* + * The return values of these functions are not needed but they make + * debugging interrupts simpler because you can use them to detect when + * stuff was processed and repopulated in this handler. + */ (void) vioif_process_rx(sc); - (void) vioif_populate_rx(sc, KM_NOSLEEP); return (DDI_INTR_CLAIMED); @@ -1600,7 +1626,13 @@ vioif_tx_handler(caddr_t arg1, caddr_t arg2) struct vioif_softc *sc = container_of(vsc, struct vioif_softc, sc_virtio); - vioif_reclaim_used_tx(sc); + /* + * The return value of this function is not needed but makes debugging + * interrupts simpler because you can use it to detect if anything was + * reclaimed in this handler. + */ + (void) vioif_reclaim_used_tx(sc); + return (DDI_INTR_CLAIMED); } diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic_common.c b/usr/src/uts/i86pc/io/pcplusmp/apic_common.c index 5d642e6bd4..649e5ce950 100644 --- a/usr/src/uts/i86pc/io/pcplusmp/apic_common.c +++ b/usr/src/uts/i86pc/io/pcplusmp/apic_common.c @@ -24,6 +24,7 @@ */ /* * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2016 by Delphix. All rights reserved. */ /* @@ -1590,11 +1591,15 @@ int apic_msix_enable = 1; int apic_multi_msi_enable = 1; /* - * check whether the system supports MSI + * Check whether the system supports MSI. * - * If PCI-E capability is found, then this must be a PCI-E system. - * Since MSI is required for PCI-E system, it returns PSM_SUCCESS - * to indicate this system supports MSI. + * MSI is required for PCI-E and for PCI versions later than 2.2, so if we find + * a PCI-E bus or we find a PCI bus whose version we know is >= 2.2, then we + * return PSM_SUCCESS to indicate this system supports MSI. + * + * (Currently the only way we check whether a given PCI bus supports >= 2.2 is + * by detecting if we are running inside the KVM hypervisor, which guarantees + * this version number.) */ int apic_check_msi_support() @@ -1607,7 +1612,7 @@ apic_check_msi_support() /* * check whether the first level children of root_node have - * PCI-E capability + * PCI-E or PCI capability. */ for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL; cdip = ddi_get_next_sibling(cdip)) { @@ -1623,6 +1628,8 @@ apic_check_msi_support() continue; if (strcmp(dev_type, "pciex") == 0) return (PSM_SUCCESS); + if (strcmp(dev_type, "pci") == 0 && get_hwenv() == HW_KVM) + return (PSM_SUCCESS); } /* MSI is not supported on this system */ |