summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJerry Jelinek <jerry.jelinek@joyent.com>2016-07-01 13:32:03 +0000
committerJerry Jelinek <jerry.jelinek@joyent.com>2016-07-01 13:32:03 +0000
commit05a1c92c75f724c65de83048dd8ecc072c4dcd92 (patch)
treeebc7e72a4c504a1974d8e0d6710d05e605476fe8
parentb1a0afdfc97cc5965a2a004b7a8ab98f9ba15c56 (diff)
parent970db7b7a5b4656c659fc7c5226da7be313dc336 (diff)
downloadillumos-joyent-05a1c92c75f724c65de83048dd8ecc072c4dcd92.tar.gz
[illumos-gate merge]
commit 970db7b7a5b4656c659fc7c5226da7be313dc336 7089 MSI-x on KVM prevents VMs from accessing the network Conflicts: usr/src/uts/common/io/vioif/vioif.c
-rw-r--r--usr/src/uts/common/io/vioif/vioif.c228
-rw-r--r--usr/src/uts/i86pc/io/pcplusmp/apic_common.c17
2 files changed, 142 insertions, 103 deletions
diff --git a/usr/src/uts/common/io/vioif/vioif.c b/usr/src/uts/common/io/vioif/vioif.c
index 27241894aa..ef677d791d 100644
--- a/usr/src/uts/common/io/vioif/vioif.c
+++ b/usr/src/uts/common/io/vioif/vioif.c
@@ -11,8 +11,8 @@
/*
* Copyright 2013 Nexenta Inc. All rights reserved.
- * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
* Copyright 2015 Joyent, Inc.
+ * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
*/
/* Based on the NetBSD virtio driver by Minoura Makoto. */
@@ -187,16 +187,15 @@ static int vioif_attach(dev_info_t *, ddi_attach_cmd_t);
static int vioif_detach(dev_info_t *, ddi_detach_cmd_t);
DDI_DEFINE_STREAM_OPS(vioif_ops,
- nulldev, /* identify */
- nulldev, /* probe */
- vioif_attach, /* attach */
- vioif_detach, /* detach */
- nodev, /* reset */
- NULL, /* cb_ops */
- D_MP, /* bus_ops */
- NULL, /* power */
- vioif_quiesce /* quiesce */
-);
+ nulldev, /* identify */
+ nulldev, /* probe */
+ vioif_attach, /* attach */
+ vioif_detach, /* detach */
+ nodev, /* reset */
+ NULL, /* cb_ops */
+ D_MP, /* bus_ops */
+ NULL, /* power */
+ vioif_quiesce /* quiesce */);
static char vioif_ident[] = "VirtIO ethernet driver";
@@ -729,109 +728,103 @@ vioif_unicst(void *arg, const uint8_t *macaddr)
}
-static int
+static uint_t
vioif_add_rx(struct vioif_softc *sc, int kmflag)
{
- struct vq_entry *ve;
- struct vioif_rx_buf *buf;
+ uint_t num_added = 0;
- ve = vq_alloc_entry(sc->sc_rx_vq);
- if (ve == NULL) {
- /*
- * Out of free descriptors - ring already full.
- */
- sc->sc_rxfail_no_descriptors++;
- sc->sc_norecvbuf++;
- goto exit_vq;
- }
- buf = sc->sc_rxbufs[ve->qe_index];
+ for (;;) {
+ struct vq_entry *ve;
+ struct vioif_rx_buf *buf;
- if (buf == NULL) {
- /* First run, allocate the buffer. */
- buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
- sc->sc_rxbufs[ve->qe_index] = buf;
- }
+ ve = vq_alloc_entry(sc->sc_rx_vq);
+ if (!ve) {
+ /*
+ * Out of free descriptors - ring already full.
+ * It would be better to update sc_norxdescavail
+ * but MAC does not ask for this info, hence we
+ * update sc_norecvbuf.
+ */
+ sc->sc_rxfail_no_descriptors++;
+ sc->sc_norecvbuf++;
+ break;
+ }
+ buf = sc->sc_rxbufs[ve->qe_index];
- /* Still nothing? Bye. */
- if (buf == NULL) {
- sc->sc_norecvbuf++;
- goto exit_buf;
- }
+ if (!buf) {
+ /* First run, allocate the buffer. */
+ buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
+ sc->sc_rxbufs[ve->qe_index] = buf;
+ }
- ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
+ /* Still nothing? Bye. */
+ if (buf == NULL) {
+ sc->sc_norecvbuf++;
+ vq_free_entry(sc->sc_rx_vq, ve);
+ break;
+ }
- /*
- * For an unknown reason, the virtio_net_hdr must be placed
- * as a separate virtio queue entry.
- */
- virtio_ve_add_indirect_buf(ve, buf->rb_mapping.vbm_dmac.dmac_laddress,
- sizeof (struct virtio_net_hdr), B_FALSE);
+ ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
- /* Add the rest of the first cookie. */
- virtio_ve_add_indirect_buf(ve,
- buf->rb_mapping.vbm_dmac.dmac_laddress +
- sizeof (struct virtio_net_hdr),
- buf->rb_mapping.vbm_dmac.dmac_size -
- sizeof (struct virtio_net_hdr), B_FALSE);
+ /*
+ * For an unknown reason, the virtio_net_hdr must be placed
+ * as a separate virtio queue entry.
+ */
+ virtio_ve_add_indirect_buf(ve,
+ buf->rb_mapping.vbm_dmac.dmac_laddress,
+ sizeof (struct virtio_net_hdr), B_FALSE);
- /*
- * If the buffer consists of a single cookie (unlikely for a
- * 64-k buffer), we are done. Otherwise, add the rest of the cookies
- * using indirect entries.
- */
- if (buf->rb_mapping.vbm_ncookies > 1) {
- ddi_dma_cookie_t *first_extra_dmac;
- ddi_dma_cookie_t dmac;
- first_extra_dmac =
- vioif_dma_curr_cookie(buf->rb_mapping.vbm_dmah);
-
- ddi_dma_nextcookie(buf->rb_mapping.vbm_dmah, &dmac);
- virtio_ve_add_cookie(ve, buf->rb_mapping.vbm_dmah,
- dmac, buf->rb_mapping.vbm_ncookies - 1, B_FALSE);
- vioif_dma_reset_cookie(buf->rb_mapping.vbm_dmah,
- first_extra_dmac);
- }
+ /* Add the rest of the first cookie. */
+ virtio_ve_add_indirect_buf(ve,
+ buf->rb_mapping.vbm_dmac.dmac_laddress +
+ sizeof (struct virtio_net_hdr),
+ buf->rb_mapping.vbm_dmac.dmac_size -
+ sizeof (struct virtio_net_hdr), B_FALSE);
- virtio_push_chain(ve, B_FALSE);
+ /*
+ * If the buffer consists of a single cookie (unlikely for a
+ * 64-k buffer), we are done. Otherwise, add the rest of the
+ * cookies using indirect entries.
+ */
+ if (buf->rb_mapping.vbm_ncookies > 1) {
+ ddi_dma_cookie_t *first_extra_dmac;
+ ddi_dma_cookie_t dmac;
+ first_extra_dmac =
+ vioif_dma_curr_cookie(buf->rb_mapping.vbm_dmah);
+
+ ddi_dma_nextcookie(buf->rb_mapping.vbm_dmah, &dmac);
+ virtio_ve_add_cookie(ve, buf->rb_mapping.vbm_dmah,
+ dmac, buf->rb_mapping.vbm_ncookies - 1, B_FALSE);
+ vioif_dma_reset_cookie(buf->rb_mapping.vbm_dmah,
+ first_extra_dmac);
+ }
- return (DDI_SUCCESS);
+ virtio_push_chain(ve, B_FALSE);
+ num_added++;
+ }
-exit_buf:
- vq_free_entry(sc->sc_rx_vq, ve);
-exit_vq:
- return (DDI_FAILURE);
+ return (num_added);
}
-static int
+static uint_t
vioif_populate_rx(struct vioif_softc *sc, int kmflag)
{
- int i = 0;
+ uint_t num_added = vioif_add_rx(sc, kmflag);
- for (;;) {
- if (vioif_add_rx(sc, kmflag) != DDI_SUCCESS) {
- /*
- * We could not allocate some memory. Try to work with
- * what we've got.
- */
- break;
- }
- i++;
- }
-
- if (i != 0)
+ if (num_added > 0)
virtio_sync_vq(sc->sc_rx_vq);
- return (i);
+ return (num_added);
}
-static int
+static uint_t
vioif_process_rx(struct vioif_softc *sc)
{
struct vq_entry *ve;
struct vioif_rx_buf *buf;
- mblk_t *mp;
+ mblk_t *mphead = NULL, *lastmp = NULL, *mp;
uint32_t len;
- int i = 0;
+ uint_t num_processed = 0;
while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {
@@ -847,7 +840,7 @@ vioif_process_rx(struct vioif_softc *sc)
len -= sizeof (struct virtio_net_hdr);
/*
- * We copy small packets that happenned to fit into a single
+ * We copy small packets that happen to fit into a single
* cookie and reuse the buffers. For bigger ones, we loan
* the buffers upstream.
*/
@@ -902,21 +895,31 @@ vioif_process_rx(struct vioif_softc *sc)
sc->sc_ipackets++;
virtio_free_chain(ve);
- mac_rx(sc->sc_mac_handle, NULL, mp);
- i++;
+
+ if (lastmp == NULL) {
+ mphead = mp;
+ } else {
+ lastmp->b_next = mp;
+ }
+ lastmp = mp;
+ num_processed++;
}
- return (i);
+ if (mphead != NULL) {
+ mac_rx(sc->sc_mac_handle, NULL, mphead);
+ }
+
+ return (num_processed);
}
-static void
+static uint_t
vioif_reclaim_used_tx(struct vioif_softc *sc)
{
struct vq_entry *ve;
struct vioif_tx_buf *buf;
uint32_t len;
mblk_t *mp;
- unsigned chains = 0;
+ uint_t num_reclaimed = 0;
while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
/* We don't chain descriptors for tx, so don't expect any. */
@@ -938,13 +941,15 @@ vioif_reclaim_used_tx(struct vioif_softc *sc)
/* External mapping used, mp was not freed in vioif_send() */
if (mp != NULL)
freemsg(mp);
- chains++;
+ num_reclaimed++;
}
- if (sc->sc_tx_stopped != 0 && chains > 0) {
+ if (sc->sc_tx_stopped && num_reclaimed > 0) {
sc->sc_tx_stopped = 0;
mac_tx_update(sc->sc_mac_handle);
}
+
+ return (num_reclaimed);
}
/* sc will be used to update stat counters. */
@@ -1213,11 +1218,28 @@ int
vioif_start(void *arg)
{
struct vioif_softc *sc = arg;
+ struct vq_entry *ve;
+ uint32_t len;
mac_link_update(sc->sc_mac_handle, vioif_link_state(sc));
virtio_start_vq_intr(sc->sc_rx_vq);
+ /*
+ * Don't start interrupts on sc_tx_vq. We use VIRTIO_F_NOTIFY_ON_EMPTY,
+ * so the device will send a transmit interrupt when the queue is empty
+ * and we can reclaim it in one sweep.
+ */
+
+ /*
+ * Clear any data that arrived early on the receive queue and populate
+ * it with free buffers that the device can use moving forward.
+ */
+ while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len)) != NULL) {
+ virtio_free_chain(ve);
+ }
+ (void) vioif_populate_rx(sc, KM_SLEEP);
+
return (DDI_SUCCESS);
}
@@ -1585,8 +1607,12 @@ vioif_rx_handler(caddr_t arg1, caddr_t arg2)
struct vioif_softc *sc = container_of(vsc,
struct vioif_softc, sc_virtio);
+ /*
+ * The return values of these functions are not needed but they make
+ * debugging interrupts simpler because you can use them to detect when
+ * stuff was processed and repopulated in this handler.
+ */
(void) vioif_process_rx(sc);
-
(void) vioif_populate_rx(sc, KM_NOSLEEP);
return (DDI_INTR_CLAIMED);
@@ -1600,7 +1626,13 @@ vioif_tx_handler(caddr_t arg1, caddr_t arg2)
struct vioif_softc *sc = container_of(vsc,
struct vioif_softc, sc_virtio);
- vioif_reclaim_used_tx(sc);
+ /*
+ * The return value of this function is not needed but makes debugging
+ * interrupts simpler because you can use it to detect if anything was
+ * reclaimed in this handler.
+ */
+ (void) vioif_reclaim_used_tx(sc);
+
return (DDI_INTR_CLAIMED);
}
diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic_common.c b/usr/src/uts/i86pc/io/pcplusmp/apic_common.c
index 5d642e6bd4..649e5ce950 100644
--- a/usr/src/uts/i86pc/io/pcplusmp/apic_common.c
+++ b/usr/src/uts/i86pc/io/pcplusmp/apic_common.c
@@ -24,6 +24,7 @@
*/
/*
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2016 by Delphix. All rights reserved.
*/
/*
@@ -1590,11 +1591,15 @@ int apic_msix_enable = 1;
int apic_multi_msi_enable = 1;
/*
- * check whether the system supports MSI
+ * Check whether the system supports MSI.
*
- * If PCI-E capability is found, then this must be a PCI-E system.
- * Since MSI is required for PCI-E system, it returns PSM_SUCCESS
- * to indicate this system supports MSI.
+ * MSI is required for PCI-E and for PCI versions 2.2 and later, so if we find
+ * a PCI-E bus or we find a PCI bus whose version we know is >= 2.2, then we
+ * return PSM_SUCCESS to indicate this system supports MSI.
+ *
+ * (Currently the only way we check whether a given PCI bus supports >= 2.2 is
+ * by detecting if we are running inside the KVM hypervisor, which guarantees
+ * this version number.)
*/
int
apic_check_msi_support()
@@ -1607,7 +1612,7 @@ apic_check_msi_support()
/*
* check whether the first level children of root_node have
- * PCI-E capability
+ * PCI-E or PCI capability.
*/
for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
cdip = ddi_get_next_sibling(cdip)) {
@@ -1623,6 +1628,8 @@ apic_check_msi_support()
continue;
if (strcmp(dev_type, "pciex") == 0)
return (PSM_SUCCESS);
+ if (strcmp(dev_type, "pci") == 0 && get_hwenv() == HW_KVM)
+ return (PSM_SUCCESS);
}
/* MSI is not supported on this system */