author    | Jason King <jason.king@joyent.com> | 2019-12-16 20:46:39 +0000
committer | Jason King <jason.king@joyent.com> | 2021-03-16 00:28:30 +0000
commit    | 35ee71aa744601bb20e000fc419d6b9aa2b71a6c
tree      | 5da5d68588c6bf8651eb3b578f80377afc3a07a6
parent    | 1bdd4b7b527e6552b00be56972027c83b8a68c05
Vioif promiscuous mode support (vioif-ctrlq)
-rw-r--r-- | usr/src/uts/common/io/vioif/vioif.c | 254
-rw-r--r-- | usr/src/uts/common/io/vioif/vioif.h | 60
2 files changed, 306 insertions, 8 deletions
diff --git a/usr/src/uts/common/io/vioif/vioif.c b/usr/src/uts/common/io/vioif/vioif.c
index cac90d3073..406ae60a23 100644
--- a/usr/src/uts/common/io/vioif/vioif.c
+++ b/usr/src/uts/common/io/vioif/vioif.c
@@ -12,7 +12,7 @@
 /*
  * Copyright 2013 Nexenta Inc.  All rights reserved.
  * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2021 Joyent, Inc.
  * Copyright 2019 Joshua M. Clulow <josh@sysmgr.org>
  */
 
@@ -328,6 +328,32 @@ vioif_rx_free_callback(caddr_t free_arg)
 	mutex_exit(&vif->vif_mutex);
 }
 
+static vioif_ctrlbuf_t *
+vioif_ctrlbuf_alloc(vioif_t *vif)
+{
+	vioif_ctrlbuf_t *cb;
+
+	VERIFY(MUTEX_HELD(&vif->vif_mutex));
+
+	if ((cb = list_remove_head(&vif->vif_ctrlbufs)) != NULL) {
+		vif->vif_nctrlbufs_alloc++;
+	}
+
+	return (cb);
+}
+
+static void
+vioif_ctrlbuf_free(vioif_t *vif, vioif_ctrlbuf_t *cb)
+{
+	VERIFY(MUTEX_HELD(&vif->vif_mutex));
+
+	VERIFY3U(vif->vif_nctrlbufs_alloc, >, 0);
+	vif->vif_nctrlbufs_alloc--;
+
+	virtio_chain_clear(cb->cb_chain);
+	list_insert_head(&vif->vif_ctrlbufs, cb);
+}
+
 static void
 vioif_free_bufs(vioif_t *vif)
 {
@@ -408,6 +434,36 @@ vioif_free_bufs(vioif_t *vif)
 		vif->vif_rxbufs_mem = NULL;
 		vif->vif_rxbufs_capacity = 0;
 	}
+
+	if (vif->vif_has_ctrlq) {
+		VERIFY3U(vif->vif_nctrlbufs_alloc, ==, 0);
+		for (uint_t i = 0; i < vif->vif_ctrlbufs_capacity; i++) {
+			vioif_ctrlbuf_t *cb = &vif->vif_ctrlbufs_mem[i];
+
+			/*
+			 * Ensure that this ctrlbuf is now in the free list:
+			 */
+			VERIFY(list_link_active(&cb->cb_link));
+			list_remove(&vif->vif_ctrlbufs, cb);
+
+			if (cb->cb_dma != NULL) {
+				virtio_dma_free(cb->cb_dma);
+				cb->cb_dma = NULL;
+			}
+
+			if (cb->cb_chain != NULL) {
+				virtio_chain_free(cb->cb_chain);
+				cb->cb_chain = NULL;
+			}
+		}
+		VERIFY(list_is_empty(&vif->vif_ctrlbufs));
+		if (vif->vif_ctrlbufs_mem != NULL) {
+			kmem_free(vif->vif_ctrlbufs_mem,
+			    sizeof (vioif_ctrlbuf_t) * vif->vif_ctrlbufs_capacity);
+			vif->vif_ctrlbufs_mem = NULL;
+			vif->vif_ctrlbufs_capacity = 0;
+		}
+	}
 }
 
 static int
@@ -434,6 +490,16 @@ vioif_alloc_bufs(vioif_t *vif)
 	list_create(&vif->vif_rxbufs, sizeof (vioif_rxbuf_t),
 	    offsetof(vioif_rxbuf_t, rb_link));
 
+	if (vif->vif_has_ctrlq) {
+		vif->vif_ctrlbufs_capacity = MIN(VIRTIO_NET_CTRL_BUFS,
+		    virtio_queue_size(vif->vif_ctrl_vq));
+		vif->vif_ctrlbufs_mem = kmem_zalloc(
+		    sizeof (vioif_ctrlbuf_t) * vif->vif_ctrlbufs_capacity,
+		    KM_SLEEP);
+	}
+	list_create(&vif->vif_ctrlbufs, sizeof (vioif_ctrlbuf_t),
+	    offsetof(vioif_ctrlbuf_t, cb_link));
+
 	/*
 	 * Do not loan more than half of our allocated receive buffers into
 	 * the networking stack.
@@ -450,6 +516,9 @@
 	for (uint_t i = 0; i < vif->vif_rxbufs_capacity; i++) {
 		list_insert_tail(&vif->vif_rxbufs, &vif->vif_rxbufs_mem[i]);
 	}
+	for (uint_t i = 0; i < vif->vif_ctrlbufs_capacity; i++) {
+		list_insert_tail(&vif->vif_ctrlbufs, &vif->vif_ctrlbufs_mem[i]);
+	}
 
 	/*
	 * Start from the DMA attribute template common to both transmit and
@@ -486,6 +555,26 @@
 	}
 
 	/*
+	 * Control queue buffers are also small (less than a page), so we'll
+	 * also request a single cookie for them.
+	 */
+	for (vioif_ctrlbuf_t *cb = list_head(&vif->vif_ctrlbufs); cb != NULL;
+	    cb = list_next(&vif->vif_ctrlbufs, cb)) {
+		if ((cb->cb_dma = virtio_dma_alloc(vif->vif_virtio,
+		    VIOIF_CTRL_SIZE, &attr,
+		    DDI_DMA_STREAMING | DDI_DMA_RDWR, KM_SLEEP)) == NULL) {
+			goto fail;
+		}
+		VERIFY3U(virtio_dma_ncookies(cb->cb_dma), ==, 1);
+
+		if ((cb->cb_chain = virtio_chain_alloc(vif->vif_ctrl_vq,
+		    KM_SLEEP)) == NULL) {
+			goto fail;
+		}
+		virtio_chain_data_set(cb->cb_chain, cb);
+	}
+
+	/*
 	 * The receive buffers are larger, and we can tolerate a large number
 	 * of segments.  Adjust the SGL entry count, setting aside one segment
 	 * for the virtio net header.
@@ -533,6 +622,127 @@ fail:
 }
 
 static int
+vioif_ctrlq_req(vioif_t *vif, uint8_t class, uint8_t cmd, void *data,
+    size_t datalen)
+{
+	vioif_ctrlbuf_t *cb = NULL;
+	virtio_chain_t *vic = NULL;
+	uint8_t *p = NULL;
+	uint64_t pa = 0;
+	struct virtio_net_ctrlq_hdr hdr = {
+		.vnch_class = class,
+		.vnch_command = cmd,
+	};
+	const size_t hdrlen = sizeof (hdr);
+	const size_t acklen = 1;	/* the ack is always 1 byte */
+	size_t totlen = hdrlen + datalen + acklen;
+	int r = DDI_SUCCESS;
+
+	/*
+	 * We shouldn't be called unless the ctrlq feature has been
+	 * negotiated with the host.
+	 */
+	VERIFY(vif->vif_has_ctrlq);
+
+	mutex_enter(&vif->vif_mutex);
+	cb = vioif_ctrlbuf_alloc(vif);
+	if (cb == NULL) {
+		vif->vif_noctrlbuf++;
+		mutex_exit(&vif->vif_mutex);
+		r = DDI_FAILURE;
+		goto done;
+	}
+	mutex_exit(&vif->vif_mutex);
+
+	if (totlen > virtio_dma_size(cb->cb_dma)) {
+		vif->vif_ctrlbuf_toosmall++;
+		r = DDI_FAILURE;
+		goto done;
+	}
+
+	/*
+	 * Clear the entire buffer.  This is not strictly necessary, but it
+	 * is useful when troubleshooting, and it ensures no stale data
+	 * lingers from an earlier request.
+	 */
+	p = virtio_dma_va(cb->cb_dma, 0);
+	bzero(p, virtio_dma_size(cb->cb_dma));
+
+	/*
+	 * We currently do not support VIRTIO_F_ANY_LAYOUT.  That means that
+	 * we must put the header, the data, and the ack in their own
+	 * respective descriptors.  Since all the currently supported
+	 * control queue commands take _very_ small amounts of data, we
+	 * use a single DMA buffer for all of it, but use 3 descriptors to
+	 * reference (respectively) the header, the data, and the ack byte
+	 * within that memory to adhere to the virtio spec.
+	 *
+	 * If we add support for control queue features such as custom
+	 * MAC filtering tables, which might require larger amounts of
+	 * memory, we will likely want to add more sophistication here
+	 * and optionally use additional allocated memory to hold that
+	 * data instead of a fixed size buffer.
+	 *
+	 * Copy the header.
+	 */
+	bcopy(&hdr, p, sizeof (hdr));
+	pa = virtio_dma_cookie_pa(cb->cb_dma, 0);
+	if ((r = virtio_chain_append(cb->cb_chain,
+	    pa, hdrlen, VIRTIO_DIR_DEVICE_READS)) != DDI_SUCCESS) {
+		goto done;
+	}
+
+	/* Copy the request data */
+	p = virtio_dma_va(cb->cb_dma, hdrlen);
+	bcopy(data, p, datalen);
+	if ((r = virtio_chain_append(cb->cb_chain,
+	    pa + hdrlen, datalen, VIRTIO_DIR_DEVICE_READS)) != DDI_SUCCESS) {
+		goto done;
+	}
+
+	/*
+	 * We already cleared the buffer, so we don't need to copy out a 0
+	 * for the ack byte.  Just add a descriptor for that spot.
+	 */
+	p = virtio_dma_va(cb->cb_dma, hdrlen + datalen);
+	if ((r = virtio_chain_append(cb->cb_chain,
+	    pa + hdrlen + datalen, acklen,
+	    VIRTIO_DIR_DEVICE_WRITES)) != DDI_SUCCESS) {
+		goto done;
+	}
+
+	virtio_dma_sync(cb->cb_dma, DDI_DMA_SYNC_FORDEV);
+	virtio_chain_submit(cb->cb_chain, B_TRUE);
+
+	/*
+	 * Spin waiting for a response from the host.
+	 */
+	mutex_enter(&vif->vif_mutex);
+	while ((vic = virtio_queue_poll(vif->vif_ctrl_vq)) == NULL) {
+		mutex_exit(&vif->vif_mutex);
+		drv_usecwait(1000);
+		mutex_enter(&vif->vif_mutex);
+	}
+
+	virtio_dma_sync(cb->cb_dma, DDI_DMA_SYNC_FORCPU);
+	VERIFY3P(virtio_chain_data(vic), ==, cb);
+	mutex_exit(&vif->vif_mutex);
+
+	if (*p != VIRTIO_NET_CQ_OK) {
+		r = DDI_FAILURE;
+	}
+
+done:
+	if (cb != NULL) {
+		mutex_enter(&vif->vif_mutex);
+		vioif_ctrlbuf_free(vif, cb);
+		mutex_exit(&vif->vif_mutex);
+	}
+
+	return (r);
+}
+
+static int
 vioif_m_multicst(void *arg, boolean_t add, const uint8_t *mcst_addr)
 {
 	/*
@@ -549,11 +759,15 @@ vioif_m_multicst(void *arg, boolean_t add, const uint8_t *mcst_addr)
 static int
 vioif_m_setpromisc(void *arg, boolean_t on)
 {
-	/*
-	 * Even though we cannot currently enable promiscuous mode, we return
-	 * success here to allow tools like snoop(1M) to continue to function.
-	 */
-	return (0);
+	vioif_t *vif = arg;
+	uint8_t val = on ? 1 : 0;
+
+	if (!vif->vif_has_ctrlq_rx) {
+		return (ENOTSUP);
+	}
+
+	return (vioif_ctrlq_req(vif, VIRTIO_NET_CTRL_RX,
+	    VIRTIO_NET_CTRL_RX_PROMISC, &val, sizeof (val)));
 }
 
 static int
@@ -1655,6 +1869,25 @@ vioif_check_features(vioif_t *vif)
 			vif->vif_tx_tso6 = 1;
 		}
 	}
+
+	if (vioif_has_feature(vif, VIRTIO_NET_F_CTRL_VQ)) {
+		vif->vif_has_ctrlq = 1;
+	}
+
+	if (vioif_has_feature(vif, VIRTIO_NET_F_CTRL_RX)) {
+		vif->vif_has_ctrlq_rx = 1;
+
+		/*
+		 * A host that advertises the VIRTIO_NET_F_CTRL_RX feature
+		 * is required to also advertise the control virtqueue
+		 * (VIRTIO_NET_F_CTRL_VQ) feature.  If it does not, we
+		 * ignore VIRTIO_NET_F_CTRL_RX.
+		 */
+		if (!vif->vif_has_ctrlq) {
+			/* XXX: Should we emit a warning? */
+			vif->vif_has_ctrlq_rx = 0;
+		}
+	}
 }
 
 static int
@@ -1726,6 +1959,13 @@ vioif_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 		goto fail;
 	}
 
+	if (vioif_has_feature(vif, VIRTIO_NET_F_CTRL_VQ) &&
+	    (vif->vif_ctrl_vq = virtio_queue_alloc(vio,
+	    VIRTIO_NET_VIRTQ_CONTROL, "ctrlq", NULL, vif,
+	    B_FALSE, VIOIF_MAX_SEGS)) == NULL) {
+		goto fail;
+	}
+
 	if (virtio_init_complete(vio, vioif_select_interrupt_types()) !=
 	    DDI_SUCCESS) {
 		dev_err(dip, CE_WARN, "failed to complete Virtio init");
@@ -1734,6 +1974,8 @@ vioif_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 
 	virtio_queue_no_interrupt(vif->vif_rx_vq, B_TRUE);
 	virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE);
+	if (vif->vif_ctrl_vq != NULL)
+		virtio_queue_no_interrupt(vif->vif_ctrl_vq, B_TRUE);
 
 	mutex_init(&vif->vif_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
 	mutex_enter(&vif->vif_mutex);
diff --git a/usr/src/uts/common/io/vioif/vioif.h b/usr/src/uts/common/io/vioif/vioif.h
index 9f750c9b8a..8a092406cd 100644
--- a/usr/src/uts/common/io/vioif/vioif.h
+++ b/usr/src/uts/common/io/vioif/vioif.h
@@ -10,7 +10,7 @@
  */
 
 /*
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
 */
 
 /*
@@ -167,7 +167,9 @@ extern "C" {
	VIRTIO_NET_F_HOST_TSO6 |	\
	VIRTIO_NET_F_HOST_ECN |		\
	VIRTIO_NET_F_MAC |		\
-	VIRTIO_NET_F_MTU)
+	VIRTIO_NET_F_MTU |		\
+	VIRTIO_NET_F_CTRL_VQ |		\
+	VIRTIO_NET_F_CTRL_RX)
 
 /*
 * VIRTIO NETWORK HEADER
@@ -201,6 +203,31 @@ struct virtio_net_hdr {
 #define	VIRTIO_NET_HDR_GSO_TCPV6	4
 #define	VIRTIO_NET_HDR_GSO_ECN		0x80
 
+/*
+ * VIRTIO CONTROL VIRTQUEUE HEADER
+ *
+ * This structure appears at the start of each control virtqueue request.
+ */
+struct virtio_net_ctrlq_hdr {
+	uint8_t	vnch_class;
+	uint8_t	vnch_command;
+} __packed;
+
+/* Control Queue Classes */
+#define	VIRTIO_NET_CTRL_RX		0
+
+/* CTRL_RX commands */
+#define	VIRTIO_NET_CTRL_RX_PROMISC	0
+#define	VIRTIO_NET_CTRL_RX_ALLMULTI	1
+#define	VIRTIO_NET_CTRL_RX_ALLUNI	2
+#define	VIRTIO_NET_CTRL_RX_NOMULTI	3
+#define	VIRTIO_NET_CTRL_RX_NOUNI	4
+#define	VIRTIO_NET_CTRL_RX_NOBCAST	5
+
+/* Control queue ack values */
+#define	VIRTIO_NET_CQ_OK		0
+#define	VIRTIO_NET_CQ_ERR		1
+
 /*
  * DRIVER PARAMETERS
  */
@@ -216,6 +243,13 @@ struct virtio_net_hdr {
 #define	VIRTIO_NET_RX_BUFS	256
 
 /*
+ * Initially, only use a single buf for control queue requests (when
+ * present).  If this becomes a bottleneck, we can simply increase this
+ * value as necessary.
+ */
+#define	VIRTIO_NET_CTRL_BUFS	1
+
+/*
  * The virtio net header and the first buffer segment share the same DMA
  * allocation.  We round up the virtio header size to a multiple of 4 and add 2
  * bytes so that the IP header, which starts immediately after the 14 or 18
@@ -261,6 +295,7 @@ struct virtio_net_hdr {
  */
 #define	VIOIF_TX_INLINE_SIZE	(2 * 1024)
 
+#define	VIOIF_CTRL_SIZE		(256)
 
 /*
  * TYPE DEFINITIONS
@@ -289,6 +324,15 @@ typedef struct vioif_rxbuf {
 	list_node_t			rb_link;
 } vioif_rxbuf_t;
 
+typedef struct vioif_ctrlbuf {
+	vioif_t				*cb_vioif;
+
+	virtio_dma_t			*cb_dma;
+	virtio_chain_t			*cb_chain;
+
+	list_node_t			cb_link;
+} vioif_ctrlbuf_t;
+
 /*
  * Transmit buffers are also allocated in advance.  DMA memory is allocated for
  * the virtio net header, and to hold small packets.  Larger packets are mapped
@@ -346,6 +390,7 @@ struct vioif {
 
 	virtio_queue_t			*vif_rx_vq;
 	virtio_queue_t			*vif_tx_vq;
+	virtio_queue_t			*vif_ctrl_vq;
 
 	/* TX virtqueue management resources */
 	boolean_t			vif_tx_corked;
@@ -366,6 +411,9 @@ struct vioif {
 	 */
 	unsigned int			vif_mac_from_host:1;
 
+	unsigned int			vif_has_ctrlq:1;
+	unsigned int			vif_has_ctrlq_rx:1;
+
 	uint_t				vif_mtu;
 	uint_t				vif_mtu_max;
 	uint8_t				vif_mac[ETHERADDRL];
@@ -395,6 +443,11 @@ struct vioif {
 	uint_t				vif_rxcopy_thresh;
 	uint_t				vif_txcopy_thresh;
 
+	list_t				vif_ctrlbufs;
+	uint_t				vif_nctrlbufs_alloc;
+	uint_t				vif_ctrlbufs_capacity;
+	vioif_ctrlbuf_t			*vif_ctrlbufs_mem;
+
 	/*
 	 * Statistics visible through mac:
 	 */
@@ -424,6 +477,9 @@ struct vioif {
 	uint64_t			vif_txfail_indirect_limit;
 
 	uint64_t			vif_stat_tx_reclaim;
+
+	uint64_t			vif_noctrlbuf;
+	uint64_t			vif_ctrlbuf_toosmall;
 };
 
 #ifdef __cplusplus
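
To make the control request layout concrete: the userland sketch below builds the same VIRTIO_NET_CTRL_RX_PROMISC request image that vioif_ctrlq_req() assembles in its DMA buffer, namely a two-byte class/command header, a one-byte payload, and a one-byte ack slot, which the driver hands to the device as three separate descriptors because VIRTIO_F_ANY_LAYOUT is not negotiated. This is an illustrative sketch only, using an ordinary buffer and no virtio or DDI calls; it is not part of the change.

/*
 * Illustrative sketch only (not part of this change): lay out a
 * VIRTIO_NET_CTRL_RX_PROMISC request the way vioif_ctrlq_req() does,
 * in an ordinary userland buffer instead of virtio DMA memory.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct virtio_net_ctrlq_hdr {
	uint8_t vnch_class;
	uint8_t vnch_command;
} __attribute__((packed));

#define	VIRTIO_NET_CTRL_RX		0
#define	VIRTIO_NET_CTRL_RX_PROMISC	0

int
main(void)
{
	uint8_t buf[256] = { 0 };	/* stands in for the VIOIF_CTRL_SIZE buffer */
	struct virtio_net_ctrlq_hdr hdr = {
		.vnch_class = VIRTIO_NET_CTRL_RX,
		.vnch_command = VIRTIO_NET_CTRL_RX_PROMISC,
	};
	uint8_t val = 1;		/* 1 = enable promiscuous mode */
	const size_t hdrlen = sizeof (hdr);
	const size_t datalen = sizeof (val);

	/* Descriptor 1 (device read-only): the class/command header. */
	memcpy(buf, &hdr, hdrlen);

	/* Descriptor 2 (device read-only): the command payload. */
	memcpy(buf + hdrlen, &val, datalen);

	/*
	 * Descriptor 3 (device write-only): the one-byte ack at
	 * buf[hdrlen + datalen], already zeroed; the device writes
	 * VIRTIO_NET_CQ_OK (0) or VIRTIO_NET_CQ_ERR (1) there.
	 */
	(void) printf("hdr @ 0, data @ %zu, ack @ %zu\n",
	    hdrlen, hdrlen + datalen);
	return (0);
}

Running it prints "hdr @ 0, data @ 2, ack @ 3", matching the offsets vioif_ctrlq_req() passes to virtio_chain_append().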
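The response wait in vioif_ctrlq_req() spins until the host returns the chain, dropping vif_mutex around each delay. If an unresponsive host ever becomes a concern, a bounded variant along the following lines would let the request fail instead. This is a hypothetical sketch against helpers visible in this diff (virtio_queue_poll(), drv_usecwait()); the function name and the timeout handling are invented here and are not part of this commit.

/*
 * Hypothetical sketch (not in this commit): bound the control queue
 * response wait instead of spinning indefinitely.  Only helpers that
 * appear elsewhere in the driver are used; the timeout is made up.
 */
static virtio_chain_t *
vioif_ctrlq_poll_bounded(vioif_t *vif, uint_t max_usec)
{
	virtio_chain_t *vic;
	uint_t waited = 0;

	VERIFY(MUTEX_HELD(&vif->vif_mutex));

	while ((vic = virtio_queue_poll(vif->vif_ctrl_vq)) == NULL) {
		if (waited >= max_usec) {
			return (NULL);	/* caller maps this to DDI_FAILURE */
		}

		/* Drop the lock while waiting, as vioif_ctrlq_req() does. */
		mutex_exit(&vif->vif_mutex);
		drv_usecwait(1000);
		waited += 1000;
		mutex_enter(&vif->vif_mutex);
	}

	return (vic);
}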