diff options
author | Andy Fiddaman <omnios@citrus-it.co.uk> | 2021-04-19 13:14:49 +0000 |
---|---|---|
committer | Andy Fiddaman <omnios@citrus-it.co.uk> | 2021-07-12 21:32:19 +0000 |
commit | 069b2ef0d51cd626922df94af789ca0dc322222d (patch) | |
tree | 02799a0e51840262389c128c1bd046ca343c1d25 /usr/src | |
parent | cfcb628093a526b143a37c1a2f112f2c7591ed70 (diff) | |
download | illumos-joyent-069b2ef0d51cd626922df94af789ca0dc322222d.tar.gz |
13738 Wire up bhyve's network backends for illumos
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Reviewed by: Jorge Schrauwen <sjorge@blackdot.be>
Approved by: Garrett D'Amore <garrett@damore.org>
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/cmd/bhyve/Makefile | 9 | ||||
-rw-r--r-- | usr/src/cmd/bhyve/README.sync | 5 | ||||
-rw-r--r-- | usr/src/cmd/bhyve/net_backends.c | 386 | ||||
-rw-r--r-- | usr/src/cmd/bhyve/net_backends.h | 4 | ||||
-rw-r--r-- | usr/src/cmd/bhyve/pci_e82545.c | 239 | ||||
-rw-r--r-- | usr/src/cmd/bhyve/pci_virtio_net.c | 931 | ||||
-rw-r--r-- | usr/src/compat/bhyve/sys/cdefs.h | 7 | ||||
-rw-r--r-- | usr/src/man/man1m/bhyve.1m | 47 | ||||
-rw-r--r-- | usr/src/man/man4/bhyve_config.4 | 21 |
9 files changed, 848 insertions, 801 deletions
diff --git a/usr/src/cmd/bhyve/Makefile b/usr/src/cmd/bhyve/Makefile index e12878f71a..bbc966d67f 100644 --- a/usr/src/cmd/bhyve/Makefile +++ b/usr/src/cmd/bhyve/Makefile @@ -44,6 +44,7 @@ SRCS = acpi.c \ mem.c \ mevent.c \ mptbl.c \ + net_backends.c \ net_utils.c \ pci_ahci.c \ pci_e82545.c \ @@ -98,11 +99,6 @@ SRCS = acpi.c \ #hda_codec.c \ #pci_hda.c \ -# The bhyve generic net-backend stuff has been ignored by us at the moment -# because illumos users prefer to use viona for its superior network perf. - #net_backends.c \ - - OBJS = $(SRCS:.c=.o) CLOBBERFILES = $(ROOTUSRSBINPROG) @@ -113,7 +109,8 @@ MEVENT_TEST_OBJS = $(MEVENT_TEST_SRCS:.c=.o) CLEANFILES = $(PROG) $(MEVENT_TEST_PROG) $(MEVENT_TEST_OBJS) -CFLAGS += $(CCVERBOSE) -_gcc=-Wimplicit-function-declaration -_gcc=-Wno-parentheses +CFLAGS += $(CCVERBOSE) +CFLAGS += -_gcc=-Wimplicit-function-declaration -_gcc=-Wno-parentheses CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \ -I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64 \ -I$(CONTRIB)/bhyve/dev/usb/controller \ diff --git a/usr/src/cmd/bhyve/README.sync b/usr/src/cmd/bhyve/README.sync index bed6671c73..4f71c1420e 100644 --- a/usr/src/cmd/bhyve/README.sync +++ b/usr/src/cmd/bhyve/README.sync @@ -19,11 +19,6 @@ Date: Wed Mar 24 09:29:15 2021 -0700 Divergence Notes: -A previous sync skipped commit 0ff7076bdbc6dae5ea44c0acdb567e1cede199d1 which -introduced a generic backend functionality to network devices. Without that in -place, subsequent updates reflect the absence of that subsystem. Integrating -net backends has not been a priority, given the common use of viona on illumos. - The draft Save/Restore functionality, added in FreeBSD commit 483d953a86a2507355f8287c5107dc827a0ff516, has not been synced into illumos bhyve yet. It is not built by default in FreeBSD, so we're not interested in taking diff --git a/usr/src/cmd/bhyve/net_backends.c b/usr/src/cmd/bhyve/net_backends.c index 30c26aea45..3f86b31ded 100644 --- a/usr/src/cmd/bhyve/net_backends.c +++ b/usr/src/cmd/bhyve/net_backends.c @@ -46,10 +46,12 @@ __FBSDID("$FreeBSD$"); #include <sys/uio.h> #include <net/if.h> +#ifdef __FreeBSD__ #include <net/netmap.h> #include <net/netmap_virt.h> #define NETMAP_WITH_LIBS #include <net/netmap_user.h> +#endif #ifndef WITHOUT_CAPSICUM #include <capsicum_helpers.h> @@ -75,6 +77,11 @@ __FBSDID("$FreeBSD$"); #include <netgraph.h> #endif +#ifndef __FreeBSD__ +#include <libdlpi.h> +#include <net/ethernet.h> +#endif + #include "config.h" #include "debug.h" #include "iov.h" @@ -151,6 +158,10 @@ struct net_backend { int (*set_cap)(struct net_backend *be, uint64_t features, unsigned int vnet_hdr_len); +#ifndef __FreeBSD__ + int (*get_mac)(struct net_backend *be, void *, size_t *); +#endif + struct pci_vtnet_softc *sc; int fd; @@ -175,6 +186,8 @@ SET_DECLARE(net_backend_set, struct net_backend); #define WPRINTF(params) PRINTLN params +#ifdef __FreeBSD__ + /* * The tap backend */ @@ -893,6 +906,336 @@ static struct net_backend vale_backend = { DATA_SET(net_backend_set, netmap_backend); DATA_SET(net_backend_set, vale_backend); +#else /* __FreeBSD__ */ + +/* + * The illumos dlpi backend + */ + +/* + * The size of the bounce buffer used to implement the peek callback. + * This value should be big enough to accommodate the largest of all possible + * frontend packet lengths. The value here matches the definition of + * VTNET_MAX_PKT_LEN in pci_virtio_net.c + */ +#define DLPI_BBUF_SIZE (65536 + 64) + +typedef struct be_dlpi_priv { + dlpi_handle_t bdp_dhp; + struct mevent *bdp_mevp; + /* + * A bounce buffer that allows us to implement the peek_recvlen + * callback. Each structure is only used by a single thread so + * one is enough. + */ + uint8_t bdp_bbuf[DLPI_BBUF_SIZE]; + ssize_t bdp_bbuflen; +} be_dlpi_priv_t; + +static void +be_dlpi_cleanup(net_backend_t *be) +{ + be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque; + + if (priv->bdp_dhp != NULL) + dlpi_close(priv->bdp_dhp); + priv->bdp_dhp = NULL; + + if (priv->bdp_mevp != NULL) + mevent_delete(priv->bdp_mevp); + priv->bdp_mevp = NULL; + + priv->bdp_bbuflen = 0; + be->fd = -1; +} + +static void +be_dlpi_err(int ret, const char *dev, char *msg) +{ + WPRINTF(("%s: %s (%s)", dev, msg, dlpi_strerror(ret))); +} + +static int +be_dlpi_init(net_backend_t *be, const char *devname __unused, + nvlist_t *nvl, net_be_rxeof_t cb, void *param) +{ + be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque; + const char *vnic; + int ret; + + if (cb == NULL) { + WPRINTF(("dlpi backend requires non-NULL callback")); + return (-1); + } + + vnic = get_config_value_node(nvl, "vnic"); + if (vnic == NULL) { + WPRINTF(("dlpi backend requires a VNIC")); + return (-1); + } + + priv->bdp_bbuflen = 0; + + ret = dlpi_open(vnic, &priv->bdp_dhp, DLPI_RAW); + + if (ret != DLPI_SUCCESS) { + be_dlpi_err(ret, vnic, "open failed"); + goto error; + } + + if ((ret = dlpi_bind(priv->bdp_dhp, DLPI_ANY_SAP, NULL)) != + DLPI_SUCCESS) { + be_dlpi_err(ret, vnic, "bind failed"); + goto error; + } + + if (get_config_bool_node_default(nvl, "promiscrxonly", true)) { + if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_RX_ONLY)) != + DLPI_SUCCESS) { + be_dlpi_err(ret, vnic, + "enable promiscuous mode(rxonly) failed"); + goto error; + } + } + if (get_config_bool_node_default(nvl, "promiscphys", false)) { + if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_PHYS)) != + DLPI_SUCCESS) { + be_dlpi_err(ret, vnic, + "enable promiscuous mode(physical) failed"); + goto error; + } + } + if (get_config_bool_node_default(nvl, "promiscsap", true)) { + if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_SAP)) != + DLPI_SUCCESS) { + be_dlpi_err(ret, vnic, + "enable promiscuous mode(SAP) failed"); + goto error; + } + } + if (get_config_bool_node_default(nvl, "promiscmulti", true)) { + if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_MULTI)) != + DLPI_SUCCESS) { + be_dlpi_err(ret, vnic, + "enable promiscuous mode(muticast) failed"); + goto error; + } + } + + be->fd = dlpi_fd(priv->bdp_dhp); + + if (fcntl(be->fd, F_SETFL, O_NONBLOCK) < 0) { + WPRINTF(("%s: enable O_NONBLOCK failed", vnic)); + goto error; + } + + priv->bdp_mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); + if (priv->bdp_mevp == NULL) { + WPRINTF(("Could not register event")); + goto error; + } + + return (0); + +error: + be_dlpi_cleanup(be); + return (-1); +} + +/* + * Called to send a buffer chain out to the dlpi device + */ +static ssize_t +be_dlpi_send(net_backend_t *be, const struct iovec *iov, int iovcnt) +{ + be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque; + ssize_t len = 0; + int ret; + + if (iovcnt == 1) { + len = iov[0].iov_len; + ret = dlpi_send(priv->bdp_dhp, NULL, 0, iov[0].iov_base, len, + NULL); + } else { + void *buf = NULL; + + len = iov_to_buf(iov, iovcnt, &buf); + + if (len <= 0 || buf == NULL) + return (-1); + + ret = dlpi_send(priv->bdp_dhp, NULL, 0, buf, len, NULL); + free(buf); + } + + if (ret != DLPI_SUCCESS) + return (-1); + + return (len); +} + +static ssize_t +be_dlpi_peek_recvlen(net_backend_t *be) +{ + be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque; + dlpi_recvinfo_t recv; + size_t len; + int ret; + + /* + * We already have a packet in the bounce buffer. + * Just return its length. + */ + if (priv->bdp_bbuflen > 0) + return (priv->bdp_bbuflen); + + /* + * Read the next packet (if any) into the bounce buffer, so + * that we get to know its length and we can return that + * to the caller. + */ + len = sizeof (priv->bdp_bbuf); + ret = dlpi_recv(priv->bdp_dhp, NULL, NULL, priv->bdp_bbuf, &len, + 0, &recv); + if (ret == DL_SYSERR) { + if (errno == EWOULDBLOCK) + return (0); + return (-1); + } else if (ret == DLPI_ETIMEDOUT) { + return (0); + } else if (ret != DLPI_SUCCESS) { + return (-1); + } + + if (recv.dri_totmsglen > sizeof (priv->bdp_bbuf)) { + EPRINTLN("DLPI bounce buffer was too small! - needed %x bytes", + recv.dri_totmsglen); + } + + priv->bdp_bbuflen = len; + + return (len); +} + +static ssize_t +be_dlpi_recv(net_backend_t *be, const struct iovec *iov, int iovcnt) +{ + be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque; + size_t len; + int ret; + + if (priv->bdp_bbuflen > 0) { + /* + * A packet is available in the bounce buffer, so + * we read it from there. + */ + len = buf_to_iov(priv->bdp_bbuf, priv->bdp_bbuflen, + iov, iovcnt, 0); + + /* Mark the bounce buffer as empty. */ + priv->bdp_bbuflen = 0; + + return (len); + } + + len = iov[0].iov_len; + ret = dlpi_recv(priv->bdp_dhp, NULL, NULL, + (uint8_t *)iov[0].iov_base, &len, 0, NULL); + if (ret == DL_SYSERR) { + if (errno == EWOULDBLOCK) + return (0); + return (-1); + } else if (ret == DLPI_ETIMEDOUT) { + return (0); + } else if (ret != DLPI_SUCCESS) { + return (-1); + } + + return (len); +} + +static void +be_dlpi_recv_enable(net_backend_t *be) +{ + be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque; + + mevent_enable(priv->bdp_mevp); +} + +static void +be_dlpi_recv_disable(net_backend_t *be) +{ + be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque; + + mevent_disable(priv->bdp_mevp); +} + +static uint64_t +be_dlpi_get_cap(net_backend_t *be) +{ + return (0); /* no capabilities for now */ +} + +static int +be_dlpi_set_cap(net_backend_t *be, uint64_t features, + unsigned vnet_hdr_len) +{ + return ((features || vnet_hdr_len) ? -1 : 0); +} + +static int +be_dlpi_get_mac(net_backend_t *be, void *buf, size_t *buflen) +{ + be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque; + uchar_t physaddr[DLPI_PHYSADDR_MAX]; + size_t physaddrlen = DLPI_PHYSADDR_MAX; + int ret; + + if ((ret = dlpi_get_physaddr(priv->bdp_dhp, DL_CURR_PHYS_ADDR, + physaddr, &physaddrlen)) != DLPI_SUCCESS) { + be_dlpi_err(ret, dlpi_linkname(priv->bdp_dhp), + "read MAC address failed"); + return (EINVAL); + } + + if (physaddrlen != ETHERADDRL) { + WPRINTF(("%s: bad MAC address len %d", + dlpi_linkname(priv->bdp_dhp), physaddrlen)); + return (EINVAL); + } + + if (physaddrlen > *buflen) { + WPRINTF(("%s: MAC address too long (%d bytes required)", + dlpi_linkname(priv->bdp_dhp), physaddrlen)); + return (ENOMEM); + } + + *buflen = physaddrlen; + memcpy(buf, physaddr, *buflen); + + return (0); +} + +static struct net_backend dlpi_backend = { + .prefix = "dlpi", + .priv_size = sizeof(struct be_dlpi_priv), + .init = be_dlpi_init, + .cleanup = be_dlpi_cleanup, + .send = be_dlpi_send, + .peek_recvlen = be_dlpi_peek_recvlen, + .recv = be_dlpi_recv, + .recv_enable = be_dlpi_recv_enable, + .recv_disable = be_dlpi_recv_disable, + .get_cap = be_dlpi_get_cap, + .set_cap = be_dlpi_set_cap, + .get_mac = be_dlpi_get_mac, +}; + +DATA_SET(net_backend_set, dlpi_backend); + +#endif /* __FreeBSD__ */ + +#ifdef __FreeBSD__ int netbe_legacy_config(nvlist_t *nvl, const char *opts) { @@ -911,6 +1254,35 @@ netbe_legacy_config(nvlist_t *nvl, const char *opts) free(backend); return (pci_parse_legacy_config(nvl, cp + 1)); } +#else +int +netbe_legacy_config(nvlist_t *nvl, const char *opts) +{ + char *config, *name, *tofree, *value; + + if (opts == NULL) + return (0); + + /* Default to the 'dlpi' backend - can still be overridden by opts */ + set_config_value_node(nvl, "backend", "dlpi"); + + config = tofree = strdup(opts); + if (config == NULL) + err(4, "netbe_legacy_config strdup()"); + while ((name = strsep(&config, ",")) != NULL) { + value = strchr(name, '='); + if (value != NULL) { + *value++ = '\0'; + set_config_value_node(nvl, name, value); + } else { + set_config_value_node(nvl, "vnic", name); + } + } + free(tofree); + + return (0); +} +#endif /* * Initialize a backend and attach to the frontend. @@ -1066,7 +1438,11 @@ netbe_rx_discard(struct net_backend *be) static uint8_t dummybuf[65536 + 64]; struct iovec iov; +#ifdef __FreeBSD__ iov.iov_base = dummybuf; +#else + iov.iov_base = (caddr_t)dummybuf; +#endif iov.iov_len = sizeof(dummybuf); return netbe_recv(be, &iov, 1); @@ -1092,3 +1468,13 @@ netbe_get_vnet_hdr_len(struct net_backend *be) return (be->be_vnet_hdr_len); } + +#ifndef __FreeBSD__ +int +netbe_get_mac(net_backend_t *be, void *buf, size_t *buflen) +{ + if (be->get_mac == NULL) + return (ENOTSUP); + return (be->get_mac(be, buf, buflen)); +} +#endif diff --git a/usr/src/cmd/bhyve/net_backends.h b/usr/src/cmd/bhyve/net_backends.h index bc7834546b..ea4d059b6f 100644 --- a/usr/src/cmd/bhyve/net_backends.h +++ b/usr/src/cmd/bhyve/net_backends.h @@ -51,7 +51,9 @@ ssize_t netbe_recv(net_backend_t *be, const struct iovec *iov, int iovcnt); ssize_t netbe_rx_discard(net_backend_t *be); void netbe_rx_disable(net_backend_t *be); void netbe_rx_enable(net_backend_t *be); - +#ifndef __FreeBSD__ +int netbe_get_mac(net_backend_t *, void *, size_t *); +#endif /* * Network device capabilities taken from the VirtIO standard. diff --git a/usr/src/cmd/bhyve/pci_e82545.c b/usr/src/cmd/bhyve/pci_e82545.c index 598deff980..9f456f06f2 100644 --- a/usr/src/cmd/bhyve/pci_e82545.c +++ b/usr/src/cmd/bhyve/pci_e82545.c @@ -42,13 +42,11 @@ __FBSDID("$FreeBSD$"); #include <net/ethernet.h> #include <netinet/in.h> #include <netinet/tcp.h> -#ifndef __FreeBSD__ -#include <sys/filio.h> -#endif #ifndef WITHOUT_CAPSICUM #include <capsicum_helpers.h> #endif + #include <err.h> #include <errno.h> #include <fcntl.h> @@ -71,6 +69,7 @@ __FBSDID("$FreeBSD$"); #include "pci_emul.h" #include "mevent.h" #include "net_utils.h" +#include "net_backends.h" /* Hardware/register definitions XXX: move some to common code. */ #define E82545_VENDOR_ID_INTEL 0x8086 @@ -250,11 +249,10 @@ struct eth_uni { struct e82545_softc { struct pci_devinst *esc_pi; struct vmctx *esc_ctx; - struct mevent *esc_mevp; struct mevent *esc_mevpitr; pthread_mutex_t esc_mtx; struct ether_addr esc_mac; - int esc_tapfd; + net_backend_t *esc_be; /* General */ uint32_t esc_CTRL; /* x0000 device ctl */ @@ -360,9 +358,7 @@ struct e82545_softc { static void e82545_reset(struct e82545_softc *sc, int dev); static void e82545_rx_enable(struct e82545_softc *sc); static void e82545_rx_disable(struct e82545_softc *sc); -#ifdef __FreeBSD__ static void e82545_rx_callback(int fd, enum ev_type type, void *param); -#endif static void e82545_tx_start(struct e82545_softc *sc); static void e82545_tx_enable(struct e82545_softc *sc); static void e82545_tx_disable(struct e82545_softc *sc); @@ -556,7 +552,6 @@ e82545_eecd_strobe(struct e82545_softc *sc) } } -#ifdef __FreeBSD__ static void e82545_itr_callback(int fd, enum ev_type type, void *param) { @@ -575,7 +570,6 @@ e82545_itr_callback(int fd, enum ev_type type, void *param) } pthread_mutex_unlock(&sc->esc_mtx); } -#endif static void e82545_icr_assert(struct e82545_softc *sc, uint32_t bits) @@ -601,11 +595,9 @@ e82545_icr_assert(struct e82545_softc *sc, uint32_t bits) sc->esc_irq_asserted = 1; pci_lintr_assert(sc->esc_pi); if (sc->esc_ITR != 0) { -#ifdef __FreeBSD__ sc->esc_mevpitr = mevent_add( (sc->esc_ITR + 3905) / 3906, /* 256ns -> 1ms */ EVF_TIMER, e82545_itr_callback, sc); -#endif } } } @@ -631,11 +623,9 @@ e82545_ims_change(struct e82545_softc *sc, uint32_t bits) sc->esc_irq_asserted = 1; pci_lintr_assert(sc->esc_pi); if (sc->esc_ITR != 0) { -#ifdef __FreeBSD__ sc->esc_mevpitr = mevent_add( (sc->esc_ITR + 3905) / 3906, /* 256ns -> 1ms */ EVF_TIMER, e82545_itr_callback, sc); -#endif } } } @@ -837,9 +827,6 @@ e82545_bufsz(uint32_t rctl) return (256); /* Forbidden value. */ } -#ifdef __FreeBSD__ -static uint8_t dummybuf[2048]; - /* XXX one packet at a time until this is debugged */ static void e82545_rx_callback(int fd, enum ev_type type, void *param) @@ -857,7 +844,7 @@ e82545_rx_callback(int fd, enum ev_type type, void *param) if (!sc->esc_rx_enabled || sc->esc_rx_loopback) { DPRINTF("rx disabled (!%d || %d) -- packet(s) dropped", sc->esc_rx_enabled, sc->esc_rx_loopback); - while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) { + while (netbe_rx_discard(sc->esc_be) > 0) { } goto done1; } @@ -870,7 +857,7 @@ e82545_rx_callback(int fd, enum ev_type type, void *param) if (left < maxpktdesc) { DPRINTF("rx overflow (%d < %d) -- packet(s) dropped", left, maxpktdesc); - while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) { + while (netbe_rx_discard(sc->esc_be) > 0) { } goto done1; } @@ -887,9 +874,9 @@ e82545_rx_callback(int fd, enum ev_type type, void *param) rxd->buffer_addr, bufsz); vec[i].iov_len = bufsz; } - len = readv(sc->esc_tapfd, vec, maxpktdesc); + len = netbe_recv(sc->esc_be, vec, maxpktdesc); if (len <= 0) { - DPRINTF("tap: readv() returned %d\n", len); + DPRINTF("netbe_recv() returned %d", len); goto done; } @@ -970,7 +957,6 @@ done1: DPRINTF("rx_run done: head %x, tail %x", sc->esc_RDH, sc->esc_RDT); pthread_mutex_unlock(&sc->esc_mtx); } -#endif static uint16_t e82545_carry(uint32_t sum) @@ -983,11 +969,7 @@ e82545_carry(uint32_t sum) } static uint16_t -#ifdef __FreeBSD__ e82545_buf_checksum(uint8_t *buf, int len) -#else -e82545_buf_checksum(caddr_t buf, int len) -#endif { int i; uint32_t sum = 0; @@ -1024,7 +1006,11 @@ e82545_iov_checksum(struct iovec *iov, int iovcnt, int off, int len) odd = 0; while (len > 0 && iovcnt > 0) { now = MIN(len, iov->iov_len - off); +#ifdef __FreeBSD__ s = e82545_buf_checksum(iov->iov_base + off, now); +#else + s = e82545_buf_checksum((uint8_t *)iov->iov_base + off, now); +#endif sum += odd ? (s << 8) : s; odd ^= (now & 1); len -= now; @@ -1069,10 +1055,10 @@ static void e82545_transmit_backend(struct e82545_softc *sc, struct iovec *iov, int iovcnt) { - if (sc->esc_tapfd == -1) + if (sc->esc_be == NULL) return; - (void) writev(sc->esc_tapfd, iov, iovcnt); + (void) netbe_send(sc->esc_be, iov, iovcnt); } static void @@ -1094,19 +1080,16 @@ static int e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, uint16_t dsize, uint16_t *rhead, int *tdwb) { -#ifdef __FreeBSD__ uint8_t *hdr, *hdrp; -#else - caddr_t hdr, hdrp; -#endif struct iovec iovb[I82545_MAX_TXSEGS + 2]; struct iovec tiov[I82545_MAX_TXSEGS + 2]; struct e1000_context_desc *cd; struct ck_info ckinfo[2]; struct iovec *iov; union e1000_tx_udesc *dsc; - int desc, dtype, len, ntype, iovcnt, tlen, hdrlen, vlen, tcp, tso; + int desc, dtype, len, ntype, iovcnt, tlen, tcp, tso; int mss, paylen, seg, tiovcnt, left, now, nleft, nnow, pv, pvoff; + unsigned hdrlen, vlen; uint32_t tcpsum, tcpseq; uint16_t ipcs, tcpcs, ipid, ohead; @@ -1116,7 +1099,6 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, ntype = 0; tso = 0; ohead = head; - hdr = NULL; /* iovb[0/1] may be used for writable copy of headers. */ iov = &iovb[2]; @@ -1251,6 +1233,68 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, } else { /* In case of TSO header length provided by software. */ hdrlen = sc->esc_txctx.tcp_seg_setup.fields.hdr_len; + + /* + * Cap the header length at 240 based on 7.2.4.5 of + * the Intel 82576EB (Rev 2.63) datasheet. + */ + if (hdrlen > 240) { + WPRINTF("TSO hdrlen too large: %d", hdrlen); + goto done; + } + + /* + * If VLAN insertion is requested, ensure the header + * at least holds the amount of data copied during + * VLAN insertion below. + * + * XXX: Realistic packets will include a full Ethernet + * header before the IP header at ckinfo[0].ck_start, + * but this check is sufficient to prevent + * out-of-bounds access below. + */ + if (vlen != 0 && hdrlen < ETHER_ADDR_LEN*2) { + WPRINTF("TSO hdrlen too small for vlan insertion " + "(%d vs %d) -- dropped", hdrlen, + ETHER_ADDR_LEN*2); + goto done; + } + + /* + * Ensure that the header length covers the used fields + * in the IP and TCP headers as well as the IP and TCP + * checksums. The following fields are accessed below: + * + * Header | Field | Offset | Length + * -------+-------+--------+------- + * IPv4 | len | 2 | 2 + * IPv4 | ID | 4 | 2 + * IPv6 | len | 4 | 2 + * TCP | seq # | 4 | 4 + * TCP | flags | 13 | 1 + * UDP | len | 4 | 4 + */ + if (hdrlen < ckinfo[0].ck_start + 6 || + hdrlen < ckinfo[0].ck_off + 2) { + WPRINTF("TSO hdrlen too small for IP fields (%d) " + "-- dropped", hdrlen); + goto done; + } + if (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_TCP) { + if (hdrlen < ckinfo[1].ck_start + 14 || + (ckinfo[1].ck_valid && + hdrlen < ckinfo[1].ck_off + 2)) { + WPRINTF("TSO hdrlen too small for TCP fields " + "(%d) -- dropped", hdrlen); + goto done; + } + } else { + if (hdrlen < ckinfo[1].ck_start + 8) { + WPRINTF("TSO hdrlen too small for UDP fields " + "(%d) -- dropped", hdrlen); + goto done; + } + } } /* Allocate, fill and prepend writable header vector. */ @@ -1270,9 +1314,14 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, } iov--; iovcnt++; +#ifdef __FreeBSD__ iov->iov_base = hdr; +#else + iov->iov_base = (caddr_t)hdr; +#endif iov->iov_len = hdrlen; - } + } else + hdr = NULL; /* Insert VLAN tag. */ if (vlen != 0) { @@ -1283,7 +1332,11 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, hdr[ETHER_ADDR_LEN*2 + 1] = sc->esc_VET & 0xff; hdr[ETHER_ADDR_LEN*2 + 2] = dsc->td.upper.fields.special >> 8; hdr[ETHER_ADDR_LEN*2 + 3] = dsc->td.upper.fields.special & 0xff; +#ifdef __FreeBSD__ iov->iov_base = hdr; +#else + iov->iov_base = (caddr_t)hdr; +#endif iov->iov_len += ETHER_VLAN_ENCAP_LEN; /* Correct checksum offsets after VLAN tag insertion. */ ckinfo[0].ck_start += ETHER_VLAN_ENCAP_LEN; @@ -1311,10 +1364,12 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, tcp = (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_TCP) != 0; mss = sc->esc_txctx.tcp_seg_setup.fields.mss; paylen = (sc->esc_txctx.cmd_and_length & 0x000fffff); - DPRINTF("tx %s segmentation offload %d+%d/%d bytes %d iovs\r\n", + DPRINTF("tx %s segmentation offload %d+%d/%d bytes %d iovs", tcp ? "TCP" : "UDP", hdrlen, paylen, mss, iovcnt); ipid = ntohs(*(uint16_t *)&hdr[ckinfo[0].ck_start + 4]); - tcpseq = ntohl(*(uint32_t *)&hdr[ckinfo[1].ck_start + 4]); + tcpseq = 0; + if (tcp) + tcpseq = ntohl(*(uint32_t *)&hdr[ckinfo[1].ck_start + 4]); ipcs = *(uint16_t *)&hdr[ckinfo[0].ck_off]; tcpcs = 0; if (ckinfo[1].ck_valid) /* Save partial pseudo-header checksum. */ @@ -1326,7 +1381,11 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, /* Construct IOVs for the segment. */ /* Include whole original header. */ +#ifdef __FreeBSD__ tiov[0].iov_base = hdr; +#else + tiov[0].iov_base = (caddr_t)hdr; +#endif tiov[0].iov_len = hdrlen; tiovcnt = 1; /* Include respective part of payload IOV. */ @@ -1340,7 +1399,7 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, } else pvoff += nnow; } - DPRINTF("tx segment %d %d+%d bytes %d iovs\r\n", + DPRINTF("tx segment %d %d+%d bytes %d iovs", seg, hdrlen, now, tiovcnt); /* Update IP header. */ @@ -1435,7 +1494,7 @@ e82545_tx_run(struct e82545_softc *sc) sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT); } -static void * +static _Noreturn void * e82545_tx_thread(void *param) { struct e82545_softc *sc = param; @@ -1455,9 +1514,6 @@ e82545_tx_thread(void *param) /* Process some tx descriptors. Lock dropped inside. */ e82545_tx_run(sc); } -#ifndef __FreeBSD__ - return (NULL); -#endif } static void @@ -2236,58 +2292,6 @@ e82545_reset(struct e82545_softc *sc, int drvr) sc->esc_TXDCTL = 0; } -static void -e82545_open_tap(struct e82545_softc *sc, const char *path) -{ - char tbuf[80]; -#ifndef WITHOUT_CAPSICUM - cap_rights_t rights; -#endif - - if (path == NULL) { - sc->esc_tapfd = -1; - return; - } - - strcpy(tbuf, "/dev/"); - strlcat(tbuf, path, sizeof(tbuf)); - - sc->esc_tapfd = open(tbuf, O_RDWR); - if (sc->esc_tapfd == -1) { - DPRINTF("unable to open tap device %s\n", path); - exit(4); - } - - /* - * Set non-blocking and register for read - * notifications with the event loop - */ - int opt = 1; - if (ioctl(sc->esc_tapfd, FIONBIO, &opt) < 0) { - WPRINTF("tap device O_NONBLOCK failed: %d\n", errno); - close(sc->esc_tapfd); - sc->esc_tapfd = -1; - } - -#ifndef WITHOUT_CAPSICUM - cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); - if (caph_rights_limit(sc->esc_tapfd, &rights) == -1) - errx(EX_OSERR, "Unable to apply rights for sandbox"); -#endif - -#ifdef __FreeBSD__ - sc->esc_mevp = mevent_add(sc->esc_tapfd, - EVF_READ, - e82545_rx_callback, - sc); - if (sc->esc_mevp == NULL) { - DPRINTF("Could not register mevent %d\n", EVF_READ); - close(sc->esc_tapfd); - sc->esc_tapfd = -1; - } -#endif -} - static int e82545_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) { @@ -2342,51 +2346,36 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) } else net_genmac(pi, sc->esc_mac.octet); - const char *tap = get_config_value_node(nvl, "tap"); - if (tap != NULL && (strncmp(tap, "tap", 3) == 0 || - strncmp(tap, "vmnet", 5) == 0)) - e82545_open_tap(sc, tap); + err = netbe_init(&sc->esc_be, nvl, e82545_rx_callback, sc); + if (err) { + free(sc); + return (err); + } - /* H/w initiated reset */ - e82545_reset(sc, 0); +#ifndef __FreeBSD__ + size_t buflen = sizeof (sc->esc_mac.octet); - return (0); -} + err = netbe_get_mac(sc->esc_be, sc->esc_mac.octet, &buflen); + if (err != 0) { + free(sc); + return (err); + } +#endif -#ifndef __FreeBSD__ -static int -e82545_legacy_config(nvlist_t *nvl, const char *opt) -{ - char *config, *name, *tofree, *value; + netbe_rx_enable(sc->esc_be); - if (opt == NULL) - return (0); + /* H/w initiated reset */ + e82545_reset(sc, 0); - config = tofree = strdup(opt); - while ((name = strsep(&config, ",")) != NULL) { - value = strchr(name, '='); - if (value != NULL) { - *value++ = '\0'; - set_config_value_node(nvl, name, value); - } else { - set_config_value_node(nvl, "tap", name); - } - } - free(tofree); return (0); } -#endif struct pci_devemu pci_de_e82545 = { .pe_emu = "e1000", .pe_init = e82545_init, -#ifdef __FreeBSD__ .pe_legacy_config = netbe_legacy_config, -#else - .pe_legacy_config = e82545_legacy_config, -#endif .pe_barwrite = e82545_write, - .pe_barread = e82545_read + .pe_barread = e82545_read, }; PCI_EMUL_SET(pci_de_e82545); diff --git a/usr/src/cmd/bhyve/pci_virtio_net.c b/usr/src/cmd/bhyve/pci_virtio_net.c index 6736603fb8..4b9ce3e58a 100644 --- a/usr/src/cmd/bhyve/pci_virtio_net.c +++ b/usr/src/cmd/bhyve/pci_virtio_net.c @@ -27,42 +27,18 @@ * * $FreeBSD$ */ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - * - * Copyright 2013 Pluribus Networks Inc. - * Copyright 2018 Joyent, Inc. - */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include <sys/param.h> -#ifndef WITHOUT_CAPSICUM -#include <sys/capsicum.h> -#endif #include <sys/linker_set.h> #include <sys/select.h> #include <sys/uio.h> #include <sys/ioctl.h> #include <net/ethernet.h> -#ifdef __FreeBSD__ -#ifndef NETMAP_WITH_LIBS -#define NETMAP_WITH_LIBS -#endif -#include <net/netmap_user.h> -#endif +#include <net/if.h> /* IFNAMSIZ */ -#ifndef WITHOUT_CAPSICUM -#include <capsicum_helpers.h> -#endif #include <err.h> #include <errno.h> #include <fcntl.h> @@ -73,54 +49,30 @@ __FBSDID("$FreeBSD$"); #include <strings.h> #include <unistd.h> #include <assert.h> -#include <md5.h> #include <pthread.h> #include <pthread_np.h> -#include <sysexits.h> -#ifndef __FreeBSD__ -#include <poll.h> -#include <libdlpi.h> -#endif #include "bhyverun.h" #include "config.h" #include "debug.h" #include "pci_emul.h" -#ifdef __FreeBSD__ #include "mevent.h" -#endif #include "virtio.h" #include "net_utils.h" +#include "net_backends.h" +#include "iov.h" #define VTNET_RINGSZ 1024 #define VTNET_MAXSEGS 256 -/* - * Host capabilities. Note that we only offer a few of these. - */ -#define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */ -#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */ -#define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */ -#define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */ -#define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */ -#define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */ -#define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */ -#define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */ -#define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */ -#define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */ -#define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */ -#define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */ -#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */ -#define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */ -#define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */ -#define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */ -#define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */ -#define VIRTIO_NET_F_GUEST_ANNOUNCE \ - (1 << 21) /* guest can send gratuitous pkts */ +#define VTNET_MAX_PKT_LEN (65536 + 64) + +#define VTNET_MIN_MTU ETHERMIN +#define VTNET_MAX_MTU 65535 #define VTNET_S_HOSTCAPS \ - ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \ + ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \ VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC) /* @@ -143,19 +95,6 @@ struct virtio_net_config { #define VTNET_MAXQ 3 /* - * Fixed network header size - */ -struct virtio_net_rxhdr { - uint8_t vrh_flags; - uint8_t vrh_gso_type; - uint16_t vrh_hdr_len; - uint16_t vrh_gso_size; - uint16_t vrh_csum_start; - uint16_t vrh_csum_offset; - uint16_t vrh_bufs; -} __packed; - -/* * Debug printf */ static int pci_vtnet_debug; @@ -169,27 +108,16 @@ struct pci_vtnet_softc { struct virtio_softc vsc_vs; struct vqueue_info vsc_queues[VTNET_MAXQ - 1]; pthread_mutex_t vsc_mtx; - struct mevent *vsc_mevp; -#ifdef __FreeBSD - int vsc_tapfd; -#else - dlpi_handle_t vsc_dhp; - int vsc_dlpifd; -#endif - struct nm_desc *vsc_nmd; + net_backend_t *vsc_be; + + bool features_negotiated; /* protected by rx_mtx */ - int vsc_rx_ready; - bool features_negotiated; /* protected by rx_mtx */ int resetting; /* protected by tx_mtx */ uint64_t vsc_features; /* negotiated features */ - - struct virtio_net_config vsc_config; - struct virtio_consts vsc_consts; - + pthread_mutex_t rx_mtx; - int rx_vhdrlen; int rx_merge; /* merged rx bufs in use */ pthread_t tx_tid; @@ -197,9 +125,11 @@ struct pci_vtnet_softc { pthread_cond_t tx_cond; int tx_in_progress; - void (*pci_vtnet_rx)(struct pci_vtnet_softc *sc); - void (*pci_vtnet_tx)(struct pci_vtnet_softc *sc, struct iovec *iov, - int iovcnt, int len); + size_t vhdrlen; + size_t be_vhdrlen; + + struct virtio_net_config vsc_config; + struct virtio_consts vsc_consts; }; static void pci_vtnet_reset(void *); @@ -230,7 +160,16 @@ pci_vtnet_reset(void *vsc) /* Acquire the RX lock to block RX processing. */ pthread_mutex_lock(&sc->rx_mtx); + /* + * Make sure receive operation is disabled at least until we + * re-negotiate the features, since receive operation depends + * on the value of sc->rx_merge and the header length, which + * are both set in pci_vtnet_neg_features(). + * Receive operation will be enabled again once the guest adds + * the first receive buffers and kicks us. + */ sc->features_negotiated = false; + netbe_rx_disable(sc->vsc_be); /* Set sc->resetting and give a chance to the TX thread to stop. */ pthread_mutex_lock(&sc->tx_mtx); @@ -241,10 +180,6 @@ pci_vtnet_reset(void *vsc) pthread_mutex_lock(&sc->tx_mtx); } - sc->vsc_rx_ready = 0; - sc->rx_merge = 1; - sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr); - /* * Now reset rings, MSI-X vectors, and negotiated capabilities. * Do that with the TX lock held, since we need to reset @@ -257,434 +192,237 @@ pci_vtnet_reset(void *vsc) pthread_mutex_unlock(&sc->rx_mtx); } -/* - * Called to send a buffer chain out to the tap device - */ -#ifdef __FreeBSD__ -static void -pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, - int len) -{ - static char pad[60]; /* all zero bytes */ - - if (sc->vsc_tapfd == -1) - return; - - /* - * If the length is < 60, pad out to that and add the - * extra zero'd segment to the iov. It is guaranteed that - * there is always an extra iov available by the caller. - */ - if (len < 60) { - iov[iovcnt].iov_base = pad; - iov[iovcnt].iov_len = 60 - len; - iovcnt++; - } - (void) writev(sc->vsc_tapfd, iov, iovcnt); -} -#else -static void -pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, - int len) -{ - int i; - - for (i = 0; i < iovcnt; i++) { - (void) dlpi_send(sc->vsc_dhp, NULL, 0, - iov[i].iov_base, iov[i].iov_len, NULL); - } -} -#endif /* __FreeBSD__ */ - -#ifdef __FreeBSD__ -/* - * Called when there is read activity on the tap file descriptor. - * Each buffer posted by the guest is assumed to be able to contain - * an entire ethernet frame + rx header. - * MP note: the dummybuf is only used for discarding frames, so there - * is no need for it to be per-vtnet or locked. - */ -static uint8_t dummybuf[2048]; -#endif /* __FreeBSD__ */ - static __inline struct iovec * -rx_iov_trim(struct iovec *iov, int *niov, int tlen) +iov_trim_hdr(struct iovec *iov, int *iovcnt, unsigned int hlen) { struct iovec *riov; - /* XXX short-cut: assume first segment is >= tlen */ - assert(iov[0].iov_len >= tlen); + if (iov[0].iov_len < hlen) { + /* + * Not enough header space in the first fragment. + * That's not ok for us. + */ + return NULL; + } - iov[0].iov_len -= tlen; + iov[0].iov_len -= hlen; if (iov[0].iov_len == 0) { - assert(*niov > 1); - *niov -= 1; + *iovcnt -= 1; + if (*iovcnt == 0) { + /* + * Only space for the header. That's not + * enough for us. + */ + return NULL; + } riov = &iov[1]; } else { - iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen); + iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + hlen); riov = &iov[0]; } return (riov); } +struct virtio_mrg_rxbuf_info { + uint16_t idx; + uint16_t pad; + uint32_t len; +}; + static void -pci_vtnet_tap_rx(struct pci_vtnet_softc *sc) +pci_vtnet_rx(struct pci_vtnet_softc *sc) { - struct iovec iov[VTNET_MAXSEGS], *riov; + int prepend_hdr_len = sc->vhdrlen - sc->be_vhdrlen; + struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS]; + struct iovec iov[VTNET_MAXSEGS + 1]; struct vqueue_info *vq; - void *vrx; - int n; -#ifdef __FreeBSD__ - int len; -#else - size_t len; - int ret; -#endif - uint16_t idx; - /* - * Should never be called without a valid tap fd - */ -#ifdef __FreeBSD__ - assert(sc->vsc_tapfd != -1); -#else - assert(sc->vsc_dlpifd != -1); -#endif + vq = &sc->vsc_queues[VTNET_RXQ]; /* Features must be negotiated */ if (!sc->features_negotiated) { return; } - /* - * But, will be called when the rx ring hasn't yet - * been set up. - */ - if (!sc->vsc_rx_ready) { -#ifdef __FreeBSD__ - /* - * Drop the packet and try later. - */ - (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); -#endif - return; - } + for (;;) { + struct virtio_net_rxhdr *hdr; + uint32_t riov_bytes; + struct iovec *riov; + uint32_t ulen; + int riov_len; + int n_chains; + ssize_t rlen; + ssize_t plen; + + plen = netbe_peek_recvlen(sc->vsc_be); + if (plen <= 0) { + /* + * No more packets (plen == 0), or backend errored + * (plen < 0). Interrupt if needed and stop. + */ + vq_endchains(vq, /*used_all_avail=*/0); + return; + } + plen += prepend_hdr_len; - /* - * Check for available rx buffers - */ - vq = &sc->vsc_queues[VTNET_RXQ]; - if (!vq_has_descs(vq)) { /* - * Drop the packet and try later. Interrupt on - * empty, if that's negotiated. + * Get a descriptor chain to store the next ingress + * packet. In case of mergeable rx buffers, get as + * many chains as necessary in order to make room + * for plen bytes. */ -#ifdef __FreeBSD__ - (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); + riov_bytes = 0; + riov_len = 0; + riov = iov; + n_chains = 0; + do { + int n = vq_getchain(vq, &info[n_chains].idx, riov, + VTNET_MAXSEGS - riov_len, NULL); + + if (n == 0) { + /* + * No rx buffers. Enable RX kicks and double + * check. + */ + vq_kick_enable(vq); + if (!vq_has_descs(vq)) { + /* + * Still no buffers. Return the unused + * chains (if any), interrupt if needed + * (including for NOTIFY_ON_EMPTY), and + * disable the backend until the next + * kick. + */ + vq_retchains(vq, n_chains); + vq_endchains(vq, /*used_all_avail=*/1); + netbe_rx_disable(sc->vsc_be); + return; + } + + /* More rx buffers found, so keep going. */ + vq_kick_disable(vq); + continue; + } +#ifndef __FreeBSD__ + if (n == -1) { + /* + * An error from vq_getchain() means that + * an invalid descriptor was found. + */ + vq_retchains(vq, n_chains); + vq_endchains(vq, /*used_all_avail=*/0); + return; + } #endif - vq_endchains(vq, 1); - return; - } - - do { - /* - * Get descriptor chain - */ - n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); - assert(n >= 1 && n <= VTNET_MAXSEGS); + assert(n >= 1 && riov_len + n <= VTNET_MAXSEGS); + riov_len += n; + if (!sc->rx_merge) { + n_chains = 1; + break; + } +#ifndef __FreeBSD__ + size_t c = count_iov(riov, n); + if (c > UINT32_MAX) { + vq_retchains(vq, n_chains); + vq_endchains(vq, /*used_all_avail=*/0); + return; + } + info[n_chains].len = (uint32_t)c; +#else + info[n_chains].len = (uint32_t)count_iov(riov, n); +#endif + riov_bytes += info[n_chains].len; + riov += n; + n_chains++; + } while (riov_bytes < plen && riov_len < VTNET_MAXSEGS); - /* - * Get a pointer to the rx header, and use the - * data immediately following it for the packet buffer. - */ - vrx = iov[0].iov_base; - riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen); -#ifdef __FreeBSD__ - len = readv(sc->vsc_tapfd, riov, n); + riov = iov; +#ifdef __FreeBSD__ + hdr = riov[0].iov_base; #else - len = riov[0].iov_len; - ret = dlpi_recv(sc->vsc_dhp, NULL, NULL, - (uint8_t *)riov[0].iov_base, &len, 0, NULL); - if (ret != DLPI_SUCCESS) { - errno = EWOULDBLOCK; - len = 0; - } + hdr = (struct virtio_net_rxhdr *)riov[0].iov_base; #endif - if (len <= 0 && errno == EWOULDBLOCK) { + if (prepend_hdr_len > 0) { /* - * No more packets, but still some avail ring - * entries. Interrupt if needed/appropriate. + * The frontend uses a virtio-net header, but the + * backend does not. We need to prepend a zeroed + * header. */ - vq_retchains(vq, 1); - vq_endchains(vq, 0); - return; - } - - /* - * The only valid field in the rx packet header is the - * number of buffers if merged rx bufs were negotiated. - */ - memset(vrx, 0, sc->rx_vhdrlen); - - if (sc->rx_merge) { - struct virtio_net_rxhdr *vrxh; - - vrxh = vrx; - vrxh->vrh_bufs = 1; + riov = iov_trim_hdr(riov, &riov_len, prepend_hdr_len); + if (riov == NULL) { + /* + * The first collected chain is nonsensical, + * as it is not even enough to store the + * virtio-net header. Just drop it. + */ + vq_relchain(vq, info[0].idx, 0); + vq_retchains(vq, n_chains - 1); + continue; + } + memset(hdr, 0, prepend_hdr_len); } - /* - * Release this chain and handle more chains. - */ - vq_relchain(vq, idx, len + sc->rx_vhdrlen); - } while (vq_has_descs(vq)); - - /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */ - vq_endchains(vq, 1); -} - -#ifdef __FreeBSD__ -static __inline int -pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt) -{ - int r, i; - int len = 0; - - for (r = nmd->cur_tx_ring; ; ) { - struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r); - uint32_t cur, idx; - char *buf; - - if (nm_ring_empty(ring)) { - r++; - if (r > nmd->last_tx_ring) - r = nmd->first_tx_ring; - if (r == nmd->cur_tx_ring) - break; + rlen = netbe_recv(sc->vsc_be, riov, riov_len); + if (rlen != plen - prepend_hdr_len) { + /* + * If this happens it means there is something + * wrong with the backend (e.g., some other + * process is stealing our packets). + */ + WPRINTF(("netbe_recv: expected %zd bytes, " + "got %zd", plen - prepend_hdr_len, rlen)); + vq_retchains(vq, n_chains); continue; } - cur = ring->cur; - idx = ring->slot[cur].buf_idx; - buf = NETMAP_BUF(ring, idx); - - for (i = 0; i < iovcnt; i++) { - if (len + iov[i].iov_len > 2048) - break; - memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len); - len += iov[i].iov_len; - } - ring->slot[cur].len = len; - ring->head = ring->cur = nm_ring_next(ring, cur); - nmd->cur_tx_ring = r; - ioctl(nmd->fd, NIOCTXSYNC, NULL); - break; - } - return (len); -} + ulen = (uint32_t)plen; -static __inline int -pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt) -{ - int len = 0; - int i = 0; - int r; - - for (r = nmd->cur_rx_ring; ; ) { - struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r); - uint32_t cur, idx; - char *buf; - size_t left; - - if (nm_ring_empty(ring)) { - r++; - if (r > nmd->last_rx_ring) - r = nmd->first_rx_ring; - if (r == nmd->cur_rx_ring) - break; - continue; - } - cur = ring->cur; - idx = ring->slot[cur].buf_idx; - buf = NETMAP_BUF(ring, idx); - left = ring->slot[cur].len; - - for (i = 0; i < iovcnt && left > 0; i++) { - if (iov[i].iov_len > left) - iov[i].iov_len = left; - memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len); - len += iov[i].iov_len; - left -= iov[i].iov_len; + /* + * Publish the used buffers to the guest, reporting the + * number of bytes that we wrote. + */ + if (!sc->rx_merge) { + vq_relchain(vq, info[0].idx, ulen); + } else { + uint32_t iolen; + int i = 0; + + do { + iolen = info[i].len; + if (iolen > ulen) { + iolen = ulen; + } + vq_relchain_prepare(vq, info[i].idx, iolen); + ulen -= iolen; + i++; + } while (ulen > 0); + + hdr->vrh_bufs = i; + vq_relchain_publish(vq); + assert(i == n_chains); } - ring->head = ring->cur = nm_ring_next(ring, cur); - nmd->cur_rx_ring = r; - ioctl(nmd->fd, NIOCRXSYNC, NULL); - break; } - for (; i < iovcnt; i++) - iov[i].iov_len = 0; - return (len); } /* - * Called to send a buffer chain out to the vale port + * Called when there is read activity on the backend file descriptor. + * Each buffer posted by the guest is assumed to be able to contain + * an entire ethernet frame + rx header. */ static void -pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, - int len) -{ - static char pad[60]; /* all zero bytes */ - - if (sc->vsc_nmd == NULL) - return; - - /* - * If the length is < 60, pad out to that and add the - * extra zero'd segment to the iov. It is guaranteed that - * there is always an extra iov available by the caller. - */ - if (len < 60) { - iov[iovcnt].iov_base = pad; - iov[iovcnt].iov_len = 60 - len; - iovcnt++; - } - (void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt); -} - -static void -pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc) -{ - struct iovec iov[VTNET_MAXSEGS], *riov; - struct vqueue_info *vq; - void *vrx; - int len, n; - uint16_t idx; - - /* - * Should never be called without a valid netmap descriptor - */ - assert(sc->vsc_nmd != NULL); - - /* Features must be negotiated */ - if (!sc->features_negotiated) { - return; - } - - /* - * But, will be called when the rx ring hasn't yet - * been set up. - */ - if (!sc->vsc_rx_ready) { - /* - * Drop the packet and try later. - */ - (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf); - return; - } - - /* - * Check for available rx buffers - */ - vq = &sc->vsc_queues[VTNET_RXQ]; - if (!vq_has_descs(vq)) { - /* - * Drop the packet and try later. Interrupt on - * empty, if that's negotiated. - */ - (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf); - vq_endchains(vq, 1); - return; - } - - do { - /* - * Get descriptor chain. - */ - n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); - assert(n >= 1 && n <= VTNET_MAXSEGS); - - /* - * Get a pointer to the rx header, and use the - * data immediately following it for the packet buffer. - */ - vrx = iov[0].iov_base; - riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen); - - len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n); - - if (len == 0) { - /* - * No more packets, but still some avail ring - * entries. Interrupt if needed/appropriate. - */ - vq_retchain(vq); - vq_endchains(vq, 0); - return; - } - - /* - * The only valid field in the rx packet header is the - * number of buffers if merged rx bufs were negotiated. - */ - memset(vrx, 0, sc->rx_vhdrlen); - - if (sc->rx_merge) { - struct virtio_net_rxhdr *vrxh; - - vrxh = vrx; - vrxh->vrh_bufs = 1; - } - - /* - * Release this chain and handle more chains. - */ - vq_relchain(vq, idx, len + sc->rx_vhdrlen); - } while (vq_has_descs(vq)); - - /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */ - vq_endchains(vq, 1); -} -#endif /* __FreeBSD__ */ - -#ifdef __FreeBSD__ -static void pci_vtnet_rx_callback(int fd, enum ev_type type, void *param) { struct pci_vtnet_softc *sc = param; pthread_mutex_lock(&sc->rx_mtx); - sc->pci_vtnet_rx(sc); + pci_vtnet_rx(sc); pthread_mutex_unlock(&sc->rx_mtx); } -#else -static void * -pci_vtnet_poll_thread(void *param) -{ - struct pci_vtnet_softc *sc = param; - pollfd_t pollset; - - pollset.fd = sc->vsc_dlpifd; - pollset.events = POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND; - - for (;;) { - if (poll(&pollset, 1, -1) < 0) { - if (errno == EINTR) - continue; - fprintf(stderr, "pci_vtnet_poll_thread poll() error %d\n", errno); - continue; - } - pthread_mutex_lock(&sc->vsc_mtx); - pci_vtnet_tap_rx(sc); - pthread_mutex_unlock(&sc->vsc_mtx); - } - - return (NULL); -} -#endif /* __FreeBSD__ */ +/* Called on RX kick. */ static void pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) { @@ -695,42 +433,64 @@ pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) * Enable RX only if features are negotiated. */ pthread_mutex_lock(&sc->rx_mtx); - if (sc->vsc_rx_ready == 0 && sc->features_negotiated) { - sc->vsc_rx_ready = 1; - vq_kick_disable(vq); + if (!sc->features_negotiated) { + pthread_mutex_unlock(&sc->rx_mtx); + return; } + + vq_kick_disable(vq); + netbe_rx_enable(sc->vsc_be); pthread_mutex_unlock(&sc->rx_mtx); } +/* TX virtqueue processing, called by the TX thread. */ static void pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq) { struct iovec iov[VTNET_MAXSEGS + 1]; - int i, n; - int plen, tlen; + struct iovec *siov = iov; uint16_t idx; + ssize_t len; + int n; /* - * Obtain chain of descriptors. The first one is - * really the header descriptor, so we need to sum - * up two lengths: packet length and transfer length. + * Obtain chain of descriptors. The first descriptor also + * contains the virtio-net header. */ n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); assert(n >= 1 && n <= VTNET_MAXSEGS); - plen = 0; - tlen = iov[0].iov_len; - for (i = 1; i < n; i++) { - plen += iov[i].iov_len; - tlen += iov[i].iov_len; + + if (sc->vhdrlen != sc->be_vhdrlen) { + /* + * The frontend uses a virtio-net header, but the backend + * does not. We simply strip the header and ignore it, as + * it should be zero-filled. + */ + siov = iov_trim_hdr(siov, &n, sc->vhdrlen); } - DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n)); - sc->pci_vtnet_tx(sc, &iov[1], n - 1, plen); + if (siov == NULL) { + /* The chain is nonsensical. Just drop it. */ + len = 0; + } else { + len = netbe_send(sc->vsc_be, siov, n); + if (len < 0) { + /* + * If send failed, report that 0 bytes + * were read. + */ + len = 0; + } + } - /* chain is processed, release it and set tlen */ - vq_relchain(vq, idx, tlen); + /* + * Return the processed chain to the guest, reporting + * the number of bytes that we read. + */ + vq_relchain(vq, idx, len); } +/* Called on TX kick. */ static void pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq) { @@ -797,140 +557,23 @@ pci_vtnet_tx_thread(void *param) /* * Generate an interrupt if needed. */ - vq_endchains(vq, 1); + vq_endchains(vq, /*used_all_avail=*/1); pthread_mutex_lock(&sc->tx_mtx); } +#ifndef __FreeBSD__ return (NULL); +#endif } -#ifdef __FreeBSD__ +#ifdef notyet static void pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq) { DPRINTF(("vtnet: control qnotify!")); } -#endif /* __FreeBSD__ */ - -static void -pci_vtnet_tap_setup(struct pci_vtnet_softc *sc, const char *devname) -{ - char tbuf[80]; -#ifndef WITHOUT_CAPSICUM - cap_rights_t rights; -#endif -#ifndef __FreeBSD__ - uchar_t physaddr[DLPI_PHYSADDR_MAX]; - size_t physaddrlen = DLPI_PHYSADDR_MAX; - int error; -#endif - - strcpy(tbuf, "/dev/"); - strlcat(tbuf, devname, sizeof(tbuf)); - - sc->pci_vtnet_rx = pci_vtnet_tap_rx; - sc->pci_vtnet_tx = pci_vtnet_tap_tx; -#ifdef __FreeBSD__ - sc->vsc_tapfd = open(tbuf, O_RDWR); - if (sc->vsc_tapfd == -1) { - WPRINTF(("open of tap device %s failed\n", tbuf)); - return; - } - - /* - * Set non-blocking and register for read - * notifications with the event loop - */ - int opt = 1; - if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) { - WPRINTF(("tap device O_NONBLOCK failed\n")); - close(sc->vsc_tapfd); - sc->vsc_tapfd = -1; - } - -#ifndef WITHOUT_CAPSICUM - cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); - if (caph_rights_limit(sc->vsc_tapfd, &rights) == -1) - errx(EX_OSERR, "Unable to apply rights for sandbox"); -#endif - - sc->vsc_mevp = mevent_add(sc->vsc_tapfd, - EVF_READ, - pci_vtnet_rx_callback, - sc); - if (sc->vsc_mevp == NULL) { - WPRINTF(("Could not register event\n")); - close(sc->vsc_tapfd); - sc->vsc_tapfd = -1; - } -#else - if (dlpi_open(devname, &sc->vsc_dhp, DLPI_RAW) != DLPI_SUCCESS) { - WPRINTF(("open of vnic device %s failed\n", devname)); - } - - if (dlpi_get_physaddr(sc->vsc_dhp, DL_CURR_PHYS_ADDR, physaddr, - &physaddrlen) != DLPI_SUCCESS) { - WPRINTF(("read MAC address of vnic device %s failed\n", - devname)); - } - if (physaddrlen != ETHERADDRL) { - WPRINTF(("bad MAC address len %d on vnic device %s\n", - physaddrlen, devname)); - } - memcpy(sc->vsc_config.mac, physaddr, ETHERADDRL); - - if (dlpi_bind(sc->vsc_dhp, DLPI_ANY_SAP, NULL) != DLPI_SUCCESS) { - WPRINTF(("bind of vnic device %s failed\n", devname)); - } - - if (dlpi_promiscon(sc->vsc_dhp, DL_PROMISC_PHYS) != DLPI_SUCCESS) { - WPRINTF(("enable promiscous mode(physical) of vnic device %s " - "failed\n", devname)); - } - if (dlpi_promiscon(sc->vsc_dhp, DL_PROMISC_SAP) != DLPI_SUCCESS) { - WPRINTF(("enable promiscous mode(SAP) of vnic device %s " - "failed\n", devname)); - } - - sc->vsc_dlpifd = dlpi_fd(sc->vsc_dhp); - - if (fcntl(sc->vsc_dlpifd, F_SETFL, O_NONBLOCK) < 0) { - WPRINTF(("enable O_NONBLOCK of vnic device %s failed\n", - devname)); - dlpi_close(sc->vsc_dhp); - sc->vsc_dlpifd = -1; - } - - error = pthread_create(NULL, NULL, pci_vtnet_poll_thread, sc); - assert(error == 0); #endif -} - -#ifdef __FreeBSD__ -static void -pci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname) -{ - sc->pci_vtnet_rx = pci_vtnet_netmap_rx; - sc->pci_vtnet_tx = pci_vtnet_netmap_tx; - - sc->vsc_nmd = nm_open(ifname, NULL, 0, 0); - if (sc->vsc_nmd == NULL) { - WPRINTF(("open of netmap device %s failed\n", ifname)); - return; - } - - sc->vsc_mevp = mevent_add(sc->vsc_nmd->fd, - EVF_READ, - pci_vtnet_rx_callback, - sc); - if (sc->vsc_mevp == NULL) { - WPRINTF(("Could not register event\n")); - nm_close(sc->vsc_nmd); - sc->vsc_nmd = NULL; - } -} -#endif /* __FreeBSD__ */ static int pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) @@ -938,11 +581,7 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) struct pci_vtnet_softc *sc; const char *value; char tname[MAXCOMLEN + 1]; -#ifdef __FreeBSD__ unsigned long mtu = ETHERMTU; -#else - int use_msix = 1; -#endif int err; /* @@ -974,7 +613,6 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) } else net_genmac(pi, sc->vsc_config.mac); -#ifdef __FreeBSD__ value = get_config_value_node(nvl, "mtu"); if (value != NULL) { err = net_parsemtu(value, &mtu); @@ -982,6 +620,7 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) free(sc); return (err); } + if (mtu < VTNET_MIN_MTU || mtu > VTNET_MAX_MTU) { err = EINVAL; errno = EINVAL; @@ -990,26 +629,28 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) } sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MTU; } -#endif + sc->vsc_config.mtu = mtu; /* Permit interfaces without a configured backend. */ if (get_config_value_node(nvl, "backend") != NULL) { -#ifdef __FreeBSD__ err = netbe_init(&sc->vsc_be, nvl, pci_vtnet_rx_callback, sc); if (err) { free(sc); return (err); } -#else - pci_vtnet_tap_setup(sc, get_config_value_node(nvl, "backend")); +#ifndef __FreeBSD__ + size_t buflen = sizeof (sc->vsc_config.mac); + + err = netbe_get_mac(sc->vsc_be, sc->vsc_config.mac, &buflen); + if (err != 0) { + free(sc); + return (err); + } #endif } - -#ifdef __FreeBSD__ sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MRG_RXBUF | netbe_get_cap(sc->vsc_be); -#endif /* * Since we do not actually support multiqueue, @@ -1026,18 +667,23 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) /* Link is always up. */ sc->vsc_config.status = 1; + + vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues); + sc->vsc_vs.vs_mtx = &sc->vsc_mtx; /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */ - if (vi_intr_init(&sc->vsc_vs, 1, use_msix)) + if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) { + free(sc); return (1); + } /* use BAR 0 to map config regs in IO space */ vi_set_io_bar(&sc->vsc_vs, 0); sc->resetting = 0; - sc->rx_merge = 1; - sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr); + sc->rx_merge = 0; + sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2; pthread_mutex_init(&sc->rx_mtx, NULL); /* @@ -1062,8 +708,8 @@ pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value) struct pci_vtnet_softc *sc = vsc; void *ptr; - if (offset < 6) { - assert(offset + size <= 6); + if (offset < (int)sizeof(sc->vsc_config.mac)) { + assert(offset + size <= (int)sizeof(sc->vsc_config.mac)); /* * The driver is allowed to change the MAC address */ @@ -1095,50 +741,33 @@ pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features) sc->vsc_features = negotiated_features; - if (!(sc->vsc_features & VIRTIO_NET_F_MRG_RXBUF)) { + if (negotiated_features & VIRTIO_NET_F_MRG_RXBUF) { + sc->vhdrlen = sizeof(struct virtio_net_rxhdr); + sc->rx_merge = 1; + } else { + /* + * Without mergeable rx buffers, virtio-net header is 2 + * bytes shorter than sizeof(struct virtio_net_rxhdr). + */ + sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2; sc->rx_merge = 0; - /* non-merge rx header is 2 bytes shorter */ - sc->rx_vhdrlen -= 2; } + /* Tell the backend to enable some capabilities it has advertised. */ + netbe_set_cap(sc->vsc_be, negotiated_features, sc->vhdrlen); + sc->be_vhdrlen = netbe_get_vnet_hdr_len(sc->vsc_be); + assert(sc->be_vhdrlen == 0 || sc->be_vhdrlen == sc->vhdrlen); + pthread_mutex_lock(&sc->rx_mtx); sc->features_negotiated = true; pthread_mutex_unlock(&sc->rx_mtx); } -#ifndef __FreeBSD__ -static int -pci_vtnet_legacy_config(nvlist_t *nvl, const char *opt) -{ - char *config, *name, *tofree, *value; - - if (opt == NULL) - return (0); - - config = tofree = strdup(opt); - while ((name = strsep(&config, ",")) != NULL) { - value = strchr(name, '='); - if (value != NULL) { - *value++ = '\0'; - set_config_value_node(nvl, name, value); - } else { - set_config_value_node(nvl, "backend", name); - } - } - free(tofree); - return (0); -} -#endif - -struct pci_devemu pci_de_vnet = { +static struct pci_devemu pci_de_vnet = { .pe_emu = "virtio-net", .pe_init = pci_vtnet_init, -#ifdef __FreeBSD__ .pe_legacy_config = netbe_legacy_config, -#else - .pe_legacy_config = pci_vtnet_legacy_config, -#endif .pe_barwrite = vi_pci_write, - .pe_barread = vi_pci_read + .pe_barread = vi_pci_read, }; PCI_EMUL_SET(pci_de_vnet); diff --git a/usr/src/compat/bhyve/sys/cdefs.h b/usr/src/compat/bhyve/sys/cdefs.h index 0f3146ea43..71dd205466 100644 --- a/usr/src/compat/bhyve/sys/cdefs.h +++ b/usr/src/compat/bhyve/sys/cdefs.h @@ -12,6 +12,7 @@ /* * Copyright 2013 Pluribus Networks Inc. * Copyright 2017 Joyent, Inc. + * Copyright 2021 OmniOS Community Edition (OmniOSce) Association. */ #ifndef _COMPAT_FREEBSD_SYS_CDEFS_H_ @@ -61,6 +62,12 @@ #define _Alignof(x) __alignof(x) #endif +#if defined(__cplusplus) && __cplusplus >= 201103L +#define _Noreturn [[noreturn]] +#else +#define _Noreturn __dead2 +#endif + #if !__has_extension(c_static_assert) #if (defined(__cplusplus) && __cplusplus >= 201103L) || \ __has_extension(cxx_static_assert) diff --git a/usr/src/man/man1m/bhyve.1m b/usr/src/man/man1m/bhyve.1m index ea61607829..a6c4637538 100644 --- a/usr/src/man/man1m/bhyve.1m +++ b/usr/src/man/man1m/bhyve.1m @@ -258,7 +258,9 @@ emulation is identical but uses a PCI vendor ID of .It Li passthru PCI pass-through device. .It Li virtio-net-viona -Virtio network interface. +Accelerated Virtio network interface. +.It Li virtio-net +Legacy Virtio network interface. .It Li virtio-blk Virtio block storage interface. .It Li virtio-rnd @@ -295,7 +297,7 @@ If is not specified, the device emulation has no backend and can be considered unconnected. .Pp -Host Bridge Devices +.Sy Host Bridge Devices .Bl -tag -width 10n .It Cm model Ns = Ns Ar model Specify a hostbridge model to emulate. @@ -324,7 +326,7 @@ and .Va devid must be specified. .Pp -Network backends: +.Sy Accelerated Virtio Network Backends : .Bl -tag -width 10n .It Oo Cm vnic Ns = Oc Ns Ar vnic Ns Oo , Ns Cm feature_mask Ns = Ns Ar mask Oc .Pp @@ -337,7 +339,30 @@ Bits set in the value are removed from the advertised features. .El .Pp -Block storage devices: +.Sy Other Network Backends : +.Bl -tag -width 10n +.It Oo Cm vnic Ns = Oc Ns Ar vnic Ns Oo , Ns Ar network-backend-options Oc +.Pp +.Ar vnic +is the name of a configured virtual NIC on the system. +.El +.Pp +The +.Ar network-backend-options +are: +.Bl -tag -width 8n +.It Cm promiscphys +Enable promiscuous mode at the physical level (default: false) +.It Cm promiscsap +Enable promiscuous mode at the SAP level (default: true) +.It Cm promiscmulti +Enable promiscuous mode for all multicast addresses (default: true) +.It Cm promiscrxonly +The selected promiscuous modes are only enabled for received traffic +(default: true). +.El +.Pp +.Sy Block storage devices : .Bl -tag -width 10n .It Pa /filename Ns Oo , Ns Ar block-device-options Oc .It Pa /dev/xxx Ns Oo , Ns Ar block-device-options Oc @@ -376,7 +401,7 @@ process. Use the host TTY device for serial port I/O. .El .Pp -Boot ROM device: +.Sy Boot ROM device : .Bl -tag -width 10n .It Pa romfile Map @@ -384,7 +409,7 @@ Map in the guest address space reserved for boot firmware. .El .Pp -Pass-through devices: +.Sy Pass-through devices : .Bl -tag -width 10n .It Pa /dev/ppt Ns Ar N Connect to a PCI device on the host identified by the specificed path. @@ -398,7 +423,7 @@ The host device must have been previously attached to the .Sy ppt driver. .Pp -Virtio console devices: +.Sy Virtio console devices : .Bl -tag -width 10n .It Li port1= Ns Pa /path/to/port1.sock Ns ,anotherport= Ns Pa ... A maximum of 16 ports per device can be created. @@ -423,7 +448,7 @@ Emergency write is advertised, but no-op at present. .El .El .Pp -Framebuffer devices: +.Sy Framebuffer devices : .Bl -tag -width 10n .It Xo .Sm off @@ -503,14 +528,14 @@ the session over an encrypted channel provided by IPsec or SSH. .El .El .Pp -xHCI USB devices: +.Sy xHCI USB devices : .Bl -tag -width 10n .It Li tablet A USB tablet device which provides precise cursor synchronization when using VNC. .El .Pp -NVMe devices: +.Sy NVMe devices : .Bl -tag -width 10n .It Li path Accepted device paths are: @@ -531,7 +556,7 @@ Sector size (defaults to blockif sector size). Serial number with maximum 20 characters. .El .Pp -AHCI devices: +.Sy AHCI devices : .Bl -tag -width 10n .It Li nmrr Nominal Media Rotation Rate, known as RPM. value 1 will indicate device as Solid State Disk. default value is 0, not report. diff --git a/usr/src/man/man4/bhyve_config.4 b/usr/src/man/man4/bhyve_config.4 index b2d563cf8f..23e1e33c5a 100644 --- a/usr/src/man/man4/bhyve_config.4 +++ b/usr/src/man/man4/bhyve_config.4 @@ -222,7 +222,9 @@ VirtIO block storage interface. .It Li virtio-console VirtIO console interface. .It Li virtio-net-viona -VirtIO network interface. +Accelerated VirtIO network interface. +.It Li net-viona +Legacy VirtIO network interface. .It Li virtio-rnd VirtIO random number generator interface. .It Li xhci @@ -272,7 +274,7 @@ Specify the logical and physical sector size of the emulated disk. If the physical size is not specified, it is set to be equal to the logical size. .El -.Ss virtio-net-viona Settings +.Ss virtio-net-viona Network Backend Settings Viona network devices use the following settings to configure their backend. .Bl -column "feature_flags" "string" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description @@ -281,6 +283,21 @@ The VNIC to use for the network connection. .It feature_mask Ta integer Ta 0 Ta Specify a mask to apply to the virtio features advertised to the guest. .El +.Ss Other Network Backend Settings +Other network devices use the following settings to configure their backend. +.Bl -column "feature_flags" "string" "Default" +.It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description +.It vnic Ta string Ta Ta +The VNIC to use for the network connection. +.It promiscphys Ta bool Ta false Ta +Enable promiscuous mode at the physical level. +.It promiscsap Ta bool Ta true Ta +Enable promiscuous mode at the SAP level. +.It promiscmulti Ta bool Ta true Ta +Enable promiscuous mode for all multicast addresses. +.It promiscrxonly Ta bool Ta true Ta +The selected promiscuous modes are only enabled for received traffic. +.El .Ss UART Device Settings .Bl -column "Name" "Format" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description |