summaryrefslogtreecommitdiff
path: root/usr/src/cmd/bhyve/pci_virtio_net.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/cmd/bhyve/pci_virtio_net.c')
-rw-r--r--usr/src/cmd/bhyve/pci_virtio_net.c870
1 files changed, 870 insertions, 0 deletions
diff --git a/usr/src/cmd/bhyve/pci_virtio_net.c b/usr/src/cmd/bhyve/pci_virtio_net.c
new file mode 100644
index 0000000000..e58bdd0115
--- /dev/null
+++ b/usr/src/cmd/bhyve/pci_virtio_net.c
@@ -0,0 +1,870 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/pci_virtio_net.c 253440 2013-07-17 23:37:33Z grehan $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pci_virtio_net.c 253440 2013-07-17 23:37:33Z grehan $");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/select.h>
+#include <sys/uio.h>
+#include <sys/ioctl.h>
+#include <net/ethernet.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <md5.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#ifndef __FreeBSD__
+#include <poll.h>
+#include <libdlpi.h>
+#endif
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#ifdef __FreeBSD__
+#include "mevent.h"
+#endif
+#include "virtio.h"
+
+#define VTNET_RINGSZ 1024
+
+#define VTNET_MAXSEGS 32
+
+/*
+ * Host capabilities. Note that we only offer a few of these.
+ */
+#define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */
+#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */
+#define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */
+#define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */
+#define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */
+#define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */
+#define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */
+#define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */
+#define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */
+#define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */
+#define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */
+#define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */
+#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */
+#define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */
+#define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */
+#define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */
+#define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */
+#define VIRTIO_NET_F_GUEST_ANNOUNCE \
+ (1 << 21) /* guest can send gratuitous pkts */
+
+#define VTNET_S_HOSTCAPS \
+ ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \
+ VIRTIO_F_NOTIFY_ON_EMPTY)
+
+/*
+ * PCI config-space "registers"
+ */
+struct virtio_net_config {
+ uint8_t mac[6];
+ uint16_t status;
+} __packed;
+
+/*
+ * Queue definitions.
+ */
+#define VTNET_RXQ 0
+#define VTNET_TXQ 1
+#define VTNET_CTLQ 2 /* NB: not yet supported */
+
+#define VTNET_MAXQ 3
+
+/*
+ * Fixed network header size
+ */
+struct virtio_net_rxhdr {
+ uint8_t vrh_flags;
+ uint8_t vrh_gso_type;
+ uint16_t vrh_hdr_len;
+ uint16_t vrh_gso_size;
+ uint16_t vrh_csum_start;
+ uint16_t vrh_csum_offset;
+ uint16_t vrh_bufs;
+} __packed;
+
+/*
+ * Debug printf
+ */
+static int pci_vtnet_debug;
+#define DPRINTF(params) if (pci_vtnet_debug) printf params
+#define WPRINTF(params) printf params
+
+/*
+ * Per-device softc
+ */
+struct pci_vtnet_softc {
+ struct virtio_softc vsc_vs;
+ struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
+ pthread_mutex_t vsc_mtx;
+ struct mevent *vsc_mevp;
+
+#ifdef __FreeBSD
+ int vsc_tapfd;
+#else
+ dlpi_handle_t vsc_dhp;
+ int vsc_dlpifd;
+#endif
+ int vsc_rx_ready;
+ volatile int resetting; /* set and checked outside lock */
+
+ uint32_t vsc_features;
+ struct virtio_net_config vsc_config;
+
+ pthread_mutex_t rx_mtx;
+ int rx_in_progress;
+
+ pthread_t tx_tid;
+ pthread_mutex_t tx_mtx;
+ pthread_cond_t tx_cond;
+ int tx_in_progress;
+};
+
+static void pci_vtnet_reset(void *);
+/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
+static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
+static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
+
+static struct virtio_consts vtnet_vi_consts = {
+ "vtnet", /* our name */
+ VTNET_MAXQ - 1, /* we currently support 2 virtqueues */
+ sizeof(struct virtio_net_config), /* config reg size */
+ pci_vtnet_reset, /* reset */
+ NULL, /* device-wide qnotify -- not used */
+ pci_vtnet_cfgread, /* read PCI config */
+ pci_vtnet_cfgwrite, /* write PCI config */
+ VTNET_S_HOSTCAPS, /* our capabilities */
+};
+
+/*
+ * If the transmit thread is active then stall until it is done.
+ */
+static void
+pci_vtnet_txwait(struct pci_vtnet_softc *sc)
+{
+
+ pthread_mutex_lock(&sc->tx_mtx);
+ while (sc->tx_in_progress) {
+ pthread_mutex_unlock(&sc->tx_mtx);
+ usleep(10000);
+ pthread_mutex_lock(&sc->tx_mtx);
+ }
+ pthread_mutex_unlock(&sc->tx_mtx);
+}
+
+/*
+ * If the receive thread is active then stall until it is done.
+ */
+static void
+pci_vtnet_rxwait(struct pci_vtnet_softc *sc)
+{
+
+ pthread_mutex_lock(&sc->rx_mtx);
+ while (sc->rx_in_progress) {
+ pthread_mutex_unlock(&sc->rx_mtx);
+ usleep(10000);
+ pthread_mutex_lock(&sc->rx_mtx);
+ }
+ pthread_mutex_unlock(&sc->rx_mtx);
+}
+
+static void
+pci_vtnet_reset(void *vsc)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ DPRINTF(("vtnet: device reset requested !\n"));
+
+ sc->resetting = 1;
+
+ /*
+ * Wait for the transmit and receive threads to finish their
+ * processing.
+ */
+ pci_vtnet_txwait(sc);
+ pci_vtnet_rxwait(sc);
+
+ sc->vsc_rx_ready = 0;
+
+ /* now reset rings, MSI-X vectors, and negotiated capabilities */
+ vi_reset_dev(&sc->vsc_vs);
+
+ sc->resetting = 0;
+}
+
+/*
+ * Called to send a buffer chain out to the tap device
+ */
+#ifdef __FreeBSD__
+static void
+pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
+ int len)
+{
+ static char pad[60]; /* all zero bytes */
+
+ if (sc->vsc_tapfd == -1)
+ return;
+
+ /*
+ * If the length is < 60, pad out to that and add the
+ * extra zero'd segment to the iov. It is guaranteed that
+ * there is always an extra iov available by the caller.
+ */
+ if (len < 60) {
+ iov[iovcnt].iov_base = pad;
+ iov[iovcnt].iov_len = 60 - len;
+ iovcnt++;
+ }
+ (void) writev(sc->vsc_tapfd, iov, iovcnt);
+}
+#else
+static void
+pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
+ int len)
+{
+ int i;
+
+ for (i = 0; i < iovcnt; i++) {
+ (void) dlpi_send(sc->vsc_dhp, NULL, NULL,
+ iov[i].iov_base, iov[i].iov_len, NULL);
+ }
+}
+#endif
+
+#ifdef __FreeBSD__
+/*
+ * Called when there is read activity on the tap file descriptor.
+ * Each buffer posted by the guest is assumed to be able to contain
+ * an entire ethernet frame + rx header.
+ * MP note: the dummybuf is only used for discarding frames, so there
+ * is no need for it to be per-vtnet or locked.
+ */
+static uint8_t dummybuf[2048];
+#endif
+
+static void
+pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
+{
+ struct vqueue_info *vq;
+ struct virtio_net_rxhdr *vrx;
+ uint8_t *buf;
+#ifdef __FreeBSD__
+ int len;
+#endif
+ struct iovec iov[VTNET_MAXSEGS];
+#ifndef __FreeBSD__
+ size_t len;
+ int ret;
+#endif
+ int total_len = 0;
+
+ /*
+ * Should never be called without a valid tap fd
+ */
+#ifdef __FreeBSD__
+ assert(sc->vsc_tapfd != -1);
+#else
+ assert(sc->vsc_dlpifd != -1);
+#endif
+
+ /*
+ * But, will be called when the rx ring hasn't yet
+ * been set up or the guest is resetting the device.
+ */
+ if (!sc->vsc_rx_ready || sc->resetting) {
+#ifdef __FreeBSD__
+ /*
+ * Drop the packet and try later.
+ */
+ (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
+#endif
+ return;
+ }
+
+ /*
+ * Check for available rx buffers
+ */
+ vq = &sc->vsc_queues[VTNET_RXQ];
+ vq_startchains(vq);
+ if (!vq_has_descs(vq)) {
+ /*
+ * Drop the packet and try later. Interrupt on
+ * empty, if that's negotiated.
+ */
+#ifdef __FreeBSD__
+ (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
+#endif
+ vq_endchains(vq, 1);
+ return;
+ }
+
+ do {
+ /*
+ * Get descriptor chain
+ */
+ if (sc->vsc_vs.vs_negotiated_caps & VIRTIO_NET_F_MRG_RXBUF) {
+ assert(vq_getchain(vq, iov, 1, NULL) == 1);
+
+ /*
+ * Get a pointer to the rx header, and use the
+ * data immediately following it for the packet buffer.
+ */
+ vrx = (struct virtio_net_rxhdr *)iov[0].iov_base;
+ buf = (uint8_t *)(vrx + 1);
+ total_len = iov[0].iov_len;
+#ifdef __FreeBSD__
+ len = read(sc->vsc_tapfd, buf,
+ iov[0].iov_len - sizeof(struct virtio_net_rxhdr));
+
+ if (len < 0 && errno == EWOULDBLOCK) {
+ /*
+ * No more packets, but still some avail ring
+ * entries. Interrupt if needed/appropriate.
+ */
+ vq_endchains(vq, 0);
+ return;
+ }
+#else
+ len = iov[0].iov_len - sizeof(struct virtio_net_rxhdr);
+ ret = dlpi_recv(sc->vsc_dhp, NULL, NULL, buf,
+ &len, 0, NULL);
+ if (ret != DLPI_SUCCESS) {
+ /*
+ * No more packets, but still some avail ring
+ * entries. Interrupt if needed/appropriate.
+ */
+ vq_endchains(vq, 0);
+ return;
+ }
+#endif
+ } else {
+ int i;
+ int num_segs;
+ num_segs = vq_getchain(vq, iov,
+ VTNET_MAXSEGS, NULL);
+ vrx = (struct virtio_net_rxhrd *)iov[0].iov_base;
+ total_len = iov[0].iov_len;
+ for (i = 1; i < num_segs; i++) {
+ buf = (uint8_t *)iov[i].iov_base;
+ total_len += iov[i].iov_len;
+#ifdef __FreeBSD__
+ len = read(sc->vsc_tapfd, buf, iov[i].iov_len);
+ if (len < 0 && errno == EWOULDBLOCK) {
+ /*
+ * No more packets,
+ * but still some avail ring entries.
+ * Interrupt if needed/appropriate.
+ */
+ break;
+ }
+#else
+ len = iov[i].iov_len;
+ ret = dlpi_recv(sc->vsc_dhp, NULL, NULL, buf,
+ &len, 0, NULL);
+ if (ret != DLPI_SUCCESS) {
+ /*
+ * No more packets,
+ * but still some avail ring entries.
+ * Interrupt if needed/appropriate.
+ */
+ total_len = 0;
+ break;
+ }
+#endif
+ }
+ if (total_len == 0) {
+ vq_endchains(vq, 0);
+ return;
+ }
+ }
+
+ /*
+ * The only valid field in the rx packet header is the
+ * number of buffers, which is always 1 without TSO
+ * support.
+ */
+ memset(vrx, 0, sizeof(struct virtio_net_rxhdr));
+ vrx->vrh_bufs = 1;
+
+ /*
+ * Release this chain and handle more chains.
+ */
+ vq_relchain(vq, total_len);
+ } while (vq_has_descs(vq));
+
+ /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
+ vq_endchains(vq, 1);
+}
+
+#ifdef __FreeBSD__
+static void
+pci_vtnet_tap_callback(int fd, enum ev_type type, void *param)
+{
+ struct pci_vtnet_softc *sc = param;
+
+ pthread_mutex_lock(&sc->rx_mtx);
+ sc->rx_in_progress = 1;
+ pci_vtnet_tap_rx(sc);
+ sc->rx_in_progress = 0;
+ pthread_mutex_unlock(&sc->rx_mtx);
+
+}
+#else
+static void *
+pci_vtnet_poll_thread(void *param)
+{
+ struct pci_vtnet_softc *sc = param;
+ pollfd_t pollset;
+
+ pollset.fd = sc->vsc_dlpifd;
+ pollset.events = POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND;
+
+ for (;;) {
+ if (poll(&pollset, 1, -1) < 0) {
+ if (errno == EINTR)
+ continue;
+ fprintf(stderr, "pci_vtnet_poll_thread poll() error %d\n", errno);
+ continue;
+ }
+ pthread_mutex_lock(&sc->vsc_mtx);
+ pci_vtnet_tap_rx(sc);
+ pthread_mutex_unlock(&sc->vsc_mtx);
+ }
+}
+#endif
+
+static void
+pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ /*
+ * A qnotify means that the rx process can now begin
+ */
+ if (sc->vsc_rx_ready == 0) {
+ sc->vsc_rx_ready = 1;
+ }
+}
+
+static void
+pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
+{
+ struct iovec iov[VTNET_MAXSEGS + 1];
+ int i, n;
+ int plen, tlen;
+
+ /*
+ * Obtain chain of descriptors. The first one is
+ * really the header descriptor, so we need to sum
+ * up two lengths: packet length and transfer length.
+ */
+ n = vq_getchain(vq, iov, VTNET_MAXSEGS, NULL);
+ assert(n >= 1 && n <= VTNET_MAXSEGS);
+ plen = 0;
+ tlen = iov[0].iov_len;
+ for (i = 1; i < n; i++) {
+ plen += iov[i].iov_len;
+ tlen += iov[i].iov_len;
+ }
+
+ DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n));
+ pci_vtnet_tap_tx(sc, &iov[1], n - 1, plen);
+
+ /* chain is processed, release it and set tlen */
+ vq_relchain(vq, tlen);
+}
+
+static void
+pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ /*
+ * Any ring entries to process?
+ */
+ if (!vq_has_descs(vq))
+ return;
+
+ /* Signal the tx thread for processing */
+ pthread_mutex_lock(&sc->tx_mtx);
+ if (sc->tx_in_progress == 0)
+ pthread_cond_signal(&sc->tx_cond);
+ pthread_mutex_unlock(&sc->tx_mtx);
+}
+
+/*
+ * Thread which will handle processing of TX desc
+ */
+static void *
+pci_vtnet_tx_thread(void *param)
+{
+ struct pci_vtnet_softc *sc = param;
+ struct vqueue_info *vq;
+ int have_work, error;
+
+ vq = &sc->vsc_queues[VTNET_TXQ];
+
+ /*
+ * Let us wait till the tx queue pointers get initialised &
+ * first tx signaled
+ */
+ pthread_mutex_lock(&sc->tx_mtx);
+ error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
+ assert(error == 0);
+
+ for (;;) {
+ /* note - tx mutex is locked here */
+ do {
+ if (sc->resetting)
+ have_work = 0;
+ else
+ have_work = vq_has_descs(vq);
+
+ if (!have_work) {
+ sc->tx_in_progress = 0;
+ error = pthread_cond_wait(&sc->tx_cond,
+ &sc->tx_mtx);
+ assert(error == 0);
+ }
+ } while (!have_work);
+ sc->tx_in_progress = 1;
+ pthread_mutex_unlock(&sc->tx_mtx);
+
+ vq_startchains(vq);
+ do {
+ /*
+ * Run through entries, placing them into
+ * iovecs and sending when an end-of-packet
+ * is found
+ */
+ pci_vtnet_proctx(sc, vq);
+ } while (vq_has_descs(vq));
+
+ /*
+ * Generate an interrupt if needed.
+ */
+ vq_endchains(vq, 1);
+
+ pthread_mutex_lock(&sc->tx_mtx);
+ }
+}
+
+#ifdef notyet
+static void
+pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
+{
+
+ DPRINTF(("vtnet: control qnotify!\n\r"));
+}
+#endif
+
+#ifdef __FreeBSD__
+static int
+pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr)
+{
+ struct ether_addr *ea;
+ char *tmpstr;
+ char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
+
+ tmpstr = strsep(&mac_str,"=");
+
+ if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
+ ea = ether_aton(mac_str);
+
+ if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
+ memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
+ fprintf(stderr, "Invalid MAC %s\n", mac_str);
+ return (EINVAL);
+ } else
+ memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
+ }
+
+ return (0);
+}
+#endif
+
+
+static int
+pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+#ifdef __FreeBSD__
+ MD5_CTX mdctx;
+ unsigned char digest[16];
+#else
+ uchar_t physaddr[DLPI_PHYSADDR_MAX];
+ size_t physaddrlen = DLPI_PHYSADDR_MAX;
+ int error;
+#endif
+ char nstr[80];
+ char tname[MAXCOMLEN + 1];
+ struct pci_vtnet_softc *sc;
+ const char *env_msi;
+ char *devname;
+ char *vtopts;
+ int mac_provided;
+ int use_msix;
+
+ sc = malloc(sizeof(struct pci_vtnet_softc));
+ memset(sc, 0, sizeof(struct pci_vtnet_softc));
+
+ pthread_mutex_init(&sc->vsc_mtx, NULL);
+
+ vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues);
+ sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
+ sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
+ sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
+ sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
+#ifdef notyet
+ sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
+ sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
+#endif
+
+ /*
+ * Use MSI if set by user
+ */
+ use_msix = 1;
+ if ((env_msi = getenv("BHYVE_USE_MSI")) != NULL) {
+ if (strcasecmp(env_msi, "yes") == 0)
+ use_msix = 0;
+ }
+
+ /*
+ * Attempt to open the tap device and read the MAC address
+ * if specified
+ */
+ mac_provided = 0;
+#ifdef __FreeBSD__
+ sc->vsc_tapfd = -1;
+#endif
+ if (opts != NULL) {
+ char tbuf[80];
+ int err;
+
+ devname = vtopts = strdup(opts);
+ (void) strsep(&vtopts, ",");
+
+#ifdef __FreBSD__
+ if (vtopts != NULL) {
+ err = pci_vtnet_parsemac(vtopts, sc->vsc_config.mac);
+ if (err != 0) {
+ free(devname);
+ return (err);
+ }
+ mac_provided = 1;
+ }
+#endif
+
+ strcpy(tbuf, "/dev/");
+ strlcat(tbuf, devname, sizeof(tbuf));
+
+ free(devname);
+
+#ifdef __FreeBSD__
+ sc->vsc_tapfd = open(tbuf, O_RDWR);
+ if (sc->vsc_tapfd == -1) {
+ WPRINTF(("open of tap device %s failed\n", tbuf));
+ } else {
+ /*
+ * Set non-blocking and register for read
+ * notifications with the event loop
+ */
+ int opt = 1;
+ if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
+ WPRINTF(("tap device O_NONBLOCK failed\n"));
+ close(sc->vsc_tapfd);
+ sc->vsc_tapfd = -1;
+ }
+
+ sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
+ EVF_READ,
+ pci_vtnet_tap_callback,
+ sc);
+ if (sc->vsc_mevp == NULL) {
+ WPRINTF(("Could not register event\n"));
+ close(sc->vsc_tapfd);
+ sc->vsc_tapfd = -1;
+ }
+ }
+#else
+ if (dlpi_open(opts, &sc->vsc_dhp, DLPI_RAW) != DLPI_SUCCESS) {
+ WPRINTF(("open of vnic device %s failed\n", opts));
+ }
+
+ if (dlpi_get_physaddr(sc->vsc_dhp, DL_CURR_PHYS_ADDR, physaddr, &physaddrlen) != DLPI_SUCCESS) {
+ WPRINTF(("read MAC address of vnic device %s failed\n", opts));
+ }
+ if (physaddrlen != ETHERADDRL) {
+ WPRINTF(("bad MAC address len %d on vnic device %s\n", physaddrlen, opts));
+ }
+ memcpy(sc->vsc_config.mac, physaddr, ETHERADDRL);
+
+ if (dlpi_bind(sc->vsc_dhp, DLPI_ANY_SAP, NULL) != DLPI_SUCCESS) {
+ WPRINTF(("bind of vnic device %s failed\n", opts));
+ }
+
+ if (dlpi_promiscon(sc->vsc_dhp, DL_PROMISC_PHYS) != DLPI_SUCCESS) {
+ WPRINTF(("enable promiscous mode(physical) of vnic device %s failed\n", opts));
+ }
+ if (dlpi_promiscon(sc->vsc_dhp, DL_PROMISC_SAP) != DLPI_SUCCESS) {
+ WPRINTF(("enable promiscous mode(SAP) of vnic device %s failed\n", opts));
+ }
+
+ sc->vsc_dlpifd = dlpi_fd(sc->vsc_dhp);
+
+ if (fcntl(sc->vsc_dlpifd, F_SETFL, O_NONBLOCK) < 0) {
+ WPRINTF(("enable O_NONBLOCK of vnic device %s failed\n", opts));
+ dlpi_close(sc->vsc_dhp);
+ sc->vsc_dlpifd = -1;
+ }
+
+ error = pthread_create(NULL, NULL, pci_vtnet_poll_thread, sc);
+ assert(error == 0);
+#endif
+ }
+
+#ifdef __FreeBSD__
+ /*
+ * The default MAC address is the standard NetApp OUI of 00-a0-98,
+ * followed by an MD5 of the PCI slot/func number and dev name
+ */
+ if (!mac_provided) {
+ snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
+ pi->pi_func, vmname);
+
+ MD5Init(&mdctx);
+ MD5Update(&mdctx, nstr, strlen(nstr));
+ MD5Final(digest, &mdctx);
+
+ sc->vsc_config.mac[0] = 0x00;
+ sc->vsc_config.mac[1] = 0xa0;
+ sc->vsc_config.mac[2] = 0x98;
+ sc->vsc_config.mac[3] = digest[0];
+ sc->vsc_config.mac[4] = digest[1];
+ sc->vsc_config.mac[5] = digest[2];
+ }
+#endif
+
+ /* initialize config space */
+ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
+ pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
+ pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
+ pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
+
+ /* link always up */
+ sc->vsc_config.status = 1;
+
+ /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
+ if (vi_intr_init(&sc->vsc_vs, 1, use_msix))
+ return (1);
+
+ /* use BAR 0 to map config regs in IO space */
+ vi_set_io_bar(&sc->vsc_vs, 0);
+
+ sc->resetting = 0;
+
+ sc->rx_in_progress = 0;
+ pthread_mutex_init(&sc->rx_mtx, NULL);
+
+ /*
+ * Initialize tx semaphore & spawn TX processing thread.
+ * As of now, only one thread for TX desc processing is
+ * spawned.
+ */
+ sc->tx_in_progress = 0;
+ pthread_mutex_init(&sc->tx_mtx, NULL);
+ pthread_cond_init(&sc->tx_cond, NULL);
+ pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
+ snprintf(tname, sizeof(tname), "%s vtnet%d tx", vmname, pi->pi_slot);
+ pthread_set_name_np(sc->tx_tid, tname);
+
+ return (0);
+}
+
+static int
+pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
+{
+ struct pci_vtnet_softc *sc = vsc;
+ void *ptr;
+
+ if (offset < 6) {
+ assert(offset + size <= 6);
+ /*
+ * The driver is allowed to change the MAC address
+ */
+ ptr = &sc->vsc_config.mac[offset];
+ memcpy(ptr, &value, size);
+ } else {
+ DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
+ return (1);
+ }
+ return (0);
+}
+
+static int
+pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+ struct pci_vtnet_softc *sc = vsc;
+ void *ptr;
+
+ ptr = (uint8_t *)&sc->vsc_config + offset;
+ memcpy(retval, ptr, size);
+ return (0);
+}
+
+struct pci_devemu pci_de_vnet = {
+ .pe_emu = "virtio-net",
+ .pe_init = pci_vtnet_init,
+ .pe_barwrite = vi_pci_write,
+ .pe_barread = vi_pci_read
+};
+PCI_EMUL_SET(pci_de_vnet);