author     Ryan Zezeski <ryan@zinascii.com>          2020-08-25 00:52:37 -0600
committer  Dan McDonald <danmcd@joyent.com>          2021-11-23 13:18:50 -0500
commit     6f443ebc1fb4fec01d6e8fa8ca4648182ed215bb (patch)
tree       5c4551c6d6caaaf138fe369af872c3fc31d02c8a
parent     a28480febf31f0e61debac062a55216a98a05a92 (diff)
download   illumos-joyent-6f443ebc1fb4fec01d6e8fa8ca4648182ed215bb.tar.gz
13689 Want AWS ENA driver
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Approved by: Dan McDonald <danmcd@joyent.com>
-rw-r--r--  usr/src/man/man7d/Makefile                       2
-rw-r--r--  usr/src/man/man7d/ena.7d                       135
-rw-r--r--  usr/src/pkg/manifests/driver-network-ena.p5m    36
-rw-r--r--  usr/src/uts/common/Makefile.files                8
-rw-r--r--  usr/src/uts/common/Makefile.rules                6
-rw-r--r--  usr/src/uts/common/io/ena/ena.c               1944
-rw-r--r--  usr/src/uts/common/io/ena/ena.conf              50
-rw-r--r--  usr/src/uts/common/io/ena/ena.h                848
-rw-r--r--  usr/src/uts/common/io/ena/ena_admin.c          674
-rw-r--r--  usr/src/uts/common/io/ena/ena_dma.c            191
-rw-r--r--  usr/src/uts/common/io/ena/ena_gld.c            465
-rw-r--r--  usr/src/uts/common/io/ena/ena_hw.c              93
-rw-r--r--  usr/src/uts/common/io/ena/ena_hw.h            1930
-rw-r--r--  usr/src/uts/common/io/ena/ena_intr.c           175
-rw-r--r--  usr/src/uts/common/io/ena/ena_rx.c             531
-rw-r--r--  usr/src/uts/common/io/ena/ena_stats.c          475
-rw-r--r--  usr/src/uts/common/io/ena/ena_tx.c             534
-rw-r--r--  usr/src/uts/common/mapfiles/ddi.mapfile          7
-rw-r--r--  usr/src/uts/common/mapfiles/kernel.mapfile       3
-rw-r--r--  usr/src/uts/common/sys/ethernet.h                3
-rw-r--r--  usr/src/uts/intel/Makefile.intel                 2
-rw-r--r--  usr/src/uts/intel/ena/Makefile                  47
22 files changed, 8158 insertions, 1 deletion
diff --git a/usr/src/man/man7d/Makefile b/usr/src/man/man7d/Makefile
index 9da7d4b205..af38c7a9bd 100644
--- a/usr/src/man/man7d/Makefile
+++ b/usr/src/man/man7d/Makefile
@@ -16,6 +16,7 @@
# Copyright 2016 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
# Copyright 2018 Nexenta Systems, Inc.
# Copyright 2020 Peter Tribble
+# Copyright 2021 Oxide Computer Company
#
include $(SRC)/Makefile.master
@@ -46,6 +47,7 @@ _MANFILES= aac.7d \
dtrace.7d \
e1000g.7d \
ehci.7d \
+ ena.7d \
fasttrap.7d \
fbt.7d \
fcip.7d \
diff --git a/usr/src/man/man7d/ena.7d b/usr/src/man/man7d/ena.7d
new file mode 100644
index 0000000000..d4070e1745
--- /dev/null
+++ b/usr/src/man/man7d/ena.7d
@@ -0,0 +1,135 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2021 Oxide Computer Company
+.\"
+.Dd November 17, 2021
+.Dt ENA 7D
+.Os
+.Sh NAME
+.Nm ena
+.Nd Driver for the AWS Elastic Network Adapter
+.Sh SYNOPSIS
+.Pa /dev/net/ena*
+.Sh DESCRIPTION
+The
+.Sy ena
+driver is a GLDv3 NIC driver for the AWS Elastic Network Adapter
+family of virtual devices.
+The driver supports:
+.Bl -dash -offset indent
+.It
+Jumbo frames up to 9216 bytes.
+.It
+Multiple Rx and Tx rings.
+.El
+.Pp
+By design, this driver does not support VNICs.
+A given ENA device can only ever receive traffic for a single unicast
+MAC address and IP address combination, as determined by the AWS configuration.
+There is no support for promiscuous mode, or for receiving traffic for
+additional unicast or multicast addresses.
+.Sh CONFIGURATION
+The
+.Sy ena.conf
+file contains user-configurable parameters, each of which is described
+below.
+This file is read when an ENA device is found and an instance of the
+driver is attached to it.
+Changes made to this file do not affect running instances.
+Only instances attached after the changes will see the effects of
+those changes.
+Therefore, for a change to take effect on a running instance, the
+driver instance must be reloaded.
+That can be done by manually reloading the driver or by rebooting the
+system.
+.Sh PROPERTIES
+The configuration file can be found at
+.Pa /kernel/drv/ena.conf .
+.Bl -hang -width Ds
+.It Sy rx_queue_num_descs
+.Bd -filled -compact
+Minimum:
+.Sy 64 |
+Maximum:
+.Sy device dependent
+.Ed
+.Bd -filled -compact
+Default:
+.Sy device maximum
+.Ed
+.Bd -filled
+The
+.Sy rx_queue_num_descs
+property determines the number of descriptors provided by the Rx queue.
+Currently a single descriptor is equal to a single packet, but in the
+future it may be that a single packet consumes multiple descriptors.
+.Ed
+.It Sy rx_queue_intr_limit
+.Bd -filled -compact
+Minimum:
+.Sy 16 |
+Maximum:
+.Sy 4096
+.Ed
+.Bd -filled -compact
+Default:
+.Sy 256
+.Ed
+.Bd -filled
+The
+.Sy rx_queue_intr_limit
+property determines the number of frames an Rx interrupt will attempt to
+process before returning and claiming the interrupt.
+This is meant to keep the ENA Rx interrupt handler from consuming too
+much system time.
+In general, when a NIC becomes saturated with packets, the
+.Sy MAC
+layer will switch the driver into polling mode to reduce interrupt
+load.
+.Ed
+.It Sy tx_queue_num_descs
+.Bd -filled -compact
+Minimum:
+.Sy 64 |
+Maximum:
+.Sy device dependent
+.Ed
+.Bd -filled -compact
+Default:
+.Sy device maximum
+.Ed
+.Bd -filled
+The
+.Sy tx_queue_num_descs
+property determines the number of descriptors provided by the Tx queue.
+Currently a single descriptor is equal to a single packet, but in the
+future it may be that a single packet consumes multiple descriptors.
+.Ed
+.El
+.Sh FILES
+.Bl -tag -width Pa
+.It Pa /kernel/drv/amd64/ena
+Device driver (x86)
+.It Pa /kernel/drv/ena.conf
+Driver configuration file containing user-configurable options
+.El
+.Sh INTERFACE STABILITY
+The tunables in
+.Pa ena.conf
+are considered
+.Sy Evolving
+and may change in the future.
+.Sh SEE ALSO
+.Xr dladm 1M ,
+.Xr snoop 1M ,
+.Xr driver.conf 4 ,
+.Xr dlpi 7P
diff --git a/usr/src/pkg/manifests/driver-network-ena.p5m b/usr/src/pkg/manifests/driver-network-ena.p5m
new file mode 100644
index 0000000000..cd64e9c504
--- /dev/null
+++ b/usr/src/pkg/manifests/driver-network-ena.p5m
@@ -0,0 +1,36 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2021 Oxide Computer Company
+#
+
+<include global_zone_only_component>
+set name=pkg.fmri value=pkg:/driver/network/ena@$(PKGVERS)
+set name=pkg.summary value="AWS ENA Ethernet Driver"
+set name=pkg.description value="AWS ENA Ethernet Driver"
+set name=info.classification \
+ value=org.opensolaris.category.2008:Drivers/Networking
+set name=variant.arch value=i386
+dir path=kernel group=sys
+dir path=kernel/drv group=sys
+dir path=kernel/drv/$(ARCH64) group=sys
+file path=kernel/drv/$(ARCH64)/ena group=sys
+file path=kernel/drv/ena.conf group=sys
+dir path=usr/share/man
+dir path=usr/share/man/man7d
+file path=usr/share/man/man7d/ena.7d
+driver name=ena perms="* 0666 root sys" clone_perms="ena 0666 root sys" \
+ alias=pciex1d0f,ec2 \
+ alias=pciex1d0f,1ec2 \
+ alias=pciex1d0f,ec20 \
+ alias=pciex1d0f,ec21
+license lic_CDDL license=lic_CDDL
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index d768802685..00af839874 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -29,6 +29,7 @@
# Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved.
# Copyright 2016 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
# Copyright 2020 RackTop Systems, Inc.
+# Copyright 2021 Oxide Computer Company
#
#
@@ -2288,3 +2289,10 @@ BNX_OBJS += \
#
MLXCX_OBJS += mlxcx.o mlxcx_dma.o mlxcx_cmd.o mlxcx_intr.o mlxcx_gld.o \
mlxcx_ring.o mlxcx_sensor.o
+
+#
+# ena(7D)
+#
+ENA_OBJS += ena.o ena_admin.o ena_dma.o ena_gld.o ena_hw.o ena_intr.o \
+ ena_stats.o ena_tx.o ena_rx.o
+
diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules
index 32a80767b2..78f01a1f9f 100644
--- a/usr/src/uts/common/Makefile.rules
+++ b/usr/src/uts/common/Makefile.rules
@@ -26,7 +26,7 @@
# Copyright 2019 Joyent, Inc.
# Copyright 2018 Nexenta Systems, Inc.
# Copyright (c) 2017 by Delphix. All rights reserved.
-# Copyright 2020 Oxide Computer Company
+# Copyright 2021 Oxide Computer Company
#
#
@@ -777,6 +777,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/elxl/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/ena/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/fcoe/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
diff --git a/usr/src/uts/common/io/ena/ena.c b/usr/src/uts/common/io/ena/ena.c
new file mode 100644
index 0000000000..b42f6350af
--- /dev/null
+++ b/usr/src/uts/common/io/ena/ena.c
@@ -0,0 +1,1944 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2021 Oxide Computer Company
+ */
+
+#include "ena_hw.h"
+#include "ena.h"
+
+/*
+ * Elastic Network Adapter (ENA) Driver
+ * ------------------------------------
+ *
+ * The ena driver provides support for the AWS ENA device, also
+ * referred to as their "enhanced networking". This device is present
+ * on "Nitro"-based instances. It presents itself with the following
+ * PCI Vendor/Device IDs
+ *
+ * o 1d0f:0ec2 -- ENA PF
+ * o 1d0f:1ec2 -- ENA PF (Reserved)
+ * o 1d0f:ec20 -- ENA VF
+ * o 1d0f:ec21 -- ENA VF (Reserved)
+ *
+ * This driver provides support for only the essential features needed
+ * to drive traffic on an ENA device. Support for the following
+ * features IS NOT currently implemented.
+ *
+ * o Admin Queue Interrupts: queue completion events are always polled
+ * o AENQ keep alive
+ * o FMA
+ * o Rx checksum offloads
+ * o Tx checksum offloads
+ * o Tx DMA bind (borrow buffers)
+ * o Rx DMA bind (loaned buffers)
+ * o TSO
+ * o RSS
+ * o Low Latency Queues (LLQ)
+ * o Support for different Tx completion policies
+ * o More controlled Tx recycling and Rx refill
+ *
+ * Even without these features the ena driver should perform
+ * reasonably well.
+ *
+ * Driver vs. Hardware Types
+ * -------------------------
+ *
+ * To properly communicate with the ENA device the driver must
+ * populate memory (registers and buffers) with specific types. These
+ * types are defined by the device and are found under the "common"
+ * (ena_com) code of the AWS Linux and FreeBSD drivers [1]. We have
+ * simplified this a bit by defining all device-specific types in the
+ * ena_hw.h file. Furthermore, all device-specific types are given an
+ * "enahw" prefix. This makes it clear when we are dealing with a
+ * device type and when we are dealing with a driver type.
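+ * For example, enahw_tx_desc_t describes a Tx submission descriptor
+ * exactly as the device lays it out, while ena_txq_t is a purely
+ * driver-side construct.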
+ *
+ * [1]: https://github.com/amzn/amzn-drivers
+ *
+ * Groups, Rings (Queues), and Interrupts
+ * --------------------------------------
+ *
+ * The ENA device presents one mac group. This single mac group
+ * represents the single unicast address that this device represents
+ * in your AWS instance. The ENA device presents no option for
+ * configuring additional MAC addresses, multicast, or promisc mode --
+ * you receive only what AWS wants you to receive.
+ *
+ * This single mac group may have one or more rings. The ENA driver
+ * refers to rings as queues, for no special reason other than it was
+ * the dominant language in the Linux and FreeBSD drivers, and it
+ * spilled over into this port. The upper bound on number of queues is
+ * presented by the device. However, we don't just go with whatever
+ * number of queues the device reports; but rather we limit the queues
+ * based on other factors such as an absolute maximum, number of
+ * online CPUs, and number of available interrupts. The upper bound is
+ * calculated by ena_set_max_io_queues(), and that is used and
+ * possibly further restricted in ena_attach_intr_alloc(). As this
+ * point, ultimately, it is the number of available interrupts (minus
+ * one for the admin queue) that determines the number of queues: one
+ * Tx and one Rx on each I/O interrupt.
+ *
+ * NOTE: Perhaps it is overly restrictive to limit the number of
+ * queues to the number of I/O interrupts. Something worth considering
+ * on larger instances if they present far fewer interrupts than they
+ * do queues + CPUs.
+ *
+ * The ENA device presents MSI-X interrupts only. During attach the
+ * driver queries the number of available interrupts and sets aside
+ * one for admin/AENQ (vector 0) and the rest for I/O (vector 1 to N).
+ * This means that a Tx/Rx queue at index 0 will map to vector 1, and
+ * so on.
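+ * For example, on an instance presenting eight usable MSI-X vectors,
+ * the driver would use vector 0 for admin/AENQ and vectors 1-7 for
+ * I/O, giving seven Tx queues and seven Rx queues (assuming the
+ * device and CPU counts allow that many).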
+ *
+ * NOTE: The ENA driver currently doesn't make use of the Admin Queue
+ * interrupt. This interrupt is used to notify the driver that a
+ * command response is ready. The ENA driver always polls the Admin
+ * Queue for responses.
+ *
+ * Tx Queue Workings
+ * -----------------
+ *
+ * A single Tx queue (ena_txq_t) is made up of one submission queue
+ * (SQ) and its paired completion queue (CQ). These two queues form a
+ * logical descriptor ring which is used to send packets out of the
+ * device -- where each SQ entry describes the packet to be sent
+ * (enahw_tx_desc_t) and each CQ entry describes the result of sending
+ * a packet (enahw_tx_cdesc_t). For this to work the host and device
+ * must agree on which descriptors are currently owned by the host
+ * (free for sending) and which are owned by the device (pending
+ * device completion). This state is tracked on the host side via head
+ * and tail indexes along with a phase value.
+ *
+ * The head and tail values represent the head and tail of the FIFO
+ * queue of pending packets -- the next packet to be sent by the
+ * device is head, and all descriptors up to tail are ready for
+ * sending. The phase allows the host to determine which CQ
+ * descriptors represent completed events when using per-SQ completion
+ * events (as opposed to queue head pointer updates). As the queues
+ * represent a logical ring buffer, the phase must alternate on
+ * wrap-around. The device initializes the phase to zero, and the host
+ * starts with a phase of 1. The first packet descriptor writes, and
+ * their corresponding completions, are indicated with a phase of 1.
+ *
+ *
+ * For example, the diagram below represents the SQ/CQ state after the
+ * first 6 packets have been sent by the host and 2 of them have been
+ * completed by the device (and these completions have been processed
+ * by the driver). In this state the host could send 4 more packets
+ * before needing to wait on completion events.
+ *
+ *
+ * +---+---+---+---+---+---+---+---+
+ * SQ | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | phase = 1
+ * +---+---+---+---+---+---+---+---+
+ * ^
+ * |
+ * tail
+ * head
+ * |
+ * v
+ * +---+---+---+---+---+---+---+---+
+ * CQ | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | phase = 1
+ * +---+---+---+---+---+---+---+---+
+ *
+ *
+ * The next diagram shows how the state changes as 5 more packets are
+ * sent (for a total of 11) and 7 more are completed (for a total of
+ * 9). Notice that as the SQ and CQ have wrapped around their phases
+ * have been complemented. In this state the host could send 6 more
+ * packets before needing to wait on completion events.
+ *
+ * +---+---+---+---+---+---+---+---+
+ * SQ | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | phase = 0
+ * +---+---+---+---+---+---+---+---+
+ * ^
+ * |
+ * tail
+ * head
+ * |
+ * v
+ * +---+---+---+---+---+---+---+---+
+ * CQ | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | phase = 0
+ * +---+---+---+---+---+---+---+---+
+ *
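+ * In code, completion processing amounts to walking the CQ until the
+ * phase bit of the next descriptor no longer matches the phase the
+ * host expects, flipping the expected phase on wrap-around. As a
+ * rough sketch (the identifiers here are illustrative, not
+ * necessarily the driver's exact names):
+ *
+ *     while (phase_of(cdesc) == expected_phase) {
+ *             recycle the TCB referenced by the completion;
+ *             head++;
+ *             head_mod = head & (num_descs - 1);
+ *             if (head_mod == 0)
+ *                     expected_phase = !expected_phase;
+ *             cdesc = &cq_descs[head_mod];
+ *     }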
+ *
+ * Currently, all packets are copied for Tx. At ring start we allocate
+ * a Tx Control Buffer (TCB) for each queue descriptor. Each TCB has a
+ * DMA buffer associated with it; and each buffer is large enough to
+ * hold the MTU. Therefore, Tx descriptors and TCBs currently have a
+ * 1:1 mapping. When a packet is sent, the mblk's buffer is copied to
+ * the TCB's DMA buffer, and a new descriptor is written to the SQ
+ * describing said TCB buffer. If and when we add more advanced
+ * features like DMA binding of mblks and TSO, this 1:1 guarantee will
+ * no longer hold.
+ *
+ * Rx Queue Workings
+ * -----------------
+ *
+ * In terms of implementing the logical descriptor ring, the Rx queues
+ * are very much like the Tx queues. There is a paired SQ and CQ for
+ * each logical ring. The difference is that in Rx the SQ is for
+ * handing buffers to the device to fill, and the CQ is for describing
+ * the contents of those buffers for a given received frame. At Rx
+ * ring start we allocate an Rx Control Buffer (RCB) for each
+ * descriptor in the ring. Each RCB has a DMA buffer associated with
+ * it; and each buffer is large enough to hold the MTU. For each
+ * received frame we copy the contents out of the RCB and into its own
+ * mblk, immediately returning the RCB for reuse. As with Tx, this
+ * gives us a simple 1:1 mapping currently, but if more advanced
+ * features are implemented later this could change.
+ *
+ * Asynchronous Event Notification Queue (AENQ)
+ * --------------------------------------------
+ *
+ * Each ENA device comes with a mechanism for sending out-of-band
+ * notifications to the driver. This includes events like link state
+ * changes, fatal errors, and a watchdog/keep alive signal. The AENQ
+ * delivery mechanism is via interrupt, handled by the ena_aenq_work()
+ * function, which dispatches via the eaenq_hdlrs table. If no handler
+ * is registered, the ena_aenq_default_hdlr() handler is used. A given
+ * device may not support all the different event types
+ * (enahw_aenq_groups_t); and the driver may choose to enable a subset
+ * of the supported events. During attach we call ena_setup_aenq() to
+ * negotiate the supported/enabled events. The enabled group is stored
+ * at ena_aenq_enabled_groups.
+ *
+ * Queues and Unsigned Wraparound
+ * ------------------------------
+ *
+ * All the queues use a uint16_t value as their head/tail values, e.g.
+ * the Rx queue's er_cq_head_idx value. You might notice that we only
+ * ever increment these values, letting them perform implicit unsigned
+ * integer wraparound. This is intended. This is the same behavior as
+ * the common code, and seems to be what the hardware expects. Of
+ * course, when accessing our own descriptor arrays we must make sure
+ * to first perform a modulo of this value or risk running off into
+ * space.
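+ * For example (illustrative numbers), with a ring of 1024 descriptors
+ * a head value of 65535 maps to array index 65535 & 1023 = 1023;
+ * incrementing the head wraps it to 0, which maps to array index 0.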
+ *
+ * Attach Sequencing
+ * -----------------
+ *
+ * Most drivers implement their attach/detach/cleanup functions as a
+ * sequential stream of function calls used to allocate and initialize
+ * resources in an order determined by the device's programming manual
+ * combined with any requirements imposed by the kernel and its
+ * relevant modules. These functions can become quite long. It is
+ * often hard to see the order in which steps are taken, and even
+ * harder to tell if detach/cleanup undoes them in the correct order,
+ * or even if it undoes them at all! The only sure way to understand
+ * the flow is to take good notes while closely inspecting each line
+ * of code. Even then, it's easy for attach and detach to get out of
+ * sync.
+ *
+ * Some more recent drivers have improved on this situation by using a
+ * bit vector to track the sequence of events in attach/detach. Each
+ * bit is declared as an enum value, in the same order attach is
+ * expected to run, and thus detach would run in the exact
+ * opposite order. This has three main benefits:
+ *
+ * 1. It makes it easier to determine sequence order at a
+ * glance.
+ *
+ * 2. It gives a better idea of what state the device is in during
+ * debugging (the sequence bit vector is kept with the instance
+ * state).
+ *
+ * 3. The detach function can verify that all sequence bits are
+ * cleared, indicating that everything done in attach was
+ * successfully undone.
+ *
+ * These are great improvements. However, the attach/detach functions
+ * can still become unruly, and there is still no guarantee that
+ * detach is done in opposite order of attach (this is not always
+ * strictly required, but is probably the best way to write detach).
+ * There is still a lot of boilerplate and chance for programmer
+ * error.
+ *
+ * The ena driver takes the sequence idea a bit further, creating a
+ * descriptor table of the attach sequence (ena_attach_tbl). This
+ * table is used by attach/detach to generically, declaratively, and
+ * programmatically enforce the precise sequence order and verify that
+ * anything that is done is undone. This provides several benefits:
+ *
+ * o Correct order is enforced implicitly by the descriptor table.
+ * It is impossible for the detach sequence to run in any order
+ * other than the opposite of attach.
+ *
+ * o It is obvious what the precise attach sequence is. While the
+ * bit vector enum helps a lot with this, it doesn't prevent
+ * programmer error. With the sequence defined as a declarative
+ * table it makes it easy for the programmer to see the order and
+ * know it's followed exactly.
+ *
+ * o It is impossible to modify the attach sequence without also
+ * specifying a callback for its dual in the detach sequence.
+ *
+ * o Common and repetitive code like error checking, logging, and bit
+ * vector modification is eliminated and centralized, again
+ * reducing the chance of programmer error.
+ *
+ * The ena attach sequence is defined under ena_attach_seq_t. The
+ * descriptor table is defined under ena_attach_tbl.
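+ *
+ * In rough outline (a sketch of the idea rather than the literal
+ * code), attach walks the table forward, unwinding in reverse on
+ * failure, and detach walks the whole table in reverse:
+ *
+ *     for (i = 0; i < ENA_ATTACH_NUM_ENTRIES; i++)
+ *             if (!ena_attach_tbl[i].ead_attach_fn(ena))
+ *                     undo entries [0, i) in reverse and fail
+ *
+ *     for (i = ENA_ATTACH_NUM_ENTRIES; i > 0; i--)
+ *             ena_attach_tbl[i - 1].ead_cleanup_fn(ena)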
+ */
+
+/*
+ * These are some basic data layout invariants on which development
+ * assumptions were made.
+ */
+CTASSERT(sizeof (enahw_aenq_desc_t) == 64);
+/* TODO: Why doesn't this work? */
+/* CTASSERT(sizeof (enahw_tx_data_desc_t) == 64); */
+CTASSERT(sizeof (enahw_tx_data_desc_t) == sizeof (enahw_tx_meta_desc_t));
+CTASSERT(sizeof (enahw_tx_data_desc_t) == sizeof (enahw_tx_desc_t));
+CTASSERT(sizeof (enahw_tx_meta_desc_t) == sizeof (enahw_tx_desc_t));
+/*
+ * We add this here as an extra safety check to make sure that any
+ * addition to the AENQ group enum also updates the groups array num
+ * value.
+ */
+CTASSERT(ENAHW_AENQ_GROUPS_ARR_NUM == 6);
+
+/*
+ * Amazon does not specify the endianness of the ENA device. We assume
+ * it's the same as the bus, and we assume the CPU/bus is always
+ * little endian.
+ */
+#ifdef _BIG_ENDIAN
+#error "ENA driver is little-endian only"
+#endif
+
+/*
+ * These values are used to communicate the driver version to the AWS
+ * hypervisor via the ena_set_host_info() function. We don't know what
+ * exactly AWS does with this info, but it's fairly safe to assume
+ * it's used solely for debug/informational purposes. The Linux driver
+ * updates these values frequently as bugs are fixed and features are
+ * added.
+ */
+#define ENA_DRV_VER_MAJOR 1
+#define ENA_DRV_VER_MINOR 0
+#define ENA_DRV_VER_SUBMINOR 0
+
+uint64_t ena_admin_cmd_timeout_ns = ENA_ADMIN_CMD_DEF_TIMEOUT;
+
+/*
+ * Log an error message. We leave the destination (console or system
+ * log) up to the caller.
+ */
+void
+ena_err(const ena_t *ena, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ if (ena != NULL && ena->ena_dip != NULL) {
+ vdev_err(ena->ena_dip, CE_WARN, fmt, ap);
+ } else {
+ vcmn_err(CE_WARN, fmt, ap);
+ }
+ va_end(ap);
+}
+
+/*
+ * Set this to B_TRUE to enable debug messages.
+ */
+boolean_t ena_debug = B_FALSE;
+
+/*
+ * Log a debug message. We force all debug messages to go to the
+ * system log.
+ */
+void
+ena_dbg(const ena_t *ena, const char *fmt, ...)
+{
+ va_list ap;
+
+ if (ena_debug) {
+ char msg[1024];
+
+ va_start(ap, fmt);
+ (void) vsnprintf(msg, sizeof (msg), fmt, ap);
+ va_end(ap);
+
+ if (ena != NULL && ena->ena_dip != NULL) {
+ dev_err(ena->ena_dip, CE_NOTE, "!%s", msg);
+ } else {
+ cmn_err(CE_NOTE, "!%s", msg);
+ }
+ }
+}
+
+ena_aenq_grpstr_t ena_groups_str[ENAHW_AENQ_GROUPS_ARR_NUM] = {
+ { .eag_type = ENAHW_AENQ_GROUP_LINK_CHANGE, .eag_str = "LINK CHANGE" },
+ { .eag_type = ENAHW_AENQ_GROUP_FATAL_ERROR, .eag_str = "FATAL ERROR" },
+ { .eag_type = ENAHW_AENQ_GROUP_WARNING, .eag_str = "WARNING" },
+ {
+ .eag_type = ENAHW_AENQ_GROUP_NOTIFICATION,
+ .eag_str = "NOTIFICATION"
+ },
+ { .eag_type = ENAHW_AENQ_GROUP_KEEP_ALIVE, .eag_str = "KEEP ALIVE" },
+ {
+ .eag_type = ENAHW_AENQ_GROUP_REFRESH_CAPABILITIES,
+ .eag_str = "REFRESH CAPABILITIES"
+ },
+};
+
+void
+ena_aenq_work(ena_t *ena)
+{
+ ena_aenq_t *aenq = &ena->ena_aenq;
+ uint16_t head_mod = aenq->eaenq_head & (aenq->eaenq_num_descs - 1);
+ boolean_t processed = B_FALSE;
+ enahw_aenq_desc_t *desc = &aenq->eaenq_descs[head_mod];
+ uint64_t ts;
+
+ ts = ((uint64_t)desc->ead_ts_high << 32) | (uint64_t)desc->ead_ts_low;
+ ENA_DMA_SYNC(aenq->eaenq_dma, DDI_DMA_SYNC_FORKERNEL);
+
+ while (ENAHW_AENQ_DESC_PHASE(desc) == aenq->eaenq_phase) {
+ ena_aenq_hdlr_t hdlr;
+
+ ASSERT3U(desc->ead_group, <, ENAHW_AENQ_GROUPS_ARR_NUM);
+ processed = B_TRUE;
+ ena_dbg(ena, "AENQ Group: (0x%x) %s Syndrome: 0x%x ts: %" PRIu64
+ " us", desc->ead_group,
+ ena_groups_str[desc->ead_group].eag_str, desc->ead_syndrome,
+ ts);
+
+ hdlr = ena->ena_aenq.eaenq_hdlrs[desc->ead_group];
+ hdlr(ena, desc);
+
+ aenq->eaenq_head++;
+ head_mod = aenq->eaenq_head & (aenq->eaenq_num_descs - 1);
+
+ if (head_mod == 0) {
+ aenq->eaenq_phase = !aenq->eaenq_phase;
+ }
+
+ desc = &aenq->eaenq_descs[head_mod];
+ }
+
+ if (processed) {
+ ena_hw_bar_write32(ena, ENAHW_REG_AENQ_HEAD_DB,
+ aenq->eaenq_head);
+ }
+}
+
+/*
+ * Use for attach sequences which perform no resource allocation (or
+ * global state modification) and thus require no subsequent
+ * deallocation.
+ */
+static void
+ena_no_cleanup(ena_t *ena)
+{
+}
+
+static boolean_t
+ena_attach_pci(ena_t *ena)
+{
+ ddi_acc_handle_t hdl;
+
+ if (pci_config_setup(ena->ena_dip, &hdl) != 0) {
+ return (B_FALSE);
+ }
+
+ ena->ena_pci_hdl = hdl;
+ ena->ena_pci_vid = pci_config_get16(hdl, PCI_CONF_VENID);
+ ena->ena_pci_did = pci_config_get16(hdl, PCI_CONF_DEVID);
+ ena->ena_pci_rev = pci_config_get8(hdl, PCI_CONF_REVID);
+ ena->ena_pci_svid = pci_config_get16(hdl, PCI_CONF_SUBVENID);
+ ena->ena_pci_sdid = pci_config_get16(hdl, PCI_CONF_SUBSYSID);
+ ena_dbg(ena, "vid: 0x%x did: 0x%x rev: 0x%x svid: 0x%x sdid: 0x%x",
+ ena->ena_pci_vid, ena->ena_pci_did, ena->ena_pci_rev,
+ ena->ena_pci_svid, ena->ena_pci_sdid);
+
+ return (B_TRUE);
+}
+
+static void
+ena_cleanup_pci(ena_t *ena)
+{
+ pci_config_teardown(&ena->ena_pci_hdl);
+}
+
+static void
+ena_cleanup_regs_map(ena_t *ena)
+{
+ ddi_regs_map_free(&ena->ena_reg_hdl);
+}
+
+static boolean_t
+ena_attach_regs_map(ena_t *ena)
+{
+ int ret = 0;
+
+ if (ddi_dev_regsize(ena->ena_dip, ENA_REG_NUMBER, &ena->ena_reg_size) !=
+ DDI_SUCCESS) {
+ ena_err(ena, "failed to get register set %d size",
+ ENA_REG_NUMBER);
+ return (B_FALSE);
+ }
+
+ ena_dbg(ena, "register size: %ld", ena->ena_reg_size);
+ bzero(&ena->ena_reg_attr, sizeof (ena->ena_reg_attr));
+ ena->ena_reg_attr.devacc_attr_version = DDI_DEVICE_ATTR_V1;
+ ena->ena_reg_attr.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
+ ena->ena_reg_attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
+
+ /*
+ * This function can return several different failure values,
+ * so we make sure to capture its return value for the purpose
+ * of logging.
+ */
+ ret = ddi_regs_map_setup(ena->ena_dip, ENA_REG_NUMBER,
+ &ena->ena_reg_base, 0, ena->ena_reg_size, &ena->ena_reg_attr,
+ &ena->ena_reg_hdl);
+
+ if (ret != DDI_SUCCESS) {
+ ena_err(ena, "failed to map register set %d: %d",
+ ENA_REG_NUMBER, ret);
+ return (B_FALSE);
+ }
+
+ ena_dbg(ena, "registers mapped to base: 0x%p",
+ (void *)ena->ena_reg_base);
+
+ return (B_TRUE);
+}
+
+/*
+ * Free any resources related to the admin submission queue.
+ */
+static void
+ena_admin_sq_free(ena_t *ena)
+{
+ ena_dma_free(&ena->ena_aq.ea_sq.eas_dma);
+}
+
+/*
+ * Initialize the admin submission queue.
+ */
+static boolean_t
+ena_admin_sq_init(ena_t *ena)
+{
+ ena_adminq_t *aq = &ena->ena_aq;
+ ena_dma_buf_t *dma = &aq->ea_sq.eas_dma;
+ size_t size = aq->ea_qlen * sizeof (*aq->ea_sq.eas_entries);
+ uint32_t addr_low, addr_high, wval;
+ ena_dma_conf_t conf = {
+ .edc_size = size,
+ .edc_align = ENAHW_ADMIN_SQ_DESC_BUF_ALIGNMENT,
+ .edc_sgl = 1,
+ .edc_endian = DDI_NEVERSWAP_ACC,
+ .edc_stream = B_FALSE,
+ };
+
+ if (!ena_dma_alloc(ena, dma, &conf, size)) {
+ ena_err(ena, "failed to allocate DMA for Admin SQ");
+ return (B_FALSE);
+ }
+
+ aq->ea_sq.eas_entries = (void *)dma->edb_va;
+ aq->ea_sq.eas_tail = 0;
+ aq->ea_sq.eas_phase = 1;
+ aq->ea_sq.eas_dbaddr =
+ (uint32_t *)(ena->ena_reg_base + ENAHW_REG_ASQ_DB);
+ ENA_DMA_VERIFY_ADDR(ena, dma->edb_cookie->dmac_laddress);
+ addr_low = (uint32_t)(dma->edb_cookie->dmac_laddress);
+ addr_high = (uint32_t)(dma->edb_cookie->dmac_laddress >> 32);
+ ena_hw_bar_write32(ena, ENAHW_REG_ASQ_BASE_LO, addr_low);
+ ena_hw_bar_write32(ena, ENAHW_REG_ASQ_BASE_HI, addr_high);
+ wval = ENAHW_ASQ_CAPS_DEPTH(aq->ea_qlen) |
+ ENAHW_ASQ_CAPS_ENTRY_SIZE(sizeof (*aq->ea_sq.eas_entries));
+ ena_hw_bar_write32(ena, ENAHW_REG_ASQ_CAPS, wval);
+ return (B_TRUE);
+}
+
+/*
+ * Free any resources related to the admin completion queue.
+ */
+static void
+ena_admin_cq_free(ena_t *ena)
+{
+ ena_dma_free(&ena->ena_aq.ea_cq.eac_dma);
+}
+
+/*
+ * Initialize the admin completion queue.
+ */
+static boolean_t
+ena_admin_cq_init(ena_t *ena)
+{
+ ena_adminq_t *aq = &ena->ena_aq;
+ ena_dma_buf_t *dma = &aq->ea_cq.eac_dma;
+ size_t size = aq->ea_qlen * sizeof (*aq->ea_cq.eac_entries);
+ uint32_t addr_low, addr_high, wval;
+ ena_dma_conf_t conf = {
+ .edc_size = size,
+ .edc_align = ENAHW_ADMIN_CQ_DESC_BUF_ALIGNMENT,
+ .edc_sgl = 1,
+ .edc_endian = DDI_NEVERSWAP_ACC,
+ .edc_stream = B_FALSE,
+ };
+
+ if (!ena_dma_alloc(ena, dma, &conf, size)) {
+ ena_err(ena, "failed to allocate DMA for Admin CQ");
+ return (B_FALSE);
+ }
+
+ aq->ea_cq.eac_entries = (void *)dma->edb_va;
+ aq->ea_cq.eac_head = 0;
+ aq->ea_cq.eac_phase = 1;
+ ENA_DMA_VERIFY_ADDR(ena, dma->edb_cookie->dmac_laddress);
+ addr_low = (uint32_t)(dma->edb_cookie->dmac_laddress);
+ addr_high = (uint32_t)(dma->edb_cookie->dmac_laddress >> 32);
+ ena_hw_bar_write32(ena, ENAHW_REG_ACQ_BASE_LO, addr_low);
+ ena_hw_bar_write32(ena, ENAHW_REG_ACQ_BASE_HI, addr_high);
+ wval = ENAHW_ACQ_CAPS_DEPTH(aq->ea_qlen) |
+ ENAHW_ACQ_CAPS_ENTRY_SIZE(sizeof (*aq->ea_cq.eac_entries));
+ ena_hw_bar_write32(ena, ENAHW_REG_ACQ_CAPS, wval);
+ return (B_TRUE);
+}
+
+static void
+ena_aenq_default_hdlr(void *data, enahw_aenq_desc_t *desc)
+{
+ ena_t *ena = data;
+
+ ena->ena_aenq_stat.eaes_default.value.ui64++;
+ ena_dbg(ena, "unimplemented handler for aenq group: %s",
+ ena_groups_str[desc->ead_group].eag_str);
+}
+
+static void
+ena_aenq_link_change_hdlr(void *data, enahw_aenq_desc_t *desc)
+{
+ ena_t *ena = data;
+ boolean_t is_up = (desc->ead_payload.link_change.flags &
+ ENAHW_AENQ_LINK_CHANGE_LINK_STATUS_MASK) != 0;
+
+ /*
+ * The interrupts are not enabled until after we register mac,
+ * so the mac handle should be valid.
+ */
+ ASSERT3U(ena->ena_attach_seq, >=, ENA_ATTACH_MAC_REGISTER);
+ ena->ena_aenq_stat.eaes_link_change.value.ui64++;
+
+ mutex_enter(&ena->ena_lock);
+
+ /*
+ * Notify mac only on an actual change in status.
+ */
+ if (ena->ena_link_up != is_up) {
+ if (is_up) {
+ mac_link_update(ena->ena_mh, LINK_STATE_UP);
+ } else {
+ mac_link_update(ena->ena_mh, LINK_STATE_DOWN);
+ }
+ }
+
+ ena->ena_link_up = is_up;
+
+ mutex_exit(&ena->ena_lock);
+}
+
+/*
+ * Free any resources related to the Async Event Notification Queue.
+ */
+static void
+ena_aenq_free(ena_t *ena)
+{
+ ena_dma_free(&ena->ena_aenq.eaenq_dma);
+}
+
+static void
+ena_aenq_set_def_hdlrs(ena_aenq_t *aenq)
+{
+ aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_LINK_CHANGE] = ena_aenq_default_hdlr;
+ aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_FATAL_ERROR] = ena_aenq_default_hdlr;
+ aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_WARNING] = ena_aenq_default_hdlr;
+ aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_NOTIFICATION] =
+ ena_aenq_default_hdlr;
+ aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_KEEP_ALIVE] = ena_aenq_default_hdlr;
+ aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_REFRESH_CAPABILITIES] =
+ ena_aenq_default_hdlr;
+}
+/*
+ * Initialize the Async Event Notification Queue.
+ */
+static boolean_t
+ena_aenq_init(ena_t *ena)
+{
+ ena_aenq_t *aenq = &ena->ena_aenq;
+ size_t size;
+ uint32_t addr_low, addr_high, wval;
+ ena_dma_conf_t conf;
+
+ aenq->eaenq_num_descs = ENA_AENQ_NUM_DESCS;
+ size = aenq->eaenq_num_descs * sizeof (*aenq->eaenq_descs);
+
+ conf = (ena_dma_conf_t) {
+ .edc_size = size,
+ .edc_align = ENAHW_AENQ_DESC_BUF_ALIGNMENT,
+ .edc_sgl = 1,
+ .edc_endian = DDI_NEVERSWAP_ACC,
+ .edc_stream = B_FALSE,
+ };
+
+ if (!ena_dma_alloc(ena, &aenq->eaenq_dma, &conf, size)) {
+ ena_err(ena, "failed to allocate DMA for AENQ");
+ return (B_FALSE);
+ }
+
+ aenq->eaenq_descs = (void *)aenq->eaenq_dma.edb_va;
+ aenq->eaenq_head = 0;
+ aenq->eaenq_phase = 1;
+ bzero(aenq->eaenq_descs, size);
+ ena_aenq_set_def_hdlrs(aenq);
+
+ aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_LINK_CHANGE] =
+ ena_aenq_link_change_hdlr;
+
+ ENA_DMA_VERIFY_ADDR(ena, aenq->eaenq_dma.edb_cookie->dmac_laddress);
+ addr_low = (uint32_t)(aenq->eaenq_dma.edb_cookie->dmac_laddress);
+ addr_high = (uint32_t)(aenq->eaenq_dma.edb_cookie->dmac_laddress >> 32);
+ ena_hw_bar_write32(ena, ENAHW_REG_AENQ_BASE_LO, addr_low);
+ ena_hw_bar_write32(ena, ENAHW_REG_AENQ_BASE_HI, addr_high);
+ ENA_DMA_SYNC(aenq->eaenq_dma, DDI_DMA_SYNC_FORDEV);
+ wval = ENAHW_AENQ_CAPS_DEPTH(aenq->eaenq_num_descs) |
+ ENAHW_AENQ_CAPS_ENTRY_SIZE(sizeof (*aenq->eaenq_descs));
+ ena_hw_bar_write32(ena, ENAHW_REG_AENQ_CAPS, wval);
+ return (B_TRUE);
+}
+
+/*
+ * We limit the max number of I/O queues based on several aspects of
+ * the underlying hardware.
+ *
+ * 1. The absolute upper limit is set by ENAHW_MAX_NUM_IO_QUEUES,
+ * which comes from the common code and presumably is based on device
+ * constraints.
+ *
+ * 2. Next we latch the number of I/O queues to the number of online
+ * CPUs. The idea being that each queue is a parallel work stream,
+ * and having more queues than CPUs to flush them will not improve
+ * performance. The number of online CPUs can change dynamically,
+ * and that's okay, everything should still work fine, it just
+ * might not be ideal.
+ *
+ * 3. Next we latch the number of I/O queues to the smallest of the
+ * max Tx queues and max Rx queues. We could probably loosen this
+ * restriction in the future, and have separate max I/O queues for
+ * Tx and Rx. This is what Linux does, and seems like a fine place
+ * to start.
+ */
+static void
+ena_set_max_io_queues(ena_t *ena)
+{
+ uint32_t max = ENAHW_MAX_NUM_IO_QUEUES;
+
+ max = MIN(ncpus_online, max);
+ /*
+ * Supposedly a device could present a different number of SQs
+ * and CQs. This driver is designed in a way that requires
+ * each SQ to have a corresponding and dedicated CQ (how would
+ * it work otherwise). Therefore, we must check both values
+ * and find the minimum between them.
+ */
+ max = MIN(ena->ena_tx_max_sq_num, max);
+ max = MIN(ena->ena_tx_max_cq_num, max);
+ max = MIN(ena->ena_rx_max_sq_num, max);
+ max = MIN(ena->ena_rx_max_cq_num, max);
+
+
+ /* This shouldn't happen, but just in case. */
+ if (max == 0) {
+ max = 1;
+ }
+
+ ena->ena_max_io_queues = max;
+}
+
+/*
+ * We require that an Rx or Tx buffer be able to hold the maximum MTU
+ * along with the maximum frame header length. In this case we know
+ * ENA is presenting us an Ethernet frame so we add the size of an
+ * Ethernet VLAN header. Rx has the additional requirement of needing
+ * additional margin for the sake of IP header alignment.
+ */
+static void
+ena_update_buf_sizes(ena_t *ena)
+{
+ ena->ena_max_frame_hdr = sizeof (struct ether_vlan_header);
+ ena->ena_max_frame_total = ena->ena_max_frame_hdr + ena->ena_mtu;
+ ena->ena_tx_buf_sz = P2ROUNDUP_TYPED(ena->ena_max_frame_total,
+ ena->ena_page_sz, uint32_t);
+ ena->ena_rx_buf_sz = P2ROUNDUP_TYPED(ena->ena_max_frame_total +
+ ENA_RX_BUF_IPHDR_ALIGNMENT, ena->ena_page_sz, uint32_t);
+}
+
+static boolean_t
+ena_get_offloads(ena_t *ena)
+{
+ int ret = 0;
+ enahw_resp_desc_t resp;
+ enahw_feat_offload_t *feat = &resp.erd_resp.erd_get_feat.ergf_offload;
+
+ ena->ena_tx_l3_ipv4_csum = B_FALSE;
+
+ ena->ena_tx_l4_ipv4_part_csum = B_FALSE;
+ ena->ena_tx_l4_ipv4_full_csum = B_FALSE;
+ ena->ena_tx_l4_ipv4_lso = B_FALSE;
+
+ ena->ena_tx_l4_ipv6_part_csum = B_FALSE;
+ ena->ena_tx_l4_ipv6_full_csum = B_FALSE;
+ ena->ena_tx_l4_ipv6_lso = B_FALSE;
+
+ ena->ena_rx_l3_ipv4_csum = B_FALSE;
+ ena->ena_rx_l4_ipv4_csum = B_FALSE;
+ ena->ena_rx_l4_ipv6_csum = B_FALSE;
+ ena->ena_rx_hash = B_FALSE;
+
+ bzero(&resp, sizeof (resp));
+ ret = ena_get_feature(ena, &resp, ENAHW_FEAT_STATELESS_OFFLOAD_CONFIG,
+ ENAHW_FEAT_STATELESS_OFFLOAD_CONFIG_VER);
+
+ if (ret == ENOTSUP) {
+ /*
+ * In this case the device does not support querying
+ * for hardware offloads. We take that as a sign that
+ * the device provides no offloads.
+ */
+ return (B_TRUE);
+ } else if (ret != 0) {
+ ena_err(ena, "error getting stateless offload: %d", ret);
+ return (B_FALSE);
+ }
+
+ ena->ena_tx_l3_ipv4_csum = ENAHW_FEAT_OFFLOAD_TX_L3_IPV4_CSUM(feat);
+
+ ena->ena_tx_l4_ipv4_part_csum =
+ ENAHW_FEAT_OFFLOAD_TX_L4_IPV4_CSUM_PART(feat);
+ ena->ena_tx_l4_ipv4_full_csum =
+ ENAHW_FEAT_OFFLOAD_TX_L4_IPV4_CSUM_FULL(feat);
+ ena->ena_tx_l4_ipv4_lso = ENAHW_FEAT_OFFLOAD_TSO_IPV4(feat);
+
+ ena->ena_tx_l4_ipv6_part_csum =
+ ENAHW_FEAT_OFFLOAD_TX_L4_IPV6_CSUM_PART(feat);
+ ena->ena_tx_l4_ipv6_full_csum =
+ ENAHW_FEAT_OFFLOAD_TX_L4_IPV6_CSUM_FULL(feat);
+ ena->ena_tx_l4_ipv6_lso = ENAHW_FEAT_OFFLOAD_TSO_IPV6(feat);
+
+ ena->ena_rx_l3_ipv4_csum = ENAHW_FEAT_OFFLOAD_RX_L3_IPV4_CSUM(feat);
+ ena->ena_rx_l4_ipv4_csum = ENAHW_FEAT_OFFLOAD_RX_L4_IPV4_CSUM(feat);
+ ena->ena_rx_l4_ipv6_csum = ENAHW_FEAT_OFFLOAD_RX_L4_IPV6_CSUM(feat);
+ return (B_TRUE);
+}
+
+static int
+ena_get_prop(ena_t *ena, char *propname, const int minval, const int maxval,
+ const int defval)
+{
+ int value = ddi_prop_get_int(DDI_DEV_T_ANY, ena->ena_dip,
+ DDI_PROP_DONTPASS, propname, defval);
+
+ if (value > maxval) {
+ ena_err(ena, "user value %s=%d exceeded maximum, setting to %d",
+ propname, value, maxval);
+ value = maxval;
+ }
+
+ if (value < minval) {
+ ena_err(ena, "user value %s=%d below minimum, setting to %d",
+ propname, value, minval);
+ value = minval;
+ }
+
+ return (value);
+}
+
+static boolean_t
+ena_set_mtu(ena_t *ena)
+{
+ int ret = 0;
+ enahw_cmd_desc_t cmd;
+ enahw_feat_mtu_t *feat = &cmd.ecd_cmd.ecd_set_feat.ecsf_feat.ecsf_mtu;
+ enahw_resp_desc_t resp;
+
+ bzero(&cmd, sizeof (cmd));
+ bzero(&resp, sizeof (resp));
+ feat->efm_mtu = ena->ena_mtu;
+
+ if ((ret = ena_set_feature(ena, &cmd, &resp, ENAHW_FEAT_MTU,
+ ENAHW_FEAT_MTU_VER)) != 0) {
+ ena_err(ena, "failed to set device MTU to %u: %d", ena->ena_mtu,
+ ret);
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+static void
+ena_get_link_config(ena_t *ena)
+{
+ enahw_resp_desc_t resp;
+ enahw_feat_link_conf_t *feat =
+ &resp.erd_resp.erd_get_feat.ergf_link_conf;
+ boolean_t full_duplex;
+
+ bzero(&resp, sizeof (resp));
+
+ if (ena_get_feature(ena, &resp, ENAHW_FEAT_LINK_CONFIG,
+ ENAHW_FEAT_LINK_CONFIG_VER) != 0) {
+ /*
+ * Some ENA devices do not support this feature. In
+ * those cases we report a 1Gbps link, full duplex.
+ * For the most accurate information on bandwidth
+ * limits see the official AWS documentation.
+ */
+ ena->ena_link_speed_mbits = 1 * 1000 * 1000;
+ ena->ena_link_speeds = ENAHW_LINK_SPEED_1G;
+ ena->ena_link_duplex = LINK_DUPLEX_FULL;
+ ena->ena_link_autoneg = B_TRUE;
+ return;
+ }
+
+ ena->ena_link_speed_mbits = feat->eflc_speed;
+ ena->ena_link_speeds = feat->eflc_supported;
+ full_duplex = ENAHW_FEAT_LINK_CONF_FULL_DUPLEX(feat);
+ ena->ena_link_duplex = full_duplex ? LINK_DUPLEX_FULL :
+ LINK_DUPLEX_HALF;
+ ena->ena_link_autoneg = ENAHW_FEAT_LINK_CONF_AUTONEG(feat);
+}
+
+/*
+ * Retrieve all configuration values which are modifiable via
+ * ena.conf, and set ena_t members accordingly. While the conf values
+ * have priority, they may be implicitly modified by the driver to
+ * meet resource constraints on a given platform. If no value is
+ * specified in the conf file, the driver will attempt to use the
+ * largest value supported. While no supported value should be large
+ * enough to overflow an int, keep in mind that ena_get_prop() will
+ * cast the values to an int.
+ *
+ * This function should be called after the device is initialized,
+ * admin queue is established, and the hardware features/capabs have
+ * been queried; it should be called before mac registration.
+ */
+static boolean_t
+ena_attach_read_conf(ena_t *ena)
+{
+ uint32_t gcv; /* Greatest Common Value */
+
+ /*
+ * We expect that the queue lengths are the same for both the
+ * CQ and SQ, but technically the device could return
+ * different lengths. For now the driver locks them together.
+ */
+ gcv = min(ena->ena_rx_max_sq_num_descs, ena->ena_rx_max_cq_num_descs);
+ ASSERT3U(gcv, <=, INT_MAX);
+ ena->ena_rxq_num_descs = ena_get_prop(ena, ENA_PROP_RXQ_NUM_DESCS,
+ ENA_PROP_RXQ_NUM_DESCS_MIN, gcv, gcv);
+
+ ena->ena_rxq_intr_limit = ena_get_prop(ena, ENA_PROP_RXQ_INTR_LIMIT,
+ ENA_PROP_RXQ_INTR_LIMIT_MIN, ENA_PROP_RXQ_INTR_LIMIT_MAX,
+ ENA_PROP_RXQ_INTR_LIMIT_DEF);
+
+ gcv = min(ena->ena_tx_max_sq_num_descs, ena->ena_tx_max_cq_num_descs);
+ ASSERT3U(gcv, <=, INT_MAX);
+ ena->ena_txq_num_descs = ena_get_prop(ena, ENA_PROP_TXQ_NUM_DESCS,
+ ENA_PROP_TXQ_NUM_DESCS_MIN, gcv, gcv);
+
+ return (B_TRUE);
+}
+
+/*
+ * Perform any necessary device configuration after the driver.conf
+ * has been read.
+ */
+static boolean_t
+ena_attach_dev_cfg(ena_t *ena)
+{
+ ASSERT3U(ena->ena_attach_seq, >=, ENA_ATTACH_READ_CONF);
+
+ if (!ena_set_mtu(ena)) {
+ /*
+ * We don't expect this to fail, but we try a fallback
+ * first before failing the attach sequence.
+ */
+ ena->ena_mtu = 1500;
+ ena_err(ena, "trying fallback MTU: %u", ena->ena_mtu);
+
+ if (!ena_set_mtu(ena)) {
+ return (B_FALSE);
+ }
+ }
+
+ return (B_TRUE);
+}
+
+static boolean_t
+ena_check_versions(ena_t *ena)
+{
+ uint32_t dev_vsn = ena_hw_bar_read32(ena, ENAHW_REG_VERSION);
+ uint32_t ctrl_vsn =
+ ena_hw_bar_read32(ena, ENAHW_REG_CONTROLLER_VERSION);
+
+ ena->ena_dev_major_vsn = ENAHW_DEV_MAJOR_VSN(dev_vsn);
+ ena->ena_dev_minor_vsn = ENAHW_DEV_MINOR_VSN(dev_vsn);
+
+ ena->ena_ctrl_major_vsn = ENAHW_CTRL_MAJOR_VSN(ctrl_vsn);
+ ena->ena_ctrl_minor_vsn = ENAHW_CTRL_MINOR_VSN(ctrl_vsn);
+ ena->ena_ctrl_subminor_vsn = ENAHW_CTRL_SUBMINOR_VSN(ctrl_vsn);
+ ena->ena_ctrl_impl_id = ENAHW_CTRL_IMPL_ID(ctrl_vsn);
+
+ if (ena->ena_ctrl_subminor_vsn < ENA_CTRL_SUBMINOR_VSN_MIN) {
+ ena_err(ena, "unsupported controller version: %u.%u.%u",
+ ena->ena_ctrl_major_vsn, ena->ena_ctrl_minor_vsn,
+ ena->ena_ctrl_subminor_vsn);
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+boolean_t
+ena_setup_aenq(ena_t *ena)
+{
+ enahw_cmd_desc_t cmd;
+ enahw_feat_aenq_t *cmd_feat =
+ &cmd.ecd_cmd.ecd_set_feat.ecsf_feat.ecsf_aenq;
+ enahw_resp_desc_t resp;
+ enahw_feat_aenq_t *resp_feat = &resp.erd_resp.erd_get_feat.ergf_aenq;
+ enahw_aenq_groups_t to_enable;
+
+ bzero(&resp, sizeof (resp));
+ if (ena_get_feature(ena, &resp, ENAHW_FEAT_AENQ_CONFIG,
+ ENAHW_FEAT_AENQ_CONFIG_VER) != 0) {
+ return (B_FALSE);
+ }
+
+ to_enable = BIT(ENAHW_AENQ_GROUP_LINK_CHANGE) |
+ BIT(ENAHW_AENQ_GROUP_FATAL_ERROR) |
+ BIT(ENAHW_AENQ_GROUP_WARNING) |
+ BIT(ENAHW_AENQ_GROUP_NOTIFICATION);
+ to_enable &= resp_feat->efa_supported_groups;
+
+ bzero(&cmd, sizeof (cmd));
+ bzero(&resp, sizeof (cmd));
+ cmd_feat->efa_enabled_groups = to_enable;
+
+ if (ena_set_feature(ena, &cmd, &resp, ENAHW_FEAT_AENQ_CONFIG,
+ ENAHW_FEAT_AENQ_CONFIG_VER) != 0) {
+ return (B_FALSE);
+ }
+
+ bzero(&resp, sizeof (resp));
+ if (ena_get_feature(ena, &resp, ENAHW_FEAT_AENQ_CONFIG,
+ ENAHW_FEAT_AENQ_CONFIG_VER) != 0) {
+ return (B_FALSE);
+ }
+
+ ena->ena_aenq_supported_groups = resp_feat->efa_supported_groups;
+ ena->ena_aenq_enabled_groups = resp_feat->efa_enabled_groups;
+
+ for (uint_t i = 0; i < ENAHW_AENQ_GROUPS_ARR_NUM; i++) {
+ ena_aenq_grpstr_t *grpstr = &ena_groups_str[i];
+ boolean_t supported = BIT(grpstr->eag_type) &
+ resp_feat->efa_supported_groups;
+ boolean_t enabled = BIT(grpstr->eag_type) &
+ resp_feat->efa_enabled_groups;
+
+ ena_dbg(ena, "%s supported: %s enabled: %s", grpstr->eag_str,
+ supported ? "Y" : "N", enabled ? "Y" : "N");
+ }
+
+ return (B_TRUE);
+}
+
+/*
+ * Free all resources allocated as part of ena_device_init().
+ */
+static void
+ena_cleanup_device_init(ena_t *ena)
+{
+ ena_adminq_t *aq = &ena->ena_aq;
+
+ ena_free_host_info(ena);
+ mutex_destroy(&aq->ea_sq_lock);
+ mutex_destroy(&aq->ea_cq_lock);
+ mutex_destroy(&aq->ea_stat_lock);
+ list_destroy(&aq->ea_cmd_ctxs_free);
+ kmem_free(aq->ea_cmd_ctxs, sizeof (ena_cmd_ctx_t) * aq->ea_qlen);
+ ena_admin_sq_free(ena);
+ ena_admin_cq_free(ena);
+ ena_aenq_free(ena);
+ ena_stat_device_basic_cleanup(ena);
+ ena_stat_device_extended_cleanup(ena);
+ ena_stat_aenq_cleanup(ena);
+}
+
+static boolean_t
+ena_attach_device_init(ena_t *ena)
+{
+ ena_adminq_t *aq = &ena->ena_aq;
+ uint32_t rval, wval;
+ uint8_t dma_width;
+ hrtime_t timeout, cmd_timeout;
+ hrtime_t expired;
+ enahw_resp_desc_t resp;
+ enahw_feat_dev_attr_t *feat = &resp.erd_resp.erd_get_feat.ergf_dev_attr;
+ uint8_t *maddr;
+ uint32_t supported_features;
+ int ret = 0;
+
+ rval = ena_hw_bar_read32(ena, ENAHW_REG_DEV_STS);
+ if ((rval & ENAHW_DEV_STS_READY_MASK) == 0) {
+ ena_err(ena, "device is not ready");
+ return (B_FALSE);
+ }
+
+ rval = ena_hw_bar_read32(ena, ENAHW_REG_CAPS);
+
+ /*
+ * The device stores the reset timeout at 100ms resolution; we
+ * normalize that to nanoseconds.
+ */
+ timeout = MSEC2NSEC(ENAHW_CAPS_RESET_TIMEOUT(rval) * 100);
+
+ if (timeout == 0) {
+ ena_err(ena, "device gave invalid reset timeout");
+ return (B_FALSE);
+ }
+
+ expired = gethrtime() + timeout;
+
+ wval = ENAHW_DEV_CTL_DEV_RESET_MASK;
+ wval |= (ENAHW_RESET_NORMAL << ENAHW_DEV_CTL_RESET_REASON_SHIFT) &
+ ENAHW_DEV_CTL_RESET_REASON_MASK;
+ ena_hw_bar_write32(ena, ENAHW_REG_DEV_CTL, wval);
+
+ /*
+ * Make sure reset is in progress.
+ */
+ while (1) {
+ rval = ena_hw_bar_read32(ena, ENAHW_REG_DEV_STS);
+
+ if ((rval & ENAHW_DEV_STS_RESET_IN_PROGRESS_MASK) != 0) {
+ break;
+ }
+
+ if (gethrtime() > expired) {
+ ena_err(ena, "device reset start timed out");
+ return (B_FALSE);
+ }
+
+ /* Sleep for 100 milliseconds. */
+ delay(drv_usectohz(100 * 1000));
+ }
+
+ /*
+ * Reset the timeout counter for the next device request.
+ */
+ expired = gethrtime() + timeout;
+
+ /*
+ * Wait for the device reset to finish.
+ */
+ ena_hw_bar_write32(ena, ENAHW_REG_DEV_CTL, 0);
+ while (1) {
+ rval = ena_hw_bar_read32(ena, ENAHW_REG_DEV_STS);
+
+ if ((rval & ENAHW_DEV_STS_RESET_IN_PROGRESS_MASK) == 0) {
+ break;
+ }
+
+ if (gethrtime() > expired) {
+ ena_err(ena, "device reset timed out");
+ return (B_FALSE);
+ }
+
+ /* Sleep for 100 milliseconds. */
+ delay(drv_usectohz(100 * 1000));
+ }
+
+ if (!ena_check_versions(ena)) {
+ return (B_FALSE);
+ }
+
+ rval = ena_hw_bar_read32(ena, ENAHW_REG_CAPS);
+ dma_width = ENAHW_CAPS_DMA_ADDR_WIDTH(rval);
+ ena->ena_dma_width = dma_width;
+
+ /*
+ * As we are not using an interrupt for admin queue completion
+ * signaling, we do not need a priority on these mutexes. If
+ * that changes, we will have to rejigger some code to create
+ * the admin queue interrupt before this function.
+ */
+ mutex_init(&aq->ea_sq_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&aq->ea_cq_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&aq->ea_stat_lock, NULL, MUTEX_DRIVER, NULL);
+ aq->ea_qlen = ENA_ADMINQ_DEPTH;
+ aq->ea_pending_cmds = 0;
+
+ aq->ea_cmd_ctxs = kmem_zalloc(sizeof (ena_cmd_ctx_t) * aq->ea_qlen,
+ KM_SLEEP);
+ list_create(&aq->ea_cmd_ctxs_free, sizeof (ena_cmd_ctx_t),
+ offsetof(ena_cmd_ctx_t, ectx_node));
+
+ for (uint_t i = 0; i < aq->ea_qlen; i++) {
+ ena_cmd_ctx_t *ctx = &aq->ea_cmd_ctxs[i];
+
+ ctx->ectx_id = i;
+ ctx->ectx_pending = B_FALSE;
+ ctx->ectx_cmd_opcode = ENAHW_CMD_NONE;
+ ctx->ectx_resp = NULL;
+ list_insert_tail(&aq->ea_cmd_ctxs_free, ctx);
+ }
+
+ /*
+ * The value stored in the device register is at a
+ * resolution of 100 milliseconds. We normalize that to
+ * nanoseconds.
+ */
+ cmd_timeout = MSEC2NSEC(ENAHW_CAPS_ADMIN_CMD_TIMEOUT(rval) * 100);
+ aq->ea_cmd_timeout_ns = max(cmd_timeout, ena_admin_cmd_timeout_ns);
+
+ if (aq->ea_cmd_timeout_ns == 0) {
+ aq->ea_cmd_timeout_ns = ENA_ADMIN_CMD_DEF_TIMEOUT;
+ }
+
+ if (!ena_admin_sq_init(ena)) {
+ return (B_FALSE);
+ }
+
+ if (!ena_admin_cq_init(ena)) {
+ return (B_FALSE);
+ }
+
+ if (!ena_aenq_init(ena)) {
+ return (B_FALSE);
+ }
+
+ /*
+ * While the Linux driver prefers to use interrupts to deliver
+ * admin queue completions, we just poll -- it seems to work
+ * just fine.
+ */
+ ena_hw_bar_write32(ena, ENAHW_REG_INTERRUPT_MASK, 0);
+ aq->ea_poll_mode = B_TRUE;
+
+ bzero(&resp, sizeof (resp));
+ ret = ena_get_feature(ena, &resp, ENAHW_FEAT_DEVICE_ATTRIBUTES,
+ ENAHW_FEAT_DEVICE_ATTRIBUTES_VER);
+
+ if (ret != 0) {
+ ena_err(ena, "failed to get device attributes: %d", ret);
+ return (B_FALSE);
+ }
+
+ ena_dbg(ena, "impl ID: %u", feat->efda_impl_id);
+ ena_dbg(ena, "device version: %u", feat->efda_device_version);
+ ena_dbg(ena, "supported features: 0x%x",
+ feat->efda_supported_features);
+ ena_dbg(ena, "phys addr width: %u", feat->efda_phys_addr_width);
+ ena_dbg(ena, "virt addr width: %u", feat->efda_virt_addr_with);
+ maddr = feat->efda_mac_addr;
+ ena_dbg(ena, "mac addr: %x:%x:%x:%x:%x:%x", maddr[0], maddr[1],
+ maddr[2], maddr[3], maddr[4], maddr[5]);
+ ena_dbg(ena, "max MTU: %u", feat->efda_max_mtu);
+
+ bcopy(maddr, ena->ena_mac_addr, ETHERADDRL);
+ ena->ena_max_mtu = feat->efda_max_mtu;
+ supported_features = feat->efda_supported_features;
+ ena->ena_supported_features = supported_features;
+ feat = NULL;
+ bzero(&resp, sizeof (resp));
+
+ if (supported_features & BIT(ENAHW_FEAT_MAX_QUEUES_EXT)) {
+ enahw_feat_max_queue_ext_t *feat_mqe =
+ &resp.erd_resp.erd_get_feat.ergf_max_queue_ext;
+
+ ret = ena_get_feature(ena, &resp, ENAHW_FEAT_MAX_QUEUES_EXT,
+ ENAHW_FEAT_MAX_QUEUES_EXT_VER);
+
+ if (ret != 0) {
+ ena_err(ena, "failed to query max queues ext: %d", ret);
+ return (B_FALSE);
+ }
+
+ ena->ena_tx_max_sq_num = feat_mqe->efmqe_max_tx_sq_num;
+ ena->ena_tx_max_sq_num_descs = feat_mqe->efmqe_max_tx_sq_depth;
+ ena->ena_tx_max_cq_num = feat_mqe->efmqe_max_tx_cq_num;
+ ena->ena_tx_max_cq_num_descs = feat_mqe->efmqe_max_tx_cq_depth;
+ ena->ena_tx_max_desc_per_pkt =
+ feat_mqe->efmqe_max_per_packet_tx_descs;
+ ena->ena_tx_max_hdr_len = feat_mqe->efmqe_max_tx_header_size;
+
+ ena->ena_rx_max_sq_num = feat_mqe->efmqe_max_rx_sq_num;
+ ena->ena_rx_max_sq_num_descs = feat_mqe->efmqe_max_rx_sq_depth;
+ ena->ena_rx_max_cq_num = feat_mqe->efmqe_max_rx_cq_num;
+ ena->ena_rx_max_cq_num_descs = feat_mqe->efmqe_max_rx_cq_depth;
+ ena->ena_rx_max_desc_per_pkt =
+ feat_mqe->efmqe_max_per_packet_rx_descs;
+
+ ena_set_max_io_queues(ena);
+ } else {
+ enahw_feat_max_queue_t *feat_mq =
+ &resp.erd_resp.erd_get_feat.ergf_max_queue;
+
+ ret = ena_get_feature(ena, &resp, ENAHW_FEAT_MAX_QUEUES_NUM,
+ ENAHW_FEAT_MAX_QUEUES_NUM_VER);
+
+ if (ret != 0) {
+ ena_err(ena, "failed to query max queues: %d", ret);
+ return (B_FALSE);
+ }
+
+ ena->ena_tx_max_sq_num = feat_mq->efmq_max_sq_num;
+ ena->ena_tx_max_sq_num_descs = feat_mq->efmq_max_sq_depth;
+ ena->ena_tx_max_cq_num = feat_mq->efmq_max_cq_num;
+ ena->ena_tx_max_cq_num_descs = feat_mq->efmq_max_cq_depth;
+ ena->ena_tx_max_desc_per_pkt =
+ feat_mq->efmq_max_per_packet_tx_descs;
+ ena->ena_tx_max_hdr_len = feat_mq->efmq_max_header_size;
+
+ ena->ena_rx_max_sq_num = feat_mq->efmq_max_sq_num;
+ ena->ena_rx_max_sq_num_descs = feat_mq->efmq_max_sq_depth;
+ ena->ena_rx_max_cq_num = feat_mq->efmq_max_cq_num;
+ ena->ena_rx_max_cq_num_descs = feat_mq->efmq_max_cq_depth;
+ ena->ena_rx_max_desc_per_pkt =
+ feat_mq->efmq_max_per_packet_rx_descs;
+
+ ena_set_max_io_queues(ena);
+ }
+
+ ena->ena_mtu = ena->ena_max_mtu;
+ ena_update_buf_sizes(ena);
+ /*
+ * We could use ENAHW_FEAT_HW_HINTS to determine actual SGL
+ * sizes, but for now we just force everything to use one
+ * segment.
+ */
+ ena->ena_tx_sgl_max_sz = 1;
+ ena->ena_rx_sgl_max_sz = 1;
+
+ if (!ena_init_host_info(ena)) {
+ return (B_FALSE);
+ }
+
+ if (!ena_setup_aenq(ena)) {
+ return (B_FALSE);
+ }
+
+ ena_get_link_config(ena);
+
+ if (!ena_get_offloads(ena)) {
+ return (B_FALSE);
+ }
+
+ if (!ena_stat_device_basic_init(ena)) {
+ return (B_FALSE);
+ }
+
+ if (!ena_stat_device_extended_init(ena)) {
+ return (B_FALSE);
+ }
+
+ if (!ena_stat_aenq_init(ena)) {
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+static void
+ena_cleanup_intr_alloc(ena_t *ena)
+{
+ for (int i = 0; i < ena->ena_num_intrs; i++) {
+ int ret = ddi_intr_free(ena->ena_intr_handles[i]);
+ if (ret != DDI_SUCCESS) {
+ ena_err(ena, "failed to free interrupt %d: %d", i, ret);
+ }
+ }
+
+ if (ena->ena_intr_handles != NULL) {
+ kmem_free(ena->ena_intr_handles, ena->ena_intr_handles_sz);
+ ena->ena_intr_handles = NULL;
+ ena->ena_intr_handles_sz = 0;
+ }
+}
+
+/*
+ * The Linux driver supports only MSI-X interrupts. We do the same,
+ * with the assumption that it's the only type of interrupt the device
+ * can present.
+ */
+static boolean_t
+ena_attach_intr_alloc(ena_t *ena)
+{
+ int ret;
+ int types;
+ int min, req, ideal, avail, actual;
+
+ ret = ddi_intr_get_supported_types(ena->ena_dip, &types);
+ if (ret != DDI_SUCCESS) {
+ ena_err(ena, "failed to get interrupt types: %d", ret);
+ return (B_FALSE);
+ }
+
+ ena_dbg(ena, "supported interrupt types: 0x%x", types);
+ if ((types & DDI_INTR_TYPE_MSIX) == 0) {
+ ena_err(ena, "the ena driver only supports MSI-X interrupts");
+ return (B_FALSE);
+ }
+
+ /* One for I/O, one for adminq. */
+ min = 2;
+ ideal = ena->ena_max_io_queues + 1;
+ ret = ddi_intr_get_nintrs(ena->ena_dip, DDI_INTR_TYPE_MSIX, &avail);
+ if (ret != DDI_SUCCESS) {
+ ena_err(ena, "failed to get number of MSI-X interrupts: %d",
+ ret);
+ return (B_FALSE);
+ }
+
+ if (avail < min) {
+ ena_err(ena, "number of MSI-X interrupts is %d, but the driver "
+ "requires a minimum of %d", avail, min);
+ return (B_FALSE);
+ }
+
+ ena_dbg(ena, "%d MSI-X interrupts available", avail);
+
+ ret = ddi_intr_get_navail(ena->ena_dip, DDI_INTR_TYPE_MSIX, &avail);
+ if (ret != DDI_SUCCESS) {
+ ena_err(ena, "failed to get available interrupts: %d", ret);
+ return (B_FALSE);
+ }
+
+ if (avail < min) {
+ ena_err(ena, "number of available MSI-X interrupts is %d, "
+ "but the driver requires a minimum of %d", avail, min);
+ return (B_FALSE);
+ }
+
+ req = MIN(ideal, avail);
+ ena->ena_intr_handles_sz = req * sizeof (ddi_intr_handle_t);
+ ena->ena_intr_handles = kmem_zalloc(ena->ena_intr_handles_sz, KM_SLEEP);
+
+ ret = ddi_intr_alloc(ena->ena_dip, ena->ena_intr_handles,
+ DDI_INTR_TYPE_MSIX, 0, req, &actual, DDI_INTR_ALLOC_NORMAL);
+ if (ret != DDI_SUCCESS) {
+ ena_err(ena, "failed to allocate %d MSI-X interrupts: %d",
+ req, ret);
+ return (B_FALSE);
+ }
+
+ if (actual < min) {
+ ena_err(ena, "number of allocated interrupts is %d, but the "
+ "driver requires a minimum of %d", actual, min);
+ return (B_FALSE);
+ }
+
+ ena->ena_num_intrs = actual;
+
+ ret = ddi_intr_get_cap(ena->ena_intr_handles[0], &ena->ena_intr_caps);
+ if (ret != DDI_SUCCESS) {
+ ena_err(ena, "failed to get interrupt capability: %d", ret);
+ return (B_FALSE);
+ }
+
+ ret = ddi_intr_get_pri(ena->ena_intr_handles[0], &ena->ena_intr_pri);
+ if (ret != DDI_SUCCESS) {
+ ena_err(ena, "failed to get interrupt priority: %d", ret);
+ return (B_FALSE);
+ }
+
+ ena_dbg(ena, "MSI-X interrupts allocated: %d, cap: 0x%x, pri: %u",
+ actual, ena->ena_intr_caps, ena->ena_intr_pri);
+
+ /*
+ * The ena_lock should not be held in the datapath, but it is
+ * held as part of the AENQ handler, which runs in interrupt
+ * context. Therefore, we delay the initialization of this
+ * mutex until after the interrupts are allocated.
+ */
+ mutex_init(&ena->ena_lock, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(ena->ena_intr_pri));
+
+ return (B_TRUE);
+}
+
+/*
+ * Allocate the parent Rx queue structures. More importantly, this is
+ * NOT allocating the queue descriptors or data buffers. Those are
+ * allocated on demand as queues are started.
+ */
+static boolean_t
+ena_attach_alloc_rxqs(ena_t *ena)
+{
+ /* We rely on the interrupt priority for initializing the mutexes. */
+ VERIFY3U(ena->ena_attach_seq, >=, ENA_ATTACH_INTR_ALLOC);
+ ena->ena_num_rxqs = ena->ena_num_intrs - 1;
+ ASSERT3U(ena->ena_num_rxqs, >, 0);
+ ena->ena_rxqs = kmem_zalloc(ena->ena_num_rxqs * sizeof (*ena->ena_rxqs),
+ KM_SLEEP);
+
+ for (uint_t i = 0; i < ena->ena_num_rxqs; i++) {
+ ena_rxq_t *rxq = &ena->ena_rxqs[i];
+
+ rxq->er_rxqs_idx = i;
+ /* The 0th vector is for Admin + AENQ. */
+ rxq->er_intr_vector = i + 1;
+ rxq->er_mrh = NULL;
+
+ mutex_init(&rxq->er_lock, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(ena->ena_intr_pri));
+ mutex_init(&rxq->er_stat_lock, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(ena->ena_intr_pri));
+
+ rxq->er_ena = ena;
+ rxq->er_sq_num_descs = ena->ena_rxq_num_descs;
+ rxq->er_cq_num_descs = ena->ena_rxq_num_descs;
+
+ if (!ena_stat_rxq_init(rxq)) {
+ return (B_FALSE);
+ }
+
+ if (!ena_alloc_rxq(rxq)) {
+ return (B_FALSE);
+ }
+ }
+
+ return (B_TRUE);
+}
+
+static void
+ena_cleanup_rxqs(ena_t *ena)
+{
+ for (uint_t i = 0; i < ena->ena_num_rxqs; i++) {
+ ena_rxq_t *rxq = &ena->ena_rxqs[i];
+
+ ena_cleanup_rxq(rxq);
+ mutex_destroy(&rxq->er_lock);
+ mutex_destroy(&rxq->er_stat_lock);
+ ena_stat_rxq_cleanup(rxq);
+ }
+
+ kmem_free(ena->ena_rxqs, ena->ena_num_rxqs * sizeof (*ena->ena_rxqs));
+}
+
+/*
+ * Allocate the parent Tx queue structures. Note that this does NOT
+ * allocate the queue descriptors or data buffers; those are allocated
+ * on demand as each queue is started.
+ */
+static boolean_t
+ena_attach_alloc_txqs(ena_t *ena)
+{
+ /* We rely on the interrupt priority for initializing the mutexes. */
+ VERIFY3U(ena->ena_attach_seq, >=, ENA_ATTACH_INTR_ALLOC);
+ ena->ena_num_txqs = ena->ena_num_intrs - 1;
+ ASSERT3U(ena->ena_num_txqs, >, 0);
+ ena->ena_txqs = kmem_zalloc(ena->ena_num_txqs * sizeof (*ena->ena_txqs),
+ KM_SLEEP);
+
+ for (uint_t i = 0; i < ena->ena_num_txqs; i++) {
+ ena_txq_t *txq = &ena->ena_txqs[i];
+
+ txq->et_txqs_idx = i;
+ /* The 0th vector is for Admin + AENQ. */
+ txq->et_intr_vector = i + 1;
+ txq->et_mrh = NULL;
+
+ mutex_init(&txq->et_lock, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(ena->ena_intr_pri));
+ mutex_init(&txq->et_stat_lock, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(ena->ena_intr_pri));
+
+ txq->et_ena = ena;
+ txq->et_sq_num_descs = ena->ena_txq_num_descs;
+ txq->et_cq_num_descs = ena->ena_txq_num_descs;
+
+ if (!ena_stat_txq_init(txq)) {
+ return (B_FALSE);
+ }
+
+ if (!ena_alloc_txq(txq)) {
+ return (B_FALSE);
+ }
+ }
+
+ return (B_TRUE);
+}
+
+static void
+ena_cleanup_txqs(ena_t *ena)
+{
+	for (uint_t i = 0; i < ena->ena_num_txqs; i++) {
+ ena_txq_t *txq = &ena->ena_txqs[i];
+
+ ena_cleanup_txq(txq);
+ mutex_destroy(&txq->et_lock);
+ mutex_destroy(&txq->et_stat_lock);
+ ena_stat_txq_cleanup(txq);
+ }
+
+ kmem_free(ena->ena_txqs, ena->ena_num_txqs * sizeof (*ena->ena_txqs));
+}
+
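+/*
+ * This table drives both attach and cleanup: ena_attach() walks it
+ * forward, invoking each ead_attach_fn in turn, while ena_cleanup()
+ * walks it backward, invoking the matching ead_cleanup_fn for each
+ * completed step.
+ */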
+ena_attach_desc_t ena_attach_tbl[ENA_ATTACH_NUM_ENTRIES] = {
+ {
+ .ead_seq = ENA_ATTACH_PCI,
+ .ead_name = "PCI config",
+ .ead_attach_fn = ena_attach_pci,
+ .ead_attach_hard_fail = B_TRUE,
+ .ead_cleanup_fn = ena_cleanup_pci,
+ },
+
+ {
+ .ead_seq = ENA_ATTACH_REGS,
+ .ead_name = "BAR mapping",
+ .ead_attach_fn = ena_attach_regs_map,
+ .ead_attach_hard_fail = B_TRUE,
+ .ead_cleanup_fn = ena_cleanup_regs_map,
+ },
+
+ {
+ .ead_seq = ENA_ATTACH_DEV_INIT,
+ .ead_name = "device initialization",
+ .ead_attach_fn = ena_attach_device_init,
+ .ead_attach_hard_fail = B_TRUE,
+ .ead_cleanup_fn = ena_cleanup_device_init,
+ },
+
+ {
+ .ead_seq = ENA_ATTACH_READ_CONF,
+ .ead_name = "ena.conf",
+ .ead_attach_fn = ena_attach_read_conf,
+ .ead_attach_hard_fail = B_TRUE,
+ .ead_cleanup_fn = ena_no_cleanup,
+ },
+
+ {
+ .ead_seq = ENA_ATTACH_DEV_CFG,
+ .ead_name = "device config",
+ .ead_attach_fn = ena_attach_dev_cfg,
+ .ead_attach_hard_fail = B_TRUE,
+ .ead_cleanup_fn = ena_no_cleanup,
+ },
+
+ {
+ .ead_seq = ENA_ATTACH_INTR_ALLOC,
+ .ead_name = "interrupt allocation",
+ .ead_attach_fn = ena_attach_intr_alloc,
+ .ead_attach_hard_fail = B_TRUE,
+ .ead_cleanup_fn = ena_cleanup_intr_alloc,
+ },
+
+ {
+ .ead_seq = ENA_ATTACH_INTR_HDLRS,
+ .ead_name = "interrupt handlers",
+ .ead_attach_fn = ena_intr_add_handlers,
+ .ead_attach_hard_fail = B_TRUE,
+ .ead_cleanup_fn = ena_intr_remove_handlers,
+ },
+
+ {
+ .ead_seq = ENA_ATTACH_TXQS_ALLOC,
+ .ead_name = "Tx queues",
+ .ead_attach_fn = ena_attach_alloc_txqs,
+ .ead_attach_hard_fail = B_TRUE,
+ .ead_cleanup_fn = ena_cleanup_txqs,
+ },
+
+ {
+ .ead_seq = ENA_ATTACH_RXQS_ALLOC,
+ .ead_name = "Rx queues",
+ .ead_attach_fn = ena_attach_alloc_rxqs,
+ .ead_attach_hard_fail = B_TRUE,
+ .ead_cleanup_fn = ena_cleanup_rxqs,
+ },
+
+ /*
+	 * The chance of mac_unregister() failure poses a problem for
+ * cleanup. We address interrupt disablement and mac
+ * unregistration explicitly in the attach/detach routines.
+ */
+ {
+ .ead_seq = ENA_ATTACH_MAC_REGISTER,
+ .ead_name = "mac registration",
+ .ead_attach_fn = ena_mac_register,
+ .ead_attach_hard_fail = B_TRUE,
+ .ead_cleanup_fn = ena_no_cleanup,
+ },
+
+ {
+ .ead_seq = ENA_ATTACH_INTRS_ENABLE,
+ .ead_name = "enable interrupts",
+ .ead_attach_fn = ena_intrs_enable,
+ .ead_attach_hard_fail = B_TRUE,
+ .ead_cleanup_fn = ena_no_cleanup,
+ }
+};
+
+/*
+ * This function undoes any work done by ena_attach(), either in
+ * response to a failed attach or a planned detach. At the end of this
+ * function ena_attach_seq should be zero; otherwise it means
+ * something has not been freed/uninitialized.
+ */
+static void
+ena_cleanup(ena_t *ena)
+{
+ if (ena == NULL || ena->ena_attach_seq == 0) {
+ return;
+ }
+
+ /*
+	 * We VERIFY this because if the seq were greater than the number
+	 * of table entries we would index past the end of the table and
+	 * execute god knows what.
+ */
+ VERIFY3U(ena->ena_attach_seq, <, ENA_ATTACH_NUM_ENTRIES);
+
+ while (ena->ena_attach_seq > 0) {
+ int idx = ena->ena_attach_seq - 1;
+ ena_attach_desc_t *desc = &ena_attach_tbl[idx];
+
+ ena_dbg(ena, "running cleanup sequence: %s (%d)",
+ desc->ead_name, idx);
+
+ desc->ead_cleanup_fn(ena);
+ ena->ena_attach_seq--;
+ }
+
+ ASSERT3U(ena->ena_attach_seq, ==, 0);
+ mutex_destroy(&ena->ena_lock);
+}
+
+static int
+ena_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ ena_t *ena;
+
+ if (cmd != DDI_ATTACH) {
+ return (DDI_FAILURE);
+ }
+
+ ena = kmem_zalloc(sizeof (ena_t), KM_SLEEP);
+	ena->ena_instance = ddi_get_instance(dip);
+	ena->ena_dip = dip;
+ ena->ena_page_sz = ddi_ptob(dip, 1);
+
+ for (int i = 0; i < ENA_ATTACH_NUM_ENTRIES; i++) {
+ boolean_t success;
+ ena_attach_desc_t *desc = &ena_attach_tbl[i];
+
+ ena_dbg(ena, "running attach sequence: %s (%d)", desc->ead_name,
+ i);
+
+ if (!(success = desc->ead_attach_fn(ena))) {
+ ena_err(ena, "attach sequence failed: %s (%d)",
+ desc->ead_name, i);
+
+ if (ena->ena_attach_seq == ENA_ATTACH_MAC_REGISTER) {
+ /*
+ * In this specific case
+ * ENA_ATTACH_INTRS_ENABLE has failed,
+ * and we may or may not be able to
+ * unregister the mac, depending on if
+ * something in userspace has created
+ * a client on top.
+ *
+ * NOTE: Something that would be nice
+ * to add to mac is the ability to
+ * register a provider separate from
+ * "publishing" it to the rest of the
+ * system. This would allow a driver
+ * to register its mac, do some
+ * additional work that might fail,
+ * and then unregister if that work
+ * fails without concern for any
+ * chance of failure when calling
+ * unregister. This would remove the
+ * complexity of the situation we are
+ * trying to address here, as we would
+ * know that until the mac has been
+ * "published", there is no chance for
+ * mac_unregister() to fail.
+ */
+ if (ena_mac_unregister(ena) != 0) {
+ return (DDI_FAILURE);
+ }
+
+ ena->ena_attach_seq--;
+ } else {
+ /*
+ * Since the ead_seq is predicated on
+ * successful ead_attach_fn we must
+ * run the specific cleanup handler
+ * before calling the global cleanup
+ * routine. This also means that all
+ * cleanup functions must be able to
+ * deal with partial success of the
+ * corresponding ead_attach_fn.
+ */
+ desc->ead_cleanup_fn(ena);
+ }
+
+ ena_cleanup(ena);
+ kmem_free(ena, sizeof (ena_t));
+ return (DDI_FAILURE);
+ }
+
+ if (success) {
+ ena_dbg(ena, "attach sequence completed: %s (%d)",
+ desc->ead_name, i);
+ }
+
+ ena->ena_attach_seq = desc->ead_seq;
+ }
+
+ /*
+	 * Now that interrupts are enabled, make sure to tell the
+ * device that all AENQ descriptors are ready for writing.
+ */
+ ena_hw_bar_write32(ena, ENAHW_REG_AENQ_HEAD_DB,
+ ena->ena_aenq.eaenq_num_descs);
+
+ ddi_set_driver_private(dip, ena);
+ return (DDI_SUCCESS);
+}
+
+static int
+ena_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ ena_t *ena = ddi_get_driver_private(dip);
+
+ if (ena == NULL) {
+ return (DDI_FAILURE);
+ }
+
+ /*
+ * Before we can proceed to cleanup we have to treat
+ * mac_unregister() explicitly -- if there are still
+ * outstanding clients, then we can't proceed with detach or
+ * cleanup.
+ */
+
+ /*
+ * Why this would fail I don't know, but if we proceed to mac
+ * unregister, then there is a good chance we will panic in
+	 * the Rx interrupt handler when calling mac_rx_ring().
+ */
+ if (!ena_intrs_disable(ena)) {
+ return (DDI_FAILURE);
+ }
+
+ /* We can't detach if clients are actively using the device. */
+ if (ena_mac_unregister(ena) != 0) {
+ (void) ena_intrs_enable(ena);
+ return (DDI_FAILURE);
+ }
+
+ /*
+ * At this point we can proceed with the rest of cleanup on a
+ * best-effort basis.
+ */
+ ena->ena_attach_seq = ENA_ATTACH_RXQS_ALLOC;
+ ena_cleanup(ena);
+ ddi_set_driver_private(dip, NULL);
+ kmem_free(ena, sizeof (ena_t));
+ return (DDI_SUCCESS);
+}
+
+static struct cb_ops ena_cb_ops = {
+ .cb_open = nodev,
+ .cb_close = nodev,
+ .cb_strategy = nodev,
+ .cb_print = nodev,
+ .cb_dump = nodev,
+ .cb_read = nodev,
+ .cb_write = nodev,
+ .cb_ioctl = nodev,
+ .cb_devmap = nodev,
+ .cb_mmap = nodev,
+ .cb_segmap = nodev,
+ .cb_chpoll = nochpoll,
+ .cb_prop_op = ddi_prop_op,
+ .cb_flag = D_MP,
+ .cb_rev = CB_REV,
+ .cb_aread = nodev,
+ .cb_awrite = nodev
+};
+
+static struct dev_ops ena_dev_ops = {
+ .devo_rev = DEVO_REV,
+ .devo_refcnt = 0,
+ .devo_getinfo = NULL,
+ .devo_identify = nulldev,
+ .devo_probe = nulldev,
+ .devo_attach = ena_attach,
+ .devo_detach = ena_detach,
+ .devo_reset = nodev,
+ .devo_quiesce = ddi_quiesce_not_supported,
+ .devo_cb_ops = &ena_cb_ops
+};
+
+static struct modldrv ena_modldrv = {
+ .drv_modops = &mod_driverops,
+ .drv_linkinfo = "AWS ENA Ethernet",
+ .drv_dev_ops = &ena_dev_ops
+};
+
+static struct modlinkage ena_modlinkage = {
+ .ml_rev = MODREV_1,
+ .ml_linkage = { &ena_modldrv, NULL }
+};
+
+int
+_init(void)
+{
+ int ret;
+
+ mac_init_ops(&ena_dev_ops, ENA_MODULE_NAME);
+
+ if ((ret = mod_install(&ena_modlinkage)) != 0) {
+ mac_fini_ops(&ena_dev_ops);
+ return (ret);
+ }
+
+ return (ret);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&ena_modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int ret;
+
+ if ((ret = mod_remove(&ena_modlinkage)) != 0) {
+ return (ret);
+ }
+
+ mac_fini_ops(&ena_dev_ops);
+ return (ret);
+}
diff --git a/usr/src/uts/common/io/ena/ena.conf b/usr/src/uts/common/io/ena/ena.conf
new file mode 100644
index 0000000000..64ee011d7c
--- /dev/null
+++ b/usr/src/uts/common/io/ena/ena.conf
@@ -0,0 +1,50 @@
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2021 Oxide Computer Company
+#
+
+#
+# Driver .conf file for AWS Elastic Network Adapter. See ena(7D) for
+# valid options.
+#
+
+#
+# rx_queue_num_descs
+#
+# The number of descriptors provided by each Rx queue.
+#
+# Range: 64 - <device maximum>
+# Default: <device maximum>
+#
+# rx_queue_num_descs = 1024;
+
+#
+# rx_queue_intr_limit
+#
+# The number of frames that may be read by a single Rx interrupt.
+#
+# Range: 16 - 4096
+# Default: 256
+#
+# rx_queue_intr_limit = 256;
+
+#
+# tx_queue_num_descs
+#
+# The number of descriptors provided by each Tx queue.
+#
+# Range: 64 - <device maximum>
+# Default: <device maximum>
+#
+# tx_queue_num_descs = 1024; \ No newline at end of file
diff --git a/usr/src/uts/common/io/ena/ena.h b/usr/src/uts/common/io/ena/ena.h
new file mode 100644
index 0000000000..467da40f4b
--- /dev/null
+++ b/usr/src/uts/common/io/ena/ena.h
@@ -0,0 +1,848 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2021 Oxide Computer Company
+ */
+
+#ifndef _ENA_H
+#define _ENA_H
+
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/list.h>
+#include <sys/time.h>
+#include <sys/modctl.h>
+#include <sys/conf.h>
+#include <sys/cpuvar.h>
+#include <sys/pci.h>
+#include <sys/sysmacros.h>
+#include <sys/mac.h>
+#include <sys/mac_ether.h>
+#include <sys/mac_provider.h>
+#include <sys/pattr.h>
+#include <sys/strsun.h>
+#include <sys/ethernet.h>
+#include <sys/vlan.h>
+#include <sys/utsname.h>
+#include "ena_hw.h"
+
+/*
+ * AWS ENA Ethernet Driver
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ENA_MODULE_NAME "ena"
+
+/*
+ * The minimum supported ENA device controller version.
+ */
+#define ENA_CTRL_MAJOR_VSN_MIN 0
+#define ENA_CTRL_MINOR_VSN_MIN 0
+#define ENA_CTRL_SUBMINOR_VSN_MIN 1
+
+#define ENA_MODULE_VER_MAJOR 1
+#define ENA_MODULE_VER_MINOR 0
+#define ENA_MODULE_VER_SUBMINOR 0
+
+/*
+ * The Linux driver doesn't document what the specification version
+ * number controls or the contract around version changes. The best we
+ * can do is use the same version that they use and port version
+ * changes as they come (the last one was in 2018).
+ *
+ * common: ENA_COMMON_SPEC_VERSION_{MAJOR,MINOR}
+ */
+#define ENA_SPEC_VERSION_MAJOR 2
+#define ENA_SPEC_VERSION_MINOR 0
+
+
+/* This represents BAR 0. */
+#define ENA_REG_NUMBER 1
+
+/*
+ * A sentinel value passed as argument to ena_ring_rx() to indicate
+ * the Rx ring is being read in interrupt mode, not polling mode.
+ */
+#define ENA_INTERRUPT_MODE -1
+
+#define ENA_RX_BUF_IPHDR_ALIGNMENT 2
+#define ENA_ADMINQ_DEPTH 32
+#define ENA_AENQ_NUM_DESCS 32
+
+/* Convert milliseconds to nanoseconds. */
+#define ENA_MS_TO_NS(ms) ((ms) * 1000000ul)
+
+/*
+ * The default amount of time we will wait for an admin command to
+ * complete, specified in nanoseconds. In this case, 500 milliseconds.
+ */
+#define ENA_ADMIN_CMD_DEF_TIMEOUT MSEC2NSEC(500)
+
+/*
+ * Property macros.
+ */
+#define ENA_PROP_RXQ_NUM_DESCS "rx_queue_num_descs"
+#define ENA_PROP_RXQ_NUM_DESCS_MIN 64
+
+#define ENA_PROP_TXQ_NUM_DESCS "tx_queue_num_descs"
+#define ENA_PROP_TXQ_NUM_DESCS_MIN 64
+
+#define ENA_PROP_RXQ_INTR_LIMIT "rx_queue_intr_limit"
+#define ENA_PROP_RXQ_INTR_LIMIT_MIN 16
+#define ENA_PROP_RXQ_INTR_LIMIT_MAX 4096
+#define ENA_PROP_RXQ_INTR_LIMIT_DEF 256
+
+#define ENA_DMA_BIT_MASK(x) ((1ULL << (x)) - 1ULL)
+#define ENA_DMA_VERIFY_ADDR(ena, phys_addr) \
+ VERIFY3U(ENA_DMA_BIT_MASK((ena)->ena_dma_width) & (phys_addr), \
+ ==, (phys_addr))
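+/*
+ * For example, a 48-bit DMA width gives ENA_DMA_BIT_MASK(48) ==
+ * 0xffffffffffff, and ENA_DMA_VERIFY_ADDR() asserts that no address
+ * bits above bit 47 are set.
+ */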
+
+typedef struct ena_dma_conf {
+ size_t edc_size;
+ uint64_t edc_align;
+ int edc_sgl;
+ uchar_t edc_endian;
+ boolean_t edc_stream;
+} ena_dma_conf_t;
+
+typedef struct ena_dma_buf {
+ caddr_t edb_va;
+ size_t edb_len;
+ /*
+	 * The length given by the DMA engine, kept around for debugging
+ * purposes.
+ */
+ size_t edb_real_len;
+ size_t edb_used_len;
+ ddi_acc_handle_t edb_acc_hdl;
+ ddi_dma_handle_t edb_dma_hdl;
+ const ddi_dma_cookie_t *edb_cookie;
+} ena_dma_buf_t;
+
+/*
+ * We always sync the entire range, and therefore expect success.
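+ * (A length of zero passed to ddi_dma_sync(9F) requests a sync of the
+ * entire object.)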
+ */
+#ifdef DEBUG
+#define ENA_DMA_SYNC(buf, flag) \
+ ASSERT0(ddi_dma_sync((buf).edb_dma_hdl, 0, 0, (flag)))
+#else /* DEBUG */
+#define ENA_DMA_SYNC(buf, flag) \
+ ((void)ddi_dma_sync((buf).edb_dma_hdl, 0, 0, (flag)))
+#endif
+
+typedef struct ena_aenq_grpstr {
+ enahw_aenq_groups_t eag_type;
+ const char *eag_str;
+} ena_aenq_grpstr_t;
+
+typedef struct ena_aenq_synstr {
+ enahw_aenq_syndrome_t eas_type;
+ const char *eas_str;
+} ena_aenq_synstr_t;
+
+typedef void (*ena_aenq_hdlr_t)(void *data, enahw_aenq_desc_t *desc);
+
+typedef struct ena_aenq {
+ enahw_aenq_desc_t *eaenq_descs;
+ ena_dma_buf_t eaenq_dma;
+ ena_aenq_hdlr_t eaenq_hdlrs[ENAHW_AENQ_GROUPS_ARR_NUM];
+ uint16_t eaenq_num_descs;
+ uint16_t eaenq_head;
+ uint8_t eaenq_phase;
+} ena_aenq_t;
+
+typedef struct ena_admin_sq {
+ enahw_cmd_desc_t *eas_entries;
+ ena_dma_buf_t eas_dma;
+ uint32_t *eas_dbaddr;
+ uint16_t eas_tail;
+ uint8_t eas_phase;
+} ena_admin_sq_t;
+
+typedef struct ena_admin_cq {
+ enahw_resp_desc_t *eac_entries;
+ ena_dma_buf_t eac_dma;
+ uint16_t eac_head;
+ uint8_t eac_phase;
+} ena_admin_cq_t;
+
+/*
+ * The command context is used to track outstanding requests and match
+ * them to device responses.
+ */
+typedef struct ena_cmd_ctx {
+ list_node_t ectx_node;
+
+ /*
+ * The index into ea_cmd_ctxs where this ctx lives. Used as
+ * the command ID value in the command descriptor. This allows
+ * us to match a response to its associated context.
+ */
+ uint16_t ectx_id;
+
+ /* Is the command pending? */
+ boolean_t ectx_pending;
+
+ /* The type of command associated with this context. */
+ enahw_cmd_opcode_t ectx_cmd_opcode;
+
+ /*
+ * The location to copy the full response to. This is
+ * specified by the caller of the command during
+ * submission.
+ */
+ enahw_resp_desc_t *ectx_resp;
+} ena_cmd_ctx_t;
+
+/*
+ * The admin queue, the queue through which commands are sent to the
+ * device.
+ *
+ * WO: Write Once (at initialization)
+ *
+ * In general, only a single lock needs to be held in order to access
+ * the different parts of the admin queue:
+ *
+ * sq_lock: Any data dealing with submitting admin commands, which
+ * includes acquiring a command context.
+ *
+ * cq_lock: Any data dealing with reading command responses.
+ *
+ * stat_lock: For accessing statistics.
+ *
+ * In some cases, the ectx_lock/stat_lock may be held in tandem with
+ * either the SQ or CQ lock. In that case, the SQ/CQ lock is always
+ * entered first.
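+ * For example, ena_admin_submit_cmd() enters ea_sq_lock and then
+ * briefly ea_stat_lock to update the submission stats.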
+ */
+typedef struct ena_adminq {
+ kmutex_t ea_sq_lock; /* WO */
+ kmutex_t ea_cq_lock; /* WO */
+ kmutex_t ea_stat_lock; /* WO */
+
+ hrtime_t ea_cmd_timeout_ns; /* WO */
+
+ uint16_t ea_qlen; /* WO */
+ boolean_t ea_poll_mode; /* WO */
+
+ ena_cmd_ctx_t *ea_cmd_ctxs; /* WO */
+ list_t ea_cmd_ctxs_free; /* ea_sq_lock */
+ uint16_t ea_pending_cmds; /* ea_sq_lock */
+	ena_admin_sq_t		ea_sq;		/* ea_sq_lock */
+	ena_admin_cq_t		ea_cq;		/* ea_cq_lock */
+
+ /* ea_stat_lock */
+ struct ena_adminq_stats {
+ uint64_t cmds_fail;
+ uint64_t cmds_submitted;
+ uint64_t cmds_success;
+ uint64_t queue_full;
+ } ea_stats;
+} ena_adminq_t;
+
+typedef enum ena_attach_seq {
+ ENA_ATTACH_PCI = 1, /* PCI config space */
+ ENA_ATTACH_REGS, /* BAR mapping */
+ ENA_ATTACH_DEV_INIT, /* ENA device initialization */
+ ENA_ATTACH_READ_CONF, /* Read driver conf file */
+ ENA_ATTACH_DEV_CFG, /* Set any needed device config */
+ ENA_ATTACH_INTR_ALLOC, /* interrupt handles allocated */
+ ENA_ATTACH_INTR_HDLRS, /* intr handlers set */
+ ENA_ATTACH_TXQS_ALLOC, /* Tx Queues allocated */
+	ENA_ATTACH_RXQS_ALLOC,		/* Rx Queues allocated */
+ ENA_ATTACH_MAC_REGISTER, /* registered with mac */
+ ENA_ATTACH_INTRS_ENABLE, /* interrupts are enabled */
+ ENA_ATTACH_END
+} ena_attach_seq_t;
+
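+/*
+ * The attach sequence starts at 1, so a table indexed by it holds
+ * ENA_ATTACH_END - 1 entries.
+ */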
+#define ENA_ATTACH_SEQ_FIRST (ENA_ATTACH_PCI)
+#define ENA_ATTACH_NUM_ENTRIES (ENA_ATTACH_END - 1)
+
+struct ena;
+typedef boolean_t (*ena_attach_fn_t)(struct ena *);
+typedef void (*ena_cleanup_fn_t)(struct ena *);
+
+typedef struct ena_attach_desc {
+ ena_attach_seq_t ead_seq;
+ const char *ead_name;
+ ena_attach_fn_t ead_attach_fn;
+ boolean_t ead_attach_hard_fail;
+ ena_cleanup_fn_t ead_cleanup_fn;
+} ena_attach_desc_t;
+
+typedef enum {
+ ENA_TCB_NONE,
+ ENA_TCB_COPY
+} ena_tcb_type_t;
+
+/*
+ * The TCB is used to track information relating to the Tx of a
+ * packet. At the moment we support copy only.
+ */
+typedef struct ena_tx_control_block {
+ mblk_t *etcb_mp;
+ ena_tcb_type_t etcb_type;
+ ena_dma_buf_t etcb_dma;
+} ena_tx_control_block_t;
+
+typedef enum ena_txq_state {
+ ENA_TXQ_STATE_NONE = 0,
+ ENA_TXQ_STATE_HOST_ALLOC = 1 << 0,
+ ENA_TXQ_STATE_CQ_CREATED = 1 << 1,
+ ENA_TXQ_STATE_SQ_CREATED = 1 << 2,
+ ENA_TXQ_STATE_READY = 1 << 3, /* TxQ ready and waiting */
+ ENA_TXQ_STATE_RUNNING = 1 << 4, /* intrs enabled */
+} ena_txq_state_t;
+
+typedef struct ena_txq_stat {
+ /* Number of times mac_ether_offload_info() has failed. */
+ kstat_named_t ets_hck_meoifail;
+
+ /*
+ * Total number of times the ring was blocked due to
+ * insufficient descriptors, or unblocked due to recycling
+ * descriptors.
+ */
+ kstat_named_t ets_blocked;
+ kstat_named_t ets_unblocked;
+
+	/* The total number of descriptors that have been recycled. */
+ kstat_named_t ets_recycled;
+
+ /*
+ * Number of bytes and packets that have been _submitted_ to
+ * the device.
+ */
+ kstat_named_t ets_bytes;
+ kstat_named_t ets_packets;
+} ena_txq_stat_t;
+
+/*
+ * A transmit queue, made up of a Submission Queue (SQ) and Completion
+ * Queue (CQ) to form a logical descriptor ring for sending packets.
+ *
+ * Write Once (WO)
+ *
+ * This value is written once, before the datapath is activated, in
+ * a function which is controlled by mac(9E). Some values may be
+ * written earlier, during ena attach, like et_ena and
+ * et_sq_num_descs.
+ *
+ * Tx Mutex (TM) -- et_lock
+ *
+ * This value is protected by the Tx queue's mutex. Some values may
+ * be initialized in a WO path, but also continually updated as part
+ * of normal datapath operation, such as et_sq_avail_descs. These
+ * values need mutex protection.
+ */
+typedef struct ena_txq {
+ kmutex_t et_lock; /* WO */
+
+ struct ena *et_ena; /* WO */
+ uint_t et_txqs_idx; /* WO */
+ mac_ring_handle_t et_mrh; /* WO */
+ uint64_t et_m_gen_num; /* TM */
+ ena_txq_state_t et_state; /* WO */
+ uint16_t et_intr_vector; /* WO */
+
+ enahw_tx_desc_t *et_sq_descs; /* TM */
+ ena_dma_buf_t et_sq_dma; /* WO */
+
+ /* Is the Tx queue currently in a blocked state? */
+ boolean_t et_blocked; /* TM */
+
+ /*
+ * The number of descriptors owned by this ring. This value
+ * never changes after initialization.
+ */
+ uint16_t et_sq_num_descs; /* WO */
+
+ /*
+ * The number of descriptors currently available for Tx
+ * submission. When this value reaches zero the ring must
+	 * block until the device notifies us of freed descriptors.
+ */
+ uint16_t et_sq_avail_descs; /* TM */
+
+ /*
+ * The current tail index of the queue (the first free
+ * descriptor for host Tx submission). After initialization,
+ * this value only increments, relying on unsigned wrap
+ * around. The ENA device seems to expect this behavior,
+ * performing its own modulo on the value for the purposes of
+ * indexing, much like the driver code needs to do in order to
+ * access the proper TCB entry.
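+	 * For example, with 128 SQ descriptors a tail value of 0xffff
+	 * selects entry 0xffff & 0x7f == 0x7f, and the next increment
+	 * wraps the counter to 0, selecting entry 0 again.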
+ */
+ uint16_t et_sq_tail_idx; /* TM */
+
+ /*
+ * The phase is used to know which CQ descriptors may be
+ * reclaimed. This is explained further in ena.c.
+ */
+ uint16_t et_sq_phase; /* TM */
+ uint16_t et_sq_hw_idx; /* WO */
+
+ /*
+ * The "doorbell" address is how the host indicates to the
+ * device which descriptors are ready for Tx processing.
+ */
+ uint32_t *et_sq_db_addr; /* WO */
+
+ /*
+ * The TCBs track host Tx information, like a pointer to the
+ * mblk being submitted. Currently we maintain a 1:1 mapping
+ * of SQ descriptors to TCBs as Tx is copy only.
+ */
+ ena_tx_control_block_t *et_tcbs; /* TM */
+
+ enahw_tx_cdesc_t *et_cq_descs; /* TM */
+ ena_dma_buf_t et_cq_dma; /* WO */
+ uint16_t et_cq_num_descs; /* WO */
+ uint16_t et_cq_head_idx; /* TM */
+ uint16_t et_cq_phase; /* TM */
+ uint16_t et_cq_hw_idx; /* WO */
+
+ /*
+ * This address is used to control the CQ interrupts.
+ */
+ uint32_t *et_cq_unmask_addr; /* WO */
+ uint32_t *et_cq_head_db_addr; /* WO (currently unused) */
+ uint32_t *et_cq_numa_addr; /* WO (currently unused) */
+
+ /*
+ * This mutex protects the Tx queue stats. This mutex may be
+ * entered while et_lock is held, but et_lock is not required
+ * to access/modify the stats. However, if both locks are
+ * held, then et_lock must be entered first.
+ */
+ kmutex_t et_stat_lock;
+ ena_txq_stat_t et_stat;
+ kstat_t *et_kstat;
+} ena_txq_t;
+
+typedef enum ena_rxq_state {
+ ENA_RXQ_STATE_NONE = 0,
+ ENA_RXQ_STATE_HOST_ALLOC = 1 << 0,
+ ENA_RXQ_STATE_CQ_CREATED = 1 << 1,
+ ENA_RXQ_STATE_SQ_CREATED = 1 << 2,
+ ENA_RXQ_STATE_READY = 1 << 3, /* RxQ ready and waiting */
+ ENA_RXQ_STATE_RUNNING = 1 << 4, /* intrs enabled */
+} ena_rxq_state_t;
+
+typedef struct ena_rx_ctrl_block {
+ ena_dma_buf_t ercb_dma;
+ uint8_t ercb_offset;
+ uint16_t ercb_length;
+} ena_rx_ctrl_block_t;
+
+typedef enum {
+ ENA_RXQ_MODE_POLLING = 1,
+ ENA_RXQ_MODE_INTR = 2,
+} ena_rxq_mode_t;
+
+typedef struct ena_rxq_stat_t {
+ /* The total number of packets/bytes received on this queue. */
+ kstat_named_t ers_packets;
+ kstat_named_t ers_bytes;
+
+ /*
+ * At this time we expect all incoming frames to fit in a
+	 * single buffer/descriptor. In the rare event that the
+	 * device doesn't cooperate, this stat is incremented.
+ */
+ kstat_named_t ers_multi_desc;
+
+ /*
+ * The total number of times we failed to allocate a new mblk
+ * for an incoming frame.
+ */
+ kstat_named_t ers_allocb_fail;
+
+ /*
+ * The total number of times the Rx interrupt handler reached
+ * its maximum limit for number of packets to process in a
+ * single interrupt. If you see this number increase
+ * continuously at a steady rate, then it may be an indication
+ * the driver is not entering polling mode.
+ */
+ kstat_named_t ers_intr_limit;
+
+ /*
+ * The total number of times the device detected an incorrect
+ * IPv4 header checksum.
+ */
+ kstat_named_t ers_hck_ipv4_err;
+
+ /*
+ * The total number of times the device detected an incorrect
+ * L4/ULP checksum.
+ */
+ kstat_named_t ers_hck_l4_err;
+} ena_rxq_stat_t;
+
+/*
+ * A receive queue, made up of a Submission Queue (SQ) and Completion
+ * Queue (CQ) to form a logical descriptor ring for receiving packets.
+ *
+ * Write Once (WO)
+ *
+ * This value is written once, before the datapath is activated, in
+ * a function which is controlled by mac(9E).
+ *
+ * Rx Mutex (RM) -- er_lock
+ *
+ * This value is protected by the Rx queue's mutex. Some values may
+ * be initialized in a WO path, but also continually updated as part
+ * of normal datapath operation, such as er_sq_avail_descs. These
+ * values need mutex protection.
+ */
+typedef struct ena_rxq {
+ kmutex_t er_lock;
+
+ struct ena *er_ena; /* WO */
+ uint_t er_rxqs_idx; /* WO */
+ mac_ring_handle_t er_mrh; /* WO */
+ uint64_t er_m_gen_num; /* WO */
+ ena_rxq_state_t er_state; /* WO */
+ uint16_t er_intr_vector; /* WO */
+ ena_rxq_mode_t er_mode; /* RM */
+ uint16_t er_intr_limit; /* RM */
+
+ enahw_rx_desc_t *er_sq_descs; /* RM */
+ ena_dma_buf_t er_sq_dma; /* WO */
+ uint16_t er_sq_num_descs; /* WO */
+ uint16_t er_sq_avail_descs; /* RM */
+ uint16_t er_sq_tail_idx; /* RM */
+ uint16_t er_sq_phase; /* RM */
+ uint16_t er_sq_hw_idx; /* WO */
+ uint32_t *er_sq_db_addr; /* WO */
+
+ enahw_rx_cdesc_t *er_cq_descs; /* RM */
+ ena_dma_buf_t er_cq_dma; /* WO */
+ uint16_t er_cq_num_descs; /* WO */
+ uint16_t er_cq_head_idx; /* RM */
+ uint16_t er_cq_phase; /* RM */
+ uint16_t er_cq_hw_idx; /* WO */
+ uint32_t *er_cq_unmask_addr; /* WO */
+ uint32_t *er_cq_head_db_addr; /* WO (currently unused) */
+ uint32_t *er_cq_numa_addr; /* WO (currently unused) */
+
+ ena_rx_ctrl_block_t *er_rcbs; /* RM */
+
+ kmutex_t er_stat_lock;
+ ena_rxq_stat_t er_stat;
+ kstat_t *er_kstat;
+} ena_rxq_t;
+
+/* These are stats based off of enahw_resp_basic_stats_t. */
+typedef struct ena_basic_stat {
+ kstat_named_t ebs_tx_bytes;
+ kstat_named_t ebs_tx_pkts;
+ kstat_named_t ebs_tx_drops;
+
+ kstat_named_t ebs_rx_bytes;
+ kstat_named_t ebs_rx_pkts;
+ kstat_named_t ebs_rx_drops;
+} ena_basic_stat_t;
+
+/* These are stats based off of enahw_resp_eni_stats_t. */
+typedef struct ena_extended_stat {
+ kstat_named_t ees_bw_in_exceeded;
+ kstat_named_t ees_bw_out_exceeded;
+ kstat_named_t ees_pps_exceeded;
+ kstat_named_t ees_conns_exceeded;
+ kstat_named_t ees_linklocal_exceeded;
+} ena_extended_stat_t;
+
+/* These stats monitor which AENQ handlers have been called. */
+typedef struct ena_aenq_stat {
+ kstat_named_t eaes_default;
+ kstat_named_t eaes_link_change;
+} ena_aenq_stat_t;
+
+#define ENA_STATE_PRIMORDIAL 0x1u
+#define ENA_STATE_RUNNING 0x2u
+
+/*
+ * This structure contains the per-instance (PF or VF) state of the
+ * device.
+ */
+typedef struct ena {
+ dev_info_t *ena_dip;
+ int ena_instance;
+
+ /*
+ * Global lock, used to synchronize administration changes to
+ * the ena_t. This lock should not be held in the datapath.
+ */
+ kmutex_t ena_lock;
+ ena_attach_seq_t ena_attach_seq;
+
+ /*
+ * We use atomic ops for ena_state so that datapath consumers
+ * do not need to enter ena_lock.
+ */
+ uint32_t ena_state;
+
+ /*
+ * PCI config space and BAR handle.
+ */
+ ddi_acc_handle_t ena_pci_hdl;
+ off_t ena_reg_size;
+ caddr_t ena_reg_base;
+ ddi_device_acc_attr_t ena_reg_attr;
+ ddi_acc_handle_t ena_reg_hdl;
+
+ /*
+ * Vendor information.
+ */
+ uint16_t ena_pci_vid;
+ uint16_t ena_pci_did;
+ uint8_t ena_pci_rev;
+ uint16_t ena_pci_svid;
+ uint16_t ena_pci_sdid;
+
+ /*
+ * Device and controller versions.
+ */
+ uint32_t ena_dev_major_vsn;
+ uint32_t ena_dev_minor_vsn;
+ uint32_t ena_ctrl_major_vsn;
+ uint32_t ena_ctrl_minor_vsn;
+ uint32_t ena_ctrl_subminor_vsn;
+ uint32_t ena_ctrl_impl_id;
+
+ /*
+ * Interrupts
+ */
+ int ena_num_intrs;
+ ddi_intr_handle_t *ena_intr_handles;
+ size_t ena_intr_handles_sz;
+ int ena_intr_caps;
+ uint_t ena_intr_pri;
+
+ mac_handle_t ena_mh;
+
+ size_t ena_page_sz;
+
+ /*
+ * The MTU and data layer frame sizes.
+ */
+ uint32_t ena_mtu;
+ uint32_t ena_max_frame_hdr;
+ uint32_t ena_max_frame_total;
+
+ /* The size (in bytes) of the Rx/Tx data buffers. */
+ uint32_t ena_tx_buf_sz;
+ uint32_t ena_rx_buf_sz;
+
+ /*
+ * The maximum number of Scatter Gather List segments the
+ * device can address.
+ */
+ uint8_t ena_tx_sgl_max_sz;
+ uint8_t ena_rx_sgl_max_sz;
+
+ /* The number of descriptors per Rx/Tx queue. */
+ uint16_t ena_rxq_num_descs;
+ uint16_t ena_txq_num_descs;
+
+ /*
+ * The maximum number of frames which may be read per Rx
+ * interrupt.
+ */
+ uint16_t ena_rxq_intr_limit;
+
+ /* The Rx/Tx data queues (rings). */
+ ena_rxq_t *ena_rxqs;
+ uint16_t ena_num_rxqs;
+ ena_txq_t *ena_txqs;
+ uint16_t ena_num_txqs;
+
+ /* These statistics are device-wide. */
+ kstat_t *ena_device_basic_kstat;
+ kstat_t *ena_device_extended_kstat;
+
+ /*
+	 * This tracks AENQ-related stats; it is implicitly
+ * device-wide.
+ */
+ ena_aenq_stat_t ena_aenq_stat;
+ kstat_t *ena_aenq_kstat;
+
+ /*
+	 * The Admin Queue, through which all device commands are
+ * sent.
+ */
+ ena_adminq_t ena_aq;
+
+ ena_aenq_t ena_aenq;
+ ena_dma_buf_t ena_host_info;
+
+ /*
+ * Hardware info
+ */
+ uint32_t ena_supported_features;
+ uint8_t ena_dma_width;
+ boolean_t ena_link_up;
+ boolean_t ena_link_autoneg;
+ boolean_t ena_link_full_duplex;
+ link_duplex_t ena_link_duplex;
+ uint64_t ena_link_speed_mbits;
+ enahw_link_speeds_t ena_link_speeds;
+ link_state_t ena_link_state;
+ uint32_t ena_aenq_supported_groups;
+ uint32_t ena_aenq_enabled_groups;
+
+ uint32_t ena_tx_max_sq_num;
+ uint32_t ena_tx_max_sq_num_descs;
+ uint32_t ena_tx_max_cq_num;
+ uint32_t ena_tx_max_cq_num_descs;
+ uint16_t ena_tx_max_desc_per_pkt;
+ uint32_t ena_tx_max_hdr_len;
+
+ uint32_t ena_rx_max_sq_num;
+ uint32_t ena_rx_max_sq_num_descs;
+ uint32_t ena_rx_max_cq_num;
+ uint32_t ena_rx_max_cq_num_descs;
+ uint16_t ena_rx_max_desc_per_pkt;
+
+ /* This is calculated from the Rx/Tx queue nums. */
+ uint16_t ena_max_io_queues;
+
+ /* Hardware Offloads */
+ boolean_t ena_tx_l3_ipv4_csum;
+
+ boolean_t ena_tx_l4_ipv4_part_csum;
+ boolean_t ena_tx_l4_ipv4_full_csum;
+ boolean_t ena_tx_l4_ipv4_lso;
+
+ boolean_t ena_tx_l4_ipv6_part_csum;
+ boolean_t ena_tx_l4_ipv6_full_csum;
+ boolean_t ena_tx_l4_ipv6_lso;
+
+ boolean_t ena_rx_l3_ipv4_csum;
+ boolean_t ena_rx_l4_ipv4_csum;
+ boolean_t ena_rx_l4_ipv6_csum;
+ boolean_t ena_rx_hash;
+
+ uint32_t ena_max_mtu;
+ uint8_t ena_mac_addr[ETHERADDRL];
+} ena_t;
+
+/*
+ * Logging functions.
+ */
+/*PRINTFLIKE2*/
+extern void ena_err(const ena_t *, const char *, ...) __KPRINTFLIKE(2);
+/*PRINTFLIKE2*/
+extern void ena_dbg(const ena_t *, const char *, ...) __KPRINTFLIKE(2);
+
+extern uint32_t ena_hw_bar_read32(const ena_t *, const uint16_t);
+extern uint32_t ena_hw_abs_read32(const ena_t *, uint32_t *);
+extern void ena_hw_bar_write32(const ena_t *, const uint16_t, const uint32_t);
+extern void ena_hw_abs_write32(const ena_t *, uint32_t *, const uint32_t);
+
+/*
+ * Stats
+ */
+extern void ena_stat_device_basic_cleanup(ena_t *);
+extern boolean_t ena_stat_device_basic_init(ena_t *);
+
+extern void ena_stat_device_extended_cleanup(ena_t *);
+extern boolean_t ena_stat_device_extended_init(ena_t *);
+
+extern void ena_stat_aenq_cleanup(ena_t *);
+extern boolean_t ena_stat_aenq_init(ena_t *);
+
+extern void ena_stat_rxq_cleanup(ena_rxq_t *);
+extern boolean_t ena_stat_rxq_init(ena_rxq_t *);
+extern void ena_stat_txq_cleanup(ena_txq_t *);
+extern boolean_t ena_stat_txq_init(ena_txq_t *);
+
+/*
+ * DMA
+ */
+extern boolean_t ena_dma_alloc(ena_t *, ena_dma_buf_t *, ena_dma_conf_t *,
+ size_t);
+extern void ena_dma_free(ena_dma_buf_t *);
+extern void ena_set_dma_addr(const ena_t *, const uint64_t, enahw_addr_t *);
+extern void ena_set_dma_addr_values(const ena_t *, const uint64_t, uint32_t *,
+ uint16_t *);
+
+/*
+ * Interrupts
+ */
+extern boolean_t ena_intr_add_handlers(ena_t *);
+extern void ena_intr_remove_handlers(ena_t *);
+extern void ena_tx_intr_work(ena_txq_t *);
+extern void ena_rx_intr_work(ena_rxq_t *);
+extern void ena_aenq_work(ena_t *);
+extern boolean_t ena_intrs_disable(ena_t *);
+extern boolean_t ena_intrs_enable(ena_t *);
+
+/*
+ * MAC
+ */
+extern boolean_t ena_mac_register(ena_t *);
+extern int ena_mac_unregister(ena_t *);
+extern void ena_ring_tx_stop(mac_ring_driver_t);
+extern int ena_ring_tx_start(mac_ring_driver_t, uint64_t);
+extern mblk_t *ena_ring_tx(void *, mblk_t *);
+extern void ena_ring_rx_stop(mac_ring_driver_t);
+extern int ena_ring_rx_start(mac_ring_driver_t rh, uint64_t gen_num);
+extern int ena_m_stat(void *, uint_t, uint64_t *);
+extern mblk_t *ena_ring_rx_poll(void *, int);
+extern int ena_ring_rx_stat(mac_ring_driver_t, uint_t, uint64_t *);
+extern int ena_ring_tx_stat(mac_ring_driver_t, uint_t, uint64_t *);
+
+/*
+ * Admin API
+ */
+extern int ena_admin_submit_cmd(ena_t *, enahw_cmd_desc_t *,
+ enahw_resp_desc_t *, ena_cmd_ctx_t **);
+extern int ena_admin_poll_for_resp(ena_t *, ena_cmd_ctx_t *);
+extern void ena_free_host_info(ena_t *);
+extern boolean_t ena_init_host_info(ena_t *);
+extern int ena_create_cq(ena_t *, uint16_t, uint64_t, boolean_t, uint32_t,
+ uint16_t *, uint32_t **, uint32_t **, uint32_t **);
+extern int ena_destroy_cq(ena_t *, uint16_t);
+extern int ena_create_sq(ena_t *, uint16_t, uint64_t, boolean_t, uint16_t,
+ uint16_t *, uint32_t **);
+extern int ena_destroy_sq(ena_t *, uint16_t, boolean_t);
+extern int ena_set_feature(ena_t *, enahw_cmd_desc_t *,
+ enahw_resp_desc_t *, const enahw_feature_id_t, const uint8_t);
+extern int ena_get_feature(ena_t *, enahw_resp_desc_t *,
+ const enahw_feature_id_t, const uint8_t);
+extern int ena_admin_get_basic_stats(ena_t *, enahw_resp_desc_t *);
+extern int ena_admin_get_eni_stats(ena_t *, enahw_resp_desc_t *);
+extern int enahw_resp_status_to_errno(ena_t *, enahw_resp_status_t);
+
+/*
+ * Rx/Tx allocations
+ */
+extern boolean_t ena_alloc_rxq(ena_rxq_t *);
+extern void ena_cleanup_rxq(ena_rxq_t *);
+extern boolean_t ena_alloc_txq(ena_txq_t *);
+extern void ena_cleanup_txq(ena_txq_t *);
+
+extern ena_aenq_grpstr_t ena_groups_str[];
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ENA_H */
diff --git a/usr/src/uts/common/io/ena/ena_admin.c b/usr/src/uts/common/io/ena/ena_admin.c
new file mode 100644
index 0000000000..55e5b48901
--- /dev/null
+++ b/usr/src/uts/common/io/ena/ena_admin.c
@@ -0,0 +1,674 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2021 Oxide Computer Company
+ */
+
+/*
+ * This file contains everything having to do with communicating with
+ * the admin queue for sending commands to the device.
+ */
+
+#include "ena_hw.h"
+#include "ena.h"
+
+/*
+ * Mark the context as complete (a response has been received).
+ */
+static void
+ena_complete_cmd_ctx(ena_cmd_ctx_t *ctx, enahw_resp_desc_t *hwresp)
+{
+ bcopy(hwresp, ctx->ectx_resp, sizeof (*hwresp));
+ ctx->ectx_pending = B_FALSE;
+}
+
+/*
+ * Reset and release the context back to the free list.
+ */
+static void
+ena_release_cmd_ctx(ena_t *ena, ena_cmd_ctx_t *ctx)
+{
+ ASSERT(ctx->ectx_pending == B_FALSE);
+ ctx->ectx_resp = NULL;
+ ctx->ectx_cmd_opcode = ENAHW_CMD_NONE;
+
+ mutex_enter(&ena->ena_aq.ea_sq_lock);
+ list_insert_head(&ena->ena_aq.ea_cmd_ctxs_free, ctx);
+ ena->ena_aq.ea_pending_cmds--;
+ mutex_exit(&ena->ena_aq.ea_sq_lock);
+}
+
+/*
+ * Acquire the next available command context.
+ */
+static ena_cmd_ctx_t *
+ena_acquire_cmd_ctx(ena_adminq_t *aq)
+{
+ VERIFY(MUTEX_HELD(&aq->ea_sq_lock));
+ ASSERT3U(aq->ea_pending_cmds, <, aq->ea_qlen);
+ ena_cmd_ctx_t *ctx = list_remove_head(&aq->ea_cmd_ctxs_free);
+
+ ctx->ectx_pending = B_TRUE;
+ return (ctx);
+}
+
+/*
+ * Submit a command to the admin queue.
+ */
+int
+ena_admin_submit_cmd(ena_t *ena, enahw_cmd_desc_t *cmd, enahw_resp_desc_t *resp,
+ ena_cmd_ctx_t **ctx)
+{
+ VERIFY3U(cmd->ecd_opcode, !=, 0);
+ ena_adminq_t *aq = &ena->ena_aq;
+ ena_admin_sq_t *sq = &aq->ea_sq;
+ uint16_t modulo_mask = aq->ea_qlen - 1;
+ ena_cmd_ctx_t *lctx = NULL;
+
+ mutex_enter(&aq->ea_sq_lock);
+ uint16_t tail_mod = sq->eas_tail & modulo_mask;
+
+ if (aq->ea_pending_cmds >= aq->ea_qlen) {
+ mutex_enter(&aq->ea_stat_lock);
+ aq->ea_stats.queue_full++;
+ mutex_exit(&aq->ea_stat_lock);
+ mutex_exit(&aq->ea_sq_lock);
+ return (ENOSPC);
+ }
+
+ lctx = ena_acquire_cmd_ctx(aq);
+ lctx->ectx_cmd_opcode = cmd->ecd_opcode;
+ lctx->ectx_resp = resp;
+
+ cmd->ecd_flags = sq->eas_phase & ENAHW_CMD_PHASE_MASK;
+ ENAHW_CMD_ID(cmd, lctx->ectx_id);
+ bcopy(cmd, &sq->eas_entries[tail_mod], sizeof (*cmd));
+ ENA_DMA_SYNC(sq->eas_dma, DDI_DMA_SYNC_FORDEV);
+ sq->eas_tail++;
+ aq->ea_pending_cmds++;
+
+ mutex_enter(&aq->ea_stat_lock);
+ aq->ea_stats.cmds_submitted++;
+ mutex_exit(&aq->ea_stat_lock);
+
+ DTRACE_PROBE4(cmd__submit, enahw_cmd_desc_t *, cmd, ena_cmd_ctx_t *,
+ lctx, uint16_t, tail_mod, uint8_t, sq->eas_phase);
+
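+	/*
+	 * The phase bit written into each descriptor is inverted every
+	 * time the tail wraps back to the start of the ring, letting
+	 * the consumer distinguish freshly written entries from those
+	 * of the previous lap.
+	 */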
+ if ((sq->eas_tail & modulo_mask) == 0) {
+ sq->eas_phase = !sq->eas_phase;
+ }
+
+ ena_hw_abs_write32(ena, sq->eas_dbaddr, sq->eas_tail);
+ mutex_exit(&aq->ea_sq_lock);
+ *ctx = lctx;
+ return (0);
+}
+
+/*
+ * Read a single response from the admin queue.
+ */
+static void
+ena_admin_read_resp(ena_t *ena, enahw_resp_desc_t *hwresp)
+{
+ ena_adminq_t *aq = &ena->ena_aq;
+ ena_admin_cq_t *cq = &aq->ea_cq;
+ ena_cmd_ctx_t *ctx = NULL;
+ uint16_t modulo_mask = aq->ea_qlen - 1;
+ VERIFY(MUTEX_HELD(&aq->ea_cq_lock));
+
+ uint16_t head_mod = cq->eac_head & modulo_mask;
+ uint8_t phase = cq->eac_phase & ENAHW_RESP_PHASE_MASK;
+ uint16_t cmd_id = ENAHW_RESP_CMD_ID(hwresp);
+ ctx = &aq->ea_cmd_ctxs[cmd_id];
+ ASSERT3U(ctx->ectx_id, ==, cmd_id);
+ ena_complete_cmd_ctx(ctx, hwresp);
+
+ if (hwresp->erd_status != ENAHW_RESP_SUCCESS) {
+ mutex_enter(&aq->ea_stat_lock);
+ aq->ea_stats.cmds_fail++;
+ mutex_exit(&aq->ea_stat_lock);
+ DTRACE_PROBE4(cmd__fail, enahw_resp_desc_t *, hwresp,
+ ena_cmd_ctx_t *, ctx, uint16_t, head_mod, uint8_t, phase);
+ return;
+ }
+
+ DTRACE_PROBE4(cmd__success, enahw_resp_desc_t *, hwresp,
+ ena_cmd_ctx_t *, ctx, uint16_t, head_mod, uint8_t, phase);
+ mutex_enter(&aq->ea_stat_lock);
+ aq->ea_stats.cmds_success++;
+ mutex_exit(&aq->ea_stat_lock);
+}
+
+static void
+ena_admin_process_responses(ena_t *ena)
+{
+ ena_adminq_t *aq = &ena->ena_aq;
+ ena_admin_cq_t *cq = &aq->ea_cq;
+ uint16_t modulo_mask = aq->ea_qlen - 1;
+ enahw_resp_desc_t *hwresp;
+
+ mutex_enter(&aq->ea_cq_lock);
+ uint16_t head_mod = cq->eac_head & modulo_mask;
+ uint8_t phase = cq->eac_phase & ENAHW_RESP_PHASE_MASK;
+
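+	/*
+	 * A completion entry is ours to read only while its phase bit
+	 * matches the CQ's current phase; entries left over from the
+	 * previous lap carry the opposite phase and terminate the
+	 * loop.
+	 */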
+ ENA_DMA_SYNC(cq->eac_dma, DDI_DMA_SYNC_FORKERNEL);
+ hwresp = &cq->eac_entries[head_mod];
+ while ((hwresp->erd_flags & ENAHW_RESP_PHASE_MASK) == phase) {
+ ena_admin_read_resp(ena, hwresp);
+
+ cq->eac_head++;
+ head_mod = cq->eac_head & modulo_mask;
+
+ if (head_mod == 0) {
+ phase = !phase;
+ }
+
+ hwresp = &cq->eac_entries[head_mod];
+ }
+
+ cq->eac_phase = phase;
+ mutex_exit(&aq->ea_cq_lock);
+}
+
+/*
+ * Wait for the command described by ctx to complete by polling for
+ * status updates.
+ */
+int
+ena_admin_poll_for_resp(ena_t *ena, ena_cmd_ctx_t *ctx)
+{
+ int ret = 0;
+ hrtime_t expire = gethrtime() + ena->ena_aq.ea_cmd_timeout_ns;
+
+ while (1) {
+ ena_admin_process_responses(ena);
+
+ if (!ctx->ectx_pending) {
+ break;
+ }
+
+ /* Wait for 1 millisecond. */
+ delay(drv_usectohz(1000));
+
+ if (gethrtime() > expire) {
+ /*
+ * We have no visibility into the device to
+ * confirm it is making progress on this
+ * command. At this point the driver and
+ * device cannot agree on the state of the
+ * world: perhaps the device is still making
+ * progress but not fast enough, perhaps the
+ * device completed the command but there was
+ * a failure to deliver the reply, perhaps the
+ * command failed but once again the reply was
+ * not delivered. With this unknown state the
+ * best thing to do is to reset the device and
+ * start from scratch. But as we don't have
+ * that capability at the moment the next best
+ * thing to do is to spin or panic; we choose
+ * to panic.
+ */
+ panic("timed out waiting for admin response");
+ }
+ }
+
+ ret = enahw_resp_status_to_errno(ena, ctx->ectx_resp->erd_status);
+ ena_release_cmd_ctx(ena, ctx);
+ return (ret);
+}
+
+void
+ena_free_host_info(ena_t *ena)
+{
+ ena_dma_free(&ena->ena_host_info);
+}
+
+boolean_t
+ena_init_host_info(ena_t *ena)
+{
+ enahw_host_info_t *ehi;
+ int ret = 0;
+ int *regs;
+ uint_t nregs;
+ ena_dma_buf_t *hi_dma;
+ enahw_cmd_desc_t cmd;
+ enahw_feat_host_attr_t *ha_cmd =
+ &cmd.ecd_cmd.ecd_set_feat.ecsf_feat.ecsf_host_attr;
+ enahw_resp_desc_t resp;
+ ena_dma_conf_t conf = {
+ .edc_size = ENAHW_HOST_INFO_ALLOC_SZ,
+ .edc_align = ENAHW_HOST_INFO_ALIGNMENT,
+ .edc_sgl = 1,
+ .edc_endian = DDI_NEVERSWAP_ACC,
+ .edc_stream = B_FALSE,
+ };
+
+ hi_dma = &ena->ena_host_info;
+
+ if (!ena_dma_alloc(ena, hi_dma, &conf, 4096)) {
+ ena_err(ena, "failed to allocate DMA for host info");
+ return (B_FALSE);
+ }
+
+ ehi = (void *)hi_dma->edb_va;
+ ehi->ehi_ena_spec_version =
+ ((ENA_SPEC_VERSION_MAJOR << ENAHW_HOST_INFO_SPEC_MAJOR_SHIFT) |
+ (ENA_SPEC_VERSION_MINOR));
+
+ ehi->ehi_bdf = 0;
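+	/*
+	 * The bus/device/function is packed into ehi_bdf as bus[15:8],
+	 * device[7:3], function[2:0], matching the shifts below.
+	 */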
+ if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, ena->ena_dip,
+ DDI_PROP_DONTPASS, "reg", &regs, &nregs) == DDI_PROP_SUCCESS) {
+ if (nregs != 0) {
+ ehi->ehi_bdf |= PCI_REG_BUS_G(regs[0]) << 8;
+ ehi->ehi_bdf |= PCI_REG_DEV_G(regs[0]) << 3;
+ ehi->ehi_bdf |= PCI_REG_FUNC_G(regs[0]);
+ }
+
+ ddi_prop_free(regs);
+ }
+
+ /*
+	 * There is no illumos OS type; it would be nice to ping
+ * someone at Amazon and see if we can't get one added.
+ */
+ ehi->ehi_os_type = ENAHW_OS_FREEBSD;
+ ehi->ehi_kernel_ver = 511; /* If you know you know */
+ (void) strlcpy((char *)ehi->ehi_kernel_ver_str, utsname.version,
+ sizeof (ehi->ehi_kernel_ver_str));
+ ehi->ehi_os_dist = 0; /* What everyone else does. */
+ ehi->ehi_driver_ver =
+ (ENA_MODULE_VER_MAJOR) |
+ (ENA_MODULE_VER_MINOR << ENAHW_HOST_INFO_MINOR_SHIFT) |
+ (ENA_MODULE_VER_SUBMINOR << ENAHW_HOST_INFO_SUB_MINOR_SHIFT);
+ ehi->ehi_num_cpus = ncpus_online;
+
+ /*
+ * ENA devices are not created equal. Some will support
+ * features not found in others. This field tells the device
+ * which features the driver supports.
+ *
+ * ENAHW_HOST_INFO_RX_OFFSET
+ *
+ * Some ENA devices will write the frame data at an offset
+ * in the buffer, presumably for alignment purposes. We
+ * support this feature for the sole reason that the Linux
+ * driver does as well.
+ *
+ * ENAHW_HOST_INFO_INTERRUPT_MODERATION
+ *
+ * Based on the Linux history this flag indicates that the
+ * driver "supports interrupt moderation properly". What
+ * that means is anyone's guess. The Linux driver seems to
+ * have some "adaptive" interrupt moderation, so perhaps
+ * it's that? In any case, FreeBSD doesn't bother with
+ * setting this flag, so we'll leave it be for now as well.
+ *
+ * If you're curious to know if the device supports
+ * interrupt moderation: the FEAT_INTERRUPT_MODERATION flag
+ * will be set in ena_hw.eh_supported_features.
+ *
+ * ENAHW_HOST_INFO_RX_BUF_MIRRORING
+ *
+ * Support traffic mirroring by allowing the hypervisor to
+ * read the buffer memory directly. This probably has to do
+ * with AWS flow logs, allowing more efficient mirroring.
+ * But it's hard to say for sure given we only have the
+ * Linux commit log to go off of. In any case, the only
+ * requirement for this feature is that the Rx DMA buffers
+ * be read/write, which they are.
+ *
+ * ENAHW_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY
+ *
+ * The device supports the retrieving and updating of the
+ * RSS function and hash key. As we don't yet implement RSS
+ * this is disabled.
+ */
+ ehi->ehi_driver_supported_features =
+ ENAHW_HOST_INFO_RX_OFFSET_MASK |
+ ENAHW_HOST_INFO_RX_BUF_MIRRORING_MASK;
+
+ ENA_DMA_SYNC(*hi_dma, DDI_DMA_SYNC_FORDEV);
+ bzero(&cmd, sizeof (cmd));
+ ena_set_dma_addr(ena, hi_dma->edb_cookie->dmac_laddress,
+ &ha_cmd->efha_os_addr);
+
+ /*
+ * You might notice the "debug area" is not allocated or
+ * configured, that is on purpose.
+ *
+ * The "debug area" is a region of host memory that contains
+ * the String Set (SS) tables used to report statistics to
+ * tools like ethtool (on Linux). This table consists of one
+ * of more entries of a 32-byte string (the name of the
+	 * or more entries of a 32-byte string (the name of the
+ * stats reported here contain both the host-side stats as
+ * well as device-reported stats (ENAHW_GET_STATS_TYPE_ENI). I
+ * believe the reason for calling it the "debug area" is that
+ * it can be accessed from outside of the guest, allowing an
+ * AWS user (?) or Amazon employee to get basic information
+ * about the state of the device from the guest's point of
+ * view.
+ *
+ * In the fullness of time, our driver should probably support
+ * this aspect of ENA. For the time being, all testing
+ * indicates the driver and device function fine without it.
+ */
+
+ ret = ena_set_feature(ena, &cmd, &resp, ENAHW_FEAT_HOST_ATTR_CONFIG,
+ ENAHW_FEAT_HOST_ATTR_CONFIG_VER);
+ if (ret != 0) {
+ ena_err(ena, "failed to set host attributes: %d", ret);
+ ena_dma_free(hi_dma);
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+int
+ena_create_cq(ena_t *ena, uint16_t num_descs, uint64_t phys_addr,
+ boolean_t is_tx, uint32_t vector, uint16_t *hw_index,
+ uint32_t **unmask_addr, uint32_t **headdb, uint32_t **numanode)
+{
+ int ret;
+ enahw_cmd_desc_t cmd;
+ enahw_cmd_create_cq_t *cmd_cq = &cmd.ecd_cmd.ecd_create_cq;
+ enahw_resp_desc_t resp;
+ enahw_resp_create_cq_t *resp_cq = &resp.erd_resp.erd_create_cq;
+ ena_cmd_ctx_t *ctx = NULL;
+ uint8_t desc_size = is_tx ? sizeof (enahw_tx_cdesc_t) :
+ sizeof (enahw_rx_cdesc_t);
+
+ bzero(&cmd, sizeof (cmd));
+ bzero(&resp, sizeof (resp));
+
+ cmd.ecd_opcode = ENAHW_CMD_CREATE_CQ;
+ ENAHW_CMD_CREATE_CQ_INTERRUPT_MODE_ENABLE(cmd_cq);
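+	/*
+	 * The completion descriptor size is passed to the device in
+	 * 32-bit words, hence the assertion and the division by four.
+	 */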
+ ASSERT3U(desc_size % 4, ==, 0);
+ ENAHW_CMD_CREATE_CQ_DESC_SIZE_WORDS(cmd_cq, desc_size / 4);
+ cmd_cq->ecq_num_descs = num_descs;
+ cmd_cq->ecq_msix_vector = vector;
+ ena_set_dma_addr(ena, phys_addr, &cmd_cq->ecq_addr);
+
+ if ((ret = ena_admin_submit_cmd(ena, &cmd, &resp, &ctx)) != 0) {
+ ena_err(ena, "failed to submit Create CQ command: %d", ret);
+ return (ret);
+ }
+
+ if ((ret = ena_admin_poll_for_resp(ena, ctx)) != 0) {
+ ena_err(ena, "failed to Create CQ: %d", ret);
+ return (ret);
+ }
+
+ *hw_index = resp_cq->ercq_idx;
+ *unmask_addr = (uint32_t *)(ena->ena_reg_base +
+ resp_cq->ercq_interrupt_mask_reg_offset);
+
+ if (resp_cq->ercq_head_db_reg_offset != 0) {
+ *headdb = (uint32_t *)(ena->ena_reg_base +
+ resp_cq->ercq_head_db_reg_offset);
+ } else {
+ *headdb = NULL;
+ }
+
+ if (resp_cq->ercq_numa_node_reg_offset != 0) {
+ *numanode = (uint32_t *)(ena->ena_reg_base +
+ resp_cq->ercq_numa_node_reg_offset);
+ } else {
+ *numanode = NULL;
+ }
+
+ return (0);
+}
+
+int
+ena_destroy_cq(ena_t *ena, uint16_t hw_idx)
+{
+ enahw_cmd_desc_t cmd;
+ enahw_resp_desc_t resp;
+ ena_cmd_ctx_t *ctx = NULL;
+ int ret;
+
+ bzero(&cmd, sizeof (cmd));
+ bzero(&resp, sizeof (resp));
+ cmd.ecd_opcode = ENAHW_CMD_DESTROY_CQ;
+ cmd.ecd_cmd.ecd_destroy_cq.edcq_idx = hw_idx;
+
+ if ((ret = ena_admin_submit_cmd(ena, &cmd, &resp, &ctx)) != 0) {
+ ena_err(ena, "failed to submit Destroy CQ command: %d", ret);
+ return (ret);
+ }
+
+ if ((ret = ena_admin_poll_for_resp(ena, ctx)) != 0) {
+ ena_err(ena, "failed to Destroy CQ: %d", ret);
+ return (ret);
+ }
+
+ return (0);
+}
+
+int
+ena_create_sq(ena_t *ena, uint16_t num_descs, uint64_t phys_addr,
+ boolean_t is_tx, uint16_t cq_index, uint16_t *hw_index, uint32_t **db_addr)
+{
+ int ret;
+ enahw_cmd_desc_t cmd;
+ enahw_cmd_create_sq_t *cmd_sq = &cmd.ecd_cmd.ecd_create_sq;
+ enahw_resp_desc_t resp;
+ enahw_resp_create_sq_t *resp_sq = &resp.erd_resp.erd_create_sq;
+ enahw_sq_direction_t dir =
+ is_tx ? ENAHW_SQ_DIRECTION_TX : ENAHW_SQ_DIRECTION_RX;
+ ena_cmd_ctx_t *ctx = NULL;
+
+	if (!ISP2(num_descs)) {
+		ena_err(ena, "the number of descs must be a power of 2, but "
+		    "is %d", num_descs);
+		return (EINVAL);
+	}
+
+ bzero(&cmd, sizeof (cmd));
+ bzero(&resp, sizeof (resp));
+ cmd.ecd_opcode = ENAHW_CMD_CREATE_SQ;
+ ENAHW_CMD_CREATE_SQ_DIR(cmd_sq, dir);
+ ENAHW_CMD_CREATE_SQ_PLACEMENT_POLICY(cmd_sq,
+ ENAHW_PLACEMENT_POLICY_HOST);
+ ENAHW_CMD_CREATE_SQ_COMPLETION_POLICY(cmd_sq,
+ ENAHW_COMPLETION_POLICY_DESC);
+ /*
+ * We limit all SQ descriptor rings to an SGL of 1, therefore
+ * they are always physically contiguous.
+ */
+ ENAHW_CMD_CREATE_SQ_PHYSMEM_CONTIG(cmd_sq);
+ cmd_sq->ecsq_cq_idx = cq_index;
+ cmd_sq->ecsq_num_descs = num_descs;
+
+ /*
+ * If we ever use a non-host placement policy, then guard this
+ * code against placement type (this value should not be set
+ * for device placement).
+ */
+ ena_set_dma_addr(ena, phys_addr, &cmd_sq->ecsq_base);
+
+ if ((ret = ena_admin_submit_cmd(ena, &cmd, &resp, &ctx)) != 0) {
+ ena_err(ena, "failed to submit Create SQ command: %d", ret);
+ return (ret);
+ }
+
+ if ((ret = ena_admin_poll_for_resp(ena, ctx)) != 0) {
+ ena_err(ena, "failed to Create SQ: %d", ret);
+ return (ret);
+ }
+
+ *hw_index = resp_sq->ersq_idx;
+ *db_addr = (uint32_t *)(ena->ena_reg_base +
+ resp_sq->ersq_db_reg_offset);
+ return (0);
+}
+
+int
+ena_destroy_sq(ena_t *ena, uint16_t hw_idx, boolean_t is_tx)
+{
+ enahw_cmd_desc_t cmd;
+ enahw_cmd_destroy_sq_t *cmd_sq = &cmd.ecd_cmd.ecd_destroy_sq;
+ enahw_resp_desc_t resp;
+ ena_cmd_ctx_t *ctx = NULL;
+ int ret;
+
+ bzero(&cmd, sizeof (cmd));
+ bzero(&resp, sizeof (resp));
+ cmd.ecd_opcode = ENAHW_CMD_DESTROY_SQ;
+ cmd_sq->edsq_idx = hw_idx;
+ ENAHW_CMD_DESTROY_SQ_DIR(cmd_sq, is_tx);
+
+ if ((ret = ena_admin_submit_cmd(ena, &cmd, &resp, &ctx)) != 0) {
+ ena_err(ena, "failed to submit Destroy SQ command: %d", ret);
+ return (ret);
+ }
+
+ if ((ret = ena_admin_poll_for_resp(ena, ctx)) != 0) {
+ ena_err(ena, "failed Destroy SQ: %d", ret);
+ return (ret);
+ }
+
+ return (0);
+}
+
+/*
+ * Determine if a given feature is available on this device.
+ */
+static boolean_t
+ena_is_feature_avail(ena_t *ena, const enahw_feature_id_t feat_id)
+{
+ VERIFY3U(feat_id, <=, ENAHW_FEAT_NUM);
+ uint32_t mask = 1U << feat_id;
+
+ /*
+ * The device attributes feature is always supported, as
+ * indicated by the common code.
+ */
+ if (feat_id == ENAHW_FEAT_DEVICE_ATTRIBUTES) {
+ return (B_TRUE);
+ }
+
+ return ((ena->ena_supported_features & mask) != 0);
+}
+
+int
+ena_set_feature(ena_t *ena, enahw_cmd_desc_t *cmd, enahw_resp_desc_t *resp,
+ const enahw_feature_id_t feat_id, const uint8_t feat_ver)
+{
+ enahw_cmd_set_feat_t *cmd_sf = &cmd->ecd_cmd.ecd_set_feat;
+ ena_cmd_ctx_t *ctx = NULL;
+ int ret = 0;
+
+ if (!ena_is_feature_avail(ena, feat_id)) {
+ ena_err(ena, "attempted to set unsupported feature: 0x%x %d"
+ " (0x%x)", feat_id, feat_ver, ena->ena_supported_features);
+ return (ENOTSUP);
+ }
+
+ cmd->ecd_opcode = ENAHW_CMD_SET_FEATURE;
+ cmd_sf->ecsf_comm.efc_id = feat_id;
+ cmd_sf->ecsf_comm.efc_version = feat_ver;
+ cmd_sf->ecsf_comm.efc_flags = 0;
+
+ if ((ret = ena_admin_submit_cmd(ena, cmd, resp, &ctx)) != 0) {
+ ena_err(ena, "failed to submit Set Feature command: %d", ret);
+ return (ret);
+ }
+
+ return (ena_admin_poll_for_resp(ena, ctx));
+}
+
+int
+ena_get_feature(ena_t *ena, enahw_resp_desc_t *resp,
+ const enahw_feature_id_t feat_id, const uint8_t feat_ver)
+{
+ enahw_cmd_desc_t cmd;
+ enahw_cmd_get_feat_t *cmd_gf = &cmd.ecd_cmd.ecd_get_feat;
+ ena_cmd_ctx_t *ctx = NULL;
+ int ret = 0;
+
+ if (!ena_is_feature_avail(ena, feat_id)) {
+ return (ENOTSUP);
+ }
+
+ bzero(&cmd, sizeof (cmd));
+ cmd.ecd_opcode = ENAHW_CMD_GET_FEATURE;
+ cmd_gf->ecgf_comm.efc_id = feat_id;
+ cmd_gf->ecgf_comm.efc_version = feat_ver;
+ ENAHW_GET_FEAT_FLAGS_GET_CURR_VAL(cmd_gf);
+
+ if ((ret = ena_admin_submit_cmd(ena, &cmd, resp, &ctx)) != 0) {
+ ena_err(ena, "failed to submit Get Feature command: %d", ret);
+ return (ret);
+ }
+
+ return (ena_admin_poll_for_resp(ena, ctx));
+}
+
+int
+ena_admin_get_basic_stats(ena_t *ena, enahw_resp_desc_t *resp)
+{
+ int ret = 0;
+ enahw_cmd_desc_t cmd;
+ enahw_cmd_get_stats_t *cmd_stats = &cmd.ecd_cmd.ecd_get_stats;
+ ena_cmd_ctx_t *ctx = NULL;
+
+ bzero(&cmd, sizeof (cmd));
+ bzero(resp, sizeof (*resp));
+ cmd.ecd_opcode = ENAHW_CMD_GET_STATS;
+ cmd_stats->ecgs_type = ENAHW_GET_STATS_TYPE_BASIC;
+ cmd_stats->ecgs_scope = ENAHW_GET_STATS_SCOPE_ETH;
+ cmd_stats->ecgs_device_id = ENAHW_CMD_GET_STATS_MY_DEVICE_ID;
+
+ if ((ret = ena_admin_submit_cmd(ena, &cmd, resp, &ctx)) != 0) {
+ ena_err(ena, "failed to submit Get Basic Stats command: %d",
+ ret);
+ return (ret);
+ }
+
+ if ((ret = ena_admin_poll_for_resp(ena, ctx)) != 0) {
+ ena_err(ena, "failed to Get Basic Stats: %d", ret);
+ return (ret);
+ }
+
+ return (0);
+}
+
+int
+ena_admin_get_eni_stats(ena_t *ena, enahw_resp_desc_t *resp)
+{
+ int ret = 0;
+ enahw_cmd_desc_t cmd;
+ enahw_cmd_get_stats_t *cmd_stats = &cmd.ecd_cmd.ecd_get_stats;
+ ena_cmd_ctx_t *ctx = NULL;
+
+ bzero(&cmd, sizeof (cmd));
+ bzero(resp, sizeof (*resp));
+ cmd.ecd_opcode = ENAHW_CMD_GET_STATS;
+ cmd_stats->ecgs_type = ENAHW_GET_STATS_TYPE_ENI;
+ cmd_stats->ecgs_scope = ENAHW_GET_STATS_SCOPE_ETH;
+ cmd_stats->ecgs_device_id = ENAHW_CMD_GET_STATS_MY_DEVICE_ID;
+
+ if ((ret = ena_admin_submit_cmd(ena, &cmd, resp, &ctx)) != 0) {
+ ena_err(ena, "failed to submit Get ENI Stats command: %d", ret);
+ return (ret);
+ }
+
+ if ((ret = ena_admin_poll_for_resp(ena, ctx)) != 0) {
+ ena_err(ena, "failed to Get ENI Stats: %d", ret);
+ return (ret);
+ }
+
+ return (0);
+}
diff --git a/usr/src/uts/common/io/ena/ena_dma.c b/usr/src/uts/common/io/ena/ena_dma.c
new file mode 100644
index 0000000000..48f39b9dbb
--- /dev/null
+++ b/usr/src/uts/common/io/ena/ena_dma.c
@@ -0,0 +1,191 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2021 Oxide Computer Company
+ */
+
+#include "ena.h"
+
+/*
+ * Create DMA attributes based on the conf parameter.
+ */
+void
+ena_dma_attr(const ena_t *ena, ddi_dma_attr_t *attrp,
+ const ena_dma_conf_t *conf)
+{
+ bzero(attrp, sizeof (*attrp));
+
+ /*
+ * Round up maximums to next page. This is what the Linux and
+	 * Round up maximums to the next page. This is what the Linux and
+	 * FreeBSD drivers do, so we follow suit.
+ const size_t size_up =
+ P2ROUNDUP_TYPED(conf->edc_size, ena->ena_page_sz, size_t);
+
+ attrp->dma_attr_version = DMA_ATTR_V0;
+
+ /*
+ * The device tells us the window it supports in terms of
+ * number of bits, we convert that to the appropriate mask.
+ */
+ ASSERT3U(ena->ena_dma_width, >=, 32);
+ ASSERT3U(ena->ena_dma_width, <=, 48);
+ attrp->dma_attr_addr_lo = 0x0;
+ attrp->dma_attr_addr_hi = ENA_DMA_BIT_MASK(ena->ena_dma_width);
+
+ /*
+ * This indicates the amount of data that can fit in one
+ * cookie/segment. We allow the entire object to live in one
+ * segment, when possible.
+ *
+ * NOTE: This value must be _one less_ than the desired max
+ * (i.e. a value of 4095 indicates a max of 4096).
+ */
+ attrp->dma_attr_count_max = size_up - 1;
+
+ /*
+ * The alignment of the starting address.
+ */
+ attrp->dma_attr_align = conf->edc_align;
+
+ /*
+ * The segment boundary dictates the address which a segment
+ * cannot cross. In this case there is no boundary.
+ */
+ attrp->dma_attr_seg = UINT64_MAX;
+
+ /*
+ * Allow a burst size of the entire object.
+ */
+ attrp->dma_attr_burstsizes = size_up;
+
+ /*
+ * Minimum and maximum amount of data we can send. This isn't
+ * strictly limited by PCI in hardware, as it'll just make the
+	 * appropriate number of requests. Similarly, PCIe allows for
+ * an arbitrary granularity. We set this to one, as it's
+ * really a matter of what hardware is requesting from us.
+ */
+ attrp->dma_attr_minxfer = 0x1;
+ attrp->dma_attr_maxxfer = size_up;
+ attrp->dma_attr_granular = 0x1;
+
+ /*
+ * The maximum length of the Scatter Gather List, aka the
+ * maximum number of segments a device can address in a
+ * transfer.
+ */
+ attrp->dma_attr_sgllen = conf->edc_sgl;
+}
+
+void
+ena_dma_free(ena_dma_buf_t *edb)
+{
+ if (edb->edb_cookie != NULL) {
+ (void) ddi_dma_unbind_handle(edb->edb_dma_hdl);
+ edb->edb_cookie = NULL;
+ edb->edb_real_len = 0;
+ }
+
+ if (edb->edb_acc_hdl != NULL) {
+ ddi_dma_mem_free(&edb->edb_acc_hdl);
+ edb->edb_acc_hdl = NULL;
+ edb->edb_va = NULL;
+ }
+
+ if (edb->edb_dma_hdl != NULL) {
+ ddi_dma_free_handle(&edb->edb_dma_hdl);
+ edb->edb_dma_hdl = NULL;
+ }
+
+ edb->edb_len = 0;
+}
+
+boolean_t
+ena_dma_alloc(ena_t *ena, ena_dma_buf_t *edb, ena_dma_conf_t *conf, size_t size)
+{
+ int ret;
+ size_t size_allocated;
+ ddi_dma_attr_t attr;
+ ddi_device_acc_attr_t acc;
+ uint_t flags =
+ conf->edc_stream ? DDI_DMA_STREAMING : DDI_DMA_CONSISTENT;
+
+ ena_dma_attr(ena, &attr, conf);
+
+ acc.devacc_attr_version = DDI_DEVICE_ATTR_V1;
+ acc.devacc_attr_endian_flags = conf->edc_endian;
+ acc.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
+
+ ret = ddi_dma_alloc_handle(ena->ena_dip, &attr, DDI_DMA_DONTWAIT, NULL,
+ &edb->edb_dma_hdl);
+ if (ret != DDI_SUCCESS) {
+ ena_err(ena, "!failed to allocate DMA handle: %d", ret);
+ return (B_FALSE);
+ }
+
+ ret = ddi_dma_mem_alloc(edb->edb_dma_hdl, size, &acc, flags,
+ DDI_DMA_DONTWAIT, NULL, &edb->edb_va, &size_allocated,
+ &edb->edb_acc_hdl);
+ if (ret != DDI_SUCCESS) {
+ ena_err(ena, "!failed to allocate %lu bytes of DMA "
+ "memory: %d", size, ret);
+ ena_dma_free(edb);
+ return (B_FALSE);
+ }
+
+ bzero(edb->edb_va, size_allocated);
+
+ ret = ddi_dma_addr_bind_handle(edb->edb_dma_hdl, NULL, edb->edb_va,
+ size_allocated, DDI_DMA_RDWR | flags, DDI_DMA_DONTWAIT, NULL, NULL,
+ NULL);
+ if (ret != DDI_SUCCESS) {
+ ena_err(ena, "!failed to bind %lu bytes of DMA "
+ "memory: %d", size_allocated, ret);
+ ena_dma_free(edb);
+ return (B_FALSE);
+ }
+
+ edb->edb_len = size;
+ edb->edb_real_len = size_allocated;
+ edb->edb_cookie = ddi_dma_cookie_one(edb->edb_dma_hdl);
+ return (B_TRUE);
+}
+
+/*
+ * Write the physical DMA address to the ENA hardware address pointer.
+ * While the DMA engine should guarantee that the allocation is within
+ * the specified range, we double check here to catch programmer error
+ * and avoid hard-to-debug situations.
+ */
+void
+ena_set_dma_addr(const ena_t *ena, const uint64_t phys_addr,
+ enahw_addr_t *hwaddrp)
+{
+ ENA_DMA_VERIFY_ADDR(ena, phys_addr);
+ hwaddrp->ea_low = (uint32_t)phys_addr;
+ hwaddrp->ea_high = (uint16_t)(phys_addr >> 32);
+}
+
+/*
+ * The same as the above function, but writes the physical address to
+ * the supplied value pointers instead. Mostly used as a sanity check
+ * that the address fits in the reported DMA width.
+ */
+void
+ena_set_dma_addr_values(const ena_t *ena, const uint64_t phys_addr,
+ uint32_t *dst_low, uint16_t *dst_high)
+{
+ ENA_DMA_VERIFY_ADDR(ena, phys_addr);
+ *dst_low = (uint32_t)phys_addr;
+ *dst_high = (uint16_t)(phys_addr >> 32);
+}
diff --git a/usr/src/uts/common/io/ena/ena_gld.c b/usr/src/uts/common/io/ena/ena_gld.c
new file mode 100644
index 0000000000..2c27d0d31c
--- /dev/null
+++ b/usr/src/uts/common/io/ena/ena_gld.c
@@ -0,0 +1,465 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2021 Oxide Computer Company
+ */
+#include "ena.h"
+
+/*
+ * Group/Ring callbacks
+ */
+
+/*
+ * The ena driver supports only a single mac address: the one assigned
+ * to it by the hypervisor. If mac requests an address besides this
+ * one, then return ENOTSUP. This will prevent VNICs from being
+ * created, as it should.
+ */
+static int
+ena_group_add_mac(void *arg, const uint8_t *mac_addr)
+{
+ ena_t *ena = arg;
+
+ if (ETHER_IS_MULTICAST(mac_addr)) {
+ return (EINVAL);
+ }
+
+ if (bcmp(ena->ena_mac_addr, mac_addr, ETHERADDRL) == 0) {
+ return (0);
+ }
+
+ return (ENOTSUP);
+}
+
+static int
+ena_group_rem_mac(void *arg, const uint8_t *mac_addr)
+{
+ ena_t *ena = arg;
+
+ if (ETHER_IS_MULTICAST(mac_addr)) {
+ return (EINVAL);
+ }
+
+ if (bcmp(ena->ena_mac_addr, mac_addr, ETHERADDRL) == 0) {
+ return (0);
+ }
+
+ return (ENOTSUP);
+}
+
+static int
+ena_ring_rx_intr_disable(mac_intr_handle_t mih)
+{
+ ena_rxq_t *rxq = (ena_rxq_t *)mih;
+ uint32_t intr_ctrl;
+
+ mutex_enter(&rxq->er_lock);
+ intr_ctrl = ena_hw_abs_read32(rxq->er_ena, rxq->er_cq_unmask_addr);
+ ENAHW_REG_INTR_MASK(intr_ctrl);
+ ena_hw_abs_write32(rxq->er_ena, rxq->er_cq_unmask_addr, intr_ctrl);
+ rxq->er_mode = ENA_RXQ_MODE_POLLING;
+ mutex_exit(&rxq->er_lock);
+ return (0);
+}
+
+static int
+ena_ring_rx_intr_enable(mac_intr_handle_t mih)
+{
+ ena_rxq_t *rxq = (ena_rxq_t *)mih;
+ uint32_t intr_ctrl;
+
+ mutex_enter(&rxq->er_lock);
+ intr_ctrl = ena_hw_abs_read32(rxq->er_ena, rxq->er_cq_unmask_addr);
+ ENAHW_REG_INTR_UNMASK(intr_ctrl);
+ ena_hw_abs_write32(rxq->er_ena, rxq->er_cq_unmask_addr, intr_ctrl);
+ rxq->er_mode = ENA_RXQ_MODE_INTR;
+ mutex_exit(&rxq->er_lock);
+ return (0);
+}
+
+static void
+ena_fill_rx_group(void *arg, mac_ring_type_t rtype, const int index,
+ mac_group_info_t *infop, mac_group_handle_t gh)
+{
+ ena_t *ena = arg;
+
+ VERIFY3S(rtype, ==, MAC_RING_TYPE_RX);
+ /*
+ * Typically you pass an Rx group data structure as
+	 * mgi_driver, but since we only ever have one group we just
+	 * pass the top-level ena_t.
+ */
+ infop->mgi_driver = (mac_group_driver_t)ena;
+ infop->mgi_start = NULL;
+ infop->mgi_stop = NULL;
+ infop->mgi_addmac = ena_group_add_mac;
+ infop->mgi_remmac = ena_group_rem_mac;
+ infop->mgi_count = ena->ena_num_intrs - 1;
+}
+
+static void
+ena_fill_tx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
+ const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
+{
+ ena_t *ena = arg;
+ ena_txq_t *txq = &(ena->ena_txqs[ring_index]);
+
+ VERIFY3S(rtype, ==, MAC_RING_TYPE_TX);
+ VERIFY3S(ring_index, <, ena->ena_num_txqs);
+ /* Link driver Tx queue to mac ring handle and vice versa. */
+ txq->et_mrh = rh;
+ infop->mri_driver = (mac_ring_driver_t)txq;
+ infop->mri_start = ena_ring_tx_start;
+ infop->mri_stop = ena_ring_tx_stop;
+ infop->mri_tx = ena_ring_tx;
+ infop->mri_stat = ena_ring_tx_stat;
+}
+
+static void
+ena_fill_rx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
+ const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
+{
+ ena_t *ena = arg;
+ ena_rxq_t *rxq = &(ena->ena_rxqs[ring_index]);
+
+ VERIFY3S(rtype, ==, MAC_RING_TYPE_RX);
+ VERIFY3S(ring_index, <, ena->ena_num_rxqs);
+ rxq->er_mrh = rh;
+ infop->mri_driver = (mac_ring_driver_t)rxq;
+ infop->mri_start = ena_ring_rx_start;
+ infop->mri_stop = ena_ring_rx_stop;
+ infop->mri_poll = ena_ring_rx_poll;
+ infop->mri_stat = ena_ring_rx_stat;
+ infop->mri_intr.mi_handle = (mac_intr_handle_t)rxq;
+ infop->mri_intr.mi_enable = ena_ring_rx_intr_enable;
+ infop->mri_intr.mi_disable = ena_ring_rx_intr_disable;
+ infop->mri_intr.mi_ddi_handle =
+ ena->ena_intr_handles[rxq->er_intr_vector];
+}
+
+static int
+ena_m_start(void *arg)
+{
+ ena_t *ena = arg;
+
+ atomic_or_32(&ena->ena_state, ENA_STATE_RUNNING);
+ return (0);
+}
+
+static void
+ena_m_stop(void *arg)
+{
+ ena_t *ena = arg;
+ atomic_and_32(&ena->ena_state, ~ENA_STATE_RUNNING);
+}
+
+/*
+ * As discussed in ena_group_add_mac(), ENA only supports a single MAC
+ * address, and therefore we prevent VNICs from being created. That
+ * means there is no chance for promisc to be used as a means for
+ * implementing VNIC support on ENA, as we never allow them to be
+ * created in the first place.
+ *
+ * As for promisc itself, returning success is about the best we can
+ * do. There is no promisc API for an ENA device -- you get only the
+ * exact traffic AWS wants you to see.
+ */
+static int
+ena_m_setpromisc(void *arg, boolean_t on)
+{
+ return (0);
+}
+
+/*
+ * Similarly to promisc, there is no multicast API for an ENA
+ * device.
+ */
+static int
+ena_m_multicast(void *arg, boolean_t add, const uint8_t *multicast_address)
+{
+ return (0);
+}
+
+static boolean_t
+ena_m_getcapab(void *arg, mac_capab_t capab, void *cap_data)
+{
+ ena_t *ena = arg;
+ mac_capab_rings_t *cap_rings;
+
+ switch (capab) {
+ case MAC_CAPAB_RINGS:
+ cap_rings = cap_data;
+ cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
+ cap_rings->mr_gaddring = NULL;
+ cap_rings->mr_gremring = NULL;
+ ASSERT3U(ena->ena_num_intrs, >=, 2);
+
+ switch (cap_rings->mr_type) {
+ case MAC_RING_TYPE_TX:
+ /*
+ * We use pseudo Tx groups for now.
+ */
+ cap_rings->mr_gnum = 0;
+ cap_rings->mr_rnum = ena->ena_num_intrs - 1;
+ cap_rings->mr_rget = ena_fill_tx_ring;
+ break;
+ case MAC_RING_TYPE_RX:
+ cap_rings->mr_rnum = ena->ena_num_intrs - 1;
+ cap_rings->mr_rget = ena_fill_rx_ring;
+ /*
+ * The ENA device provides no means to add mac
+ * filters or set promisc mode; it's only
+ * meant to receive its pre-designated unicast
+ * address. However, we still want rings as
+ * the device does provide multiple queues and
+ * RSS.
+ */
+ cap_rings->mr_gnum = 1;
+ cap_rings->mr_gget = ena_fill_rx_group;
+ break;
+ }
+
+ break;
+
+ case MAC_CAPAB_HCKSUM:
+ case MAC_CAPAB_LSO:
+ return (B_FALSE);
+ default:
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+static int
+ena_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, const void *pr_val)
+{
+ return (ENOTSUP);
+}
+
+static int
+ena_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, void *pr_val)
+{
+ ena_t *ena = arg;
+ int ret = 0;
+ uint64_t speed;
+ uint8_t *u8;
+
+ mutex_enter(&ena->ena_lock);
+
+ switch (pr_num) {
+ case MAC_PROP_DUPLEX:
+ if (pr_valsize < sizeof (link_duplex_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+
+ bcopy(&ena->ena_link_duplex, pr_val, sizeof (link_duplex_t));
+ break;
+
+ case MAC_PROP_SPEED:
+ if (pr_valsize < sizeof (uint64_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+
+ speed = ena->ena_link_speed_mbits * 1000000ULL;
+ bcopy(&speed, pr_val, sizeof (speed));
+ break;
+
+ case MAC_PROP_STATUS:
+ if (pr_valsize < sizeof (link_state_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+
+ bcopy(&ena->ena_link_state, pr_val, sizeof (link_state_t));
+ break;
+
+ case MAC_PROP_AUTONEG:
+ if (pr_valsize < sizeof (uint8_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+
+ u8 = pr_val;
+ *u8 = (ena->ena_link_autoneg ? 0 : 1);
+ break;
+
+ case MAC_PROP_MTU:
+ if (pr_valsize < sizeof (uint32_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+
+ bcopy(&ena->ena_mtu, pr_val, sizeof (uint32_t));
+ break;
+
+ case MAC_PROP_ADV_1000FDX_CAP:
+ case MAC_PROP_EN_1000FDX_CAP:
+ if (pr_valsize < sizeof (uint8_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+
+ u8 = pr_val;
+ *u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_1G) != 0;
+ break;
+
+ case MAC_PROP_ADV_2500FDX_CAP:
+ case MAC_PROP_EN_2500FDX_CAP:
+ if (pr_valsize < sizeof (uint8_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+
+ u8 = pr_val;
+ *u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_2_HALF_G) != 0;
+ break;
+
+ case MAC_PROP_ADV_5000FDX_CAP:
+ case MAC_PROP_EN_5000FDX_CAP:
+ if (pr_valsize < sizeof (uint8_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+
+ u8 = pr_val;
+ *u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_5G) != 0;
+ break;
+
+ case MAC_PROP_ADV_10GFDX_CAP:
+ case MAC_PROP_EN_10GFDX_CAP:
+ if (pr_valsize < sizeof (uint8_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+
+ u8 = pr_val;
+ *u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_10G) != 0;
+ break;
+
+ case MAC_PROP_ADV_25GFDX_CAP:
+ case MAC_PROP_EN_25GFDX_CAP:
+ if (pr_valsize < sizeof (uint8_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+
+ u8 = pr_val;
+ *u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_25G) != 0;
+ break;
+
+ case MAC_PROP_ADV_40GFDX_CAP:
+ case MAC_PROP_EN_40GFDX_CAP:
+ if (pr_valsize < sizeof (uint8_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+
+ u8 = pr_val;
+ *u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_40G) != 0;
+ break;
+
+ case MAC_PROP_ADV_100GFDX_CAP:
+ case MAC_PROP_EN_100GFDX_CAP:
+ if (pr_valsize < sizeof (uint8_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+
+ u8 = pr_val;
+ *u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_100G) != 0;
+ break;
+
+ default:
+ ret = ENOTSUP;
+ break;
+ }
+
+ mutex_exit(&ena->ena_lock);
+ return (ret);
+}
+
+static void
+ena_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ mac_prop_info_handle_t prh)
+{
+}
+
+static mac_callbacks_t ena_m_callbacks = {
+ .mc_callbacks = MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO,
+ .mc_getstat = ena_m_stat,
+ .mc_start = ena_m_start,
+ .mc_stop = ena_m_stop,
+ .mc_setpromisc = ena_m_setpromisc,
+ .mc_multicst = ena_m_multicast,
+ .mc_getcapab = ena_m_getcapab,
+ .mc_setprop = ena_m_setprop,
+ .mc_getprop = ena_m_getprop,
+ .mc_propinfo = ena_m_propinfo,
+};
+
+int
+ena_mac_unregister(ena_t *ena)
+{
+ if (ena->ena_mh == NULL) {
+ return (0);
+ }
+
+ return (mac_unregister(ena->ena_mh));
+}
+
+boolean_t
+ena_mac_register(ena_t *ena)
+{
+ int ret;
+ mac_register_t *regp;
+
+ if ((regp = mac_alloc(MAC_VERSION)) == NULL) {
+ ena_err(ena, "failed to allocate MAC handle");
+ return (B_FALSE);
+ }
+
+ regp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
+ regp->m_driver = ena;
+ regp->m_dip = ena->ena_dip;
+ regp->m_instance = 0;
+ regp->m_src_addr = ena->ena_mac_addr;
+ regp->m_dst_addr = NULL;
+ regp->m_callbacks = &ena_m_callbacks;
+ regp->m_min_sdu = 0;
+ regp->m_max_sdu = ena->ena_mtu;
+ regp->m_pdata = NULL;
+ regp->m_pdata_size = 0;
+ regp->m_priv_props = NULL;
+ regp->m_margin = VLAN_TAGSZ;
+ regp->m_v12n = MAC_VIRT_LEVEL1;
+
+ if ((ret = mac_register(regp, &ena->ena_mh)) != 0) {
+ ena_err(ena, "failed to register ena with mac: %d", ret);
+ }
+
+ mac_free(regp);
+
+ if (ret == 0) {
+ /*
+ * Until we get the first AENQ link change event, we
+ * do not actually know the status of the link.
+ */
+ mac_link_update(ena->ena_mh, LINK_STATE_UNKNOWN);
+ }
+
+ return (ret == 0);
+}
diff --git a/usr/src/uts/common/io/ena/ena_hw.c b/usr/src/uts/common/io/ena/ena_hw.c
new file mode 100644
index 0000000000..f37b4100df
--- /dev/null
+++ b/usr/src/uts/common/io/ena/ena_hw.c
@@ -0,0 +1,93 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2021 Oxide Computer Company
+ */
+
+#include "ena_hw.h"
+#include "ena.h"
+
+uint32_t
+ena_hw_bar_read32(const ena_t *ena, const uint16_t offset)
+{
+ caddr_t addr = ena->ena_reg_base + offset;
+ return (ena_hw_abs_read32(ena, (uint32_t *)addr));
+}
+
+uint32_t
+ena_hw_abs_read32(const ena_t *ena, uint32_t *addr)
+{
+ VERIFY3U(addr, >=, ena->ena_reg_base);
+ VERIFY3U(addr, <, ena->ena_reg_base + (ena->ena_reg_size - 4));
+
+ return (ddi_get32(ena->ena_reg_hdl, addr));
+}
+
+void
+ena_hw_bar_write32(const ena_t *ena, const uint16_t offset, const uint32_t val)
+{
+ caddr_t addr = ena->ena_reg_base + offset;
+ ena_hw_abs_write32(ena, (uint32_t *)addr, val);
+}
+
+void
+ena_hw_abs_write32(const ena_t *ena, uint32_t *addr, const uint32_t val)
+{
+ VERIFY3P(ena, !=, NULL);
+ VERIFY3P(addr, !=, NULL);
+ VERIFY3U(addr, >=, ena->ena_reg_base);
+ VERIFY3U(addr, <, ena->ena_reg_base + (ena->ena_reg_size - 4));
+
+ ddi_put32(ena->ena_reg_hdl, addr, val);
+}
+
+int
+enahw_resp_status_to_errno(ena_t *ena, enahw_resp_status_t status)
+{
+ int ret = 0;
+
+ switch (status) {
+ case ENAHW_RESP_SUCCESS:
+ break;
+
+ case ENAHW_RESP_RESOURCE_ALLOCATION_FAILURE:
+ ret = ENOMEM;
+ break;
+
+ case ENAHW_RESP_UNSUPPORTED_OPCODE:
+ ret = ENOTSUP;
+ break;
+
+ case ENAHW_RESP_BAD_OPCODE:
+ case ENAHW_RESP_MALFORMED_REQUEST:
+ case ENAHW_RESP_ILLEGAL_PARAMETER:
+ ret = EINVAL;
+ break;
+
+ case ENAHW_RESP_RESOURCE_BUSY:
+ ret = EAGAIN;
+ break;
+
+ case ENAHW_RESP_UNKNOWN_ERROR:
+ default:
+ /*
+		 * If the device presents us with an "unknown error"
+ * code, or the status code is undefined, then we log
+ * an error and convert it to EIO.
+ */
+ ena_err(ena, "unexpected status code: %d", status);
+ ret = EIO;
+ break;
+ }
+
+ return (ret);
+}
diff --git a/usr/src/uts/common/io/ena/ena_hw.h b/usr/src/uts/common/io/ena/ena_hw.h
new file mode 100644
index 0000000000..fbd67851b4
--- /dev/null
+++ b/usr/src/uts/common/io/ena/ena_hw.h
@@ -0,0 +1,1930 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2021 Oxide Computer Company
+ */
+
+/*
+ * This file declares all constants and structures dealing with the
+ * physical ENA device. It is based on the ena_com code of the public
+ * Linux and FreeBSD drivers. While this file is based on the common
+ * code it doesn't share the same type names. Where it is useful, a
+ * "common" reference is added to include the name of the type as
+ * defined in the common code.
+ *
+ * The Linux driver defines enq_admin_aq_entry as the top-level type
+ * for admin command descriptors. From this type you can access the
+ * common bits shared by every descriptor (ena_admin_aq_common_desc)
+ * as well as the control buffer (ena_admin_ctrl_buff_info) which is
+ * present for _some_ commands. Other than that, this top-level type
+ * treats the rest of the data as an opaque array of unsigned 32-bit
+ * integers. Then, for each individual command, the Linux driver
+ * defines a dedicated type, each of which contains the following:
+ *
+ * 1. The common descriptor: ena_admin_aq_common_desc.
+ *
+ * 2. The optional control buffer desc: ena_admin_ctrl_buff_info.
+ *
+ * 3. The command-specific data.
+ *
+ * 4. Optional padding to make sure all commands are 64 bytes in size.
+ *
+ * Furthermore, there may be further common types for commands which
+ * are made up of several sub-commands, e.g. the get/set feature
+ * commands.
+ *
+ * Finally, when a command is passed to the common function for
+ * executing commands (ena_com_execute_admin_command()), it is cast as
+ * a pointer to the top-level type: ena_admin_aq_entry.
+ *
+ * This works for the Linux driver just fine, but it causes lots of
+ * repetition in the structure definitions and also means there is no
+ * easy way to determine all valid commands. This ENA driver has
+ * turned the Linux approach inside out -- the top-level type is a
+ * union of all possible commands: enahw_cmd_desc_t. Each command may
+ * then further sub-type via unions to represent its sub-commands.
+ * This same treatment was given to the response descriptor:
+ * enahw_resp_desc_t.
+ *
+ * What is the point of knowing all this? Well, when referencing the
+ * common type in the comment above the enahw_ type, you need to keep
+ * in mind that the Linux/common type will include all the common
+ * descriptor bits, whereas these types do not.
+ *
+ * The common code DOES NOT pack any of these structures, and thus
+ * neither do we. That means these structures all rely on natural
+ * compiler alignment, just as the common code does. In ena.c you will
+ * find CTASSERTs for many of these structures, to verify they are of
+ * the expected size.
+ */
+
+#ifndef _ENA_HW_H
+#define _ENA_HW_H
+
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/types.h>
+#include <sys/debug.h>
+#include <sys/ethernet.h>
+
+/*
+ * The common code sets the upper limit of I/O queues to 128. In this
+ * case a "queue" is a SQ+CQ pair that forms a logical queue or ring
+ * for sending or receiving packets. Thus, at maximum, we may expect
+ * 128 Tx rings, and 128 Rx rings; though, practically speaking, the
+ * number of rings will often be limited by number of CPUs or
+ * available interrupts.
+ *
+ * common: ENA_MAX_NUM_IO_QUEUES
+ */
+#define ENAHW_MAX_NUM_IO_QUEUES 128
+
+/*
+ * Generate a 32-bit bitmask where the bits between high (inclusive)
+ * and low (inclusive) are set to 1.
+ */
+#define GENMASK(h, l) (((~0U) - (1U << (l)) + 1) & (~0U >> (32 - 1 - (h))))
+
+/*
+ * Generate a 64-bit bitmask where bit b is set to 1.
+ */
+#define BIT(b) (1UL << (b))
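+
+/*
+ * For example, GENMASK(7, 4) evaluates to 0xf0 and GENMASK(15, 8) to
+ * 0xff00, while BIT(3) evaluates to 0x8.
+ */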
+
+#define ENAHW_DMA_ADMINQ_ALIGNMENT 8
+
+#define ENAHW_ADMIN_CQ_DESC_BUF_ALIGNMENT 8
+#define ENAHW_ADMIN_SQ_DESC_BUF_ALIGNMENT 8
+#define ENAHW_AENQ_DESC_BUF_ALIGNMENT 8
+#define ENAHW_HOST_INFO_ALIGNMENT 8
+#define ENAHW_HOST_INFO_ALLOC_SZ 4096
+#define ENAHW_IO_CQ_DESC_BUF_ALIGNMENT 4096
+#define ENAHW_IO_SQ_DESC_BUF_ALIGNMENT 8
+
+/*
+ * BAR0 register offsets.
+ *
+ * Any register not defined in the common code was marked as a gap,
+ * using the hex address of the register as suffix. The idea is to
+ * make it clear where the gaps are and allow the
+ * ena_hw_update_reg_cache() function to display any bits stored in
+ * these gaps in case they turn out to be interesting later.
+ */
+#define ENAHW_REG_VERSION 0x0
+#define ENAHW_REG_CONTROLLER_VERSION 0x4
+#define ENAHW_REG_CAPS 0x8
+#define ENAHW_REG_CAPS_EXT 0xc
+#define ENAHW_REG_ASQ_BASE_LO 0x10
+#define ENAHW_REG_ASQ_BASE_HI 0x14
+#define ENAHW_REG_ASQ_CAPS 0x18
+#define ENAHW_REG_GAP_1C 0x1c
+#define ENAHW_REG_ACQ_BASE_LO 0x20
+#define ENAHW_REG_ACQ_BASE_HI 0x24
+#define ENAHW_REG_ACQ_CAPS 0x28
+#define ENAHW_REG_ASQ_DB 0x2c
+#define ENAHW_REG_ACQ_TAIL 0x30
+#define ENAHW_REG_AENQ_CAPS 0x34
+#define ENAHW_REG_AENQ_BASE_LO 0x38
+#define ENAHW_REG_AENQ_BASE_HI 0x3c
+#define ENAHW_REG_AENQ_HEAD_DB 0x40
+#define ENAHW_REG_AENQ_TAIL 0x44
+#define ENAHW_REG_GAP_48 0x48
+#define ENAHW_REG_INTERRUPT_MASK 0x4c
+#define ENAHW_REG_GAP_50 0x50
+#define ENAHW_REG_DEV_CTL 0x54
+#define ENAHW_REG_DEV_STS 0x58
+#define ENAHW_REG_MMIO_REG_READ 0x5c
+#define ENAHW_REG_MMIO_RESP_LO 0x60
+#define ENAHW_REG_MMIO_RESP_HI 0x64
+#define ENAHW_REG_RSS_IND_ENTRY_UPDATE 0x68
+#define ENAHW_NUM_REGS ((ENAHW_REG_RSS_IND_ENTRY_UPDATE / 4) + 1)
+
+/*
+ * Device Version (Register 0x0)
+ */
+#define ENAHW_DEV_MINOR_VSN_MASK 0xff
+#define ENAHW_DEV_MAJOR_VSN_SHIFT 8
+#define ENAHW_DEV_MAJOR_VSN_MASK 0xff00
+
+#define ENAHW_DEV_MAJOR_VSN(vsn) \
+ (((vsn) & ENAHW_DEV_MAJOR_VSN_MASK) >> ENAHW_DEV_MAJOR_VSN_SHIFT)
+#define ENAHW_DEV_MINOR_VSN(vsn) \
+ ((vsn) & ENAHW_DEV_MINOR_VSN_MASK)
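+
+/*
+ * As an illustration, a version register value of 0x0203 decodes to a
+ * major version of 2 (ENAHW_DEV_MAJOR_VSN) and a minor version of 3
+ * (ENAHW_DEV_MINOR_VSN).
+ */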
+
+/*
+ * Controller Version (Register 0x4)
+ */
+#define ENAHW_CTRL_SUBMINOR_VSN_MASK 0xff
+#define ENAHW_CTRL_MINOR_VSN_SHIFT 8
+#define ENAHW_CTRL_MINOR_VSN_MASK 0xff00
+#define ENAHW_CTRL_MAJOR_VSN_SHIFT 16
+#define ENAHW_CTRL_MAJOR_VSN_MASK 0xff0000
+#define ENAHW_CTRL_IMPL_ID_SHIFT 24
+#define ENAHW_CTRL_IMPL_ID_MASK 0xff000000
+
+#define ENAHW_CTRL_MAJOR_VSN(vsn) \
+ (((vsn) & ENAHW_CTRL_MAJOR_VSN_MASK) >> ENAHW_CTRL_MAJOR_VSN_SHIFT)
+#define ENAHW_CTRL_MINOR_VSN(vsn) \
+ (((vsn) & ENAHW_CTRL_MINOR_VSN_MASK) >> ENAHW_CTRL_MINOR_VSN_SHIFT)
+#define ENAHW_CTRL_SUBMINOR_VSN(vsn) \
+ ((vsn) & ENAHW_CTRL_SUBMINOR_VSN_MASK)
+#define ENAHW_CTRL_IMPL_ID(vsn) \
+ (((vsn) & ENAHW_CTRL_IMPL_ID_MASK) >> ENAHW_CTRL_IMPL_ID_SHIFT)
+
+/*
+ * Device Caps (Register 0x8)
+ */
+#define ENAHW_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1
+#define ENAHW_CAPS_RESET_TIMEOUT_SHIFT 1
+#define ENAHW_CAPS_RESET_TIMEOUT_MASK 0x3e
+#define ENAHW_CAPS_RESET_TIMEOUT(v) \
+ (((v) & ENAHW_CAPS_RESET_TIMEOUT_MASK) >> \
+ ENAHW_CAPS_RESET_TIMEOUT_SHIFT)
+#define ENAHW_CAPS_DMA_ADDR_WIDTH_SHIFT 8
+#define ENAHW_CAPS_DMA_ADDR_WIDTH_MASK 0xff00
+#define ENAHW_CAPS_DMA_ADDR_WIDTH(v) \
+ (((v) & ENAHW_CAPS_DMA_ADDR_WIDTH_MASK) >> \
+ ENAHW_CAPS_DMA_ADDR_WIDTH_SHIFT)
+#define ENAHW_CAPS_ADMIN_CMD_TIMEOUT_SHIFT 16
+#define ENAHW_CAPS_ADMIN_CMD_TIMEOUT_MASK 0xf0000
+#define ENAHW_CAPS_ADMIN_CMD_TIMEOUT(v) \
+ (((v) & ENAHW_CAPS_ADMIN_CMD_TIMEOUT_MASK) >> \
+ ENAHW_CAPS_ADMIN_CMD_TIMEOUT_SHIFT)
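+
+/*
+ * For example, ENAHW_CAPS_DMA_ADDR_WIDTH(0x3000) evaluates to 0x30,
+ * i.e. a device reporting a 48-bit DMA address width.
+ */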
+
+enum enahw_reset_reason_types {
+ ENAHW_RESET_NORMAL = 0,
+ ENAHW_RESET_KEEP_ALIVE_TO = 1,
+ ENAHW_RESET_ADMIN_TO = 2,
+ ENAHW_RESET_MISS_TX_CMPL = 3,
+ ENAHW_RESET_INV_RX_REQ_ID = 4,
+ ENAHW_RESET_INV_TX_REQ_ID = 5,
+ ENAHW_RESET_TOO_MANY_RX_DESCS = 6,
+ ENAHW_RESET_INIT_ERR = 7,
+ ENAHW_RESET_DRIVER_INVALID_STATE = 8,
+ ENAHW_RESET_OS_TRIGGER = 9,
+ ENAHW_RESET_OS_NETDEV_WD = 10,
+ ENAHW_RESET_SHUTDOWN = 11,
+ ENAHW_RESET_USER_TRIGGER = 12,
+ ENAHW_RESET_GENERIC = 13,
+ ENAHW_RESET_MISS_INTERRUPT = 14,
+ ENAHW_RESET_LAST,
+};
+
+/*
+ * Admin Submission Queue Caps (Register 0x18)
+ */
+#define ENAHW_ASQ_CAPS_DEPTH_MASK 0xffff
+#define ENAHW_ASQ_CAPS_ENTRY_SIZE_SHIFT 16
+#define ENAHW_ASQ_CAPS_ENTRY_SIZE_MASK 0xffff0000
+
+#define ENAHW_ASQ_CAPS_DEPTH(x) ((x) & ENAHW_ASQ_CAPS_DEPTH_MASK)
+
+#define ENAHW_ASQ_CAPS_ENTRY_SIZE(x) \
+ (((x) << ENAHW_ASQ_CAPS_ENTRY_SIZE_SHIFT) & \
+ ENAHW_ASQ_CAPS_ENTRY_SIZE_MASK)
+
+/*
+ * Admin Completion Queue Caps (Register 0x28)
+ */
+#define ENAHW_ACQ_CAPS_DEPTH_MASK 0xffff
+#define ENAHW_ACQ_CAPS_ENTRY_SIZE_SHIFT 16
+#define ENAHW_ACQ_CAPS_ENTRY_SIZE_MASK 0xffff0000
+
+#define ENAHW_ACQ_CAPS_DEPTH(x) ((x) & ENAHW_ACQ_CAPS_DEPTH_MASK)
+
+#define ENAHW_ACQ_CAPS_ENTRY_SIZE(x) \
+ (((x) << ENAHW_ACQ_CAPS_ENTRY_SIZE_SHIFT) & \
+ ENAHW_ACQ_CAPS_ENTRY_SIZE_MASK)
+
+/*
+ * Asynchronous Event Notification Queue Caps (Register 0x34)
+ */
+#define ENAHW_AENQ_CAPS_DEPTH_MASK 0xffff
+#define ENAHW_AENQ_CAPS_ENTRY_SIZE_SHIFT 16
+#define ENAHW_AENQ_CAPS_ENTRY_SIZE_MASK 0xffff0000
+
+#define ENAHW_AENQ_CAPS_DEPTH(x) ((x) & ENAHW_AENQ_CAPS_DEPTH_MASK)
+
+#define ENAHW_AENQ_CAPS_ENTRY_SIZE(x) \
+ (((x) << ENAHW_AENQ_CAPS_ENTRY_SIZE_SHIFT) & \
+ ENAHW_AENQ_CAPS_ENTRY_SIZE_MASK)
+
+/*
+ * Interrupt Mask (Register 0x4c)
+ */
+#define ENAHW_INTR_UNMASK 0x0
+#define ENAHW_INTR_MASK 0x1
+
+/*
+ * Device Control (Register 0x54)
+ */
+#define ENAHW_DEV_CTL_DEV_RESET_MASK 0x1
+#define ENAHW_DEV_CTL_AQ_RESTART_SHIFT 1
+#define ENAHW_DEV_CTL_AQ_RESTART_MASK 0x2
+#define ENAHW_DEV_CTL_QUIESCENT_SHIFT 2
+#define ENAHW_DEV_CTL_QUIESCENT_MASK 0x4
+#define ENAHW_DEV_CTL_IO_RESUME_SHIFT 3
+#define ENAHW_DEV_CTL_IO_RESUME_MASK 0x8
+#define ENAHW_DEV_CTL_RESET_REASON_SHIFT 28
+#define ENAHW_DEV_CTL_RESET_REASON_MASK 0xf0000000
+
+/*
+ * Device Status (Register 0x58)
+ */
+#define ENAHW_DEV_STS_READY_MASK 0x1
+#define ENAHW_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1
+#define ENAHW_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2
+#define ENAHW_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2
+#define ENAHW_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4
+#define ENAHW_DEV_STS_RESET_IN_PROGRESS_SHIFT 3
+#define ENAHW_DEV_STS_RESET_IN_PROGRESS_MASK 0x8
+#define ENAHW_DEV_STS_RESET_FINISHED_SHIFT 4
+#define ENAHW_DEV_STS_RESET_FINISHED_MASK 0x10
+#define ENAHW_DEV_STS_FATAL_ERROR_SHIFT 5
+#define ENAHW_DEV_STS_FATAL_ERROR_MASK 0x20
+#define ENAHW_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_SHIFT 6
+#define ENAHW_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_MASK 0x40
+#define ENAHW_DEV_STS_QUIESCENT_STATE_ACHIEVED_SHIFT 7
+#define ENAHW_DEV_STS_QUIESCENT_STATE_ACHIEVED_MASK 0x80
+
+/* common: ena_admin_aenq_common_desc */
+typedef struct enahw_aenq_desc {
+ uint16_t ead_group;
+ uint16_t ead_syndrome;
+ uint8_t ead_flags;
+ uint8_t ead_rsvd1[3];
+ uint32_t ead_ts_low;
+ uint32_t ead_ts_high;
+
+ union {
+ uint32_t raw[12];
+
+ struct {
+ uint32_t flags;
+ } link_change;
+
+ struct {
+ uint32_t rx_drops_low;
+ uint32_t rx_drops_high;
+ uint32_t tx_drops_low;
+ uint32_t tx_drops_high;
+ } keep_alive;
+ } ead_payload;
+} enahw_aenq_desc_t;
+
+#define ENAHW_AENQ_DESC_PHASE_MASK BIT(0)
+
+#define ENAHW_AENQ_DESC_PHASE(desc) \
+ ((desc)->ead_flags & ENAHW_AENQ_DESC_PHASE_MASK)
+
+#define ENAHW_AENQ_LINK_CHANGE_LINK_STATUS_MASK BIT(0)
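+
+/*
+ * For example, a link change handler would typically consider the
+ * link up when
+ * (desc->ead_payload.link_change.flags &
+ * ENAHW_AENQ_LINK_CHANGE_LINK_STATUS_MASK) != 0.
+ */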
+
+/*
+ * Asynchronous Event Notification Queue groups.
+ *
+ * Note: These values represent the bit position of each feature as
+ * returned by ENAHW_FEAT_AENQ_CONFIG. We encode them this way so that
+ * they can double as an index into the AENQ handlers array.
+ *
+ * common: ena_admin_aenq_group
+ */
+typedef enum enahw_aenq_groups {
+ ENAHW_AENQ_GROUP_LINK_CHANGE = 0,
+ ENAHW_AENQ_GROUP_FATAL_ERROR = 1,
+ ENAHW_AENQ_GROUP_WARNING = 2,
+ ENAHW_AENQ_GROUP_NOTIFICATION = 3,
+ ENAHW_AENQ_GROUP_KEEP_ALIVE = 4,
+ ENAHW_AENQ_GROUP_REFRESH_CAPABILITIES = 5,
+ ENAHW_AENQ_GROUPS_ARR_NUM = 6,
+} enahw_aenq_groups_t;
+
+/*
+ * The reason for ENAHW_AENQ_GROUP_NOTIFICATION.
+ *
+ * common: ena_admin_aenq_notification_syndrome
+ */
+typedef enum enahw_aenq_syndrome {
+ ENAHW_AENQ_SYNDROME_UPDATE_HINTS = 2,
+} enahw_aenq_syndrome_t;
+
+/*
+ * ENA devices use a 48-bit memory space.
+ *
+ * common: ena_common_mem_addr
+ */
+typedef struct enahw_addr {
+ uint32_t ea_low;
+ uint16_t ea_high;
+ uint16_t ea_rsvd; /* must be zero */
+} enahw_addr_t;
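+
+/*
+ * For example, the 48-bit physical address 0x123456789abc is stored
+ * as ea_low = 0x56789abc and ea_high = 0x1234 (see
+ * ena_set_dma_addr()).
+ */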
+
+/* common: ena_admin_ctrl_buff_info */
+struct enahw_ctrl_buff {
+ uint32_t ecb_length;
+ enahw_addr_t ecb_addr;
+};
+
+/* common: ena_admin_get_set_feature_common_desc */
+struct enahw_feat_common {
+ /*
+ * 1:0 Select which value you want.
+ *
+ * 0x1 = Current value.
+ * 0x3 = Default value.
+ *
+	 * Note: Linux seems to set this to 0 to get the value; it is
+	 * not clear whether that is a bug or just another way to get
+	 * the current value.
+ *
+ * 7:3 Reserved.
+ */
+ uint8_t efc_flags;
+
+ /* An id from enahw_feature_id_t. */
+ uint8_t efc_id;
+
+ /*
+ * Each feature is versioned, allowing upgrades to the feature
+ * set without breaking backwards compatibility. The driver
+ * uses this field to specify which version it supports
+ * (starting from zero). Linux doesn't document this very well
+ * and sets this value to 0 for most features. We define a set
+ * of macros, underneath the enahw_feature_id_t type, clearly
+ * documenting the version we support for each feature.
+ */
+ uint8_t efc_version;
+ uint8_t efc_rsvd;
+};
+
+/* common: ena_admin_get_feat_cmd */
+typedef struct enahw_cmd_get_feat {
+ struct enahw_ctrl_buff ecgf_ctrl_buf;
+ struct enahw_feat_common ecgf_comm;
+ uint32_t egcf_unused[11];
+} enahw_cmd_get_feat_t;
+
+/*
+ * N.B. Linux sets efc_flags to 0 (via memset) when reading the
+ * current value, but the comments say it should be 0x1. We follow the
+ * comments.
+ */
+#define ENAHW_GET_FEAT_FLAGS_GET_CURR_VAL(desc) \
+ ((desc)->ecgf_comm.efc_flags) |= 0x1
+#define ENAHW_GET_FEAT_FLAGS_GET_DEF_VAL(desc) \
+ ((desc)->ecgf_comm.efc_flags) |= 0x3
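+
+/*
+ * Note that these macros OR into efc_flags rather than assign it, so
+ * the command descriptor must be zeroed first, as ena_get_feature()
+ * does via bzero() before applying
+ * ENAHW_GET_FEAT_FLAGS_GET_CURR_VAL().
+ */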
+
+/*
+ * Set the MTU of the device. This value does not include the L2
+ * headers or trailers, only the payload.
+ *
+ * common: ena_admin_set_feature_mtu_desc
+ */
+typedef struct enahw_feat_mtu {
+ uint32_t efm_mtu;
+} enahw_feat_mtu_t;
+
+/* common: ena_admin_set_feature_host_attr_desc */
+typedef struct enahw_feat_host_attr {
+ enahw_addr_t efha_os_addr;
+ enahw_addr_t efha_debug_addr;
+ uint32_t efha_debug_sz;
+} enahw_feat_host_attr_t;
+
+/*
+ * ENAHW_FEAT_AENQ_CONFIG
+ *
+ * common: ena_admin_feature_aenq_desc
+ */
+typedef struct enahw_feat_aenq {
+ /* Bitmask of AENQ groups this device supports. */
+ uint32_t efa_supported_groups;
+
+ /* Bitmask of AENQ groups currently enabled. */
+ uint32_t efa_enabled_groups;
+} enahw_feat_aenq_t;
+
+/* common: ena_admin_set_feat_cmd */
+typedef struct enahw_cmd_set_feat {
+ struct enahw_ctrl_buff ecsf_ctrl_buf;
+ struct enahw_feat_common ecsf_comm;
+
+ union {
+ uint32_t ecsf_raw[11];
+ enahw_feat_host_attr_t ecsf_host_attr;
+ enahw_feat_mtu_t ecsf_mtu;
+ enahw_feat_aenq_t ecsf_aenq;
+ } ecsf_feat;
+} enahw_cmd_set_feat_t;
+
+/*
+ * Used to populate the host information buffer which the Nitro
+ * hypervisor supposedly uses for display, debugging, and possibly
+ * other purposes.
+ *
+ * common: ena_admin_host_info
+ */
+typedef struct enahw_host_info {
+ uint32_t ehi_os_type;
+ uint8_t ehi_os_dist_str[128];
+ uint32_t ehi_os_dist;
+ uint8_t ehi_kernel_ver_str[32];
+ uint32_t ehi_kernel_ver;
+ uint32_t ehi_driver_ver;
+ uint32_t ehi_supported_net_features[2];
+ uint16_t ehi_ena_spec_version;
+ uint16_t ehi_bdf;
+ uint16_t ehi_num_cpus;
+ uint16_t ehi_rsvd;
+ uint32_t ehi_driver_supported_features;
+} enahw_host_info_t;
+
+#define ENAHW_HOST_INFO_MAJOR_MASK GENMASK(7, 0)
+#define ENAHW_HOST_INFO_MINOR_SHIFT 8
+#define ENAHW_HOST_INFO_MINOR_MASK GENMASK(15, 8)
+#define ENAHW_HOST_INFO_SUB_MINOR_SHIFT 16
+#define ENAHW_HOST_INFO_SUB_MINOR_MASK GENMASK(23, 16)
+#define ENAHW_HOST_INFO_SPEC_MAJOR_SHIFT 8
+#define ENAHW_HOST_INFO_MODULE_TYPE_SHIFT 24
+#define ENAHW_HOST_INFO_MODULE_TYPE_MASK GENMASK(31, 24)
+#define ENAHW_HOST_INFO_FUNCTION_MASK GENMASK(2, 0)
+#define ENAHW_HOST_INFO_DEVICE_SHIFT 3
+#define ENAHW_HOST_INFO_DEVICE_MASK GENMASK(7, 3)
+#define ENAHW_HOST_INFO_BUS_SHIFT 8
+#define ENAHW_HOST_INFO_BUS_MASK GENMASK(15, 8)
+#define ENAHW_HOST_INFO_RX_OFFSET_SHIFT 1
+#define ENAHW_HOST_INFO_RX_OFFSET_MASK BIT(1)
+#define ENAHW_HOST_INFO_INTERRUPT_MODERATION_SHIFT 2
+#define ENAHW_HOST_INFO_INTERRUPT_MODERATION_MASK BIT(2)
+#define ENAHW_HOST_INFO_RX_BUF_MIRRORING_SHIFT 3
+#define ENAHW_HOST_INFO_RX_BUF_MIRRORING_MASK BIT(3)
+#define ENAHW_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_SHIFT 4
+#define ENAHW_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK BIT(4)
+
+/* common: ena_admin_os_type */
+enum enahw_os_type {
+ ENAHW_OS_LINUX = 1,
+ ENAHW_OS_WIN = 2,
+ ENAHW_OS_DPDK = 3,
+ ENAHW_OS_FREEBSD = 4,
+ ENAHW_OS_IPXE = 5,
+ ENAHW_OS_ESXI = 6,
+ ENAHW_OS_MACOS = 7,
+ ENAHW_OS_GROUPS_NUM = 7,
+};
+
+/*
+ * Create I/O Completion Queue
+ *
+ * A completion queue is where the device writes responses to I/O
+ * requests. The admin completion queue must be created before such a
+ * command can be issued, see ena_admin_cq_init().
+ *
+ * common: ena_admin_aq_create_cq_cmd
+ */
+typedef struct enahw_cmd_create_cq {
+ /*
+ * 7-6 reserved
+ *
+ * 5 interrupt mode: when set the device sends an interrupt
+ * for each completion, otherwise the driver must poll
+ * the queue.
+ *
+ * 4-0 reserved
+ */
+ uint8_t ecq_caps_1;
+
+ /*
+ * 7-5 reserved
+ *
+ * 4-0 CQ entry size (in words): the size of a single CQ entry
+ * in multiples of 32-bit words.
+ *
+ * NOTE: According to the common code the "valid" values
+ * are 4 or 8 -- this is incorrect. The valid values are
+ * 2 and 4. The common code does have an "extended" Rx
+ * completion descriptor, ena_eth_io_rx_cdesc_ext, that
+ * is 32 bytes and thus would use a value of 8, but it is
+ * not used by the Linux or FreeBSD drivers, so we do not
+ * bother with it.
+ *
+ * Type Bytes Value
+ * enahw_tx_cdesc_t 8 2
+ * enahw_rx_cdesc_t 16 4
+ */
+ uint8_t ecq_caps_2;
+
+ /* The number of CQ entries, must be a power of 2. */
+ uint16_t ecq_num_descs;
+
+ /* The MSI-X vector assigned to this CQ. */
+ uint32_t ecq_msix_vector;
+
+ /*
+ * The CQ's physical base address. The CQ memory must be
+ * physically contiguous.
+ */
+ enahw_addr_t ecq_addr;
+} enahw_cmd_create_cq_t;
+
+#define ENAHW_CMD_CREATE_CQ_INTERRUPT_MODE_ENABLED_SHIFT 5
+#define ENAHW_CMD_CREATE_CQ_INTERRUPT_MODE_ENABLED_MASK (BIT(5))
+#define ENAHW_CMD_CREATE_CQ_DESC_SIZE_WORDS_MASK (GENMASK(4, 0))
+
+#define ENAHW_CMD_CREATE_CQ_INTERRUPT_MODE_ENABLE(cmd) \
+ ((cmd)->ecq_caps_1 |= ENAHW_CMD_CREATE_CQ_INTERRUPT_MODE_ENABLED_MASK)
+
+#define ENAHW_CMD_CREATE_CQ_DESC_SIZE_WORDS(cmd, val) \
+ (((cmd)->ecq_caps_2) |= \
+ ((val) & ENAHW_CMD_CREATE_CQ_DESC_SIZE_WORDS_MASK))
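+
+/*
+ * For example, an Rx CQ whose entries are 16-byte enahw_rx_cdesc_t
+ * structures would be created with
+ * ENAHW_CMD_CREATE_CQ_DESC_SIZE_WORDS(cmd, 4), i.e. 16 bytes in
+ * 4-byte words.
+ */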
+
+/*
+ * Destroy Completion Queue
+ *
+ * common: ena_admin_aq_destroy_cq_cmd
+ */
+typedef struct enahw_cmd_destroy_cq {
+ uint16_t edcq_idx;
+ uint16_t edcq_rsvd;
+} enahw_cmd_destroy_cq_t;
+
+/*
+ * common: ena_admin_aq_create_sq_cmd
+ */
+typedef struct enahw_cmd_create_sq {
+ /*
+ * 7-5 direction: 0x1 = Tx, 0x2 = Rx
+ * 4-0 reserved
+ */
+ uint8_t ecsq_dir;
+ uint8_t ecsq_rsvd1;
+
+ /*
+ * 7 reserved
+ *
+ * 6-4 completion policy: How are completion events generated.
+ *
+ * See enahw_completion_policy_type_t for a description of
+ * the various values.
+ *
+ * 3-0 placement policy: Where the descriptor ring and
+ * headers reside.
+ *
+ * See enahw_placement_policy_t for a description of the
+ * various values.
+ */
+ uint8_t ecsq_caps_2;
+
+ /*
+ * 7-1 reserved
+ *
+ * 0 physically contiguous: When set indicates the descriptor
+ * ring memory is physically contiguous.
+ */
+ uint8_t ecsq_caps_3;
+
+ /*
+ * The index of the associated Completion Queue (CQ). The CQ
+ * must be created before the SQ.
+ */
+ uint16_t ecsq_cq_idx;
+
+ /* The number of descriptors in this SQ. */
+ uint16_t ecsq_num_descs;
+
+ /*
+ * The base physical address of the SQ. This should not be set
+ * for LLQ. Must be page aligned.
+ */
+ enahw_addr_t ecsq_base;
+
+ /*
+ * The physical address of the head write-back pointer. Valid
+ * only when the completion policy is set to one of the head
+ * write-back modes (0x2 or 0x3). Must be cacheline size
+ * aligned.
+ */
+ enahw_addr_t ecsq_head_wb;
+ uint32_t ecsq_rsvdw2;
+ uint32_t ecsq_rsvdw3;
+} enahw_cmd_create_sq_t;
+
+typedef enum enahw_sq_direction {
+ ENAHW_SQ_DIRECTION_TX = 1,
+ ENAHW_SQ_DIRECTION_RX = 2,
+} enahw_sq_direction_t;
+
+typedef enum enahw_placement_policy {
+ /* Descriptors and headers are in host memory. */
+ ENAHW_PLACEMENT_POLICY_HOST = 1,
+
+ /*
+ * Descriptors and headers are in device memory (a.k.a Low
+ * Latency Queue).
+ */
+ ENAHW_PLACEMENT_POLICY_DEV = 3,
+} enahw_placement_policy_t;
+
+/*
+ * DESC: Write a CQ entry for each SQ descriptor.
+ *
+ * DESC_ON_DEMAND: Write a CQ entry when requested by the SQ descriptor.
+ *
+ * HEAD_ON_DEMAND: Update head pointer when requested by the SQ
+ * descriptor.
+ *
+ * HEAD: Update head pointer for each SQ descriptor.
+ *
+ */
+typedef enum enahw_completion_policy_type {
+ ENAHW_COMPLETION_POLICY_DESC = 0,
+ ENAHW_COMPLETION_POLICY_DESC_ON_DEMAND = 1,
+ ENAHW_COMPLETION_POLICY_HEAD_ON_DEMAND = 2,
+ ENAHW_COMPLETION_POLICY_HEAD = 3,
+} enahw_completion_policy_type_t;
+
+#define ENAHW_CMD_CREATE_SQ_DIR_SHIFT 5
+#define ENAHW_CMD_CREATE_SQ_DIR_MASK GENMASK(7, 5)
+#define ENAHW_CMD_CREATE_SQ_PLACEMENT_POLICY_MASK GENMASK(3, 0)
+#define ENAHW_CMD_CREATE_SQ_COMPLETION_POLICY_SHIFT 4
+#define ENAHW_CMD_CREATE_SQ_COMPLETION_POLICY_MASK GENMASK(6, 4)
+#define ENAHW_CMD_CREATE_SQ_PHYSMEM_CONTIG_MASK BIT(0)
+
+#define ENAHW_CMD_CREATE_SQ_DIR(cmd, val) \
+ (((cmd)->ecsq_dir) |= (((val) << ENAHW_CMD_CREATE_SQ_DIR_SHIFT) & \
+ ENAHW_CMD_CREATE_SQ_DIR_MASK))
+
+#define ENAHW_CMD_CREATE_SQ_PLACEMENT_POLICY(cmd, val) \
+ (((cmd)->ecsq_caps_2) |= \
+ ((val) & ENAHW_CMD_CREATE_SQ_PLACEMENT_POLICY_MASK))
+
+#define ENAHW_CMD_CREATE_SQ_COMPLETION_POLICY(cmd, val) \
+ (((cmd)->ecsq_caps_2) |= \
+ (((val) << ENAHW_CMD_CREATE_SQ_COMPLETION_POLICY_SHIFT) & \
+ ENAHW_CMD_CREATE_SQ_COMPLETION_POLICY_MASK))
+
+#define ENAHW_CMD_CREATE_SQ_PHYSMEM_CONTIG(cmd) \
+ ((cmd)->ecsq_caps_3 |= ENAHW_CMD_CREATE_SQ_PHYSMEM_CONTIG_MASK)
+
+/* common: ena_admin_sq */
+typedef struct enahw_cmd_destroy_sq {
+ uint16_t edsq_idx;
+ uint8_t edsq_dir; /* Tx/Rx */
+ uint8_t edsq_rsvd;
+} enahw_cmd_destroy_sq_t;
+
+#define ENAHW_CMD_DESTROY_SQ_DIR_SHIFT 5
+#define ENAHW_CMD_DESTROY_SQ_DIR_MASK GENMASK(7, 5)
+
+#define ENAHW_CMD_DESTROY_SQ_DIR(cmd, val) \
+ (((cmd)->edsq_dir) |= (((val) << ENAHW_CMD_DESTROY_SQ_DIR_SHIFT) & \
+ ENAHW_CMD_DESTROY_SQ_DIR_MASK))
+
+/* common: ena_admin_aq_get_stats_cmd */
+typedef struct enahw_cmd_get_stats {
+ struct enahw_ctrl_buff ecgs_ctrl_buf;
+ uint8_t ecgs_type;
+ uint8_t ecgs_scope;
+ uint16_t ecgs_rsvd;
+ uint16_t ecgs_queue_idx;
+
+ /*
+	 * The device ID from which to query stats. The sentinel
+	 * value 0xFFFF indicates a query of the current device.
+	 * According to the common docs, a "privileged device" may
+	 * query stats for other ENA devices. However, the definition
+	 * of this "privileged device" is not expanded upon.
+ */
+ uint16_t ecgs_device_id;
+} enahw_cmd_get_stats_t;
+
+/* Query the stats for my device. */
+#define ENAHW_CMD_GET_STATS_MY_DEVICE_ID 0xFFFF
+
+/*
+ * BASIC: Returns enahw_resp_basic_stats.
+ *
+ * EXTENDED: According to the Linux documentation returns a buffer in
+ * "string format" with additional statistics per queue and per device ID.
+ *
+ * ENI: According to the Linux documentation it returns "extra HW
+ * stats for a specific network interfaces".
+ *
+ * common: ena_admin_get_stats_type
+ */
+typedef enum enahw_get_stats_type {
+ ENAHW_GET_STATS_TYPE_BASIC = 0,
+ ENAHW_GET_STATS_TYPE_EXTENDED = 1,
+ ENAHW_GET_STATS_TYPE_ENI = 2,
+} enahw_get_stats_type_t;
+
+/* common: ena_admin_get_stats_scope */
+typedef enum enahw_get_stats_scope {
+ ENAHW_GET_STATS_SCOPE_QUEUE = 0,
+ ENAHW_GET_STATS_SCOPE_ETH = 1,
+} enahw_get_stats_scope_t;
+
+/* common: ena_admin_aq_entry */
+typedef struct enahw_cmd_desc {
+ uint16_t ecd_cmd_id;
+ uint8_t ecd_opcode;
+ uint8_t ecd_flags;
+
+ union {
+ uint32_t ecd_raw[15];
+ enahw_cmd_get_feat_t ecd_get_feat;
+ enahw_cmd_set_feat_t ecd_set_feat;
+ enahw_cmd_create_cq_t ecd_create_cq;
+ enahw_cmd_destroy_cq_t ecd_destroy_cq;
+ enahw_cmd_create_sq_t ecd_create_sq;
+ enahw_cmd_destroy_sq_t ecd_destroy_sq;
+ enahw_cmd_get_stats_t ecd_get_stats;
+ } ecd_cmd;
+
+} enahw_cmd_desc_t;
+
+/*
+ * top level commands that may be sent to the Admin Queue.
+ *
+ * common: ena_admin_aq_opcode
+ */
+typedef enum ena_cmd_opcode {
+ ENAHW_CMD_NONE = 0,
+ ENAHW_CMD_CREATE_SQ = 1,
+ ENAHW_CMD_DESTROY_SQ = 2,
+ ENAHW_CMD_CREATE_CQ = 3,
+ ENAHW_CMD_DESTROY_CQ = 4,
+ ENAHW_CMD_GET_FEATURE = 8,
+ ENAHW_CMD_SET_FEATURE = 9,
+ ENAHW_CMD_GET_STATS = 11,
+} enahw_cmd_opcode_t;
+
+/* common: ENA_ADMIN_AQ_COMMON_DESC */
+#define ENAHW_CMD_ID_MASK GENMASK(11, 0)
+#define ENAHW_CMD_PHASE_MASK BIT(0)
+
+#define ENAHW_CMD_ID(desc, id) \
+ (((desc)->ecd_cmd_id) |= ((id) & ENAHW_CMD_ID_MASK))
+
+/*
+ * Subcommands for ENA_ADMIN_{GET,SET}_FEATURE.
+ *
+ * common: ena_admin_aq_feature_id
+ */
+typedef enum enahw_feature_id {
+ ENAHW_FEAT_DEVICE_ATTRIBUTES = 1,
+ ENAHW_FEAT_MAX_QUEUES_NUM = 2,
+ ENAHW_FEAT_HW_HINTS = 3,
+ ENAHW_FEAT_LLQ = 4,
+ ENAHW_FEAT_EXTRA_PROPERTIES_STRINGS = 5,
+ ENAHW_FEAT_EXTRA_PROPERTIES_FLAGS = 6,
+ ENAHW_FEAT_MAX_QUEUES_EXT = 7,
+ ENAHW_FEAT_RSS_HASH_FUNCTION = 10,
+ ENAHW_FEAT_STATELESS_OFFLOAD_CONFIG = 11,
+ ENAHW_FEAT_RSS_INDIRECTION_TABLE_CONFIG = 12,
+ ENAHW_FEAT_MTU = 14,
+ ENAHW_FEAT_RSS_HASH_INPUT = 18,
+ ENAHW_FEAT_INTERRUPT_MODERATION = 20,
+ ENAHW_FEAT_AENQ_CONFIG = 26,
+ ENAHW_FEAT_LINK_CONFIG = 27,
+ ENAHW_FEAT_HOST_ATTR_CONFIG = 28,
+ ENAHW_FEAT_NUM = 32,
+} enahw_feature_id_t;
+
+/*
+ * The following macros define the maximum version we support for each
+ * feature. These are the feature versions we use to communicate with
+ * the feature command. Linux has these values spread throughout the
+ * code at the various callsites of ena_com_get_feature(). We choose
+ * to centralize our feature versions to make it easier to audit.
+ */
+#define ENAHW_FEAT_DEVICE_ATTRIBUTES_VER 0
+#define ENAHW_FEAT_MAX_QUEUES_NUM_VER 0
+#define ENAHW_FEAT_HW_HINTS_VER 0
+#define ENAHW_FEAT_LLQ_VER 0
+#define ENAHW_FEAT_EXTRA_PROPERTIES_STRINGS_VER 0
+#define ENAHW_FEAT_EXTRA_PROPERTIES_FLAGS_VER 0
+#define ENAHW_FEAT_MAX_QUEUES_EXT_VER 1
+#define ENAHW_FEAT_RSS_HASH_FUNCTION_VER 0
+#define ENAHW_FEAT_STATELESS_OFFLOAD_CONFIG_VER 0
+#define ENAHW_FEAT_RSS_INDIRECTION_TABLE_CONFIG_VER 0
+#define ENAHW_FEAT_MTU_VER 0
+#define ENAHW_FEAT_RSS_HASH_INPUT_VER 0
+#define ENAHW_FEAT_INTERRUPT_MODERATION_VER 0
+#define ENAHW_FEAT_AENQ_CONFIG_VER 0
+#define ENAHW_FEAT_LINK_CONFIG_VER 0
+#define ENAHW_FEAT_HOST_ATTR_CONFIG_VER 0
+
+/* common: ena_admin_link_types */
+typedef enum enahw_link_speeds {
+ ENAHW_LINK_SPEED_1G = 0x1,
+ ENAHW_LINK_SPEED_2_HALF_G = 0x2,
+ ENAHW_LINK_SPEED_5G = 0x4,
+ ENAHW_LINK_SPEED_10G = 0x8,
+ ENAHW_LINK_SPEED_25G = 0x10,
+ ENAHW_LINK_SPEED_40G = 0x20,
+ ENAHW_LINK_SPEED_50G = 0x40,
+ ENAHW_LINK_SPEED_100G = 0x80,
+ ENAHW_LINK_SPEED_200G = 0x100,
+ ENAHW_LINK_SPEED_400G = 0x200,
+} enahw_link_speeds_t;
+
+/*
+ * Response to ENAHW_FEAT_HW_HINTS.
+ *
+ * Hints from the device to the driver about what values to use for
+ * various communications between the two. A value of 0 indicates
+ * there is no hint and the driver should provide its own default. All
+ * timeout values are in milliseconds.
+ *
+ * common: ena_admin_ena_hw_hints
+ */
+typedef struct enahw_device_hints {
+ /*
+ * The amount of time the driver should wait for an MMIO read
+ * reply before giving up and returning an error.
+ */
+ uint16_t edh_mmio_read_timeout;
+
+ /*
+ * If the driver has not seen an AENQ keep alive in this
+ * timeframe, then consider the device hung and perform a
+ * reset.
+ */
+ uint16_t edh_keep_alive_timeout;
+
+ /*
+	 * The time period in which we expect a Tx to report
+ * completion, otherwise it is considered "missed". Initiate a
+ * device reset when the number of missed completions is
+ * greater than the threshold.
+ */
+ uint16_t edh_tx_comp_timeout;
+ uint16_t edh_missed_tx_reset_threshold;
+
+ /*
+	 * The time period in which we expect an admin command to
+ * report completion.
+ */
+ uint16_t edh_admin_comp_timeout;
+
+ /*
+ * Used by Linux to set the netdevice 'watchdog_timeo' value.
+ * This value is used by the networking stack to determine
+ * when a pending transmission has stalled. This is similar to
+	 * the keep alive timeout, except it views progress from
+	 * the perspective of the network stack itself. This difference
+	 * is subtle but important: the device could be in a state
+	 * where it has a functioning keep alive heartbeat, but has a
+	 * stuck Tx queue impeding forward progress of the networking
+	 * stack (which in many cases results in a scenario
+	 * indistinguishable from a complete host hang).
+ *
+ * The mac layer does not currently provide such
+ * functionality, though it could and should be extended to
+ * support such a feature.
+ */
+ uint16_t edh_net_wd_timeout;
+
+ /*
+ * The maximum number of cookies/segments allowed in a DMA
+ * scatter-gather list.
+ */
+ uint16_t edh_max_tx_sgl;
+ uint16_t edh_max_rx_sgl;
+
+ uint16_t reserved[8];
+} enahw_device_hints_t;
+
+/*
+ * Response to ENAHW_FEAT_DEVICE_ATTRIBUTES.
+ *
+ * common: ena_admin_device_attr_feature_desc
+ */
+typedef struct enahw_feat_dev_attr {
+ uint32_t efda_impl_id;
+ uint32_t efda_device_version;
+
+ /*
+ * Bitmap representing supported get/set feature subcommands
+ * (enahw_feature_id).
+ */
+ uint32_t efda_supported_features;
+ uint32_t efda_rsvd1;
+
+	 * Number of bits used for physical/virtual address.
+ uint32_t efda_phys_addr_width;
+ uint32_t efda_virt_addr_with;
+
+ /* The unicast MAC address in network byte order. */
+ uint8_t efda_mac_addr[6];
+ uint8_t efda_rsvd2[2];
+ uint32_t efda_max_mtu;
+} enahw_feat_dev_attr_t;
+
+/*
+ * Response to ENAHW_FEAT_MAX_QUEUES_NUM.
+ *
+ * common: ena_admin_queue_feature_desc
+ */
+typedef struct enahw_feat_max_queue {
+ uint32_t efmq_max_sq_num;
+ uint32_t efmq_max_sq_depth;
+ uint32_t efmq_max_cq_num;
+ uint32_t efmq_max_cq_depth;
+ uint32_t efmq_max_legacy_llq_num;
+ uint32_t efmq_max_legacy_llq_depth;
+ uint32_t efmq_max_header_size;
+
+ /*
+ * The maximum number of descriptors a single Tx packet may
+ * span. This includes the meta descriptor.
+ */
+ uint16_t efmq_max_per_packet_tx_descs;
+
+ /*
+ * The maximum number of descriptors a single Rx packet may span.
+ */
+ uint16_t efmq_max_per_packet_rx_descs;
+} enahw_feat_max_queue_t;
+
+/*
+ * Response to ENAHW_FEAT_MAX_QUEUES_EXT.
+ *
+ * common: ena_admin_queue_ext_feature_desc
+ */
+typedef struct enahw_feat_max_queue_ext {
+ uint8_t efmqe_version;
+ uint8_t efmqe_rsvd[3];
+
+ uint32_t efmqe_max_tx_sq_num;
+ uint32_t efmqe_max_tx_cq_num;
+ uint32_t efmqe_max_rx_sq_num;
+ uint32_t efmqe_max_rx_cq_num;
+ uint32_t efmqe_max_tx_sq_depth;
+ uint32_t efmqe_max_tx_cq_depth;
+ uint32_t efmqe_max_rx_sq_depth;
+ uint32_t efmqe_max_rx_cq_depth;
+ uint32_t efmqe_max_tx_header_size;
+
+ /*
+ * The maximum number of descriptors a single Tx packet may
+ * span. This includes the meta descriptor.
+ */
+ uint16_t efmqe_max_per_packet_tx_descs;
+
+ /*
+ * The maximum number of descriptors a single Rx packet may span.
+ */
+ uint16_t efmqe_max_per_packet_rx_descs;
+} enahw_feat_max_queue_ext_t;
+
+/*
+ * Response to ENA_ADMIN_LINK_CONFIG.
+ *
+ * common: ena_admin_get_feature_link_desc
+ */
+typedef struct enahw_feat_link_conf {
+ /* Link speed in Mbit/s. */
+ uint32_t eflc_speed;
+
+ /* Bit field of enahw_link_speeds_t. */
+ uint32_t eflc_supported;
+
+ /*
+ * 31-2: reserved
+ * 1: duplex - Full Duplex
+ * 0: autoneg
+ */
+ uint32_t eflc_flags;
+} enahw_feat_link_conf_t;
+
+#define ENAHW_FEAT_LINK_CONF_AUTONEG_MASK BIT(0)
+#define ENAHW_FEAT_LINK_CONF_DUPLEX_SHIFT 1
+#define ENAHW_FEAT_LINK_CONF_DUPLEX_MASK BIT(1)
+
+#define ENAHW_FEAT_LINK_CONF_AUTONEG(f) \
+ ((f)->eflc_flags & ENAHW_FEAT_LINK_CONF_AUTONEG_MASK)
+
+#define ENAHW_FEAT_LINK_CONF_FULL_DUPLEX(f) \
+ ((((f)->eflc_flags & ENAHW_FEAT_LINK_CONF_DUPLEX_MASK) >> \
+ ENAHW_FEAT_LINK_CONF_DUPLEX_SHIFT) == 1)
+
+/*
+ * Response to ENAHW_FEAT_STATELESS_OFFLOAD_CONFIG.
+ *
+ * common: ena_admin_feature_offload_desc
+ */
+typedef struct enahw_feat_offload {
+ /*
+ * 0 : Tx IPv4 Header Checksum
+ * 1 : Tx L4/IPv4 Partial Checksum
+ *
+ * The L4 checksum field should be initialized with pseudo
+ * header checksum.
+ *
+ * 2 : Tx L4/IPv4 Checksum Full
+ * 3 : Tx L4/IPv6 Partial Checksum
+ *
+ * The L4 checksum field should be initialized with pseudo
+ * header checksum.
+ *
+ * 4 : Tx L4/IPv6 Checksum Full
+ * 5 : TCP/IPv4 LSO (aka TSO)
+ * 6 : TCP/IPv6 LSO (aka TSO)
+ * 7 : LSO ECN
+ */
+ uint32_t efo_tx;
+
+ /*
+ * Receive side supported stateless offload.
+ *
+ * 0 : Rx IPv4 Header Checksum
+ * 1 : Rx TCP/UDP + IPv4 Full Checksum
+ * 2 : Rx TCP/UDP + IPv6 Full Checksum
+ * 3 : Rx hash calculation
+ */
+ uint32_t efo_rx_supported;
+
+ /* Linux seems to only check rx_supported. */
+ uint32_t efo_rx_enabled;
+} enahw_feat_offload_t;
+
+/* Feature Offloads */
+#define ENAHW_FEAT_OFFLOAD_TX_L3_IPV4_CSUM_MASK BIT(0)
+#define ENAHW_FEAT_OFFLOAD_TX_L4_IPV4_CSUM_PART_SHIFT 1
+#define ENAHW_FEAT_OFFLOAD_TX_L4_IPV4_CSUM_PART_MASK BIT(1)
+#define ENAHW_FEAT_OFFLOAD_TX_L4_IPV4_CSUM_FULL_SHIFT 2
+#define ENAHW_FEAT_OFFLOAD_TX_L4_IPV4_CSUM_FULL_MASK BIT(2)
+#define ENAHW_FEAT_OFFLOAD_TX_L4_IPV6_CSUM_PART_SHIFT 3
+#define ENAHW_FEAT_OFFLOAD_TX_L4_IPV6_CSUM_PART_MASK BIT(3)
+#define ENAHW_FEAT_OFFLOAD_TX_L4_IPV6_CSUM_FULL_SHIFT 4
+#define ENAHW_FEAT_OFFLOAD_TX_L4_IPV6_CSUM_FULL_MASK BIT(4)
+#define ENAHW_FEAT_OFFLOAD_TSO_IPV4_SHIFT 5
+#define ENAHW_FEAT_OFFLOAD_TSO_IPV4_MASK BIT(5)
+#define ENAHW_FEAT_OFFLOAD_TSO_IPV6_SHIFT 6
+#define ENAHW_FEAT_OFFLOAD_TSO_IPV6_MASK BIT(6)
+#define ENAHW_FEAT_OFFLOAD_TSO_ECN_SHIFT 7
+#define ENAHW_FEAT_OFFLOAD_TSO_ECN_MASK BIT(7)
+#define ENAHW_FEAT_OFFLOAD_RX_L3_IPV4_CSUM_MASK BIT(0)
+#define ENAHW_FEAT_OFFLOAD_RX_L4_IPV4_CSUM_SHIFT 1
+#define ENAHW_FEAT_OFFLOAD_RX_L4_IPV4_CSUM_MASK BIT(1)
+#define ENAHW_FEAT_OFFLOAD_RX_L4_IPV6_CSUM_SHIFT 2
+#define ENAHW_FEAT_OFFLOAD_RX_L4_IPV6_CSUM_MASK BIT(2)
+#define ENAHW_FEAT_OFFLOAD_RX_HASH_SHIFT 3
+#define ENAHW_FEAT_OFFLOAD_RX_HASH_MASK BIT(3)
+
+#define ENAHW_FEAT_OFFLOAD_TX_L3_IPV4_CSUM(f) \
+ (((f)->efo_tx & ENAHW_FEAT_OFFLOAD_TX_L3_IPV4_CSUM_MASK) != 0)
+
+#define ENAHW_FEAT_OFFLOAD_TX_L4_IPV4_CSUM_PART(f) \
+ (((f)->efo_tx & ENAHW_FEAT_OFFLOAD_TX_L4_IPV4_CSUM_PART_MASK) != 0)
+
+#define ENAHW_FEAT_OFFLOAD_TX_L4_IPV4_CSUM_FULL(f) \
+ (((f)->efo_tx & ENAHW_FEAT_OFFLOAD_TX_L4_IPV4_CSUM_FULL_MASK) != 0)
+
+#define ENAHW_FEAT_OFFLOAD_TSO_IPV4(f) \
+ (((f)->efo_tx & ENAHW_FEAT_OFFLOAD_TSO_IPV4_MASK) != 0)
+
+#define ENAHW_FEAT_OFFLOAD_TX_L4_IPV6_CSUM_PART(f) \
+ (((f)->efo_tx & ENAHW_FEAT_OFFLOAD_TX_L4_IPV6_CSUM_PART_MASK) != 0)
+
+#define ENAHW_FEAT_OFFLOAD_TX_L4_IPV6_CSUM_FULL(f) \
+ (((f)->efo_tx & ENAHW_FEAT_OFFLOAD_TX_L4_IPV6_CSUM_FULL_MASK) != 0)
+
+#define ENAHW_FEAT_OFFLOAD_TSO_IPV6(f) \
+ (((f)->efo_tx & ENAHW_FEAT_OFFLOAD_TSO_IPV6_MASK) != 0)
+
+#define ENAHW_FEAT_OFFLOAD_RX_L3_IPV4_CSUM(f) \
+ (((f)->efo_rx_supported & ENAHW_FEAT_OFFLOAD_RX_L3_IPV4_CSUM_MASK) != 0)
+
+#define ENAHW_FEAT_OFFLOAD_RX_L4_IPV4_CSUM(f) \
+ (((f)->efo_rx_supported & ENAHW_FEAT_OFFLOAD_RX_L4_IPV4_CSUM_MASK) != 0)
+
+#define ENAHW_FEAT_OFFLOAD_RX_L4_IPV6_CSUM(f) \
+ (((f)->efo_rx_supported & ENAHW_FEAT_OFFLOAD_RX_L4_IPV6_CSUM_MASK) != 0)
+
+typedef union enahw_resp_get_feat {
+ uint32_t ergf_raw[14];
+ enahw_feat_dev_attr_t ergf_dev_attr;
+ enahw_feat_max_queue_t ergf_max_queue;
+ enahw_feat_max_queue_ext_t ergf_max_queue_ext;
+ enahw_feat_aenq_t ergf_aenq;
+ enahw_feat_link_conf_t ergf_link_conf;
+ enahw_feat_offload_t ergf_offload;
+} enahw_resp_get_feat_u;
+
+/*
+ * common: ena_admin_acq_create_cq_resp_desc
+ */
+typedef struct enahw_resp_create_cq {
+ /*
+ * The hardware's index for this queue.
+ */
+ uint16_t ercq_idx;
+
+ /*
+ * Apparently the number of descriptors granted may be
+	 * different from the number requested.
+ */
+ uint16_t ercq_actual_num_descs;
+ uint32_t ercq_numa_node_reg_offset;
+ uint32_t ercq_head_db_reg_offset; /* doorbell */
+ uint32_t ercq_interrupt_mask_reg_offset; /* stop intr */
+} enahw_resp_create_cq_t;
+
+/* common: ena_admin_acq_create_sq_resp_desc */
+typedef struct enahw_resp_create_sq {
+ uint16_t ersq_idx;
+ uint16_t ersq_rsvdw1;
+ uint32_t ersq_db_reg_offset;
+ uint32_t ersq_llq_descs_reg_offset;
+ uint32_t ersq_llq_headers_reg_offset;
+} enahw_resp_create_sq_t;
+
+/* common: ena_admin_basic_stats */
+typedef struct enahw_resp_basic_stats {
+ uint32_t erbs_tx_bytes_low;
+ uint32_t erbs_tx_bytes_high;
+ uint32_t erbs_tx_pkts_low;
+ uint32_t erbs_tx_pkts_high;
+ uint32_t erbs_rx_bytes_low;
+ uint32_t erbs_rx_bytes_high;
+ uint32_t erbs_rx_pkts_low;
+ uint32_t erbs_rx_pkts_high;
+ uint32_t erbs_rx_drops_low;
+ uint32_t erbs_rx_drops_high;
+ uint32_t erbs_tx_drops_low;
+ uint32_t erbs_tx_drops_high;
+} enahw_resp_basic_stats_t;
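+
+/*
+ * Each 64-bit statistic is split across a low/high pair of 32-bit
+ * fields; for example, the total Tx byte count can be reconstructed
+ * as ((uint64_t)erbs_tx_bytes_high << 32) | erbs_tx_bytes_low.
+ */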
+
+/* common: ena_admin_eni_stats */
+typedef struct enahw_resp_eni_stats {
+ /*
+ * The number of inbound packets dropped due to aggregate
+ * inbound bandwidth allowance being exceeded.
+ */
+ uint64_t eres_bw_in_exceeded;
+
+ /*
+ * The number of outbound packets dropped due to aggregated outbound
+ * bandwidth allowance being exceeded.
+ */
+ uint64_t eres_bw_out_exceeded;
+
+ /*
+ * The number of packets dropped due to the Packets Per Second
+ * allowance being exceeded.
+ */
+ uint64_t eres_pps_exceeded;
+
+ /*
+ * The number of packets dropped due to connection tracking
+ * allowance being exceeded and leading to failure in
+ * establishment of new connections.
+ */
+ uint64_t eres_conns_exceeded;
+
+ /*
+ * The number of packets dropped due to linklocal packet rate
+ * allowance being exceeded.
+ */
+ uint64_t eres_linklocal_exceeded;
+} enahw_resp_eni_stats_t;
+
+/*
+ * common: ena_admin_acq_entry
+ */
+typedef struct enahw_resp_desc {
+ /* The index of the completed command. */
+ uint16_t erd_cmd_id;
+
+ /* The status of the command (enahw_resp_status_t). */
+ uint8_t erd_status;
+
+ /*
+ * 7-1 Reserved
+ * 0 Phase
+ */
+ uint8_t erd_flags;
+
+ /* Extended status. */
+ uint16_t erd_ext_status;
+
+ /*
+ * The AQ entry (enahw_cmd_desc) index which has been consumed
+ * by the device and can be reused. However, this field is not
+ * used in the other drivers, and it seems to be redundant
+ * with the erd_idx field.
+ */
+ uint16_t erd_sq_head_idx;
+
+ union {
+ uint32_t raw[14];
+ enahw_resp_get_feat_u erd_get_feat;
+ enahw_resp_create_cq_t erd_create_cq;
+ /* destroy_cq: No command-specific response. */
+ enahw_resp_create_sq_t erd_create_sq;
+ /* destroy_sq: No command-specific response. */
+ enahw_resp_basic_stats_t erd_basic_stats;
+ enahw_resp_eni_stats_t erd_eni_stats;
+ } erd_resp;
+} enahw_resp_desc_t;
+
+/* common: ENA_ADMIN_ACQ_COMMON_DESC */
+#define ENAHW_RESP_CMD_ID_MASK GENMASK(11, 0)
+#define ENAHW_RESP_PHASE_MASK 0x1
+
+#define ENAHW_RESP_CMD_ID(desc) \
+ (((desc)->erd_cmd_id) & ENAHW_RESP_CMD_ID_MASK)
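+
+/*
+ * A sketch of how an ACQ consumer might use these definitions: check
+ * the phase bit before trusting a descriptor, then extract the
+ * command ID and status (the driver's real completion handling also
+ * deals with timeouts and extended status):
+ *
+ *	enahw_resp_desc_t *resp = &acq_descs[head & (num_descs - 1)];
+ *
+ *	if ((resp->erd_flags & ENAHW_RESP_PHASE_MASK) == phase) {
+ *		uint16_t cmd_id = ENAHW_RESP_CMD_ID(resp);
+ *		enahw_resp_status_t st = resp->erd_status;
+ *		... match cmd_id back to the issued command, check st ...
+ *	}
+ */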
+
+/*
+ * The response status of an Admin Queue command.
+ *
+ * common: ena_admin_aq_completion_status
+ */
+typedef enum enahw_resp_status {
+ ENAHW_RESP_SUCCESS = 0,
+ ENAHW_RESP_RESOURCE_ALLOCATION_FAILURE = 1,
+ ENAHW_RESP_BAD_OPCODE = 2,
+ ENAHW_RESP_UNSUPPORTED_OPCODE = 3,
+ ENAHW_RESP_MALFORMED_REQUEST = 4,
+ /*
+	 * At this point the common code mentions that there is
+	 * "additional status" in the response descriptor's
+ * erd_ext_status field. As the common code never actually
+ * uses this field it's hard to know the exact meaning of the
+ * comment. My best guess is the illegal parameter error
+ * stores additional context in the erd_ext_status field. But
+ * how to interpret that additional context is anyone's guess.
+ */
+ ENAHW_RESP_ILLEGAL_PARAMETER = 5,
+ ENAHW_RESP_UNKNOWN_ERROR = 6,
+ ENAHW_RESP_RESOURCE_BUSY = 7,
+} enahw_resp_status_t;
+
+/*
+ * Not really a device structure, more of a helper to debug register values.
+ */
+typedef struct enahw_reg_nv {
+ char *ern_name;
+ uint32_t ern_offset;
+ uint32_t ern_value;
+} enahw_reg_nv_t;
+
+/*
+ * I/O macros and structures.
+ * -------------------------
+ */
+
+/*
+ * The device's L3 and L4 protocol numbers. These are specific to the
+ * ENA device and not to be confused with IANA protocol numbers.
+ *
+ * common: ena_eth_io_l3_proto_index
+ */
+typedef enum enahw_io_l3_proto {
+ ENAHW_IO_L3_PROTO_UNKNOWN = 0,
+ ENAHW_IO_L3_PROTO_IPV4 = 8,
+ ENAHW_IO_L3_PROTO_IPV6 = 11,
+ ENAHW_IO_L3_PROTO_FCOE = 21,
+ ENAHW_IO_L3_PROTO_ROCE = 22,
+} enahw_io_l3_proto_t;
+
+/* common: ena_eth_io_l4_proto_index */
+typedef enum enahw_io_l4_proto {
+ ENAHW_IO_L4_PROTO_UNKNOWN = 0,
+ ENAHW_IO_L4_PROTO_TCP = 12,
+ ENAHW_IO_L4_PROTO_UDP = 13,
+ ENAHW_IO_L4_PROTO_ROUTEABLE_ROCE = 23,
+} enahw_io_l4_proto_t;
+
+/* common: ena_eth_io_tx_desc */
+typedef struct enahw_tx_data_desc {
+ /*
+ * 15-0 Buffer Length (LENGTH)
+ *
+ * The buffer length in bytes. This should NOT include the
+ * Ethernet FCS bytes.
+ *
+ * 21-16 Request ID High Bits [15-10] (REQ_ID_HI)
+ * 22 Reserved Zero
+ * 23 Metadata Flag always zero (META_DESC)
+ *
+ * This flag indicates if the descriptor is a metadata
+ * descriptor or not. In this case we are defining the Tx
+ * descriptor, so it's always zero.
+ *
+ * 24 Phase bit (PHASE)
+ * 25 Reserved Zero
+ * 26 First Descriptor Bit (FIRST)
+ *
+ * Indicates this is the first descriptor for the frame.
+ *
+ * 27 Last Descriptor Bit (LAST)
+ *
+ * Indicates this is the last descriptor for the frame.
+ *
+ * 28 Completion Request Bit (COMP_REQ)
+ *
+ * Indicates if completion should be posted after the
+ * frame is transmitted. This bit is only valid on the
+ * first descriptor.
+ *
+ * 31-29 Reserved Zero
+ */
+ uint32_t etd_len_ctrl;
+
+ /*
+ * 3-0 L3 Protocol Number (L3_PROTO_IDX)
+ *
+ * The L3 protocol type, one of enahw_io_l3_proto_t. This
+ * field is required when L3_CSUM_EN or TSO_EN is set.
+ *
+ * 4 Don't Fragment Bit (DF)
+ *
+ * The value of IPv4 DF. This value must copy the value
+ * found in the packet's IPv4 header.
+ *
+ * 6-5 Reserved Zero
+ * 7 TSO Bit (TSO_EN)
+ *
+ * Enable TCP Segment Offload.
+ *
+ * 12-8 L4 Protocol Number (L4_PROTO_IDX)
+ *
+ * The L4 protocol type, one of enahw_io_l4_proto_t. This
+ * field is required when L4_CSUM_EN or TSO_EN are
+ * set.
+ *
+ * 13 L3 Checksum Offload (L3_CSUM_EN)
+ *
+ * Enable IPv4 header checksum offload.
+ *
+ * 14 L4 Checksum Offload (L4_CSUM_EN)
+ *
+ * Enable TCP/UDP checksum offload.
+ *
+ * 15 Ethernet FCS Disable (ETHERNET_FCS_DIS)
+ *
+ * Disable the device's Ethernet Frame Check sequence.
+ *
+ * 16 Reserved Zero
+ * 17 L4 Partial Checksum Present (L4_CSUM_PARTIAL)
+ *
+ * When set it indicates the host has already provided
+ * the pseudo-header checksum. Otherwise, it is up to the
+ * device to calculate it.
+ *
+ * When set and using TSO the host stack must remember
+ * not to include the TCP segment length in the supplied
+ * pseudo-header.
+ *
+ * The host stack should provide the pseudo-header
+ * checksum when using IPv6 with Routing Headers.
+ *
+ * 21-18 Reserved Zero
+ * 31-22 Request ID Low [9-0] (REQ_ID_LO)
+ */
+ uint32_t etd_meta_ctrl;
+
+ /* The low 32 bits of the buffer address. */
+ uint32_t etd_buff_addr_lo;
+
+ /*
+ * address high and header size
+ *
+ * 15-0 Buffer Address High [47-32] (ADDR_HI)
+ *
+	 *	The upper 16 bits (bits 47-32) of the buffer address.
+ *
+ * 23-16 Reserved Zero
+ * 31-24 Header Length (HEADER_LENGTH)
+ *
+ * This field has dubious documentation in the
+ * common/Linux driver code, even contradicting itself in
+ * the same sentence. Here's what it says, verbatim:
+ *
+ * > Header length. For Low Latency Queues, this fields
+ * > indicates the number of bytes written to the
+ * > headers' memory. For normal queues, if packet is TCP
+ * > or UDP, and longer than max_header_size, then this
+ * > field should be set to the sum of L4 header offset
+ * > and L4 header size(without options), otherwise, this
+ * > field should be set to 0. For both modes, this field
+ * > must not exceed the max_header_size. max_header_size
+ * > value is reported by the Max Queues Feature
+ * > descriptor
+ *
+ * Here's what one _might_ ascertain from the above.
+ *
+ * 1. This field should always be set in the case of
+ * LLQs/device placement.
+ *
+ * 2. This field must _never_ exceed the max header size
+ * as reported by feature detection. In our code this
+ * would be efmq_max_header_size for older ENA devices
+ * and efmqe_max_tx_header_size for newer ones. One
+ * empirical data point from a t3.small (with newer
+ * device) is a max Tx header size of 128 bytes.
+ *
+ * 3. If the packet is TCP or UDP, and the packet (or the
+ * headers?) is longer than the max header size, then
+ * this field should be set to the total header size
+ * with the exception of TCP header options.
+ * Otherwise, if the packet is not TCP or UDP, or if
+ * the packet (or header length?) _does not_ exceed
+ * the max header size, then set this value to 0.
+ *
+ * One might think, based on (3), that when the header
+ * size exceeds the max this field needs to be set, but
+ * that contradicts (2), which dictates that the total
+ * header size can never exceed the max. Sure enough, the
+ * Linux code drops all packets with headers that exceed
+ * the max. So in that case it would mean that "and
+ * longer than max_header_size" is referring to the total
+ * packet length. So for most workloads, the TCP/UDP
+ * packets should have this field set, to indicate their
+ * header length. This matches with Linux, which seems to
+ * set header length regardless of IP protocol.
+ *
+ * However, the FreeBSD code tells a different story. In
+	 *	its non-LLQ Tx path it has the following comment,
+ * verbatim:
+ *
+ * > header_len is just a hint for the device. Because
+ * > FreeBSD is not giving us information about packet
+ * > header length and it is not guaranteed that all
+ * > packet headers will be in the 1st mbuf, setting
+ * > header_len to 0 is making the device ignore this
+ * > value and resolve header on it's own.
+ *
+ * According to this we can just set the value to zero
+ * and let the device figure it out. This maps better to
+ * illumos, where we also allow the header to potentially
+ * span multiple mblks (though we do have access to the
+ * header sizes via mac_ether_offload_info_t).
+ *
+ * The upshot: for now we take advantage of the device's
+ * ability to determine the header length on its own, at
+ * the potential cost of some performance (not measured).
+ */
+ uint32_t etd_buff_addr_hi_hdr_sz;
+} enahw_tx_data_desc_t;
+
+#define ENAHW_TX_DESC_LENGTH_MASK GENMASK(15, 0)
+#define ENAHW_TX_DESC_REQ_ID_HI_SHIFT 16
+#define ENAHW_TX_DESC_REQ_ID_HI_MASK GENMASK(21, 16)
+#define ENAHW_TX_DESC_META_DESC_SHIFT 23
+#define ENAHW_TX_DESC_META_DESC_MASK BIT(23)
+#define ENAHW_TX_DESC_PHASE_SHIFT 24
+#define ENAHW_TX_DESC_PHASE_MASK BIT(24)
+#define ENAHW_TX_DESC_FIRST_SHIFT 26
+#define ENAHW_TX_DESC_FIRST_MASK BIT(26)
+#define ENAHW_TX_DESC_LAST_SHIFT 27
+#define ENAHW_TX_DESC_LAST_MASK BIT(27)
+#define ENAHW_TX_DESC_COMP_REQ_SHIFT 28
+#define ENAHW_TX_DESC_COMP_REQ_MASK BIT(28)
+#define ENAHW_TX_DESC_L3_PROTO_IDX_MASK GENMASK(3, 0)
+#define ENAHW_TX_DESC_DF_SHIFT 4
+#define ENAHW_TX_DESC_DF_MASK BIT(4)
+#define ENAHW_TX_DESC_TSO_EN_SHIFT 7
+#define ENAHW_TX_DESC_TSO_EN_MASK BIT(7)
+#define ENAHW_TX_DESC_L4_PROTO_IDX_SHIFT 8
+#define ENAHW_TX_DESC_L4_PROTO_IDX_MASK GENMASK(12, 8)
+#define ENAHW_TX_DESC_L3_CSUM_EN_SHIFT 13
+#define ENAHW_TX_DESC_L3_CSUM_EN_MASK BIT(13)
+#define ENAHW_TX_DESC_L4_CSUM_EN_SHIFT 14
+#define ENAHW_TX_DESC_L4_CSUM_EN_MASK BIT(14)
+#define ENAHW_TX_DESC_ETHERNET_FCS_DIS_SHIFT 15
+#define ENAHW_TX_DESC_ETHERNET_FCS_DIS_MASK BIT(15)
+#define ENAHW_TX_DESC_L4_CSUM_PARTIAL_SHIFT 17
+#define ENAHW_TX_DESC_L4_CSUM_PARTIAL_MASK BIT(17)
+#define ENAHW_TX_DESC_REQ_ID_LO_SHIFT 22
+#define ENAHW_TX_DESC_REQ_ID_LO_MASK GENMASK(31, 22)
+#define ENAHW_TX_DESC_ADDR_HI_MASK GENMASK(15, 0)
+#define ENAHW_TX_DESC_HEADER_LENGTH_SHIFT 24
+#define ENAHW_TX_DESC_HEADER_LENGTH_MASK GENMASK(31, 24)
+
+#define ENAHW_TX_DESC_LENGTH(desc, len) \
+ (((desc)->etd_len_ctrl) |= ((len) & ENAHW_TX_DESC_LENGTH_MASK))
+
+#define ENAHW_TX_DESC_FIRST_ON(desc) \
+ (((desc)->etd_len_ctrl) |= ENAHW_TX_DESC_FIRST_MASK)
+
+#define ENAHW_TX_DESC_FIRST_OFF(desc) \
+ (((desc)->etd_len_ctrl) &= ~ENAHW_TX_DESC_FIRST_MASK)
+
+#define ENAHW_TX_DESC_REQID_HI(desc, reqid) \
+ (((desc)->etd_len_ctrl) |= \
+ ((((reqid) >> 10) << ENAHW_TX_DESC_REQ_ID_HI_SHIFT) & \
+ ENAHW_TX_DESC_REQ_ID_HI_MASK))
+
+#define ENAHW_TX_DESC_REQID_LO(desc, reqid) \
+ (((desc)->etd_meta_ctrl) |= \
+ (((reqid) << ENAHW_TX_DESC_REQ_ID_LO_SHIFT) & \
+ ENAHW_TX_DESC_REQ_ID_LO_MASK))
+
+#define ENAHW_TX_DESC_PHASE(desc, phase) \
+ (((desc)->etd_len_ctrl) |= (((phase) << ENAHW_TX_DESC_PHASE_SHIFT) & \
+ ENAHW_TX_DESC_PHASE_MASK))
+
+#define ENAHW_TX_DESC_LAST_ON(desc) \
+ (((desc)->etd_len_ctrl) |= ENAHW_TX_DESC_LAST_MASK)
+
+#define ENAHW_TX_DESC_LAST_OFF(desc) \
+ (((desc)->etd_len_ctrl) &= ~ENAHW_TX_DESC_LAST_MASK)
+
+#define ENAHW_TX_DESC_COMP_REQ_ON(desc) \
+ (((desc)->etd_len_ctrl) |= ENAHW_TX_DESC_COMP_REQ_MASK)
+
+#define ENAHW_TX_DESC_COMP_REQ_OFF(desc) \
+ (((desc)->etd_len_ctrl) &= ~ENAHW_TX_DESC_COMP_REQ_MASK)
+
+#define ENAHW_TX_DESC_META_DESC_ON(desc) \
+ (((desc)->etd_len_ctrl) |= ENAHW_TX_DESC_META_DESC_MASK)
+
+#define ENAHW_TX_DESC_META_DESC_OFF(desc) \
+ (((desc)->etd_len_ctrl) &= ~ENAHW_TX_DESC_META_DESC_MASK)
+
+#define ENAHW_TX_DESC_ADDR_LO(desc, addr) \
+ (((desc)->etd_buff_addr_lo) = (addr))
+
+#define ENAHW_TX_DESC_ADDR_HI(desc, addr) \
+ (((desc)->etd_buff_addr_hi_hdr_sz) |= \
+ (((addr) >> 32) & ENAHW_TX_DESC_ADDR_HI_MASK))
+
+#define ENAHW_TX_DESC_HEADER_LENGTH(desc, len) \
+ (((desc)->etd_buff_addr_hi_hdr_sz) |= \
+ (((len) << ENAHW_TX_DESC_HEADER_LENGTH_SHIFT) & \
+ ENAHW_TX_DESC_HEADER_LENGTH_MASK))
+
+#define ENAHW_TX_DESC_DF_ON(desc) \
+ ((desc)->etd_meta_ctrl |= ENAHW_TX_DESC_DF_MASK)
+
+#define ENAHW_TX_DESC_TSO_OFF(desc) \
+ (((desc)->etd_meta_ctrl) &= ~ENAHW_TX_DESC_TSO_EN_MASK)
+
+#define ENAHW_TX_DESC_L3_CSUM_OFF(desc) \
+ (((desc)->etd_meta_ctrl) &= ~ENAHW_TX_DESC_L3_CSUM_EN_MASK)
+
+#define ENAHW_TX_DESC_L4_CSUM_OFF(desc) \
+ (((desc)->etd_meta_ctrl) &= ~ENAHW_TX_DESC_L4_CSUM_EN_MASK)
+
+#define ENAHW_TX_DESC_L4_CSUM_PARTIAL_ON(desc) \
+	(((desc)->etd_meta_ctrl) |= ENAHW_TX_DESC_L4_CSUM_PARTIAL_MASK)
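+
+/*
+ * Taken together, filling a single-fragment Tx data descriptor looks
+ * roughly like the following sketch; the driver's real version is
+ * ena_fill_tx_data_desc() in ena_tx.c.
+ *
+ *	bzero(desc, sizeof (*desc));
+ *	ENAHW_TX_DESC_LENGTH(desc, len);
+ *	ENAHW_TX_DESC_REQID_HI(desc, req_id);
+ *	ENAHW_TX_DESC_REQID_LO(desc, req_id);
+ *	ENAHW_TX_DESC_PHASE(desc, phase);
+ *	ENAHW_TX_DESC_FIRST_ON(desc);
+ *	ENAHW_TX_DESC_LAST_ON(desc);
+ *	ENAHW_TX_DESC_COMP_REQ_ON(desc);
+ *	ENAHW_TX_DESC_ADDR_LO(desc, addr & 0xFFFFFFFFu);
+ *	ENAHW_TX_DESC_ADDR_HI(desc, addr);
+ */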
+
+/* common: ena_eth_io_tx_meta_desc */
+typedef struct enahw_tx_meta_desc {
+ /*
+ * 9-0 Request ID Low [9-0] (REQ_ID_LO)
+ * 13-10 Reserved Zero
+ * 14 Extended Metadata Valid (EXT_VALID)
+ *
+ * When set this descriptor contains valid extended
+ * metadata. The extended metadata includes the L3/L4
+ * length and offset fields as well as the MSS bits. This
+ * is needed for TSO.
+ *
+ * 15 Reserved Zero
+ * 19-16 MSS High Bits (MSS_HI)
+ * 20 Meta Type (ETH_META_TYPE)
+ *
+ * If enabled this is an extended metadata descriptor.
+ * This seems redundant with EXT_VALID.
+ *
+ * 21 Meta Store (META_STORE)
+ *
+ * Store the extended metadata in the queue cache.
+ *
+ * 22 Reserved Zero
+ * 23 Metadata Flag (META_DESC) -- always one
+ * 24 Phase (PHASE)
+ * 25 Reserved Zero
+ * 26 First Descriptor Bit (FIRST)
+ * 27 Last Descriptor Bit (LAST)
+ * 28 Completion Request Bit (COMP_REQ)
+ * 31-29 Reserved Zero
+ */
+ uint32_t etmd_len_ctrl;
+
+ /*
+ * 5-0 Request ID High Bits [15-10] (REQ_ID_HI)
+ * 31-6 Reserved Zero
+ */
+ uint32_t etmd_word1;
+
+ /*
+ * 7-0 L3 Header Length (L3_HDR_LEN)
+	 * 15-8	L3 Header Offset (L3_HDR_OFF)
+	 * 21-16 L4 Header Length in Words (L4_HDR_LEN_IN_WORDS)
+ *
+ * Specifies the L4 header length in words. The device
+ * assumes the L4 header follows directly after the L3
+ * header and that the L4 offset is equal to L3_HDR_OFF +
+ * L3_HDR_LEN.
+ *
+ * 31-22 MSS Low Bits (MSS_LO)
+ */
+ uint32_t etmd_word2;
+ uint32_t etmd_reserved;
+} enahw_tx_meta_desc_t;
+
+/* common: N/A */
+typedef union enahw_tx_desc {
+ enahw_tx_data_desc_t etd_data;
+ enahw_tx_meta_desc_t etd_meta;
+} enahw_tx_desc_t;
+
+/* common: ena_eth_io_tx_cdesc */
+typedef struct enahw_tx_cdesc {
+ /*
+ * 15-0 Request ID Bits
+ * 16 Reserved Zero
+ */
+ uint16_t etc_req_id;
+
+ /*
+ * Presumably the status of the Tx, though the Linux driver
+ * never checks this field.
+ */
+ uint8_t etc_status;
+
+ /*
+ * 0 Phase
+ * 7-1 Reserved Zero
+ */
+ uint8_t etc_flags;
+
+ /*
+ * This isn't documented or used in the Linux driver, but
+ * these probably store the submission queue ID and the
+ * submission queue head index.
+ */
+ uint16_t etc_sub_qid;
+ uint16_t etc_sq_head_idx;
+} enahw_tx_cdesc_t;
+
+#define ENAHW_TX_CDESC_PHASE_SHIFT 0
+#define ENAHW_TX_CDESC_PHASE_MASK BIT(0)
+
+#define ENAHW_TX_CDESC_GET_PHASE(cdesc) \
+ ((cdesc)->etc_flags & ENAHW_TX_CDESC_PHASE_MASK)
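+
+/*
+ * As with the Rx completion ring, Tx completions are consumed by
+ * walking the CQ until the phase bit stops matching the expected
+ * value, flipping the expected phase each time the ring wraps. A
+ * sketch, where recycle() stands in for the caller's per-request
+ * cleanup:
+ *
+ *	while (ENAHW_TX_CDESC_GET_PHASE(cdesc) == phase) {
+ *		recycle(cdesc->etc_req_id);
+ *		head++;
+ *		if ((head & (num_descs - 1)) == 0)
+ *			phase = !phase;
+ *		cdesc = &cq_descs[head & (num_descs - 1)];
+ *	}
+ */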
+
+/* common: ena_eth_io_rx_desc */
+typedef struct enahw_rx_desc {
+ /*
+ * The length of the buffer provided by the host, in bytes.
+ * Use the value of 0 to indicate 64K.
+ */
+ uint16_t erd_length;
+ uint8_t erd_reserved1;
+
+ /*
+ * 0 Phase (PHASE)
+ * 1 Reserved Zero
+ * 2 First (FIRST)
+ *
+ * Indicates this is the first descriptor for the frame.
+ *
+ * 3 Last (LAST)
+ *
+ * Indicates this is the last descriptor for the frame.
+ *
+ * 4 Completion Request (COMP_REQ)
+ *
+ * Indicates that a completion request should be generated
+ * for this descriptor.
+ *
+ * 7-5 Reserved Zero
+ */
+ uint8_t erd_ctrl;
+
+ /*
+ * 15-0 Request ID
+ * 16 Reserved 0
+ */
+ uint16_t erd_req_id;
+ uint16_t erd_reserved2;
+
+ /* The physical address of the buffer provided by the host. */
+ uint32_t erd_buff_addr_lo;
+ uint16_t erd_buff_addr_hi;
+ uint16_t erd_reserved3;
+} enahw_rx_desc_t;
+
+#define ENAHW_RX_DESC_PHASE_MASK BIT(0)
+#define ENAHW_RX_DESC_FIRST_SHIFT 2
+#define ENAHW_RX_DESC_FIRST_MASK BIT(2)
+#define ENAHW_RX_DESC_LAST_SHIFT 3
+#define ENAHW_RX_DESC_LAST_MASK BIT(3)
+#define ENAHW_RX_DESC_COMP_REQ_SHIFT 4
+#define ENAHW_RX_DESC_COMP_REQ_MASK BIT(4)
+
+#define ENAHW_RX_DESC_SET_PHASE(desc, val) \
+ ((desc)->erd_ctrl |= ((val) & ENAHW_RX_DESC_PHASE_MASK))
+
+#define ENAHW_RX_DESC_SET_FIRST(desc) \
+ ((desc)->erd_ctrl |= ENAHW_RX_DESC_FIRST_MASK)
+
+#define ENAHW_RX_DESC_SET_LAST(desc) \
+ ((desc)->erd_ctrl |= ENAHW_RX_DESC_LAST_MASK)
+
+#define ENAHW_RX_DESC_SET_COMP_REQ(desc) \
+ ((desc)->erd_ctrl |= ENAHW_RX_DESC_COMP_REQ_MASK)
+
+/*
+ * Ethernet parsing information is only valid when last == 1.
+ *
+ * common: ena_eth_io_rx_cdesc_base
+ */
+typedef struct enahw_rx_cdesc {
+ /*
+ * 4-0 L3 Protocol Number (L3_PROTO)
+ *
+ * The L3 protocol type, one of enahw_io_l3_proto_t.
+ *
+ * 6-5 (SRC_VLAN_CNT)
+ * 7 Reserved Zero
+ * 12-8 L4 Protocol Number (L4_PROTO)
+ * 13 L3 Checksum Error (L3_CSUM_ERR)
+ *
+ * When set either the L3 checksum failed to match or the
+ * controller didn't attempt to validate the checksum.
+ * This bit is valid only when L3_PROTO indicates an IPv4
+ * packet.
+ *
+ * 14 L4 Checksum Error (L4_CSUM_ERR)
+ *
+ * When set either the L4 checksum failed to match or the
+ * controller didn't attempt to validate the checksum.
+ * This bit is valid only when L4_PROTO indicates a
+ * TCP/UDP packet, IPV4_FRAG is not set, and
+ * L4_CSUM_CHECKED is set.
+ *
+ * 15 IPv4 Fragmented (IPV4_FRAG)
+ * 16 L4 Checksum Validated (L4_CSUM_CHECKED)
+ *
+ * When set it indicates the device attempted to validate
+ * the L4 checksum.
+ *
+ * 23-17 Reserved Zero
+ * 24 Phase (PHASE)
+ * 25 (L3_CSUM2)
+ *
+ * According to the Linux source this is the "second
+ * checksum engine result". It's never checked.
+ *
+ * 26 First Descriptor Bit (FIRST)
+ *
+ * Indicates the first descriptor for the frame.
+ *
+ * 27 Last Descriptor Bit (LAST)
+ *
+ * Indicates the last descriptor for the frame.
+ *
+ * 29-28 Reserved Zero
+ * 30 Buffer Type (BUFFER)
+ *
+	 *	When set, this indicates a data descriptor.
+	 *	Otherwise, it is a metadata descriptor.
+	 *
+	 * 31	Reserved
+ */
+ uint32_t erc_status;
+ uint16_t erc_length;
+ uint16_t erc_req_id;
+
+ /* 32-bit hash result */
+ uint32_t erc_hash;
+ uint16_t erc_sub_qid;
+
+ /*
+ * The device may choose to offset the start of the header
+ * data (which implies this value only applies to the first
+ * descriptor). When and why the device does this is not
+ * documented in the common code. The most likely case would
+ * be for IP header alignment.
+ */
+ uint8_t erc_offset;
+ uint8_t erc_reserved;
+} enahw_rx_cdesc_t;
+
+#define ENAHW_RX_CDESC_L3_PROTO_MASK GENMASK(4, 0)
+#define ENAHW_RX_CDESC_SRC_VLAN_CNT_SHIFT 5
+#define ENAHW_RX_CDESC_SRC_VLAN_CNT_MASK GENMASK(6, 5)
+#define ENAHW_RX_CDESC_L4_PROTO_SHIFT 8
+#define ENAHW_RX_CDESC_L4_PROTO_MASK GENMASK(12, 8)
+#define ENAHW_RX_CDESC_L3_CSUM_ERR_SHIFT 13
+#define ENAHW_RX_CDESC_L3_CSUM_ERR_MASK BIT(13)
+#define ENAHW_RX_CDESC_L4_CSUM_ERR_SHIFT 14
+#define ENAHW_RX_CDESC_L4_CSUM_ERR_MASK BIT(14)
+#define ENAHW_RX_CDESC_IPV4_FRAG_SHIFT 15
+#define ENAHW_RX_CDESC_IPV4_FRAG_MASK BIT(15)
+#define ENAHW_RX_CDESC_L4_CSUM_CHECKED_SHIFT 16
+#define ENAHW_RX_CDESC_L4_CSUM_CHECKED_MASK BIT(16)
+#define ENAHW_RX_CDESC_PHASE_SHIFT 24
+#define ENAHW_RX_CDESC_PHASE_MASK BIT(24)
+#define ENAHW_RX_CDESC_L3_CSUM2_SHIFT 25
+#define ENAHW_RX_CDESC_L3_CSUM2_MASK BIT(25)
+#define ENAHW_RX_CDESC_FIRST_SHIFT 26
+#define ENAHW_RX_CDESC_FIRST_MASK BIT(26)
+#define ENAHW_RX_CDESC_LAST_SHIFT 27
+#define ENAHW_RX_CDESC_LAST_MASK BIT(27)
+#define ENAHW_RX_CDESC_BUFFER_SHIFT 30
+#define ENAHW_RX_CDESC_BUFFER_MASK BIT(30)
+
+#define ENAHW_RX_CDESC_L3_PROTO(desc) \
+ ((desc)->erc_status & ENAHW_RX_CDESC_L3_PROTO_MASK)
+
+#define ENAHW_RX_CDESC_L3_CSUM_ERR(desc) \
+ ((((desc)->erc_status & ENAHW_RX_CDESC_L3_CSUM_ERR_MASK) >> \
+ ENAHW_RX_CDESC_L3_CSUM_ERR_SHIFT) != 0)
+
+#define ENAHW_RX_CDESC_L4_PROTO(desc) \
+ (((desc)->erc_status & ENAHW_RX_CDESC_L4_PROTO_MASK) >> \
+ ENAHW_RX_CDESC_L4_PROTO_SHIFT)
+
+#define ENAHW_RX_CDESC_L4_CSUM_CHECKED(desc) \
+ ((((desc)->erc_status & ENAHW_RX_CDESC_L4_CSUM_CHECKED_MASK) >> \
+ ENAHW_RX_CDESC_L4_CSUM_CHECKED_SHIFT) != 0)
+
+#define ENAHW_RX_CDESC_L4_CSUM_ERR(desc) \
+ ((((desc)->erc_status & ENAHW_RX_CDESC_L4_CSUM_ERR_MASK) >> \
+ ENAHW_RX_CDESC_L4_CSUM_ERR_SHIFT) != 0)
+
+#define ENAHW_RX_CDESC_PHASE(desc) \
+ (((desc)->erc_status & ENAHW_RX_CDESC_PHASE_MASK) >> \
+ ENAHW_RX_CDESC_PHASE_SHIFT)
+
+#define ENAHW_RX_CDESC_FIRST(desc) \
+ ((((desc)->erc_status & ENAHW_RX_CDESC_FIRST_MASK) >> \
+ ENAHW_RX_CDESC_FIRST_SHIFT) == 1)
+
+#define ENAHW_RX_CDESC_LAST(desc) \
+ ((((desc)->erc_status & ENAHW_RX_CDESC_LAST_MASK) >> \
+ ENAHW_RX_CDESC_LAST_SHIFT) == 1)
+
+/*
+ * Controls for the interrupt register mapped to each Rx/Tx CQ.
+ */
+#define ENAHW_REG_INTR_RX_DELAY_MASK GENMASK(14, 0)
+#define ENAHW_REG_INTR_TX_DELAY_SHIFT 15
+#define ENAHW_REG_INTR_TX_DELAY_MASK GENMASK(29, 15)
+#define ENAHW_REG_INTR_UNMASK_SHIFT 30
+#define ENAHW_REG_INTR_UNMASK_MASK BIT(30)
+
+#define ENAHW_REG_INTR_UNMASK(val) \
+ ((val) |= ENAHW_REG_INTR_UNMASK_MASK)
+
+#define ENAHW_REG_INTR_MASK(val) \
+ ((val) &= ~ENAHW_REG_INTR_UNMASK_MASK)
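+
+/*
+ * The interrupt paths in this driver only toggle the unmask bit,
+ * leaving the delay (moderation) fields at zero. A caller wanting
+ * hardware interrupt moderation would also encode the delays, e.g.:
+ *
+ *	uint32_t ctrl = (rx_delay & ENAHW_REG_INTR_RX_DELAY_MASK) |
+ *	    (((uint32_t)tx_delay << ENAHW_REG_INTR_TX_DELAY_SHIFT) &
+ *	    ENAHW_REG_INTR_TX_DELAY_MASK);
+ *
+ *	ENAHW_REG_INTR_UNMASK(ctrl);
+ *	ena_hw_abs_write32(ena, cq_unmask_addr, ctrl);
+ */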
+
+#endif /* _ENA_HW_H */
diff --git a/usr/src/uts/common/io/ena/ena_intr.c b/usr/src/uts/common/io/ena/ena_intr.c
new file mode 100644
index 0000000000..2650609cfa
--- /dev/null
+++ b/usr/src/uts/common/io/ena/ena_intr.c
@@ -0,0 +1,175 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2021 Oxide Computer Company
+ */
+#include "ena.h"
+
+/*
+ * We currently limit the number of Tx/Rx queues to the number of
+ * available interrupts (minus one for the admin queue).
+ */
+static uint_t
+ena_io_intr(caddr_t arg1, caddr_t arg2)
+{
+ ena_t *ena = (ena_t *)arg1;
+ uint16_t vector = (uintptr_t)(void *)arg2;
+ ASSERT3U(vector, >, 0);
+ ASSERT3U(vector, <, ena->ena_num_intrs);
+ ena_txq_t *txq = &ena->ena_txqs[vector - 1];
+ ena_rxq_t *rxq = &ena->ena_rxqs[vector - 1];
+ uint32_t intr_ctrl;
+
+ ASSERT3P(txq, !=, NULL);
+ ASSERT3P(rxq, !=, NULL);
+ ena_tx_intr_work(txq);
+ ena_rx_intr_work(rxq);
+
+ /*
+	 * The Rx and Tx queues share the same interrupt; we only
+	 * need to unmask it via one of them.
+ */
+ intr_ctrl = ena_hw_abs_read32(ena, txq->et_cq_unmask_addr);
+ ENAHW_REG_INTR_UNMASK(intr_ctrl);
+ ena_hw_abs_write32(ena, txq->et_cq_unmask_addr, intr_ctrl);
+ return (DDI_INTR_CLAIMED);
+}
+
+static uint_t
+ena_admin_intr(caddr_t arg1, caddr_t arg2)
+{
+ ena_t *ena = (ena_t *)arg1;
+
+ ena_aenq_work(ena);
+ return (DDI_INTR_CLAIMED);
+}
+
+void
+ena_intr_remove_handlers(ena_t *ena)
+{
+ for (int i = 0; i < ena->ena_num_intrs; i++) {
+ int ret = ddi_intr_remove_handler(ena->ena_intr_handles[i]);
+
+ /* Nothing we can really do except log. */
+ if (ret != DDI_SUCCESS) {
+ ena_err(ena, "failed to remove interrupt handler for "
+ "vector %d: %d", i, ret);
+ }
+ }
+}
+
+/*
+ * The ena driver uses separate interrupt handlers for the admin queue
+ * and I/O queues.
+ */
+boolean_t
+ena_intr_add_handlers(ena_t *ena)
+{
+ ASSERT3S(ena->ena_num_intrs, >=, 2);
+ if (ddi_intr_add_handler(ena->ena_intr_handles[0], ena_admin_intr, ena,
+ (void *)(uintptr_t)0) != DDI_SUCCESS) {
+ ena_err(ena, "failed to add admin interrupt handler");
+ return (B_FALSE);
+ }
+
+ for (int i = 1; i < ena->ena_num_intrs; i++) {
+ caddr_t vector = (void *)(uintptr_t)(i);
+ int ret = ddi_intr_add_handler(ena->ena_intr_handles[i],
+ ena_io_intr, ena, vector);
+
+ if (ret != DDI_SUCCESS) {
+ ena_err(ena, "failed to add I/O interrupt handler "
+ "for vector %u", i);
+
+ /*
+ * If we fail to add any I/O handler, then all
+ * successfully added handlers are removed,
+ * including the admin handler. For example,
+ * when i=2 we remove handler 1 (the first I/O
+ * handler), and when i=1 we remove handler 0
+ * (the admin handler).
+ */
+ while (i >= 1) {
+ i--;
+ (void) ddi_intr_remove_handler(
+ ena->ena_intr_handles[i]);
+ }
+
+ return (B_FALSE);
+ }
+ }
+
+ return (B_TRUE);
+}
+
+boolean_t
+ena_intrs_disable(ena_t *ena)
+{
+ int ret;
+
+ if (ena->ena_intr_caps & DDI_INTR_FLAG_BLOCK) {
+ if ((ret = ddi_intr_block_disable(ena->ena_intr_handles,
+ ena->ena_num_intrs)) != DDI_SUCCESS) {
+ ena_err(ena, "failed to block disable interrupts: %d",
+ ret);
+ return (B_FALSE);
+ }
+ } else {
+ for (int i = 0; i < ena->ena_num_intrs; i++) {
+ ret = ddi_intr_disable(ena->ena_intr_handles[i]);
+ if (ret != DDI_SUCCESS) {
+ ena_err(ena, "failed to disable interrupt "
+ "%d: %d", i, ret);
+ return (B_FALSE);
+ }
+ }
+ }
+
+ return (B_TRUE);
+}
+
+boolean_t
+ena_intrs_enable(ena_t *ena)
+{
+ int ret;
+
+ if (ena->ena_intr_caps & DDI_INTR_FLAG_BLOCK) {
+ if ((ret = ddi_intr_block_enable(ena->ena_intr_handles,
+ ena->ena_num_intrs)) != DDI_SUCCESS) {
+ ena_err(ena, "failed to block enable interrupts: %d",
+ ret);
+ return (B_FALSE);
+ }
+ } else {
+ for (int i = 0; i < ena->ena_num_intrs; i++) {
+ if ((ret = ddi_intr_enable(ena->ena_intr_handles[i])) !=
+ DDI_SUCCESS) {
+ ena_err(ena, "failed to enable interrupt "
+ "%d: %d", i, ret);
+
+ /*
+ * If we fail to enable any interrupt,
+ * then all interrupts are disabled.
+ */
+ while (i >= 1) {
+ i--;
+ (void) ddi_intr_disable(
+ ena->ena_intr_handles[i]);
+ }
+
+ return (B_FALSE);
+ }
+ }
+ }
+
+ return (B_TRUE);
+}
diff --git a/usr/src/uts/common/io/ena/ena_rx.c b/usr/src/uts/common/io/ena/ena_rx.c
new file mode 100644
index 0000000000..7f0b7db94a
--- /dev/null
+++ b/usr/src/uts/common/io/ena/ena_rx.c
@@ -0,0 +1,531 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2021 Oxide Computer Company
+ */
+#include "ena.h"
+
+static void
+ena_refill_rx(ena_rxq_t *rxq, uint16_t num)
+{
+ VERIFY3P(rxq, !=, NULL);
+ ASSERT(MUTEX_HELD(&rxq->er_lock));
+ ASSERT3U(num, <=, rxq->er_sq_num_descs);
+ uint16_t tail_mod = rxq->er_sq_tail_idx & (rxq->er_sq_num_descs - 1);
+
+ while (num != 0) {
+ enahw_rx_desc_t *desc = &rxq->er_sq_descs[tail_mod];
+ ena_rx_ctrl_block_t *rcb = &rxq->er_rcbs[tail_mod];
+ uint16_t phase = rxq->er_sq_phase;
+
+ VERIFY3U(tail_mod, <, rxq->er_sq_num_descs);
+ VERIFY3P(desc, !=, NULL);
+ VERIFY3P(rcb, !=, NULL);
+ VERIFY3P(desc, >=, rxq->er_sq_descs);
+ VERIFY3P(desc, <=,
+ (rxq->er_sq_descs + rxq->er_sq_num_descs - 1));
+
+ desc->erd_length = rcb->ercb_dma.edb_len;
+ desc->erd_req_id = tail_mod;
+ VERIFY3P(rcb->ercb_dma.edb_cookie, !=, NULL);
+ ena_set_dma_addr_values(rxq->er_ena,
+ rcb->ercb_dma.edb_cookie->dmac_laddress,
+ &desc->erd_buff_addr_lo, &desc->erd_buff_addr_hi);
+ ENAHW_RX_DESC_SET_PHASE(desc, phase);
+ ENAHW_RX_DESC_SET_FIRST(desc);
+ ENAHW_RX_DESC_SET_LAST(desc);
+ ENAHW_RX_DESC_SET_COMP_REQ(desc);
+ DTRACE_PROBE1(ena__refill__rx, enahw_rx_desc_t *, desc);
+ rxq->er_sq_tail_idx++;
+ tail_mod = rxq->er_sq_tail_idx & (rxq->er_sq_num_descs - 1);
+
+ if (tail_mod == 0) {
+ rxq->er_sq_phase = !rxq->er_sq_phase;
+ }
+
+ num--;
+ }
+
+ ENA_DMA_SYNC(rxq->er_sq_dma, DDI_DMA_SYNC_FORDEV);
+ ena_hw_abs_write32(rxq->er_ena, rxq->er_sq_db_addr,
+ rxq->er_sq_tail_idx);
+}
+
+void
+ena_free_rx_dma(ena_rxq_t *rxq)
+{
+ if (rxq->er_rcbs != NULL) {
+ for (uint_t i = 0; i < rxq->er_sq_num_descs; i++) {
+ ena_rx_ctrl_block_t *rcb = &rxq->er_rcbs[i];
+ ena_dma_free(&rcb->ercb_dma);
+ }
+
+ kmem_free(rxq->er_rcbs,
+ sizeof (*rxq->er_rcbs) * rxq->er_sq_num_descs);
+
+ rxq->er_rcbs = NULL;
+ }
+
+ ena_dma_free(&rxq->er_cq_dma);
+ rxq->er_cq_descs = NULL;
+ rxq->er_cq_num_descs = 0;
+
+ ena_dma_free(&rxq->er_sq_dma);
+ rxq->er_sq_descs = NULL;
+ rxq->er_sq_num_descs = 0;
+
+ rxq->er_state &= ~ENA_RXQ_STATE_HOST_ALLOC;
+}
+
+static int
+ena_alloc_rx_dma(ena_rxq_t *rxq)
+{
+ ena_t *ena = rxq->er_ena;
+ size_t cq_descs_sz;
+ size_t sq_descs_sz;
+ ena_dma_conf_t conf;
+ int err = 0;
+
+ cq_descs_sz = rxq->er_cq_num_descs * sizeof (*rxq->er_cq_descs);
+ sq_descs_sz = rxq->er_sq_num_descs * sizeof (*rxq->er_sq_descs);
+ conf = (ena_dma_conf_t) {
+ .edc_size = sq_descs_sz,
+ .edc_align = ENAHW_IO_SQ_DESC_BUF_ALIGNMENT,
+ .edc_sgl = 1,
+ .edc_endian = DDI_NEVERSWAP_ACC,
+ .edc_stream = B_FALSE,
+ };
+
+ if (!ena_dma_alloc(ena, &rxq->er_sq_dma, &conf, sq_descs_sz)) {
+ return (ENOMEM);
+ }
+
+ rxq->er_sq_descs = (void *)rxq->er_sq_dma.edb_va;
+ rxq->er_rcbs = kmem_zalloc(sizeof (*rxq->er_rcbs) *
+ rxq->er_sq_num_descs, KM_SLEEP);
+
+ for (uint_t i = 0; i < rxq->er_sq_num_descs; i++) {
+ ena_rx_ctrl_block_t *rcb = &rxq->er_rcbs[i];
+ ena_dma_conf_t buf_conf = {
+ .edc_size = ena->ena_rx_buf_sz,
+ .edc_align = 1,
+ .edc_sgl = ena->ena_rx_sgl_max_sz,
+ .edc_endian = DDI_NEVERSWAP_ACC,
+ .edc_stream = B_TRUE,
+ };
+
+ if (!ena_dma_alloc(ena, &rcb->ercb_dma, &buf_conf,
+ ena->ena_rx_buf_sz)) {
+ err = ENOMEM;
+ goto error;
+ }
+ }
+
+ conf = (ena_dma_conf_t) {
+ .edc_size = cq_descs_sz,
+ .edc_align = ENAHW_IO_CQ_DESC_BUF_ALIGNMENT,
+ .edc_sgl = 1,
+ .edc_endian = DDI_NEVERSWAP_ACC,
+ .edc_stream = B_FALSE,
+ };
+
+ if (!ena_dma_alloc(ena, &rxq->er_cq_dma, &conf, cq_descs_sz)) {
+ err = ENOMEM;
+ goto error;
+ }
+
+ rxq->er_cq_descs = (void *)rxq->er_cq_dma.edb_va;
+ rxq->er_state |= ENA_RXQ_STATE_HOST_ALLOC;
+ return (0);
+
+error:
+ ena_free_rx_dma(rxq);
+ return (err);
+}
+
+boolean_t
+ena_alloc_rxq(ena_rxq_t *rxq)
+{
+ int ret = 0;
+ ena_t *ena = rxq->er_ena;
+ uint16_t cq_hw_idx, sq_hw_idx;
+ uint32_t *cq_unmask_addr, *cq_headdb, *cq_numanode;
+ uint32_t *sq_db_addr;
+
+ /*
+ * First, allocate the Rx data buffers.
+ */
+ if ((ret = ena_alloc_rx_dma(rxq)) != 0) {
+ ena_err(ena, "failed to allocate Rx queue %u data buffers: %d",
+ rxq->er_rxqs_idx, ret);
+ return (B_FALSE);
+ }
+
+ ASSERT(rxq->er_state & ENA_RXQ_STATE_HOST_ALLOC);
+
+ /*
+ * Second, create the Completion Queue.
+ */
+ ret = ena_create_cq(ena, rxq->er_cq_num_descs,
+ rxq->er_cq_dma.edb_cookie->dmac_laddress, B_FALSE,
+ rxq->er_intr_vector, &cq_hw_idx, &cq_unmask_addr, &cq_headdb,
+ &cq_numanode);
+
+ if (ret != 0) {
+ ena_err(ena, "failed to create Rx CQ %u: %d", rxq->er_rxqs_idx,
+ ret);
+ return (B_FALSE);
+ }
+
+ /* The phase must always start on 1. */
+ rxq->er_cq_phase = 1;
+ rxq->er_cq_head_idx = 0;
+ rxq->er_cq_hw_idx = cq_hw_idx;
+ rxq->er_cq_unmask_addr = cq_unmask_addr;
+ rxq->er_cq_head_db_addr = cq_headdb;
+ rxq->er_cq_numa_addr = cq_numanode;
+ rxq->er_state |= ENA_RXQ_STATE_CQ_CREATED;
+
+ /*
+ * Third, create the Submission Queue to match with the above
+ * CQ. At this time we force the SQ and CQ to have the same
+ * number of descriptors as we only use a 1:1 completion
+ * policy. However, in the future, we could loosen this and
+ * use an on-demand completion policy and the two could have a
+ * different number of descriptors.
+ */
+ ASSERT3U(rxq->er_sq_num_descs, ==, rxq->er_cq_num_descs);
+ ret = ena_create_sq(ena, rxq->er_sq_num_descs,
+ rxq->er_sq_dma.edb_cookie->dmac_laddress, B_FALSE, cq_hw_idx,
+ &sq_hw_idx, &sq_db_addr);
+
+ if (ret != 0) {
+ ena_err(ena, "failed to create Rx SQ %u: %d", rxq->er_rxqs_idx,
+ ret);
+ return (B_FALSE);
+ }
+
+ ASSERT3P(sq_db_addr, !=, NULL);
+ rxq->er_sq_hw_idx = sq_hw_idx;
+ rxq->er_sq_db_addr = sq_db_addr;
+ /* The phase must always start on 1. */
+ rxq->er_sq_phase = 1;
+ rxq->er_sq_tail_idx = 0;
+ rxq->er_sq_avail_descs = rxq->er_sq_num_descs;
+ rxq->er_mode = ENA_RXQ_MODE_INTR;
+ rxq->er_state |= ENA_RXQ_STATE_SQ_CREATED;
+
+ return (B_TRUE);
+}
+
+void
+ena_cleanup_rxq(ena_rxq_t *rxq)
+{
+ int ret = 0;
+ ena_t *ena = rxq->er_ena;
+
+ if ((rxq->er_state & ENA_RXQ_STATE_SQ_CREATED) != 0) {
+ ret = ena_destroy_sq(ena, rxq->er_sq_hw_idx, B_FALSE);
+
+ if (ret != 0) {
+ ena_err(ena, "failed to destroy Rx SQ %u: %d",
+ rxq->er_rxqs_idx, ret);
+ }
+
+ rxq->er_sq_hw_idx = 0;
+ rxq->er_sq_db_addr = NULL;
+ rxq->er_sq_tail_idx = 0;
+ rxq->er_sq_phase = 0;
+ rxq->er_state &= ~ENA_RXQ_STATE_SQ_CREATED;
+ }
+
+ if ((rxq->er_state & ENA_RXQ_STATE_CQ_CREATED) != 0) {
+ ret = ena_destroy_cq(ena, rxq->er_cq_hw_idx);
+
+ if (ret != 0) {
+ ena_err(ena, "failed to destroy Rx CQ %u: %d",
+ rxq->er_rxqs_idx, ret);
+ }
+
+ rxq->er_cq_hw_idx = 0;
+ rxq->er_cq_head_idx = 0;
+ rxq->er_cq_phase = 0;
+ rxq->er_cq_head_db_addr = NULL;
+ rxq->er_cq_unmask_addr = NULL;
+ rxq->er_cq_numa_addr = NULL;
+ rxq->er_state &= ~ENA_RXQ_STATE_CQ_CREATED;
+ }
+
+ ena_free_rx_dma(rxq);
+ ASSERT3S(rxq->er_state, ==, ENA_RXQ_STATE_NONE);
+}
+
+void
+ena_ring_rx_stop(mac_ring_driver_t rh)
+{
+ ena_rxq_t *rxq = (ena_rxq_t *)rh;
+ uint32_t intr_ctrl;
+
+ intr_ctrl = ena_hw_abs_read32(rxq->er_ena, rxq->er_cq_unmask_addr);
+ ENAHW_REG_INTR_MASK(intr_ctrl);
+ ena_hw_abs_write32(rxq->er_ena, rxq->er_cq_unmask_addr, intr_ctrl);
+
+ rxq->er_state &= ~ENA_RXQ_STATE_RUNNING;
+ rxq->er_state &= ~ENA_RXQ_STATE_READY;
+}
+
+int
+ena_ring_rx_start(mac_ring_driver_t rh, uint64_t gen_num)
+{
+ ena_rxq_t *rxq = (ena_rxq_t *)rh;
+ ena_t *ena = rxq->er_ena;
+ uint32_t intr_ctrl;
+
+ mutex_enter(&rxq->er_lock);
+ ena_refill_rx(rxq, rxq->er_sq_num_descs);
+ rxq->er_m_gen_num = gen_num;
+ rxq->er_intr_limit = ena->ena_rxq_intr_limit;
+ mutex_exit(&rxq->er_lock);
+
+ rxq->er_state |= ENA_RXQ_STATE_READY;
+
+ intr_ctrl = ena_hw_abs_read32(ena, rxq->er_cq_unmask_addr);
+ ENAHW_REG_INTR_UNMASK(intr_ctrl);
+ ena_hw_abs_write32(ena, rxq->er_cq_unmask_addr, intr_ctrl);
+ rxq->er_state |= ENA_RXQ_STATE_RUNNING;
+ return (0);
+}
+
+mblk_t *
+ena_ring_rx(ena_rxq_t *rxq, int poll_bytes)
+{
+ ena_t *ena = rxq->er_ena;
+ uint16_t head_mod = rxq->er_cq_head_idx & (rxq->er_cq_num_descs - 1);
+ uint64_t total_bytes = 0;
+ uint64_t num_frames = 0;
+ enahw_rx_cdesc_t *cdesc;
+ boolean_t polling = B_TRUE;
+ mblk_t *head = NULL;
+ mblk_t *tail = NULL;
+
+ ASSERT(MUTEX_HELD(&rxq->er_lock));
+ ENA_DMA_SYNC(rxq->er_cq_dma, DDI_DMA_SYNC_FORKERNEL);
+
+ if (poll_bytes == ENA_INTERRUPT_MODE) {
+ polling = B_FALSE;
+ }
+
+ cdesc = &rxq->er_cq_descs[head_mod];
+ VERIFY3P(cdesc, >=, rxq->er_cq_descs);
+ VERIFY3P(cdesc, <=, (rxq->er_cq_descs + rxq->er_cq_num_descs - 1));
+
+ while (ENAHW_RX_CDESC_PHASE(cdesc) == rxq->er_cq_phase) {
+ boolean_t first, last;
+ ena_rx_ctrl_block_t *rcb;
+ uint16_t req_id;
+ mblk_t *mp;
+ enahw_io_l3_proto_t l3proto;
+ enahw_io_l4_proto_t l4proto;
+ boolean_t l4csum_checked;
+ uint32_t hflags = 0;
+
+ VERIFY3U(head_mod, <, rxq->er_cq_num_descs);
+ /*
+ * Currently, all incoming frames fit in a single Rx
+ * buffer (erd_length > total frame size). In the
+ * future, if we decide to loan buffers which are
+ * smaller, we will need to modify this code to read
+ * one or more descriptors (based on frame size).
+ *
+ * For this reason we do not expect any frame to span
+ * multiple descriptors. Therefore, we drop any data
+		 * not delivered as a single descriptor, i.e., where
+		 * 'first' and 'last' are not both set.
+ */
+ first = ENAHW_RX_CDESC_FIRST(cdesc);
+ last = ENAHW_RX_CDESC_LAST(cdesc);
+
+ if (!first || !last) {
+ mutex_enter(&rxq->er_stat_lock);
+ rxq->er_stat.ers_multi_desc.value.ui64++;
+ mutex_exit(&rxq->er_stat_lock);
+ goto next_desc;
+ }
+
+ req_id = cdesc->erc_req_id;
+ VERIFY3U(req_id, <, rxq->er_cq_num_descs);
+ rcb = &rxq->er_rcbs[req_id];
+ rcb->ercb_offset = cdesc->erc_offset;
+ rcb->ercb_length = cdesc->erc_length;
+ ASSERT3U(rcb->ercb_length, <=, ena->ena_max_frame_total);
+ mp = allocb(rcb->ercb_length + ENA_RX_BUF_IPHDR_ALIGNMENT, 0);
+
+ /*
+ * If we can't allocate an mblk, things are looking
+ * grim. Forget about this frame and move on.
+ */
+ if (mp == NULL) {
+ mutex_enter(&rxq->er_stat_lock);
+ rxq->er_stat.ers_allocb_fail.value.ui64++;
+ mutex_exit(&rxq->er_stat_lock);
+ goto next_desc;
+ }
+
+ /*
+ * As we pull frames we need to link them together as
+ * one chain to be delivered up to mac.
+ */
+ if (head == NULL) {
+ head = mp;
+ } else {
+ tail->b_next = mp;
+ }
+
+ tail = mp;
+
+ /*
+ * We need to make sure the bytes are copied to the
+ * correct offset to achieve 4-byte IP header
+ * alignment.
+ *
+ * If we start using desballoc on the buffers, then we
+ * will need to make sure to apply this offset to the
+ * DMA buffers as well. Though it may be the case the
+ * device does this implicitly and that's what
+ * cdesc->erc_offset is for; we don't know because
+ * it's not documented.
+ */
+ mp->b_wptr += ENA_RX_BUF_IPHDR_ALIGNMENT;
+ mp->b_rptr += ENA_RX_BUF_IPHDR_ALIGNMENT;
+ bcopy(rcb->ercb_dma.edb_va + rcb->ercb_offset, mp->b_wptr,
+ rcb->ercb_length);
+ mp->b_wptr += rcb->ercb_length;
+ total_bytes += rcb->ercb_length;
+ VERIFY3P(mp->b_wptr, >, mp->b_rptr);
+ VERIFY3P(mp->b_wptr, <=, mp->b_datap->db_lim);
+
+ l3proto = ENAHW_RX_CDESC_L3_PROTO(cdesc);
+ l4proto = ENAHW_RX_CDESC_L4_PROTO(cdesc);
+
+ /*
+ * When it comes to bad TCP/IP checksums we do not
+ * discard the packet at this level. Instead, we let
+ * it percolate up for further processing and tracking
+ * by the upstream TCP/IP stack.
+ */
+ if (ena->ena_rx_l3_ipv4_csum &&
+ l3proto == ENAHW_IO_L3_PROTO_IPV4) {
+ boolean_t l3_csum_err =
+ ENAHW_RX_CDESC_L3_CSUM_ERR(cdesc);
+
+ if (l3_csum_err) {
+ mutex_enter(&rxq->er_stat_lock);
+ rxq->er_stat.ers_hck_ipv4_err.value.ui64++;
+ mutex_exit(&rxq->er_stat_lock);
+ } else {
+ hflags |= HCK_IPV4_HDRCKSUM_OK;
+ }
+ }
+
+ l4csum_checked = ENAHW_RX_CDESC_L4_CSUM_CHECKED(cdesc);
+
+ if (ena->ena_rx_l4_ipv4_csum && l4csum_checked &&
+ l4proto == ENAHW_IO_L4_PROTO_TCP) {
+ boolean_t l4_csum_err =
+ ENAHW_RX_CDESC_L4_CSUM_ERR(cdesc);
+
+ if (l4_csum_err) {
+ mutex_enter(&rxq->er_stat_lock);
+ rxq->er_stat.ers_hck_l4_err.value.ui64++;
+ mutex_exit(&rxq->er_stat_lock);
+ } else {
+ hflags |= HCK_FULLCKSUM_OK;
+ }
+ }
+
+ if (hflags != 0) {
+ mac_hcksum_set(mp, 0, 0, 0, 0, hflags);
+ }
+
+next_desc:
+ /*
+ * Technically, if we arrived here due to a failure,
+ * then we did not read a new frame. However, we count
+		 * it anyway as progress toward the interrupt work
+		 * limit. The failure stats will allow us to
+		 * differentiate good frames from bad.
+ */
+ num_frames++;
+ rxq->er_cq_head_idx++;
+ head_mod = rxq->er_cq_head_idx & (rxq->er_cq_num_descs - 1);
+
+ if (head_mod == 0) {
+ rxq->er_cq_phase = !rxq->er_cq_phase;
+ }
+
+ if (polling && (total_bytes > poll_bytes)) {
+ break;
+ } else if (!polling && (num_frames >= rxq->er_intr_limit)) {
+ mutex_enter(&rxq->er_stat_lock);
+ rxq->er_stat.ers_intr_limit.value.ui64++;
+ mutex_exit(&rxq->er_stat_lock);
+ break;
+ }
+
+ cdesc = &rxq->er_cq_descs[head_mod];
+ VERIFY3P(cdesc, >=, rxq->er_cq_descs);
+ VERIFY3P(cdesc, <=,
+ (rxq->er_cq_descs + rxq->er_cq_num_descs - 1));
+ }
+
+ mutex_enter(&rxq->er_stat_lock);
+ rxq->er_stat.ers_packets.value.ui64 += num_frames;
+ rxq->er_stat.ers_bytes.value.ui64 += total_bytes;
+ mutex_exit(&rxq->er_stat_lock);
+
+ DTRACE_PROBE4(rx__frames, mblk_t *, head, boolean_t, polling, uint64_t,
+ num_frames, uint64_t, total_bytes);
+ ena_refill_rx(rxq, num_frames);
+ return (head);
+}
+
+void
+ena_rx_intr_work(ena_rxq_t *rxq)
+{
+ mblk_t *mp;
+
+ mutex_enter(&rxq->er_lock);
+ mp = ena_ring_rx(rxq, ENA_INTERRUPT_MODE);
+ mutex_exit(&rxq->er_lock);
+
+ if (mp == NULL) {
+ return;
+ }
+
+ mac_rx_ring(rxq->er_ena->ena_mh, rxq->er_mrh, mp, rxq->er_m_gen_num);
+}
+
+mblk_t *
+ena_ring_rx_poll(void *rh, int poll_bytes)
+{
+ ena_rxq_t *rxq = rh;
+ mblk_t *mp;
+
+ ASSERT3S(poll_bytes, >, 0);
+
+ mutex_enter(&rxq->er_lock);
+ mp = ena_ring_rx(rxq, poll_bytes);
+ mutex_exit(&rxq->er_lock);
+
+ return (mp);
+}
diff --git a/usr/src/uts/common/io/ena/ena_stats.c b/usr/src/uts/common/io/ena/ena_stats.c
new file mode 100644
index 0000000000..c8ef7ae260
--- /dev/null
+++ b/usr/src/uts/common/io/ena/ena_stats.c
@@ -0,0 +1,475 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2021 Oxide Computer Company
+ */
+#include "ena.h"
+
+/*
+ * The ENA device provides the following hardware stats. It appears
+ * that all stats are available at both the device level and the
+ * queue level. However, Linux and FreeBSD don't implement queue
+ * scope. It's not clear how one would implement queue scope because
+ * there is nothing in the common code describing how to determine the
+ * queue index number. Both the SQ and CQ have device index values,
+ * but for a given logical queue they don't always match, and so it's
+ * not clear what value to use for querying the stats. Therefore,
+ * device-wide basic and extended stats come from the device, while
+ * queue/ring stats come from the driver.
+ *
+ * From empirical testing, these statistics appear to be cumulative.
+ * However, this guarantee is not explicitly documented anywhere in
+ * the common code that the author could find.
+ *
+ * BASIC (ENAHW_GET_STATS_TYPE_BASIC)
+ *
+ * - Rx packets/bytes
+ * - Rx drops
+ * - Tx packets/bytes
+ * - Tx drops
+ *
+ * EXTENDED (ENAHW_GET_STATS_TYPE_EXTENDED)
+ *
+ * There is no structure defined for these stats in the Linux
+ * driver. Based on the FreeBSD driver, it looks like extended
+ * stats are simply a buffer of C strings? Come back to this
+ * later.
+ *
+ * ENI (ENAHW_GET_STATS_TYPE_ENI)
+ *
+ * - Rx Bandwidth Allowance Exceeded
+ * - Tx Bandwidth Allowance Exceeded
+ * - PPS Allowance Exceeded (presumably for combined Rx/Tx)
+ * - Connection Tracking PPS Allowance Exceeded
+ *   - Link-local PPS Allowance Exceeded
+ */
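+
+/*
+ * The device-level stats above are exposed as named kstats (see the
+ * init functions below), so they can be read from userland with, for
+ * example:
+ *
+ *	kstat -m ena -n device_basic
+ *	kstat -m ena -n device_ext
+ */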
+
+static int
+ena_stat_device_basic_update(kstat_t *ksp, int rw)
+{
+ ena_t *ena = ksp->ks_private;
+ ena_basic_stat_t *ebs = ksp->ks_data;
+ enahw_resp_desc_t resp;
+ enahw_resp_basic_stats_t *stats = &resp.erd_resp.erd_basic_stats;
+ int ret = 0;
+
+ if (rw == KSTAT_WRITE) {
+ return (EACCES);
+ }
+
+ if ((ret = ena_admin_get_basic_stats(ena, &resp)) != 0) {
+ return (ret);
+ }
+
+ mutex_enter(&ena->ena_lock);
+
+ ebs->ebs_tx_bytes.value.ui64 =
+ ((uint64_t)stats->erbs_tx_bytes_high << 32) |
+ (uint64_t)stats->erbs_tx_bytes_low;
+ ebs->ebs_tx_pkts.value.ui64 =
+ ((uint64_t)stats->erbs_tx_pkts_high << 32) |
+ (uint64_t)stats->erbs_tx_pkts_low;
+ ebs->ebs_tx_drops.value.ui64 =
+ ((uint64_t)stats->erbs_tx_drops_high << 32) |
+ (uint64_t)stats->erbs_tx_drops_low;
+
+ ebs->ebs_rx_bytes.value.ui64 =
+ ((uint64_t)stats->erbs_rx_bytes_high << 32) |
+ (uint64_t)stats->erbs_rx_bytes_low;
+ ebs->ebs_rx_pkts.value.ui64 =
+ ((uint64_t)stats->erbs_rx_pkts_high << 32) |
+ (uint64_t)stats->erbs_rx_pkts_low;
+ ebs->ebs_rx_drops.value.ui64 =
+ ((uint64_t)stats->erbs_rx_drops_high << 32) |
+ (uint64_t)stats->erbs_rx_drops_low;
+
+ mutex_exit(&ena->ena_lock);
+
+ return (0);
+}
+
+void
+ena_stat_device_basic_cleanup(ena_t *ena)
+{
+ if (ena->ena_device_basic_kstat != NULL) {
+ kstat_delete(ena->ena_device_basic_kstat);
+ ena->ena_device_basic_kstat = NULL;
+ }
+}
+
+boolean_t
+ena_stat_device_basic_init(ena_t *ena)
+{
+ kstat_t *ksp = kstat_create(ENA_MODULE_NAME,
+ ddi_get_instance(ena->ena_dip), "device_basic", "net",
+ KSTAT_TYPE_NAMED,
+ sizeof (ena_basic_stat_t) / sizeof (kstat_named_t), 0);
+ ena_basic_stat_t *ebs = NULL;
+
+ if (ksp == NULL) {
+ ena_err(ena, "!failed to create device_basic kstats");
+ return (B_FALSE);
+ }
+
+ ena->ena_device_basic_kstat = ksp;
+ ebs = ksp->ks_data;
+ ksp->ks_update = ena_stat_device_basic_update;
+ ksp->ks_private = ena;
+
+ kstat_named_init(&ebs->ebs_tx_bytes, "tx_bytes", KSTAT_DATA_UINT64);
+ ebs->ebs_tx_bytes.value.ui64 = 0;
+ kstat_named_init(&ebs->ebs_tx_pkts, "tx_packets", KSTAT_DATA_UINT64);
+ ebs->ebs_tx_pkts.value.ui64 = 0;
+ kstat_named_init(&ebs->ebs_tx_drops, "tx_drops", KSTAT_DATA_UINT64);
+ ebs->ebs_tx_drops.value.ui64 = 0;
+
+ kstat_named_init(&ebs->ebs_rx_bytes, "rx_bytes", KSTAT_DATA_UINT64);
+ ebs->ebs_rx_bytes.value.ui64 = 0;
+ kstat_named_init(&ebs->ebs_rx_pkts, "rx_packets", KSTAT_DATA_UINT64);
+ ebs->ebs_rx_pkts.value.ui64 = 0;
+ kstat_named_init(&ebs->ebs_rx_drops, "rx_drops", KSTAT_DATA_UINT64);
+ ebs->ebs_rx_drops.value.ui64 = 0;
+
+ kstat_install(ena->ena_device_basic_kstat);
+ return (B_TRUE);
+}
+
+int
+ena_stat_device_extended_update(kstat_t *ksp, int rw)
+{
+ ena_t *ena = ksp->ks_private;
+ ena_extended_stat_t *ees = ksp->ks_data;
+ enahw_resp_desc_t resp;
+ enahw_resp_eni_stats_t *stats = &resp.erd_resp.erd_eni_stats;
+ int ret = 0;
+
+ if (rw == KSTAT_WRITE) {
+ return (EACCES);
+ }
+
+ if ((ret = ena_admin_get_eni_stats(ena, &resp)) != 0) {
+ return (ret);
+ }
+
+ mutex_enter(&ena->ena_lock);
+
+ ees->ees_bw_in_exceeded.value.ui64 = stats->eres_bw_in_exceeded;
+ ees->ees_bw_out_exceeded.value.ui64 = stats->eres_bw_out_exceeded;
+ ees->ees_pps_exceeded.value.ui64 = stats->eres_pps_exceeded;
+ ees->ees_conns_exceeded.value.ui64 = stats->eres_conns_exceeded;
+ ees->ees_linklocal_exceeded.value.ui64 = stats->eres_linklocal_exceeded;
+
+ mutex_exit(&ena->ena_lock);
+
+ return (0);
+}
+
+void
+ena_stat_device_extended_cleanup(ena_t *ena)
+{
+ if (ena->ena_device_extended_kstat != NULL) {
+ kstat_delete(ena->ena_device_extended_kstat);
+ ena->ena_device_extended_kstat = NULL;
+ }
+}
+
+boolean_t
+ena_stat_device_extended_init(ena_t *ena)
+{
+ kstat_t *ksp = kstat_create(ENA_MODULE_NAME,
+ ddi_get_instance(ena->ena_dip), "device_ext", "net",
+ KSTAT_TYPE_NAMED,
+ sizeof (ena_extended_stat_t) / sizeof (kstat_named_t), 0);
+ ena_extended_stat_t *ees;
+
+ if (ksp == NULL) {
+ ena_err(ena, "!failed to create device_ext kstats");
+ return (B_FALSE);
+ }
+
+ ena->ena_device_extended_kstat = ksp;
+ ees = ksp->ks_data;
+ ksp->ks_update = ena_stat_device_extended_update;
+ ksp->ks_private = ena;
+
+ kstat_named_init(&ees->ees_bw_in_exceeded, "bw_in_exceeded",
+ KSTAT_DATA_UINT64);
+ ees->ees_bw_in_exceeded.value.ui64 = 0;
+
+ kstat_named_init(&ees->ees_bw_out_exceeded, "bw_out_exceeded",
+ KSTAT_DATA_UINT64);
+ ees->ees_bw_out_exceeded.value.ui64 = 0;
+
+ kstat_named_init(&ees->ees_pps_exceeded, "pps_exceeded",
+ KSTAT_DATA_UINT64);
+ ees->ees_pps_exceeded.value.ui64 = 0;
+
+ kstat_named_init(&ees->ees_conns_exceeded, "conns_exceeded",
+ KSTAT_DATA_UINT64);
+ ees->ees_conns_exceeded.value.ui64 = 0;
+
+ kstat_named_init(&ees->ees_linklocal_exceeded, "linklocal_exceeded",
+ KSTAT_DATA_UINT64);
+ ees->ees_linklocal_exceeded.value.ui64 = 0;
+
+ kstat_install(ena->ena_device_extended_kstat);
+ return (B_TRUE);
+}
+
+void
+ena_stat_aenq_cleanup(ena_t *ena)
+{
+ if (ena->ena_aenq_kstat != NULL) {
+ kstat_delete(ena->ena_aenq_kstat);
+ ena->ena_aenq_kstat = NULL;
+ }
+}
+
+boolean_t
+ena_stat_aenq_init(ena_t *ena)
+{
+ kstat_t *ksp = kstat_create(ENA_MODULE_NAME,
+ ddi_get_instance(ena->ena_dip), "aenq", "net", KSTAT_TYPE_NAMED,
+ sizeof (ena_aenq_stat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL);
+ ena_aenq_stat_t *eas = &ena->ena_aenq_stat;
+
+ if (ksp == NULL) {
+ ena_err(ena, "!failed to create aenq kstats");
+ return (B_FALSE);
+ }
+
+ ena->ena_aenq_kstat = ksp;
+ ksp->ks_data = eas;
+
+ kstat_named_init(&eas->eaes_default, "default", KSTAT_DATA_UINT64);
+ eas->eaes_default.value.ui64 = 0;
+
+ kstat_named_init(&eas->eaes_link_change, "link_change",
+ KSTAT_DATA_UINT64);
+ eas->eaes_link_change.value.ui64 = 0;
+
+ kstat_install(ena->ena_aenq_kstat);
+ return (B_TRUE);
+}
+
+void
+ena_stat_txq_cleanup(ena_txq_t *txq)
+{
+ if (txq->et_kstat != NULL) {
+ kstat_delete(txq->et_kstat);
+ txq->et_kstat = NULL;
+ }
+}
+
+boolean_t
+ena_stat_txq_init(ena_txq_t *txq)
+{
+ ena_t *ena = txq->et_ena;
+ kstat_t *ksp;
+ char buf[128];
+ ena_txq_stat_t *ets = &txq->et_stat;
+
+ (void) snprintf(buf, sizeof (buf), "txq_%d", txq->et_txqs_idx);
+
+ ksp = kstat_create(ENA_MODULE_NAME, ddi_get_instance(ena->ena_dip), buf,
+ "net", KSTAT_TYPE_NAMED,
+ sizeof (ena_txq_stat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL);
+
+ if (ksp == NULL) {
+ ena_err(ena, "!failed to create %s kstats", buf);
+ return (B_FALSE);
+ }
+
+ txq->et_kstat = ksp;
+ ksp->ks_data = ets;
+
+ kstat_named_init(&ets->ets_hck_meoifail, "meoi_fail",
+ KSTAT_DATA_UINT64);
+ ets->ets_hck_meoifail.value.ui64 = 0;
+
+ kstat_named_init(&ets->ets_blocked, "blocked", KSTAT_DATA_UINT64);
+ ets->ets_blocked.value.ui64 = 0;
+
+ kstat_named_init(&ets->ets_unblocked, "unblocked", KSTAT_DATA_UINT64);
+ ets->ets_unblocked.value.ui64 = 0;
+
+ kstat_named_init(&ets->ets_recycled, "recycled", KSTAT_DATA_UINT64);
+ ets->ets_recycled.value.ui64 = 0;
+
+ kstat_named_init(&ets->ets_bytes, "bytes", KSTAT_DATA_UINT64);
+ ets->ets_bytes.value.ui64 = 0;
+
+ kstat_named_init(&ets->ets_packets, "packets", KSTAT_DATA_UINT64);
+ ets->ets_packets.value.ui64 = 0;
+
+ kstat_install(txq->et_kstat);
+ return (B_TRUE);
+}
+
+void
+ena_stat_rxq_cleanup(ena_rxq_t *rxq)
+{
+ if (rxq->er_kstat != NULL) {
+ kstat_delete(rxq->er_kstat);
+ rxq->er_kstat = NULL;
+ }
+}
+
+boolean_t
+ena_stat_rxq_init(ena_rxq_t *rxq)
+{
+ ena_t *ena = rxq->er_ena;
+ kstat_t *ksp;
+ char buf[128];
+ ena_rxq_stat_t *ers = &rxq->er_stat;
+
+ (void) snprintf(buf, sizeof (buf), "rxq_%d", rxq->er_rxqs_idx);
+
+ ksp = kstat_create(ENA_MODULE_NAME, ddi_get_instance(ena->ena_dip), buf,
+ "net", KSTAT_TYPE_NAMED,
+ sizeof (ena_rxq_stat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL);
+
+ if (ksp == NULL) {
+ ena_err(ena, "!failed to create %s kstats", buf);
+ return (B_FALSE);
+ }
+
+ rxq->er_kstat = ksp;
+ ksp->ks_data = ers;
+
+ kstat_named_init(&ers->ers_packets, "packets", KSTAT_DATA_UINT64);
+ ers->ers_packets.value.ui64 = 0;
+
+ kstat_named_init(&ers->ers_bytes, "bytes", KSTAT_DATA_UINT64);
+ ers->ers_bytes.value.ui64 = 0;
+
+ kstat_named_init(&ers->ers_multi_desc, "multi_desc", KSTAT_DATA_UINT64);
+ ers->ers_multi_desc.value.ui64 = 0;
+
+ kstat_named_init(&ers->ers_allocb_fail, "allocb_fail",
+ KSTAT_DATA_UINT64);
+ ers->ers_allocb_fail.value.ui64 = 0;
+
+ kstat_named_init(&ers->ers_intr_limit, "intr_limit", KSTAT_DATA_UINT64);
+ ers->ers_intr_limit.value.ui64 = 0;
+
+ kstat_named_init(&ers->ers_hck_ipv4_err, "hck_ipv4_err",
+ KSTAT_DATA_UINT64);
+ ers->ers_hck_ipv4_err.value.ui64 = 0;
+
+ kstat_named_init(&ers->ers_hck_l4_err, "hck_l4_err", KSTAT_DATA_UINT64);
+ ers->ers_hck_l4_err.value.ui64 = 0;
+
+ kstat_install(rxq->er_kstat);
+ return (B_TRUE);
+}
+
+int
+ena_ring_rx_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
+{
+ int ret = 0;
+ ena_rxq_t *rxq = (ena_rxq_t *)rh;
+
+ mutex_enter(&rxq->er_stat_lock);
+
+ switch (stat) {
+ case MAC_STAT_RBYTES:
+ *val = rxq->er_stat.ers_bytes.value.ui64;
+ break;
+ case MAC_STAT_IPACKETS:
+ *val = rxq->er_stat.ers_packets.value.ui64;
+ break;
+ default:
+ *val = 0;
+ ret = ENOTSUP;
+ }
+
+ mutex_exit(&rxq->er_stat_lock);
+ return (ret);
+}
+
+int
+ena_ring_tx_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
+{
+ int ret = 0;
+ ena_txq_t *txq = (ena_txq_t *)rh;
+
+ mutex_enter(&txq->et_stat_lock);
+
+ switch (stat) {
+ case MAC_STAT_OBYTES:
+ *val = txq->et_stat.ets_bytes.value.ui64;
+ break;
+ case MAC_STAT_OPACKETS:
+ *val = txq->et_stat.ets_packets.value.ui64;
+ break;
+ default:
+ *val = 0;
+ ret = ENOTSUP;
+ }
+
+ mutex_exit(&txq->et_stat_lock);
+ return (ret);
+}
+
+int
+ena_m_stat(void *arg, uint_t stat, uint64_t *val)
+{
+ ena_t *ena = arg;
+ ena_basic_stat_t *ebs = ena->ena_device_basic_kstat->ks_data;
+ int ret = 0;
+
+ ret = ena_stat_device_basic_update(ena->ena_device_basic_kstat,
+ KSTAT_READ);
+
+ if (ret != 0) {
+ return (ret);
+ }
+
+ mutex_enter(&ena->ena_lock);
+
+ /*
+ * The ENA device does not provide a lot of the stats that a
+ * traditional NIC device would.
+ */
+ switch (stat) {
+ case MAC_STAT_NORCVBUF:
+ *val = ebs->ebs_rx_drops.value.ui64;
+ break;
+
+ case MAC_STAT_RBYTES:
+ *val = ebs->ebs_rx_bytes.value.ui64;
+ break;
+
+ case MAC_STAT_IPACKETS:
+ *val = ebs->ebs_rx_pkts.value.ui64;
+ break;
+
+ case MAC_STAT_OBYTES:
+ *val = ebs->ebs_tx_bytes.value.ui64;
+ break;
+
+ case MAC_STAT_OPACKETS:
+ *val = ebs->ebs_tx_pkts.value.ui64;
+ break;
+
+ default:
+ ret = ENOTSUP;
+ break;
+ }
+
+ mutex_exit(&ena->ena_lock);
+ return (ret);
+}
diff --git a/usr/src/uts/common/io/ena/ena_tx.c b/usr/src/uts/common/io/ena/ena_tx.c
new file mode 100644
index 0000000000..30773496b0
--- /dev/null
+++ b/usr/src/uts/common/io/ena/ena_tx.c
@@ -0,0 +1,534 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2021 Oxide Computer Company
+ */
+#include "ena.h"
+
+void
+ena_free_tx_dma(ena_txq_t *txq)
+{
+ if (txq->et_tcbs != NULL) {
+ for (uint_t i = 0; i < txq->et_sq_num_descs; i++) {
+ ena_tx_control_block_t *tcb = &txq->et_tcbs[i];
+ ena_dma_free(&tcb->etcb_dma);
+ }
+
+ kmem_free(txq->et_tcbs,
+ sizeof (*txq->et_tcbs) * txq->et_sq_num_descs);
+
+ txq->et_tcbs = NULL;
+
+ }
+
+ ena_dma_free(&txq->et_cq_dma);
+ txq->et_cq_descs = NULL;
+
+ ena_dma_free(&txq->et_sq_dma);
+ txq->et_sq_descs = NULL;
+
+ txq->et_state &= ~ENA_TXQ_STATE_HOST_ALLOC;
+}
+
+static int
+ena_alloc_tx_dma(ena_txq_t *txq)
+{
+ ena_t *ena = txq->et_ena;
+ size_t cq_descs_sz;
+ size_t sq_descs_sz;
+ int err = 0;
+ ena_dma_conf_t conf;
+
+ ASSERT0(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC);
+ ASSERT3P(ena, !=, NULL);
+
+ cq_descs_sz = txq->et_cq_num_descs * sizeof (*txq->et_cq_descs);
+ sq_descs_sz = txq->et_sq_num_descs * sizeof (*txq->et_sq_descs);
+
+ conf = (ena_dma_conf_t) {
+ .edc_size = sq_descs_sz,
+ .edc_align = ENAHW_IO_SQ_DESC_BUF_ALIGNMENT,
+ .edc_sgl = 1,
+ .edc_endian = DDI_NEVERSWAP_ACC,
+ .edc_stream = B_FALSE,
+ };
+
+ if (!ena_dma_alloc(ena, &txq->et_sq_dma, &conf, sq_descs_sz)) {
+ return (ENOMEM);
+ }
+
+ bzero(txq->et_sq_dma.edb_va, sq_descs_sz);
+ txq->et_sq_descs = (void *)txq->et_sq_dma.edb_va;
+ txq->et_tcbs = kmem_zalloc(sizeof (*txq->et_tcbs) *
+ txq->et_sq_num_descs, KM_SLEEP);
+
+ for (uint_t i = 0; i < txq->et_sq_num_descs; i++) {
+ ena_tx_control_block_t *tcb = &txq->et_tcbs[i];
+ ena_dma_conf_t buf_conf = {
+ .edc_size = ena->ena_tx_buf_sz,
+ .edc_align = 1,
+ .edc_sgl = ena->ena_tx_sgl_max_sz,
+ .edc_endian = DDI_NEVERSWAP_ACC,
+ .edc_stream = B_TRUE,
+ };
+
+ if (!ena_dma_alloc(ena, &tcb->etcb_dma, &buf_conf,
+ ena->ena_tx_buf_sz)) {
+ err = ENOMEM;
+ goto error;
+ }
+ }
+
+ conf = (ena_dma_conf_t) {
+ .edc_size = cq_descs_sz,
+ .edc_align = ENAHW_IO_CQ_DESC_BUF_ALIGNMENT,
+ .edc_sgl = 1,
+ .edc_endian = DDI_NEVERSWAP_ACC,
+ .edc_stream = B_FALSE,
+ };
+
+ if (!ena_dma_alloc(ena, &txq->et_cq_dma, &conf, cq_descs_sz)) {
+ err = ENOMEM;
+ goto error;
+ }
+
+ bzero(txq->et_cq_dma.edb_va, cq_descs_sz);
+ txq->et_cq_descs = (void *)txq->et_cq_dma.edb_va;
+ txq->et_state |= ENA_TXQ_STATE_HOST_ALLOC;
+ return (0);
+
+error:
+ ena_free_tx_dma(txq);
+ return (err);
+}
+
+boolean_t
+ena_alloc_txq(ena_txq_t *txq)
+{
+ int ret = 0;
+ ena_t *ena = txq->et_ena;
+ uint16_t cq_hw_idx, sq_hw_idx;
+ uint32_t *cq_unmask_addr, *cq_headdb, *cq_numanode;
+ uint32_t *sq_db_addr;
+
+ ASSERT3U(txq->et_cq_num_descs, >, 0);
+
+ /*
+ * First, allocate the Tx data buffers.
+ */
+ if ((ret = ena_alloc_tx_dma(txq)) != 0) {
+ ena_err(ena, "failed to allocate Tx queue %u data buffers: %d",
+ txq->et_txqs_idx, ret);
+ return (B_FALSE);
+ }
+
+ ASSERT(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC);
+
+ /*
+ * Second, create the Completion Queue.
+ */
+ ret = ena_create_cq(ena, txq->et_cq_num_descs,
+ txq->et_cq_dma.edb_cookie->dmac_laddress, B_TRUE,
+ txq->et_intr_vector, &cq_hw_idx, &cq_unmask_addr, &cq_headdb,
+ &cq_numanode);
+
+ if (ret != 0) {
+ ena_err(ena, "failed to create Tx CQ %u: %d", txq->et_txqs_idx,
+ ret);
+ return (B_FALSE);
+ }
+
+ txq->et_cq_hw_idx = cq_hw_idx;
+ txq->et_cq_phase = 1;
+ txq->et_cq_unmask_addr = cq_unmask_addr;
+ txq->et_cq_head_db_addr = cq_headdb;
+ txq->et_cq_numa_addr = cq_numanode;
+ txq->et_state |= ENA_TXQ_STATE_CQ_CREATED;
+
+ /*
+ * Third, create the Submission Queue to match with the above
+ * CQ. At this time we force the SQ and CQ to have the same
+ * number of descriptors as we only use a 1:1 completion
+ * policy. However, in the future, we could loosen this and
+ * use an on-demand completion policy and the two could have a
+ * different number of descriptors.
+ */
+ ASSERT3U(txq->et_sq_num_descs, ==, txq->et_cq_num_descs);
+
+ ret = ena_create_sq(ena, txq->et_sq_num_descs,
+ txq->et_sq_dma.edb_cookie->dmac_laddress, B_TRUE, cq_hw_idx,
+ &sq_hw_idx, &sq_db_addr);
+
+ if (ret != 0) {
+ ena_err(ena, "failed to create Tx SQ %u: %d", txq->et_txqs_idx,
+ ret);
+ return (B_FALSE);
+ }
+
+ txq->et_sq_hw_idx = sq_hw_idx;
+ txq->et_sq_db_addr = sq_db_addr;
+ /* The phase must always start on 1. */
+ txq->et_sq_phase = 1;
+ txq->et_sq_avail_descs = txq->et_sq_num_descs;
+ txq->et_blocked = B_FALSE;
+ txq->et_state |= ENA_TXQ_STATE_SQ_CREATED;
+
+ return (B_TRUE);
+}
+
+void
+ena_cleanup_txq(ena_txq_t *txq)
+{
+ int ret = 0;
+ ena_t *ena = txq->et_ena;
+
+ if ((txq->et_state & ENA_TXQ_STATE_SQ_CREATED) != 0) {
+ ret = ena_destroy_sq(ena, txq->et_sq_hw_idx, B_TRUE);
+
+ if (ret != 0) {
+ ena_err(ena, "failed to destroy Tx SQ %u: %d",
+ txq->et_txqs_idx, ret);
+ }
+
+ txq->et_sq_hw_idx = 0;
+ txq->et_sq_db_addr = NULL;
+ txq->et_sq_tail_idx = 0;
+ txq->et_sq_phase = 0;
+ txq->et_state &= ~ENA_TXQ_STATE_SQ_CREATED;
+ }
+
+ if ((txq->et_state & ENA_TXQ_STATE_CQ_CREATED) != 0) {
+ ret = ena_destroy_cq(ena, txq->et_cq_hw_idx);
+
+ if (ret != 0) {
+ ena_err(ena, "failed to destroy Tx CQ %u: %d",
+ txq->et_txqs_idx, ret);
+ }
+
+ txq->et_cq_hw_idx = 0;
+ txq->et_cq_head_idx = 0;
+ txq->et_cq_phase = 0;
+ txq->et_cq_head_db_addr = NULL;
+ txq->et_cq_unmask_addr = NULL;
+ txq->et_cq_numa_addr = NULL;
+ txq->et_state &= ~ENA_TXQ_STATE_CQ_CREATED;
+ }
+
+ ena_free_tx_dma(txq);
+ VERIFY3S(txq->et_state, ==, ENA_TXQ_STATE_NONE);
+}
+
+void
+ena_ring_tx_stop(mac_ring_driver_t rh)
+{
+ ena_txq_t *txq = (ena_txq_t *)rh;
+ uint32_t intr_ctrl;
+
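+	/*
+	 * Mask the queue's completion interrupt so that no further Tx
+	 * completion processing is triggered while the ring is stopped.
+	 */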
+ intr_ctrl = ena_hw_abs_read32(txq->et_ena, txq->et_cq_unmask_addr);
+	ENAHW_REG_INTR_MASK(intr_ctrl);
+ ena_hw_abs_write32(txq->et_ena, txq->et_cq_unmask_addr, intr_ctrl);
+
+ txq->et_state &= ~ENA_TXQ_STATE_RUNNING;
+ txq->et_state &= ~ENA_TXQ_STATE_READY;
+}
+
+int
+ena_ring_tx_start(mac_ring_driver_t rh, uint64_t gen_num)
+{
+ ena_txq_t *txq = (ena_txq_t *)rh;
+ ena_t *ena = txq->et_ena;
+ uint32_t intr_ctrl;
+
+ mutex_enter(&txq->et_lock);
+ txq->et_m_gen_num = gen_num;
+ mutex_exit(&txq->et_lock);
+
+ txq->et_state |= ENA_TXQ_STATE_READY;
+
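+	/*
+	 * Unmask the queue's completion interrupt so the device can
+	 * notify us as Tx descriptors complete.
+	 */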
+ intr_ctrl = ena_hw_abs_read32(ena, txq->et_cq_unmask_addr);
+ ENAHW_REG_INTR_UNMASK(intr_ctrl);
+ ena_hw_abs_write32(ena, txq->et_cq_unmask_addr, intr_ctrl);
+ txq->et_state |= ENA_TXQ_STATE_RUNNING;
+ return (0);
+}
+
+static void
+ena_tx_copy_fragment(ena_tx_control_block_t *tcb, const mblk_t *mp,
+ const size_t off, const size_t len)
+{
+ const void *soff = mp->b_rptr + off;
+ void *doff =
+ (void *)(tcb->etcb_dma.edb_va + tcb->etcb_dma.edb_used_len);
+
+ VERIFY3U(len, >, 0);
+ VERIFY3P(soff, >=, mp->b_rptr);
+ VERIFY3P(soff, <=, mp->b_wptr);
+ VERIFY3U(len, <=, MBLKL(mp));
+ VERIFY3U((uintptr_t)soff + len, <=, (uintptr_t)mp->b_wptr);
+ VERIFY3U(tcb->etcb_dma.edb_used_len + len, <, tcb->etcb_dma.edb_len);
+
+ bcopy(soff, doff, len);
+ tcb->etcb_type = ENA_TCB_COPY;
+ tcb->etcb_dma.edb_used_len += len;
+}
+
+ena_tx_control_block_t *
+ena_pull_tcb(const ena_txq_t *txq, mblk_t *mp)
+{
+ mblk_t *nmp = mp;
+ ena_t *ena = txq->et_ena;
+ ena_tx_control_block_t *tcb = NULL;
+ const uint16_t tail_mod =
+ txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1);
+
+ ASSERT(MUTEX_HELD(&txq->et_lock));
+ VERIFY3U(msgsize(mp), <, ena->ena_tx_buf_sz);
+
+ while (nmp != NULL) {
+ const size_t nmp_len = MBLKL(nmp);
+
+ if (nmp_len == 0) {
+ nmp = nmp->b_cont;
+ continue;
+ }
+
+ /* For now TCB is bound to SQ desc. */
+ if (tcb == NULL) {
+ tcb = &txq->et_tcbs[tail_mod];
+ }
+
+ ena_tx_copy_fragment(tcb, nmp, 0, nmp_len);
+ nmp = nmp->b_cont;
+ }
+
+ ENA_DMA_SYNC(tcb->etcb_dma, DDI_DMA_SYNC_FORDEV);
+ VERIFY3P(nmp, ==, NULL);
+ VERIFY3P(tcb, !=, NULL);
+ return (tcb);
+}
+
+static void
+ena_fill_tx_data_desc(ena_txq_t *txq, ena_tx_control_block_t *tcb,
+ uint16_t tail, uint8_t phase, enahw_tx_data_desc_t *desc,
+ mac_ether_offload_info_t *meo, size_t mlen)
+{
+ VERIFY3U(mlen, <=, ENAHW_TX_DESC_LENGTH_MASK);
+
+#ifdef DEBUG
+	/*
+	 * If there is no header for a given layer its length will be
+	 * zero, so we elide the meoi_flags check here.
+	 */
+ size_t hdr_len = meo->meoi_l2hlen + meo->meoi_l3hlen + meo->meoi_l4hlen;
+ ASSERT3U(hdr_len, <=, txq->et_ena->ena_tx_max_hdr_len);
+#endif
+
+ bzero(desc, sizeof (*desc));
+ ENAHW_TX_DESC_FIRST_ON(desc);
+ ENAHW_TX_DESC_LENGTH(desc, mlen);
+ ENAHW_TX_DESC_REQID_HI(desc, tail);
+ ENAHW_TX_DESC_REQID_LO(desc, tail);
+ ENAHW_TX_DESC_PHASE(desc, phase);
+ ENAHW_TX_DESC_DF_ON(desc);
+ ENAHW_TX_DESC_LAST_ON(desc);
+ ENAHW_TX_DESC_COMP_REQ_ON(desc);
+ ENAHW_TX_DESC_META_DESC_OFF(desc);
+ ENAHW_TX_DESC_ADDR_LO(desc, tcb->etcb_dma.edb_cookie->dmac_laddress);
+ ENAHW_TX_DESC_ADDR_HI(desc, tcb->etcb_dma.edb_cookie->dmac_laddress);
+ /*
+ * NOTE: Please see the block comment above
+ * etd_buff_addr_hi_hdr_sz to see why this is set to 0.
+ */
+ ENAHW_TX_DESC_HEADER_LENGTH(desc, 0);
+ ENAHW_TX_DESC_TSO_OFF(desc);
+ ENAHW_TX_DESC_L3_CSUM_OFF(desc);
+ ENAHW_TX_DESC_L4_CSUM_OFF(desc);
+ /*
+ * Enabling this bit tells the device NOT to calculate the
+ * pseudo header checksum.
+ */
+ ENAHW_TX_DESC_L4_CSUM_PARTIAL_ON(desc);
+}
+
+static void
+ena_submit_tx(ena_txq_t *txq, uint16_t desc_idx)
+{
+ ena_hw_abs_write32(txq->et_ena, txq->et_sq_db_addr, desc_idx);
+}
+
+/*
+ * For now we do the simplest thing possible. All Tx uses bcopy to
+ * pre-allocated buffers, no checksum, no TSO, etc.
+ */
+mblk_t *
+ena_ring_tx(void *arg, mblk_t *mp)
+{
+ ena_txq_t *txq = arg;
+ ena_t *ena = txq->et_ena;
+ mac_ether_offload_info_t meo;
+ enahw_tx_data_desc_t *desc;
+ ena_tx_control_block_t *tcb;
+ const uint16_t tail_mod =
+ txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1);
+
+ VERIFY3P(mp->b_next, ==, NULL);
+ VERIFY(txq->et_blocked == B_FALSE);
+
+ /*
+ * The ena_state value is written by atomic operations. The
+ * et_state value is currently Write Once, but if that changes
+ * it should also be written with atomics.
+ */
+ if (!(ena->ena_state & ENA_STATE_RUNNING) ||
+ !(txq->et_state & ENA_TXQ_STATE_RUNNING)) {
+ freemsg(mp);
+ return (NULL);
+ }
+
+ if (mac_ether_offload_info(mp, &meo) != 0) {
+ freemsg(mp);
+ mutex_enter(&txq->et_stat_lock);
+ txq->et_stat.ets_hck_meoifail.value.ui64++;
+ mutex_exit(&txq->et_stat_lock);
+ return (NULL);
+ }
+
+ mutex_enter(&txq->et_lock);
+
+ /*
+ * For the moment there is a 1:1 mapping between Tx descs and
+ * Tx contexts. Currently Tx is copy only, and each context
+ * buffer is guaranteed to be as large as MTU + frame header,
+ * see ena_update_buf_sizes().
+ */
+ if (txq->et_sq_avail_descs == 0) {
+ txq->et_blocked = B_TRUE;
+ mutex_enter(&txq->et_stat_lock);
+ txq->et_stat.ets_blocked.value.ui64++;
+ mutex_exit(&txq->et_stat_lock);
+ mutex_exit(&txq->et_lock);
+ return (mp);
+ }
+
+ ASSERT3U(meo.meoi_len, <=, ena->ena_max_frame_total);
+ tcb = ena_pull_tcb(txq, mp);
+ ASSERT3P(tcb, !=, NULL);
+ tcb->etcb_mp = mp;
+ txq->et_sq_avail_descs--;
+
+ /* Fill in the Tx descriptor. */
+ desc = &(txq->et_sq_descs[tail_mod].etd_data);
+ ena_fill_tx_data_desc(txq, tcb, tail_mod, txq->et_sq_phase, desc, &meo,
+ meo.meoi_len);
+ DTRACE_PROBE3(tx__submit, ena_tx_control_block_t *, tcb, uint16_t,
+ tail_mod, enahw_tx_data_desc_t *, desc);
+
+	/*
+	 * Remember, we submit the raw tail value to the device; the
+	 * hardware performs its own modulo (as we did above to get
+	 * tail_mod).
+	 */
+ txq->et_sq_tail_idx++;
+ ena_submit_tx(txq, txq->et_sq_tail_idx);
+
+ mutex_enter(&txq->et_stat_lock);
+ txq->et_stat.ets_packets.value.ui64++;
+ txq->et_stat.ets_bytes.value.ui64 += meo.meoi_len;
+ mutex_exit(&txq->et_stat_lock);
+
+ if ((txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1)) == 0) {
+ txq->et_sq_phase = !txq->et_sq_phase;
+ }
+
+ mutex_exit(&txq->et_lock);
+ return (NULL);
+}
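+
+/*
+ * Illustrative sketch (not part of the driver): et_sq_tail_idx above
+ * and et_cq_head_idx in ena_tx_intr_work() are free-running counters
+ * over power-of-two rings.  The low bits select a slot and the phase
+ * bit flips on every wrap, which is how fresh entries are told apart
+ * from stale ones.  A hypothetical helper capturing that convention
+ * might look like:
+ *
+ *	static inline void
+ *	ena_ring_advance(uint16_t *idx, uint16_t num_descs,
+ *	    uint16_t *phase)
+ *	{
+ *		(*idx)++;
+ *		if ((*idx & (num_descs - 1)) == 0)
+ *			*phase = !*phase;
+ *	}
+ */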
+
+void
+ena_tx_intr_work(ena_txq_t *txq)
+{
+ uint16_t head_mod;
+ enahw_tx_cdesc_t *cdesc;
+ ena_tx_control_block_t *tcb;
+ uint16_t req_id;
+ uint64_t recycled = 0;
+ boolean_t unblocked = B_FALSE;
+
+ mutex_enter(&txq->et_lock);
+ head_mod = txq->et_cq_head_idx & (txq->et_cq_num_descs - 1);
+ ENA_DMA_SYNC(txq->et_cq_dma, DDI_DMA_SYNC_FORKERNEL);
+ cdesc = &txq->et_cq_descs[head_mod];
+
+ /* Recycle any completed descriptors. */
+ while (ENAHW_TX_CDESC_GET_PHASE(cdesc) == txq->et_cq_phase) {
+ mblk_t *mp;
+
+ /* Get the corresponding TCB. */
+ req_id = cdesc->etc_req_id;
+ /*
+ * It would be nice to make this a device reset
+ * instead.
+ */
+		VERIFY3U(req_id, <, txq->et_sq_num_descs);
+ tcb = &txq->et_tcbs[req_id];
+ DTRACE_PROBE2(tx__complete, uint16_t, req_id,
+ ena_tx_control_block_t *, tcb);
+
+ /* Free the associated mblk. */
+ tcb->etcb_dma.edb_used_len = 0;
+ mp = tcb->etcb_mp;
+ /* Make this a device reset instead. */
+ VERIFY3P(mp, !=, NULL);
+ freemsg(mp);
+ tcb->etcb_mp = NULL;
+
+ /* Add this descriptor back to the free list. */
+ txq->et_sq_avail_descs++;
+ txq->et_cq_head_idx++;
+
+ /* Check for phase rollover. */
+ head_mod = txq->et_cq_head_idx & (txq->et_cq_num_descs - 1);
+
+ if (head_mod == 0) {
+ txq->et_cq_phase = !txq->et_cq_phase;
+ }
+
+ if (txq->et_blocked) {
+ txq->et_blocked = B_FALSE;
+ unblocked = B_TRUE;
+ mac_tx_ring_update(txq->et_ena->ena_mh, txq->et_mrh);
+ }
+
+ recycled++;
+ cdesc = &txq->et_cq_descs[head_mod];
+ }
+
+ /*
+ * If the device provided a head doorbell register, then we
+ * need to update it to let the device know we are done
+ * reading these CQ entries.
+ */
+ if (txq->et_cq_head_db_addr != NULL) {
+ ena_hw_abs_write32(txq->et_ena, txq->et_cq_head_db_addr,
+ head_mod);
+ }
+
+ mutex_exit(&txq->et_lock);
+
+ /* Update stats. */
+ mutex_enter(&txq->et_stat_lock);
+ txq->et_stat.ets_recycled.value.ui64 += recycled;
+ if (unblocked) {
+ txq->et_stat.ets_unblocked.value.ui64++;
+ }
+ mutex_exit(&txq->et_stat_lock);
+}
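+
+/*
+ * Illustrative note (not part of the driver): the value stamped into
+ * each data descriptor via ENAHW_TX_DESC_REQID_HI/LO() is the SQ slot
+ * (tail_mod) the packet occupied, and the same slot indexes et_tcbs[]
+ * in ena_pull_tcb().  The device echoes that value back in etc_req_id,
+ * which is why the completion loop above can recover the matching TCB
+ * with a plain array lookup into et_tcbs[req_id].
+ */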
diff --git a/usr/src/uts/common/mapfiles/ddi.mapfile b/usr/src/uts/common/mapfiles/ddi.mapfile
index 798367c7e3..a9f4f2d730 100644
--- a/usr/src/uts/common/mapfiles/ddi.mapfile
+++ b/usr/src/uts/common/mapfiles/ddi.mapfile
@@ -12,6 +12,7 @@
#
# Copyright 2020 Joyent, Inc.
# Copyright 2020 RackTop Systems, Inc.
+# Copyright 2021 Oxide Computer Company
#
#
@@ -78,6 +79,7 @@ SYMBOL_SCOPE {
ddi_dma_addr_bind_handle { FLAGS = EXTERN };
ddi_dma_alloc_handle { FLAGS = EXTERN };
ddi_dma_cookie_iter { FLAGS = EXTERN };
+ ddi_dma_cookie_one { FLAGS = EXTERN };
ddi_dma_free_handle { FLAGS = EXTERN };
ddi_dma_mem_alloc { FLAGS = EXTERN };
ddi_dma_mem_free { FLAGS = EXTERN };
@@ -153,6 +155,7 @@ SYMBOL_SCOPE {
dev_err { FLAGS = EXTERN };
drv_usectohz { FLAGS = EXTERN };
drv_usecwait { FLAGS = EXTERN };
+ ffs { FLAGS = EXTERN };
fm_ena_generate { FLAGS = EXTERN };
freeb { FLAGS = EXTERN };
freemsg { FLAGS = EXTERN };
@@ -168,6 +171,7 @@ SYMBOL_SCOPE {
list_create { FLAGS = EXTERN };
list_destroy { FLAGS = EXTERN };
list_head { FLAGS = EXTERN };
+ list_insert_head { FLAGS = EXTERN };
list_insert_tail { FLAGS = EXTERN };
list_next { FLAGS = EXTERN };
list_remove { FLAGS = EXTERN };
@@ -219,9 +223,12 @@ SYMBOL_SCOPE {
strcat { FLAGS = EXTERN };
strcmp { FLAGS = EXTERN };
strcpy { FLAGS = EXTERN };
+ strlcpy { FLAGS = EXTERN };
strlen { FLAGS = EXTERN };
timeout { FLAGS = EXTERN };
untimeout { FLAGS = EXTERN };
+ vcmn_err { FLAGS = EXTERN };
+ vdev_err { FLAGS = EXTERN };
vsnprintf { FLAGS = EXTERN };
vsprintf { FLAGS = EXTERN };
};
diff --git a/usr/src/uts/common/mapfiles/kernel.mapfile b/usr/src/uts/common/mapfiles/kernel.mapfile
index 21a691dca2..6fcc1fa371 100644
--- a/usr/src/uts/common/mapfiles/kernel.mapfile
+++ b/usr/src/uts/common/mapfiles/kernel.mapfile
@@ -11,6 +11,7 @@
#
# Copyright 2016 Joyent, Inc.
+# Copyright 2021 Oxide Computer Company
#
#
@@ -40,4 +41,6 @@ SYMBOL_SCOPE {
servicing_interrupt { FLAGS = EXTERN };
fnvlist_alloc { FLAGS = EXTERN };
fnvlist_add_string { FLAGS = EXTERN };
+ ncpus_online { FLAGS = EXTERN };
+ utsname { FLAGS = EXTERN };
};
diff --git a/usr/src/uts/common/sys/ethernet.h b/usr/src/uts/common/sys/ethernet.h
index 5b9de2f2bf..4febb8915f 100644
--- a/usr/src/uts/common/sys/ethernet.h
+++ b/usr/src/uts/common/sys/ethernet.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright 2014 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2021 Oxide Computer Company
*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -140,6 +141,8 @@ struct ether_vlan_extinfo {
#endif
#ifdef _KERNEL
+#define ETHER_IS_MULTICAST(addr) (((addr)[0] & 0x01) != 0)
+
extern int localetheraddr(struct ether_addr *, struct ether_addr *);
extern char *ether_sprintf(struct ether_addr *);
extern int ether_aton(char *, uchar_t *);
diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel
index cd5eabf7c5..4d1d2664c3 100644
--- a/usr/src/uts/intel/Makefile.intel
+++ b/usr/src/uts/intel/Makefile.intel
@@ -27,6 +27,7 @@
# Copyright 2018 Nexenta Systems, Inc.
# Copyright 2019 RackTop Systems
# Copyright 2019 Peter Tribble.
+# Copyright 2021 Oxide Computer Company
#
#
@@ -385,6 +386,7 @@ DRV_KMODS += dmfe
DRV_KMODS += e1000g
DRV_KMODS += efe
DRV_KMODS += elxl
+DRV_KMODS += ena
DRV_KMODS += hme
DRV_KMODS += mxfe
DRV_KMODS += nge
diff --git a/usr/src/uts/intel/ena/Makefile b/usr/src/uts/intel/ena/Makefile
new file mode 100644
index 0000000000..bef9878cc0
--- /dev/null
+++ b/usr/src/uts/intel/ena/Makefile
@@ -0,0 +1,47 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2021 Oxide Computer Company
+#
+
+UTSBASE = ../..
+
+MODULE = ena
+OBJECTS = $(ENA_OBJS:%=$(OBJS_DIR)/%)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io/ena
+
+include $(UTSBASE)/intel/Makefile.intel
+
+CPPFLAGS += -I$(UTSBASE)/common/io/ena
+
+ALL_TARGET = $(BINARY) $(CONFMOD)
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+LDFLAGS += -dy -N misc/mac
+
+MAPFILES += ddi mac kernel
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/Makefile.mapfile
+include $(UTSBASE)/intel/Makefile.targ