13380 Add virtio-9p (aka VirtFS) filesystem sharing to bhyve

Portions contributed by: Andy Fiddaman <andy@omnios.org> Reviewed by: Jason King <jason.brian.king@gmail.com> Reviewed by: Jorge Schrauwen <sjorge@blackdot.be> Approved by: Robert Mustacchi <rm@fingolfin.org>
author: Jason King <jason.brian.king@gmail.com> 2021-04-17 09:08:24 +0000
committer: Andy Fiddaman <omnios@citrus-it.co.uk> 2021-10-07 09:11:03 +0000
commit: aa693e996c2928c92cccd8a3efe91373e85a6967 (patch)
tree: 23d7431e48a5194bf8ae93968c3caedc6c8bc7a6
parent: 2d2dd8359f765a17f6caaa2d37d86837c0c40915 (diff)
download: illumos-gate-aa693e996c2928c92cccd8a3efe91373e85a6967.tar.gz
44 files changed, 12810 insertions, 9 deletions
diff --git a/exception_lists/cstyle b/exception_lists/cstyle
index bf1856d5f0..3b15aa6700 100644
--- a/exception_lists/cstyle
+++ b/exception_lists/cstyle
@@ -1357,6 +1357,7 @@ usr/src/cmd/bhyve/pci_lpc.[ch]
 usr/src/cmd/bhyve/pci_nvme.c
 usr/src/cmd/bhyve/pci_passthru.c
 usr/src/cmd/bhyve/pci_uart.c
+usr/src/cmd/bhyve/pci_virtio_9p.c
 usr/src/cmd/bhyve/pci_virtio_block.c
 usr/src/cmd/bhyve/pci_virtio_console.c
 usr/src/cmd/bhyve/pci_virtio_net.c
@@ -1390,3 +1391,4 @@ usr/src/uts/i86pc/io/vmm/amd/amdvi_*.[ch]
 usr/src/uts/i86pc/io/vmm/amd/ivrs_*.c
 usr/src/uts/i86pc/sys/vmm.h
 usr/src/uts/i86pc/sys/vmm_dev.h
+usr/src/lib/lib9p/common/*
diff --git a/exception_lists/hdrchk b/exception_lists/hdrchk
index fc022b3782..0c9c154ff0 100644
--- a/exception_lists/hdrchk
+++ b/exception_lists/hdrchk
@@ -433,3 +433,4 @@ usr/src/uts/i86pc/io/vmm/vmm_util.h
 usr/src/uts/i86pc/io/vmm/x86.h
 usr/src/uts/i86pc/sys/vmm.h
 usr/src/uts/i86pc/sys/vmm_dev.h
+usr/src/lib/lib9p/common/*
diff --git a/exception_lists/packaging b/exception_lists/packaging
index 591b4b9711..47acb0988a 100644
--- a/exception_lists/packaging
+++ b/exception_lists/packaging
@@ -862,6 +862,12 @@ usr/lib/sparcv9/libdwarf.so		sparc
 usr/lib/libdwarf.so
 
 #
+# lib9p is private
+#
+usr/include/lib9p.h
+usr/lib/amd64/lib9p.so			i386
+
+#
 # We're not quite ready to ship ctfconvert and ctfmerge
 #
 usr/bin/ctfconvert
diff --git a/exception_lists/wscheck b/exception_lists/wscheck
index 462546802f..fdebb77910 100644
--- a/exception_lists/wscheck
+++ b/exception_lists/wscheck
@@ -69,6 +69,7 @@ usr/src/cmd/bhyve/pci_lpc.[ch]
 usr/src/cmd/bhyve/pci_nvme.c
 usr/src/cmd/bhyve/pci_passthru.c
 usr/src/cmd/bhyve/pci_uart.c
+usr/src/cmd/bhyve/pci_virtio_9p.c
 usr/src/cmd/bhyve/pci_virtio_block.c
 usr/src/cmd/bhyve/pci_virtio_console.c
 usr/src/cmd/bhyve/pci_virtio_net.c
@@ -95,3 +96,4 @@ usr/src/cmd/bhyve/xmsr.[ch]
 usr/src/cmd/bhyvectl/bhyvectl.c
 usr/src/contrib/bhyve/*
 usr/src/lib/libvmmapi/common/vmmapi.[ch]
+usr/src/lib/lib9p/common/*
diff --git a/usr/src/cmd/bhyve/Makefile b/usr/src/cmd/bhyve/Makefile
index bbc966d67f..4e54c6be42 100644
--- a/usr/src/cmd/bhyve/Makefile
+++ b/usr/src/cmd/bhyve/Makefile
@@ -56,6 +56,7 @@ SRCS =	acpi.c			\
 	pci_nvme.c		\
 	pci_passthru.c		\
 	pci_uart.c		\
+	pci_virtio_9p.c		\
 	pci_virtio_block.c	\
 	pci_virtio_console.c	\
 	pci_virtio_net.c	\
@@ -115,6 +116,7 @@ CPPFLAGS =	-I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \
 		-I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64 \
 		-I$(CONTRIB)/bhyve/dev/usb/controller \
 		-I$(CONTRIB)/bhyve/dev/mii \
+		-I$(SRC)/lib/lib9p/common \
 		-I$(SRC)/uts/common/io/e1000api \
 		$(CPPFLAGS.master) \
 		-I$(SRC)/uts/i86pc/io/vmm \
@@ -128,6 +130,8 @@ pci_nvme.o := SMOFF += kmalloc_wrong_size
 
 pci_passthru.o := CERRWARN += -_gcc10=-Wno-address-of-packed-member
 
+pci_virtio_9p.o := SMOFF += kmalloc_wrong_size
+
 pci_xhci.o := CERRWARN += -_gcc10=-Wno-address-of-packed-member
 
 SMOFF += all_func_returns,leaks,no_if_block
@@ -136,6 +140,7 @@ SMOFF += all_func_returns,leaks,no_if_block
 CSTD=		$(CSTD_GNU99)
 
 $(PROG) := LDLIBS += \
+	-l9p \
 	-lsocket \
 	-lnsl \
 	-ldlpi \
diff --git a/usr/src/cmd/bhyve/README.sync b/usr/src/cmd/bhyve/README.sync
index 4f71c1420e..bec61410ee 100644
--- a/usr/src/cmd/bhyve/README.sync
+++ b/usr/src/cmd/bhyve/README.sync
@@ -24,12 +24,6 @@ The draft Save/Restore functionality, added in FreeBSD commit
 yet.  It is not built by default in FreeBSD, so we're not interested in taking
 it until it successfully endures more in-depth testing.
 
-The VirtFS filesystem sharing feature, added in FreeBSD commit
-100353cfbf882e23c911300ebd0cb458bd3ee975, has not been synced into illumos bhyve
-yet.  It depends on the userland lib9p which needs a fair amount of work to
-build and run on illumos. The integration of this feature is being tracked in
-https://www.illumos.org/issues/13380
-
 The stub usr/src/compat/bhyve/stdatomic.h file only includes enough glue
 to satisfy the use of <stdatomic.h> in usr/src/cmd/bhyve/rfb.c, and in
 particular assumes that atomic variables are sized as an int. If other bhyve
diff --git a/usr/src/cmd/bhyve/pci_virtio_9p.c b/usr/src/cmd/bhyve/pci_virtio_9p.c
new file mode 100644
index 0000000000..b3fdb2db2c
--- /dev/null
+++ b/usr/src/cmd/bhyve/pci_virtio_9p.c
@@ -0,0 +1,406 @@
+/*-
+ * Copyright (c) 2015 iXsystems Inc.
+ * Copyright (c) 2017-2018 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer
+ *    in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * VirtIO filesystem passthrough using 9p protocol.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/uio.h>
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+
+#include <lib9p.h>
+#include <backend/fs.h>
+
+#include "bhyverun.h"
+#include "config.h"
+#include "debug.h"
+#include "pci_emul.h"
+#include "virtio.h"
+
+#ifndef __FreeBSD__
+#include "privileges.h"
+#endif
+
+/* Number of iovec/flags slots used when fetching one descriptor chain. */
+#define	VT9P_MAX_IOV	128
+/* Queue size assigned to the device's single virtqueue. */
+#define VT9P_RINGSZ	256
+/* Longest share name (tag) accepted, in bytes. */
+#define	VT9P_MAXTAGSZ	256
+/* Config space: a 16-bit tag length followed by up to VT9P_MAXTAGSZ bytes. */
+#define	VT9P_CONFIGSPACESZ	(VT9P_MAXTAGSZ + sizeof(uint16_t))
+
+/* Non-zero enables DPRINTF() output. */
+static int pci_vt9p_debug;
+/* Both macros take a single, parenthesised printf() argument list. */
+#define DPRINTF(params) if (pci_vt9p_debug) printf params
+#define WPRINTF(params) printf params
+
+/*
+ * Per-device softc
+ *
+ * One instance is allocated per virtio-9p device in pci_vt9p_init().
+ */
+struct pci_vt9p_softc {
+	/* Generic virtio state, linked up with vi_softc_linkup(). */
+	struct virtio_softc      vsc_vs;
+	/* The device's only virtqueue. */
+	struct vqueue_info       vsc_vq;
+	/* Held around vq_relchain()/vq_endchains(); also set as vs_mtx. */
+	pthread_mutex_t          vsc_mtx;
+	/* Not referenced by the code in this file. */
+	uint64_t                 vsc_cfg;
+	/* Feature bits stored by pci_vt9p_neg_features(). */
+	uint64_t                 vsc_features;
+	/* Not referenced by the code in this file. */
+	char *                   vsc_rootpath;
+	/* Config space contents (tag length + tag) served by cfgread. */
+	struct pci_vt9p_config * vsc_config;
+	/* lib9p filesystem backend, server and connection handles. */
+	struct l9p_backend *     vsc_fs_backend;
+	struct l9p_server *      vsc_server;
+        struct l9p_connection *  vsc_conn;
+};
+
+/*
+ * Per-request state, allocated in pci_vt9p_notify() and freed by
+ * pci_vt9p_send() or pci_vt9p_drop().
+ */
+struct pci_vt9p_request {
+	struct pci_vt9p_softc *	vsr_sc;		/* owning device */
+	struct iovec *		vsr_iov;	/* iovecs of the whole chain */
+	size_t			vsr_niov;	/* number of entries in vsr_iov */
+	size_t			vsr_respidx;	/* first writable entry */
+	size_t			vsr_iolen;	/* length passed to vq_relchain */
+	uint16_t		vsr_idx;	/* chain index from vq_getchain */
+};
+
+/*
+ * Device config space layout: the share name length followed by the share
+ * name bytes.  pci_vt9p_init() allocates VT9P_MAXTAGSZ bytes for the tag and
+ * copies the name in without a terminating NUL.
+ */
+struct pci_vt9p_config {
+	uint16_t tag_len;
+	char tag[0];
+} __attribute__((packed));
+
+/* Forward declarations for the callbacks referenced below. */
+static int pci_vt9p_send(struct l9p_request *, const struct iovec *,
+    const size_t, const size_t, void *);
+static void pci_vt9p_drop(struct l9p_request *, const struct iovec *, size_t,
+    void *);
+static void pci_vt9p_reset(void *);
+static void pci_vt9p_notify(void *, struct vqueue_info *);
+static int pci_vt9p_cfgread(void *, int, int, uint32_t *);
+static void pci_vt9p_neg_features(void *, uint64_t);
+
+/*
+ * Device description passed to vi_softc_linkup().  The initialiser is
+ * positional, so the entry order must follow struct virtio_consts.
+ */
+static struct virtio_consts vt9p_vi_consts = {
+	"vt9p",			/* our name */
+	1,			/* we support 1 virtqueue */
+	VT9P_CONFIGSPACESZ,	/* config reg size */
+	pci_vt9p_reset,		/* reset */
+	pci_vt9p_notify,	/* device-wide qnotify */
+	pci_vt9p_cfgread,	/* read virtio config */
+	NULL,			/* write virtio config */
+	pci_vt9p_neg_features,	/* apply negotiated features */
+	(1 << 0),		/* our capabilities */
+};
+
+
+/*
+ * Reset callback: log the request when debugging is enabled and reset the
+ * generic virtio state of the device.
+ */
+static void
+pci_vt9p_reset(void *vsc)
+{
+	struct pci_vt9p_softc *softc = vsc;
+
+	DPRINTF(("vt9p: device reset requested !\n"));
+
+	vi_reset_dev(&softc->vsc_vs);
+}
+
+/*
+ * Feature negotiation callback: remember the negotiated feature bits in the
+ * softc.
+ */
+static void
+pci_vt9p_neg_features(void *vsc, uint64_t negotiated_features)
+{
+	((struct pci_vt9p_softc *)vsc)->vsc_features = negotiated_features;
+}
+
+/*
+ * Config space read callback: copy size bytes starting at offset within the
+ * device config structure into *retval.  Always returns 0.
+ */
+static int
+pci_vt9p_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+	struct pci_vt9p_softc *softc = vsc;
+	uint8_t *cfgbase = (uint8_t *)softc->vsc_config;
+
+	memcpy(retval, cfgbase + offset, size);
+
+	return (0);
+}
+
+/*
+ * lib9p transport callback: supply the response buffer for a request.  The
+ * entries of the request's iovec array from vsr_respidx onwards are copied
+ * into iov and their count is stored in *niov.  Always returns 0.
+ */
+static int
+pci_vt9p_get_buffer(struct l9p_request *req, struct iovec *iov, size_t *niov,
+    void *arg)
+{
+	struct pci_vt9p_request *vreq = req->lr_aux;
+	struct iovec *resp = &vreq->vsr_iov[vreq->vsr_respidx];
+	size_t nresp = vreq->vsr_niov - vreq->vsr_respidx;
+
+	memcpy(iov, resp, nresp * sizeof(struct iovec));
+	*niov = nresp;
+
+	return (0);
+}
+
+/*
+ * lib9p transport callback: a response of iolen bytes is ready.  Record the
+ * length, release the descriptor chain with that length under the device
+ * mutex and free the per-request state.  Always returns 0.
+ */
+static int
+pci_vt9p_send(struct l9p_request *req, const struct iovec *iov,
+    const size_t niov, const size_t iolen, void *arg)
+{
+	struct pci_vt9p_request *vreq = req->lr_aux;
+	struct pci_vt9p_softc *softc = vreq->vsr_sc;
+	struct vqueue_info *queue = &softc->vsc_vq;
+
+	vreq->vsr_iolen = iolen;
+
+	pthread_mutex_lock(&softc->vsc_mtx);
+	vq_relchain(queue, vreq->vsr_idx, vreq->vsr_iolen);
+	vq_endchains(queue, 1);
+	pthread_mutex_unlock(&softc->vsc_mtx);
+
+	free(vreq);
+
+	return (0);
+}
+
+/*
+ * lib9p transport callback: the response is being dropped.  Release the
+ * descriptor chain with a length of zero under the device mutex and free
+ * the per-request state.
+ */
+static void
+pci_vt9p_drop(struct l9p_request *req, const struct iovec *iov, size_t niov,
+    void *arg)
+{
+	struct pci_vt9p_request *vreq = req->lr_aux;
+	struct pci_vt9p_softc *softc = vreq->vsr_sc;
+	struct vqueue_info *queue = &softc->vsc_vq;
+
+	pthread_mutex_lock(&softc->vsc_mtx);
+	vq_relchain(queue, vreq->vsr_idx, 0);
+	vq_endchains(queue, 1);
+	pthread_mutex_unlock(&softc->vsc_mtx);
+
+	free(vreq);
+}
+
+/*
+ * Queue notification callback.  For every available descriptor chain,
+ * allocate a request, record where the writable (response) descriptors
+ * start and pass the readable descriptors to lib9p.  The request is freed
+ * later by pci_vt9p_send() or pci_vt9p_drop().
+ */
+static void
+pci_vt9p_notify(void *vsc, struct vqueue_info *vq)
+{
+	struct iovec iov[VT9P_MAX_IOV];
+	struct pci_vt9p_softc *sc;
+	struct pci_vt9p_request *preq;
+	uint16_t idx, n, i;
+	uint16_t flags[VT9P_MAX_IOV];
+
+	sc = vsc;
+
+	while (vq_has_descs(vq)) {
+		/*
+		 * NOTE(review): n is unsigned; if vq_getchain() can report
+		 * an error as a negative value it would be treated as a very
+		 * large count here -- confirm against virtio.c.
+		 */
+		n = vq_getchain(vq, &idx, iov, VT9P_MAX_IOV, flags);
+		preq = calloc(1, sizeof(struct pci_vt9p_request));
+#ifndef __FreeBSD__
+		if (preq == NULL) {
+			/*
+			 * NOTE(review): the chain fetched above is neither
+			 * processed nor released on this path -- confirm
+			 * whether it should be handed back to the queue.
+			 */
+			EPRINTLN("virtio-9p: allocation failure: %s",
+			    strerror(errno));
+			break;
+		}
+#endif
+		preq->vsr_sc = sc;
+		preq->vsr_idx = idx;
+		/* Points at the on-stack array; only valid during this call */
+		preq->vsr_iov = iov;
+		preq->vsr_niov = n;
+		preq->vsr_respidx = 0;
+
+		/* Count readable descriptors */
+		for (i = 0; i < n; i++) {
+			if (flags[i] & VRING_DESC_F_WRITE)
+				break;
+
+			preq->vsr_respidx++;
+		}
+
+		/* Reuse the outer index rather than shadowing it */
+		for (i = 0; i < n; i++) {
+			DPRINTF(("vt9p: vt9p_notify(): desc%d base=%p, "
+			    "len=%zu, flags=0x%04x\r\n", i, iov[i].iov_base,
+			    iov[i].iov_len, flags[i]));
+		}
+
+		l9p_connection_recv(sc->vsc_conn, iov, preq->vsr_respidx, preq);
+	}
+}
+
+/*
+ * Parse a legacy, comma-separated option string into configuration nodes.
+ *
+ * A token of the form "name=path" sets the "sharename" and "path" nodes;
+ * only one such token is allowed.  Any other token is set as a boolean
+ * node with the value true.
+ *
+ * Returns 0 on success (including when opts is NULL) and -1 on error.
+ */
+static int
+pci_vt9p_legacy_config(nvlist_t *nvl, const char *opts)
+{
+	char *sharename = NULL, *tofree, *token, *tokens;
+
+	if (opts == NULL)
+		return (0);
+
+	tokens = tofree = strdup(opts);
+	if (tofree == NULL) {
+		/* Without the copy every option would be silently ignored */
+		EPRINTLN("virtio-9p: option string allocation failure: %s",
+		    strerror(errno));
+		return (-1);
+	}
+
+	while ((token = strsep(&tokens, ",")) != NULL) {
+		if (strchr(token, '=') != NULL) {
+			if (sharename != NULL) {
+				EPRINTLN(
+			    "virtio-9p: more than one share name given");
+				/* Do not leak the working copy on error */
+				free(tofree);
+				return (-1);
+			}
+
+			/* Split in place: name before '=', path after it */
+			sharename = strsep(&token, "=");
+			set_config_value_node(nvl, "sharename", sharename);
+			set_config_value_node(nvl, "path", token);
+		} else
+			set_config_bool_node(nvl, token, true);
+	}
+	free(tofree);
+
+	return (0);
+}
+
+/*
+ * Device instance initialisation.
+ *
+ * Reads the "ro", "path" and "sharename" configuration nodes, opens the
+ * shared directory, allocates the soft state and the config space, sets up
+ * the lib9p backend, server and connection, and finally registers the
+ * virtio PCI device.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int
+pci_vt9p_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
+{
+	struct pci_vt9p_softc *sc;
+	const char *value;
+	const char *sharename;
+	int rootfd;
+	bool ro;
+#ifndef WITHOUT_CAPSICUM
+	cap_rights_t rootcap;
+#endif
+
+	ro = get_config_bool_node_default(nvl, "ro", false);
+
+#ifndef __FreeBSD__
+	illumos_priv_add_min(PRIV_FILE_DAC_READ, "vt9p");
+	illumos_priv_add_min(PRIV_FILE_DAC_SEARCH, "vt9p");
+
+	/* The modifying privileges are only requested for writable shares */
+	if (!ro) {
+		illumos_priv_add_min(PRIV_FILE_CHOWN, "vt9p");
+		illumos_priv_add_min(PRIV_FILE_CHOWN_SELF, "vt9p");
+		illumos_priv_add_min(PRIV_FILE_WRITE, "vt9p");
+		illumos_priv_add_min(PRIV_FILE_DAC_WRITE, "vt9p");
+		illumos_priv_add_min(PRIV_FILE_OWNER, "vt9p");
+		illumos_priv_add_min(PRIV_FILE_LINK_ANY, "vt9p");
+	}
+#endif
+
+	value = get_config_value_node(nvl, "path");
+	if (value == NULL) {
+		EPRINTLN("virtio-9p: path required");
+		return (1);
+	}
+	rootfd = open(value, O_DIRECTORY);
+	if (rootfd < 0) {
+		EPRINTLN("virtio-9p: failed to open '%s': %s", value,
+		    strerror(errno));
+		return (-1);
+	}
+
+	sharename = get_config_value_node(nvl, "sharename");
+	if (sharename == NULL) {
+		EPRINTLN("virtio-9p: share name required");
+		return (1);
+	}
+	if (strlen(sharename) > VT9P_MAXTAGSZ) {
+		EPRINTLN("virtio-9p: share name too long");
+		return (1);
+	}
+
+	sc = calloc(1, sizeof(struct pci_vt9p_softc));
+#ifndef __FreeBSD__
+	if (sc == NULL) {
+		EPRINTLN("virtio-9p: soft state allocation failure: %s",
+		    strerror(errno));
+		return (1);
+	}
+#endif
+	sc->vsc_config = calloc(1, sizeof(struct pci_vt9p_config) +
+	    VT9P_MAXTAGSZ);
+#ifndef __FreeBSD__
+	/* Check the config allocation itself, not the softc again */
+	if (sc->vsc_config == NULL) {
+		EPRINTLN("virtio-9p: vsc_config allocation failure: %s",
+		    strerror(errno));
+		return (1);
+	}
+#endif
+
+	pthread_mutex_init(&sc->vsc_mtx, NULL);
+
+#ifndef WITHOUT_CAPSICUM
+	cap_rights_init(&rootcap,
+	    CAP_LOOKUP, CAP_ACL_CHECK, CAP_ACL_DELETE, CAP_ACL_GET,
+	    CAP_ACL_SET, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSTAT,
+	    CAP_CREATE, CAP_FCHMODAT, CAP_FCHOWNAT, CAP_FTRUNCATE,
+	    CAP_LINKAT_SOURCE, CAP_LINKAT_TARGET, CAP_MKDIRAT, CAP_MKNODAT,
+	    CAP_PREAD, CAP_PWRITE, CAP_RENAMEAT_SOURCE, CAP_RENAMEAT_TARGET,
+	    CAP_SEEK, CAP_SYMLINKAT, CAP_UNLINKAT, CAP_EXTATTR_DELETE,
+	    CAP_EXTATTR_GET, CAP_EXTATTR_LIST, CAP_EXTATTR_SET,
+	    CAP_FUTIMES, CAP_FSTATFS, CAP_FSYNC, CAP_FPATHCONF);
+
+	if (cap_rights_limit(rootfd, &rootcap) != 0)
+		return (1);
+#endif
+
+	/* The tag is stored with its length and without a terminating NUL */
+	sc->vsc_config->tag_len = (uint16_t)strlen(sharename);
+	memcpy(sc->vsc_config->tag, sharename, sc->vsc_config->tag_len);
+
+	if (l9p_backend_fs_init(&sc->vsc_fs_backend, rootfd, ro) != 0) {
+		errno = ENXIO;
+		return (1);
+	}
+
+	if (l9p_server_init(&sc->vsc_server, sc->vsc_fs_backend) != 0) {
+		errno = ENXIO;
+		return (1);
+	}
+
+	if (l9p_connection_init(sc->vsc_server, &sc->vsc_conn) != 0) {
+		errno = EIO;
+		return (1);
+	}
+
+	/* Install this device as the transport for the lib9p connection */
+	sc->vsc_conn->lc_msize = L9P_MAX_IOV * PAGE_SIZE;
+	sc->vsc_conn->lc_lt.lt_get_response_buffer = pci_vt9p_get_buffer;
+	sc->vsc_conn->lc_lt.lt_send_response = pci_vt9p_send;
+	sc->vsc_conn->lc_lt.lt_drop_response = pci_vt9p_drop;
+
+	vi_softc_linkup(&sc->vsc_vs, &vt9p_vi_consts, sc, pi, &sc->vsc_vq);
+	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
+	sc->vsc_vq.vq_qsize = VT9P_RINGSZ;
+
+	/* initialize config space */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_9P);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
+	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_9P);
+	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
+
+	if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
+		return (1);
+	vi_set_io_bar(&sc->vsc_vs, 0);
+
+	return (0);
+}
+
+/*
+ * PCI device emulation entry for the "virtio-9p" device, registered with
+ * PCI_EMUL_SET().  BAR accesses go to the generic virtio handlers.
+ */
+struct pci_devemu pci_de_v9p = {
+	.pe_emu =	"virtio-9p",
+	.pe_legacy_config = pci_vt9p_legacy_config,
+	.pe_init =	pci_vt9p_init,
+	.pe_barwrite =	vi_pci_write,
+	.pe_barread =	vi_pci_read
+};
+PCI_EMUL_SET(pci_de_v9p);
diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile
index 5c796c3caf..2673e008d8 100644
--- a/usr/src/lib/Makefile
+++ b/usr/src/lib/Makefile
@@ -274,6 +274,7 @@ SUBDIRS +=				\
 	$($(MACH)_SUBDIRS)
 
 i386_SUBDIRS=		\
+	lib9p		\
 	libfdisk	\
 	libppt		\
 	libsaveargs	\
@@ -489,6 +490,7 @@ HDRSUBDIRS=				\
 	$($(MACH)_HDRSUBDIRS)
 
 i386_HDRSUBDIRS=	\
+	lib9p		\
 	libfdisk	\
 	libppt		\
 	libsaveargs	\
@@ -581,6 +583,7 @@ gss_mechs/mech_krb5: libgss libresolv2 pkcs11 libkstat
 gss_mechs/mech_spnego: gss_mechs/mech_krb5
 hal:		dbusdeps
 krb5:		gss_mechs/mech_krb5 libtecla libldap5
+lib9p:		libsec libcustr
 libads:		libnsl
 libadt_jni:	libbsm
 libadutils:	libldap5 libresolv2
diff --git a/usr/src/lib/lib9p/COPYRIGHT b/usr/src/lib/lib9p/COPYRIGHT
new file mode 100644
index 0000000000..b02f09aabd
--- /dev/null
+++ b/usr/src/lib/lib9p/COPYRIGHT
@@ -0,0 +1,47 @@
+Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+All rights reserved
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted providing that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+Some parts of the code are based on libixp (http://libs.suckless.org/libixp)
+library code released under following license:
+
+© 2005-2006 Anselm R. Garbe <garbeam@gmail.com>
+© 2006-2010 Kris Maglione <maglione.k at Gmail>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/usr/src/lib/lib9p/COPYRIGHT.descrip b/usr/src/lib/lib9p/COPYRIGHT.descrip
new file mode 100644
index 0000000000..d854795482
--- /dev/null
+++ b/usr/src/lib/lib9p/COPYRIGHT.descrip
@@ -0,0 +1 @@
+lib9p library
diff --git a/usr/src/lib/lib9p/Makefile b/usr/src/lib/lib9p/Makefile
new file mode 100644
index 0000000000..65f8a88fae
--- /dev/null
+++ b/usr/src/lib/lib9p/Makefile
@@ -0,0 +1,41 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
+#
+
+include ../Makefile.lib
+
+# The only build subdirectory is the 64-bit one.
+# NOTE(review): $(BUILD64) is defined outside this file; presumably it
+# expands to nothing or to a comment character -- confirm.
+$(BUILD64)SUBDIRS += $(MACH64)
+
+# lib9p.h lives in common/ and is installed by the install_h target.
+HDRS =		lib9p.h
+HDRDIR =	common
+# Left empty, so the check target below has no prerequisites.
+CHECKHDRS =
+
+# Per-target value of TARGET, handed to the sub-make in each subdirectory.
+all:=		TARGET= all
+install:=	TARGET= install
+clean:=		TARGET= clean
+clobber:=	TARGET= clobber
+
+.KEEP_STATE:
+
+all install clean clobber: $(SUBDIRS)
+
+install_h:	$(ROOTHDRS)
+check:		$(CHECKHDRS)
+
+# Descend into each subdirectory; FRC forces the rule to run every time.
+$(SUBDIRS): FRC
+	cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/lib/lib9p/Makefile.com b/usr/src/lib/lib9p/Makefile.com
new file mode 100644
index 0000000000..b04b210796
--- /dev/null
+++ b/usr/src/lib/lib9p/Makefile.com
@@ -0,0 +1,77 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
+#
+
+LIBRARY=	lib9p.a
+VERS=		.1
+
+# Object names keep their subdirectory relative to common/.
+OBJECTS=	backend/fs.o \
+		connection.o \
+		genacl.o \
+		hashtable.o \
+		log.o \
+		pack.o \
+		request.o \
+		rfuncs.o \
+		sbuf/sbuf.o \
+		threadpool.o \
+		transport/socket.o \
+		utils.o
+HDRS =		lib9p.h
+
+# Subdirectories that must exist under pics/ before compiling (see
+# mkpicdirs below).
+LOBJDIRS=	backend transport sbuf
+
+include ../../Makefile.lib
+
+# Only $(DYNLIB) is built.
+LIBS =		$(DYNLIB)
+LDLIBS +=	-lc -lcustr -lsocket -lsec -lnvpair
+
+SRCDIR =	..
+
+CSTD =		$(CSTD_GNU99)
+
+CFLAGS +=	$(CCVERBOSE)
+
+CPPFLAGS +=	-D__illumos__
+CPPFLAGS +=	-D_POSIX_PTHREAD_SEMANTICS -D__EXTENSIONS__
+CPPFLAGS +=	-I../common -I../common/backend
+# NOTE(review): $(NOT_RELEASE_BUILD) is defined outside this file;
+# presumably this enables L9P_DEBUG for non-release builds only -- confirm.
+$(NOT_RELEASE_BUILD)CPPFLAGS +=	-DL9P_DEBUG=L9P_DEBUG
+
+SMOFF += all_func_returns
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+# Create the pics/ subdirectories before the library objects are built.
+$(LIBS): mkpicdirs
+
+mkpicdirs:
+	@mkdir -p $(LOBJDIRS:%=pics/%)
+
+# NOTE(review): there is no explicit pics/sbuf/%.o rule; sbuf/sbuf.o
+# presumably builds through this generic rule -- confirm.
+pics/%.o: ../common/%.c
+	$(COMPILE.c) -o $@ $<
+	$(POST_PROCESS_O)
+
+pics/backend/%.o: ../common/backend/%.c
+	$(COMPILE.c) -o $@ $<
+	$(POST_PROCESS_O)
+
+pics/transport/%.o: ../common/transport/%.c
+	$(COMPILE.c) -o $@ $<
+	$(POST_PROCESS_O)
+
+# Install headers from common/ into $(ROOTHDRDIR).
+$(ROOTHDRDIR)/%.h: ../common/%.h
+	$(INS.file)
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/lib9p/amd64/Makefile b/usr/src/lib/lib9p/amd64/Makefile
new file mode 100644
index 0000000000..c3510fdb62
--- /dev/null
+++ b/usr/src/lib/lib9p/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
+#
+
+# Common lib9p definitions first, then the 64-bit library settings.
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+# Build, then install the 64-bit library and its links.
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/lib/lib9p/common/backend/backend.h b/usr/src/lib/lib9p/common/backend/backend.h
new file mode 100644
index 0000000000..2b4bf2d8e4
--- /dev/null
+++ b/usr/src/lib/lib9p/common/backend/backend.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+
+#ifndef LIB9P_BACKEND_H
+#define LIB9P_BACKEND_H
+
+/*
+ * Backend operations vector.  Every member other than softc is a callback
+ * whose first argument is an opaque void pointer and whose second argument
+ * is either the request being serviced or the fid being operated on.
+ * NOTE(review): the void pointer is presumably the softc member below --
+ * confirm against the callers in the lib9p core.
+ */
+struct l9p_backend {
+	void *softc;
+	void (*freefid)(void *, struct l9p_fid *);
+	int (*attach)(void *, struct l9p_request *);
+	int (*clunk)(void *, struct l9p_fid *);
+	int (*create)(void *, struct l9p_request *);
+	int (*open)(void *, struct l9p_request *);
+	int (*read)(void *, struct l9p_request *);
+	int (*remove)(void *, struct l9p_fid *);
+	int (*stat)(void *, struct l9p_request *);
+	int (*walk)(void *, struct l9p_request *);
+	int (*write)(void *, struct l9p_request *);
+	int (*wstat)(void *, struct l9p_request *);
+	int (*statfs)(void *, struct l9p_request *);
+	int (*lopen)(void *, struct l9p_request *);
+	int (*lcreate)(void *, struct l9p_request *);
+	int (*symlink)(void *, struct l9p_request *);
+	int (*mknod)(void *, struct l9p_request *);
+	int (*rename)(void *, struct l9p_request *);
+	int (*readlink)(void *, struct l9p_request *);
+	int (*getattr)(void *, struct l9p_request *);
+	int (*setattr)(void *, struct l9p_request *);
+	int (*xattrwalk)(void *, struct l9p_request *);
+	int (*xattrcreate)(void *, struct l9p_request *);
+	int (*xattrread)(void *, struct l9p_request *);
+	int (*xattrwrite)(void *, struct l9p_request *);
+	int (*xattrclunk)(void *, struct l9p_fid *);
+	int (*readdir)(void *, struct l9p_request *);
+	int (*fsync)(void *, struct l9p_request *);
+	int (*lock)(void *, struct l9p_request *);
+	int (*getlock)(void *, struct l9p_request *);
+	int (*link)(void *, struct l9p_request *);
+	int (*mkdir)(void *, struct l9p_request *);
+	int (*renameat)(void *, struct l9p_request *);
+	int (*unlinkat)(void *, struct l9p_request *);
+};
+
+#endif  /* LIB9P_BACKEND_H */
diff --git a/usr/src/lib/lib9p/common/backend/fs.c b/usr/src/lib/lib9p/common/backend/fs.c
new file mode 100644
index 0000000000..4b7764cd86
--- /dev/null
+++ b/usr/src/lib/lib9p/common/backend/fs.c
@@ -0,0 +1,3238 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Copyright 2021 Joyent, Inc.
+ */
+
+/*
+ * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <dirent.h>
+#include <pwd.h>
+#include <grp.h>
+#include <libgen.h>
+#include <pthread.h>
+#include "../lib9p.h"
+#include "../lib9p_impl.h"
+#include "../fid.h"
+#include "../log.h"
+#include "../rfuncs.h"
+#include "../genacl.h"
+#include "backend.h"
+#include "fs.h"
+
+#if defined(WITH_CASPER)
+  #include <libcasper.h>
+  #include <casper/cap_pwd.h>
+  #include <casper/cap_grp.h>
+#endif
+
+#if defined(__FreeBSD__)
+  #include <sys/param.h>
+  #if __FreeBSD_version >= 1000000
+    #define	HAVE_BINDAT
+  #endif
+#endif
+
+#if defined(__FreeBSD__)
+  #define	HAVE_BIRTHTIME
+#endif
+
+#if defined(__APPLE__)
+  #include <sys/syscall.h>
+  #include "Availability.h"
+  #define ACL_TYPE_NFS4 ACL_TYPE_EXTENDED
+#endif
+
+#if defined (__illumos__)
+  #include <sys/sysmacros.h>
+  #include <sys/statvfs.h>
+  #include <sys/un.h>
+  #include <attr.h>
+  #include <sys/nvpair.h>
+#endif
+
+/*
+ * State of the fs backend.  fs_attach() receives a pointer to this
+ * structure through its void *softc argument.
+ */
+struct fs_softc {
+	int 	fs_rootfd;	/* fd of the export root; must be a directory */
+	bool	fs_readonly;	/* when set, new entries fail with EROFS */
+#if defined(__illumos__)
+	/*
+	 * On illumos, the file creation time (birthtime) is stored (on
+	 * supported filesystems -- i.e. zfs) in an extended attribute.
+	 * If for some reason the fs doesn't support extended attributes,
+	 * we skip trying to read the creation time.
+	 */
+	bool	fs_hasxattr;
+#endif
+#if defined(WITH_CASPER)
+	cap_channel_t *fs_cappwd;	/* channel used for passwd lookups */
+	cap_channel_t *fs_capgrp;	/* channel used for group lookups */
+#endif
+};
+
+/*
+ * Backend data attached to a fid through its lo_aux pointer.
+ */
+struct fs_fid {
+	DIR	*ff_dir;	/* open directory stream, or NULL */
+	int	ff_dirfd;	/* base fd for *at() calls using ff_name */
+	int	ff_fd;		/* open file descriptor, or -1 */
+	int	ff_flags;	/* FF_* flags */
+	char	*ff_name;	/* strdup()ed path, used with ff_dirfd */
+	struct fs_authinfo *ff_ai;	/* shared, reference-counted authinfo */
+	pthread_mutex_t ff_mtx;	/* NOTE(review): what this guards is not visible here */
+	struct l9p_acl *ff_acl; /* cached ACL if any */
+};
+
+/*
+ * Per-platform accessors: STATFS_FSID() extracts the filesystem id from
+ * the statfs/statvfs buffer, and STAT_[AMC]TIME() name the timestamp
+ * members of struct stat.  Unsupported platforms fail the build.
+ */
+#if defined(__FreeBSD__)
+# define	STATFS_FSID(_s) \
+	(((uint64_t)(_s)->f_fsid.val[0] << 32) | (uint64_t)(_s)->f_fsid.val[1])
+
+# define	STAT_ATIME(_s)	((_s)->st_atimespec)
+# define	STAT_MTIME(_s)	((_s)->st_mtimespec)
+# define	STAT_CTIME(_s)	((_s)->st_ctimespec)
+#elif defined (__illumos__)
+# define	STATFS_FSID(_s)	((_s)->f_fsid)
+
+# define	STAT_ATIME(_s)	((_s)->st_atim)
+# define	STAT_MTIME(_s)	((_s)->st_mtim)
+# define	STAT_CTIME(_s)	((_s)->st_ctim)
+#else
+#error "Port me"
+#endif
+
+/* Bits for fs_fid.ff_flags (and the master copy in fs_authinfo.ai_flags). */
+#define	FF_NO_NFSV4_ACL	0x01	/* don't go looking for NFSv4 ACLs */
+/*	FF_NO_POSIX_ACL	0x02	-- not yet */
+
+/*
+ * Our authinfo consists of:
+ *
+ *  - a reference count
+ *  - a uid
+ *  - a gid-set
+ *
+ * The "default" gid is the first gid in the gid-set, provided the
+ * set size is at least 1.  The set-size may be zero, though.
+ *
+ * Adjustments to the ref-count must be atomic, once it's shared.
+ * It would be nice to use C11 atomics here but they are not common
+ * enough to all systems just yet; for now, we use a mutex.
+ *
+ * Note that some ops (Linux style ones) pass an effective gid for
+ * the op, in which case, that gid may override.  To achieve this
+ * effect, permissions testing functions also take an extra gid.
+ * If this gid is (gid_t)-1 it is not used and only the remaining
+ * gids take part.
+ *
+ * The uid may also be (uid_t)-1, meaning "no uid was available
+ * at all at attach time".  In this case, new files inherit parent
+ * directory uids.
+ *
+ * The refcount is simply the number of "openfile"s using this
+ * authinfo (so that when the last ref goes away, we can free it).
+ *
+ * There are also master ACL flags (same as in ff_flags).
+ */
+struct fs_authinfo {
+	pthread_mutex_t ai_mtx;	/* lock for refcnt */
+	uint32_t ai_refcnt;	/* number of fs_fids referencing this */
+	int	ai_flags;	/* master ACL flags (FF_* values) */
+	uid_t	ai_uid;		/* attach uid, or (uid_t)-1 if none */
+	int	ai_ngids;	/* number of entries in ai_gids[]; may be 0 */
+	gid_t	ai_gids[];	/* NB: flexible array member */
+};
+
+/*
+ * We have a global-static mutex for single-threading Tattach
+ * requests, which use getpwnam (and indirectly, getgr* functions)
+ * which are not reentrant.
+ */
+/*
+ * NOTE(review): fs_attach_mutex_inited presumably records that
+ * fs_attach_mutex has been initialized; the code that sets it is not
+ * in this part of the file.
+ */
+static bool fs_attach_mutex_inited;
+static pthread_mutex_t fs_attach_mutex;
+
+/*
+ * Mutex attributes passed to pthread_mutex_init() for the per-fid and
+ * per-authinfo mutexes when building for illumos.
+ */
+static pthread_mutexattr_t fs_mutexattr;
+
+/*
+ * Internal functions (except inline functions).
+ */
+static struct passwd *fs_getpwuid(struct fs_softc *, uid_t, struct r_pgdata *);
+static struct group *fs_getgrgid(struct fs_softc *, gid_t, struct r_pgdata *);
+static int fs_buildname(struct l9p_fid *, char *, char *, size_t);
+static int fs_pdir(struct fs_softc *, struct l9p_fid *, char *, size_t,
+    struct stat *st);
+static int fs_dpf(char *, char *, size_t);
+static int fs_oflags_dotu(int, int *);
+static int fs_oflags_dotl(uint32_t, int *, enum l9p_omode *);
+static int fs_nde(struct fs_softc *, struct l9p_fid *, bool, gid_t,
+    struct stat *, uid_t *, gid_t *);
+static struct fs_fid *open_fid(int, const char *, struct fs_authinfo *, bool);
+static void dostat(struct fs_softc *, struct l9p_stat *, char *,
+    struct stat *, bool dotu);
+#ifdef __illumos__
+static void getcrtime(struct fs_softc *, int, const char *, uint64_t *,
+    uint64_t *);
+static void dostatfs(struct l9p_statfs *, struct statvfs *, long);
+#define	ACL_TYPE_NFS4 1
+acl_t *acl_get_fd_np(int fd, int type);
+#else
+static void dostatfs(struct l9p_statfs *, struct statfs *, long);
+#endif
+static void fillacl(struct fs_fid *ff);
+static struct l9p_acl *getacl(struct fs_fid *ff, int fd, const char *path);
+static void dropacl(struct fs_fid *ff);
+static struct l9p_acl *look_for_nfsv4_acl(struct fs_fid *ff, int fd,
+    const char *path);
+static int check_access(int32_t,
+    struct l9p_acl *, struct stat *, struct l9p_acl *, struct stat *,
+    struct fs_authinfo *, gid_t);
+static void generate_qid(struct stat *, struct l9p_qid *);
+
+static int fs_icreate(void *, struct l9p_fid *, char *, int,
+    bool, mode_t, gid_t, struct stat *);
+static int fs_iopen(void *, struct l9p_fid *, int, enum l9p_omode,
+    gid_t, struct stat *);
+static int fs_imkdir(void *, struct l9p_fid *, char *,
+    bool, mode_t, gid_t, struct stat *);
+static int fs_imkfifo(void *, struct l9p_fid *, char *,
+    bool, mode_t, gid_t, struct stat *);
+static int fs_imknod(void *, struct l9p_fid *, char *,
+    bool, mode_t, dev_t, gid_t, struct stat *);
+static int fs_imksocket(void *, struct l9p_fid *, char *,
+    bool, mode_t, gid_t, struct stat *);
+static int fs_isymlink(void *, struct l9p_fid *, char *, char *,
+    gid_t, struct stat *);
+
+/*
+ * Internal functions implementing backend.
+ */
+static int fs_attach(void *, struct l9p_request *);
+static int fs_clunk(void *, struct l9p_fid *);
+static int fs_create(void *, struct l9p_request *);
+static int fs_open(void *, struct l9p_request *);
+static int fs_read(void *, struct l9p_request *);
+static int fs_remove(void *, struct l9p_fid *);
+static int fs_stat(void *, struct l9p_request *);
+static int fs_walk(void *, struct l9p_request *);
+static int fs_write(void *, struct l9p_request *);
+static int fs_wstat(void *, struct l9p_request *);
+static int fs_statfs(void *, struct l9p_request *);
+static int fs_lopen(void *, struct l9p_request *);
+static int fs_lcreate(void *, struct l9p_request *);
+static int fs_symlink(void *, struct l9p_request *);
+static int fs_mknod(void *, struct l9p_request *);
+static int fs_rename(void *, struct l9p_request *);
+static int fs_readlink(void *, struct l9p_request *);
+static int fs_getattr(void *, struct l9p_request *);
+static int fs_setattr(void *, struct l9p_request *);
+static int fs_xattrwalk(void *, struct l9p_request *);
+static int fs_xattrcreate(void *, struct l9p_request *);
+static int fs_readdir(void *, struct l9p_request *);
+static int fs_fsync(void *, struct l9p_request *);
+static int fs_lock(void *, struct l9p_request *);
+static int fs_getlock(void *, struct l9p_request *);
+static int fs_link(void *, struct l9p_request *);
+static int fs_renameat(void *, struct l9p_request *);
+static int fs_unlinkat(void *, struct l9p_request *);
+static void fs_freefid(void *, struct l9p_fid *);
+
+/*
+ * Convert from 9p2000 open/create mode to Unix-style O_* flags.
+ * This includes 9p2000.u extensions, but not 9p2000.L protocol,
+ * which has entirely different open, create, etc., flag bits.
+ *
+ * The <mode> given here is the one-byte (uint8_t) "mode"
+ * argument to Tcreate or Topen, so it can have at most 8 bits.
+ *
+ * https://swtch.com/plan9port/man/man9/open.html and
+ * http://plan9.bell-labs.com/magic/man2html/5/open
+ * both say:
+ *
+ *   The [low two bits of the] mode field determines the
+ *   type of I/O ... [I]f mode has the OTRUNC (0x10) bit
+ *   set, the file is to be truncated, which requires write
+ *   permission ...; if the mode has the ORCLOSE (0x40) bit
+ *   set, the file is to be removed when the fid is clunked,
+ *   which requires permission to remove the file from its
+ *   directory.  All other bits in mode should be zero.  It
+ *   is illegal to write a directory, truncate it, or
+ *   attempt to remove it on close.
+ *
+ * 9P2000.u may add ODIRECT (0x80); this is not completely clear.
+ * The fcall.h header defines OCEXEC (0x20) as well, but it makes
+ * no sense to send this to a server.  There seem to be no bits
+ * 0x04 and 0x08.
+ *
+ * We always turn on O_NOCTTY since as a server, we never want
+ * to gain a controlling terminal.  We always turn on O_NOFOLLOW
+ * for reasons described elsewhere.
+ */
+static int
+fs_oflags_dotu(int mode, int *aflags)
+{
+	const int accmode = mode & L9P_OACCMODE;
+	int oflags, leftover;
+
+	/*
+	 * As a server we never want a controlling terminal and never
+	 * follow a trailing symlink, whatever the client asked for.
+	 */
+	oflags = O_NOCTTY | O_NOFOLLOW;
+
+	/* Translate the access mode held in the low bits. */
+	if (accmode == L9P_OWRITE) {
+		oflags |= O_WRONLY;
+	} else if (accmode == L9P_ORDWR) {
+		oflags |= O_RDWR;
+	} else if (accmode == L9P_OEXEC) {
+		/* open-for-exec is opened read-only, so it cannot truncate */
+		if (mode & L9P_OTRUNC)
+			return (EINVAL);
+		oflags |= O_RDONLY;
+	} else {
+		/* L9P_OREAD, and anything else the mask might yield */
+		oflags |= O_RDONLY;
+	}
+
+	if (mode & L9P_OTRUNC)
+		oflags |= O_TRUNC;
+
+	/*
+	 * Strip what was translated above (access mode, OTRUNC) and
+	 * what is deliberately ignored here:
+	 *   ORCLOSE - caller only
+	 *   OCEXEC - makes no sense in server
+	 *   ODIRECT - not applicable here
+	 * Anything still set is a bit we could not translate; report
+	 * it as EINVAL so that problems get noticed.
+	 */
+	leftover = mode & ~(L9P_OTRUNC | L9P_OACCMODE | L9P_ORCLOSE |
+	    L9P_OCEXEC | L9P_ODIRECT);
+	if (leftover != 0) {
+		L9P_LOG(L9P_INFO,
+		    "fs_oflags_dotu: untranslated bits: %#x",
+		    (unsigned)leftover);
+		return (EINVAL);
+	}
+
+	*aflags = oflags;
+	return (0);
+}
+
+/*
+ * Convert from 9P2000.L (Linux) open mode bits to O_* flags.
+ * See fs_oflags_dotu above.
+ *
+ * Linux currently does not have open-for-exec, but there is a
+ * proposal for it using O_PATH|O_NOFOLLOW, now handled here.
+ *
+ * We may eventually also set L9P_ORCLOSE for L_O_TMPFILE.
+ */
+static int
+fs_oflags_dotl(uint32_t l_mode, int *aflags, enum l9p_omode *ap9)
+{
+	int flags;
+	enum l9p_omode p9;
+/*
+ * Access-mode field of the *wire* (Linux) flags: the low two bits.
+ * The host's O_ACCMODE must not be used to pick these apart, because
+ * some hosts define it wider than 3 (illumos folds O_SEARCH and O_EXEC
+ * into it); such a mask would swallow unrelated wire bits like
+ * L_O_PATH and let them leak into the host open flags.
+ */
+#define	L_ACCMODE	((uint32_t)3)
+#define	CLEAR(theirs)	l_mode &= ~(uint32_t)(theirs)
+#define	CONVERT(theirs, ours) \
+	do { \
+		if (l_mode & (theirs)) { \
+			CLEAR(theirs); \
+			flags |= ours; \
+		} \
+	} while (0)
+
+	/*
+	 * Linux O_RDONLY, O_WRONLY, O_RDWR (0,1,2) match the host
+	 * values on BSD/MacOS/illumos, so the two low bits can be used
+	 * directly.  The value 3 is not a valid access mode.
+	 */
+	flags = (int)(l_mode & L_ACCMODE);
+	if (flags == 3)
+		return (EINVAL);
+	CLEAR(L_ACCMODE);
+
+	/* O_PATH together with O_NOFOLLOW is the proposed open-for-exec. */
+	if ((l_mode & (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) ==
+		    (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) {
+		CLEAR(L9P_L_O_PATH | L9P_L_O_NOFOLLOW);
+		p9 = L9P_OEXEC;
+	} else {
+		/*
+		 * Slightly dirty, but same dirt, really, as
+		 * setting flags from the wire access mode.
+		 */
+		p9 = (enum l9p_omode)flags;	/* slightly dirty */
+	}
+
+	/* turn L_O_TMPFILE into L9P_ORCLOSE in *p9? */
+	if (l_mode & L9P_L_O_TRUNC)
+		p9 |= L9P_OTRUNC;	/* but don't CLEAR yet */
+
+	/* Always: no controlling terminal, no following of symlinks. */
+	flags |= O_NOCTTY | O_NOFOLLOW;
+
+	/*
+	 * L_O_CREAT seems to be noise, since we get separate open
+	 * and create.  But it is actually set sometimes.  We just
+	 * throw it out here; create ops must set it themselves and
+	 * open ops have no permissions bits and hence cannot create.
+	 *
+	 * L_O_EXCL does make sense on create ops, i.e., we can
+	 * take a create op with or without L_O_EXCL.  We pass that
+	 * through.
+	 */
+	CLEAR(L9P_L_O_CREAT);
+	CONVERT(L9P_L_O_EXCL, O_EXCL);
+	CONVERT(L9P_L_O_TRUNC, O_TRUNC);
+	CONVERT(L9P_L_O_DIRECTORY, O_DIRECTORY);
+	CONVERT(L9P_L_O_APPEND, O_APPEND);
+	CONVERT(L9P_L_O_NONBLOCK, O_NONBLOCK);
+
+	/*
+	 * Discard these as useless noise at our (server) end.
+	 * (NOATIME might be useful but we can only set it on a
+	 * per-mount basis.)
+	 */
+	CLEAR(L9P_L_O_CLOEXEC);
+	CLEAR(L9P_L_O_DIRECT);
+	CLEAR(L9P_L_O_DSYNC);
+	CLEAR(L9P_L_O_FASYNC);
+	CLEAR(L9P_L_O_LARGEFILE);
+	CLEAR(L9P_L_O_NOATIME);
+	CLEAR(L9P_L_O_NOCTTY);
+	CLEAR(L9P_L_O_NOFOLLOW);
+	CLEAR(L9P_L_O_SYNC);
+
+	/* Any bit still set is one we do not know how to honour. */
+	if (l_mode != 0) {
+		L9P_LOG(L9P_INFO,
+		    "fs_oflags_dotl: untranslated bits: %#x",
+		    (unsigned)l_mode);
+		return (EINVAL);
+	}
+
+	*aflags = flags;
+	*ap9 = p9;
+	return (0);
+#undef L_ACCMODE
+#undef CLEAR
+#undef CONVERT
+}
+
+/*
+ * Look up the passwd entry for <uid>, storing the result in <pg>.
+ * With WITH_CASPER the lookup goes through the softc's passwd
+ * channel; otherwise <sc> is unused.
+ */
+static struct passwd *
+fs_getpwuid(struct fs_softc *sc, uid_t uid, struct r_pgdata *pg)
+{
+	struct passwd *entry;
+
+#if !defined(WITH_CASPER)
+	(void)sc;
+	entry = r_getpwuid(uid, pg);
+#else
+	entry = r_cap_getpwuid(sc->fs_cappwd, uid, pg);
+#endif
+	return (entry);
+}
+
+/*
+ * Look up the group entry for <gid>, storing the result in <pg>.
+ * With WITH_CASPER the lookup goes through the softc's group
+ * channel; otherwise <sc> is unused.
+ */
+static struct group *
+fs_getgrgid(struct fs_softc *sc, gid_t gid, struct r_pgdata *pg)
+{
+	struct group *entry;
+
+#if !defined(WITH_CASPER)
+	(void)sc;
+	entry = r_getgrgid(gid, pg);
+#else
+	entry = r_cap_getgrgid(sc->fs_capgrp, gid, pg);
+#endif
+	return (entry);
+}
+
+/*
+ * Build full name of file by appending given name to directory name.
+ */
+static int
+fs_buildname(struct l9p_fid *dir, char *name, char *buf, size_t size)
+{
+	struct fs_fid *parent = dir->lo_aux;
+	size_t prefix_len, tail_len;
+	char *out;
+
+	assert(parent != NULL);
+	prefix_len = strlen(parent->ff_name);
+	tail_len = strlen(name) + 1;	/* name plus its terminating '\0' */
+
+	/* directory + '/' + name + '\0' must all fit in <buf> */
+	if (prefix_len + 1 + tail_len > size)
+		return (ENAMETOOLONG);
+
+	out = buf;
+	memcpy(out, parent->ff_name, prefix_len);
+	out += prefix_len;
+	*out++ = '/';
+	memcpy(out, name, tail_len);
+	return (0);
+}
+
+/*
+ * Build parent name of file by splitting it off.  Return an error
+ * if the given fid represents the root, so that there is no such
+ * parent, or if the discovered parent is not a directory.
+ */
+static int
+fs_pdir(struct fs_softc *sc __unused, struct l9p_fid *fid, char *buf,
+    size_t size, struct stat *st)
+{
+	struct fs_fid *ff = fid->lo_aux;
+	char *parent;
+
+	assert(ff != NULL);
+
+	/* Split the directory part of the fid's name off into <buf>. */
+	parent = r_dirname(ff->ff_name, buf, size);
+	if (parent == NULL)
+		return (ENAMETOOLONG);
+
+	/* Stat it without following a symlink; it must be a directory. */
+	if (fstatat(ff->ff_dirfd, parent, st, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	return (S_ISDIR(st->st_mode) ? 0 : ENOTDIR);
+}
+
+/*
+ * Like fs_buildname() but for adding a file name to a buffer
+ * already holding a directory name.  Essentially does
+ *     strcat(dbuf, "/");
+ *     strcat(dbuf, fname);
+ * but with size checking and an ENAMETOOLONG error as needed.
+ *
+ * (Think of the function name as "directory plus-equals file".)
+ */
+static int
+fs_dpf(char *dbuf, char *fname, size_t size)
+{
+	const size_t used = strlen(dbuf);
+	const size_t tail = strlen(fname) + 1;	/* name plus its '\0' */
+	char *sep;
+
+	/* existing text + '/' + name + '\0' must fit in <size> bytes */
+	if (used + 1 + tail > size)
+		return (ENAMETOOLONG);
+
+	sep = dbuf + used;
+	*sep = '/';
+	memcpy(sep + 1, fname, tail);
+	return (0);
+}
+
+/*
+ * Prepare to create a new directory entry (open with O_CREAT,
+ * mkdir, etc -- any operation that creates a new inode),
+ * operating in parent data <dir>, based on authinfo <ai> and
+ * effective gid <egid>.
+ *
+ * The new entity should be owned by user/group <*nuid, *ngid>,
+ * if it's really a new entity.  It will be a directory if isdir.
+ *
+ * Returns an error number if the entry should not be created
+ * (e.g., read-only file system or no permission to write in
+ * parent directory).  Always sets *nuid and *ngid on success:
+ * in the worst case, when there is no available ID, this will
+ * use the parent directory's IDs.  Fills in <*st> on success.
+ */
+static int
+fs_nde(struct fs_softc *sc, struct l9p_fid *dir, bool isdir, gid_t egid,
+    struct stat *st, uid_t *nuid, gid_t *ngid)
+{
+	struct fs_fid *parent;
+	struct fs_authinfo *auth;
+	int32_t want;
+
+	/* Nothing can be created on a read-only export. */
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	parent = dir->lo_aux;
+	assert(parent != NULL);
+
+	/* The would-be parent must exist and be a real directory. */
+	if (fstatat(parent->ff_dirfd, parent->ff_name, st,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (!S_ISDIR(st->st_mode))
+		return (ENOTDIR);
+
+	/* Check the right to add an entry of the requested kind. */
+	auth = parent->ff_ai;
+	fillacl(parent);
+	if (isdir)
+		want = L9P_ACE_ADD_SUBDIRECTORY;
+	else
+		want = L9P_ACE_ADD_FILE;
+	if (check_access(want, parent->ff_acl, st, NULL, NULL, auth,
+	    egid) != 0)
+		return (EPERM);
+
+	/* Owner: the attach uid if there is one, else the parent's. */
+	if (auth->ai_uid != (uid_t)-1)
+		*nuid = auth->ai_uid;
+	else
+		*nuid = st->st_uid;
+
+	/*
+	 * Group: the supplied effective gid, else the first gid of the
+	 * gid-set, else the parent directory's group.
+	 */
+	if (egid != (gid_t)-1)
+		*ngid = egid;
+	else if (auth->ai_ngids > 0)
+		*ngid = auth->ai_gids[0];
+	else
+		*ngid = st->st_gid;
+	return (0);
+}
+
+/*
+ * Allocate new open-file data structure to attach to a fid.
+ *
+ * The new file's authinfo is the same as the old one's, and
+ * we gain a reference.
+ */
+static struct fs_fid *
+open_fid(int dirfd, const char *path, struct fs_authinfo *ai, bool creating)
+{
+	const pthread_mutexattr_t *attr;
+	struct fs_fid *ff;
+	uint32_t refs;
+
+#ifdef __illumos__
+	attr = &fs_mutexattr;
+#else
+	attr = NULL;
+#endif
+
+	ff = l9p_calloc(1, sizeof(*ff));
+	if (pthread_mutex_init(&ff->ff_mtx, attr) != 0)
+		goto fail_alloc;
+
+	ff->ff_fd = -1;
+	ff->ff_dirfd = dirfd;
+	ff->ff_name = strdup(path);
+	if (ff->ff_name == NULL)
+		goto fail_mutex;
+
+	/* Take our reference on the shared authinfo under its lock. */
+	if (pthread_mutex_lock(&ai->ai_mtx) != 0)
+		goto fail_name;
+	refs = ++ai->ai_refcnt;
+	(void) pthread_mutex_unlock(&ai->ai_mtx);
+
+	/*
+	 * Going from 0 to 1 means we hold the *first* reference, which
+	 * is legitimate only while the authinfo is being created.  Any
+	 * other time it indicates a bug; this will not catch every bad
+	 * (re)use of a freed authinfo, but it may catch a few.
+	 */
+	assert(refs > 1 || creating);
+	L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu",
+	    (void *)ai, (u_long)refs);
+	ff->ff_ai = ai;
+	return (ff);
+
+fail_name:
+	free(ff->ff_name);
+fail_mutex:
+	(void) pthread_mutex_destroy(&ff->ff_mtx);
+fail_alloc:
+	free(ff);
+	return (NULL);
+}
+
+/*
+ * Fill in the 9P stat structure <s> for the file called <name>, whose
+ * stat data has already been collected in <buf>.
+ *
+ * <s> is zeroed first.  Without <dotu> (plain 9P2000) the owner and
+ * group are returned as strdup()ed names, or NULL when the id cannot
+ * be resolved.  With <dotu> (9P2000.u) the numeric ids are returned
+ * instead and s->extension describes symlink targets and device
+ * numbers; it is left NULL when it cannot be produced.
+ *
+ * The strings stored in <s> are heap-allocated here; presumably the
+ * caller releases them -- confirm against the callers.
+ */
+static void
+dostat(struct fs_softc *sc, struct l9p_stat *s, char *name,
+    struct stat *buf, bool dotu)
+{
+	struct passwd *user;
+	struct group *group;
+
+	memset(s, 0, sizeof(struct l9p_stat));
+
+	generate_qid(buf, &s->qid);
+
+	s->type = 0;
+	s->dev = 0;
+	s->mode = buf->st_mode & 0777;
+
+	/* Map the Unix file type onto the 9P DM* mode bits. */
+	if (S_ISDIR(buf->st_mode))
+		s->mode |= L9P_DMDIR;
+
+	if (S_ISLNK(buf->st_mode) && dotu)
+		s->mode |= L9P_DMSYMLINK;
+
+	if (S_ISCHR(buf->st_mode) || S_ISBLK(buf->st_mode))
+		s->mode |= L9P_DMDEVICE;
+
+	if (S_ISSOCK(buf->st_mode))
+		s->mode |= L9P_DMSOCKET;
+
+	if (S_ISFIFO(buf->st_mode))
+		s->mode |= L9P_DMNAMEDPIPE;
+
+	/* The wire format carries 32-bit times. */
+	s->atime = (uint32_t)buf->st_atime;
+	s->mtime = (uint32_t)buf->st_mtime;
+	s->length = (uint64_t)buf->st_size;
+
+	s->name = r_basename(name, NULL, 0);
+
+	if (!dotu) {
+		struct r_pgdata udata, gdata;
+
+		user = fs_getpwuid(sc, buf->st_uid, &udata);
+		group = fs_getgrgid(sc, buf->st_gid, &gdata);
+		s->uid = user != NULL ? strdup(user->pw_name) : NULL;
+		s->gid = group != NULL ? strdup(group->gr_name) : NULL;
+		s->muid = user != NULL ? strdup(user->pw_name) : NULL;
+		r_pgfree(&udata);
+		r_pgfree(&gdata);
+	} else {
+		/*
+		 * When using 9P2000.u, we don't need to bother about
+		 * providing user and group names in textual form.
+		 */
+		s->n_uid = buf->st_uid;
+		s->n_gid = buf->st_gid;
+		s->n_muid = buf->st_uid;
+
+		if (S_ISLNK(buf->st_mode)) {
+			char target[MAXPATHLEN];
+			/*
+			 * NOTE(review): readlink() resolves <name>
+			 * relative to the process's working directory,
+			 * not to a directory fd.
+			 */
+			ssize_t ret = readlink(name, target, MAXPATHLEN);
+
+			if (ret < 0) {
+				s->extension = NULL;
+				return;
+			}
+
+			s->extension = strndup(target, (size_t)ret);
+		}
+
+		/*
+		 * If asprintf() fails the contents of its first
+		 * argument are not something we can rely on, so put
+		 * s->extension back to NULL explicitly; consumers can
+		 * then simply ignore it.
+		 */
+		if (S_ISBLK(buf->st_mode)) {
+			if (asprintf(&s->extension, "b %d %d",
+			    major(buf->st_rdev), minor(buf->st_rdev)) < 0)
+				s->extension = NULL;
+		}
+
+		if (S_ISCHR(buf->st_mode)) {
+			if (asprintf(&s->extension, "c %d %d",
+			    major(buf->st_rdev), minor(buf->st_rdev)) < 0)
+				s->extension = NULL;
+		}
+	}
+}
+
+/*
+ * Translate the host's filesystem statistics <in> into the 9P statfs
+ * reply <out>.  <namelen> is the maximum file name length to report.
+ */
+#ifdef __illumos__
+static void
+dostatfs(struct l9p_statfs *out, struct statvfs *in, long namelen)
+#else
+static void
+dostatfs(struct l9p_statfs *out, struct statfs *in, long namelen)
+#endif
+{
+
+	out->type = L9P_FSTYPE;
+	out->namelen = (uint32_t)namelen;
+	out->fsid = STATFS_FSID(in);
+	out->files = in->f_files;
+	out->ffree = in->f_ffree;
+	out->bsize = in->f_bsize;
+#ifdef __illumos__
+	/* Rescale the block counts from f_frsize units to f_bsize units. */
+	out->blocks = in->f_blocks * in->f_frsize / in->f_bsize;
+	out->bfree = in->f_bfree * in->f_frsize / in->f_bsize;
+	out->bavail = in->f_bavail * in->f_frsize / in->f_bsize;
+#else
+	out->blocks = in->f_blocks;
+	out->bfree = in->f_bfree;
+	out->bavail = in->f_bavail;
+#endif
+}
+
+/*
+ * Derive the 9P qid for a file from its stat data <buf>.
+ *
+ * The path is the inode number, the version is always 0, and the type
+ * reflects whether the file is a regular file, directory or symlink.
+ * Every field of <qid> is written, so the caller need not clear it.
+ */
+static void
+generate_qid(struct stat *buf, struct l9p_qid *qid)
+{
+	qid->path = buf->st_ino;
+	qid->version = 0;
+
+	/*
+	 * Start from a clean type: the bits below are OR-ed in, and
+	 * <qid> may be uninitialized storage on entry.
+	 */
+	qid->type = 0;
+
+	if (S_ISREG(buf->st_mode))
+		qid->type |= L9P_QTFILE;
+
+	if (S_ISDIR(buf->st_mode))
+		qid->type |= L9P_QTDIR;
+
+	if (S_ISLNK(buf->st_mode))
+		qid->type |= L9P_QTSYMLINK;
+}
+
+/*
+ * Fill in ff->ff_acl if it's not set yet.  Skip if the "don't use
+ * ACLs" flag is set, and use the flag to remember failure so
+ * we don't bother retrying either.
+ */
+static void
+fillacl(struct fs_fid *ff)
+{
+
+	/* Already cached: nothing to do. */
+	if (ff->ff_acl != NULL)
+		return;
+	/* ACLs disabled, or an earlier lookup came back empty. */
+	if ((ff->ff_flags & FF_NO_NFSV4_ACL) != 0)
+		return;
+
+	ff->ff_acl = look_for_nfsv4_acl(ff, ff->ff_fd, ff->ff_name);
+	/* Remember the miss so the lookup is not repeated. */
+	if (ff->ff_acl == NULL)
+		ff->ff_flags |= FF_NO_NFSV4_ACL;
+}
+
+/*
+ * Get an ACL given fd and/or path name.  We check for the "don't get
+ * ACL" flag in the given ff_fid data structure first, but don't set
+ * the flag here.  The fillacl() code is similar but will set the
+ * flag; it also uses the ff_fd and ff_name directly.
+ *
+ * (This is used to get ACLs for parent directories, for instance.)
+ */
+static struct l9p_acl *
+getacl(struct fs_fid *ff, int fd, const char *path)
+{
+
+	/* Honour the "don't look" flag, but never set it from here. */
+	return ((ff->ff_flags & FF_NO_NFSV4_ACL) != 0 ?
+	    NULL : look_for_nfsv4_acl(ff, fd, path));
+}
+
+/*
+ * Drop cached ff->ff_acl, e.g., after moving from one directory to
+ * another, where inherited ACLs might change.
+ */
+static void
+dropacl(struct fs_fid *ff)
+{
+	struct l9p_acl *stale = ff->ff_acl;
+
+	/* Forget the cached ACL and reset the flags to the master copy. */
+	ff->ff_acl = NULL;
+	ff->ff_flags = ff->ff_ai->ai_flags;
+	l9p_acl_free(stale);
+}
+
+/*
+ * Check to see if we can find NFSv4 ACLs for the given file.
+ * If we have an open fd, we can use that, otherwise we need
+ * to use the path.
+ */
+static struct l9p_acl *
+look_for_nfsv4_acl(struct fs_fid *ff, int fd, const char *path)
+{
+	struct l9p_acl *acl;
+#ifdef __illumos__
+	acl_t *sysacl;
+#else
+	acl_t sysacl;
+#endif
+	int doclose = 0;	/* set when the fd was opened here */
+
+	/*
+	 * No open descriptor supplied: open the file ourselves by path,
+	 * relative to the fid's directory fd.  If that fails there is
+	 * nothing to query, so report the real reason and give up
+	 * rather than handing an invalid descriptor to the ACL call.
+	 */
+	if (fd < 0) {
+		fd = openat(ff->ff_dirfd, path, 0);
+		if (fd < 0) {
+			L9P_LOG(L9P_ERROR,
+			    "error opening %s to retrieve NFSv4 ACL: %s",
+			    path, strerror(errno));
+			return (NULL);
+		}
+		doclose = 1;
+	}
+
+	sysacl = acl_get_fd_np(fd, ACL_TYPE_NFS4);
+	if (sysacl == NULL) {
+		/*
+		 * EINVAL means no NFSv4 ACLs apply for this file.
+		 * Other error numbers indicate some kind of problem.
+		 */
+		if (errno != EINVAL) {
+			L9P_LOG(L9P_ERROR,
+			    "error retrieving NFSv4 ACL from "
+			    "fdesc %d (%s): %s", fd,
+			    path, strerror(errno));
+		}
+
+		if (doclose)
+			close(fd);
+
+		return (NULL);
+	}
+
+	/* Convert the system ACL into the library's representation. */
+#if defined(HAVE_FREEBSD_ACLS)
+	acl = l9p_freebsd_nfsv4acl_to_acl(sysacl);
+#elif defined(HAVE__ILLUMOS_ACLS)
+	acl = l9p_illumos_nfsv4acl_to_acl(sysacl);
+#else
+	acl = NULL; /* XXX need a l9p_darwin_acl_to_acl */
+#endif
+	acl_free(sysacl);
+
+	if (doclose)
+		close(fd);
+
+	return (acl);
+}
+
+/*
+ * Verify that the user whose authinfo is in <ai> and effective
+ * group ID is <egid> ((gid_t)-1 means no egid supplied) has
+ * permission to do something.
+ *
+ * The "something" may be rather complex: we allow NFSv4 style
+ * operation masks here, and provide parent and child ACLs and
+ * stat data.  At most one of pacl+pst and cacl+cst can be NULL,
+ * unless ACLs are not supported; then pacl and cacl can both
+ * be NULL but pst or cst must be non-NULL depending on the
+ * operation.
+ */
+static int
+check_access(int32_t opmask,
+    struct l9p_acl *pacl, struct stat *pst,
+    struct l9p_acl *cacl, struct stat *cst,
+    struct fs_authinfo *ai, gid_t egid)
+{
+	struct l9p_acl_check_args args;
+	const bool have_acl = (pacl != NULL || cacl != NULL);
+
+	/* What is being checked: parent and child ACLs and stat data. */
+	args.aca_parent = pacl;
+	args.aca_pstat = pst;
+	args.aca_child = cacl;
+	args.aca_cstat = cst;
+
+	/* Who is asking: uid, effective gid and supplementary gids. */
+	args.aca_uid = ai->ai_uid;
+	args.aca_gid = egid;
+	args.aca_groups = ai->ai_gids;
+	args.aca_ngroups = (size_t)ai->ai_ngids;
+
+	/*
+	 * When any ACL is present, ACLs alone decide and the Unix
+	 * permission bits are ignored.  With no ACL at all, fall back
+	 * on the st_mode bits.  The super-user is allowed either way.
+	 */
+	if (have_acl)
+		args.aca_aclmode = L9P_ACM_NFS_ACL | L9P_ACM_ZFS_ACL;
+	else
+		args.aca_aclmode = L9P_ACM_STAT_MODE;
+	args.aca_superuser = true;
+
+	return (l9p_acl_check_access(opmask, &args));
+}
+
+/*
+ * Handle Tattach: establish the identity (uid and gid-set) the client
+ * will act under and bind the request's fid to the export root.
+ *
+ * The user is identified by the numeric n_uname when one is supplied,
+ * otherwise by looking up the textual uname.  On success a new
+ * fs_authinfo is created, the fid's lo_aux is set to a fresh fs_fid
+ * for "." under the root fd, and the root's qid is placed in the
+ * response.
+ *
+ * Returns 0 on success or an errno value: EPERM when no uid could be
+ * determined or the root cannot be examined (ENOENT and ENOTDIR are
+ * passed through unchanged), ENOMEM on allocation failure, or the
+ * error reported by a failing pthread mutex call.
+ */
+static int
+fs_attach(void *softc, struct l9p_request *req)
+{
+	struct fs_authinfo *ai;
+	struct fs_softc *sc = (struct fs_softc *)softc;
+	struct fs_fid *file;
+	struct passwd *pwd;
+	struct stat st;
+	struct r_pgdata udata;
+	bool have_udata = false;	/* udata was filled by fs_getpwuid */
+	uint32_t n_uname;
+	gid_t *gids;
+	uid_t uid;
+	int error;
+	int ngroups;
+
+	assert(req->lr_fid != NULL);
+
+	/*
+	 * Single-thread pwd/group related items.  We have a reentrant
+	 * r_getpwuid but not a reentrant r_getpwnam, and l9p_getgrlist
+	 * may use non-reentrant C library getgr* routines.
+	 */
+	if ((error = pthread_mutex_lock(&fs_attach_mutex)) != 0)
+		return (error);
+
+	n_uname = req->lr_req.tattach.n_uname;
+	if (n_uname != L9P_NONUNAME) {
+		uid = (uid_t)n_uname;
+		pwd = fs_getpwuid(sc, uid, &udata);
+		/*
+		 * From here on udata must be handed to r_pgfree() once
+		 * pwd is no longer needed, whether or not the lookup
+		 * found anything (the same pairing dostat() uses).
+		 */
+		have_udata = true;
+#if defined(L9P_DEBUG)
+		if (pwd == NULL)
+			L9P_LOG(L9P_DEBUG,
+			    "Tattach: uid %ld: no such user", (long)uid);
+#endif
+	} else {
+		uid = (uid_t)-1;
+#if defined(WITH_CASPER)
+		pwd = cap_getpwnam(sc->fs_cappwd, req->lr_req.tattach.uname);
+#else
+		pwd = getpwnam(req->lr_req.tattach.uname);
+#endif
+#if defined(L9P_DEBUG)
+		if (pwd == NULL)
+			L9P_LOG(L9P_DEBUG,
+			    "Tattach: %s: no such user",
+			    req->lr_req.tattach.uname);
+#endif
+	}
+
+	/*
+	 * If caller didn't give a numeric UID, pick it up from pwd
+	 * if possible.  If that doesn't work we can't continue.
+	 *
+	 * Note that pwd also supplies the group set.  This assumes
+	 * the server has the right mapping; this needs improvement.
+	 * We do at least support ai->ai_ngids==0 properly now though.
+	 */
+	if (uid == (uid_t)-1 && pwd != NULL)
+		uid = pwd->pw_uid;
+	if (uid == (uid_t)-1)
+		error = EPERM;
+	else {
+		error = 0;
+		if (fstat(sc->fs_rootfd, &st) != 0)
+			error = errno;
+		else if (!S_ISDIR(st.st_mode))
+			error = ENOTDIR;
+	}
+	if (error) {
+		if (have_udata)
+			r_pgfree(&udata);
+		(void) pthread_mutex_unlock(&fs_attach_mutex);
+		L9P_LOG(L9P_DEBUG,
+		    "Tattach: denying uid=%ld access to rootdir: %s",
+		    (long)uid, strerror(error));
+		/*
+		 * Pass ENOENT and ENOTDIR through for diagnosis;
+		 * others become EPERM.  This should not leak too
+		 * much security.
+		 */
+		return (error == ENOENT || error == ENOTDIR ? error : EPERM);
+	}
+
+	if (pwd != NULL) {
+		/*
+		 * This either succeeds and fills in ngroups and
+		 * returns non-NULL, or fails and sets ngroups to 0
+		 * and returns NULL.  Either way ngroups is correct.
+		 */
+		gids = l9p_getgrlist(pwd->pw_name, pwd->pw_gid, &ngroups);
+	} else {
+		gids = NULL;
+		ngroups = 0;
+	}
+
+	/*
+	 * pwd is not consulted past this point, so the lookup buffer
+	 * behind it (if any) can be released now.
+	 */
+	if (have_udata)
+		r_pgfree(&udata);
+	pwd = NULL;
+
+	/*
+	 * Done with pwd and group related items that may use
+	 * non-reentrant C library routines; allow other threads in.
+	 */
+	(void) pthread_mutex_unlock(&fs_attach_mutex);
+
+	ai = malloc(sizeof(*ai) + (size_t)ngroups * sizeof(gid_t));
+	if (ai == NULL) {
+		free(gids);
+		return (ENOMEM);
+	}
+#ifdef __illumos__
+	error = pthread_mutex_init(&ai->ai_mtx, &fs_mutexattr);
+#else
+	error = pthread_mutex_init(&ai->ai_mtx, NULL);
+#endif
+	if (error) {
+		free(gids);
+		free(ai);
+		return (error);
+	}
+	ai->ai_refcnt = 0;
+	ai->ai_uid = uid;
+	ai->ai_flags = 0;	/* XXX for now */
+	ai->ai_ngids = ngroups;
+	/* gids is NULL when the set is empty; never hand that to memcpy */
+	if (ngroups > 0)
+		memcpy(ai->ai_gids, gids, (size_t)ngroups * sizeof(gid_t));
+	free(gids);
+
+	/* The root fid takes the first reference on the new authinfo. */
+	file = open_fid(sc->fs_rootfd, ".", ai, true);
+	if (file == NULL) {
+		(void) pthread_mutex_destroy(&ai->ai_mtx);
+		free(ai);
+		return (ENOMEM);
+	}
+
+	req->lr_fid->lo_aux = file;
+	generate_qid(&st, &req->lr_resp.rattach.qid);
+	return (0);
+}
+
+/*
+ * Release whatever the fid currently has open: its directory stream
+ * if there is one, otherwise its file descriptor.  Always returns 0.
+ */
+static int
+fs_clunk(void *softc __unused, struct l9p_fid *fid)
+{
+	struct fs_fid *ff = fid->lo_aux;
+
+	assert(ff != NULL);
+
+	if (ff->ff_dir == NULL) {
+		if (ff->ff_fd != -1) {
+			close(ff->ff_fd);
+			ff->ff_fd = -1;
+		}
+	} else {
+		closedir(ff->ff_dir);
+		ff->ff_dir = NULL;
+	}
+
+	return (0);
+}
+
+/*
+ * Create ops.
+ *
+ * We are to create a new file under some existing path,
+ * where the new file's name is in the Tcreate request and the
+ * existing path is due to a fid-based file (req->lr_fid).
+ *
+ * One op (create regular file) sets file->fd, the rest do not.
+ */
+static int
+fs_create(void *softc, struct l9p_request *req)
+{
+	struct l9p_fid *parent = req->lr_fid;
+	char *leaf = req->lr_req.tcreate.name;
+	const uint32_t dmperm = req->lr_req.tcreate.perm;
+	const gid_t no_egid = (gid_t)-1;	/* no effective gid supplied */
+	mode_t perm = (mode_t)(dmperm & 0777);
+	struct stat st;
+	int error;
+
+	/* The DM* bits in the permission word select what to create. */
+	if (dmperm & L9P_DMDIR) {
+		error = fs_imkdir(softc, parent, leaf, true,
+		    perm, no_egid, &st);
+	} else if (dmperm & L9P_DMSYMLINK) {
+		error = fs_isymlink(softc, parent, leaf,
+		    req->lr_req.tcreate.extension, no_egid, &st);
+	} else if (dmperm & L9P_DMNAMEDPIPE) {
+		error = fs_imkfifo(softc, parent, leaf, true,
+		    perm, no_egid, &st);
+	} else if (dmperm & L9P_DMSOCKET) {
+		error = fs_imksocket(softc, parent, leaf, true,
+		    perm, no_egid, &st);
+	} else if (dmperm & L9P_DMDEVICE) {
+		unsigned int dev_major, dev_minor;
+		char kind;
+		dev_t dev;
+		int nfields;
+
+		/*
+		 * The extension is "<b|c> <major> [<minor>]".
+		 * ??? Should this insist on all three fields?  For
+		 * now a missing minor number is taken to be 0.
+		 */
+		dev_minor = 0;
+		nfields = sscanf(req->lr_req.tcreate.extension, "%c %u %u",
+		    &kind, &dev_major, &dev_minor);
+		if (nfields < 2)
+			return (EINVAL);
+
+		if (kind == 'b')
+			perm |= S_IFBLK;
+		else if (kind == 'c')
+			perm |= S_IFCHR;
+		else
+			return (EINVAL);
+
+		dev = makedev(dev_major, dev_minor);
+		error = fs_imknod(softc, parent, leaf, true, perm, dev,
+		    no_egid, &st);
+	} else {
+		/* Plain file: translate the open mode, then create+open. */
+		enum l9p_omode omode;
+		int oflags;
+
+		omode = req->lr_req.tcreate.mode;
+		error = fs_oflags_dotu(omode, &oflags);
+		if (error)
+			return (error);
+		error = fs_icreate(softc, parent, leaf, oflags,
+		    true, perm, no_egid, &st);
+		req->lr_resp.rcreate.iounit = req->lr_conn->lc_max_io_size;
+	}
+
+	if (error != 0)
+		return (error);
+
+	generate_qid(&st, &req->lr_resp.rcreate.qid);
+	return (0);
+}
+
+/*
+ * Apply the Plan 9 rule for permissions of newly created objects.
+ *
+ * https://swtch.com/plan9port/man/man9/open.html and
+ * http://plan9.bell-labs.com/magic/man2html/5/open
+ * specify that the effective permissions are
+ *     perm & (~0666 | (dir.perm & 0666))
+ * for files, and
+ *     perm & (~0777 | (dir.perm & 0777))
+ * for directories.  In other words, the parent directory can only
+ * take away permission bits the request asked for; bits outside the
+ * mask pass through untouched.
+ *
+ * This is arguably too restrictive; a control knob for it would
+ * probably be worthwhile.
+ *
+ * <perm> is the requested mode, <dir_perm> the mode of the parent
+ * directory, and <isdir> selects the 0777 (directory) or 0666 (file)
+ * mask.  Returns the adjusted mode.
+ */
+static inline mode_t
+fs_p9perm(mode_t perm, mode_t dir_perm, bool isdir)
+{
+	const mode_t mask = isdir ? 0777 : 0666;
+
+	return (perm & (~mask | (dir_perm & mask)));
+}
+
+/*
+ * Internal form of create (plain file).
+ *
+ * The caller has already split off every special kind of create
+ * (mknod, mkdir, ...), so this handles regular files only.
+ *
+ *   softc  - the fs_softc for this backend
+ *   dir    - fid of the directory that will contain the new file
+ *   name   - name of the new file within <dir>
+ *   flags  - open(2) flags; O_CREAT | O_EXCL are always added
+ *   isp9   - true for Plan 9 style permissions (see fs_p9perm()),
+ *            false for Linux style
+ *   perm   - requested permission bits
+ *   egid   - effective group id, passed through to fs_nde()
+ *   st     - filled in with the stat data of the new file on success
+ *
+ * On success the fid switches over to the newly created file, which
+ * is left open (ff_fd), and its ff_name is replaced.  The original
+ * (dir) fid is never open at this point, so nothing needs closing.
+ *
+ * Returns 0 or an errno value.
+ */
+static int
+fs_icreate(void *softc, struct l9p_fid *dir, char *name, int flags,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *ff = dir->lo_aux;
+	char path[MAXPATHLEN];
+	char *saved;
+	uid_t owner;
+	gid_t group;
+	int rv, newfd;
+
+	/*
+	 * Build the full path from directory + file name.  Permissions
+	 * on the parent are checked first, and then we race to create
+	 * the file before anything bad (e.g. a symlink swap) happens.
+	 */
+	rv = fs_buildname(dir, name, path, sizeof(path));
+	if (rv != 0)
+		return (rv);
+
+	/* A heap copy of the path becomes ff_name if all goes well. */
+	saved = strdup(path);
+	if (saved == NULL)
+		return (ENOMEM);
+
+	/* Verify create permission and work out who owns the new file. */
+	rv = fs_nde(softc, dir, false, egid, st, &owner, &group);
+	if (rv != 0)
+		goto fail;
+
+	/* Plan 9 semantics: the parent may strip permission bits. */
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, false);
+
+	/* Creation is always exclusive, which makes O_TRUNC moot. */
+	newfd = openat(ff->ff_dirfd, path, flags | O_CREAT | O_EXCL, perm);
+	if (newfd < 0) {
+		rv = errno;
+		goto fail;
+	}
+
+	/* Force the exact mode and owner, then report the result. */
+	if (fchmod(newfd, perm) != 0 ||
+	    fchown(newfd, owner, group) != 0 ||
+	    fstat(newfd, st) != 0) {
+		rv = errno;
+		(void) close(newfd);
+		/* unlink(path); ? */
+		goto fail;
+	}
+
+	/* The fid used to name a directory; now it is an open file. */
+	free(ff->ff_name);
+	ff->ff_name = saved;
+	ff->ff_fd = newfd;
+	return (0);
+
+fail:
+	free(saved);
+	return (rv);
+}
+
+/*
+ * Internal form of open: stat file and verify permissions (from p9
+ * argument), then open the file-or-directory, leaving the internal
+ * fs_fid fields set up.  If we cannot open the file, return a
+ * suitable error number, and leave everything unchanged.
+ *
+ * To mitigate the race between permissions testing and the actual
+ * open, we stat the file twice (once without following symlinks
+ * before the open, then with fstat() after).  We assume O_NOFOLLOW
+ * is set in flags, so if some other race-winner substitutes in a
+ * symlink we won't open it here.  (However, embedded symlinks, if
+ * they occur, are still an issue.  Ideally we would like to have an
+ * O_NEVERFOLLOW that fails on embedded symlinks, and a way to pass
+ * this to lstat() as well.)
+ *
+ * Directories are opened with O_DIRECTORY only (not <flags>), so for
+ * them we must rely on substitution-detection via fstat().  To
+ * simplify the code we just always re-check.
+ *
+ * (For a proper fix in the future, we can keep each parent
+ * directory open during walk etc, and allow only final name
+ * components with O_NOFOLLOW.)
+ *
+ *   softc  - the fs_softc for this backend
+ *   fid    - fid to open; its fs_fid supplies ff_dirfd and ff_name
+ *   flags  - open(2) flags already translated from the 9P mode
+ *   p9     - original 9P open mode (only L9P_ORCLOSE is examined)
+ *   egid   - unused
+ *   st     - filled in from the opened descriptor on success
+ *
+ * On success either ff_dir (directories) or ff_fd (anything else) is
+ * set.  Returns 0 or an errno value.
+ */
+static int
+fs_iopen(void *softc, struct l9p_fid *fid, int flags, enum l9p_omode p9,
+    gid_t egid __unused, struct stat *st)
+{
+	struct fs_softc *sc = softc;
+	struct fs_fid *file;
+	struct stat first;
+	int32_t op;
+	char *name;
+	int error;
+	int fd;
+	DIR *dirp;
+
+	/* Forbid write ops on read-only file system. */
+	if (sc->fs_readonly) {
+		if ((flags & O_TRUNC) != 0)
+			return (EROFS);
+		if ((flags & O_ACCMODE) != O_RDONLY)
+			return (EROFS);
+		/* Remove-on-close modifies the file system too. */
+		if (p9 & L9P_ORCLOSE)
+			return (EROFS);
+	}
+
+	file = fid->lo_aux;
+	assert(file != NULL);
+	name = file->ff_name;
+
+	/* First look: never open a symlink itself. */
+	if (fstatat(file->ff_dirfd, name, &first, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (S_ISLNK(first.st_mode))
+		return (EPERM);
+
+	/* Can we rely on O_APPEND here?  Best not, can be cleared. */
+	switch (flags & O_ACCMODE) {
+	case O_RDONLY:
+		op = L9P_ACE_READ_DATA;
+		break;
+	case O_WRONLY:
+		op = L9P_ACE_WRITE_DATA;
+		break;
+	case O_RDWR:
+		op = L9P_ACE_READ_DATA | L9P_ACE_WRITE_DATA;
+		break;
+	default:
+		return (EINVAL);
+	}
+	fillacl(file);
+	error = check_access(op, NULL, NULL, file->ff_acl, &first,
+	    file->ff_ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	if (S_ISDIR(first.st_mode)) {
+		/* Forbid write or truncate on directory. */
+		if ((flags & O_ACCMODE) != O_RDONLY || (flags & O_TRUNC))
+			return (EPERM);
+		fd = openat(file->ff_dirfd, name, O_DIRECTORY);
+		if (fd < 0)
+			return (EPERM);
+		dirp = fdopendir(fd);
+		if (dirp == NULL) {
+			/*
+			 * fdopendir() only takes ownership of fd when it
+			 * succeeds, so the descriptor is still ours to
+			 * close here.
+			 */
+			(void) close(fd);
+			return (EPERM);
+		}
+		fd = dirfd(dirp);
+	} else {
+		dirp = NULL;
+		fd = openat(file->ff_dirfd, name, flags);
+		if (fd < 0)
+			return (EPERM);
+	}
+
+	/*
+	 * We have a valid fd, and maybe non-null dirp.  Re-check
+	 * the file, and fail if st_dev or st_ino changed.
+	 */
+	if (fstat(fd, st) != 0 ||
+	    first.st_dev != st->st_dev ||
+	    first.st_ino != st->st_ino) {
+		/* closedir() also closes the underlying descriptor. */
+		if (dirp != NULL)
+			(void) closedir(dirp);
+		else
+			(void) close(fd);
+		return (EPERM);
+	}
+	if (dirp != NULL)
+		file->ff_dir = dirp;
+	else
+		file->ff_fd = fd;
+	return (0);
+}
+
+/*
+ * Internal form of mkdir, shared by every protocol variant.
+ *
+ *   softc  - the fs_softc for this backend
+ *   dir    - fid of the directory in which to create the entry
+ *   name   - name of the new directory within <dir>
+ *   isp9   - true for Plan 9 style permissions (see fs_p9perm()),
+ *            false for Linux style
+ *   perm   - requested permission bits
+ *   egid   - effective group id, passed through to fs_nde()
+ *   st     - filled in with the stat data of the new directory
+ *
+ * Unlike fs_icreate(), the fid is left alone: the new directory is
+ * opened only long enough to set owner and mode and to stat it.
+ *
+ * Returns 0 or an errno value.  The directory is not removed again
+ * if one of the fix-up steps fails.
+ */
+static int
+fs_imkdir(void *softc, struct l9p_fid *dir, char *name,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *parent = dir->lo_aux;
+	char path[MAXPATHLEN];
+	uid_t owner;
+	gid_t group;
+	int rv, dfd;
+
+	rv = fs_buildname(dir, name, path, sizeof(path));
+	if (rv != 0)
+		return (rv);
+
+	/* Permission check on the parent, plus ownership of the child. */
+	rv = fs_nde(softc, dir, true, egid, st, &owner, &group);
+	if (rv != 0)
+		return (rv);
+
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, true);
+
+	if (mkdirat(parent->ff_dirfd, path, perm) != 0)
+		return (errno);
+
+	dfd = openat(parent->ff_dirfd, path,
+	    O_DIRECTORY | O_RDONLY | O_NOFOLLOW);
+	if (dfd < 0) {
+		/* rmdir(path) ? */
+		return (errno);
+	}
+
+	/* rv is still 0 here; it only changes if a fix-up step fails. */
+	if (fchown(dfd, owner, group) != 0 ||
+	    fchmod(dfd, perm) != 0 ||
+	    fstat(dfd, st) != 0) {
+		rv = errno;
+		/* rmdir(path) ? */
+	}
+	(void) close(dfd);
+
+	return (rv);
+}
+
+#ifdef __APPLE__
+/*
+ * Change the calling thread's working directory to <fd> using the
+ * undocumented OS X __pthread_fchdir syscall.  Callers in this file
+ * pass -1 to stop using the thread-local directory again.
+ *
+ * It would be best to avoid this syscall, but there doesn't seem to
+ * be another safe way to implement mknodat.  Dear Apple, please
+ * implement mknodat before you remove this syscall.
+ *
+ * Returns whatever syscall() returns.
+ */
+static int
+fs_ifchdir_thread_local(int fd)
+{
+	int rv;
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+	rv = syscall(SYS___pthread_fchdir, fd);
+#pragma clang diagnostic pop
+	return (rv);
+}
+#endif
+
+/*
+ * Internal form of mknod (special device).
+ *
+ *   softc  - the fs_softc for this backend
+ *   dir    - fid of the directory in which to create the node
+ *   name   - name of the new node within <dir>
+ *   isp9   - true for Plan 9 style permissions (see fs_p9perm()),
+ *            false for Linux style
+ *   mode   - permission bits plus the device type (S_IFBLK, S_IFCHR)
+ *   dev    - device number for the new node
+ *   egid   - effective group id, passed through to fs_nde()
+ *   st     - filled in with the stat data of the new node on success
+ *
+ * Returns 0 or an errno value.  The node is not removed again if one
+ * of the fix-up steps fails.
+ */
+static int
+fs_imknod(void *softc, struct l9p_fid *dir, char *name,
+    bool isp9, mode_t mode, dev_t dev, gid_t egid, struct stat *st)
+{
+	struct fs_fid *ff;
+	mode_t perm;
+	gid_t gid;
+	uid_t uid;
+	char newname[MAXPATHLEN];
+	int error;
+
+	ff = dir->lo_aux;
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+	if (error)
+		return (error);
+
+	/* Keep the type bits, but let the parent restrict permissions. */
+	if (isp9) {
+		perm = fs_p9perm(mode & 0777, st->st_mode, false);
+		mode = (mode & ~0777) | perm;
+	} else {
+		perm = mode & 0777;
+	}
+
+#ifdef __APPLE__
+	/*
+	 * No mknodat(): temporarily make ff_dirfd this thread's working
+	 * directory so that the relative name resolves correctly.
+	 * Callers expect an errno value, so report errno rather than
+	 * the raw -1 from the syscall wrapper.
+	 */
+	if (fs_ifchdir_thread_local(ff->ff_dirfd) < 0) {
+		return (errno);
+	}
+	error = mknod(newname, mode, dev);
+	int preserved_errno = errno;
+	/* Stop using the thread-local cwd */
+	fs_ifchdir_thread_local(-1);
+	if (error < 0) {
+		errno = preserved_errno;
+		return errno;
+	}
+#else
+	if (mknodat(ff->ff_dirfd, newname, mode, dev) != 0)
+		return (errno);
+#endif
+
+	/* We cannot open the new name; race to use l* syscalls. */
+	if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fchmodat(ff->ff_dirfd, newname, perm, 0) != 0 ||
+	    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
+		error = errno;
+	else if ((st->st_mode & S_IFMT) != (mode & S_IFMT))
+		error = EPERM;		/* ??? lost a race anyway */
+
+	/* if (error) unlink(newname) ? */
+
+	return (error);
+}
+
+/*
+ * Internal form of mkfifo.
+ */
+static int
+fs_imkfifo(void *softc, struct l9p_fid *dir, char *name,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *ff;
+	gid_t gid;
+	uid_t uid;
+	char newname[MAXPATHLEN];
+	int error;
+
+	ff = dir->lo_aux;
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+	if (error)
+		return (error);
+
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, false);
+
+	if (mkfifo(newname, perm) != 0)
+		return (errno);
+
+	/* We cannot open the new name; race to use l* syscalls. */
+	if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fchmodat(ff->ff_dirfd, newname, perm, 0) != 0 ||
+	    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
+		error = errno;
+	else if (!S_ISFIFO(st->st_mode))
+		error = EPERM;		/* ??? lost a race anyway */
+
+	/* if (error) unlink(newname) ? */
+
+	return (error);
+}
+
+/*
+ * Internal form of mksocket.
+ *
+ * This is a bit different because of the horrible socket naming
+ * system (bind() with sockaddr_un sun_path).
+ *
+ *   softc  - the fs_softc for this backend
+ *   dir    - fid of the directory in which to create the socket
+ *   name   - name of the new socket within <dir>
+ *   isp9   - true for Plan 9 style permissions (see fs_p9perm()),
+ *            false for Linux style
+ *   perm   - requested permission bits
+ *   egid   - effective group id, passed through to fs_nde()
+ *   st     - filled in with the stat data of the new socket inode
+ *
+ * Returns 0 or an errno value.  The temporary socket descriptor (and
+ * the directory descriptor used for bindat(), if any) is always
+ * closed before returning.
+ */
+static int
+fs_imksocket(void *softc, struct l9p_fid *dir, char *name,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *ff;
+	struct sockaddr_un un;
+	char *path;
+	char newname[MAXPATHLEN];
+	gid_t gid;
+	uid_t uid;
+	int error = 0, s, fd, slen;
+
+	ff = dir->lo_aux;
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+	if (error)
+		return (error);
+
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, false);
+
+	s = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (s < 0)
+		return (errno);
+
+	path = newname;
+	fd = -1;
+#ifdef HAVE_BINDAT
+	/* Try bindat() if needed. */
+	if (strlen(path) >= sizeof(un.sun_path)) {
+		fd = openat(ff->ff_dirfd, ff->ff_name,
+		    O_RDONLY | O_DIRECTORY | O_NOFOLLOW);
+		if (fd >= 0)
+			path = name;
+	}
+#endif
+
+	/*
+	 * Can only create the socket if the path will fit.
+	 * Even if we are using bindat() there are limits
+	 * (the API for AF_UNIX sockets is ... not good).
+	 *
+	 * Note: in theory we can fill sun_path to the end
+	 * (omitting a terminating '\0') but in at least one
+	 * Unix-like system, this was known to behave oddly,
+	 * so we test for ">=" rather than just ">".
+	 */
+	if (strlen(path) >= sizeof(un.sun_path)) {
+		error = ENAMETOOLONG;
+		goto out;
+	}
+
+	/*
+	 * Fill in the whole address before measuring it: SUN_LEN()
+	 * runs strlen() over sun_path, so the path must already be in
+	 * place.  The length check above guarantees strncpy() leaves
+	 * sun_path '\0'-terminated.
+	 */
+	(void) memset(&un, 0, sizeof(un));
+	un.sun_family = AF_UNIX;
+	(void) strncpy(un.sun_path, path, sizeof(un.sun_path));
+#ifndef __illumos__
+	slen = un.sun_len = sizeof(struct sockaddr_un);
+#else
+	slen = SUN_LEN(&un);
+#endif
+
+#ifdef HAVE_BINDAT
+	if (fd >= 0) {
+		if (bindat(fd, s, (struct sockaddr *)&un, slen) < 0)
+			error = errno;
+		goto out;	/* done now, for good or ill */
+	}
+#endif
+
+	/*
+	 * NOTE(review): bind() resolves a relative sun_path against the
+	 * process working directory, whereas the fix-up calls below
+	 * resolve newname against ff_dirfd -- confirm the two agree.
+	 */
+	if (bind(s, (struct sockaddr *)&un, slen) < 0)
+		error = errno;
+out:
+
+	if (error == 0) {
+		/*
+		 * We believe we created the socket-inode.  Fix
+		 * permissions etc.  Note that we cannot use
+		 * fstat() on the socket descriptor: it succeeds,
+		 * but we get bogus data!
+		 */
+		if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
+		    fchmodat(ff->ff_dirfd, newname, perm, 0) != 0 ||
+		    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
+			error = errno;
+		else if (!S_ISSOCK(st->st_mode))
+			error = EPERM;		/* ??? lost a race anyway */
+
+		/* if (error) unlink(newname) ? */
+	}
+
+	/*
+	 * It's not clear which error should override, although
+	 * ideally we should never see either close() call fail.
+	 * In any case we do want to try to close both fd and s,
+	 * always.  Let's set error only if it is not already set,
+	 * so that all exit paths can use the same code.
+	 */
+	if (fd >= 0 && close(fd) != 0)
+		if (error == 0)
+			error = errno;
+	if (close(s) != 0)
+		if (error == 0)
+			error = errno;
+
+	return (error);
+}
+
+/*
+ * Internal form of symlink.
+ *
+ * Symlinks are presumed to carry no permission bits, so there is no
+ * <perm> argument and no chmod step.  They do have owners, however
+ * (who may be charged for quotas), so ownership is still set.
+ *
+ *   softc  - the fs_softc for this backend
+ *   dir    - fid of the directory in which to create the link
+ *   name   - name of the new link within <dir>
+ *   symtgt - the link's target string
+ *   egid   - effective group id, passed through to fs_nde()
+ *   st     - filled in with the stat data of the link itself
+ *
+ * Returns 0 or an errno value.  The link is not removed again if one
+ * of the fix-up steps fails.
+ */
+static int
+fs_isymlink(void *softc, struct l9p_fid *dir, char *name,
+    char *symtgt, gid_t egid, struct stat *st)
+{
+	struct fs_fid *parent = dir->lo_aux;
+	char path[MAXPATHLEN];
+	uid_t owner;
+	gid_t group;
+	int rv;
+
+	rv = fs_buildname(dir, name, path, sizeof(path));
+	if (rv != 0)
+		return (rv);
+
+	rv = fs_nde(softc, dir, false, egid, st, &owner, &group);
+	if (rv != 0)
+		return (rv);
+
+	if (symlinkat(symtgt, parent->ff_dirfd, path) != 0)
+		return (errno);
+
+	/* The new name cannot be opened; race to use the *at() calls. */
+	if (fchownat(parent->ff_dirfd, path, owner, group,
+	    AT_SYMLINK_NOFOLLOW) != 0) {
+		rv = errno;
+	} else if (fstatat(parent->ff_dirfd, path, st,
+	    AT_SYMLINK_NOFOLLOW) != 0) {
+		rv = errno;
+	} else if (!S_ISLNK(st->st_mode)) {
+		rv = EPERM;		/* ??? lost a race anyway */
+	}
+
+	/* if (rv) unlink(path) ? */
+
+	return (rv);
+}
+
+/*
+ * Topen handler (9P2000 / 9P2000.u).
+ *
+ * Translates the 9P open mode into open(2) flags, opens the fid via
+ * fs_iopen() with an effective gid of -1, and on success fills in
+ * the qid and iounit of the Ropen response.
+ *
+ * Returns 0 or an errno value.
+ */
+static int
+fs_open(void *softc, struct l9p_request *req)
+{
+	enum l9p_omode omode = req->lr_req.topen.mode;
+	struct stat sb;
+	int oflags;
+	int rv;
+
+	rv = fs_oflags_dotu(omode, &oflags);
+	if (rv != 0)
+		return (rv);
+
+	rv = fs_iopen(softc, req->lr_fid, oflags, omode, (gid_t)-1, &sb);
+	if (rv != 0)
+		return (rv);
+
+	generate_qid(&sb, &req->lr_resp.ropen.qid);
+	req->lr_resp.ropen.iounit = req->lr_conn->lc_max_io_size;
+	return (0);
+}
+
+/*
+ * Helper for directory read: stat one entry of an open directory
+ * without following a symlink, i.e. an lstat() of <name> relative to
+ * the directory stream file->ff_dir.
+ *
+ * Returns the result of fstatat(): 0 on success, otherwise non-zero
+ * with errno set.
+ */
+static inline int
+fs_lstatat(struct fs_fid *file, char *name, struct stat *st)
+{
+	int dfd = dirfd(file->ff_dir);
+
+	return (fstatat(dfd, name, st, AT_SYMLINK_NOFOLLOW));
+}
+
+/*
+ * Tread handler.
+ *
+ * If the fid has an open directory stream (ff_dir), one stat record
+ * per directory entry is packed into the reply until the directory
+ * is exhausted or l9p_pack_stat() fails; in the latter case the
+ * stream is moved back so that the entry which failed is returned
+ * by the next read.  Entries that cannot be stat'ed are skipped.
+ *
+ * Otherwise file data is read at the requested offset into
+ * req->lr_data_iov and the byte count is stored in the response.
+ *
+ * Returns 0 or an errno value.
+ */
+static int
+fs_read(void *softc, struct l9p_request *req)
+{
+	struct l9p_stat l9stat;
+	struct fs_softc *sc;
+	struct fs_fid *file;
+	bool dotu = req->lr_conn->lc_version >= L9P_2000U;
+	ssize_t ret;
+
+	sc = softc;
+	file = req->lr_fid->lo_aux;
+	assert(file != NULL);
+
+	if (file->ff_dir != NULL) {
+		struct dirent *d;
+		struct stat st;
+		struct l9p_message msg;
+		long o;
+		int err;
+
+		/* Serialize users of this directory stream. */
+		if ((err = pthread_mutex_lock(&file->ff_mtx)) != 0)
+			return (err);
+
+		/*
+		 * Must use telldir before readdir since seekdir
+		 * takes cookie values.  Unfortunately this wastes
+		 * a lot of time (and memory) building unneeded
+		 * cookies that can only be flushed by closing
+		 * the directory.
+		 *
+		 * NB: FreeBSD libc seekdir has SINGLEUSE defined,
+		 * so in fact, we can discard the cookies by
+		 * calling seekdir on them.  This clears up wasted
+		 * memory at the cost of even more wasted time...
+		 *
+		 * XXX: readdir/telldir/seekdir not thread safe
+		 */
+		l9p_init_msg(&msg, req, L9P_PACK);
+		for (;;) {
+			o = telldir(file->ff_dir);
+			d = readdir(file->ff_dir);
+			if (d == NULL)
+				break;
+			if (fs_lstatat(file, d->d_name, &st))
+				continue;
+			dostat(sc, &l9stat, d->d_name, &st, dotu);
+			if (l9p_pack_stat(&msg, req, &l9stat) != 0) {
+				/* Re-deliver this entry on the next read. */
+				seekdir(file->ff_dir, o);
+				break;
+			}
+#if defined(__FreeBSD__)
+			seekdir(file->ff_dir, o);
+			(void) readdir(file->ff_dir);
+#endif
+		}
+
+		(void) pthread_mutex_unlock(&file->ff_mtx);
+	} else {
+		size_t niov = l9p_truncate_iov(req->lr_data_iov,
+		    req->lr_data_niov, req->lr_req.io.count);
+
+#if defined(__FreeBSD__) || defined(__illumos__)
+		ret = preadv(file->ff_fd, req->lr_data_iov, niov,
+		    req->lr_req.io.offset);
+#else
+		/* XXX: not thread safe, should really use aio_listio. */
+		if (lseek(file->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0)
+			return (errno);
+
+		/*
+		 * Keep the signed result: narrowing it to uint32_t here
+		 * would turn a -1 failure into a large positive count
+		 * and defeat the error check below.
+		 */
+		ret = readv(file->ff_fd, req->lr_data_iov, (int)niov);
+#endif
+
+		if (ret < 0)
+			return (errno);
+
+		req->lr_resp.io.count = (uint32_t)ret;
+	}
+
+	return (0);
+}
+
+/*
+ * Remove the file or directory that <fid> refers to.
+ *
+ * Fails with EROFS on a read-only backend.  Otherwise the parent
+ * directory and the object itself are stat'ed, unlink permission is
+ * checked against both ACLs, and the object is removed with
+ * unlinkat() (AT_REMOVEDIR is used when it is a directory).
+ *
+ * Returns 0 or an errno value.
+ */
+static int
+fs_remove(void *softc, struct l9p_fid *fid)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_acl *parent_acl;
+	struct fs_fid *file;
+	struct stat pst, cst;
+	char dirname[MAXPATHLEN];
+	int error;
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/* Locate and stat the parent directory. */
+	error = fs_pdir(sc, fid, dirname, sizeof(dirname), &pst);
+	if (error)
+		return (error);
+
+	file = fid->lo_aux;
+	/*
+	 * "error" is 0 at this point, so a failed stat must report
+	 * errno; otherwise the remove would appear to have succeeded.
+	 */
+	if (fstatat(file->ff_dirfd, file->ff_name, &cst, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	parent_acl = getacl(file, -1, dirname);
+	fillacl(file);
+
+	error = check_access(L9P_ACOP_UNLINK,
+	    parent_acl, &pst, file->ff_acl, &cst, file->ff_ai, (gid_t)-1);
+	l9p_acl_free(parent_acl);
+	if (error)
+		return (error);
+
+	if (unlinkat(file->ff_dirfd, file->ff_name,
+	    S_ISDIR(cst.st_mode) ? AT_REMOVEDIR : 0) != 0)
+		error = errno;
+
+	return (error);
+}
+
+/*
+ * Tstat handler: stat the fid's path (without following a symlink)
+ * and convert the result into the Rstat response via dostat().  The
+ * .u extensions are requested when the connection negotiated
+ * 9P2000.u or later.
+ *
+ * Returns 0 or the errno from fstatat().
+ */
+static int
+fs_stat(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct fs_fid *ff = req->lr_fid->lo_aux;
+	struct stat sb;
+	bool dotu;
+	int rc;
+
+	assert(ff);
+	dotu = req->lr_conn->lc_version >= L9P_2000U;
+
+	rc = fstatat(ff->ff_dirfd, ff->ff_name, &sb, AT_SYMLINK_NOFOLLOW);
+	if (rc != 0)
+		return (errno);
+
+	dostat(sc, &req->lr_resp.rstat.stat, ff->ff_name, &sb, dotu);
+	return (0);
+}
+
+/*
+ * Twalk handler.
+ *
+ * Starting from the path of req->lr_fid, walk up to nwname name
+ * components, producing one qid per component successfully walked.
+ * A new fs_fid for the last successfully walked path is then
+ * attached to req->lr_newfid (which may be the same fid as lr_fid,
+ * in which case the old fs_fid is released first).
+ *
+ * Two path buffers are used alternately: "succ" always holds the
+ * last path known to exist, "next" is scratch space for the
+ * candidate being built.
+ *
+ * Returns 0 or an errno value.
+ */
+static int
+fs_walk(void *softc, struct l9p_request *req)
+{
+	struct l9p_acl *acl;
+	struct fs_authinfo *ai;
+	struct fs_fid *file = req->lr_fid->lo_aux;
+	struct fs_fid *newfile;
+	struct stat st;
+	size_t clen, namelen, need;
+	char *comp, *succ, *next, *swtmp;
+	bool atroot;
+	bool dotdot;
+	int i, nwname;
+	int error = 0;
+	char namebufs[2][MAXPATHLEN];
+
+	/*
+	 * https://swtch.com/plan9port/man/man9/walk.html:
+	 *
+	 *    It is legal for nwname to be zero, in which case newfid
+	 *    will represent the same file as fid and the walk will
+	 *    usually succeed; this is equivalent to walking to dot.
+	 * [Aside: it's not clear if we should test S_ISDIR here.]
+	 *    ...
+	 *    The name ".." ... represents the parent directory.
+	 *    The name "." ... is not used in the protocol.
+	 *    ... A walk of the name ".." in the root directory
+	 *    of the server is equivalent to a walk with no name
+	 *    elements.
+	 *
+	 * Note that req.twalk.nwname never exceeds L9P_MAX_WELEM,
+	 * so it is safe to convert to plain int.
+	 *
+	 * We are to return an error only if the first walk fails,
+	 * else stop at the end of the names or on the first error.
+	 * The final fid is based on the last name successfully
+	 * walked.
+	 *
+	 * Note that we *do* get Twalk requests with nwname==0 on files.
+	 *
+	 * Set up "successful name" buffer pointer with base fid name,
+	 * initially.  We'll swap each new success into it as we go.
+	 *
+	 * Invariant: atroot and stat data correspond to current
+	 * (succ) path.
+	 */
+	succ = namebufs[0];
+	next = namebufs[1];
+	namelen = strlcpy(succ, file->ff_name, MAXPATHLEN);
+	if (namelen >= MAXPATHLEN)
+		return (ENAMETOOLONG);
+	if (fstatat(file->ff_dirfd, succ, &st, AT_SYMLINK_NOFOLLOW) < 0)
+		return (errno);
+	ai = file->ff_ai;
+	/* An empty path is taken to mean the root of the export. */
+	atroot = strlen(succ) == 0; /* XXX? */
+	fillacl(file);
+	acl = file->ff_acl;
+
+	nwname = (int)req->lr_req.twalk.nwname;
+
+	for (i = 0; i < nwname; i++) {
+		/*
+		 * Must have execute permission to search a directory.
+		 * Then, look up each component in its directory-so-far.
+		 * Check for ".." along the way, handling specially
+		 * as needed.  Forbid "/" in name components.
+		 *
+		 */
+		/*
+		 * NOTE(review): the two "goto out" exits below return
+		 * the error even when i > 0, i.e. they bypass the
+		 * "fail only on the first name" handling after the
+		 * loop that the "break" exits go through -- confirm
+		 * this difference is intended.
+		 */
+		if (!S_ISDIR(st.st_mode)) {
+			error = ENOTDIR;
+			goto out;
+		}
+		error = check_access(L9P_ACE_EXECUTE,
+		     NULL, NULL, acl, &st, ai, (gid_t)-1);
+		if (error) {
+			L9P_LOG(L9P_DEBUG,
+			    "Twalk: denying dir-walk on \"%s\" for uid %u",
+			    succ, (unsigned)ai->ai_uid);
+			error = EPERM;
+			goto out;
+		}
+		comp = req->lr_req.twalk.wname[i];
+		if (strchr(comp, '/') != NULL) {
+			error = EINVAL;
+			break;
+		}
+
+		clen = strlen(comp);
+		dotdot = false;
+
+		/*
+		 * Build next pathname (into "next").  If "..",
+		 * just strip one name component off the success
+		 * name so far.  Since we know this name fits, the
+		 * stripped down version also fits.  Otherwise,
+		 * the name is the base name plus '/' plus the
+		 * component name plus terminating '\0'; this may
+		 * or may not fit.
+		 */
+		if (comp[0] == '.') {
+			/* "." is not a legal walk component. */
+			if (clen == 1) {
+				error = EINVAL;
+				break;
+			}
+			if (comp[1] == '.' && clen == 2)
+				dotdot = true;
+		}
+		if (dotdot) {
+			/*
+			 * It's not clear how ".." at root should
+			 * be handled when i > 0.  Obeying the man
+			 * page exactly, we reset i to 0 and stop,
+			 * declaring terminal success.
+			 *
+			 * Otherwise, we just climbed up one level
+			 * so adjust "atroot".
+			 */
+			if (atroot) {
+				i = 0;
+				break;
+			}
+			(void) r_dirname(succ, next, MAXPATHLEN);
+			namelen = strlen(next);
+			atroot = strlen(next) == 0; /* XXX? */
+		} else {
+			/* succ + '/' + comp + '\0' must fit in the buffer. */
+			need = namelen + 1 + clen + 1;
+			if (need > MAXPATHLEN) {
+				error = ENAMETOOLONG;
+				break;
+			}
+			memcpy(next, succ, namelen);
+			next[namelen++] = '/';
+			memcpy(&next[namelen], comp, clen + 1);
+			namelen += clen;
+			/*
+			 * Since name is never ".", we are necessarily
+			 * descending below the root now.
+			 */
+			atroot = false;
+		}
+
+		/* Any stat failure is reported to the client as ENOENT. */
+		if (fstatat(file->ff_dirfd, next, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+			error = ENOENT;
+			break;
+		}
+
+		/*
+		 * Success: generate qid and swap this
+		 * successful name into place.  Update acl.
+		 */
+		generate_qid(&st, &req->lr_resp.rwalk.wqid[i]);
+		swtmp = succ;
+		succ = next;
+		next = swtmp;
+		if (acl != NULL && acl != file->ff_acl)
+			l9p_acl_free(acl);
+		/*
+		 * NOTE(review): after the swap above, "next" holds the
+		 * previous path and "succ" the one just walked to --
+		 * confirm that "next" is the intended argument here.
+		 */
+		acl = getacl(file, -1, next);
+	}
+
+	/*
+	 * Fail only if we failed on the first name.
+	 * Otherwise we succeeded on something, and "succ"
+	 * points to the last successful name in namebufs[].
+	 */
+	if (error) {
+		if (i == 0)
+			goto out;
+		error = 0;
+	}
+
+	newfile = open_fid(file->ff_dirfd, succ, ai, false);
+	if (newfile == NULL) {
+		error = ENOMEM;
+		goto out;
+	}
+	if (req->lr_newfid == req->lr_fid) {
+		/*
+		 * Before overwriting fid->lo_aux, free the old value.
+		 * Note that this doesn't free the l9p_fid data,
+		 * just the fs_fid data.  (But it does ditch ff_acl.)
+		 */
+		if (acl == file->ff_acl)
+			acl = NULL;
+		fs_freefid(softc, req->lr_fid);
+		file = NULL;
+	}
+	req->lr_newfid->lo_aux = newfile;
+	/*
+	 * NOTE(review): when file was set to NULL just above, an acl
+	 * obtained from getacl() is neither handed to newfile here nor
+	 * freed at "out" (both tests require file != NULL) -- confirm
+	 * whether that can leak.
+	 */
+	if (file != NULL && acl != file->ff_acl) {
+		newfile->ff_acl = acl;
+		acl = NULL;
+	}
+	req->lr_resp.rwalk.nwqid = (uint16_t)i;
+out:
+	if (file != NULL && acl != file->ff_acl)
+		l9p_acl_free(acl);
+	return (error);
+}
+
+/*
+ * Twrite handler: write the request's data iovecs to the fid's open
+ * descriptor at the requested offset and report the number of bytes
+ * written in the response.
+ *
+ * Returns 0, EROFS on a read-only backend, or the errno of the
+ * failing system call.
+ */
+static int
+fs_write(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct fs_fid *ff = req->lr_fid->lo_aux;
+	size_t cnt;
+	ssize_t nwritten;
+
+	assert(ff != NULL);
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/* Trim the iovec array to the byte count the client asked for. */
+	cnt = l9p_truncate_iov(req->lr_data_iov,
+	    req->lr_data_niov, req->lr_req.io.count);
+
+#if defined(__FreeBSD__) || defined(__illumos__)
+	nwritten = pwritev(ff->ff_fd, req->lr_data_iov, cnt,
+	    req->lr_req.io.offset);
+#else
+	/* XXX: not thread safe, should really use aio_listio. */
+	if (lseek(ff->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0)
+		return (errno);
+
+	nwritten = writev(ff->ff_fd, req->lr_data_iov, (int)cnt);
+#endif
+
+	if (nwritten < 0)
+		return (errno);
+
+	req->lr_resp.io.count = (uint32_t)nwritten;
+	return (0);
+}
+
+/*
+ * Twstat handler: apply the changes described by the request's stat
+ * structure to the file the fid refers to.  Fields holding the
+ * all-ones "don't touch" value are skipped.
+ *
+ * Handled here: length (truncate), owner/group (9P2000.u and later
+ * only), mode (permission bits only) and name (rename within the
+ * same directory).  atime and mtime changes are silently ignored,
+ * and an attempt to change dev fails with EPERM.
+ *
+ * Returns 0, EROFS on a read-only backend, or an errno value.  The
+ * changes are applied one after another, so a failure can leave
+ * earlier ones in place (see the XXX note below).
+ */
+static int
+fs_wstat(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_stat *l9stat = &req->lr_req.twstat.stat;
+	struct l9p_fid *fid;
+	struct fs_fid *file;
+	int error = 0;
+
+	fid = req->lr_fid;
+	file = fid->lo_aux;
+	assert(file != NULL);
+
+	/*
+	 * XXX:
+	 *
+	 * stat(9P) sez:
+	 *
+	 * Either all the changes in wstat request happen, or none of them
+	 * does: if the request succeeds, all changes were made; if it fails,
+	 * none were.
+	 *
+	 * Atomicity is clearly missing in current implementation.
+	 */
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	if (l9stat->atime != (uint32_t)~0) {
+		/* XXX: not implemented, ignore */
+	}
+
+	if (l9stat->mtime != (uint32_t)~0) {
+		/* XXX: not implemented, ignore */
+	}
+
+	if (l9stat->dev != (uint32_t)~0) {
+		error = EPERM;
+		goto out;
+	}
+
+	if (l9stat->length != (uint64_t)~0) {
+		int tfd;
+
+		if (file->ff_dir != NULL) {
+			error = EINVAL;
+			goto out;
+		}
+
+		/*
+		 * ff_name has to be resolved against ff_dirfd, like every
+		 * other operation in this function; truncate() would
+		 * resolve it against the process working directory.
+		 * There is no truncateat(), so open the file relative to
+		 * ff_dirfd and use ftruncate().  O_NOFOLLOW refuses a
+		 * final symlink, and O_NONBLOCK keeps the open from
+		 * hanging on a FIFO that has no reader.
+		 */
+		tfd = openat(file->ff_dirfd, file->ff_name,
+		    O_WRONLY | O_NOFOLLOW | O_NONBLOCK);
+		if (tfd < 0) {
+			error = errno;
+			goto out;
+		}
+		if (ftruncate(tfd, (off_t)l9stat->length) != 0) {
+			error = errno;
+			(void) close(tfd);
+			goto out;
+		}
+		(void) close(tfd);
+	}
+
+	if (req->lr_conn->lc_version >= L9P_2000U) {
+		if (fchownat(file->ff_dirfd, file->ff_name, l9stat->n_uid,
+		    l9stat->n_gid, AT_SYMLINK_NOFOLLOW) != 0) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (l9stat->mode != (uint32_t)~0) {
+		if (fchmodat(file->ff_dirfd, file->ff_name,
+		    l9stat->mode & 0777, 0) != 0) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (strlen(l9stat->name) > 0) {
+		struct l9p_acl *parent_acl;
+		struct stat st;
+		char *tmp;
+		char newname[MAXPATHLEN];
+
+		/*
+		 * Rename-within-directory: it's not deleting anything,
+		 * but we need write permission on the directory.  This
+		 * should suffice.
+		 */
+		error = fs_pdir(softc, fid, newname, sizeof(newname), &st);
+		if (error)
+			goto out;
+		parent_acl = getacl(file, -1, newname);
+		error = check_access(L9P_ACE_ADD_FILE,
+		    parent_acl, &st, NULL, NULL, file->ff_ai, (gid_t)-1);
+		l9p_acl_free(parent_acl);
+		if (error)
+			goto out;
+		/* newname holds the parent path; add the new leaf name. */
+		error = fs_dpf(newname, l9stat->name, sizeof(newname));
+		if (error)
+			goto out;
+		tmp = strdup(newname);
+		if (tmp == NULL) {
+			error = ENOMEM;
+			goto out;
+		}
+		if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd,
+		    tmp) != 0) {
+			error = errno;
+			free(tmp);
+			goto out;
+		}
+		/* Successful rename, update file->ff_name.  ACL can stay. */
+		free(file->ff_name);
+		file->ff_name = tmp;
+	}
+out:
+	return (error);
+}
+
+/*
+ * Tstatfs handler: report file system statistics for the file system
+ * that holds the fid's path.
+ *
+ * The caller needs "read data" access to the path.  The path is then
+ * opened just long enough to query fstatvfs() (fstatfs() where
+ * __illumos__ is not defined) and the _PC_NAME_MAX limit, and the
+ * results are converted into the response by dostatfs().
+ *
+ * Returns 0 or an errno value.
+ */
+static int
+fs_statfs(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *file;
+	struct stat st;
+#ifdef __illumos__
+	struct statvfs f;
+#else
+	struct statfs f;
+#endif
+	long name_max;
+	int error;
+	int fd;
+
+	file = req->lr_fid->lo_aux;
+	assert(file);
+
+	if (fstatat(file->ff_dirfd, file->ff_name, &st,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/*
+	 * Not entirely clear what access to require; we'll go
+	 * for "read data".
+	 */
+	fillacl(file);
+	error = check_access(L9P_ACE_READ_DATA, NULL, NULL,
+	    file->ff_acl, &st, file->ff_ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	fd = openat(file->ff_dirfd, file->ff_name, 0);
+	if (fd < 0)
+		return (errno);
+
+	/* Do not leak the descriptor if the statistics call fails. */
+#ifdef __illumos__
+	if (fstatvfs(fd, &f) != 0) {
+		error = errno;
+		(void) close(fd);
+		return (error);
+	}
+#else
+	if (fstatfs(fd, &f) != 0) {
+		error = errno;
+		(void) close(fd);
+		return (error);
+	}
+#endif
+
+	/* Save errno before close() gets a chance to overwrite it. */
+	name_max = fpathconf(fd, _PC_NAME_MAX);
+	error = errno;
+	close(fd);
+
+	if (name_max == -1)
+		return (error);
+
+	dostatfs(&req->lr_resp.rstatfs.statfs, &f, name_max);
+
+	return (0);
+}
+
+/*
+ * Tlopen handler (9P2000.L).
+ *
+ * Translates the Linux open flags from the request into local
+ * open(2) flags plus a 9P open mode, opens the fid via fs_iopen()
+ * with the gid supplied by the client, and on success fills in the
+ * qid and iounit of the Rlopen response.
+ *
+ * Returns 0 or an errno value.
+ */
+static int
+fs_lopen(void *softc, struct l9p_request *req)
+{
+	enum l9p_omode omode;
+	struct stat sb;
+	int oflags;
+	int rv;
+
+	rv = fs_oflags_dotl(req->lr_req.tlopen.flags, &oflags, &omode);
+	if (rv != 0)
+		return (rv);
+
+	rv = fs_iopen(softc, req->lr_fid, oflags, omode,
+	    req->lr_req.tlopen.gid, &sb);
+	if (rv != 0)
+		return (rv);
+
+	generate_qid(&sb, &req->lr_resp.rlopen.qid);
+	req->lr_resp.rlopen.iounit = req->lr_conn->lc_max_io_size;
+	return (0);
+}
+
+/*
+ * Tlcreate handler (9P2000.L): create and open a regular file inside
+ * the directory named by req->lr_fid.
+ *
+ * The Linux open flags are translated first; the file is then
+ * created through fs_icreate() with Linux style permissions
+ * (isp9 == false), the low 0777 bits of the requested mode, and the
+ * gid supplied by the client.  The qid is filled in on success only;
+ * iounit is filled in either way.
+ *
+ * Returns 0 or an errno value.
+ */
+static int
+fs_lcreate(void *softc, struct l9p_request *req)
+{
+	struct l9p_fid *parent = req->lr_fid;
+	char *newname = req->lr_req.tlcreate.name;
+	enum l9p_omode omode;
+	struct stat sb;
+	mode_t mode;
+	gid_t group;
+	int oflags;
+	int rv;
+
+	rv = fs_oflags_dotl(req->lr_req.tlcreate.flags, &oflags, &omode);
+	if (rv != 0)
+		return (rv);
+
+	/* Only the permission bits are honoured (? set-id bits?). */
+	mode = (mode_t)req->lr_req.tlcreate.mode & 0777;
+	group = req->lr_req.tlcreate.gid;
+
+	rv = fs_icreate(softc, parent, newname, oflags, false, mode,
+	    group, &sb);
+	if (rv == 0)
+		generate_qid(&sb, &req->lr_resp.rlcreate.qid);
+
+	req->lr_resp.rlcreate.iounit = req->lr_conn->lc_max_io_size;
+	return (rv);
+}
+
+/*
+ * Tsymlink handler (9P2000.L): create a symbolic link inside the
+ * directory named by req->lr_fid, using the name, target and gid
+ * from the request.  On success the qid of the new link is stored in
+ * the response.
+ *
+ * Returns 0 or an errno value.
+ */
+static int
+fs_symlink(void *softc, struct l9p_request *req)
+{
+	struct stat sb;
+	int rv;
+
+	rv = fs_isymlink(softc, req->lr_fid,
+	    req->lr_req.tsymlink.name,
+	    req->lr_req.tsymlink.symtgt,
+	    req->lr_req.tsymlink.gid, &sb);
+	if (rv != 0)
+		return (rv);
+
+	generate_qid(&sb, &req->lr_resp.rsymlink.qid);
+	return (0);
+}
+
+/*
+ * Tmknod handler (9P2000.L): create a special file inside the
+ * directory named by req->lr_fid.
+ *
+ * The file type bits of the requested mode select the helper:
+ * block and character devices go to fs_imknod(), FIFOs to
+ * fs_imkfifo() and sockets to fs_imksocket(); any other type fails
+ * with EINVAL.  All helpers are called with Linux style permissions
+ * (isp9 == false) and the gid supplied by the client.  On success
+ * the qid of the new node is stored in the response.
+ *
+ * Returns 0 or an errno value.
+ */
+static int
+fs_mknod(void *softc, struct l9p_request *req)
+{
+	struct l9p_fid *dir;
+	struct stat st;
+	uint32_t mode, major, minor;
+	dev_t dev;
+	gid_t gid;
+	char *name;
+	int error;
+
+	dir = req->lr_fid;
+	name = req->lr_req.tmknod.name;
+	mode = req->lr_req.tmknod.mode;
+	gid = req->lr_req.tmknod.gid;
+
+	switch (mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		/* Keep only the type and the plain permission bits. */
+		mode = (mode & S_IFMT) | (mode & 0777);	/* ??? */
+		major = req->lr_req.tmknod.major;
+		/* The minor number comes from its own request field. */
+		minor = req->lr_req.tmknod.minor;
+		dev = makedev(major, minor);
+		error = fs_imknod(softc, dir, name, false,
+		    (mode_t)mode, dev, gid, &st);
+		break;
+
+	case S_IFIFO:
+		error = fs_imkfifo(softc, dir, name, false,
+		    (mode_t)(mode & 0777), gid, &st);
+		break;
+
+	case S_IFSOCK:
+		error = fs_imksocket(softc, dir, name, false,
+		    (mode_t)(mode & 0777), gid, &st);
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+	if (error == 0)
+		generate_qid(&st, &req->lr_resp.rmknod.qid);
+	return (error);
+}
+
+/*
+ * Trename handler: rename the file referred to by lr_fid to
+ * trename.name inside the directory referred to by lr_fid2.
+ *
+ * Fails with EROFS on a read-only export.  Otherwise the file's parent
+ * directory is obtained with fs_pdir(), an unlink access check is made
+ * against the old parent (only when the parent changes), an add-file or
+ * add-subdirectory check is made against the new parent, and
+ * renameat() is called.  On success the fid's stored name is replaced
+ * by the new path and dropacl() is called on the fid.
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+fs_rename(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct fs_authinfo *ai;
+	struct l9p_acl *oparent_acl;
+	struct l9p_fid *fid, *f2;
+	struct fs_fid *file, *f2ff;
+	struct stat cst, opst, npst;
+	int32_t op;
+	bool reparenting;
+	char *tmp;
+	char olddir[MAXPATHLEN], newname[MAXPATHLEN];
+	int error;
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/*
+	 * Note: lr_fid represents the file that is to be renamed,
+	 * so we must locate its parent directory and verify that
+	 * both this parent directory and the new directory f2 are
+	 * writable.  But if the new parent directory is the same
+	 * path as the old parent directory, our job is simpler.
+	 */
+	fid = req->lr_fid;
+	file = fid->lo_aux;
+	assert(file != NULL);
+	ai = file->ff_ai;
+
+	/* olddir receives the parent's path, opst its stat data. */
+	error = fs_pdir(sc, fid, olddir, sizeof(olddir), &opst);
+	if (error)
+		return (error);
+
+	f2 = req->lr_fid2;
+	f2ff = f2->lo_aux;
+	assert(f2ff != NULL);
+
+	/* The parent changes only if the two directory paths differ. */
+	reparenting = strcmp(olddir, f2ff->ff_name) != 0;
+
+	fillacl(file);
+	fillacl(f2ff);
+
+	/* cst: stat of the file being renamed (symlinks not followed). */
+	if (fstatat(file->ff_dirfd, file->ff_name, &cst,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/*
+	 * Are we moving from olddir?  If so, we're unlinking
+	 * from it, in terms of ACL access.
+	 */
+	if (reparenting) {
+		oparent_acl = getacl(file, -1, olddir);
+		error = check_access(L9P_ACOP_UNLINK,
+		    oparent_acl, &opst, file->ff_acl, &cst, ai, (gid_t)-1);
+		l9p_acl_free(oparent_acl);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Now check that we're allowed to "create" a file or directory in
+	 * f2.  (Should we do this, too, only if reparenting?  Maybe check
+	 * for dir write permission if not reparenting -- but that's just
+	 * add-file/add-subdir, which means doing this always.)
+	 */
+	if (fstatat(f2ff->ff_dirfd, f2ff->ff_name, &npst,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	op = S_ISDIR(cst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE;
+	error = check_access(op, f2ff->ff_acl, &npst, NULL, NULL,
+	    ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	/*
+	 * Directories OK, file systems not R/O, etc; build final name.
+	 * f2ff->ff_name cannot exceed MAXPATHLEN, but out of general
+	 * paranoia, let's double check anyway.
+	 */
+	if (strlcpy(newname, f2ff->ff_name, sizeof(newname)) >= sizeof(newname))
+		return (ENAMETOOLONG);
+	error = fs_dpf(newname, req->lr_req.trename.name, sizeof(newname));
+	if (error)
+		return (error);
+	/* Heap copy of the new path; it becomes the fid's name on success. */
+	tmp = strdup(newname);
+	if (tmp == NULL)
+		return (ENOMEM);
+
+	/*
+	 * NOTE(review): both paths are resolved relative to
+	 * file->ff_dirfd, not f2ff->ff_dirfd -- presumably every fid
+	 * shares the same directory descriptor; confirm.
+	 */
+	if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd, tmp) != 0) {
+		error = errno;
+		free(tmp);
+		return (error);
+	}
+
+	/* file has been renamed but old fid is not clunked */
+	free(file->ff_name);
+	file->ff_name = tmp;
+
+	dropacl(file);
+	return (0);
+}
+
+/*
+ * Treadlink handler: read the target of the symbolic link referred to
+ * by lr_fid and hand a heap-allocated copy to the Rreadlink response.
+ *
+ * Returns 0 on success, the errno from readlinkat(), or ENOMEM when the
+ * target fills the local buffer or the copy cannot be allocated.
+ */
+static int
+fs_readlink(void *softc __unused, struct l9p_request *req)
+{
+	char target[MAXPATHLEN];
+	struct fs_fid *ff;
+	ssize_t n;
+	char *copy;
+
+	ff = req->lr_fid->lo_aux;
+	assert(ff);
+
+	n = readlinkat(ff->ff_dirfd, ff->ff_name, target, sizeof(target));
+	if (n < 0)
+		return (errno);
+
+	/* A full buffer may mean a truncated target. */
+	if ((size_t)n >= sizeof(target))
+		return (ENOMEM); /* todo: allocate dynamically */
+
+	/* readlinkat() does not NUL-terminate, so copy exactly n bytes. */
+	copy = strndup(target, (size_t)n);
+	req->lr_resp.rreadlink.target = copy;
+	if (copy == NULL)
+		return (ENOMEM);
+
+	return (0);
+}
+
+/*
+ * Tgetattr handler: stat the file referred to by lr_fid (symbolic
+ * links are not followed) and copy every attribute selected in
+ * tgetattr.request_mask into the Rgetattr response.  rgetattr.valid
+ * reports which attributes were filled in; it is left at 0 when the
+ * stat fails.
+ *
+ * Returns 0 on success or the errno from fstatat().
+ */
+static int
+fs_getattr(void *softc __unused, struct l9p_request *req)
+{
+	uint64_t mask, valid;
+	struct fs_fid *file;
+	struct stat st;
+	int error = 0;
+
+	file = req->lr_fid->lo_aux;
+	assert(file);
+
+	valid = 0;
+	if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) {
+		error = errno;
+		goto out;
+	}
+	/* ?? Can we provide items not-requested? If so, can skip tests. */
+	mask = req->lr_req.tgetattr.request_mask;
+	if (mask & L9PL_GETATTR_MODE) {
+		/* It is not clear if we need any translations. */
+		req->lr_resp.rgetattr.mode = st.st_mode;
+		valid |= L9PL_GETATTR_MODE;
+	}
+	if (mask & L9PL_GETATTR_NLINK) {
+		req->lr_resp.rgetattr.nlink = st.st_nlink;
+		valid |= L9PL_GETATTR_NLINK;
+	}
+	if (mask & L9PL_GETATTR_UID) {
+		/* provide st_uid, or file->ff_uid? */
+		req->lr_resp.rgetattr.uid = st.st_uid;
+		valid |= L9PL_GETATTR_UID;
+	}
+	if (mask & L9PL_GETATTR_GID) {
+		/* provide st_gid, or file->ff_gid? */
+		req->lr_resp.rgetattr.gid = st.st_gid;
+		valid |= L9PL_GETATTR_GID;
+	}
+	if (mask & L9PL_GETATTR_RDEV) {
+		/* It is not clear if we need any translations. */
+		req->lr_resp.rgetattr.rdev = (uint64_t)st.st_rdev;
+		valid |= L9PL_GETATTR_RDEV;
+	}
+	if (mask & L9PL_GETATTR_ATIME) {
+		req->lr_resp.rgetattr.atime_sec =
+		    (uint64_t)STAT_ATIME(&st).tv_sec;
+		req->lr_resp.rgetattr.atime_nsec =
+		    (uint64_t)STAT_ATIME(&st).tv_nsec;
+		valid |= L9PL_GETATTR_ATIME;
+	}
+	if (mask & L9PL_GETATTR_MTIME) {
+		req->lr_resp.rgetattr.mtime_sec =
+		    (uint64_t)STAT_MTIME(&st).tv_sec;
+		req->lr_resp.rgetattr.mtime_nsec =
+		    (uint64_t)STAT_MTIME(&st).tv_nsec;
+		valid |= L9PL_GETATTR_MTIME;
+	}
+	if (mask & L9PL_GETATTR_CTIME) {
+		req->lr_resp.rgetattr.ctime_sec =
+		    (uint64_t)STAT_CTIME(&st).tv_sec;
+		req->lr_resp.rgetattr.ctime_nsec =
+		    (uint64_t)STAT_CTIME(&st).tv_nsec;
+		valid |= L9PL_GETATTR_CTIME;
+	}
+	if (mask & L9PL_GETATTR_BTIME) {
+		/*
+		 * Birth time source depends on the platform: the stat
+		 * structure, getcrtime() on illumos, or zero otherwise.
+		 * The valid bit is set in all three cases.
+		 */
+#if defined(HAVE_BIRTHTIME)
+		req->lr_resp.rgetattr.btime_sec =
+		    (uint64_t)st.st_birthtim.tv_sec;
+		req->lr_resp.rgetattr.btime_nsec =
+		    (uint64_t)st.st_birthtim.tv_nsec;
+#elif defined(__illumos__)
+		getcrtime(softc, file->ff_dirfd, file->ff_name,
+		    &req->lr_resp.rgetattr.btime_sec,
+		    &req->lr_resp.rgetattr.btime_nsec);
+#else
+		req->lr_resp.rgetattr.btime_sec = 0;
+		req->lr_resp.rgetattr.btime_nsec = 0;
+#endif
+		valid |= L9PL_GETATTR_BTIME;
+	}
+	/* No response field is written for INO; only the valid bit is set. */
+	if (mask & L9PL_GETATTR_INO)
+		valid |= L9PL_GETATTR_INO;
+	if (mask & L9PL_GETATTR_SIZE) {
+		req->lr_resp.rgetattr.size = (uint64_t)st.st_size;
+		valid |= L9PL_GETATTR_SIZE;
+	}
+	if (mask & L9PL_GETATTR_BLOCKS) {
+		req->lr_resp.rgetattr.blksize = (uint64_t)st.st_blksize;
+		req->lr_resp.rgetattr.blocks = (uint64_t)st.st_blocks;
+		valid |= L9PL_GETATTR_BLOCKS;
+	}
+#ifndef __illumos__
+	if (mask & L9PL_GETATTR_GEN) {
+		req->lr_resp.rgetattr.gen = st.st_gen;
+		valid |= L9PL_GETATTR_GEN;
+	}
+#endif
+	/* don't know what to do with data version yet */
+
+	/* The qid is generated regardless of the request mask. */
+	generate_qid(&st, &req->lr_resp.rgetattr.qid);
+out:
+	req->lr_resp.rgetattr.valid = valid;
+	return (error);
+}
+
+/*
+ * Tsetattr handler: apply the attributes selected in tsetattr.valid to
+ * the file referred to by lr_fid.  The steps run in this order: mode,
+ * owner/group, size, access/modification times.  They are not atomic:
+ * a failure part-way through leaves the earlier changes in place.
+ *
+ * Returns 0 on success, EROFS on a read-only export, EISDIR when a
+ * size change is requested on a directory, or the errno of the first
+ * system call that failed.
+ *
+ * Should combine some of this with wstat code.
+ */
+static int
+fs_setattr(void *softc, struct l9p_request *req)
+{
+	uint64_t mask;
+	struct fs_softc *sc = softc;
+	struct timespec ts[2];
+	struct fs_fid *file;
+	struct stat st;
+	int error = 0;
+	uid_t uid;
+	gid_t gid;
+
+	file = req->lr_fid->lo_aux;
+	assert(file);
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/*
+	 * As with WSTAT we have atomicity issues.
+	 */
+	mask = req->lr_req.tsetattr.valid;
+
+	/* The current attributes seed the timestamps and the type check. */
+	if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) {
+		error = errno;
+		goto out;
+	}
+
+	if ((mask & L9PL_SETATTR_SIZE) && S_ISDIR(st.st_mode)) {
+		error = EISDIR;
+		goto out;
+	}
+
+	if (mask & L9PL_SETATTR_MODE) {
+		/* Only the nine permission bits are applied. */
+		if (fchmodat(file->ff_dirfd, file->ff_name,
+		    req->lr_req.tsetattr.mode & 0777,
+		    0)) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (mask & (L9PL_SETATTR_UID | L9PL_SETATTR_GID)) {
+		/* An id of -1 tells fchownat() to leave that id alone. */
+		uid = mask & L9PL_SETATTR_UID
+		    ? req->lr_req.tsetattr.uid
+		    : (uid_t)-1;
+
+		gid = mask & L9PL_SETATTR_GID
+		    ? req->lr_req.tsetattr.gid
+		    : (gid_t)-1;
+
+		if (fchownat(file->ff_dirfd, file->ff_name, uid, gid,
+		    AT_SYMLINK_NOFOLLOW)) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (mask & L9PL_SETATTR_SIZE) {
+		/* Truncate follows symlinks, is this OK? */
+		int fd = openat(file->ff_dirfd, file->ff_name, O_RDWR);
+
+		/*
+		 * Report the open failure itself; otherwise ftruncate(-1)
+		 * would replace it with EBADF.
+		 */
+		if (fd < 0) {
+			error = errno;
+			goto out;
+		}
+		if (ftruncate(fd, (off_t)req->lr_req.tsetattr.size)) {
+			error = errno;
+			(void) close(fd);
+			goto out;
+		}
+		(void) close(fd);
+	}
+
+	if (mask & (L9PL_SETATTR_ATIME | L9PL_SETATTR_MTIME)) {
+		/* Start from the current times so an unselected one is kept. */
+		ts[0].tv_sec = STAT_ATIME(&st).tv_sec;
+		ts[0].tv_nsec = STAT_ATIME(&st).tv_nsec;
+		ts[1].tv_sec = STAT_MTIME(&st).tv_sec;
+		ts[1].tv_nsec = STAT_MTIME(&st).tv_nsec;
+
+		if (mask & L9PL_SETATTR_ATIME) {
+			/* Without the _SET flag the server's clock is used. */
+			if (mask & L9PL_SETATTR_ATIME_SET) {
+				ts[0].tv_sec = req->lr_req.tsetattr.atime_sec;
+				ts[0].tv_nsec = req->lr_req.tsetattr.atime_nsec;
+			} else {
+				if (clock_gettime(CLOCK_REALTIME, &ts[0]) != 0) {
+					error = errno;
+					goto out;
+				}
+			}
+		}
+
+		if (mask & L9PL_SETATTR_MTIME) {
+			if (mask & L9PL_SETATTR_MTIME_SET) {
+				ts[1].tv_sec = req->lr_req.tsetattr.mtime_sec;
+				ts[1].tv_nsec = req->lr_req.tsetattr.mtime_nsec;
+			} else {
+				if (clock_gettime(CLOCK_REALTIME, &ts[1]) != 0) {
+					error = errno;
+					goto out;
+				}
+			}
+		}
+
+		if (utimensat(file->ff_dirfd, file->ff_name, ts,
+		    AT_SYMLINK_NOFOLLOW)) {
+			error = errno;
+			goto out;
+		}
+	}
+out:
+	return (error);
+}
+
+/*
+ * Txattrwalk handler: not implemented by this backend; every request
+ * is answered with EOPNOTSUPP.
+ */
+static int
+fs_xattrwalk(void *softc __unused, struct l9p_request *req __unused)
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Txattrcreate handler: not implemented by this backend; every request
+ * is answered with EOPNOTSUPP.
+ */
+static int
+fs_xattrcreate(void *softc __unused, struct l9p_request *req __unused)
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Treaddir handler: pack directory entries from the open directory
+ * stream of lr_fid into the response message, starting at the offset
+ * given in the request, until the stream is exhausted or an entry no
+ * longer fits.  The number of bytes packed is returned in
+ * lr_resp.io.count.
+ *
+ * Returns ENOTDIR if the fid has no open directory stream, or the
+ * error from locking the fid's mutex; otherwise 0.
+ */
+static int
+fs_readdir(void *softc __unused, struct l9p_request *req)
+{
+	struct l9p_message msg;
+	struct l9p_dirent de;
+	struct fs_fid *file;
+	struct dirent *dp;
+	struct stat st;
+	uint32_t count;
+	int error = 0;
+
+	file = req->lr_fid->lo_aux;
+	assert(file);
+
+	if (file->ff_dir == NULL)
+		return (ENOTDIR);
+
+	/* The seek/read sequence below runs with the fid's mutex held. */
+	if ((error = pthread_mutex_lock(&file->ff_mtx)) != 0)
+		return (error);
+
+	/*
+	 * It's not clear whether we can use the same trick for
+	 * discarding offsets here as we do in fs_read.  It
+	 * probably should work, we'll have to see if some
+	 * client(s) use the zero-offset thing to rescan without
+	 * clunking the directory first.
+	 *
+	 * Probably the thing to do is switch to calling
+	 * getdirentries() / getdents() directly, instead of
+	 * going through libc.
+	 */
+	if (req->lr_req.io.offset == 0)
+		rewinddir(file->ff_dir);
+	else
+		seekdir(file->ff_dir, (long)req->lr_req.io.offset);
+
+	l9p_init_msg(&msg, req, L9P_PACK);
+	count = (uint32_t)msg.lm_size; /* in case we get no entries */
+	while ((dp = readdir(file->ff_dir)) != NULL) {
+		/*
+		 * Although "." is forbidden in naming and ".." is
+		 * special cased, testing shows that we must transmit
+		 * them through readdir.  (For ".." at root, we
+		 * should perhaps alter the inode number, but not
+		 * yet.)
+		 */
+
+		/*
+		 * TODO: we do a full lstat here; could use dp->d_*
+		 * to construct the qid more efficiently, as long
+		 * as dp->d_type != DT_UNKNOWN.
+		 */
+		/* Entries that cannot be stat'ed are skipped silently. */
+		if (fs_lstatat(file, dp->d_name, &st))
+			continue;
+
+		de.qid.type = 0;
+		generate_qid(&st, &de.qid);
+		/* Offset of the entry after this one, for resuming later. */
+		de.offset = (uint64_t)telldir(file->ff_dir);
+#ifdef __illumos__
+		/*
+		 * NOTE(review): this stores the raw S_IFMT bits, while the
+		 * other branch stores a d_type value -- confirm that the
+		 * width of de.type and the client's expectations match.
+		 */
+		de.type = st.st_mode & S_IFMT;
+#else
+		de.type = dp->d_type;
+#endif
+		de.name = dp->d_name;
+
+		/* Update count only if we completely pack the dirent. */
+		if (l9p_pudirent(&msg, &de) < 0)
+			break;
+		count = (uint32_t)msg.lm_size;
+	}
+
+	(void) pthread_mutex_unlock(&file->ff_mtx);
+	req->lr_resp.io.count = count;
+	return (error);
+}
+
+/*
+ * Tfsync handler: flush the file referred to by lr_fid.  When the fid
+ * has an open directory stream its descriptor is synced, otherwise the
+ * fid's file descriptor is.
+ *
+ * Returns 0 on success or the errno from fsync().
+ */
+static int
+fs_fsync(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *ff = req->lr_fid->lo_aux;
+	int fd;
+
+	assert(ff);
+
+	if (ff->ff_dir != NULL)
+		fd = dirfd(ff->ff_dir);
+	else
+		fd = ff->ff_fd;
+
+	if (fsync(fd) != 0)
+		return (errno);
+	return (0);
+}
+
+/*
+ * Tlock handler: no lock is actually taken.  A request with a known
+ * lock type is answered with L9PL_LOCK_SUCCESS; any other type yields
+ * EINVAL.
+ */
+static int
+fs_lock(void *softc __unused, struct l9p_request *req)
+{
+
+	if (req->lr_req.tlock.type != L9PL_LOCK_TYPE_RDLOCK &&
+	    req->lr_req.tlock.type != L9PL_LOCK_TYPE_WRLOCK &&
+	    req->lr_req.tlock.type != L9PL_LOCK_TYPE_UNLOCK)
+		return (EINVAL);
+
+	req->lr_resp.rlock.status = L9PL_LOCK_SUCCESS;
+	return (0);
+}
+
+/*
+ * Tgetlock handler: echo the request back with the lock type changed
+ * to "unlocked" and an empty, heap-allocated client id.
+ *
+ * Returns 0 on success, EINVAL for an unknown lock type, or ENOMEM if
+ * the client id cannot be allocated (the response is then untouched).
+ */
+static int
+fs_getlock(void *softc __unused, struct l9p_request *req)
+{
+	char *client_id;
+
+	/*
+	 * Client wants to see if a request to lock a region would
+	 * block.  This is, of course, not atomic anyway, so the
+	 * op is useless.  QEMU simply says "unlocked!", so we do
+	 * too.
+	 */
+	switch (req->lr_req.getlock.type) {
+	case L9PL_LOCK_TYPE_RDLOCK:
+	case L9PL_LOCK_TYPE_WRLOCK:
+	case L9PL_LOCK_TYPE_UNLOCK:
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	/* Allocate first so a failure never leaves a NULL id in the reply. */
+	client_id = strdup("");  /* XXX what should go here? */
+	if (client_id == NULL)
+		return (ENOMEM);
+
+	req->lr_resp.getlock = req->lr_req.getlock;
+	req->lr_resp.getlock.type = L9PL_LOCK_TYPE_UNLOCK;
+	req->lr_resp.getlock.client_id = client_id;
+	return (0);
+}
+
+/*
+ * Tlink handler: create a hard link named tlink.name, in the directory
+ * referred to by lr_fid2, to the file referred to by lr_fid.
+ *
+ * Returns 0 on success, EROFS on a read-only export (matching the
+ * other handlers that modify the tree), EISDIR if the file is a
+ * directory, or an error from name building, fstatat(), the access
+ * check, or linkat().
+ */
+static int
+fs_link(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_fid *dir;
+	struct fs_fid *file;
+	struct fs_fid *dirf;
+	struct stat fst, tdst;
+	char *name;
+	char newname[MAXPATHLEN];
+	int error;
+
+	/* Creating a directory entry is a write; refuse it when read-only. */
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/* N.B.: lr_fid is the file to link, lr_fid2 is the target dir */
+	dir = req->lr_fid2;
+	dirf = dir->lo_aux;
+	assert(dirf != NULL);
+
+	name = req->lr_req.tlink.name;
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	file = req->lr_fid->lo_aux;
+	assert(file != NULL);
+
+	if (fstatat(dirf->ff_dirfd, dirf->ff_name, &tdst, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fstatat(file->ff_dirfd, file->ff_name, &fst, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (S_ISDIR(fst.st_mode))
+		return (EISDIR);
+	fillacl(dirf);
+	/*
+	 * Directories were rejected above, so the operation to check is
+	 * always "add file" in the target directory.
+	 */
+	error = check_access(L9P_ACE_ADD_FILE,
+	    dirf->ff_acl, &tdst, NULL, NULL, file->ff_ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	if (linkat(file->ff_dirfd, file->ff_name, file->ff_dirfd,
+	    newname, 0) != 0)
+		error = errno;
+	else
+		dropacl(file);
+
+	return (error);
+}
+
+/*
+ * Tmkdir handler: create directory tmkdir.name in the directory
+ * referred to by lr_fid via fs_imkdir(), and on success store the new
+ * directory's qid in the Rmkdir response.
+ *
+ * Returns 0 on success or the error from fs_imkdir().
+ */
+static int
+fs_mkdir(void *softc, struct l9p_request *req)
+{
+	struct stat sb;
+	int rv;
+
+	rv = fs_imkdir(softc, req->lr_fid, req->lr_req.tmkdir.name, false,
+	    (mode_t)req->lr_req.tmkdir.mode, (gid_t)req->lr_req.tmkdir.gid,
+	    &sb);
+	if (rv != 0)
+		return (rv);
+
+	generate_qid(&sb, &req->lr_resp.rmkdir.qid);
+	return (0);
+}
+
+/*
+ * Trenameat handler: rename trenameat.oldname in the directory
+ * referred to by lr_fid to trenameat.newname in the directory referred
+ * to by lr_fid2.
+ *
+ * Fails with EROFS on a read-only export and with ENOTDIR if a
+ * directory fid does not refer to a directory.  The unlink and add
+ * access checks are made only when the two directories differ.
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+fs_renameat(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_fid *olddir, *newdir;
+	struct l9p_acl *facl;
+	struct fs_fid *off, *nff;
+	struct stat odst, ndst, fst;
+	int32_t op;
+	bool reparenting;
+	char *onp, *nnp;
+	char onb[MAXPATHLEN], nnb[MAXPATHLEN];
+	int error;
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	olddir = req->lr_fid;
+	newdir = req->lr_fid2;
+	assert(olddir != NULL && newdir != NULL);
+	off = olddir->lo_aux;
+	nff = newdir->lo_aux;
+	assert(off != NULL && nff != NULL);
+
+	/* onb/nnb receive the old and new names built by fs_buildname(). */
+	onp = req->lr_req.trenameat.oldname;
+	nnp = req->lr_req.trenameat.newname;
+	error = fs_buildname(olddir, onp, onb, sizeof(onb));
+	if (error)
+		return (error);
+	error = fs_buildname(newdir, nnp, nnb, sizeof(nnb));
+	if (error)
+		return (error);
+	/* fst: stat of the entry being renamed (symlinks not followed). */
+	if (fstatat(off->ff_dirfd, onb, &fst, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/* Same fid or same directory path means the parent is unchanged. */
+	reparenting = olddir != newdir &&
+	    strcmp(off->ff_name, nff->ff_name) != 0;
+
+	if (fstatat(off->ff_dirfd, off->ff_name, &odst, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (!S_ISDIR(odst.st_mode))
+		return (ENOTDIR);
+	fillacl(off);
+
+	if (reparenting) {
+		if (fstatat(nff->ff_dirfd, nff->ff_name, &ndst, AT_SYMLINK_NOFOLLOW) != 0)
+			return (errno);
+		if (!S_ISDIR(ndst.st_mode))
+			return (ENOTDIR);
+		facl = getacl(off, -1, onb);
+		fillacl(nff);
+
+		/* Leaving the old directory counts as an unlink from it. */
+		error = check_access(L9P_ACOP_UNLINK,
+		    off->ff_acl, &odst, facl, &fst, off->ff_ai, (gid_t)-1);
+		l9p_acl_free(facl);
+		if (error)
+			return (error);
+		/* Entering the new directory counts as adding an entry. */
+		op = S_ISDIR(fst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY :
+		    L9P_ACE_ADD_FILE;
+		error = check_access(op,
+		    nff->ff_acl, &ndst, NULL, NULL, nff->ff_ai, (gid_t)-1);
+		if (error)
+			return (error);
+	}
+
+	/* error is 0 here unless renameat() fails. */
+	if (renameat(off->ff_dirfd, onb, nff->ff_dirfd, nnb))
+		error = errno;
+
+	return (error);
+}
+
+/*
+ * Tunlinkat handler: remove the entry tunlinkat.name from the
+ * directory referred to by lr_fid.  With L9PL_AT_REMOVEDIR set in the
+ * request flags the entry is removed as a directory, otherwise as a
+ * file.
+ *
+ * Returns 0 on success, EROFS on a read-only export, or an error from
+ * name building, fstatat(), the access check, or unlinkat().
+ */
+static int
+fs_unlinkat(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_acl *victim_acl;
+	struct fs_fid *dff;
+	struct stat dsb, vsb;
+	char path[MAXPATHLEN];
+	int atflag, rv;
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	dff = req->lr_fid->lo_aux;
+	assert(dff != NULL);
+
+	rv = fs_buildname(req->lr_fid, req->lr_req.tunlinkat.name,
+	    path, sizeof(path));
+	if (rv != 0)
+		return (rv);
+
+	/* Stat the victim first, then the directory that holds it. */
+	if (fstatat(dff->ff_dirfd, path, &vsb, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (fstatat(dff->ff_dirfd, dff->ff_name, &dsb,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	fillacl(dff);
+	victim_acl = getacl(dff, -1, path);
+	rv = check_access(L9P_ACOP_UNLINK,
+	    dff->ff_acl, &dsb, victim_acl, &vsb, dff->ff_ai, (gid_t)-1);
+	l9p_acl_free(victim_acl);
+	if (rv != 0)
+		return (rv);
+
+	/* Translate the 9P flag into the host's unlinkat() flag. */
+	atflag = 0;
+	if (req->lr_req.tunlinkat.flags & L9PL_AT_REMOVEDIR)
+		atflag = AT_REMOVEDIR;
+
+	if (unlinkat(dff->ff_dirfd, path, atflag) != 0)
+		return (errno);
+	return (0);
+}
+
+/*
+ * Backend freefid hook: release everything hanging off fid->lo_aux --
+ * the open file descriptor and directory stream (if any), the mutex,
+ * the name, the ACL and the fs_fid itself -- then drop one reference
+ * on the shared authinfo, freeing it when the count reaches zero.
+ * Does nothing if the fid has no lo_aux.
+ */
+static void
+fs_freefid(void *softc __unused, struct l9p_fid *fid)
+{
+	struct fs_fid *f = fid->lo_aux;
+	struct fs_authinfo *ai;
+	uint32_t newcount;
+
+	if (f == NULL) {
+		/* Nothing to do here */
+		return;
+	}
+
+	if (f->ff_fd != -1)
+		close(f->ff_fd);
+
+	if (f->ff_dir)
+		closedir(f->ff_dir);
+
+	(void) pthread_mutex_destroy(&f->ff_mtx);
+	free(f->ff_name);
+	/* Save the authinfo pointer before f itself is freed. */
+	ai = f->ff_ai;
+	l9p_acl_free(f->ff_acl);
+	free(f);
+	/* The reference count is only changed with ai_mtx held. */
+	(void) pthread_mutex_lock(&ai->ai_mtx);
+	newcount = --ai->ai_refcnt;
+	(void) pthread_mutex_unlock(&ai->ai_mtx);
+	if (newcount == 0) {
+		/*
+		 * We *were* the last ref, no one can have gained a ref.
+		 */
+		L9P_LOG(L9P_DEBUG, "dropped last ref to authinfo %p",
+		    (void *)ai);
+		(void) pthread_mutex_destroy(&ai->ai_mtx);
+		free(ai);
+	} else {
+		L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu",
+		    (void *)ai, (u_long)newcount);
+	}
+}
+
+/*
+ * Create a filesystem backend rooted at the directory descriptor
+ * rootfd and return it through *backendp.  When ro is true the
+ * handlers that check fs_readonly refuse modifications with EROFS.
+ *
+ * The first call also initializes the file-wide attach mutex.
+ *
+ * Returns 0 on success.  Returns -1 on failure; for the mutex setup
+ * errno is set to the pthread error code.  Nothing is stored in
+ * *backendp on failure.
+ */
+int
+l9p_backend_fs_init(struct l9p_backend **backendp, int rootfd, bool ro)
+{
+	struct l9p_backend *backend;
+	struct fs_softc *sc;
+	int error;
+#if defined(WITH_CASPER)
+	cap_channel_t *capcas;
+#endif
+
+	if (!fs_attach_mutex_inited) {
+#ifdef __illumos__
+		if ((error = pthread_mutexattr_init(&fs_mutexattr)) != 0) {
+			errno = error;
+			return (-1);
+		}
+		if ((error = pthread_mutexattr_settype(&fs_mutexattr,
+		    PTHREAD_MUTEX_ERRORCHECK)) != 0) {
+			errno = error;
+			return (-1);
+		}
+		error = pthread_mutex_init(&fs_attach_mutex, &fs_mutexattr);
+#else
+		error = pthread_mutex_init(&fs_attach_mutex, NULL);
+#endif
+		if (error) {
+			errno = error;
+			return (-1);
+		}
+		fs_attach_mutex_inited = true;
+	}
+
+	backend = l9p_malloc(sizeof(*backend));
+	backend->attach = fs_attach;
+	backend->clunk = fs_clunk;
+	backend->create = fs_create;
+	backend->open = fs_open;
+	backend->read = fs_read;
+	backend->remove = fs_remove;
+	backend->stat = fs_stat;
+	backend->walk = fs_walk;
+	backend->write = fs_write;
+	backend->wstat = fs_wstat;
+	backend->statfs = fs_statfs;
+	backend->lopen = fs_lopen;
+	backend->lcreate = fs_lcreate;
+	backend->symlink = fs_symlink;
+	backend->mknod = fs_mknod;
+	backend->rename = fs_rename;
+	backend->readlink = fs_readlink;
+	backend->getattr = fs_getattr;
+	backend->setattr = fs_setattr;
+	backend->xattrwalk = fs_xattrwalk;
+	backend->xattrcreate = fs_xattrcreate;
+	backend->readdir = fs_readdir;
+	backend->fsync = fs_fsync;
+	backend->lock = fs_lock;
+	backend->getlock = fs_getlock;
+	backend->link = fs_link;
+	backend->mkdir = fs_mkdir;
+	backend->renameat = fs_renameat;
+	backend->unlinkat = fs_unlinkat;
+	backend->freefid = fs_freefid;
+
+	sc = l9p_malloc(sizeof(*sc));
+	sc->fs_rootfd = rootfd;
+	sc->fs_readonly = ro;
+	backend->softc = sc;
+
+#if defined(__illumos__)
+	/*
+	 * The softc is not assumed to be zero-filled, so store the flag
+	 * in both cases rather than only when extended attributes are
+	 * available; getcrtime() reads it unconditionally.
+	 */
+	sc->fs_hasxattr = (fpathconf(rootfd, _PC_XATTR_ENABLED) > 0);
+#endif
+
+#if defined(WITH_CASPER)
+	capcas = cap_init();
+	if (capcas == NULL)
+		goto fail;
+
+	sc->fs_cappwd = cap_service_open(capcas, "system.pwd");
+	if (sc->fs_cappwd == NULL) {
+		error = errno;
+		cap_close(capcas);
+		errno = error;
+		goto fail;
+	}
+
+	sc->fs_capgrp = cap_service_open(capcas, "system.grp");
+	if (sc->fs_capgrp == NULL) {
+		error = errno;
+		cap_close(sc->fs_cappwd);
+		cap_close(capcas);
+		errno = error;
+		goto fail;
+	}
+
+	cap_setpassent(sc->fs_cappwd, 1);
+	cap_setgroupent(sc->fs_capgrp, 1);
+	cap_close(capcas);
+#elif defined(__illumos__)
+	setpwent();
+#else
+	setpassent(1);
+#endif
+
+	*backendp = backend;
+	return (0);
+
+#if defined(WITH_CASPER)
+fail:
+	/* Do not leak the half-built backend; keep errno for the caller. */
+	error = errno;
+	free(sc);
+	free(backend);
+	errno = error;
+	return (-1);
+#endif
+}
+
+#ifdef __illumos__
+/*
+ * illumos stand-in for acl_get_fd_np(): fetch the ACL of the open
+ * file fd with facl_get().  For ACL_TYPE_NFS4 the ACL_NO_TRIVIAL flag
+ * is passed; other types use no flags.
+ *
+ * Returns the ACL, or NULL if facl_get() fails.
+ */
+acl_t *
+acl_get_fd_np(int fd, int type)
+{
+	acl_t *aclp;
+	int flags = (type == ACL_TYPE_NFS4) ? ACL_NO_TRIVIAL : 0;
+
+	if (facl_get(fd, flags, &aclp) != 0)
+		return (NULL);
+
+	return (aclp);
+}
+
+/*
+ * Look up the "crtime" system attribute of fname (relative to dirfd)
+ * and return it as seconds and nanoseconds through secp and nsp.
+ * Both outputs are set to 0 when extended attributes are not enabled
+ * for this backend, the attributes cannot be read, or "crtime" is
+ * missing or does not hold exactly two values.
+ */
+static void
+getcrtime(struct fs_softc *sc, int dirfd, const char *fname, uint64_t *secp,
+    uint64_t *nsp)
+{
+	nvlist_t *attrs = NULL;
+	uint64_t *crtime = NULL;
+	uint_t cnt = 0;
+
+	/* Default answer for every failure path below. */
+	*secp = 0;
+	*nsp = 0;
+
+	if (!sc->fs_hasxattr)
+		return;
+
+	if (getattrat(dirfd, XATTR_VIEW_READWRITE, fname, &attrs) != 0)
+		return;
+
+	if (nvlist_lookup_uint64_array(attrs, "crtime", &crtime, &cnt) == 0 &&
+	    cnt == 2) {
+		*secp = crtime[0];
+		*nsp = crtime[1];
+	}
+
+	nvlist_free(attrs);
+}
+#endif
diff --git a/usr/src/lib/lib9p/common/backend/fs.h b/usr/src/lib/lib9p/common/backend/fs.h
new file mode 100644
index 0000000000..84b37171c2
--- /dev/null
+++ b/usr/src/lib/lib9p/common/backend/fs.h
@@ -0,0 +1,37 @@
+
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_BACKEND_FS_H
+#define LIB9P_BACKEND_FS_H
+
+#include <stdbool.h>
+#include "backend.h"
+
+/*
+ * Create a filesystem backend that serves the tree under the directory
+ * descriptor rootfd and return it through *backendp.  If ro is true the
+ * backend is marked read-only.  Returns 0 on success, -1 on failure.
+ */
+int l9p_backend_fs_init(struct l9p_backend **backendp, int rootfd, bool ro);
+
+#endif  /* LIB9P_BACKEND_FS_H */
diff --git a/usr/src/lib/lib9p/common/connection.c b/usr/src/lib/lib9p/common/connection.c
new file mode 100644
index 0000000000..20c27796b8
--- /dev/null
+++ b/usr/src/lib/lib9p/common/connection.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/queue.h>
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "fid.h"
+#include "hashtable.h"
+#include "log.h"
+#include "threadpool.h"
+#include "backend/backend.h"
+
+/*
+ * Allocate a zeroed server object bound to the given backend, with an
+ * empty connection list and L9P_2000L as the highest protocol version,
+ * and return it through *serverp.  Always returns 0.
+ */
+int
+l9p_server_init(struct l9p_server **serverp, struct l9p_backend *backend)
+{
+	struct l9p_server *srv = l9p_calloc(1, sizeof (*srv));
+
+	LIST_INIT(&srv->ls_conns);
+	srv->ls_backend = backend;
+	srv->ls_max_version = L9P_2000L;
+
+	*serverp = srv;
+	return (0);
+}
+
+/*
+ * Allocate a connection for server, start its thread pool, set up its
+ * fid and request tables, link it onto the server's connection list
+ * and return it through *conn.
+ *
+ * Returns 0 on success, or -1 if the allocation or the thread pool
+ * initialization fails (nothing is stored in *conn then).
+ */
+int
+l9p_connection_init(struct l9p_server *server, struct l9p_connection **conn)
+{
+	struct l9p_connection *c;
+
+	assert(server != NULL);
+	assert(conn != NULL);
+
+	if ((c = calloc(1, sizeof (*c))) == NULL)
+		return (-1);
+
+	c->lc_server = server;
+	c->lc_msize = L9P_DEFAULT_MSIZE;
+
+	if (l9p_threadpool_init(&c->lc_tp, L9P_NUMTHREADS) != 0) {
+		free(c);
+		return (-1);
+	}
+
+	ht_init(&c->lc_files, 100);
+	ht_init(&c->lc_requests, 100);
+	LIST_INSERT_HEAD(&server->ls_conns, c, lc_link);
+
+	*conn = c;
+	return (0);
+}
+
+/*
+ * Unlink conn from its server's connection list and free the
+ * connection structure itself.  The thread pool and the fid/request
+ * tables are not touched here; l9p_connection_close() tears those
+ * down.
+ */
+void
+l9p_connection_free(struct l9p_connection *conn)
+{
+
+	LIST_REMOVE(conn, lc_link);
+	free(conn);
+}
+
+/*
+ * Accept one incoming 9P message, described by the niov buffers in
+ * iov, on conn.  A request object is allocated, the message is
+ * unpacked, the request is registered under its tag, response buffers
+ * are obtained from the transport, and the request is handed to the
+ * connection's thread pool.  aux is stored in the request as lr_aux.
+ *
+ * If any step fails a warning is logged and the request is dropped
+ * without a response.  This includes a message with more buffers than
+ * the request's iovec array can hold.
+ */
+void
+l9p_connection_recv(struct l9p_connection *conn, const struct iovec *iov,
+    const size_t niov, void *aux)
+{
+	struct l9p_request *req;
+	int error;
+
+	req = l9p_calloc(1, sizeof (struct l9p_request));
+	req->lr_aux = aux;
+	req->lr_conn = conn;
+
+	/*
+	 * lm_iov is a fixed-size array inside the request; refuse a
+	 * buffer list that would overrun it instead of copying blindly.
+	 */
+	if (niov > sizeof (req->lr_req_msg.lm_iov) /
+	    sizeof (req->lr_req_msg.lm_iov[0])) {
+		L9P_LOG(L9P_WARNING, "too many buffers in received message");
+		free(req);
+		return;
+	}
+
+	req->lr_req_msg.lm_mode = L9P_UNPACK;
+	req->lr_req_msg.lm_niov = niov;
+	memcpy(req->lr_req_msg.lm_iov, iov, sizeof (struct iovec) * niov);
+
+	req->lr_resp_msg.lm_mode = L9P_PACK;
+
+	if (l9p_pufcall(&req->lr_req_msg, &req->lr_req, conn->lc_version) != 0) {
+		L9P_LOG(L9P_WARNING, "cannot unpack received message");
+		l9p_freefcall(&req->lr_req);
+		free(req);
+		return;
+	}
+
+	/* A tag may only be used by one outstanding request at a time. */
+	if (ht_add(&conn->lc_requests, req->lr_req.hdr.tag, req)) {
+		L9P_LOG(L9P_WARNING, "client reusing outstanding tag %d",
+		    req->lr_req.hdr.tag);
+		l9p_freefcall(&req->lr_req);
+		free(req);
+		return;
+	}
+
+	error = conn->lc_lt.lt_get_response_buffer(req,
+	    req->lr_resp_msg.lm_iov,
+	    &req->lr_resp_msg.lm_niov,
+	    conn->lc_lt.lt_aux);
+	if (error) {
+		L9P_LOG(L9P_WARNING, "cannot obtain buffers for response");
+		/* Undo the tag registration made above. */
+		ht_remove(&conn->lc_requests, req->lr_req.hdr.tag);
+		l9p_freefcall(&req->lr_req);
+		free(req);
+		return;
+	}
+
+	/*
+	 * NB: it's up to l9p_threadpool_run to decide whether
+	 * to queue the work or to run it immediately and wait
+	 * (it must do the latter for Tflush requests).
+	 */
+	l9p_threadpool_run(&conn->lc_tp, req);
+}
+
+/*
+ * Shut a connection down: stop its thread pool, finish every request
+ * still in the request table through the no-answer path of
+ * l9p_respond(), release every fid still in the fid table through the
+ * backend's freefid hook, and destroy both tables.  The connection
+ * structure itself is not freed here.
+ */
+void
+l9p_connection_close(struct l9p_connection *conn)
+{
+	struct ht_iter iter;
+	struct l9p_fid *fid;
+	struct l9p_request *req;
+
+	L9P_LOG(L9P_DEBUG, "waiting for thread pool to shut down");
+	l9p_threadpool_shutdown(&conn->lc_tp);
+
+	/* Drain pending requests (if any) */
+	L9P_LOG(L9P_DEBUG, "draining pending requests");
+	ht_iter(&conn->lc_requests, &iter);
+	while ((req = ht_next(&iter)) != NULL) {
+#ifdef notyet
+		/* XXX would be good to know if there is anyone listening */
+		if (anyone listening) {
+			/* XXX crude - ops like Tclunk should succeed */
+			req->lr_error = EINTR;
+			l9p_respond(req, false, false);
+		} else
+#endif
+		l9p_respond(req, true, false);	/* use no-answer path */
+		ht_remove_at_iter(&iter);
+	}
+
+	/* Close opened files (if any) */
+	L9P_LOG(L9P_DEBUG, "closing opened files");
+	ht_iter(&conn->lc_files, &iter);
+	while ((fid = ht_next(&iter)) != NULL) {
+		/* Let the backend release its per-fid state first. */
+		conn->lc_server->ls_backend->freefid(
+		    conn->lc_server->ls_backend->softc, fid);
+		free(fid);
+		ht_remove_at_iter(&iter);
+	}
+
+	ht_destroy(&conn->lc_requests);
+	ht_destroy(&conn->lc_files);
+}
+
+/*
+ * Allocate a zeroed fid object numbered fid and insert it into the
+ * connection's fid table.  The new fid is not marked valid yet.
+ *
+ * Returns the new fid, or NULL if the insert fails (which is what
+ * happens when the fid number is already in use).
+ */
+struct l9p_fid *
+l9p_connection_alloc_fid(struct l9p_connection *conn, uint32_t fid)
+{
+	struct l9p_fid *newfid = l9p_calloc(1, sizeof (struct l9p_fid));
+
+	newfid->lo_fid = fid;
+
+	/*
+	 * A successful insert leaves an invalid fid in the table, as
+	 * desired; a failed one means the entry is discarded.
+	 */
+	if (ht_add(&conn->lc_files, fid, newfid) == 0)
+		return (newfid);
+
+	free(newfid);
+	return (NULL);
+}
+
+/*
+ * Destroy a fid: let the backend release its per-fid state through the
+ * freefid hook, remove the fid from the connection's fid table and
+ * free it.  The caller must already have marked the fid invalid.
+ */
+void
+l9p_connection_remove_fid(struct l9p_connection *conn, struct l9p_fid *fid)
+{
+	struct l9p_backend *be;
+
+	/* fid should be marked invalid by this point */
+	assert(!l9p_fid_isvalid(fid));
+
+	be = conn->lc_server->ls_backend;
+	be->freefid(be->softc, fid);
+
+	/* lo_fid is read for the table removal before the fid is freed. */
+	ht_remove(&conn->lc_files, fid->lo_fid);
+	free(fid);
+}
diff --git a/usr/src/lib/lib9p/common/fcall.h b/usr/src/lib/lib9p/common/fcall.h
new file mode 100644
index 0000000000..f779ea6ad5
--- /dev/null
+++ b/usr/src/lib/lib9p/common/fcall.h
@@ -0,0 +1,624 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail>
+ */
+
+#ifndef LIB9P_FCALL_H
+#define LIB9P_FCALL_H
+
+#include <stdint.h>
+
+#define L9P_MAX_WELEM   256
+
+/*
+ * Function call/reply (Tfoo/Rfoo) numbers.
+ *
+ * These are protocol code numbers, so the exact values matter
+ * (note the gaps in the sequence).  L9P__FIRST and
+ * L9P__LAST_PLUS_1, however, exist only for debug code and just
+ * need to bracket the entire range.
+ * The debug code also relies on Rfoo == Tfoo + 1 for every pair.
+ */
+enum l9p_ftype {
+	L9P__FIRST = 6,		/* NB: must be <= all legal values */
+	L9P_TLERROR = 6,	/* illegal; exists for parity with Rlerror */
+	L9P_RLERROR,
+	L9P_TSTATFS = 8,
+	L9P_RSTATFS,
+	L9P_TLOPEN = 12,
+	L9P_RLOPEN,
+	L9P_TLCREATE = 14,
+	L9P_RLCREATE,
+	L9P_TSYMLINK = 16,
+	L9P_RSYMLINK,
+	L9P_TMKNOD = 18,
+	L9P_RMKNOD,
+	L9P_TRENAME = 20,
+	L9P_RRENAME,
+	L9P_TREADLINK = 22,
+	L9P_RREADLINK,
+	L9P_TGETATTR = 24,
+	L9P_RGETATTR,
+	L9P_TSETATTR = 26,
+	L9P_RSETATTR,
+	L9P_TXATTRWALK = 30,
+	L9P_RXATTRWALK,
+	L9P_TXATTRCREATE = 32,
+	L9P_RXATTRCREATE,
+	L9P_TREADDIR = 40,
+	L9P_RREADDIR,
+	L9P_TFSYNC = 50,
+	L9P_RFSYNC,
+	L9P_TLOCK = 52,
+	L9P_RLOCK,
+	L9P_TGETLOCK = 54,
+	L9P_RGETLOCK,
+	L9P_TLINK = 70,
+	L9P_RLINK,
+	L9P_TMKDIR = 72,
+	L9P_RMKDIR,
+	L9P_TRENAMEAT = 74,
+	L9P_RRENAMEAT,
+	L9P_TUNLINKAT = 76,
+	L9P_RUNLINKAT,
+	L9P_TVERSION = 100,
+	L9P_RVERSION,
+	L9P_TAUTH = 102,
+	L9P_RAUTH,
+	L9P_TATTACH = 104,
+	L9P_RATTACH,
+	L9P_TERROR = 106, 	/* illegal */
+	L9P_RERROR,
+	L9P_TFLUSH = 108,
+	L9P_RFLUSH,
+	L9P_TWALK = 110,
+	L9P_RWALK,
+	L9P_TOPEN = 112,
+	L9P_ROPEN,
+	L9P_TCREATE = 114,
+	L9P_RCREATE,
+	L9P_TREAD = 116,
+	L9P_RREAD,
+	L9P_TWRITE = 118,
+	L9P_RWRITE,
+	L9P_TCLUNK = 120,
+	L9P_RCLUNK,
+	L9P_TREMOVE = 122,
+	L9P_RREMOVE,
+	L9P_TSTAT = 124,
+	L9P_RSTAT,
+	L9P_TWSTAT = 126,
+	L9P_RWSTAT,
+	L9P__LAST_PLUS_1,	/* NB: must be last */
+};
+
+/*
+ * When a Tfoo request comes over the wire, we decode it
+ * (pack.c) from wire format into a request laid out in
+ * a "union l9p_fcall" object.  This object is not in wire
+ * format, but rather in something more convenient for us
+ * to operate on.
+ *
+ * We then dispatch the request (request.c, backend/fs.c) and
+ * use another "union l9p_fcall" object to build a reply.
+ * The reply is converted to wire format on the way back out
+ * (pack.c again).
+ *
+ * All sub-objects start with a header containing the request
+ * or reply type code and two-byte tag, and whether or not it
+ * is needed, a four-byte fid.
+ *
+ * What this means here is that the data structures within
+ * the union can be shared across various requests and replies.
+ * For instance, replies to OPEN, CREATE, LCREATE, LOPEN, MKDIR, and
+ * SYMLINK are all fairly similar (providing a qid and sometimes
+ * an iounit) and hence can all use the l9p_f_ropen structure.
+ * Which structures are used for which operations is somewhat
+ * arbitrary; for programming ease, if an operation shares a
+ * data structure, it still has its own name: there are union
+ * members named ropen, rcreate, rlcreate, rlopen, rmkdir, and
+ * rsymlink, even though all use struct l9p_f_ropen.
+ *
+ * The big exception to the above rule is struct l9p_f_io, which
+ * is used as both request and reply for all of READ, WRITE, and
+ * READDIR.  Moreover, the READDIR reply must be pre-packed into
+ * wire format (it is handled like raw data a la READ).
+ *
+ * Some request messages (e.g., TREADLINK) fit in a header, having
+ * just type code, tag, and fid.  These have no separate data
+ * structure, nor union member name.  Similarly, some reply
+ * messages (e.g., RCLUNK, RREMOVE, RRENAME) have just the type
+ * code and tag.
+ */
+
+/*
+ * Type code bits in (the first byte of) a qid; see l9p_qid.type.
+ */
+enum l9p_qid_type {
+	L9P_QTDIR = 0x80, /* type bit for directories */
+	L9P_QTAPPEND = 0x40, /* type bit for append only files */
+	L9P_QTEXCL = 0x20, /* type bit for exclusive use files */
+	L9P_QTMOUNT = 0x10, /* type bit for mounted channel */
+	L9P_QTAUTH = 0x08, /* type bit for authentication file */
+	L9P_QTTMP = 0x04, /* type bit for non-backed-up file */
+	L9P_QTSYMLINK = 0x02, /* type bit for symbolic link */
+	L9P_QTFILE = 0x00 /* no type bit set: plain file */
+};
+
+/*
+ * Extra permission bits (L9P_DM*) in create and file modes (stat).
+ */
+#define L9P_DMDIR 0x80000000	/* NOTE(review): presumably a macro because it exceeds INT_MAX */
+enum {
+	L9P_DMAPPEND = 0x40000000,
+	L9P_DMEXCL = 0x20000000,
+	L9P_DMMOUNT = 0x10000000,
+	L9P_DMAUTH = 0x08000000,
+	L9P_DMTMP = 0x04000000,
+	L9P_DMSYMLINK = 0x02000000,
+	/* 9P2000.u extensions */
+	L9P_DMDEVICE = 0x00800000,
+	L9P_DMNAMEDPIPE = 0x00200000,
+	L9P_DMSOCKET = 0x00100000,
+	L9P_DMSETUID = 0x00080000,
+	L9P_DMSETGID = 0x00040000,
+};
+
+/*
+ * Open/create mode bits in 9P2000 and 9P2000.u operations
+ * (not Linux lopen and lcreate flags, which are different).
+ * The mode field is only one byte wide (uint8_t in l9p_f_tcreate).
+ */
+enum l9p_omode {
+	L9P_OREAD = 0,	/* open for read */
+	L9P_OWRITE = 1,	/* write */
+	L9P_ORDWR = 2,	/* read and write */
+	L9P_OEXEC = 3,	/* execute, == read but check execute permission */
+	L9P_OACCMODE = 3, /* mask for the above access-mode bits */
+	L9P_OTRUNC = 16,	/* or'ed in (except for exec), truncate file first */
+	L9P_OCEXEC = 32,	/* or'ed in, close on exec */
+	L9P_ORCLOSE = 64,	/* or'ed in, remove on close */
+	L9P_ODIRECT = 128,	/* or'ed in, direct access */
+};
+
+/*
+ * Flag bits in 9P2000.L operations (Tlopen, Tlcreate).  These are
+ * basically just the Linux O_* flags.  The bottom 2 bits are the
+ * same as for l9p_omode, although open-for-exec is not used:
+ * instead, the client does a Tgetattr and checks the mode for
+ * execute bits, then just opens for reading.
+ *
+ * Each L9P_L_O_xxx is just the value O_xxx has on Linux in
+ * <fcntl.h> (written in octal below); not all are necessarily used.
+ * From observation, we do get L9P_L_O_CREAT and L9P_L_O_EXCL when
+ * creating with exclusive, and always get L9P_L_O_LARGEFILE.  We do
+ * get L9P_L_O_APPEND when opening for append.  We also get both
+ * L9P_L_O_DIRECT and L9P_L_O_DIRECTORY set when opening directories.
+ *
+ * We probably never get L9P_L_O_NOCTTY which makes no sense, and
+ * some of the other options may need to be handled on the client.
+ */
+enum l9p_l_o_flags {
+	L9P_L_O_CREAT =		000000100U,
+	L9P_L_O_EXCL =		000000200U,
+	L9P_L_O_NOCTTY =	000000400U,
+	L9P_L_O_TRUNC =		000001000U,
+	L9P_L_O_APPEND =	000002000U,
+	L9P_L_O_NONBLOCK =	000004000U,
+	L9P_L_O_DSYNC =		000010000U,
+	L9P_L_O_FASYNC =	000020000U,
+	L9P_L_O_DIRECT =	000040000U,
+	L9P_L_O_LARGEFILE =	000100000U,
+	L9P_L_O_DIRECTORY =	000200000U,
+	L9P_L_O_NOFOLLOW =	000400000U,
+	L9P_L_O_NOATIME =	001000000U,
+	L9P_L_O_CLOEXEC =	002000000U,
+	L9P_L_O_SYNC =		004000000U,
+	L9P_L_O_PATH =		010000000U,
+	L9P_L_O_TMPFILE =	020000000U,
+};
+
+struct l9p_hdr {		/* first member of every l9p_f_* struct below */
+	uint8_t type;		/* request or reply type code */
+	uint16_t tag;		/* two-byte tag */
+	uint32_t fid;		/* four-byte fid; present even when unused */
+};
+
+struct l9p_qid {
+	uint8_t  type;		/* enum l9p_qid_type bits */
+	uint32_t version;
+	uint64_t path;
+};
+
+struct l9p_stat {		/* payload of l9p_f_rstat and l9p_f_twstat */
+	uint16_t type;
+	uint32_t dev;
+	struct l9p_qid qid;
+	uint32_t mode;		/* may carry the L9P_DM* extra bits */
+	uint32_t atime;
+	uint32_t mtime;
+	uint64_t length;
+	char *name;
+	char *uid;
+	char *gid;
+	char *muid;
+	char *extension;
+	uint32_t n_uid;		/* NOTE(review): presumably numeric ids - confirm */
+	uint32_t n_gid;
+	uint32_t n_muid;
+};
+
+#define	L9P_FSTYPE	 0x01021997
+
+struct l9p_statfs {		/* payload of l9p_f_rstatfs */
+	uint32_t type;		/* file system type */
+	uint32_t bsize;		/* block size for I/O */
+	uint64_t blocks;	/* file system size (bsize-byte blocks) */
+	uint64_t bfree;		/* free blocks in fs */
+	uint64_t bavail;	/* free blocks avail to non-superuser */
+	uint64_t files;		/* file nodes in file system (# inodes) */
+	uint64_t ffree;		/* free file nodes in fs */
+	uint64_t fsid;		/* file system identifier */
+	uint32_t namelen;	/* maximum length of filenames */
+};
+
+struct l9p_f_version {		/* l9p_fcall member: version */
+	struct l9p_hdr hdr;
+	uint32_t msize;
+	char *version;
+};
+
+struct l9p_f_tflush {		/* l9p_fcall member: tflush */
+	struct l9p_hdr hdr;
+	uint16_t oldtag;	/* same width as l9p_hdr.tag */
+};
+
+struct l9p_f_error {		/* l9p_fcall member: error */
+	struct l9p_hdr hdr;
+	char *ename;
+	uint32_t errnum;
+};
+
+struct l9p_f_ropen {		/* shared by many replies; see union l9p_fcall */
+	struct l9p_hdr hdr;
+	struct l9p_qid qid;
+	uint32_t iounit;	/* only some of the sharing replies provide it */
+};
+
+struct l9p_f_rauth {		/* l9p_fcall member: rauth */
+	struct l9p_hdr hdr;
+	struct l9p_qid aqid;
+};
+
+struct l9p_f_attach {		/* l9p_fcall members: tattach, tauth */
+	struct l9p_hdr hdr;
+	uint32_t afid;		/* L9P_NOFID when there is no auth fid */
+	char *uname;
+	char *aname;
+	uint32_t n_uname;	/* L9P_NONUNAME when there is no n_uname */
+};
+#define	L9P_NOFID ((uint32_t)-1)	/* in Tattach, no auth fid */
+#define	L9P_NONUNAME ((uint32_t)-1)	/* in Tattach, no n_uname */
+
+struct l9p_f_tcreate {		/* l9p_fcall members: tcreate, topen */
+	struct l9p_hdr hdr;
+	uint32_t perm;
+	char *name;
+	uint8_t mode; /* +Topen; one byte wide, see enum l9p_omode */
+	char *extension;
+};
+
+struct l9p_f_twalk {		/* l9p_fcall member: twalk */
+	struct l9p_hdr hdr;
+	uint32_t newfid;
+	uint16_t nwname;	/* NOTE(review): presumably entries used in wname[] */
+	char *wname[L9P_MAX_WELEM];
+};
+
+struct l9p_f_rwalk {		/* l9p_fcall member: rwalk */
+	struct l9p_hdr hdr;
+	uint16_t nwqid;		/* NOTE(review): presumably entries used in wqid[] */
+	struct l9p_qid wqid[L9P_MAX_WELEM];
+};
+
+struct l9p_f_io {		/* request and reply for READ, WRITE, READDIR */
+	struct l9p_hdr hdr;
+	uint64_t offset; /* Tread, Twrite, Treaddir */
+	uint32_t count; /* Tread, Twrite, Rread, Treaddir, Rreaddir */
+};
+
+struct l9p_f_rstat {		/* l9p_fcall member: rstat */
+	struct l9p_hdr hdr;
+	struct l9p_stat stat;
+};
+
+struct l9p_f_twstat {		/* l9p_fcall member: twstat */
+	struct l9p_hdr hdr;
+	struct l9p_stat stat;
+};
+
+struct l9p_f_rstatfs {		/* l9p_fcall member: rstatfs */
+	struct l9p_hdr hdr;
+	struct l9p_statfs statfs;
+};
+
+/* Used for Tlcreate, Tlopen, Tmkdir, Tunlinkat (one union member each). */
+struct l9p_f_tlcreate {
+	struct l9p_hdr hdr;
+	char *name;		/* Tlcreate, Tmkdir, Tunlinkat */
+	uint32_t flags;		/* Tlcreate, Tlopen, Tmkdir, Tunlinkat */
+	uint32_t mode;		/* Tlcreate, Tmkdir */
+	uint32_t gid;		/* Tlcreate, Tmkdir */
+};
+
+struct l9p_f_tsymlink {		/* l9p_fcall member: tsymlink */
+	struct l9p_hdr hdr;
+	char *name;
+	char *symtgt;		/* NOTE(review): presumably the link target - confirm */
+	uint32_t gid;
+};
+
+struct l9p_f_tmknod {		/* l9p_fcall member: tmknod */
+	struct l9p_hdr hdr;
+	char *name;
+	uint32_t mode;
+	uint32_t major;
+	uint32_t minor;
+	uint32_t gid;
+};
+
+struct l9p_f_trename {		/* l9p_fcall member: trename */
+	struct l9p_hdr hdr;
+	uint32_t dfid;		/* NOTE(review): presumably a directory fid - confirm */
+	char *name;
+};
+
+struct l9p_f_rreadlink {	/* l9p_fcall member: rreadlink */
+	struct l9p_hdr hdr;
+	char *target;
+};
+
+struct l9p_f_tgetattr {		/* l9p_fcall member: tgetattr */
+	struct l9p_hdr hdr;
+	uint64_t request_mask;	/* enum l9pl_getattr_flags bits */
+};
+
+struct l9p_f_rgetattr {		/* l9p_fcall member: rgetattr */
+	struct l9p_hdr hdr;
+	uint64_t valid;		/* enum l9pl_getattr_flags bits */
+	struct l9p_qid qid;
+	uint32_t mode;
+	uint32_t uid;
+	uint32_t gid;
+	uint64_t nlink;
+	uint64_t rdev;
+	uint64_t size;
+	uint64_t blksize;
+	uint64_t blocks;
+	uint64_t atime_sec;
+	uint64_t atime_nsec;
+	uint64_t mtime_sec;
+	uint64_t mtime_nsec;
+	uint64_t ctime_sec;
+	uint64_t ctime_nsec;
+	uint64_t btime_sec;
+	uint64_t btime_nsec;
+	uint64_t gen;
+	uint64_t data_version;
+};
+
+/* Flag bits in req->request_mask and reply->valid for Tgetattr, Rgetattr. */
+enum l9pl_getattr_flags {
+	L9PL_GETATTR_MODE = 0x00000001,
+	L9PL_GETATTR_NLINK = 0x00000002,
+	L9PL_GETATTR_UID = 0x00000004,
+	L9PL_GETATTR_GID = 0x00000008,
+	L9PL_GETATTR_RDEV = 0x00000010,
+	L9PL_GETATTR_ATIME = 0x00000020,
+	L9PL_GETATTR_MTIME = 0x00000040,
+	L9PL_GETATTR_CTIME = 0x00000080,
+	L9PL_GETATTR_INO = 0x00000100,
+	L9PL_GETATTR_SIZE = 0x00000200,
+	L9PL_GETATTR_BLOCKS = 0x00000400,
+	/* everything up to and including BLOCKS is BASIC (0x000007ff) */
+	L9PL_GETATTR_BASIC = L9PL_GETATTR_MODE |
+		L9PL_GETATTR_NLINK |
+		L9PL_GETATTR_UID |
+		L9PL_GETATTR_GID |
+		L9PL_GETATTR_RDEV |
+		L9PL_GETATTR_ATIME |
+		L9PL_GETATTR_MTIME |
+		L9PL_GETATTR_CTIME |
+		L9PL_GETATTR_INO |
+		L9PL_GETATTR_SIZE |
+		L9PL_GETATTR_BLOCKS,
+	L9PL_GETATTR_BTIME = 0x00000800,
+	L9PL_GETATTR_GEN = 0x00001000,
+	L9PL_GETATTR_DATA_VERSION = 0x00002000,
+	/* BASIC + birthtime + gen + data-version = ALL (0x00003fff) */
+	L9PL_GETATTR_ALL = L9PL_GETATTR_BASIC |
+		L9PL_GETATTR_BTIME |
+		L9PL_GETATTR_GEN |
+		L9PL_GETATTR_DATA_VERSION,
+};
+
+struct l9p_f_tsetattr {		/* l9p_fcall member: tsetattr */
+	struct l9p_hdr hdr;
+	uint32_t valid;		/* enum l9pl_setattr_flags bits (32-bit here) */
+	uint32_t mode;
+	uint32_t uid;
+	uint32_t gid;
+	uint64_t size;
+	uint64_t atime_sec;	/* if valid & L9PL_SETATTR_ATIME_SET */
+	uint64_t atime_nsec;	/* (else use on-server time) */
+	uint64_t mtime_sec;	/* if valid & L9PL_SETATTR_MTIME_SET */
+	uint64_t mtime_nsec;	/* (else use on-server time) */
+};
+
+/* Flag bits in req->valid (l9p_f_tsetattr.valid) for Tsetattr. */
+enum l9pl_setattr_flags {
+	L9PL_SETATTR_MODE = 0x00000001,
+	L9PL_SETATTR_UID = 0x00000002,
+	L9PL_SETATTR_GID = 0x00000004,
+	L9PL_SETATTR_SIZE = 0x00000008,
+	L9PL_SETATTR_ATIME = 0x00000010,
+	L9PL_SETATTR_MTIME = 0x00000020,
+	L9PL_SETATTR_CTIME = 0x00000040,
+	L9PL_SETATTR_ATIME_SET = 0x00000080,	/* atime_sec/nsec supplied */
+	L9PL_SETATTR_MTIME_SET = 0x00000100,	/* mtime_sec/nsec supplied */
+};
+
+struct l9p_f_txattrwalk {	/* l9p_fcall member: txattrwalk */
+	struct l9p_hdr hdr;
+	uint32_t newfid;
+	char *name;
+};
+
+struct l9p_f_rxattrwalk {	/* l9p_fcall member: rxattrwalk */
+	struct l9p_hdr hdr;
+	uint64_t size;
+};
+
+struct l9p_f_txattrcreate {	/* l9p_fcall member: txattrcreate */
+	struct l9p_hdr hdr;
+	char *name;
+	uint64_t attr_size;
+	uint32_t flags;
+};
+
+struct l9p_f_tlock {		/* l9p_fcall member: tlock */
+	struct l9p_hdr hdr;
+	uint8_t type;		/* from l9pl_lock_type */
+	uint32_t flags;		/* from l9pl_lock_flags */
+	uint64_t start;
+	uint64_t length;
+	uint32_t proc_id;
+	char *client_id;
+};
+
+enum l9pl_lock_type {		/* values for the type field of tlock/getlock */
+	L9PL_LOCK_TYPE_RDLOCK =	0,
+	L9PL_LOCK_TYPE_WRLOCK =	1,
+	L9PL_LOCK_TYPE_UNLOCK =	2,
+};
+
+enum l9pl_lock_flags {		/* for l9p_f_tlock.flags, despite the _TYPE_ names */
+	L9PL_LOCK_TYPE_BLOCK = 1,
+	L9PL_LOCK_TYPE_RECLAIM = 2,
+};
+
+struct l9p_f_rlock {		/* l9p_fcall member: rlock */
+	struct l9p_hdr hdr;
+	uint8_t status;		/* from l9pl_lock_status */
+};
+
+enum l9pl_lock_status {		/* values for l9p_f_rlock.status */
+	L9PL_LOCK_SUCCESS = 0,
+	L9PL_LOCK_BLOCKED = 1,
+	L9PL_LOCK_ERROR = 2,
+	L9PL_LOCK_GRACE = 3,
+};
+
+struct l9p_f_getlock {		/* l9p_fcall member: getlock */
+	struct l9p_hdr hdr;
+	uint8_t type;		/* from l9pl_lock_type */
+	uint64_t start;
+	uint64_t length;
+	uint32_t proc_id;
+	char *client_id;
+};
+
+struct l9p_f_tlink {		/* l9p_fcall member: tlink */
+	struct l9p_hdr hdr;
+	uint32_t dfid;		/* NOTE(review): presumably a directory fid - confirm */
+	char *name;
+};
+
+struct l9p_f_trenameat {	/* l9p_fcall member: trenameat */
+	struct l9p_hdr hdr;
+	char *oldname;
+	uint32_t newdirfid;
+	char *newname;
+};
+
+/*
+ * Flags in Tunlinkat, carried in the flags field of the re-used
+ * struct l9p_f_tlcreate (where they have a different meaning).
+ */
+enum l9p_l_unlinkat_flags {
+	/* not sure if any other AT_* flags are passed through */
+	L9PL_AT_REMOVEDIR =	0x0200,
+};
+
+union l9p_fcall {		/* decoded (non-wire) form of any request or reply */
+	struct l9p_hdr hdr;	/* every member below begins with this header */
+	struct l9p_f_version version;
+	struct l9p_f_tflush tflush;
+	struct l9p_f_ropen ropen;
+	struct l9p_f_ropen rcreate;
+	struct l9p_f_ropen rattach;
+	struct l9p_f_error error;
+	struct l9p_f_rauth rauth;
+	struct l9p_f_attach tattach;
+	struct l9p_f_attach tauth;
+	struct l9p_f_tcreate tcreate;
+	struct l9p_f_tcreate topen;
+	struct l9p_f_twalk twalk;
+	struct l9p_f_rwalk rwalk;
+	struct l9p_f_twstat twstat;
+	struct l9p_f_rstat rstat;
+	struct l9p_f_rstatfs rstatfs;
+	struct l9p_f_tlcreate tlopen;
+	struct l9p_f_ropen rlopen;
+	struct l9p_f_tlcreate tlcreate;
+	struct l9p_f_ropen rlcreate;
+	struct l9p_f_tsymlink tsymlink;
+	struct l9p_f_ropen rsymlink;
+	struct l9p_f_tmknod tmknod;
+	struct l9p_f_ropen rmknod;
+	struct l9p_f_trename trename;
+	struct l9p_f_rreadlink rreadlink;
+	struct l9p_f_tgetattr tgetattr;
+	struct l9p_f_rgetattr rgetattr;
+	struct l9p_f_tsetattr tsetattr;
+	struct l9p_f_txattrwalk txattrwalk;
+	struct l9p_f_rxattrwalk rxattrwalk;
+	struct l9p_f_txattrcreate txattrcreate;
+	struct l9p_f_tlock tlock;
+	struct l9p_f_rlock rlock;
+	struct l9p_f_getlock getlock;
+	struct l9p_f_tlink tlink;
+	struct l9p_f_tlcreate tmkdir;
+	struct l9p_f_ropen rmkdir;
+	struct l9p_f_trenameat trenameat;
+	struct l9p_f_tlcreate tunlinkat;
+	struct l9p_f_io io;	/* READ, WRITE and READDIR, request and reply */
+};
+
+#endif  /* LIB9P_FCALL_H */
diff --git a/usr/src/lib/lib9p/common/fid.h b/usr/src/lib/lib9p/common/fid.h
new file mode 100644
index 0000000000..cdfdd7ec93
--- /dev/null
+++ b/usr/src/lib/lib9p/common/fid.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_FID_H
+#define LIB9P_FID_H
+
+#include <stdbool.h>
+
+/*
+ * Data structure for a fid.  All active fids in one session
+ * are stored in a hash table; the hash table provides the
+ * iterator to process them.  (See also l9p_connection in lib9p.h.)
+ *
+ * The back-end code has additional data per fid, found via
+ * lo_aux.  Currently this is allocated with a separate calloc().
+ *
+ * Most fids represent a file or directory, but a few are special
+ * purpose, including the auth fid from Tauth+Tattach, and the
+ * fids used for extended attributes.  We have our own set of
+ * flags here in lo_flags.
+ *
+ * Note that all new fids start as potentially-valid (reserving
+ * their 32-bit fid value), but not actually-valid.  If another
+ * (threaded) op is invoked on a not-yet-valid fid, the fid cannot
+ * be used.  A fid can also be locked against other threads, in
+ * which case they must wait for it: this happens during create
+ * and open, which on success result in the fid changing from a
+ * directory to a file.  (At least, all this applies in principle
+ * -- we're currently single-threaded per connection so the locks
+ * are nop-ed out and the valid bit is mainly just for debug.)
+ *
+ * Fids that are "open" (the underlying file or directory is open)
+ * are marked as well.
+ *
+ * Locking is managed by the front end (request.c); validation
+ * and type-marking can be done by either side as needed.
+ *
+ * Fid types and validity are manipulated by set* and unset*
+ * functions, and tested by is* ops.  Note that we only
+ * distinguish between "directory" and "not directory" at this
+ * level, i.e., symlinks and devices are just "not a directory
+ * fid".  Also, fids cannot be unset as auth or xattr fids,
+ * nor can an open fid become closed, except by being clunked.
+ * While files should not normally become directories, it IS normal
+ * for directory fids to become file fids due to Twalk operations.
+ *
+ * (These accessor functions are just to leave wiggle room for
+ * different future implementations.)
+ */
+struct l9p_fid {
+	void	*lo_aux;	/* back-end's additional per-fid data */
+	uint32_t lo_fid;	/* NOTE(review): uint32_t needs <stdint.h>, not included here */
+	uint32_t lo_flags;	/* enum l9p_lo_flags; volatile atomic_t when threaded? */
+};
+
+enum l9p_lo_flags {		/* bits kept in l9p_fid.lo_flags */
+	L9P_LO_ISAUTH = 0x01,	/* auth fid (Tauth+Tattach) */
+	L9P_LO_ISDIR = 0x02,	/* directory fid, as opposed to "not a directory" */
+	L9P_LO_ISOPEN = 0x04,	/* underlying file or directory is open */
+	L9P_LO_ISVALID = 0x08,	/* actually valid, not merely reserved */
+	L9P_LO_ISXATTR = 0x10,	/* fid used for extended attributes */
+};
+
+static inline bool
+l9p_fid_isauth(struct l9p_fid *fid)
+{	/* is L9P_LO_ISAUTH set on this fid? */
+	return ((fid->lo_flags & L9P_LO_ISAUTH) ? true : false);
+}
+
+static inline void
+l9p_fid_setauth(struct l9p_fid *fid)
+{	/* turn L9P_LO_ISAUTH on; other flag bits are untouched */
+	fid->lo_flags = fid->lo_flags | L9P_LO_ISAUTH;
+}
+
+static inline bool
+l9p_fid_isdir(struct l9p_fid *fid)
+{	/* is L9P_LO_ISDIR set on this fid? */
+	return ((fid->lo_flags & L9P_LO_ISDIR) ? true : false);
+}
+
+static inline void
+l9p_fid_setdir(struct l9p_fid *fid)
+{	/* turn L9P_LO_ISDIR on; other flag bits are untouched */
+	fid->lo_flags = fid->lo_flags | L9P_LO_ISDIR;
+}
+
+static inline void
+l9p_fid_unsetdir(struct l9p_fid *fid)
+{	/* turn L9P_LO_ISDIR off; other flag bits are untouched */
+	fid->lo_flags = fid->lo_flags & ~(uint32_t)L9P_LO_ISDIR;
+}
+
+static inline bool
+l9p_fid_isopen(struct l9p_fid *fid)
+{	/* is L9P_LO_ISOPEN set on this fid? */
+	return ((fid->lo_flags & L9P_LO_ISOPEN) ? true : false);
+}
+
+static inline void
+l9p_fid_setopen(struct l9p_fid *fid)
+{	/* turn L9P_LO_ISOPEN on; no unset counterpart exists here */
+	fid->lo_flags = fid->lo_flags | L9P_LO_ISOPEN;
+}
+
+static inline bool
+l9p_fid_isvalid(struct l9p_fid *fid)
+{	/* is L9P_LO_ISVALID set on this fid? */
+	return ((fid->lo_flags & L9P_LO_ISVALID) ? true : false);
+}
+
+static inline void
+l9p_fid_setvalid(struct l9p_fid *fid)
+{	/* turn L9P_LO_ISVALID on; other flag bits are untouched */
+	fid->lo_flags = fid->lo_flags | L9P_LO_ISVALID;
+}
+
+static inline void
+l9p_fid_unsetvalid(struct l9p_fid *fid)
+{	/* turn L9P_LO_ISVALID off; other flag bits are untouched */
+	fid->lo_flags = fid->lo_flags & ~(uint32_t)L9P_LO_ISVALID;
+}
+
+static inline bool
+l9p_fid_isxattr(struct l9p_fid *fid)
+{	/* is L9P_LO_ISXATTR set on this fid? */
+	return ((fid->lo_flags & L9P_LO_ISXATTR) ? true : false);
+}
+
+static inline void
+l9p_fid_setxattr(struct l9p_fid *fid)
+{	/* turn L9P_LO_ISXATTR on; no unset counterpart exists here */
+	fid->lo_flags = fid->lo_flags | L9P_LO_ISXATTR;
+}
+
+#endif  /* LIB9P_FID_H */
diff --git a/usr/src/lib/lib9p/common/genacl.c b/usr/src/lib/lib9p/common/genacl.c
new file mode 100644
index 0000000000..a7be17ca9b
--- /dev/null
+++ b/usr/src/lib/lib9p/common/genacl.c
@@ -0,0 +1,806 @@
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/acl.h>
+#include <sys/stat.h>
+
+#ifdef __illumos__
+#include <sys/sysmacros.h>
+#endif
+
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "genacl.h"
+#include "fid.h"
+#include "log.h"
+
+#ifndef __illumos__
+typedef int econvertfn(acl_entry_t, struct l9p_ace *);
+#endif
+
+#ifdef __FreeBSD__
+static struct l9p_acl *l9p_new_acl(uint32_t acetype, uint32_t aceasize);
+static struct l9p_acl *l9p_growacl(struct l9p_acl *acl, uint32_t aceasize);
+static int l9p_count_aces(acl_t sysacl);
+static struct l9p_acl *l9p_sysacl_to_acl(int, acl_t, econvertfn *);
+#endif
+static bool l9p_ingroup(gid_t tid, gid_t gid, gid_t *gids, size_t ngids);
+static int l9p_check_aces(int32_t mask, struct l9p_acl *acl, struct stat *st,
+    uid_t uid, gid_t gid, gid_t *gids, size_t ngids);
+
+void
+l9p_acl_free(struct l9p_acl *acl)
+{
+	/* Hands acl straight to free(), so a NULL acl is accepted. */
+	free(acl);
+}
+
+/*
+ * Report whether the group ID tid (test-id) equals gid or any of
+ * the first ngids entries of gids; gids is not read if tid == gid.
+ */
+static bool
+l9p_ingroup(gid_t tid, gid_t gid, gid_t *gids, size_t ngids)
+{
+	size_t idx = 0;
+	bool found = (tid == gid);
+
+	while (!found && idx < ngids)
+		found = (tid == gids[idx++]);
+
+	return (found);
+}
+
+/* #define ACE_DEBUG */
+
+/*
+ * Walk the ACEs of acl in order, NFSv4 "first match" style, on
+ * behalf of the caller identified by uid, gid and gids[0..ngids);
+ * st supplies the object's owner and group for OWNER@/GROUP@.
+ * Audit and alarm ACEs are ignored.  We stop when we run out of
+ * ACEs, hit an applicable DENY covering any bit still wanted in
+ * mask, or have cleared every wanted bit.  An applicable ALLOW
+ * clears its bits from the set we are still looking for.
+ *
+ * The result is either "definitely allowed" (we cleared
+ * all the bits), "definitely denied" (we hit a deny with
+ * some or all of the bits), or "unspecified".  We
+ * represent these three states as +1 (positive = yes = allow),
+ * -1 (negative = no = denied), or 0 (no strong answer).
+ *
+ * For our caller's convenience, if we are called with a
+ * mask of 0, we return 0 (no answer).
+ */
+static int
+l9p_check_aces(int32_t mask, struct l9p_acl *acl, struct stat *st,
+    uid_t uid, gid_t gid, gid_t *gids, size_t ngids)
+{
+	uint32_t i;
+	struct l9p_ace *ace;
+#ifdef ACE_DEBUG
+	const char *acetype, *allowdeny;
+	bool show_tid;
+#endif
+	bool match;
+	uid_t tid;
+
+	if (mask == 0)
+		return (0);
+
+	for (i = 0; mask != 0 && i < acl->acl_nace; i++) {
+		ace = &acl->acl_aces[i];
+		switch (ace->ace_type) {
+		case L9P_ACET_ACCESS_ALLOWED:
+		case L9P_ACET_ACCESS_DENIED:
+			break;
+		default:
+			/* audit, alarm - ignore */
+			continue;
+		}
+#ifdef ACE_DEBUG
+		show_tid = false;
+#endif
+		if (ace->ace_flags & L9P_ACEF_OWNER) {
+#ifdef ACE_DEBUG
+			acetype = "OWNER@";
+#endif
+			match = st->st_uid == uid;
+		} else if (ace->ace_flags & L9P_ACEF_GROUP) {
+#ifdef ACE_DEBUG
+			acetype = "GROUP@";
+#endif
+			match = l9p_ingroup(st->st_gid, gid, gids, ngids);
+		} else if (ace->ace_flags & L9P_ACEF_EVERYONE) {
+#ifdef ACE_DEBUG
+			acetype = "EVERYONE@";
+#endif
+			match = true;
+		} else {
+			if (ace->ace_idsize != sizeof(tid))
+				continue;	/* stored id is not uid_t-sized: skip */
+#ifdef ACE_DEBUG
+			show_tid = true;
+#endif
+			memcpy(&tid, &ace->ace_idbytes, sizeof(tid));
+			if (ace->ace_flags & L9P_ACEF_IDENTIFIER_GROUP) {
+#ifdef ACE_DEBUG
+				acetype = "group";
+#endif
+				match = l9p_ingroup(tid, gid, gids, ngids);
+			} else {
+#ifdef ACE_DEBUG
+				acetype = "user";
+#endif
+				match = tid == uid;
+			}
+		}
+		/*
+		 * If this ACE applies to us, check remaining bits.
+		 * If any of those bits also apply, check the type:
+		 * DENY means "stop now", ALLOW means allow these bits
+		 * and keep checking.
+		 */
+#ifdef ACE_DEBUG
+		allowdeny = ace->ace_type == L9P_ACET_ACCESS_DENIED ?
+		    "deny" : "allow";
+#endif
+		if (match && (ace->ace_mask & (uint32_t)mask) != 0) {
+#ifdef ACE_DEBUG
+			if (show_tid)
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: %s %s %d: mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype, (int)tid,
+				    (u_int)mask, (u_int)ace->ace_mask);
+			else
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: %s %s: mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype,
+				    (u_int)mask, (u_int)ace->ace_mask);
+#endif
+			if (ace->ace_type == L9P_ACET_ACCESS_DENIED)
+				return (-1);
+			mask &= ~ace->ace_mask;
+#ifdef ACE_DEBUG
+			L9P_LOG(L9P_DEBUG, "clear 0x%x: now mask=0x%x",
+			    (u_int)ace->ace_mask, (u_int)mask);
+#endif
+		} else {
+#ifdef ACE_DEBUG
+			if (show_tid)
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: SKIP %s %s %d: "
+				    "match %d mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype, (int)tid,
+				    (int)match, (u_int)mask,
+				    (u_int)ace->ace_mask);
+			else
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: SKIP %s %s: "
+				    "match %d mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype,
+				    (int)match, (u_int)mask,
+				    (u_int)ace->ace_mask);
+#endif
+		}
+	}
+
+	/* Return 1 if every requested bit was allowed, else 0 (no answer). */
+#ifdef ACE_DEBUG
+	L9P_LOG(L9P_DEBUG, "ACE: end of ACEs, mask now 0x%x: %s",
+	    mask, mask ? "no-definitive-answer" : "ALLOW");
+#endif
+	return (mask == 0 ? 1 : 0);
+}
+
+/*
+ * Test against ACLs.
+ *
+ * The return value is normally 0 (access allowed) or EPERM
+ * (access denied), so it could just be a boolean....
+ *
+ * For "make new dir in dir" and "remove dir in dir", you must
+ * set the mask to test the directory permissions (not ADD_FILE but
+ * ADD_SUBDIRECTORY, and DELETE_CHILD).  For "make new file in dir"
+ * you must set the opmask to test file ADD_FILE.
+ *
+ * The L9P_ACE_DELETE flag means "can delete this thing"; it's not
+ * clear whether it should override the parent directory's ACL if
+ * any.  In our case it does not, but a caller may try
+ * L9P_ACE_DELETE_CHILD (separately, on its own) and then a
+ * (second, separate) L9P_ACE_DELETE, to make the permissions work
+ * as "or" instead of "and".
+ *
+ * Pass a NULL parent/pstat if they are not applicable, e.g.,
+ * for doing operations on an existing file, such as reading or
+ * writing data or attributes.  Pass in a null child/cstat if
+ * that's not applicable, such as creating a new file/dir.
+ *
+ * NB: it's probably wise to allow the owner of any file to update
+ * the ACLs of that file, but we leave that test to the caller.
+ */
+int l9p_acl_check_access(int32_t opmask, struct l9p_acl_check_args *args)
+{
+	struct l9p_acl *parent, *child;
+	struct stat *pstat, *cstat;
+	int32_t pop, cop;
+	size_t ngids;
+	uid_t uid;
+	gid_t gid, *gids;
+	int panswer, canswer;
+
+	assert(opmask != 0);
+	parent = args->aca_parent;
+	pstat = args->aca_pstat;
+	child = args->aca_child;
+	cstat = args->aca_cstat;
+	uid = args->aca_uid;
+	gid = args->aca_gid;
+	gids = args->aca_groups;
+	ngids = args->aca_ngroups;
+
+#ifdef ACE_DEBUG
+	L9P_LOG(L9P_DEBUG,
+	    "l9p_acl_check_access: opmask=0x%x uid=%ld gid=%ld ngids=%zd",
+	    (u_int)opmask, (long)uid, (long)gid, ngids);
+#endif
+	/*
+	 * If caller said "superuser semantics", check that first.
+	 * Note that we apply them regardless of ACLs.
+	 */
+	if (uid == 0 && args->aca_superuser)
+		return (0);
+
+	/*
+	 * If told to ignore ACLs and use only stat-based permissions,
+	 * discard any non-NULL ACL pointers.
+	 *
+	 * This will need some fancying up when we support POSIX ACLs.
+	 */
+	if ((args->aca_aclmode & L9P_ACM_NFS_ACL) == 0)
+		parent = child = NULL;
+
+	assert(parent == NULL || parent->acl_acetype == L9P_ACLTYPE_NFSv4);
+	assert(parent == NULL || pstat != NULL);
+	assert(child == NULL || child->acl_acetype == L9P_ACLTYPE_NFSv4);
+	assert(child == NULL || cstat != NULL);
+	assert(pstat != NULL || cstat != NULL);
+
+	/*
+	 * If the operation is UNLINK we should have either both ACLs
+	 * or no ACLs, but we won't require that here.
+	 *
+	 * If a parent ACL is supplied, it's a directory by definition.
+	 * Make sure we're allowed to do this there, whatever this is.
+	 * If a child ACL is supplied, check it too.  Note that the
+	 * DELETE permission only applies in the child though, not
+	 * in the parent, and the DELETE_CHILD only applies in the
+	 * parent.
+	 */
+	pop = cop = opmask;
+	if (parent != NULL || pstat != NULL) {
+		/*
+		 * Remove child-only bits from parent op and
+		 * parent-only bits from child op.
+		 *
+		 * L9P_ACE_DELETE is child-only.
+		 *
+		 * L9P_ACE_DELETE_CHILD is parent-only, and three data
+		 * access bits overlap with three directory access bits.
+		 * We should have child==NULL && cstat==NULL, so the
+		 * three data bits should be redundant, but it's
+		 * both trivial and safest to remove them anyway.
+		 */
+		pop &= ~L9P_ACE_DELETE;
+		cop &= ~(L9P_ACE_DELETE_CHILD | L9P_ACE_LIST_DIRECTORY |
+		    L9P_ACE_ADD_FILE | L9P_ACE_ADD_SUBDIRECTORY);
+	} else {
+		/*
+		 * Remove child-only bits from parent op.  We need
+		 * not bother since we just found we have no parent
+		 * and no pstat, and hence won't actually *use* pop.
+		 *
+		 * pop &= ~(L9P_ACE_READ_DATA | L9P_ACE_WRITE_DATA |
+		 *     L9P_ACE_APPEND_DATA);
+		 */
+	}
+	panswer = 0;
+	canswer = 0;
+	if (parent != NULL)
+		panswer = l9p_check_aces(pop, parent, pstat,
+		    uid, gid, gids, ngids);
+	if (child != NULL)
+		canswer = l9p_check_aces(cop, child, cstat,
+		    uid, gid, gids, ngids);
+
+	if (panswer || canswer) {
+		/*
+		 * Got a definitive answer from parent and/or
+		 * child ACLs.  We're not quite done yet though.
+		 */
+		if (opmask == L9P_ACOP_UNLINK) {
+			/*
+			 * For UNLINK, we can get an allow from child
+			 * and deny from parent, or vice versa.  It's
+			 * not 100% clear how to handle the two-answer
+			 * case.  ZFS says that if either says "allow",
+			 * we allow, and if both definitely say "deny",
+			 * we deny.  This makes sense, so we do that
+			 * here for all cases, even "strict".
+			 */
+			if (panswer > 0 || canswer > 0)
+				return (0);
+			if (panswer < 0 && canswer < 0)
+				return (EPERM);
+			/* non-definitive answer from one! move on */
+		} else {
+			/*
+			 * Have at least one definitive answer, and
+			 * should have only one; obey whichever
+			 * one it is.
+			 */
+			if (panswer)
+				return (panswer < 0 ? EPERM : 0);
+			return (canswer < 0 ? EPERM : 0);
+		}
+	}
+
+	/*
+	 * No definitive answer from ACLs alone.  Check for ZFS style
+	 * permissions checking and an "UNLINK" operation under ACLs.
+	 * If so, find write-and-execute permission on parent.
+	 * Note that WRITE overlaps with ADD_FILE -- that's ZFS's
+	 * way of saying "allow write to dir" -- but EXECUTE is
+	 * separate from LIST_DIRECTORY, so that's at least a little
+	 * bit cleaner.
+	 *
+	 * Note also that only a definitive yes (both bits are
+	 * explicitly allowed) results in granting unlink, and
+	 * a definitive no (at least one bit explicitly denied)
+	 * results in EPERM.  Only "no answer" moves on.
+	 */
+	if ((args->aca_aclmode & L9P_ACM_ZFS_ACL) &&
+	    opmask == L9P_ACOP_UNLINK && parent != NULL) {
+		panswer = l9p_check_aces(L9P_ACE_ADD_FILE | L9P_ACE_EXECUTE,
+		    parent, pstat, uid, gid, gids, ngids);
+		if (panswer)
+			return (panswer < 0 ? EPERM : 0);
+	}
+
+	/*
+	 * No definitive answer from ACLs.
+	 *
+	 * Try POSIX style rwx permissions if allowed.  This should
+	 * be rare, occurring mainly when caller supplied no ACLs
+	 * or set the mode to suppress them.
+	 *
+	 * The stat to check is the parent's if we don't have a child
+	 * (i.e., this is a dir op), or if the DELETE_CHILD bit is set
+	 * (i.e., this is an unlink or similar).  Otherwise it's the
+	 * child's.
+	 */
+	if (args->aca_aclmode & L9P_ACM_STAT_MODE) {
+		struct stat *st;
+		int rwx, bits;
+
+		rwx = l9p_ace_mask_to_rwx(opmask);
+		if ((st = cstat) == NULL || (opmask & L9P_ACE_DELETE_CHILD))
+			st = pstat;
+		if (uid == st->st_uid)
+			bits = (st->st_mode >> 6) & 7;
+		else if (l9p_ingroup(st->st_gid, gid, gids, ngids))
+			bits = (st->st_mode >> 3) & 7;
+		else
+			bits = st->st_mode & 7;
+		/*
+		 * If all the desired bits are set, we're OK.
+		 */
+		if ((rwx & bits) == rwx)
+			return (0);
+	}
+
+	/* all methods have failed, return EPERM */
+	return (EPERM);
+}
+
+/*
+ * Reduce an NFSv4-style operation mask to Unix rwx bits (0..7).
+ *
+ * Any read-type bit yields r (4), any write-, add- or delete-type
+ * bit yields w (2), and EXECUTE yields x (1).  Directory bits
+ * share values with the file data bits, so listing needs r and
+ * adding or deleting entries needs w.
+ */
+int
+l9p_ace_mask_to_rwx(int32_t opmask)
+{
+	const int32_t rmask = L9P_ACE_READ_DATA | L9P_ACE_READ_NAMED_ATTRS |
+	    L9P_ACE_READ_ATTRIBUTES | L9P_ACE_READ_ACL;
+	const int32_t wmask = L9P_ACE_WRITE_DATA | L9P_ACE_APPEND_DATA |
+	    L9P_ACE_ADD_FILE | L9P_ACE_ADD_SUBDIRECTORY |
+	    L9P_ACE_DELETE | L9P_ACE_DELETE_CHILD |
+	    L9P_ACE_WRITE_NAMED_ATTRS | L9P_ACE_WRITE_ATTRIBUTES |
+	    L9P_ACE_WRITE_ACL;
+	int bits;
+
+	bits = (opmask & rmask) != 0 ? 4 : 0;
+	if ((opmask & wmask) != 0)
+		bits |= 2;
+	if ((opmask & L9P_ACE_EXECUTE) != 0)
+		bits |= 1;
+
+	return (bits);
+}
+
+#if defined(__FreeBSD__) || defined(__illumos__)
+/*
+ * Allocate an empty ACL with room for aceasize ACEs; NULL if malloc fails.
+ */
+static struct l9p_acl *
+l9p_new_acl(uint32_t acetype, uint32_t aceasize)
+{
+	struct l9p_acl *acl;
+
+	acl = malloc(sizeof(*acl) + aceasize * sizeof(acl->acl_aces[0]));
+	if (acl == NULL)
+		return (NULL);
+
+	/* no ACEs occupied yet; the ACE array itself is uninitialized */
+	acl->acl_acetype = acetype;
+	acl->acl_nace = 0;
+	acl->acl_aceasize = aceasize;
+
+	return (acl);
+}
+#endif
+
+#ifdef __FreeBSD__
+/*
+ * Expand ACL to accommodate at least aceasize entries.
+ *
+ * Never shrinks, so it's a fast no-op until we exceed the recorded
+ * capacity.  Callers should grow in big chunks, or this is O(n**2).
+ * If realloc fails, the old ACL is freed and NULL is returned.
+ */
+static struct l9p_acl *
+l9p_growacl(struct l9p_acl *acl, uint32_t aceasize)
+{
+	struct l9p_acl *tmp;
+	size_t size;
+
+	if (acl->acl_aceasize >= aceasize)
+		return (acl);
+	size = sizeof(struct l9p_acl) + aceasize * sizeof(struct l9p_ace);
+	tmp = realloc(acl, size);
+	if (tmp == NULL)
+		free(acl);
+	else
+		tmp->acl_aceasize = aceasize;	/* record the new capacity */
+	return (tmp);
+}
+
+/*
+ * Count the ACEs in a system ACL.  POSIX offers no direct way to
+ * ask for the number of entries, so we step through them with
+ * acl_get_entry() until it stops returning 1.  That makes building
+ * an ACL a two-pass affair, but 2n is still O(n) and the walk is
+ * short.  (If the ACL grows between passes, growacl() copes.)
+ */
+static int
+l9p_count_aces(acl_t sysacl)
+{
+	acl_entry_t unused;
+	uint32_t count = 0;
+	int which = ACL_FIRST_ENTRY;
+
+	while (acl_get_entry(sysacl, which, &unused) == 1) {
+		which = ACL_NEXT_ENTRY;
+		count++;
+	}
+	return ((int)count);
+}
+
+/*
+ * Build an l9p ACL from the given acl_t, running the supplied
+ * convert function on each ACE; entries it rejects are skipped.
+ */
+static struct l9p_acl *
+l9p_sysacl_to_acl(int acetype, acl_t sysacl, econvertfn *convert)
+{
+	struct l9p_acl *out;
+	acl_entry_t sysace;
+	uint32_t used = 0;
+	int which = ACL_FIRST_ENTRY;
+
+	out = l9p_new_acl((uint32_t)acetype, (uint32_t)l9p_count_aces(sysacl));
+	if (out == NULL)
+		return (NULL);
+
+	while (acl_get_entry(sysacl, which, &sysace) == 1) {
+		which = ACL_NEXT_ENTRY;
+		/* frees the old ACL itself if it cannot grow */
+		out = l9p_growacl(out, used + 1);
+		if (out == NULL)
+			return (NULL);
+		/* a failed conversion leaves slot "used" to be reused */
+		if ((*convert)(sysace, &out->acl_aces[used]) == 0)
+			used++;
+	}
+
+	out->acl_nace = used;
+	return (out);
+}
+#endif
+
+#if defined(HAVE_POSIX_ACLS) && 0 /* not yet: empty stub, compiled out */
+struct l9p_acl *
+l9p_posix_acl_to_acl(acl_t sysacl)
+{
+}
+#endif
+
+#if defined(HAVE_FREEBSD_ACLS)
+/*
+ * Convert one FreeBSD NFSv4 ACL entry to an l9p_ace.  Returns 0 on
+ * success, else the failing acl_get_*() result, ENOMEM if the
+ * qualifier can't be fetched, or EINVAL for an unknown tag/type.
+ */
+static int
+l9p_frombsdnfs4(acl_entry_t sysace, struct l9p_ace *ace)
+{
+	acl_tag_t tag;			/* e.g., USER_OBJ, GROUP, etc */
+	acl_entry_type_t entry_type;	/* e.g., allow/deny */
+	acl_permset_t absdperm;
+	acl_flagset_t absdflag;
+	acl_perm_t bsdperm;		/* e.g., READ_DATA */
+	acl_flag_t bsdflag;		/* e.g., FILE_INHERIT_ACE */
+	uint32_t flags, mask;
+	int error;
+	uid_t uid, *aid;
+
+	error = acl_get_tag_type(sysace, &tag);
+	if (error == 0)
+		error = acl_get_entry_type_np(sysace, &entry_type);
+	if (error == 0)
+		error = acl_get_flagset_np(sysace, &absdflag);
+	if (error == 0)
+		error = acl_get_permset(sysace, &absdperm);
+	if (error)
+		return (error);
+
+	flags = 0;
+	uid = 0;
+	aid = NULL;
+
+	/* move user/group/everyone + id-is-group-id into flags */
+	switch (tag) {
+	case ACL_USER_OBJ:
+		flags |= L9P_ACEF_OWNER;
+		break;
+	case ACL_GROUP_OBJ:
+		flags |= L9P_ACEF_GROUP;
+		break;
+	case ACL_EVERYONE:
+		flags |= L9P_ACEF_EVERYONE;
+		break;
+	case ACL_GROUP:
+		flags |= L9P_ACEF_IDENTIFIER_GROUP;
+		/* FALLTHROUGH */
+	case ACL_USER:
+		aid = acl_get_qualifier(sysace); /* ugh, this malloc()s */
+		if (aid == NULL)
+			return (ENOMEM);
+		uid = *(uid_t *)aid;
+		free(aid);
+		aid = &uid;	/* non-NULL marks "has an id"; points at our copy */
+		break;
+	default:
+		return (EINVAL);	/* can't happen */
+	}
+
+	switch (entry_type) {
+	case ACL_ENTRY_TYPE_ALLOW:
+		ace->ace_type = L9P_ACET_ACCESS_ALLOWED;
+		break;
+	case ACL_ENTRY_TYPE_DENY:
+		ace->ace_type = L9P_ACET_ACCESS_DENIED;
+		break;
+	case ACL_ENTRY_TYPE_AUDIT:
+		ace->ace_type = L9P_ACET_SYSTEM_AUDIT;
+		break;
+	case ACL_ENTRY_TYPE_ALARM:
+		ace->ace_type = L9P_ACET_SYSTEM_ALARM;
+		break;
+	default:
+		return (EINVAL);	/* can't happen */
+	}
+
+	/* transform remaining BSD flags to internal NFS-y form */
+	bsdflag = *absdflag;
+	if (bsdflag & ACL_ENTRY_FILE_INHERIT)
+		flags |= L9P_ACEF_FILE_INHERIT_ACE;
+	if (bsdflag & ACL_ENTRY_DIRECTORY_INHERIT)
+		flags |= L9P_ACEF_DIRECTORY_INHERIT_ACE;
+	if (bsdflag & ACL_ENTRY_NO_PROPAGATE_INHERIT)
+		flags |= L9P_ACEF_NO_PROPAGATE_INHERIT_ACE;
+	if (bsdflag & ACL_ENTRY_INHERIT_ONLY)
+		flags |= L9P_ACEF_INHERIT_ONLY_ACE;
+	if (bsdflag & ACL_ENTRY_SUCCESSFUL_ACCESS)
+		flags |= L9P_ACEF_SUCCESSFUL_ACCESS_ACE_FLAG;
+	if (bsdflag & ACL_ENTRY_FAILED_ACCESS)
+		flags |= L9P_ACEF_FAILED_ACCESS_ACE_FLAG;
+	ace->ace_flags = flags;
+
+	/*
+	 * Transform BSD permissions to ace_mask.  Note that directory
+	 * vs file bits are the same in both sets, so we don't need
+	 * to worry about that, at least.
+	 *
+	 * There seem to be no BSD equivalents for WRITE_RETENTION
+	 * and WRITE_RETENTION_HOLD.
+	 */
+	mask = 0;
+	bsdperm = *absdperm;
+	if (bsdperm & ACL_READ_DATA)
+		mask |= L9P_ACE_READ_DATA;
+	if (bsdperm & ACL_WRITE_DATA)
+		mask |= L9P_ACE_WRITE_DATA;
+	if (bsdperm & ACL_APPEND_DATA)
+		mask |= L9P_ACE_APPEND_DATA;
+	if (bsdperm & ACL_READ_NAMED_ATTRS)
+		mask |= L9P_ACE_READ_NAMED_ATTRS;
+	if (bsdperm & ACL_WRITE_NAMED_ATTRS)
+		mask |= L9P_ACE_WRITE_NAMED_ATTRS;
+	if (bsdperm & ACL_EXECUTE)
+		mask |= L9P_ACE_EXECUTE;
+	if (bsdperm & ACL_DELETE_CHILD)
+		mask |= L9P_ACE_DELETE_CHILD;
+	if (bsdperm & ACL_READ_ATTRIBUTES)
+		mask |= L9P_ACE_READ_ATTRIBUTES;
+	if (bsdperm & ACL_WRITE_ATTRIBUTES)
+		mask |= L9P_ACE_WRITE_ATTRIBUTES;
+	/* L9P_ACE_WRITE_RETENTION: never set here (no BSD bit) */
+	/* L9P_ACE_WRITE_RETENTION_HOLD: never set here (no BSD bit) */
+	/* 0x00800: unassigned in the L9P_ACE_* bit set */
+	if (bsdperm & ACL_DELETE)
+		mask |= L9P_ACE_DELETE;
+	if (bsdperm & ACL_READ_ACL)
+		mask |= L9P_ACE_READ_ACL;
+	if (bsdperm & ACL_WRITE_ACL)
+		mask |= L9P_ACE_WRITE_ACL;
+	if (bsdperm & ACL_WRITE_OWNER)
+		mask |= L9P_ACE_WRITE_OWNER;
+	if (bsdperm & ACL_SYNCHRONIZE)
+		mask |= L9P_ACE_SYNCHRONIZE;
+	ace->ace_mask = mask;
+
+	/* fill in variable-size user or group ID bytes (USER/GROUP tags only) */
+	if (aid == NULL)
+		ace->ace_idsize = 0;
+	else {
+		ace->ace_idsize = sizeof(uid);
+		memcpy(&ace->ace_idbytes[0], aid, sizeof(uid));
+	}
+
+	return (0);
+}
+
+struct l9p_acl *
+l9p_freebsd_nfsv4acl_to_acl(acl_t sysacl)
+{
+	/* generic ACL walker plus the FreeBSD NFSv4 per-ACE converter */
+	return (l9p_sysacl_to_acl(L9P_ACLTYPE_NFSv4, sysacl, l9p_frombsdnfs4));
+}
+#endif
+
+#if defined(HAVE_DARWIN_ACLS) && 0 /* not yet: empty stub, compiled out */
+struct l9p_acl *
+l9p_darwin_nfsv4acl_to_acl(acl_t sysacl)
+{
+}
+#endif
+
+#if defined(HAVE__ILLUMOS_ACLS)
+/* illumos a_flags -> L9P_ACEF_* map; ACE_INHERITED_ACE has no equivalent */
+static struct {
+	uint16_t ace_flag;
+	uint32_t l9_flag;
+} ace_flag_tbl[] = {
+	{ ACE_FILE_INHERIT_ACE,		L9P_ACEF_FILE_INHERIT_ACE },
+	{ ACE_DIRECTORY_INHERIT_ACE,	L9P_ACEF_DIRECTORY_INHERIT_ACE },
+	{ ACE_NO_PROPAGATE_INHERIT_ACE,	L9P_ACEF_NO_PROPAGATE_INHERIT_ACE },
+	{ ACE_INHERIT_ONLY_ACE,		L9P_ACEF_INHERIT_ONLY_ACE },
+	{ ACE_SUCCESSFUL_ACCESS_ACE_FLAG,
+	    L9P_ACEF_SUCCESSFUL_ACCESS_ACE_FLAG },
+	{ ACE_FAILED_ACCESS_ACE_FLAG,	L9P_ACEF_FAILED_ACCESS_ACE_FLAG },
+	{ ACE_IDENTIFIER_GROUP,		L9P_ACEF_IDENTIFIER_GROUP },
+	{ ACE_OWNER,			L9P_ACEF_OWNER },
+	{ ACE_GROUP,			L9P_ACEF_GROUP },
+	{ ACE_EVERYONE,			L9P_ACEF_EVERYONE }
+};
+
+/*
+ * Convert an illumos NFSv4 (ACE_T) ACL to an l9p ACL.  Returns NULL for
+ * other ACL types (e.g. UFS), unknown ACE types, or allocation failure.
+ */
+struct l9p_acl *
+l9p_illumos_nfsv4acl_to_acl(acl_t *sysacl)
+{
+	struct l9p_acl *l9acl;
+	struct l9p_ace *l9ace;
+	ace_t *ent;
+	int i, j;
+
+	/* We only support NFSv4 ACLs.. so don't try this on UFS */
+	if (sysacl->acl_type != ACE_T)
+		return (NULL);
+
+	l9acl = l9p_new_acl(L9P_ACLTYPE_NFSv4, sysacl->acl_cnt);
+	if (l9acl == NULL)
+		return (NULL);
+
+	ent = sysacl->acl_aclp;
+	l9ace = l9acl->acl_aces;
+	for (i = 0; i < sysacl->acl_cnt; i++, ent++, l9ace++) {
+		switch (ent->a_type) {
+		case ACE_ACCESS_ALLOWED_ACE_TYPE:
+			l9ace->ace_type = L9P_ACET_ACCESS_ALLOWED;
+			break;
+		case ACE_ACCESS_DENIED_ACE_TYPE:
+			l9ace->ace_type = L9P_ACET_ACCESS_DENIED;
+			break;
+		case ACE_SYSTEM_AUDIT_ACE_TYPE:
+			l9ace->ace_type = L9P_ACET_SYSTEM_AUDIT;
+			break;
+		case ACE_SYSTEM_ALARM_ACE_TYPE:
+			l9ace->ace_type = L9P_ACET_SYSTEM_ALARM;
+			break;
+		default:
+			L9P_LOG(L9P_ERROR, "invalid ACL type");
+			l9p_acl_free(l9acl);
+			return (NULL);
+		}
+
+		l9ace->ace_flags = 0;
+		for (j = 0; j < ARRAY_SIZE(ace_flag_tbl); j++) {
+			if ((ent->a_flags & ace_flag_tbl[j].ace_flag) != 0)
+				l9ace->ace_flags |= ace_flag_tbl[j].l9_flag;
+		}
+
+		/* ace_t and l9p access mask bits share values: copy as-is. */
+		l9ace->ace_mask = ent->a_access_mask;
+		/* a_who becomes this entry's own identity bytes */
+		l9ace->ace_idsize = sizeof (ent->a_who);
+		memcpy(l9ace->ace_idbytes, &ent->a_who, sizeof (ent->a_who));
+	}
+	l9acl->acl_nace = sysacl->acl_cnt;	/* every slot was filled in */
+	return (l9acl);
+}
+#endif
diff --git a/usr/src/lib/lib9p/common/genacl.h b/usr/src/lib/lib9p/common/genacl.h
new file mode 100644
index 0000000000..d74b543c19
--- /dev/null
+++ b/usr/src/lib/lib9p/common/genacl.h
@@ -0,0 +1,316 @@
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * General ACL support for 9P2000.L.
+ *
+ * We mostly use Linux's xattr name space and nfs4 ACL bits, as
+ * these are the most general forms available.
+ *
+ * Linux requests attributes named
+ *
+ *     "system.posix_acl_default"
+ *     "system.posix_acl_access"
+ *
+ * to get POSIX style ACLs, and:
+ *
+ *     "system.nfs4_acl"
+ *
+ * to get NFSv4 style ACLs.  The v9fs client does not explicitly
+ * ask for the latter, but if you use the Ubuntu nfs4-acl-tools
+ * package, it should be able to read and write these.
+ *
+ * For the record, the Linux kernel source code also shows:
+ *
+ *  - Lustre uses "trusted.*", with "*" matching "lov", "lma",
+ *    "lmv", "dmv", "link", "fid", "version", "som", "hsm", and
+ *    "lfsck_namespace".
+ *
+ *  - ceph has a name tree of the form "ceph.<type>.<name>" with
+ *     <type,name> pairs like <"dir","entries">, <"dir","files">,
+ *     <"file","layout">, and so on.
+ *
+ *  - ext4 uses the POSIX names, plus some special ext4-specific
+ *    goop that might not get externalized.
+ *
+ *  - NFS uses both the POSIX names and the NFSv4 ACLs.  However,
+ *    what it mainly does is have nfsd generate fake NFSv4 ACLs
+ *    from POSIX ACLs.  If you run an NFS client, the client
+ *    relies on the server actually implementing the ACLs, and
+ *    lets nfs4-acl-tools read and write the system.nfs4_acl xattr
+ *    data.  If you run an NFS server off, e.g., an ext4 file system,
+ *    the server looks for the system.nfs4_acl xattr, serves that
+ *    out if found, and otherwise just generates the fakes.
+ *
+ *  - "security.*" and "selinux.*" are reserved.
+ *
+ *  - "security.capability" is the name for capabilities.
+ *
+ *  - sockets use "system.sockprotoname".
+ */
+
+#if defined(__APPLE__)
+  #define HAVE_POSIX_ACLS
+  #define HAVE_DARWIN_ACLS
+#endif
+
+#if defined(__FreeBSD__)
+  #define HAVE_POSIX_ACLS
+  #define HAVE_FREEBSD_ACLS
+#endif
+
+#if defined (__illumos__)
+  #define HAVE_POSIX_ACLS
+  #define HAVE__ILLUMOS_ACLS
+#endif
+
+#include <sys/types.h>
+#include <sys/acl.h>		/* XXX assumes existence of sys/acl.h */
+
+/*
+ * An ACL consists of a number of ACEs that grant some kind of
+ * "allow" or "deny" to some specific entity.
+ *
+ * The number of ACEs is potentially unlimited, although in practice
+ * they tend not to be that long.
+ *
+ * It's the responsibility of the back-end to supply the ACL
+ * for each test.  However, the ACL may be in some sort of
+ * system-specific form.  It's the responsibility of some
+ * (system-specific) code to translate it to *this* form, after
+ * which the backend may use l9p_acl_check_access() to get
+ * access granted or denied (and, eventually, audits and alarms
+ * recorded and raised, although that's yet to be designed).
+ *
+ * The reason for all this faffing-about with formats is so that
+ * we can *report* the ACLs using Linux 9p style xattrs.
+ */
+
+struct l9p_acl;
+struct l9p_fid;
+
+void l9p_acl_free(struct l9p_acl *);
+
+/*
+ * An ACL is made up of ACEs.
+ *
+ * Each ACE has:
+ *
+ *   - a type: allow, deny, audit, alarm
+ *   - a set of flags
+ *   - permissions bits: a "mask"
+ *   - an optional, nominally-variable-length identity
+ *
+ * The last part is especially tricky and currently has limited
+ * support here: it's always a 16 byte field on Darwin, and just
+ * a uint32_t on BSD (should be larger, really).  Linux supports
+ * very large, actually-variable-size values; we'll deal with
+ * this later, maybe.
+ *
+ * We will define the mask first, below, since these are also the bits
+ * passed in for the accmask argument to l9p_acl_check_access().
+ */
+
+/*
+ * ACL entry mask, and accmask argument flags.
+ *
+ * NB: not every bit is implemented, but they are all here because
+ * they are all defined as part of an NFSv4 ACL entry, which is
+ * more or less a superset of a POSIX ACL entry.  This means you
+ * can put a complete NFSv4 ACL in and we can reproduce it.
+ *
+ * Note that the LIST_DIRECTORY, ADD_FILE, and ADD_SUBDIRECTORY bits
+ * apply only to a directory, while the READ_DATA, WRITE_DATA, and
+ * APPEND_DATA bits apply only to a file.  See aca_parent/aca_child
+ * below.
+ */
+#define	L9P_ACE_READ_DATA		0x00001
+#define	L9P_ACE_LIST_DIRECTORY		0x00001 /* same as READ_DATA */
+#define	L9P_ACE_WRITE_DATA		0x00002
+#define	L9P_ACE_ADD_FILE		0x00002 /* same as WRITE_DATA */
+#define	L9P_ACE_APPEND_DATA		0x00004
+#define	L9P_ACE_ADD_SUBDIRECTORY	0x00004 /* same as APPEND_DATA */
+#define	L9P_ACE_READ_NAMED_ATTRS	0x00008
+#define	L9P_ACE_WRITE_NAMED_ATTRS	0x00010
+#define	L9P_ACE_EXECUTE			0x00020
+#define	L9P_ACE_DELETE_CHILD		0x00040
+#define	L9P_ACE_READ_ATTRIBUTES		0x00080
+#define	L9P_ACE_WRITE_ATTRIBUTES	0x00100
+#define	L9P_ACE_WRITE_RETENTION		0x00200 /* not used here */
+#define	L9P_ACE_WRITE_RETENTION_HOLD	0x00400 /* not used here */
+/*					0x00800 unused? */
+#define	L9P_ACE_DELETE			0x01000
+#define	L9P_ACE_READ_ACL		0x02000
+#define	L9P_ACE_WRITE_ACL		0x04000
+#define	L9P_ACE_WRITE_OWNER		0x08000
+#define	L9P_ACE_SYNCHRONIZE		0x10000 /* not used here */
+
+/*
+ * This is not an ACE bit, but is used with the access checking
+ * below.  It represents a request to unlink (delete child /
+ * delete) an entity, and is equivalent to asking for *either*
+ * (not both) permission.
+ */
+#define	L9P_ACOP_UNLINK (L9P_ACE_DELETE_CHILD | L9P_ACE_DELETE)
+
+/*
+ * Access checking takes a lot of arguments, so they are
+ * collected into a "struct" here.
+ *
+ * The aca_parent and aca_pstat fields may/must be NULL if the
+ * operation itself does not involve "directory" permissions.
+ * The aca_child and aca_cstat fields may/must be NULL if the
+ * operation does not involve anything *but* a directory.  This
+ * is how we decide whether you're interested in L9P_ACE_READ_DATA
+ * vs L9P_ACE_LIST_DIRECTORY, for instance.
+ *
+ * Note that it's OK for both parent and child to be directories
+ * (as is the case when we're adding or deleting a subdirectory).
+ */
+struct l9p_acl_check_args {
+	uid_t	aca_uid;		/* the uid that is requesting access */
+	gid_t	aca_gid;		/* the gid that is requesting access */
+	gid_t	*aca_groups;		/* the additional group-set, if any */
+	size_t	aca_ngroups;		/* number of groups in group-set */
+	struct l9p_acl *aca_parent;	/* ACLs associated with parent/dir */
+	struct stat *aca_pstat;		/* stat data for parent/dir */
+	struct l9p_acl *aca_child;	/* ACLs associated with file */
+	struct stat *aca_cstat;		/* stat data for file */
+	int	aca_aclmode;		/* L9P_ACM_* mode bits, see below */
+	bool	aca_superuser;		/* always allow uid 0, before ACLs */
+};
+
+/*
+ * Access checking mode bits in aca_aclmode.  If you enable
+ * ACLs, they are used first, optionally with ZFS style ACLs.
+ * Note that aca_superuser is tested before any of these: when it
+ * is set, uid 0 is granted access without consulting the ACLs.
+ *
+ * NFS style ACLs run before POSIX style ACLs (though POSIX
+ * ACLs aren't done yet anyway).
+ *
+ * N.B.: you probably want L9P_ACM_ZFS_ACL, especially when operating
+ * with a ZFS file system on FreeBSD.
+ */
+#define	L9P_ACM_NFS_ACL		0x0001	/* enable NFS ACL checking */
+#define	L9P_ACM_ZFS_ACL		0x0002	/* use ZFS ACL unlink semantics */
+#define	L9P_ACM_POSIX_ACL	0x0004	/* enable POSIX ACL checking (notyet) */
+#define	L9P_ACM_STAT_MODE	0x0008	/* enable st_mode bits */
+
+/*
+ * Requests to access some file or directory must provide:
+ *
+ *  - An operation.  This should usually be just one bit from the
+ *    L9P_ACE_* bit-sets above, or our special L9P_ACOP_UNLINK.
+ *    For a few file-open operations it may be multiple bits,
+ *    e.g., both read and write data.
+ *  - The identity of the accessor: uid + gid + gid-set.
+ *  - The type of access desired: this may be multiple bits.
+ *  - The parent directory, if applicable.
+ *  - The child file/dir being accessed, if applicable.
+ *  - stat data for parent and/or child, if applicable.
+ *
+ * The ACLs and/or stat data of the parent and/or child get used
+ * here, so the caller must provide them.  We should have a way to
+ * cache these on fids, but not yet.  The parent and child
+ * arguments are a bit tricky; see the code in genacl.c.
+ */
+int l9p_acl_check_access(int32_t op, struct l9p_acl_check_args *args);
+
+/*
+ * When falling back to POSIX ACL or Unix-style permissions
+ * testing, it's nice to collapse the above detailed permissions
+ * into simple read/write/execute bits (value 0..7).  We provide
+ * a small utility function that does this.
+ */
+int l9p_ace_mask_to_rwx(int32_t);
+
+/*
+ * The rest of the data in an ACE.
+ */
+
+/* type in ace_type */
+#define	L9P_ACET_ACCESS_ALLOWED		0
+#define	L9P_ACET_ACCESS_DENIED		1
+#define	L9P_ACET_SYSTEM_AUDIT		2
+#define	L9P_ACET_SYSTEM_ALARM		3
+
+/* flags in ace_flags */
+#define	L9P_ACEF_FILE_INHERIT_ACE		0x001
+#define	L9P_ACEF_DIRECTORY_INHERIT_ACE		0x002
+#define	L9P_ACEF_NO_PROPAGATE_INHERIT_ACE	0x004
+#define	L9P_ACEF_INHERIT_ONLY_ACE		0x008
+#define	L9P_ACEF_SUCCESSFUL_ACCESS_ACE_FLAG	0x010
+#define	L9P_ACEF_FAILED_ACCESS_ACE_FLAG		0x020
+#define	L9P_ACEF_IDENTIFIER_GROUP		0x040
+#define	L9P_ACEF_OWNER				0x080
+#define	L9P_ACEF_GROUP				0x100
+#define	L9P_ACEF_EVERYONE			0x200
+
+#if defined(__APPLE__)
+#  define L9P_ACE_IDSIZE 16 /* but, how do we map Darwin uuid? */
+#else
+#  define L9P_ACE_IDSIZE 4
+#endif
+
+struct l9p_ace {
+	uint16_t ace_type;		/* L9P_ACET_* entry type */
+	uint16_t ace_flags;		/* L9P_ACEF_* entry flags */
+	uint32_t ace_mask;		/* L9P_ACE_* permission bits */
+	uint32_t ace_idsize;		/* bytes used in ace_idbytes, 0 if none */
+	unsigned char ace_idbytes[L9P_ACE_IDSIZE];	/* raw uid or gid */
+};
+
+#define	L9P_ACLTYPE_NFSv4	1	/* currently the only valid type */
+struct l9p_acl {
+	uint32_t acl_acetype;		/* L9P_ACLTYPE_*; room for expansion */
+	uint32_t acl_nace;		/* number of occupied ACEs */
+	uint32_t acl_aceasize;		/* allocated ACE slots in acl_aces */
+	struct l9p_ace acl_aces[];	/* variable length ACE array */
+};
+
+/*
+ * These are the system-specific converters.
+ *
+ * Right now the backend needs to just find BSD NFSv4 ACLs
+ * and convert them before each operation that needs to be
+ * tested.
+ */
+#if defined(HAVE_DARWIN_ACLS)
+struct l9p_acl *l9p_darwin_nfsv4acl_to_acl(acl_t acl);
+#endif
+
+#if defined(HAVE_FREEBSD_ACLS)
+struct l9p_acl *l9p_freebsd_nfsv4acl_to_acl(acl_t acl);
+#endif
+
+#if defined(HAVE__ILLUMOS_ACLS)
+struct l9p_acl *l9p_illumos_nfsv4acl_to_acl(acl_t *acl);
+#endif
+
+#if defined(HAVE_POSIX_ACLS) && 0 /* not yet */
+struct l9p_acl *l9p_posix_acl_to_acl(acl_t acl);
+#endif
diff --git a/usr/src/lib/lib9p/common/hashtable.c b/usr/src/lib/lib9p/common/hashtable.c
new file mode 100644
index 0000000000..70db6bcc0e
--- /dev/null
+++ b/usr/src/lib/lib9p/common/hashtable.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include "lib9p_impl.h"
+#include "hashtable.h"
+
+static struct ht_item *ht_iter_advance(struct ht_iter *, struct ht_item *);
+
+void
+ht_init(struct ht *h, ssize_t size)
+{
+	ssize_t slot;
+
+	/* start from a zeroed table, then allocate "size" empty buckets */
+	memset(h, 0, sizeof(*h));
+	h->ht_entries = l9p_calloc((size_t)size, sizeof(*h->ht_entries));
+	h->ht_nentries = size;
+	(void) pthread_rwlock_init(&h->ht_rwlock, NULL);
+	for (slot = 0; slot < size; slot++)
+		TAILQ_INIT(&h->ht_entries[slot].hte_items);
+}
+
+void
+ht_destroy(struct ht *h)
+{
+	struct ht_item *cur, *next;
+	ssize_t slot;
+
+	/* release each chained item; the hti_data they carry is untouched */
+	for (slot = 0; slot < h->ht_nentries; slot++) {
+		TAILQ_FOREACH_SAFE(cur, &h->ht_entries[slot].hte_items,
+		    hti_link, next) {
+			free(cur);
+		}
+	}
+
+	(void) pthread_rwlock_destroy(&h->ht_rwlock);
+	free(h->ht_entries);
+	h->ht_entries = NULL;
+}
+
+void *
+ht_find(struct ht *h, uint32_t hash)
+{
+	void *data = NULL;	/* also the answer if locking fails */
+
+	if (ht_rdlock(h) == 0) {
+		data = ht_find_locked(h, hash);
+		(void) ht_unlock(h);
+	}
+	return (data);
+}
+
+void *
+ht_find_locked(struct ht *h, uint32_t hash)
+{
+	struct ht_item *it;
+
+	/* walk the chain of the bucket selected by hash % table size */
+	it = TAILQ_FIRST(&h->ht_entries[hash % h->ht_nentries].hte_items);
+	while (it != NULL && it->hti_hash != hash)
+		it = TAILQ_NEXT(it, hti_link);
+
+	if (it == NULL)
+		return (NULL);
+	/* first item whose stored hash matches */
+	return (it->hti_data);
+}
+
+int
+ht_add(struct ht *h, uint32_t hash, void *value)
+{
+	struct ht_entry *bucket;
+	struct ht_item *it;
+	int rv;
+
+	rv = ht_wrlock(h);
+	if (rv != 0)
+		return (rv);
+
+	/* hashes are unique keys: refuse a second item with this one */
+	bucket = &h->ht_entries[hash % h->ht_nentries];
+	TAILQ_FOREACH(it, &bucket->hte_items, hti_link) {
+		if (it->hti_hash != hash)
+			continue;
+		errno = EEXIST;
+		(void) ht_unlock(h);
+		return (-1);
+	}
+
+	it = l9p_calloc(1, sizeof(*it));
+	it->hti_hash = hash;
+	it->hti_data = value;
+	TAILQ_INSERT_TAIL(&bucket->hte_items, it, hti_link);
+	(void) ht_unlock(h);
+	return (0);
+}
+
+int
+ht_remove(struct ht *h, uint32_t hash)
+{
+	int rv;
+
+	/* a lock failure is returned as-is; otherwise 0 or -1 from below */
+	if ((rv = ht_wrlock(h)) == 0) {
+		rv = ht_remove_locked(h, hash);
+		(void) ht_unlock(h);
+	}
+	return (rv);
+}
+
+int
+ht_remove_locked(struct ht *h, uint32_t hash)
+{
+	struct ht_entry *bucket;
+	struct ht_item *it;
+
+	bucket = &h->ht_entries[hash % h->ht_nentries];
+	/* we return right after unlinking, so a plain walk is safe */
+	TAILQ_FOREACH(it, &bucket->hte_items, hti_link) {
+		if (it->hti_hash != hash)
+			continue;
+		TAILQ_REMOVE(&bucket->hte_items, it, hti_link);
+		free(it);
+		return (0);
+	}
+
+	/* nothing stored under this hash */
+	errno = ENOENT;
+	return (-1);
+}
+
+/*
+ * Inner workings for advancing the iterator.
+ *
+ * If we have a current item, that tells us how to find the
+ * next item.  If not, we get the first item from the next
+ * slot (well, the next slot with an item); in any case, we
+ * record the new slot and return the next item.
+ *
+ * For bootstrapping, iter->htit_slot can be -1 to start
+ * searching at slot 0.
+ *
+ * Caller must hold a lock on the table.
+ */
+static struct ht_item *
+ht_iter_advance(struct ht_iter *iter, struct ht_item *cur)
+{
+	struct ht *tbl = iter->htit_parent;
+	struct ht_item *found;
+	ssize_t slot;
+
+	/* first choice: whatever follows cur in its own bucket */
+	found = (cur != NULL) ? TAILQ_NEXT(cur, hti_link) : NULL;
+	if (found != NULL)
+		return (found);
+
+	/*
+	 * Otherwise scan forward for the next non-empty bucket and
+	 * remember where the scan stopped, even if it found nothing.
+	 */
+	for (slot = iter->htit_slot + 1; slot < tbl->ht_nentries; slot++) {
+		found = TAILQ_FIRST(&tbl->ht_entries[slot].hte_items);
+		if (found != NULL)
+			break;
+	}
+	iter->htit_slot = slot;
+
+	return (found);
+}
+
+/*
+ * Remove the current item - there must be one, or this is an
+ * error.  This (necessarily) pre-locates the next item, so callers
+ * must not use it on an actively-changing table.
+ */
+int
+ht_remove_at_iter(struct ht_iter *iter)
+{
+	struct ht_item *victim;
+	struct ht *tbl;
+	ssize_t home;
+	int rv;
+
+	assert(iter != NULL);
+
+	victim = iter->htit_curr;
+	if (victim == NULL) {
+		errno = EINVAL;
+		return (-1);
+	}
+	tbl = iter->htit_parent;
+	if ((rv = ht_wrlock(tbl)) != 0)
+		return (rv);
+
+	/* advancing may move htit_slot, so note the victim's bucket first */
+	home = iter->htit_slot;
+	iter->htit_next = ht_iter_advance(iter, victim);
+	TAILQ_REMOVE(&tbl->ht_entries[home].hte_items, victim, hti_link);
+	(void) ht_unlock(tbl);
+
+	/* the iterator is now between items; the old one can be freed */
+	iter->htit_curr = NULL;
+	free(victim);
+	return (0);
+}
+
+/*
+ * Initialize iterator.  Subsequent ht_next calls will find the
+ * first item, then the next, and so on.  Callers should in general
+ * not use this on actively-changing tables, though we do our best
+ * to make it semi-sensible.
+ */
+void
+ht_iter(struct ht *h, struct ht_iter *iter)
+{
+	/* not on any item yet, and nothing pre-located */
+	iter->htit_curr = iter->htit_next = NULL;
+	/* the first advance pre-increments this to slot 0 */
+	iter->htit_slot = -1;
+	iter->htit_parent = h;
+}
+
+/*
+ * Return the next item, which is the first item if we have not
+ * yet been called on this iterator, or the next item if we have.
+ */
+void *
+ht_next(struct ht_iter *iter)
+{
+	struct ht_item *it = iter->htit_next;
+
+	if (it != NULL) {
+		/* consume the item pre-located by ht_remove_at_iter() */
+		iter->htit_next = NULL;
+	} else {
+		/* nothing pre-located: step on from the current item */
+		if (ht_rdlock(iter->htit_parent) != 0)
+			return (NULL);
+		it = ht_iter_advance(iter, iter->htit_curr);
+		(void) ht_unlock(iter->htit_parent);
+	}
+	iter->htit_curr = it;
+	return (it != NULL ? it->hti_data : NULL);
+}
diff --git a/usr/src/lib/lib9p/common/hashtable.h b/usr/src/lib/lib9p/common/hashtable.h
new file mode 100644
index 0000000000..60b8dfff7b
--- /dev/null
+++ b/usr/src/lib/lib9p/common/hashtable.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_HASHTABLE_H
+#define LIB9P_HASHTABLE_H
+
+#include <pthread.h>
+#include <sys/queue.h>
+
+/*
+ * A hash table: an array of slots, protected as a whole by one
+ * reader/writer lock (see ht_rdlock, ht_wrlock and ht_unlock below).
+ */
+struct ht {
+	/* array of slots, indexed by slot number */
+	struct ht_entry * 	ht_entries;
+	/* NOTE(review): presumably the number of slots in ht_entries */
+	ssize_t 		ht_nentries;
+	/* lock taken by ht_rdlock/ht_wrlock, released by ht_unlock */
+	pthread_rwlock_t	ht_rwlock;
+};
+
+/*
+ * One slot of the table: a tail queue of the items held in that slot.
+ */
+struct ht_entry {
+	TAILQ_HEAD(, ht_item) hte_items;
+};
+
+/*
+ * One stored element, linked onto the hte_items queue of its slot.
+ */
+struct ht_item {
+	/* NOTE(review): presumably the hash the item was added under */
+	uint32_t		hti_hash;
+	/* caller's value; this is what ht_next hands back */
+	void *			hti_data;
+	/* linkage within the slot's hte_items queue */
+	TAILQ_ENTRY(ht_item)	hti_link;
+};
+
+/*
+ * Iteration state, set up by ht_iter() and stepped by ht_next().
+ */
+struct ht_iter {
+	/* table being iterated */
+	struct ht *		htit_parent;
+	/* item last returned by ht_next; NULL at start or after removal */
+	struct ht_item *	htit_curr;
+	/* successor saved by ht_remove_at_iter, consumed by ht_next */
+	struct ht_item *	htit_next;
+	/* slot of the current position; -1 until the first advance */
+	ssize_t			htit_slot;
+};
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety-analysis"
+#endif
+
+/*
+ * Take the table's reader/writer lock for reading.  The return value
+ * of pthread_rwlock_rdlock() is passed back unchanged.
+ */
+static inline int
+ht_rdlock(struct ht *h)
+{
+	pthread_rwlock_t *lock = &h->ht_rwlock;
+
+	return (pthread_rwlock_rdlock(lock));
+}
+
+/*
+ * Take the table's reader/writer lock for writing.  The return value
+ * of pthread_rwlock_wrlock() is passed back unchanged.
+ */
+static inline int
+ht_wrlock(struct ht *h)
+{
+	pthread_rwlock_t *lock = &h->ht_rwlock;
+
+	return (pthread_rwlock_wrlock(lock));
+}
+
+/*
+ * Drop the table's reader/writer lock, whichever way it was taken.
+ * The return value of pthread_rwlock_unlock() is passed back unchanged.
+ */
+static inline int
+ht_unlock(struct ht *h)
+{
+	pthread_rwlock_t *lock = &h->ht_rwlock;
+
+	return (pthread_rwlock_unlock(lock));
+}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+void ht_init(struct ht *h, ssize_t size);
+void ht_destroy(struct ht *h);
+void *ht_find(struct ht *h, uint32_t hash);
+void *ht_find_locked(struct ht *h, uint32_t hash);
+int ht_add(struct ht *h, uint32_t hash, void *value);
+int ht_remove(struct ht *h, uint32_t hash);
+int ht_remove_locked(struct ht *h, uint32_t hash);
+int ht_remove_at_iter(struct ht_iter *iter);
+void ht_iter(struct ht *h, struct ht_iter *iter);
+void *ht_next(struct ht_iter *iter);
+
+#endif  /* LIB9P_HASHTABLE_H */
diff --git a/usr/src/lib/lib9p/common/illumos_endian.h b/usr/src/lib/lib9p/common/illumos_endian.h
new file mode 100644
index 0000000000..ecb7874724
--- /dev/null
+++ b/usr/src/lib/lib9p/common/illumos_endian.h
@@ -0,0 +1,26 @@
+#ifndef __ILLUMOS_ENDIAN_H
+#define __ILLUMOS_ENDIAN_H
+
+/*
+ * Shims to make illumos' endian headers and macros compatible
+ * with FreeBSD's <sys/endian.h>
+ *
+ * On exit _LITTLE_ENDIAN and _BIG_ENDIAN are both defined, as two
+ * distinct constants, and _BYTE_ORDER is set to whichever of them
+ * corresponds to the macro that was defined on entry, so that code
+ * can compare _BYTE_ORDER against either name.  If neither macro was
+ * defined on entry, this header does not define _BYTE_ORDER.
+ */
+
+# include <endian.h>
+
+/* the two values that the endian names expand to once we are done */
+# define _COMPAT_LITTLE_ENDIAN 0x12345678
+# define _COMPAT_BIG_ENDIAN 0x87654321
+
+/*
+ * These tests only ask whether the incoming macros are defined, so
+ * they have to come before the #undef/#define pair below.
+ * NOTE(review): presumably <endian.h> leaves exactly one of the two
+ * defined -- confirm.
+ */
+# ifdef _LITTLE_ENDIAN
+#  define _BYTE_ORDER _COMPAT_LITTLE_ENDIAN
+# endif
+# ifdef _BIG_ENDIAN
+#  define _BYTE_ORDER _COMPAT_BIG_ENDIAN
+# endif
+
+/* replace the defined-or-not flags with comparable values */
+# undef _LITTLE_ENDIAN
+# undef _BIG_ENDIAN
+# define _LITTLE_ENDIAN _COMPAT_LITTLE_ENDIAN
+# define _BIG_ENDIAN _COMPAT_BIG_ENDIAN
+
+#endif /* __ILLUMOS_ENDIAN_H */
diff --git a/usr/src/lib/lib9p/common/lib9p.h b/usr/src/lib/lib9p/common/lib9p.h
new file mode 100644
index 0000000000..3d62e99006
--- /dev/null
+++ b/usr/src/lib/lib9p/common/lib9p.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+
+#ifndef LIB9P_LIB9P_H
+#define LIB9P_LIB9P_H
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/uio.h>
+#include <pthread.h>
+
+#if defined(__FreeBSD__)
+#include <sys/sbuf.h>
+#else
+#include "sbuf/sbuf.h"
+#endif
+
+#include "fcall.h"
+#include "threadpool.h"
+#include "hashtable.h"
+
+/* NOTE(review): presumably the msize used before negotiation -- confirm */
+#define L9P_DEFAULT_MSIZE   8192
+/* capacity of the fixed iovec arrays (lm_iov, lr_data_iov) */
+#define L9P_MAX_IOV         128
+/* NOTE(review): presumably the threadpool's worker count -- confirm */
+#define	L9P_NUMTHREADS      8
+
+struct l9p_request;
+struct l9p_backend;
+struct l9p_fid;
+
+/*
+ * Functions to implement underlying transport for lib9p.
+ *
+ * The transport is responsible for:
+ *
+ *   - allocating a response buffer (filling in the iovec and niov)
+ *     (gets req, pointer to base of iov array of size L9P_MAX_IOV,
+ *      pointer to niov, lt_aux)
+ *
+ *   - sending a response, when a request has a reply ready
+ *     (gets req, pointer to iov, niov, actual response length, lt_aux)
+ *
+ *   - dropping the response buffer, when a request has been
+ *     flushed or otherwise dropped without a response
+ *     (gets req, pointer to iov, niov, lt_aux)
+ *
+ * The transport is of course also responsible for feeding in
+ * request-buffers, but that happens by the transport calling
+ * l9p_connection_recv().
+ */
+struct l9p_transport {
+	/* transport-private pointer, passed as the last callback argument */
+	void *lt_aux;
+	/* allocate a response buffer: fill in the iov array and *niov */
+	int (*lt_get_response_buffer)(struct l9p_request *, struct iovec *,
+	    size_t *, void *);
+	/* send a ready reply: iov, niov, actual response length */
+	int (*lt_send_response)(struct l9p_request *, const struct iovec *,
+	    size_t, size_t, void *);
+	/* release the buffer of a request dropped without a response */
+	void (*lt_drop_response)(struct l9p_request *, const struct iovec *,
+	    size_t, void *);
+};
+
+/*
+ * Direction a struct l9p_message works in (its lm_mode): packing
+ * (encoding) outgoing results or unpacking (decoding) incoming requests.
+ */
+enum l9p_pack_mode {
+	L9P_PACK,
+	L9P_UNPACK
+};
+
+/*
+ * Integer field kinds.  Each enumerator's value is the field's width
+ * in bytes, so the names can be summed directly as sizes.
+ */
+enum l9p_integer_type {
+	L9P_BYTE = 1,
+	L9P_WORD = 2,
+	L9P_DWORD = 4,
+	L9P_QWORD = 8
+};
+
+/*
+ * Protocol dialects; zero is reserved for "invalid".
+ * NOTE(review): presumably L9P_2000, L9P_2000U and L9P_2000L stand for
+ * 9P2000, 9P2000.u and 9P2000.L respectively -- confirm.
+ */
+enum l9p_version {
+	L9P_INVALID_VERSION = 0,
+	L9P_2000 = 1,
+	L9P_2000U = 2,
+	L9P_2000L = 3
+};
+
+/*
+ * This structure is used for unpacking (decoding) incoming
+ * requests and packing (encoding) outgoing results.  It has its
+ * own copy of the iov array, with its own counters for working
+ * through that array, but it borrows the actual DATA from the
+ * original iov array associated with the original request (see
+ * below).
+ */
+struct l9p_message {
+	enum l9p_pack_mode lm_mode;	/* packing or unpacking */
+	struct iovec lm_iov[L9P_MAX_IOV]; /* private copy of the iov array */
+	size_t lm_niov;			/* entries of lm_iov in use */
+	size_t lm_cursor_iov;		/* cursor: current lm_iov entry */
+	size_t lm_cursor_offset;	/* cursor: offset within that entry */
+	/* NOTE(review): presumably bytes packed/unpacked so far -- confirm */
+	size_t lm_size;
+};
+
+/*
+ * Data structure for a request/response pair (Tfoo/Rfoo).
+ *
+ * Note that the response is not formatted out into raw data
+ * (overwriting the request raw data) until we are really
+ * responding, with the exception of read operations Tread
+ * and Treaddir, which overlay their result-data into the
+ * iov array in the process of reading.
+ *
+ * We have room for two incoming fids, in case we are
+ * using 9P2000.L protocol.  Note that nothing that uses two
+ * fids also has an output fid (newfid), so we could have a
+ * union of lr_fid2 and lr_newfid, but keeping them separate
+ * is probably a bit less error-prone.  (If we want to shave
+ * memory requirements there are more places to look.)
+ *
+ * (The fid, fid2, and newfid fields should be removed via
+ * reorganization, as they are only used for smuggling data
+ * between request.c and the backend and should just be
+ * parameters to backend ops.)
+ */
+struct l9p_request {
+	struct l9p_message lr_req_msg;	/* for unpacking the request */
+	struct l9p_message lr_resp_msg;	/* for packing the response */
+	union l9p_fcall lr_req;		/* the request, decoded/unpacked */
+	union l9p_fcall lr_resp;	/* the response, not yet packed */
+
+	struct l9p_fid *lr_fid;
+	struct l9p_fid *lr_fid2;
+	struct l9p_fid *lr_newfid;
+
+	struct l9p_connection *lr_conn;	/* containing connection */
+	void *lr_aux;			/* reserved for transport layer */
+
+	struct iovec lr_data_iov[L9P_MAX_IOV];	/* iovecs for req + resp */
+	size_t lr_data_niov;			/* actual size of data_iov */
+
+	int lr_error;			/* result from l9p_dispatch_request */
+
+	/* protected by threadpool mutex */
+	enum l9p_workstate lr_workstate;	/* threadpool: work state */
+	enum l9p_flushstate lr_flushstate;	/* flush state if flushee */
+	struct l9p_worker *lr_worker;		/* threadpool: worker */
+	STAILQ_ENTRY(l9p_request) lr_worklink;	/* reserved to threadpool */
+
+	/* protected by tag hash table lock */
+	struct l9p_request_queue lr_flushq;	/* q of flushers */
+	STAILQ_ENTRY(l9p_request) lr_flushlink;	/* link w/in flush queue */
+};
+
+/*
+ * N.B.: these dirents are variable length and for .L only.
+ * l9p_pudirent() takes one of these together with a message.
+ */
+struct l9p_dirent {
+	struct l9p_qid qid;
+	uint64_t offset;
+	uint8_t type;
+	char *name;	/* held by pointer, not inline in the struct */
+};
+
+/*
+ * The 9pfs protocol has the notion of a "session", which is
+ * traffic between any two "Tversion" requests.  All fids
+ * (lc_files, below) are specific to one particular session.
+ *
+ * We need a data structure per connection (client/server
+ * pair). This data structure lasts longer than these 9pfs
+ * sessions, but contains the request/response pairs and fids.
+ * Logically, the per-session data should be separate, but
+ * most of the time that would just require an extra
+ * indirection.  Instead, a new session simply clunks all
+ * fids, and otherwise keeps using this same connection.
+ */
+struct l9p_connection {
+	struct l9p_server *lc_server;	/* server this connection is for */
+	struct l9p_transport lc_lt;	/* transport callbacks and lt_aux */
+	struct l9p_threadpool lc_tp;	/* threadpool for this connection */
+	/* NOTE(review): presumably the version agreed by Tversion */
+	enum l9p_version lc_version;
+	uint32_t lc_msize;
+	uint32_t lc_max_io_size;
+	struct ht lc_files;		/* fids of the current session */
+	/* NOTE(review): presumably requests keyed by tag -- confirm */
+	struct ht lc_requests;
+	/* NOTE(review): presumably linkage on the server's ls_conns */
+	LIST_ENTRY(l9p_connection) lc_link;
+};
+
+/*
+ * Per-server state: the backend given to l9p_server_init() and the
+ * list of connections.
+ */
+struct l9p_server {
+	struct l9p_backend *ls_backend;
+	/* NOTE(review): presumably the newest dialect offered -- confirm */
+	enum l9p_version ls_max_version;
+	LIST_HEAD(, l9p_connection) ls_conns;
+};
+
+int l9p_pufcall(struct l9p_message *msg, union l9p_fcall *fcall,
+    enum l9p_version version);
+ssize_t l9p_pustat(struct l9p_message *msg, struct l9p_stat *s,
+    enum l9p_version version);
+uint16_t l9p_sizeof_stat(struct l9p_stat *stat, enum l9p_version version);
+int l9p_pack_stat(struct l9p_message *msg, struct l9p_request *req,
+    struct l9p_stat *s);
+ssize_t l9p_pudirent(struct l9p_message *msg, struct l9p_dirent *de);
+
+int l9p_server_init(struct l9p_server **serverp, struct l9p_backend *backend);
+
+int l9p_connection_init(struct l9p_server *server,
+    struct l9p_connection **connp);
+void l9p_connection_free(struct l9p_connection *conn);
+void l9p_connection_recv(struct l9p_connection *conn, const struct iovec *iov,
+    size_t niov, void *aux);
+void l9p_connection_close(struct l9p_connection *conn);
+struct l9p_fid *l9p_connection_alloc_fid(struct l9p_connection *conn,
+    uint32_t fid);
+void l9p_connection_remove_fid(struct l9p_connection *conn,
+    struct l9p_fid *fid);
+
+int l9p_dispatch_request(struct l9p_request *req);
+void l9p_respond(struct l9p_request *req, bool drop, bool rmtag);
+
+void l9p_init_msg(struct l9p_message *msg, struct l9p_request *req,
+    enum l9p_pack_mode mode);
+void l9p_seek_iov(const struct iovec *iov1, size_t niov1, struct iovec *iov2,
+    size_t *niov2, size_t seek);
+size_t l9p_truncate_iov(struct iovec *iov, size_t niov, size_t length);
+void l9p_describe_fcall(union l9p_fcall *fcall, enum l9p_version version,
+    struct sbuf *sb);
+void l9p_freefcall(union l9p_fcall *fcall);
+void l9p_freestat(struct l9p_stat *stat);
+
+gid_t *l9p_getgrlist(const char *, gid_t, int *);
+
+#endif  /* LIB9P_LIB9P_H */
diff --git a/usr/src/lib/lib9p/common/lib9p_impl.h b/usr/src/lib/lib9p/common/lib9p_impl.h
new file mode 100644
index 0000000000..41ff07ae18
--- /dev/null
+++ b/usr/src/lib/lib9p/common/lib9p_impl.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_LIB9P_IMPL_H
+#define LIB9P_LIB9P_IMPL_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef _KERNEL
+/*
+ * malloc() wrapper that never returns NULL: if malloc() fails, the
+ * requested size is reported on stderr and the process abort()s.
+ */
+static inline void *
+l9p_malloc(size_t size)
+{
+	void *r = malloc(size);
+
+	if (r == NULL) {
+		/* size_t is unsigned, so the conversion must be %zu */
+		fprintf(stderr, "cannot allocate %zu bytes: out of memory\n",
+		    size);
+		abort();
+	}
+
+	return (r);
+}
+
+/*
+ * calloc() wrapper that never returns NULL: if calloc() fails, the
+ * request is reported on stderr and the process abort()s.
+ */
+static inline void *
+l9p_calloc(size_t n, size_t size)
+{
+	void *r = calloc(n, size);
+
+	if (r == NULL) {
+		/*
+		 * Print the two factors rather than their product: when
+		 * n * size overflows (one reason calloc fails) the product
+		 * wraps and would report a misleadingly small number.
+		 * Both are size_t, hence %zu.
+		 */
+		fprintf(stderr,
+		    "cannot allocate %zu * %zu bytes: out of memory\n",
+		    n, size);
+		abort();
+	}
+
+	return (r);
+}
+
+/*
+ * realloc() wrapper that never returns NULL: if realloc() returns NULL,
+ * the requested size is reported on stderr and the process abort()s.
+ * Note that this treats any NULL result as fatal, including one that an
+ * implementation may return for a newsize of 0.
+ */
+static inline void *
+l9p_realloc(void *ptr, size_t newsize)
+{
+	void *r = realloc(ptr, newsize);
+
+	if (r == NULL) {
+		/* size_t is unsigned, so the conversion must be %zu */
+		fprintf(stderr, "cannot allocate %zu bytes: out of memory\n",
+		    newsize);
+		abort();
+	}
+
+	return (r);
+}
+#endif /* _KERNEL */
+
+#endif /* LIB9P_LIB9P_IMPL_H */
diff --git a/usr/src/lib/lib9p/common/linux_errno.h b/usr/src/lib/lib9p/common/linux_errno.h
new file mode 100644
index 0000000000..72778daa23
--- /dev/null
+++ b/usr/src/lib/lib9p/common/linux_errno.h
@@ -0,0 +1,247 @@
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_LINUX_ERRNO_H
+#define LIB9P_LINUX_ERRNO_H
+
+/*
+ * Linux error numbers that are outside of the original base range
+ * (which ends with ERANGE).
+ *
+ * This is pretty much the same as Linux's errno.h except that the
+ * names are prefixed with "LINUX_", and we add _STR with the
+ * string name.
+ *
+ * The string expansions were obtained with a little program to
+ * print every strerror().
+ *
+ * Note that BSD EDEADLK is 11 and BSD EAGAIN is 35, vs
+ * Linux / Plan9 EAGAIN at 11.  So one value in the ERANGE
+ * range still needs translation too.
+ */
+
+#define	LINUX_EAGAIN		11
+#define	LINUX_EAGAIN_STR	"Resource temporarily unavailable"
+
+#define	LINUX_EDEADLK		35
+#define	LINUX_EDEADLK_STR	"Resource deadlock avoided"
+#define	LINUX_ENAMETOOLONG	36
+#define	LINUX_ENAMETOOLONG_STR	"File name too long"
+#define	LINUX_ENOLCK		37
+#define	LINUX_ENOLCK_STR	"No locks available"
+#define	LINUX_ENOSYS		38
+#define	LINUX_ENOSYS_STR	"Function not implemented"
+#define	LINUX_ENOTEMPTY		39
+#define	LINUX_ENOTEMPTY_STR	"Directory not empty"
+#define	LINUX_ELOOP		40
+#define	LINUX_ELOOP_STR		"Too many levels of symbolic links"
+/*				41 unused */
+#define	LINUX_ENOMSG		42
+#define	LINUX_ENOMSG_STR	"No message of desired type"
+#define	LINUX_EIDRM		43
+#define	LINUX_EIDRM_STR		"Identifier removed"
+#define	LINUX_ECHRNG		44
+#define	LINUX_ECHRNG_STR	"Channel number out of range"
+#define	LINUX_EL2NSYNC		45
+#define	LINUX_EL2NSYNC_STR	"Level 2 not synchronized"
+#define	LINUX_EL3HLT		46
+#define	LINUX_EL3HLT_STR	"Level 3 halted"
+#define	LINUX_EL3RST		47
+#define	LINUX_EL3RST_STR	"Level 3 reset"
+#define	LINUX_ELNRNG		48
+#define	LINUX_ELNRNG_STR	"Link number out of range"
+#define	LINUX_EUNATCH		49
+#define	LINUX_EUNATCH_STR	"Protocol driver not attached"
+#define	LINUX_ENOCSI		50
+#define	LINUX_ENOCSI_STR	"No CSI structure available"
+#define	LINUX_EL2HLT		51
+#define	LINUX_EL2HLT_STR	"Level 2 halted"
+#define	LINUX_EBADE		52
+#define	LINUX_EBADE_STR		"Invalid exchange"
+#define	LINUX_EBADR		53
+#define	LINUX_EBADR_STR		"Invalid request descriptor"
+#define	LINUX_EXFULL		54
+#define	LINUX_EXFULL_STR	"Exchange full"
+#define	LINUX_ENOANO		55
+#define	LINUX_ENOANO_STR	"No anode"
+#define	LINUX_EBADRQC		56
+#define	LINUX_EBADRQC_STR	"Invalid request code"
+#define	LINUX_EBADSLT		57
+#define	LINUX_EBADSLT_STR	"Invalid slot"
+/*				58 unused */
+#define	LINUX_EBFONT		59
+#define	LINUX_EBFONT_STR	"Bad font file format"
+#define	LINUX_ENOSTR		60
+#define	LINUX_ENOSTR_STR	"Device not a stream"
+#define	LINUX_ENODATA		61
+#define	LINUX_ENODATA_STR	"No data available"
+#define	LINUX_ETIME		62
+#define	LINUX_ETIME_STR		"Timer expired"
+#define	LINUX_ENOSR		63
+#define	LINUX_ENOSR_STR		"Out of streams resources"
+#define	LINUX_ENONET		64
+#define	LINUX_ENONET_STR	"Machine is not on the network"
+#define	LINUX_ENOPKG		65
+#define	LINUX_ENOPKG_STR	"Package not installed"
+#define	LINUX_EREMOTE		66
+#define	LINUX_EREMOTE_STR	"Object is remote"
+#define	LINUX_ENOLINK		67
+#define	LINUX_ENOLINK_STR	"Link has been severed"
+#define	LINUX_EADV		68
+#define	LINUX_EADV_STR		"Advertise error"
+#define	LINUX_ESRMNT		69
+#define	LINUX_ESRMNT_STR	"Srmount error"
+#define	LINUX_ECOMM		70
+#define	LINUX_ECOMM_STR		"Communication error on send"
+#define	LINUX_EPROTO		71
+#define	LINUX_EPROTO_STR	"Protocol error"
+#define	LINUX_EMULTIHOP		72
+#define	LINUX_EMULTIHOP_STR	"Multihop attempted"
+#define	LINUX_EDOTDOT		73
+#define	LINUX_EDOTDOT_STR	"RFS specific error"
+#define	LINUX_EBADMSG		74
+#define	LINUX_EBADMSG_STR	"Bad message"
+#define	LINUX_EOVERFLOW		75
+#define	LINUX_EOVERFLOW_STR	"Value too large for defined data type"
+#define	LINUX_ENOTUNIQ		76
+#define	LINUX_ENOTUNIQ_STR	"Name not unique on network"
+#define	LINUX_EBADFD		77
+#define	LINUX_EBADFD_STR	"File descriptor in bad state"
+#define	LINUX_EREMCHG		78
+#define	LINUX_EREMCHG_STR	"Remote address changed"
+#define	LINUX_ELIBACC		79
+#define	LINUX_ELIBACC_STR	"Can not access a needed shared library"
+#define	LINUX_ELIBBAD		80
+#define	LINUX_ELIBBAD_STR	"Accessing a corrupted shared library"
+#define	LINUX_ELIBSCN		81
+#define	LINUX_ELIBSCN_STR	".lib section in a.out corrupted"
+#define	LINUX_ELIBMAX		82
+#define	LINUX_ELIBMAX_STR	"Attempting to link in too many shared libraries"
+#define	LINUX_ELIBEXEC		83
+#define	LINUX_ELIBEXEC_STR	"Cannot exec a shared library directly"
+#define	LINUX_EILSEQ		84
+#define	LINUX_EILSEQ_STR	"Invalid or incomplete multibyte or wide character"
+#define	LINUX_ERESTART		85
+#define	LINUX_ERESTART_STR	"Interrupted system call should be restarted"
+#define	LINUX_ESTRPIPE		86
+#define	LINUX_ESTRPIPE_STR	"Streams pipe error"
+#define	LINUX_EUSERS		87
+#define	LINUX_EUSERS_STR	"Too many users"
+#define	LINUX_ENOTSOCK		88
+#define	LINUX_ENOTSOCK_STR	"Socket operation on non-socket"
+#define	LINUX_EDESTADDRREQ	89
+#define	LINUX_EDESTADDRREQ_STR	"Destination address required"
+#define	LINUX_EMSGSIZE		90
+#define	LINUX_EMSGSIZE_STR	"Message too long"
+#define	LINUX_EPROTOTYPE	91
+#define	LINUX_EPROTOTYPE_STR	"Protocol wrong type for socket"
+#define	LINUX_ENOPROTOOPT	92
+#define	LINUX_ENOPROTOOPT_STR	"Protocol not available"
+#define	LINUX_EPROTONOSUPPORT	93
+#define	LINUX_EPROTONOSUPPORT_STR "Protocol not supported"
+#define	LINUX_ESOCKTNOSUPPORT	94
+#define	LINUX_ESOCKTNOSUPPORT_STR "Socket type not supported"
+#define	LINUX_EOPNOTSUPP	95
+#define	LINUX_EOPNOTSUPP_STR	"Operation not supported"
+#define	LINUX_EPFNOSUPPORT	96
+#define	LINUX_EPFNOSUPPORT_STR	"Protocol family not supported"
+#define	LINUX_EAFNOSUPPORT	97
+#define	LINUX_EAFNOSUPPORT_STR	"Address family not supported by protocol"
+#define	LINUX_EADDRINUSE	98
+#define	LINUX_EADDRINUSE_STR	"Address already in use"
+#define	LINUX_EADDRNOTAVAIL	99
+#define	LINUX_EADDRNOTAVAIL_STR	"Cannot assign requested address"
+#define	LINUX_ENETDOWN		100
+#define	LINUX_ENETDOWN_STR	"Network is down"
+#define	LINUX_ENETUNREACH	101
+#define	LINUX_ENETUNREACH_STR	"Network is unreachable"
+#define	LINUX_ENETRESET		102
+#define	LINUX_ENETRESET_STR	"Network dropped connection on reset"
+#define	LINUX_ECONNABORTED	103
+#define	LINUX_ECONNABORTED_STR	"Software caused connection abort"
+#define	LINUX_ECONNRESET	104
+#define	LINUX_ECONNRESET_STR	"Connection reset by peer"
+#define	LINUX_ENOBUFS		105
+#define	LINUX_ENOBUFS_STR	"No buffer space available"
+#define	LINUX_EISCONN		106
+#define	LINUX_EISCONN_STR	"Transport endpoint is already connected"
+#define	LINUX_ENOTCONN		107
+#define	LINUX_ENOTCONN_STR	"Transport endpoint is not connected"
+#define	LINUX_ESHUTDOWN		108
+#define	LINUX_ESHUTDOWN_STR	"Cannot send after transport endpoint shutdown"
+#define	LINUX_ETOOMANYREFS	109
+#define	LINUX_ETOOMANYREFS_STR	"Too many references: cannot splice"
+#define	LINUX_ETIMEDOUT		110
+#define	LINUX_ETIMEDOUT_STR	"Connection timed out"
+#define	LINUX_ECONNREFUSED	111
+#define	LINUX_ECONNREFUSED_STR	"Connection refused"
+#define	LINUX_EHOSTDOWN		112
+#define	LINUX_EHOSTDOWN_STR	"Host is down"
+#define	LINUX_EHOSTUNREACH	113
+#define	LINUX_EHOSTUNREACH_STR	"No route to host"
+#define	LINUX_EALREADY		114
+#define	LINUX_EALREADY_STR	"Operation already in progress"
+#define	LINUX_EINPROGRESS	115
+#define	LINUX_EINPROGRESS_STR	"Operation now in progress"
+#define	LINUX_ESTALE		116
+#define	LINUX_ESTALE_STR	"Stale file handle"
+#define	LINUX_EUCLEAN		117
+#define	LINUX_EUCLEAN_STR	"Structure needs cleaning"
+#define	LINUX_ENOTNAM		118
+#define	LINUX_ENOTNAM_STR	"Not a XENIX named type file"
+#define	LINUX_ENAVAIL		119
+#define	LINUX_ENAVAIL_STR	"No XENIX semaphores available"
+#define	LINUX_EISNAM		120
+#define	LINUX_EISNAM_STR	"Is a named type file"
+#define	LINUX_EREMOTEIO		121
+#define	LINUX_EREMOTEIO_STR	"Remote I/O error"
+#define	LINUX_EDQUOT		122
+#define	LINUX_EDQUOT_STR	"Quota exceeded"
+#define	LINUX_ENOMEDIUM		123
+#define	LINUX_ENOMEDIUM_STR	"No medium found"
+#define	LINUX_EMEDIUMTYPE	124
+#define	LINUX_EMEDIUMTYPE_STR	"Wrong medium type"
+#define	LINUX_ECANCELED		125
+#define	LINUX_ECANCELED_STR	"Operation canceled"
+#define	LINUX_ENOKEY		126
+#define	LINUX_ENOKEY_STR	"Required key not available"
+#define	LINUX_EKEYEXPIRED	127
+#define	LINUX_EKEYEXPIRED_STR	"Key has expired"
+#define	LINUX_EKEYREVOKED	128
+#define	LINUX_EKEYREVOKED_STR	"Key has been revoked"
+#define	LINUX_EKEYREJECTED	129
+#define	LINUX_EKEYREJECTED_STR	"Key was rejected by service"
+#define	LINUX_EOWNERDEAD	130
+#define	LINUX_EOWNERDEAD_STR	"Owner died"
+#define	LINUX_ENOTRECOVERABLE	131
+#define	LINUX_ENOTRECOVERABLE_STR "State not recoverable"
+#define	LINUX_ERFKILL		132
+#define	LINUX_ERFKILL_STR	"Operation not possible due to RF-kill"
+#define	LINUX_EHWPOISON		133
+#define	LINUX_EHWPOISON_STR	"Memory page has hardware error"
+
+#endif	/* LIB9P_LINUX_ERRNO_H */
diff --git a/usr/src/lib/lib9p/common/log.c b/usr/src/lib/lib9p/common/log.c
new file mode 100644
index 0000000000..fb2596a16f
--- /dev/null
+++ b/usr/src/lib/lib9p/common/log.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include "log.h"
+
+/*
+ * Printable level names, indexed by enum l9p_log_level; the order of the
+ * strings must match the order of the enumerators in log.h.
+ */
+static const char *l9p_log_level_names[] = {
+	"DEBUG",
+	"INFO",
+	"WARN",
+	"ERROR"
+};
+
+/*
+ * Write one log line of the form "[LEVEL]\t func: message\n".
+ *
+ * The destination is chosen on first use from the LIB9P_LOGGING
+ * environment variable: "stderr" selects stderr, any other value is
+ * opened as a file in append mode.  While the variable is unset, or the
+ * file cannot be opened, the call does nothing (and tries again on the
+ * next call).  Once a stream has been chosen it is kept for later calls.
+ *
+ * level - severity; a value outside the name table is printed as "?"
+ * func  - name of the calling function, printed verbatim
+ * fmt   - printf-style format for the message, followed by its arguments
+ */
+void
+l9p_logf(enum l9p_log_level level, const char *func, const char *fmt, ...)
+{
+	static FILE *stream = NULL;
+	const size_t nlevels = sizeof (l9p_log_level_names) /
+	    sizeof (l9p_log_level_names[0]);
+	const char *dest, *name;
+	va_list ap;
+
+	if (stream == NULL) {
+		dest = getenv("LIB9P_LOGGING");
+		if (dest == NULL)
+			return;
+		if (strcmp(dest, "stderr") == 0) {
+			stream = stderr;
+		} else {
+			stream = fopen(dest, "a");
+			if (stream == NULL)
+				return;
+		}
+	}
+
+	/* never index the name table with an out-of-range level */
+	name = ((size_t)level < nlevels) ? l9p_log_level_names[level] : "?";
+
+	fprintf(stream, "[%s]\t %s: ", name, func);
+	va_start(ap, fmt);
+	vfprintf(stream, fmt, ap);
+	va_end(ap);
+	fprintf(stream, "\n");
+	fflush(stream);
+}
diff --git a/usr/src/lib/lib9p/common/log.h b/usr/src/lib/lib9p/common/log.h
new file mode 100644
index 0000000000..b801d4017a
--- /dev/null
+++ b/usr/src/lib/lib9p/common/log.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_LOG_H
+#define	LIB9P_LOG_H
+
+/*
+ * Severity passed to l9p_logf() and L9P_LOG().  The enumerators take the
+ * default values 0 through 3 in the order listed.
+ */
+enum l9p_log_level {
+	L9P_DEBUG,
+	L9P_INFO,
+	L9P_WARNING,
+	L9P_ERROR
+};
+
+/* Formats and emits one log message; normally reached through L9P_LOG. */
+void l9p_logf(enum l9p_log_level level, const char *func, const char *fmt, ...);
+
+/*
+ * L9P_LOG(level, fmt, ...) logs through l9p_logf(), supplying the calling
+ * function's name via __func__, or expands to nothing.
+ *
+ * NOTE(review): defined() only sees preprocessor macros, so the L9P_DEBUG
+ * enumerator does not select the first branch by itself.  Presumably the
+ * build passes something like -DL9P_DEBUG=L9P_DEBUG when logging is
+ * wanted -- confirm against the Makefiles.  Without such a definition
+ * every L9P_LOG() call compiles away.
+ */
+#if defined(L9P_DEBUG)
+#define	L9P_LOG(level, fmt, ...) l9p_logf(level, __func__, fmt, ##__VA_ARGS__)
+#else
+#define L9P_LOG(level, fmt, ...)
+#endif
+
+#endif	/* LIB9P_LOG_H */
diff --git a/usr/src/lib/lib9p/common/pack.c b/usr/src/lib/lib9p/common/pack.c
new file mode 100644
index 0000000000..13ec5f02b5
--- /dev/null
+++ b/usr/src/lib/lib9p/common/pack.c
@@ -0,0 +1,996 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#ifdef __APPLE__
+# include "apple_endian.h"
+#elif __illumos__
+# include "illumos_endian.h"
+# include <sys/sysmacros.h>
+#else
+# include <sys/endian.h>
+#endif
+#include <sys/uio.h>
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "log.h"
+
+/* Number of elements in an array object (not valid for a pointer). */
+#define	N(ary)		(sizeof (ary) / sizeof (*(ary)))
+/* Wire size of a string: length word plus its bytes; NULL counts as empty. */
+#define	STRING_SIZE(s)	(L9P_WORD + ((s) != NULL ? (uint16_t)strlen(s) : 0))
+/* Wire size of a qid: type byte, 32-bit version, 64-bit path. */
+#define	QID_SIZE	(L9P_BYTE + L9P_DWORD + L9P_QWORD)
+
+static ssize_t l9p_iov_io(struct l9p_message *, void *, size_t);
+static inline ssize_t l9p_pu8(struct l9p_message *, uint8_t *);
+static inline ssize_t l9p_pu16(struct l9p_message *, uint16_t *);
+static inline ssize_t l9p_pu32(struct l9p_message *, uint32_t *);
+static inline ssize_t l9p_pu64(struct l9p_message *, uint64_t *);
+static ssize_t l9p_pustring(struct l9p_message *, char **s);
+static ssize_t l9p_pustrings(struct l9p_message *, uint16_t *, char **, size_t);
+static ssize_t l9p_puqid(struct l9p_message *, struct l9p_qid *);
+static ssize_t l9p_puqids(struct l9p_message *, uint16_t *, struct l9p_qid *q);
+
+/*
+ * Move len bytes between the caller's buffer and the iovec array
+ * attached to msg, starting at msg's current cursor.  msg->lm_mode
+ * selects the direction: L9P_PACK copies buffer into the iovecs,
+ * L9P_UNPACK copies from the iovecs into buffer.
+ *
+ * On success the cursor and msg->lm_size advance by len, and len is
+ * returned (as a signed value).  If the iovecs are exhausted first,
+ * -1 is returned.  In that case the cursor sits past the last iovec,
+ * so every later call on the same (not-reset) msg with len > 0 also
+ * returns -1.  Callers running a sequence of transfers can therefore
+ * check only the final one.
+ */
+static ssize_t
+l9p_iov_io(struct l9p_message *msg, void *buffer, size_t len)
+{
+	char *user = buffer;
+	size_t copied, remaining;
+
+	assert(msg != NULL);
+
+	if (len == 0)
+		return (0);
+
+	/* Cursor already past the final iovec: nothing can be moved. */
+	if (msg->lm_cursor_iov >= msg->lm_niov)
+		return (-1);
+
+	assert(buffer != NULL);
+
+	for (copied = 0, remaining = len; remaining > 0; ) {
+		size_t cur = msg->lm_cursor_iov;
+		size_t avail = msg->lm_iov[cur].iov_len - msg->lm_cursor_offset;
+		size_t chunk = MIN(avail, remaining);
+		char *wire = (char *)msg->lm_iov[cur].iov_base +
+		    msg->lm_cursor_offset;
+
+		if (msg->lm_mode == L9P_PACK)
+			memcpy(wire, user + copied, chunk);
+		else if (msg->lm_mode == L9P_UNPACK)
+			memcpy(user + copied, wire, chunk);
+
+		msg->lm_cursor_offset += chunk;
+		copied += chunk;
+		remaining -= chunk;
+
+		/* This iovec still has room, so the request is complete. */
+		if (avail != chunk)
+			continue;
+
+		/* Current iovec is used up; step to the start of the next. */
+		msg->lm_cursor_iov++;
+		msg->lm_cursor_offset = 0;
+
+		/* Out of iovecs with bytes still owed. */
+		if (remaining > 0 && msg->lm_cursor_iov >= msg->lm_niov)
+			return (-1);
+	}
+
+	msg->lm_size += copied;
+	return ((ssize_t)copied);
+}
+
+/*
+ * Transfer a single byte (8 bits) in the direction set on msg.
+ *
+ * Returns 1 when the byte was moved, -1 when msg has no room.
+ */
+static inline ssize_t
+l9p_pu8(struct l9p_message *msg, uint8_t *val)
+{
+
+	return (l9p_iov_io(msg, val, sizeof (*val)));
+}
+
+/*
+ * Transfer a 16-bit value; the wire format is little-endian.
+ *
+ * Returns 2 on success or -1 if msg ran out of room.
+ */
+static inline ssize_t
+l9p_pu16(struct l9p_message *msg, uint16_t *val)
+{
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+	/* Host order already matches the wire; move the bytes as-is. */
+	return (l9p_iov_io(msg, val, sizeof (*val)));
+#else
+	/*
+	 * Other byte orders: pack from a converted temporary so the
+	 * caller's value is untouched; unpack in place, then convert.
+	 * Spelled out with ifdefs rather than trusting the compiler.
+	 */
+	ssize_t rv;
+	uint16_t wire;
+
+	if (msg->lm_mode != L9P_PACK) {
+		rv = l9p_iov_io(msg, val, sizeof (*val));
+		*val = le16toh(*val);
+		return (rv);
+	}
+	wire = htole16(*val);
+	return (l9p_iov_io(msg, &wire, sizeof (wire)));
+#endif
+}
+
+/*
+ * Transfer a 32-bit value; the wire format is little-endian.
+ *
+ * Returns 4 on success or -1 if msg ran out of room.
+ */
+static inline ssize_t
+l9p_pu32(struct l9p_message *msg, uint32_t *val)
+{
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+	return (l9p_iov_io(msg, val, sizeof (*val)));
+#else
+	ssize_t rv;
+	uint32_t wire;
+
+	if (msg->lm_mode != L9P_PACK) {
+		rv = l9p_iov_io(msg, val, sizeof (*val));
+		*val = le32toh(*val);
+		return (rv);
+	}
+	wire = htole32(*val);
+	return (l9p_iov_io(msg, &wire, sizeof (wire)));
+#endif
+}
+
+/*
+ * Pack or unpack 64-bit value (little-endian on the wire).  The unpack
+ * path must move all 8 bytes before le64toh() converts *val in place.
+ * Returns 8 or -1.
+ */
+static inline ssize_t
+l9p_pu64(struct l9p_message *msg, uint64_t *val)
+{
+#if _BYTE_ORDER != _LITTLE_ENDIAN
+	uint64_t copy;
+	ssize_t ret;
+
+	if (msg->lm_mode == L9P_PACK) {
+		copy = htole64(*val);
+		return (l9p_iov_io(msg, &copy, sizeof (uint64_t)));
+	}
+	ret = l9p_iov_io(msg, val, sizeof (uint64_t));
+	*val = le64toh(*val);
+	return (ret);
+#else
+	return (l9p_iov_io(msg, val, sizeof (uint64_t)));
+#endif
+}
+
+/*
+ * Transfer one 9P string: a 16-bit byte count followed by that many
+ * bytes.  The value returned on success is the string length plus 2
+ * for the count.
+ *
+ * Packing a NULL *s sends an empty string.  Unpacking stores a newly
+ * allocated, NUL-terminated copy in *s.
+ *
+ * Returns -1 if msg runs out of room, the allocation fails, or the
+ * unpacked bytes contain a NUL.  The last two mean this function
+ * (and so l9p_pustrings) can fail even though l9p_iov_io succeeded.
+ * A buffer allocated before a failure is left in *s.
+ */
+static ssize_t
+l9p_pustring(struct l9p_message *msg, char **s)
+{
+	const int unpacking = (msg->lm_mode == L9P_UNPACK);
+	uint16_t nbytes;
+
+	if (msg->lm_mode == L9P_PACK)
+		nbytes = (*s == NULL) ? 0 : (uint16_t)strlen(*s);
+
+	if (l9p_pu16(msg, &nbytes) < 0)
+		return (-1);
+
+	if (unpacking) {
+		/* The extra byte is for the NUL terminator. */
+		*s = l9p_calloc(1, nbytes + 1);
+		if (*s == NULL)
+			return (-1);
+	}
+
+	if (l9p_iov_io(msg, *s, nbytes) < 0)
+		return (-1);
+
+	/*
+	 * Reject embedded NULs instead of silently treating the
+	 * string as shorter than its declared length.
+	 */
+	if (unpacking && memchr(*s, '\0', nbytes) != NULL)
+		return (-1);
+
+	return ((ssize_t)nbytes + 2);
+}
+
+/*
+ * Transfer a counted list of strings: a 16-bit count, then each
+ * string in l9p_pustring() format.  At most max strings are moved.
+ *
+ * The clamping is asymmetric.  When packing, only the count written
+ * to the wire is reduced and *num is left alone.  When unpacking,
+ * *num itself is lowered to max if the wire count exceeds it.
+ *
+ * Returns the total bytes transferred, count field included, or -1
+ * on error.
+ */
+static ssize_t
+l9p_pustrings(struct l9p_message *msg, uint16_t *num, char **strings,
+    size_t max)
+{
+	ssize_t total, one;
+	size_t count, idx;
+	uint16_t wire_count;
+
+	if (msg->lm_mode != L9P_PACK) {
+		total = l9p_pu16(msg, num);
+		count = *num;
+		if (count > max) {
+			count = max;
+			*num = (uint16_t)max;
+		}
+	} else {
+		count = *num;
+		if (count > max)
+			count = max;
+		wire_count = (uint16_t)count;
+		total = l9p_pu16(msg, &wire_count);
+	}
+	if (total < 0)
+		return (-1);
+
+	for (idx = 0; idx < count; idx++) {
+		one = l9p_pustring(msg, strings + idx);
+		if (one < 1)
+			return (-1);
+		total += one;
+	}
+
+	return (total);
+}
+
+/*
+ * Transfer a qid: its type byte, 32-bit version and 64-bit path, in
+ * that order.
+ *
+ * Returns QID_SIZE on success, or the failing field's result (-1).
+ */
+static ssize_t
+l9p_puqid(struct l9p_message *msg, struct l9p_qid *qid)
+{
+	uint8_t qtype;
+	ssize_t rv;
+
+	/*
+	 * The type goes through a local uint8_t rather than being
+	 * transferred straight from the structure (presumably because
+	 * qid->type is not declared as a uint8_t -- confirm in lib9p.h).
+	 */
+	if (msg->lm_mode != L9P_PACK) {
+		rv = l9p_pu8(msg, &qtype);
+		qid->type = qtype;
+	} else {
+		qtype = qid->type;
+		rv = l9p_pu8(msg, &qtype);
+	}
+	if (rv <= 0)
+		return (rv);
+
+	rv = l9p_pu32(msg, &qid->version);
+	if (rv <= 0)
+		return (rv);
+
+	rv = l9p_pu64(msg, &qid->path);
+	if (rv <= 0)
+		return (rv);
+
+	return (QID_SIZE);
+}
+
+/*
+ * Transfer a counted list of qids: a 16-bit count (*num, which is
+ * overwritten when unpacking) followed by that many qids.
+ *
+ * Returns the bytes transferred, i.e. the count field plus QID_SIZE
+ * per qid, or -1 on error.
+ *
+ * NOTE(review): nothing here limits *num to the capacity of qids.
+ * When unpacking, the count comes from the message; confirm that
+ * callers guarantee the array is large enough.
+ */
+static ssize_t
+l9p_puqids(struct l9p_message *msg, uint16_t *num, struct l9p_qid *qids)
+{
+	ssize_t total, one;
+	size_t count, idx;
+
+	total = l9p_pu16(msg, num);
+	if (total <= 0)
+		return (total);
+
+	count = *num;
+	for (idx = 0; idx < count; idx++) {
+		one = l9p_puqid(msg, qids + idx);
+		if (one < 0)
+			return (-1);
+		total += one;
+	}
+
+	return (total);
+}
+
+/*
+ * Record the result of one field transfer for l9p_pustat(): add a
+ * successful byte count to *total.  Returns 1 on success, 0 if the
+ * transfer failed (n < 0).
+ */
+static inline int
+l9p_pustat_field(ssize_t *total, ssize_t n)
+{
+	if (n < 0)
+		return (0);
+	*total += n;
+	return (1);
+}
+
+/*
+ * Pack or unpack a l9p_stat.
+ *
+ * These have variable size, and the size further depends on
+ * the protocol version.
+ *
+ * Every field's result is checked individually.  Summing the results
+ * and comparing against the size field is not enough when unpacking:
+ * that size comes from the message, so a small value there could hide
+ * a field that failed (for example a string rejected by
+ * l9p_pustring()).  All fields are still attempted, as before.
+ *
+ * Returns the number of bytes packed/unpacked, or -1 on error.
+ */
+ssize_t
+l9p_pustat(struct l9p_message *msg, struct l9p_stat *stat,
+    enum l9p_version version)
+{
+	ssize_t r = 0;
+	uint16_t size;
+	int ok = 1;
+
+	/* The on-wire size field excludes the size of the size field. */
+	if (msg->lm_mode == L9P_PACK)
+		size = l9p_sizeof_stat(stat, version) - 2;
+
+	ok &= l9p_pustat_field(&r, l9p_pu16(msg, &size));
+	ok &= l9p_pustat_field(&r, l9p_pu16(msg, &stat->type));
+	ok &= l9p_pustat_field(&r, l9p_pu32(msg, &stat->dev));
+	ok &= l9p_pustat_field(&r, l9p_puqid(msg, &stat->qid));
+	ok &= l9p_pustat_field(&r, l9p_pu32(msg, &stat->mode));
+	ok &= l9p_pustat_field(&r, l9p_pu32(msg, &stat->atime));
+	ok &= l9p_pustat_field(&r, l9p_pu32(msg, &stat->mtime));
+	ok &= l9p_pustat_field(&r, l9p_pu64(msg, &stat->length));
+	ok &= l9p_pustat_field(&r, l9p_pustring(msg, &stat->name));
+	ok &= l9p_pustat_field(&r, l9p_pustring(msg, &stat->uid));
+	ok &= l9p_pustat_field(&r, l9p_pustring(msg, &stat->gid));
+	ok &= l9p_pustat_field(&r, l9p_pustring(msg, &stat->muid));
+
+	if (version >= L9P_2000U) {
+		ok &= l9p_pustat_field(&r,
+		    l9p_pustring(msg, &stat->extension));
+		ok &= l9p_pustat_field(&r, l9p_pu32(msg, &stat->n_uid));
+		ok &= l9p_pustat_field(&r, l9p_pu32(msg, &stat->n_gid));
+		ok &= l9p_pustat_field(&r, l9p_pu32(msg, &stat->n_muid));
+	}
+
+	/* Test ok first: size is unset if its own transfer failed. */
+	if (!ok || r < size + 2)
+		return (-1);
+
+	return (r);
+}
+
+/*
+ * Transfer one variable-length directory entry: qid, 64-bit offset,
+ * type byte, then the name as a string.
+ *
+ * When unpacking, the name is allocated by l9p_pustring() and the
+ * caller is responsible for freeing it.
+ *
+ * Returns the wire-format length, or -1 if we ran out of room or the
+ * name could not be transferred.
+ */
+ssize_t
+l9p_pudirent(struct l9p_message *msg, struct l9p_dirent *de)
+{
+	ssize_t fixed, name;
+
+	fixed = l9p_puqid(msg, &de->qid);
+	fixed += l9p_pu64(msg, &de->offset);
+	fixed += l9p_pu8(msg, &de->type);
+	name = l9p_pustring(msg, &de->name);
+
+	if (name < 0)
+		return (-1);
+
+	/* Any failure above leaves the fixed part short of full size. */
+	if (fixed < QID_SIZE + 8 + 1)
+		return (-1);
+
+	return (fixed + name);
+}
+
+/*
+ * Pack or unpack a request or response (fcall).
+ *
+ * The direction is taken from msg->lm_mode.  Every message starts
+ * with a 32-bit length, a type byte and a 16-bit tag; the remaining
+ * fields depend on fcall->hdr.type and, for some messages, on the
+ * protocol version.
+ *
+ * Most cases check only the result of their last transfer: once
+ * l9p_iov_io() has run out of room, every later transfer on the same
+ * msg fails too.  l9p_pustring() can fail for other reasons, so its
+ * result is checked wherever further fields follow it.
+ *
+ * When packing, the length field is filled in at the end by rewinding
+ * msg; for Rread/Rreaddir it additionally counts io.count bytes of
+ * payload that are not packed here.
+ *
+ * Returns 0 on success, -1 on error.  (It's up to the caller
+ * to call l9p_freefcall on our failure.)
+ */
+int
+l9p_pufcall(struct l9p_message *msg, union l9p_fcall *fcall,
+    enum l9p_version version)
+{
+	uint32_t length = 0;
+	ssize_t r;
+
+	/*
+	 * Get overall length, type, and tag, which should appear
+	 * in all messages.  If not even that works, abort immediately.
+	 */
+	l9p_pu32(msg, &length);
+	l9p_pu8(msg, &fcall->hdr.type);
+	r = l9p_pu16(msg, &fcall->hdr.tag);
+	if (r < 0)
+		return (-1);
+
+	/*
+	 * Decode remainder of message.  When unpacking, this may
+	 * allocate memory, even if we fail during the decode.
+	 * Note that the initial fcall is zeroed out, though, so
+	 * we can just freefcall() to release whatever might have
+	 * gotten allocated, if the unpack fails due to a short
+	 * packet.
+	 */
+	switch (fcall->hdr.type) {
+	case L9P_TVERSION:
+	case L9P_RVERSION:
+		l9p_pu32(msg, &fcall->version.msize);
+		r = l9p_pustring(msg, &fcall->version.version);
+		break;
+
+	case L9P_TAUTH:
+		l9p_pu32(msg, &fcall->tauth.afid);
+		r = l9p_pustring(msg, &fcall->tauth.uname);
+		if (r < 0)
+			break;
+		r = l9p_pustring(msg, &fcall->tauth.aname);
+		if (r < 0)
+			break;
+		if (version >= L9P_2000U)
+			r = l9p_pu32(msg, &fcall->tauth.n_uname);
+		break;
+
+	case L9P_RAUTH:
+		r = l9p_puqid(msg, &fcall->rauth.aqid);
+		break;
+
+	case L9P_TATTACH:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu32(msg, &fcall->tattach.afid);
+		r = l9p_pustring(msg, &fcall->tattach.uname);
+		if (r < 0)
+			break;
+		r = l9p_pustring(msg, &fcall->tattach.aname);
+		if (r < 0)
+			break;
+		if (version >= L9P_2000U)
+			r = l9p_pu32(msg, &fcall->tattach.n_uname);
+		break;
+
+	case L9P_RATTACH:
+		r = l9p_puqid(msg, &fcall->rattach.qid);
+		break;
+
+	case L9P_RERROR:
+		r = l9p_pustring(msg, &fcall->error.ename);
+		if (r < 0)
+			break;
+		if (version >= L9P_2000U)
+			r = l9p_pu32(msg, &fcall->error.errnum);
+		break;
+
+	case L9P_RLERROR:
+		r = l9p_pu32(msg, &fcall->error.errnum);
+		break;
+
+	case L9P_TFLUSH:
+		r = l9p_pu16(msg, &fcall->tflush.oldtag);
+		break;
+
+	case L9P_RFLUSH:
+		break;
+
+	case L9P_TWALK:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu32(msg, &fcall->twalk.newfid);
+		r = l9p_pustrings(msg, &fcall->twalk.nwname,
+		    fcall->twalk.wname, N(fcall->twalk.wname));
+		break;
+
+	case L9P_RWALK:
+		r = l9p_puqids(msg, &fcall->rwalk.nwqid, fcall->rwalk.wqid);
+		break;
+
+	case L9P_TOPEN:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pu8(msg, &fcall->topen.mode);
+		break;
+
+	case L9P_ROPEN:
+		l9p_puqid(msg, &fcall->ropen.qid);
+		r = l9p_pu32(msg, &fcall->ropen.iounit);
+		break;
+
+	case L9P_TCREATE:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tcreate.name);
+		if (r < 0)
+			break;
+		l9p_pu32(msg, &fcall->tcreate.perm);
+		r = l9p_pu8(msg, &fcall->tcreate.mode);
+		if (version >= L9P_2000U)
+			r = l9p_pustring(msg, &fcall->tcreate.extension);
+		break;
+
+	case L9P_RCREATE:
+		l9p_puqid(msg, &fcall->rcreate.qid);
+		r = l9p_pu32(msg, &fcall->rcreate.iounit);
+		break;
+
+	case L9P_TREAD:
+	case L9P_TREADDIR:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu64(msg, &fcall->io.offset);
+		r = l9p_pu32(msg, &fcall->io.count);
+		break;
+
+	case L9P_RREAD:
+	case L9P_RREADDIR:
+		r = l9p_pu32(msg, &fcall->io.count);
+		break;
+
+	case L9P_TWRITE:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu64(msg, &fcall->io.offset);
+		r = l9p_pu32(msg, &fcall->io.count);
+		break;
+
+	case L9P_RWRITE:
+		r = l9p_pu32(msg, &fcall->io.count);
+		break;
+
+	case L9P_TCLUNK:
+	case L9P_TSTAT:
+	case L9P_TREMOVE:
+	case L9P_TSTATFS:
+		r = l9p_pu32(msg, &fcall->hdr.fid);
+		break;
+
+	case L9P_RCLUNK:
+	case L9P_RREMOVE:
+		break;
+
+	case L9P_RSTAT:
+	{
+		uint16_t size = l9p_sizeof_stat(&fcall->rstat.stat,
+		    version);
+		l9p_pu16(msg, &size);
+		r = l9p_pustat(msg, &fcall->rstat.stat, version);
+	}
+		break;
+
+	case L9P_TWSTAT:
+	{
+		/*
+		 * Outer size field.  It is computed only when packing
+		 * (it used to be written uninitialized); when unpacking
+		 * it is overwritten by the value read and then ignored.
+		 */
+		uint16_t size = 0;
+
+		if (msg->lm_mode == L9P_PACK)
+			size = l9p_sizeof_stat(&fcall->twstat.stat, version);
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu16(msg, &size);
+		r = l9p_pustat(msg, &fcall->twstat.stat, version);
+	}
+		break;
+
+	case L9P_RWSTAT:
+		break;
+
+	case L9P_RSTATFS:
+		l9p_pu32(msg, &fcall->rstatfs.statfs.type);
+		l9p_pu32(msg, &fcall->rstatfs.statfs.bsize);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.blocks);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.bfree);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.bavail);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.files);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.ffree);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.fsid);
+		r = l9p_pu32(msg, &fcall->rstatfs.statfs.namelen);
+		break;
+
+	case L9P_TLOPEN:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pu32(msg, &fcall->tlopen.flags);
+		break;
+
+	case L9P_RLOPEN:
+		l9p_puqid(msg, &fcall->rlopen.qid);
+		r = l9p_pu32(msg, &fcall->rlopen.iounit);
+		break;
+
+	case L9P_TLCREATE:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tlcreate.name);
+		if (r < 0)
+			break;
+		l9p_pu32(msg, &fcall->tlcreate.flags);
+		l9p_pu32(msg, &fcall->tlcreate.mode);
+		r = l9p_pu32(msg, &fcall->tlcreate.gid);
+		break;
+
+	case L9P_RLCREATE:
+		l9p_puqid(msg, &fcall->rlcreate.qid);
+		r = l9p_pu32(msg, &fcall->rlcreate.iounit);
+		break;
+
+	case L9P_TSYMLINK:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tsymlink.name);
+		if (r < 0)
+			break;
+		r = l9p_pustring(msg, &fcall->tsymlink.symtgt);
+		if (r < 0)
+			break;
+		/*
+		 * Use the Tsymlink view of the union; going through
+		 * tlcreate.gid only works if both members happen to
+		 * place gid at the same offset.
+		 */
+		r = l9p_pu32(msg, &fcall->tsymlink.gid);
+		break;
+
+	case L9P_RSYMLINK:
+		r = l9p_puqid(msg, &fcall->rsymlink.qid);
+		break;
+
+	case L9P_TMKNOD:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tmknod.name);
+		if (r < 0)
+			break;
+		l9p_pu32(msg, &fcall->tmknod.mode);
+		l9p_pu32(msg, &fcall->tmknod.major);
+		l9p_pu32(msg, &fcall->tmknod.minor);
+		r = l9p_pu32(msg, &fcall->tmknod.gid);
+		break;
+
+	case L9P_RMKNOD:
+		r = l9p_puqid(msg, &fcall->rmknod.qid);
+		break;
+
+	case L9P_TRENAME:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu32(msg, &fcall->trename.dfid);
+		r = l9p_pustring(msg, &fcall->trename.name);
+		break;
+
+	case L9P_RRENAME:
+		break;
+
+	case L9P_TREADLINK:
+		r = l9p_pu32(msg, &fcall->hdr.fid);
+		break;
+
+	case L9P_RREADLINK:
+		r = l9p_pustring(msg, &fcall->rreadlink.target);
+		break;
+
+	case L9P_TGETATTR:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pu64(msg, &fcall->tgetattr.request_mask);
+		break;
+
+	case L9P_RGETATTR:
+		l9p_pu64(msg, &fcall->rgetattr.valid);
+		l9p_puqid(msg, &fcall->rgetattr.qid);
+		l9p_pu32(msg, &fcall->rgetattr.mode);
+		l9p_pu32(msg, &fcall->rgetattr.uid);
+		l9p_pu32(msg, &fcall->rgetattr.gid);
+		l9p_pu64(msg, &fcall->rgetattr.nlink);
+		l9p_pu64(msg, &fcall->rgetattr.rdev);
+		l9p_pu64(msg, &fcall->rgetattr.size);
+		l9p_pu64(msg, &fcall->rgetattr.blksize);
+		l9p_pu64(msg, &fcall->rgetattr.blocks);
+		l9p_pu64(msg, &fcall->rgetattr.atime_sec);
+		l9p_pu64(msg, &fcall->rgetattr.atime_nsec);
+		l9p_pu64(msg, &fcall->rgetattr.mtime_sec);
+		l9p_pu64(msg, &fcall->rgetattr.mtime_nsec);
+		l9p_pu64(msg, &fcall->rgetattr.ctime_sec);
+		l9p_pu64(msg, &fcall->rgetattr.ctime_nsec);
+		l9p_pu64(msg, &fcall->rgetattr.btime_sec);
+		l9p_pu64(msg, &fcall->rgetattr.btime_nsec);
+		l9p_pu64(msg, &fcall->rgetattr.gen);
+		r = l9p_pu64(msg, &fcall->rgetattr.data_version);
+		break;
+
+	case L9P_TSETATTR:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu32(msg, &fcall->tsetattr.valid);
+		l9p_pu32(msg, &fcall->tsetattr.mode);
+		l9p_pu32(msg, &fcall->tsetattr.uid);
+		l9p_pu32(msg, &fcall->tsetattr.gid);
+		l9p_pu64(msg, &fcall->tsetattr.size);
+		l9p_pu64(msg, &fcall->tsetattr.atime_sec);
+		l9p_pu64(msg, &fcall->tsetattr.atime_nsec);
+		l9p_pu64(msg, &fcall->tsetattr.mtime_sec);
+		r = l9p_pu64(msg, &fcall->tsetattr.mtime_nsec);
+		break;
+
+	case L9P_RSETATTR:
+		break;
+
+	case L9P_TXATTRWALK:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu32(msg, &fcall->txattrwalk.newfid);
+		r = l9p_pustring(msg, &fcall->txattrwalk.name);
+		break;
+
+	case L9P_RXATTRWALK:
+		r = l9p_pu64(msg, &fcall->rxattrwalk.size);
+		break;
+
+	case L9P_TXATTRCREATE:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->txattrcreate.name);
+		if (r < 0)
+			break;
+		l9p_pu64(msg, &fcall->txattrcreate.attr_size);
+		r = l9p_pu32(msg, &fcall->txattrcreate.flags);
+		break;
+
+	case L9P_RXATTRCREATE:
+		break;
+
+	case L9P_TFSYNC:
+		r = l9p_pu32(msg, &fcall->hdr.fid);
+		break;
+
+	case L9P_RFSYNC:
+		break;
+
+	case L9P_TLOCK:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu8(msg, &fcall->tlock.type);
+		l9p_pu32(msg, &fcall->tlock.flags);
+		l9p_pu64(msg, &fcall->tlock.start);
+		l9p_pu64(msg, &fcall->tlock.length);
+		l9p_pu32(msg, &fcall->tlock.proc_id);
+		r = l9p_pustring(msg, &fcall->tlock.client_id);
+		break;
+
+	case L9P_RLOCK:
+		r = l9p_pu8(msg, &fcall->rlock.status);
+		break;
+
+	case L9P_TGETLOCK:
+		/* Same body as Rgetlock, preceded by the fid. */
+		l9p_pu32(msg, &fcall->hdr.fid);
+		/* FALLTHROUGH */
+
+	case L9P_RGETLOCK:
+		l9p_pu8(msg, &fcall->getlock.type);
+		l9p_pu64(msg, &fcall->getlock.start);
+		l9p_pu64(msg, &fcall->getlock.length);
+		l9p_pu32(msg, &fcall->getlock.proc_id);
+		r = l9p_pustring(msg, &fcall->getlock.client_id);
+		break;
+
+	case L9P_TLINK:
+		/* On the wire the directory fid precedes the file's fid. */
+		l9p_pu32(msg, &fcall->tlink.dfid);
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tlink.name);
+		break;
+
+	case L9P_RLINK:
+		break;
+
+	case L9P_TMKDIR:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tmkdir.name);
+		if (r < 0)
+			break;
+		l9p_pu32(msg, &fcall->tmkdir.mode);
+		r = l9p_pu32(msg, &fcall->tmkdir.gid);
+		break;
+
+	case L9P_RMKDIR:
+		r = l9p_puqid(msg, &fcall->rmkdir.qid);
+		break;
+
+	case L9P_TRENAMEAT:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->trenameat.oldname);
+		if (r < 0)
+			break;
+		l9p_pu32(msg, &fcall->trenameat.newdirfid);
+		r = l9p_pustring(msg, &fcall->trenameat.newname);
+		break;
+
+	case L9P_RRENAMEAT:
+		break;
+
+	case L9P_TUNLINKAT:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tunlinkat.name);
+		if (r < 0)
+			break;
+		r = l9p_pu32(msg, &fcall->tunlinkat.flags);
+		break;
+
+	case L9P_RUNLINKAT:
+		break;
+
+	default:
+		/*
+		 * Unknown type: only logged.  r still holds the
+		 * header result, so this is not reported as an error.
+		 */
+		L9P_LOG(L9P_ERROR, "%s(): missing case for type %d",
+		    __func__, fcall->hdr.type);
+		break;
+	}
+
+	/* Check for over- or under-run, or pustring error. */
+	if (r < 0)
+		return (-1);
+
+	if (msg->lm_mode == L9P_PACK) {
+		/* Rewind to the beginning and install size at front. */
+		uint32_t len = (uint32_t)msg->lm_size;
+		msg->lm_cursor_offset = 0;
+		msg->lm_cursor_iov = 0;
+
+		/*
+		 * Subtract 4 bytes from current size, because we're
+		 * overwriting size (rewinding message to the beginning)
+		 * and writing again, which will increase it 4 more.
+		 */
+		msg->lm_size -= sizeof(uint32_t);
+
+		/* Read payloads are not packed here but do count. */
+		if (fcall->hdr.type == L9P_RREAD ||
+		    fcall->hdr.type == L9P_RREADDIR)
+			len += fcall->io.count;
+
+		l9p_pu32(msg, &len);
+	}
+
+	return (0);
+}
+
+/*
+ * Free any strings or other data malloc'ed in the process of
+ * packing or unpacking an fcall.
+ *
+ * The members released are selected by fcall->hdr.type; types with
+ * no case here are left untouched.  Tauth is included because
+ * l9p_pufcall() allocates its uname and aname strings when
+ * unpacking, exactly as it does for Tattach.
+ *
+ * NOTE(review): Rerror's ename is not released here -- confirm who
+ * owns that string before adding a case for it.
+ */
+void
+l9p_freefcall(union l9p_fcall *fcall)
+{
+	uint16_t i;
+
+	switch (fcall->hdr.type) {
+
+	case L9P_TVERSION:
+	case L9P_RVERSION:
+		free(fcall->version.version);
+		return;
+
+	case L9P_TAUTH:
+		free(fcall->tauth.aname);
+		free(fcall->tauth.uname);
+		return;
+
+	case L9P_TATTACH:
+		free(fcall->tattach.aname);
+		free(fcall->tattach.uname);
+		return;
+
+	case L9P_TWALK:
+		/* Only the first nwname slots can hold strings. */
+		for (i = 0; i < fcall->twalk.nwname; i++)
+			free(fcall->twalk.wname[i]);
+		return;
+
+	case L9P_TCREATE:
+	case L9P_TOPEN:
+		free(fcall->tcreate.name);
+		free(fcall->tcreate.extension);
+		return;
+
+	case L9P_RSTAT:
+		l9p_freestat(&fcall->rstat.stat);
+		return;
+
+	case L9P_TWSTAT:
+		l9p_freestat(&fcall->twstat.stat);
+		return;
+
+	case L9P_TLCREATE:
+		free(fcall->tlcreate.name);
+		return;
+
+	case L9P_TSYMLINK:
+		free(fcall->tsymlink.name);
+		free(fcall->tsymlink.symtgt);
+		return;
+
+	case L9P_TMKNOD:
+		free(fcall->tmknod.name);
+		return;
+
+	case L9P_TRENAME:
+		free(fcall->trename.name);
+		return;
+
+	case L9P_RREADLINK:
+		free(fcall->rreadlink.target);
+		return;
+
+	case L9P_TXATTRWALK:
+		free(fcall->txattrwalk.name);
+		return;
+
+	case L9P_TXATTRCREATE:
+		free(fcall->txattrcreate.name);
+		return;
+
+	case L9P_TLOCK:
+		free(fcall->tlock.client_id);
+		return;
+
+	case L9P_TGETLOCK:
+	case L9P_RGETLOCK:
+		free(fcall->getlock.client_id);
+		return;
+
+	case L9P_TLINK:
+		free(fcall->tlink.name);
+		return;
+
+	case L9P_TMKDIR:
+		free(fcall->tmkdir.name);
+		return;
+
+	case L9P_TRENAMEAT:
+		free(fcall->trenameat.oldname);
+		free(fcall->trenameat.newname);
+		return;
+
+	case L9P_TUNLINKAT:
+		free(fcall->tunlinkat.name);
+		return;
+	}
+}
+
+void
+l9p_freestat(struct l9p_stat *stat)
+{
+	/* The five string members of the stat; free(NULL) is harmless. */
+	free(stat->muid);
+	free(stat->gid);
+	free(stat->uid);
+	free(stat->extension);
+	free(stat->name);
+}
+
+uint16_t
+l9p_sizeof_stat(struct l9p_stat *stat, enum l9p_version version)
+{
+	size_t total;
+
+	/* Fixed part: size, type, dev, qid, mode/atime/mtime, length. */
+	total = L9P_WORD;
+	total += L9P_WORD;
+	total += L9P_DWORD;
+	total += QID_SIZE;
+	total += 3 * L9P_DWORD;
+	total += L9P_QWORD;
+
+	/* Strings present in every version; NULL counts as empty. */
+	total += STRING_SIZE(stat->name);
+	total += STRING_SIZE(stat->uid);
+	total += STRING_SIZE(stat->gid);
+	total += STRING_SIZE(stat->muid);
+
+	/* From L9P_2000U on: extension string plus three 32-bit fields. */
+	if (version >= L9P_2000U) {
+		total += STRING_SIZE(stat->extension);
+		total += 3 * L9P_DWORD;
+	}
+
+	/* The result is narrowed to 16 bits, as the return type requires. */
+	return ((uint16_t)total);
+}
diff --git a/usr/src/lib/lib9p/common/request.c b/usr/src/lib/lib9p/common/request.c
new file mode 100644
index 0000000000..99885690af
--- /dev/null
+++ b/usr/src/lib/lib9p/common/request.c
@@ -0,0 +1,1446 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/param.h>
+#include <sys/uio.h>
+#ifdef __illumos__
+#include <sys/sysmacros.h>
+#endif
+#if defined(__FreeBSD__)
+#include <sys/sbuf.h>
+#else
+#include "sbuf/sbuf.h"
+#endif
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "fcall.h"
+#include "fid.h"
+#include "hashtable.h"
+#include "log.h"
+#include "linux_errno.h"
+#include "backend/backend.h"
+#include "threadpool.h"
+
+/* Number of elements in the array x (x must be a true array). */
+#define N(x)    (sizeof(x) / sizeof(x[0]))
+
+/*
+ * Forward declarations of the per-message dispatch functions that are
+ * referenced from the handler tables below.  Each takes the request
+ * being serviced and returns 0 or an errno value.
+ */
+static int l9p_dispatch_tversion(struct l9p_request *req);
+static int l9p_dispatch_tattach(struct l9p_request *req);
+static int l9p_dispatch_tclunk(struct l9p_request *req);
+static int l9p_dispatch_tcreate(struct l9p_request *req);
+static int l9p_dispatch_topen(struct l9p_request *req);
+static int l9p_dispatch_tread(struct l9p_request *req);
+static int l9p_dispatch_tremove(struct l9p_request *req);
+static int l9p_dispatch_tstat(struct l9p_request *req);
+static int l9p_dispatch_twalk(struct l9p_request *req);
+static int l9p_dispatch_twrite(struct l9p_request *req);
+static int l9p_dispatch_twstat(struct l9p_request *req);
+static int l9p_dispatch_tstatfs(struct l9p_request *req);
+static int l9p_dispatch_tlopen(struct l9p_request *req);
+static int l9p_dispatch_tlcreate(struct l9p_request *req);
+static int l9p_dispatch_tsymlink(struct l9p_request *req);
+static int l9p_dispatch_tmknod(struct l9p_request *req);
+static int l9p_dispatch_trename(struct l9p_request *req);
+static int l9p_dispatch_treadlink(struct l9p_request *req);
+static int l9p_dispatch_tgetattr(struct l9p_request *req);
+static int l9p_dispatch_tsetattr(struct l9p_request *req);
+static int l9p_dispatch_txattrwalk(struct l9p_request *req);
+static int l9p_dispatch_txattrcreate(struct l9p_request *req);
+static int l9p_dispatch_treaddir(struct l9p_request *req);
+static int l9p_dispatch_tfsync(struct l9p_request *req);
+static int l9p_dispatch_tlock(struct l9p_request *req);
+static int l9p_dispatch_tgetlock(struct l9p_request *req);
+static int l9p_dispatch_tlink(struct l9p_request *req);
+static int l9p_dispatch_tmkdir(struct l9p_request *req);
+static int l9p_dispatch_trenameat(struct l9p_request *req);
+static int l9p_dispatch_tunlinkat(struct l9p_request *req);
+
+/*
+ * Each Txxx handler has a "must run" flag.  If it is false,
+ * we check for a flush request before calling the handler.
+ * If a flush is already requested we can instantly fail the
+ * request with EINTR.
+ *
+ * Tclunk and Tremove must run because they make their fids
+ * become invalid.  Tversion and Tattach should never get
+ * a flush request applied (it makes no sense as the connection
+ * is not really running yet), so it should be harmless to
+ * set them either way, but for now we have them as must-run.
+ * Flushing a Tflush is not really allowed either so we keep
+ * these as must-run too (although they run without being done
+ * threaded anyway).
+ */
+struct l9p_handler {
+	enum l9p_ftype type;	/* request message type this entry handles */
+	int (*handler)(struct l9p_request *);	/* returns 0 or an errno */
+	bool must_run;		/* run even if a flush was already requested */
+};
+
+/* Table used before a version is negotiated: only Tversion is accepted. */
+static const struct l9p_handler l9p_handlers_no_version[] = {
+	{L9P_TVERSION, l9p_dispatch_tversion, true},
+};
+
+/* Table for the base 9P2000 protocol. */
+static const struct l9p_handler l9p_handlers_base[] = {
+	{L9P_TVERSION, l9p_dispatch_tversion, true},
+	{L9P_TATTACH, l9p_dispatch_tattach, true},
+	{L9P_TCLUNK, l9p_dispatch_tclunk, true},
+	{L9P_TFLUSH, l9p_threadpool_tflush, true},
+	{L9P_TCREATE, l9p_dispatch_tcreate, false},
+	{L9P_TOPEN, l9p_dispatch_topen, false},
+	{L9P_TREAD, l9p_dispatch_tread, false},
+	{L9P_TWRITE, l9p_dispatch_twrite, false},
+	{L9P_TREMOVE, l9p_dispatch_tremove, true},
+	{L9P_TSTAT, l9p_dispatch_tstat, false},
+	{L9P_TWALK, l9p_dispatch_twalk, false},
+	{L9P_TWSTAT, l9p_dispatch_twstat, false}
+};
+/* Table for 9P2000.u; currently the same entries as the base table. */
+static const struct l9p_handler l9p_handlers_dotu[] = {
+	{L9P_TVERSION, l9p_dispatch_tversion, true},
+	{L9P_TATTACH, l9p_dispatch_tattach, true},
+	{L9P_TCLUNK, l9p_dispatch_tclunk, true},
+	{L9P_TFLUSH, l9p_threadpool_tflush, true},
+	{L9P_TCREATE, l9p_dispatch_tcreate, false},
+	{L9P_TOPEN, l9p_dispatch_topen, false},
+	{L9P_TREAD, l9p_dispatch_tread, false},
+	{L9P_TWRITE, l9p_dispatch_twrite, false},
+	{L9P_TREMOVE, l9p_dispatch_tremove, true},
+	{L9P_TSTAT, l9p_dispatch_tstat, false},
+	{L9P_TWALK, l9p_dispatch_twalk, false},
+	{L9P_TWSTAT, l9p_dispatch_twstat, false}
+};
+/*
+ * Table for 9P2000.L: the base entries plus the .L-specific requests.
+ * Note that Tlock and Tgetlock are also marked must-run here.
+ */
+static const struct l9p_handler l9p_handlers_dotL[] = {
+	{L9P_TVERSION, l9p_dispatch_tversion, true},
+	{L9P_TATTACH, l9p_dispatch_tattach, true},
+	{L9P_TCLUNK, l9p_dispatch_tclunk, true},
+	{L9P_TFLUSH, l9p_threadpool_tflush, true},
+	{L9P_TCREATE, l9p_dispatch_tcreate, false},
+	{L9P_TOPEN, l9p_dispatch_topen, false},
+	{L9P_TREAD, l9p_dispatch_tread, false},
+	{L9P_TWRITE, l9p_dispatch_twrite, false},
+	{L9P_TREMOVE, l9p_dispatch_tremove, true},
+	{L9P_TSTAT, l9p_dispatch_tstat, false},
+	{L9P_TWALK, l9p_dispatch_twalk, false},
+	{L9P_TWSTAT, l9p_dispatch_twstat, false},
+	{L9P_TSTATFS, l9p_dispatch_tstatfs, false},
+	{L9P_TLOPEN, l9p_dispatch_tlopen, false},
+	{L9P_TLCREATE, l9p_dispatch_tlcreate, false},
+	{L9P_TSYMLINK, l9p_dispatch_tsymlink, false},
+	{L9P_TMKNOD, l9p_dispatch_tmknod, false},
+	{L9P_TRENAME, l9p_dispatch_trename, false},
+	{L9P_TREADLINK, l9p_dispatch_treadlink, false},
+	{L9P_TGETATTR, l9p_dispatch_tgetattr, false},
+	{L9P_TSETATTR, l9p_dispatch_tsetattr, false},
+	{L9P_TXATTRWALK, l9p_dispatch_txattrwalk, false},
+	{L9P_TXATTRCREATE, l9p_dispatch_txattrcreate, false},
+	{L9P_TREADDIR, l9p_dispatch_treaddir, false},
+	{L9P_TFSYNC, l9p_dispatch_tfsync, false},
+	{L9P_TLOCK, l9p_dispatch_tlock, true},
+	{L9P_TGETLOCK, l9p_dispatch_tgetlock, true},
+	{L9P_TLINK, l9p_dispatch_tlink, false},
+	{L9P_TMKDIR, l9p_dispatch_tmkdir, false},
+	{L9P_TRENAMEAT, l9p_dispatch_trenameat, false},
+	{L9P_TUNLINKAT, l9p_dispatch_tunlinkat, false},
+};
+
+/*
+ * NB: version index 0 is reserved for new connections, and
+ * is a protocol that handles only L9P_TVERSION.  Once we get a
+ * valid version, we start a new session using its dispatch table.
+ */
+static const struct {
+	const char *name;			/* version string on the wire */
+	const struct l9p_handler *handlers;	/* dispatch table to use */
+	int n_handlers;				/* entries in handlers[] */
+} l9p_versions[] = {
+	/* Indexed by the connection's lc_version (see l9p_dispatch_request). */
+	{ "<none>", l9p_handlers_no_version, N(l9p_handlers_no_version) },
+	{ "9P2000", l9p_handlers_base, N(l9p_handlers_base) },
+	{ "9P2000.u", l9p_handlers_dotu, N(l9p_handlers_dotu), },
+	{ "9P2000.L", l9p_handlers_dotL, N(l9p_handlers_dotL), },
+};
+
+/*
+ * Run the appropriate handler for this request.
+ * It's our caller's responsibility to respond.
+ */
+int
+l9p_dispatch_request(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+#if defined(L9P_DEBUG)
+	struct sbuf *sb;
+#endif
+	const struct l9p_handler *table;
+	const struct l9p_handler *hp = NULL;
+	size_t idx, count;
+	bool flush_requested;
+
+	flush_requested =
+	    (req->lr_flushstate == L9P_FLUSH_REQUESTED_PRE_START);
+
+	/* Pick the dispatch table for the connection's current version. */
+	table = l9p_versions[conn->lc_version].handlers;
+	count = (size_t)l9p_versions[conn->lc_version].n_handlers;
+
+	/* Linear search for the entry matching this message type. */
+	for (idx = 0; idx < count; idx++) {
+		if (table[idx].type == req->lr_req.hdr.type) {
+			hp = &table[idx];
+			break;
+		}
+	}
+
+#if defined(L9P_DEBUG)
+	sb = sbuf_new_auto();
+	if (flush_requested) {
+		sbuf_cat(sb, "FLUSH requested pre-dispatch");
+		if (hp != NULL && hp->must_run)
+			sbuf_cat(sb, ", but must run");
+		sbuf_cat(sb, ": ");
+	}
+	l9p_describe_fcall(&req->lr_req, conn->lc_version, sb);
+	sbuf_finish(sb);
+
+	L9P_LOG(L9P_DEBUG, "%s", sbuf_data(sb));
+	sbuf_delete(sb);
+#endif
+
+	/* No entry: this message type is not valid for this version. */
+	if (hp == NULL) {
+		L9P_LOG(L9P_WARNING, "unknown request of type %d",
+		    req->lr_req.hdr.type);
+		return (ENOSYS);
+	}
+
+	/* A flush that arrived before we started cancels optional work. */
+	if (flush_requested && !hp->must_run)
+		return (EINTR);
+
+	return (hp->handler(req));
+}
+
+/*
+ * Translate BSD errno to 9P2000/9P2000.u errno.
+ */
+static inline int
+e29p(int errnum)
+{
+	/* Sparse override table indexed by errno; 0 means "no override". */
+	static int const table[] = {
+		[ENOTEMPTY] = EPERM,
+		[EDQUOT] = EPERM,
+		[ENOSYS] = EPERM,	/* ??? */
+	};
+	int mapped;
+
+	if ((size_t)errnum < N(table)) {
+		mapped = table[errnum];
+		if (mapped != 0)
+			return (mapped);
+	}
+
+	/* Values up to ERANGE pass through; anything else becomes EIO. */
+	return (errnum <= ERANGE ? errnum : EIO);	/* ??? */
+}
+
+/*
+ * Translate BSD errno to Linux errno.
+ */
+static inline int
+e2linux(int errnum)
+{
+	/*
+	 * Sparse table indexed by the host errno value.  Entries left
+	 * unset are zero and mean "no explicit mapping"; those fall
+	 * through to the generic rules at the bottom of the function.
+	 */
+	static int const table[] = {
+		[EDEADLK] = LINUX_EDEADLK,
+		[EAGAIN] = LINUX_EAGAIN,
+		[EINPROGRESS] = LINUX_EINPROGRESS,
+		[EALREADY] = LINUX_EALREADY,
+		[ENOTSOCK] = LINUX_ENOTSOCK,
+		[EDESTADDRREQ] = LINUX_EDESTADDRREQ,
+		[EMSGSIZE] = LINUX_EMSGSIZE,
+		[EPROTOTYPE] = LINUX_EPROTOTYPE,
+		[ENOPROTOOPT] = LINUX_ENOPROTOOPT,
+		[EPROTONOSUPPORT] = LINUX_EPROTONOSUPPORT,
+		[ESOCKTNOSUPPORT] = LINUX_ESOCKTNOSUPPORT,
+		[EOPNOTSUPP] = LINUX_EOPNOTSUPP,
+		[EPFNOSUPPORT] = LINUX_EPFNOSUPPORT,
+		[EAFNOSUPPORT] = LINUX_EAFNOSUPPORT,
+		[EADDRINUSE] = LINUX_EADDRINUSE,
+		[EADDRNOTAVAIL] = LINUX_EADDRNOTAVAIL,
+		[ENETDOWN] = LINUX_ENETDOWN,
+		[ENETUNREACH] = LINUX_ENETUNREACH,
+		[ENETRESET] = LINUX_ENETRESET,
+		[ECONNABORTED] = LINUX_ECONNABORTED,
+		[ECONNRESET] = LINUX_ECONNRESET,
+		[ENOBUFS] = LINUX_ENOBUFS,
+		[EISCONN] = LINUX_EISCONN,
+		[ENOTCONN] = LINUX_ENOTCONN,
+		[ESHUTDOWN] = LINUX_ESHUTDOWN,
+		[ETOOMANYREFS] = LINUX_ETOOMANYREFS,
+		[ETIMEDOUT] = LINUX_ETIMEDOUT,
+		[ECONNREFUSED] = LINUX_ECONNREFUSED,
+		[ELOOP] = LINUX_ELOOP,
+		[ENAMETOOLONG] = LINUX_ENAMETOOLONG,
+		[EHOSTDOWN] = LINUX_EHOSTDOWN,
+		[EHOSTUNREACH] = LINUX_EHOSTUNREACH,
+		[ENOTEMPTY] = LINUX_ENOTEMPTY,
+#ifndef __illumos__
+		[EPROCLIM] = LINUX_EAGAIN,
+#endif
+		[EUSERS] = LINUX_EUSERS,
+		[EDQUOT] = LINUX_EDQUOT,
+		[ESTALE] = LINUX_ESTALE,
+		[EREMOTE] = LINUX_EREMOTE,
+		/* EBADRPC = unmappable? */
+		/* ERPCMISMATCH = unmappable? */
+		/* EPROGUNAVAIL = unmappable? */
+		/* EPROGMISMATCH = unmappable? */
+		/* EPROCUNAVAIL = unmappable? */
+		[ENOLCK] = LINUX_ENOLCK,
+		[ENOSYS] = LINUX_ENOSYS,
+		/* EFTYPE = unmappable? */
+		/* EAUTH = unmappable? */
+		/* ENEEDAUTH = unmappable? */
+		[EIDRM] = LINUX_EIDRM,
+		[ENOMSG] = LINUX_ENOMSG,
+		[EOVERFLOW] = LINUX_EOVERFLOW,
+		[ECANCELED] = LINUX_ECANCELED,
+		[EILSEQ] = LINUX_EILSEQ,
+		/* EDOOFUS = unmappable? */
+		[EBADMSG] = LINUX_EBADMSG,
+		[EMULTIHOP] = LINUX_EMULTIHOP,
+		[ENOLINK] = LINUX_ENOLINK,
+		[EPROTO] = LINUX_EPROTO,
+		/* ENOTCAPABLE = unmappable? */
+#ifdef ECAPMODE
+		[ECAPMODE] = EPERM,
+#endif
+#ifdef ENOTRECOVERABLE
+		[ENOTRECOVERABLE] = LINUX_ENOTRECOVERABLE,
+#endif
+#ifdef EOWNERDEAD
+		[EOWNERDEAD] = LINUX_EOWNERDEAD,
+#endif
+	};
+
+	/*
+	 * In case we want to return a raw Linux errno, allow negative
+	 * values a la Linux kernel internals.
+	 *
+	 * Values up to ERANGE are shared across systems (see
+	 * linux_errno.h), except for EAGAIN.
+	 */
+	if (errnum < 0)
+		return (-errnum);
+
+	/* Explicit mapping, if the table has a non-zero entry. */
+	if ((size_t)errnum < N(table) && table[errnum] != 0)
+		return (table[errnum]);
+
+	/* Low-numbered values are passed through unchanged. */
+	if (errnum <= ERANGE)
+		return (errnum);
+
+	/* Nothing matched: log it and fall back to a catch-all value. */
+	L9P_LOG(L9P_WARNING, "cannot map errno %d to anything reasonable",
+	    errnum);
+
+	return (LINUX_ENOTRECOVERABLE);	/* ??? */
+}
+
+/*
+ * Send response to request, or possibly just drop request.
+ * We also need to know whether to remove the request from
+ * the tag hash table.
+ */
+void
+l9p_respond(struct l9p_request *req, bool drop, bool rmtag)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	size_t iosize;
+#if defined(L9P_DEBUG)
+	struct sbuf *sb;
+	const char *ftype;
+#endif
+	int error;
+
+	/* The reply always carries the tag of the request it answers. */
+	req->lr_resp.hdr.tag = req->lr_req.hdr.tag;
+
+	/*
+	 * On success the reply type is the request type plus one.  On
+	 * failure, 9P2000.L gets an Rlerror with a Linux errno; the other
+	 * versions get an Rerror with both a string and a number.
+	 */
+	error = req->lr_error;
+	if (error == 0)
+		req->lr_resp.hdr.type = req->lr_req.hdr.type + 1;
+	else {
+		if (conn->lc_version == L9P_2000L) {
+			req->lr_resp.hdr.type = L9P_RLERROR;
+			req->lr_resp.error.errnum = (uint32_t)e2linux(error);
+		} else {
+			req->lr_resp.hdr.type = L9P_RERROR;
+			req->lr_resp.error.ename = strerror(error);
+			req->lr_resp.error.errnum = (uint32_t)e29p(error);
+		}
+	}
+
+#if defined(L9P_DEBUG)
+	sb = sbuf_new_auto();
+	l9p_describe_fcall(&req->lr_resp, conn->lc_version, sb);
+	sbuf_finish(sb);
+
+	switch (req->lr_flushstate) {
+	case L9P_FLUSH_NONE:
+	default:
+		ftype = "";
+		break;
+	case L9P_FLUSH_REQUESTED_PRE_START:
+		ftype = "FLUSH requested pre-dispatch: ";
+		break;
+	case L9P_FLUSH_REQUESTED_POST_START:
+		ftype = "FLUSH requested while running: ";
+		break;
+	case L9P_FLUSH_TOOLATE:
+		ftype = "FLUSH requested too late: ";
+		break;
+	}
+	L9P_LOG(L9P_DEBUG, "%s%s%s",
+	    drop ? "DROP: " : "", ftype, sbuf_data(sb));
+	sbuf_delete(sb);
+#endif
+
+	/*
+	 * Pack the reply unless it is being dropped.  A packing failure
+	 * turns the reply into a drop.
+	 */
+	error = drop ? 0 :
+	    l9p_pufcall(&req->lr_resp_msg, &req->lr_resp, conn->lc_version);
+	if (rmtag)
+		ht_remove(&conn->lc_requests, req->lr_req.hdr.tag);
+	if (error != 0) {
+		L9P_LOG(L9P_ERROR, "cannot pack response");
+		drop = true;
+	}
+
+	/* Hand the buffers to the transport via the matching callback. */
+	if (drop) {
+		conn->lc_lt.lt_drop_response(req,
+		    req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+		    conn->lc_lt.lt_aux);
+	} else {
+		iosize = req->lr_resp_msg.lm_size;
+
+		/*
+		 * Include I/O size in calculation for Rread and
+		 * Rreaddir responses.
+		 */
+		if (req->lr_resp.hdr.type == L9P_RREAD ||
+		    req->lr_resp.hdr.type == L9P_RREADDIR)
+			iosize += req->lr_resp.io.count;
+
+		conn->lc_lt.lt_send_response(req,
+		    req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+		    iosize, conn->lc_lt.lt_aux);
+	}
+
+	/* The request is finished: release both fcalls and the request. */
+	l9p_freefcall(&req->lr_req);
+	l9p_freefcall(&req->lr_resp);
+
+	free(req);
+}
+
+/*
+ * This allows a caller to iterate through the data in a
+ * read or write request (creating the data if packing,
+ * scanning through it if unpacking).  This is used for
+ * writing readdir entries, so mode should be L9P_PACK
+ * (but we allow L9P_UNPACK so that debug code can also scan
+ * through the data later, if desired).
+ *
+ * This relies on the Tread op having positioned the request's
+ * iov to the beginning of the data buffer (note the l9p_seek_iov
+ * in l9p_dispatch_tread).
+ */
+void
+l9p_init_msg(struct l9p_message *msg, struct l9p_request *req,
+    enum l9p_pack_mode mode)
+{
+	size_t iov_bytes = sizeof (struct iovec) * req->lr_data_niov;
+
+	/* Take a private copy of the request's data iovec array. */
+	memcpy(msg->lm_iov, req->lr_data_iov, iov_bytes);
+	msg->lm_niov = req->lr_data_niov;
+
+	/* Start at the beginning of the data with nothing consumed yet. */
+	msg->lm_cursor_iov = 0;
+	msg->lm_cursor_offset = 0;
+	msg->lm_size = 0;
+	msg->lm_mode = mode;
+}
+
+/*
+ * Constraint bits for the flags argument of fid_lookup().  Each value is
+ * a distinct bit, so several constraints can be OR-ed together.
+ */
+enum fid_lookup_flags {
+	F_REQUIRE_OPEN = 0x01,	/* require that the file be marked OPEN */
+	F_REQUIRE_DIR = 0x02,	/* require that the file be marked ISDIR */
+	F_REQUIRE_XATTR = 0x04,	/* require that the file be marked XATTR */
+	F_REQUIRE_AUTH = 0x08,	/* require that the fid be marked AUTH */
+	F_FORBID_OPEN = 0x10,	/* forbid that the file be marked OPEN */
+	F_FORBID_DIR = 0x20,	/* forbid that the file be marked ISDIR */
+	F_FORBID_XATTR = 0x40,	/* forbid that the file be marked XATTR */
+	F_ALLOW_AUTH = 0x80,	/* allow that the fid be marked AUTH */
+};
+
+/*
+ * Look up a fid.  It must correspond to a valid file, else we return
+ * the given errno (some "not a valid fid" calls must return EIO and
+ * some must return EINVAL and qemu returns ENOENT in other cases and
+ * so on, so we just provide a general "return this error number").
+ *
+ * Callers may also set constraints: fid must be (or not be) open,
+ * must be (or not be) a directory, must be (or not be) an xattr.
+ *
+ * Only one op has a fid that *must* be an auth fid.  Most ops forbid
+ * auth fids.  So instead of FORBID we have ALLOW here and the default
+ * is FORBID.
+ */
+static inline int
+fid_lookup(struct l9p_connection *conn, uint32_t fid, int err, int flags,
+    struct l9p_fid **afile)
+{
+	struct l9p_fid *found = ht_find(&conn->lc_files, fid);
+	bool auth_ok;
+
+	/* Unknown fid: report whichever errno the caller asked for. */
+	if (found == NULL)
+		return (err);
+
+	/*
+	 * This is a bug-detection aid for now.  Once requests run
+	 * multithreaded / async it must turn into an error return
+	 * ("return EINVAL" or "return err"), and a fid may then need an
+	 * "in async op" marking so that it is usable for some purposes
+	 * only until the async op completes or is aborted.
+	 */
+	assert(l9p_fid_isvalid(found));
+
+	/*
+	 * We are expanded inline with a constant flags argument, so
+	 * the tests for unset flag bits are dropped by the compiler.
+	 */
+	if ((flags & F_REQUIRE_OPEN) != 0) {
+		if (!l9p_fid_isopen(found))
+			return (EINVAL);
+	}
+	if ((flags & F_FORBID_OPEN) != 0) {
+		if (l9p_fid_isopen(found))
+			return (EINVAL);
+	}
+	if ((flags & F_REQUIRE_DIR) != 0) {
+		if (!l9p_fid_isdir(found))
+			return (ENOTDIR);
+	}
+	if ((flags & F_FORBID_DIR) != 0) {
+		if (l9p_fid_isdir(found))
+			return (EISDIR);
+	}
+	if ((flags & F_REQUIRE_XATTR) != 0) {
+		if (!l9p_fid_isxattr(found))
+			return (EINVAL);
+	}
+	if ((flags & F_FORBID_XATTR) != 0) {
+		if (l9p_fid_isxattr(found))
+			return (EINVAL);
+	}
+
+	/*
+	 * An auth fid is acceptable only if the caller requires or allows
+	 * one; a non-auth fid is acceptable unless one is required.
+	 */
+	if (l9p_fid_isauth(found))
+		auth_ok = (flags & (F_REQUIRE_AUTH | F_ALLOW_AUTH)) != 0;
+	else
+		auth_ok = (flags & F_REQUIRE_AUTH) == 0;
+	if (!auth_ok)
+		return (EINVAL);
+
+	/* The output pointer is written only on success. */
+	*afile = found;
+	return (0);
+}
+
+/*
+ * Append variable-size stat object and adjust io count.
+ * Returns 0 if the entire stat object was packed, -1 if not.
+ * A fully packed object updates the request's io count.
+ *
+ * Caller must use their own private l9p_message object since
+ * a partially packed object will leave the message object in
+ * a useless state.
+ *
+ * Frees the stat object.
+ */
+int
+l9p_pack_stat(struct l9p_message *msg, struct l9p_request *req,
+    struct l9p_stat *st)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	uint16_t size = l9p_sizeof_stat(st, conn->lc_version);
+	int ret = -1;
+
+	assert(msg->lm_mode == L9P_PACK);
+
+	/*
+	 * Pack only if the stat fits in what is left of the requested
+	 * count; the reply count is bumped only after packing succeeds.
+	 */
+	if (req->lr_resp.io.count + size <= req->lr_req.io.count &&
+	    l9p_pustat(msg, st, conn->lc_version) >= 0) {
+		req->lr_resp.io.count += size;
+		ret = 0;
+	}
+
+	/* The stat's strings are released whether or not it was packed. */
+	l9p_freestat(st);
+	return (ret);
+}
+
+/*
+ * Handle Tversion: negotiate the protocol version and message size.
+ *
+ * The client's version string is looked up in l9p_versions[].  The
+ * connection is then set to the lower of the client's version and the
+ * server's maximum, and the reply names that negotiated version, so
+ * that the client and the dispatch table in use always agree.
+ *
+ * Returns 0 on success, ENOSYS if the version string is not recognised,
+ * or ENOMEM if the reply string cannot be allocated.  No connection
+ * state is modified on failure.
+ */
+static int
+l9p_dispatch_tversion(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_server *server = conn->lc_server;
+	enum l9p_version remote_version = L9P_INVALID_VERSION;
+	enum l9p_version negotiated;
+	size_t i;
+	char *reply_version;
+
+	for (i = 0; i < N(l9p_versions); i++) {
+		if (strcmp(req->lr_req.version.version,
+		    l9p_versions[i].name) == 0) {
+			remote_version = (enum l9p_version)i;
+			break;
+		}
+	}
+
+	if (remote_version == L9P_INVALID_VERSION) {
+		L9P_LOG(L9P_ERROR, "unsupported remote version: %s",
+		    req->lr_req.version.version);
+		return (ENOSYS);
+	}
+
+	L9P_LOG(L9P_INFO, "remote version: %s",
+	    l9p_versions[remote_version].name);
+	L9P_LOG(L9P_INFO, "local version: %s",
+	    l9p_versions[server->ls_max_version].name);
+
+	negotiated = MIN(remote_version, server->ls_max_version);
+
+	/*
+	 * Reply with the version we will actually speak, not simply the
+	 * one the client asked for: the two differ whenever the client
+	 * requests something newer than ls_max_version.  Allocate before
+	 * touching the connection so a failure leaves it unchanged.
+	 */
+	reply_version = strdup(l9p_versions[negotiated].name);
+	if (reply_version == NULL)
+		return (ENOMEM);
+
+	conn->lc_version = negotiated;
+	conn->lc_msize = MIN(req->lr_req.version.msize, conn->lc_msize);
+	/*
+	 * NOTE(review): a client-supplied msize below 24 makes this
+	 * subtraction wrap if lc_max_io_size is unsigned -- confirm
+	 * whether a minimum msize is enforced elsewhere.
+	 */
+	conn->lc_max_io_size = conn->lc_msize - 24;
+	req->lr_resp.version.version = reply_version;
+	req->lr_resp.version.msize = conn->lc_msize;
+	return (0);
+}
+
+/*
+ * Handle Tattach: allocate the requested fid and ask the backend to
+ * attach it.  Returns 0 or an errno value.
+ */
+static int
+l9p_dispatch_tattach(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *newfid;
+	int rc;
+
+	/*
+	 * Tauth is not implemented yet, but the auth-fid handling is
+	 * written anyway.  Resolve the auth fid before anything else so
+	 * that a failure can return without any cleanup.
+	 */
+	if (req->lr_req.tattach.afid == L9P_NOFID) {
+		req->lr_fid2 = NULL;
+	} else {
+		rc = fid_lookup(conn, req->lr_req.tattach.afid, EINVAL,
+		    F_REQUIRE_AUTH, &req->lr_fid2);
+		if (rc != 0)
+			return (rc);
+	}
+
+	newfid = l9p_connection_alloc_fid(conn, req->lr_req.hdr.fid);
+	if (newfid == NULL)
+		return (EINVAL);
+
+	req->lr_fid = newfid;
+	backend = conn->lc_server->ls_backend;
+
+	/* Plain 9P2000 has no numeric uname; give the backend NONUNAME. */
+	if (conn->lc_version == L9P_2000)
+		req->lr_req.tattach.n_uname = L9P_NONUNAME;
+
+	rc = backend->attach(backend->softc, req);
+	if (rc != 0) {
+		/* The attach failed, so the new fid is discarded. */
+		l9p_connection_remove_fid(conn, newfid);
+		return (rc);
+	}
+
+	/* Success: the fid is live; it ought to be a directory here. */
+	l9p_fid_setvalid(newfid);
+	if (req->lr_resp.rattach.qid.type & L9P_QTDIR)
+		l9p_fid_setdir(newfid);
+	return (rc);
+}
+
+/*
+ * Handle Tclunk: release a fid through the backend and remove it from
+ * the connection.  Returns 0 or an errno value.
+ */
+static int
+l9p_dispatch_tclunk(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *victim;
+	int rc;
+
+	/* Auth fids are allowed: clunk is the only way to dispose of one. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_ALLOW_AUTH, &victim);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	l9p_fid_unsetvalid(victim);
+
+	/*
+	 * An xattr fid implies that xattrclunk exists: xattrclunk may be
+	 * NULL only when xattrwalk and xattrcreate are NULL or always
+	 * fail.
+	 *
+	 * Open question: should async xattrclunk be permitted for a very
+	 * large xattr create?  It would complicate things, so probably
+	 * not.
+	 */
+	rc = l9p_fid_isxattr(victim) ?
+	    backend->xattrclunk(backend->softc, victim) :
+	    backend->clunk(backend->softc, victim);
+
+	/* Whatever the backend said, the fid no longer exists. */
+	l9p_connection_remove_fid(conn, victim);
+	return (rc);
+}
+
+/*
+ * Handle Tcreate: create an object in the directory named by the fid.
+ * Returns 0 or an errno value.
+ */
+static int
+l9p_dispatch_tcreate(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	uint32_t perm;
+	int rc;
+
+	/* The fid must name a directory that is not currently open. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	/* Capture the permission bits before the backend runs. */
+	perm = req->lr_req.tcreate.perm;
+#define MKDIR_OR_SIMILAR \
+    (L9P_DMDIR | L9P_DMSYMLINK | L9P_DMNAMEDPIPE | L9P_DMSOCKET | L9P_DMDEVICE)
+
+	/*
+	 * TODO:
+	 *  - check new file name
+	 *  - break out different kinds of create (file vs mkdir etc)
+	 *  - add async file-create (leaves req->lr_fid in limbo)
+	 */
+	rc = backend->create(backend->softc, req);
+	if (rc != 0)
+		return (rc);
+
+	/* Creating a plain file turns the fid into that file, opened. */
+	if ((perm & MKDIR_OR_SIMILAR) == 0) {
+		l9p_fid_unsetdir(req->lr_fid);
+		l9p_fid_setopen(req->lr_fid);
+	}
+
+	return (0);
+}
+
+/*
+ * Handle Topen: open the file named by the fid and mark the fid open.
+ * Returns 0 or an errno value.
+ */
+static int
+l9p_dispatch_topen(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* The fid may be neither already open nor an xattr fid. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_OPEN | F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * TODO:
+	 *  - add async open (leaves req->lr_fid in limbo)
+	 */
+	backend = conn->lc_server->ls_backend;
+	rc = backend->open(backend->softc, req);
+	if (rc != 0)
+		return (rc);
+
+	l9p_fid_setopen(req->lr_fid);
+	return (0);
+}
+
+/*
+ * Handle Tread: read from an open file or from an xattr fid.
+ * Returns 0 or an errno value.
+ */
+static int
+l9p_dispatch_tread(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *target;
+	int rc;
+
+	/*
+	 * No lookup constraints: xattr fids are never marked open, so
+	 * the open/xattr distinction is tested by hand further down.
+	 */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * Position the data iov just past the count field of the reply,
+	 * which is where packed data must begin:
+	 *
+	 * size[4] + Rread[1] + tag[2] + count[4] = 11
+	 */
+	l9p_seek_iov(req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+	    req->lr_data_iov, &req->lr_data_niov, 11);
+
+	backend = conn->lc_server->ls_backend;
+	target = req->lr_fid;
+
+	/*
+	 * An xattr fid implies that xattrread exists: xattrread may be
+	 * NULL only when xattrwalk and xattrcreate are NULL or always
+	 * fail.
+	 *
+	 * TODO:
+	 *   separate out directory-read
+	 *   allow async read
+	 */
+	if (l9p_fid_isxattr(target))
+		return (backend->xattrread(backend->softc, req));
+	if (l9p_fid_isopen(target))
+		return (backend->read(backend->softc, req));
+
+	/* Neither an xattr fid nor an open file. */
+	return (EINVAL);
+}
+
+/*
+ * Handle Tremove: remove the file named by the fid and discard the fid.
+ * Returns 0 or an errno value.
+ */
+static int
+l9p_dispatch_tremove(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *victim;
+	int rc;
+
+	/*
+	 * Open question: should Tremove be accepted on auth fids, and if
+	 * so, should it simply behave like Tclunk?  For now it is refused.
+	 */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &victim);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	l9p_fid_unsetvalid(victim);
+	rc = backend->remove(backend->softc, victim);
+
+	/* Whatever the backend said, the fid no longer exists. */
+	l9p_connection_remove_fid(conn, victim);
+	return (rc);
+}
+
+/*
+ * Handle Tstat: ask the backend for the stat of the fid's file.
+ *
+ * On success the reply qid gains L9P_QTAUTH if the fid is an auth fid,
+ * and the fid's directory flag is refreshed from the reply qid.
+ * Returns 0 or an errno value.
+ */
+static int
+l9p_dispatch_tstat(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	struct l9p_fid *fid;
+	int error;
+
+	/* Allow Tstat on auth fid?  Seems harmless enough... */
+	error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_ALLOW_AUTH, &fid);
+	if (error)
+		return (error);
+
+	be = conn->lc_server->ls_backend;
+	req->lr_fid = fid;
+	error = be->stat(be->softc, req);
+
+	if (error == 0) {
+		if (l9p_fid_isauth(fid))
+			req->lr_resp.rstat.stat.qid.type |= L9P_QTAUTH;
+
+		/*
+		 * Test the directory bit without modifying the qid.  This
+		 * must be '&' rather than '&=': the compound assignment
+		 * would strip every other type bit (including the
+		 * L9P_QTAUTH set just above) from the reply sent to the
+		 * client.
+		 */
+		if (req->lr_resp.rstat.stat.qid.type & L9P_QTDIR)
+			l9p_fid_setdir(fid);
+		else
+			l9p_fid_unsetdir(fid);
+	}
+
+	return (error);
+}
+
+/*
+ * Handle Twalk: walk from fid through the requested names, leaving the
+ * result in newfid (which may be the same fid).
+ *
+ * A distinct newfid is allocated before the backend runs; it is marked
+ * valid on success and removed again on failure.  On success the
+ * directory flag of newfid is brought in line with the walk result.
+ * Returns 0 or an errno value.
+ */
+static int
+l9p_dispatch_twalk(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	struct l9p_fid *fid, *newfid;
+	uint16_t n;
+	int error;
+
+	/* Can forbid XATTR, but cannot require DIR. */
+	error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &fid);
+	if (error)
+		return (error);
+
+	if (req->lr_req.twalk.hdr.fid != req->lr_req.twalk.newfid) {
+		newfid = l9p_connection_alloc_fid(conn,
+		    req->lr_req.twalk.newfid);
+		if (newfid == NULL)
+			return (EINVAL);
+	} else
+		newfid = fid;
+
+	be = conn->lc_server->ls_backend;
+	req->lr_fid = fid;
+	req->lr_newfid = newfid;
+	error = be->walk(be->softc, req);
+
+	/*
+	 * If newfid == fid, then fid itself has (potentially) changed,
+	 * but is still valid.  Otherwise set newfid valid on
+	 * success, and destroy it on error.
+	 */
+	if (newfid != fid) {
+		if (error == 0)
+			l9p_fid_setvalid(newfid);
+		else
+			l9p_connection_remove_fid(conn, newfid);
+	}
+
+	/*
+	 * If we walked any name elements, the last (n-1'th) qid
+	 * has the type (dir vs file) for the new fid.  Otherwise
+	 * the type of newfid is the same as fid.  Of course, if
+	 * n==0 and fid==newfid, fid is already set up correctly
+	 * as the whole thing was a big no-op, but it's safe to
+	 * copy its dir bit to itself.
+	 *
+	 * The dir bit must be cleared as well as set: when the walk
+	 * is done in place (newfid == fid), fid was a directory
+	 * beforehand and would otherwise stay marked as one after
+	 * walking to a non-directory.
+	 */
+	if (error == 0) {
+		n = req->lr_resp.rwalk.nwqid;
+		if (n > 0) {
+			if (req->lr_resp.rwalk.wqid[n - 1].type & L9P_QTDIR)
+				l9p_fid_setdir(newfid);
+			else
+				l9p_fid_unsetdir(newfid);
+		} else {
+			if (l9p_fid_isdir(fid))
+				l9p_fid_setdir(newfid);
+		}
+	}
+	return (error);
+}
+
+/*
+ * Handle Twrite: write to an open file or to an xattr fid.
+ * Returns 0 or an errno value.
+ */
+static int
+l9p_dispatch_twrite(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *target;
+	int rc;
+
+	/*
+	 * Directories are refused.  "Open" cannot be required at lookup
+	 * time because xattr fids are written without being open.
+	 */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL,
+	    F_FORBID_DIR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * Position the data iov at the payload of the request (the
+	 * counterpart of what l9p_dispatch_tread does for the reply):
+	 *
+	 * size[4] + Twrite[1] + tag[2] + fid[4] + offset[8] + count[4] = 23
+	 */
+	l9p_seek_iov(req->lr_req_msg.lm_iov, req->lr_req_msg.lm_niov,
+	    req->lr_data_iov, &req->lr_data_niov, 23);
+
+	backend = conn->lc_server->ls_backend;
+	target = req->lr_fid;
+
+	/*
+	 * A backend may leave write and xattrwrite unset (read-only
+	 * file system); that is reported as ENOSYS.
+	 *
+	 * TODO:
+	 *   allow async write
+	 */
+	if (l9p_fid_isxattr(target)) {
+		if (backend->xattrwrite == NULL)
+			return (ENOSYS);
+		return (backend->xattrwrite(backend->softc, req));
+	}
+	if (l9p_fid_isopen(target)) {
+		if (backend->write == NULL)
+			return (ENOSYS);
+		return (backend->write(backend->softc, req));
+	}
+
+	/* Neither an xattr fid nor an open file. */
+	return (EINVAL);
+}
+
+/*
+ * Handle Twstat: pass a stat update to the backend.  Returns 0 or an
+ * errno value; ENOSYS if the backend has no wstat operation.
+ */
+static int
+l9p_dispatch_twstat(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->wstat == NULL)
+		return (ENOSYS);
+	return (backend->wstat(backend->softc, req));
+}
+
+/*
+ * Handle Tstatfs: ask the backend for file system statistics.
+ * Returns 0 or an errno value.
+ */
+static int
+l9p_dispatch_tstatfs(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* Auth fids are refused for now; whether to allow them is open. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	return (backend->statfs(backend->softc, req));
+}
+
+/*
+ * Handle Tlopen: open the file named by the fid and mark the fid open.
+ * Returns 0 or an errno value; ENOSYS if the backend has no lopen.
+ */
+static int
+l9p_dispatch_tlopen(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* The fid may be neither already open nor an xattr fid. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_OPEN | F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->lopen == NULL)
+		return (ENOSYS);
+
+	/*
+	 * TODO:
+	 *  - add async open (leaves req->lr_fid in limbo)
+	 */
+	rc = backend->lopen(backend->softc, req);
+	if (rc != 0)
+		return (rc);
+
+	l9p_fid_setopen(req->lr_fid);
+	return (0);
+}
+
+/*
+ * Handle Tlcreate: create a file in the directory named by the fid.
+ * Returns 0 or an errno value; ENOSYS if the backend has no lcreate.
+ */
+static int
+l9p_dispatch_tlcreate(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* The fid must name a directory that is not currently open. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->lcreate == NULL)
+		return (ENOSYS);
+
+	/*
+	 * TODO:
+	 *  - check new file name
+	 *  - add async create (leaves req->lr_fid in limbo)
+	 */
+	rc = backend->lcreate(backend->softc, req);
+	if (rc != 0)
+		return (rc);
+
+	/* On success the fid stops being a directory and becomes open. */
+	l9p_fid_unsetdir(req->lr_fid);
+	l9p_fid_setopen(req->lr_fid);
+	return (0);
+}
+
+/*
+ * Handle Tsymlink: create a symbolic link in the directory named by
+ * the fid.  Returns 0 or an errno value; ENOSYS if the backend has no
+ * symlink operation.
+ */
+static int
+l9p_dispatch_tsymlink(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/*
+	 * The containing directory is not itself changed, so an open
+	 * directory fid could perhaps be allowed; for now it is refused.
+	 */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->symlink == NULL)
+		return (ENOSYS);
+
+	/*
+	 * TODO:
+	 *  - check new file name
+	 */
+	return (backend->symlink(backend->softc, req));
+}
+
+/*
+ * Handle Tmknod: create a node in the directory named by the fid.
+ * Returns 0 or an errno value; ENOSYS if the backend has no mknod
+ * operation.
+ */
+static int
+l9p_dispatch_tmknod(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/*
+	 * The containing directory is not itself changed, so an open
+	 * directory fid could perhaps be allowed; for now it is refused.
+	 */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->mknod == NULL)
+		return (ENOSYS);
+
+	/*
+	 * TODO:
+	 *  - check new file name
+	 */
+	return (backend->mknod(backend->softc, req));
+}
+
+/*
+ * Dispatch a Trename request.  Two fids are resolved: the request fid
+ * (the object being renamed, stored in lr_fid) and trename.dfid (stored
+ * in lr_fid2, looked up with F_REQUIRE_DIR | F_FORBID_OPEN).  The
+ * request then goes to the backend's rename hook, or fails with ENOSYS
+ * if the backend has none.
+ */
+static int
+l9p_dispatch_trename(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rv;
+
+	/* Rename directory or file (including symlink etc). */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/* Doesn't affect new dir fid; maybe allow OPEN? */
+	rv = fid_lookup(conn, req->lr_req.trename.dfid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2);
+	if (rv != 0)
+		return (rv);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->rename == NULL)
+		return (ENOSYS);
+
+	/*
+	 * TODO:
+	 *  - check new file name (trename.name)
+	 */
+	return (backend->rename(backend->softc, req));
+}
+
+/*
+ * Dispatch a Treadlink request to the backend's readlink hook, or fail
+ * with ENOSYS if the backend has none.
+ */
+static int
+l9p_dispatch_treadlink(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rv;
+
+	/*
+	 * The back end still has to verify that the target really is a
+	 * symlink (readlink fails otherwise), but rejecting directories
+	 * and open files up front is cheap, so do that here.
+	 */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->readlink == NULL)
+		return (ENOSYS);
+
+	return (backend->readlink(backend->softc, req));
+}
+
+/*
+ * Dispatch a Tgetattr request: look up the request fid with
+ * F_FORBID_XATTR and pass the request to the backend's getattr hook,
+ * or fail with ENOSYS if the backend has none.
+ */
+static int
+l9p_dispatch_tgetattr(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rv;
+
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->getattr == NULL)
+		return (ENOSYS);
+
+	return (backend->getattr(backend->softc, req));
+}
+
+/*
+ * Dispatch a Tsetattr request: look up the request fid with
+ * F_FORBID_XATTR and pass the request to the backend's setattr hook,
+ * or fail with ENOSYS if the backend has none.
+ */
+static int
+l9p_dispatch_tsetattr(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rv;
+
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->setattr == NULL)
+		return (ENOSYS);
+
+	return (backend->setattr(backend->softc, req));
+}
+
+/*
+ * Dispatch a Txattrwalk request.  The request fid is looked up with
+ * F_FORBID_XATTR and a fresh fid is allocated for txattrwalk.newfid
+ * (EINVAL if that allocation fails).  Both are stored in the request
+ * before the backend's xattrwalk hook runs.  On success the new fid is
+ * marked valid and xattr; on any failure, including a missing hook
+ * (ENOSYS), the new fid is removed from the connection again.
+ */
+static int
+l9p_dispatch_txattrwalk(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *srcfid, *xfid;
+	int rv;
+
+	/*
+	 * It is unclear whether we should care if the file-or-dir is
+	 * open.  Unlike Twalk, though, the source fid is always a file
+	 * or dir, and newfid must be supplied, must differ from it, and
+	 * always becomes a brand new xattr fid.
+	 */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &srcfid);
+	if (rv != 0)
+		return (rv);
+
+	xfid = l9p_connection_alloc_fid(conn, req->lr_req.txattrwalk.newfid);
+	if (xfid == NULL)
+		return (EINVAL);
+
+	backend = conn->lc_server->ls_backend;
+
+	req->lr_fid = srcfid;
+	req->lr_newfid = xfid;
+	if (backend->xattrwalk != NULL)
+		rv = backend->xattrwalk(backend->softc, req);
+	else
+		rv = ENOSYS;
+
+	/*
+	 * Completion resembles Twalk but is simpler because xfid is
+	 * always newly allocated: drop it on failure, otherwise flag it
+	 * as a valid xattr fid.
+	 */
+	if (rv != 0) {
+		l9p_connection_remove_fid(conn, xfid);
+		return (rv);
+	}
+	l9p_fid_setvalid(xfid);
+	l9p_fid_setxattr(xfid);
+	return (0);
+}
+
+/*
+ * Dispatch a Txattrcreate request.  The request fid is looked up with
+ * F_FORBID_XATTR | F_FORBID_OPEN (EINVAL for a bad fid) and handed to
+ * the backend's xattrcreate hook, or ENOSYS if the backend has none.
+ * On success the fid's "dir" state is cleared and its "xattr" state is
+ * set.
+ */
+static int
+l9p_dispatch_txattrcreate(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *target;
+	int rv;
+
+	/*
+	 * An already-open incoming fid is refused because the fid is
+	 * about to turn into an xattr fid.  Should that ever need to be
+	 * permitted, the fs code will have to cope with it.
+	 *
+	 * Curiously, qemu 9pfs uses ENOENT for a bad txattrwalk
+	 * fid, but EINVAL for txattrcreate (so we do too).
+	 */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL,
+	    F_FORBID_XATTR | F_FORBID_OPEN, &target);
+	if (rv != 0)
+		return (rv);
+
+	backend = conn->lc_server->ls_backend;
+
+	req->lr_fid = target;
+	if (backend->xattrcreate == NULL)
+		return (ENOSYS);
+
+	rv = backend->xattrcreate(backend->softc, req);
+	if (rv != 0)
+		return (rv);
+
+	/* The regular (file or dir) fid has now become an xattr fid. */
+	l9p_fid_unsetdir(target);
+	l9p_fid_setxattr(target);
+	return (0);
+}
+
+/*
+ * Dispatch a Treaddir request.  The request fid is looked up with
+ * F_REQUIRE_DIR | F_REQUIRE_OPEN, the data iovec is positioned past
+ * the fixed response header, and the request is passed to the
+ * backend's readdir hook (ENOSYS if the backend has none).
+ */
+static int
+l9p_dispatch_treaddir(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rv;
+
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_REQUIRE_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/*
+	 * Packed directory data must begin immediately after the count
+	 * field of the response, so skip over the header:
+	 *
+	 * size[4] + Rreaddir[1] + tag[2] + count[4] = 11
+	 */
+	l9p_seek_iov(req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+	    req->lr_data_iov, &req->lr_data_niov, 11);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->readdir == NULL)
+		return (ENOSYS);
+
+	return (backend->readdir(backend->softc, req));
+}
+
+/*
+ * Dispatch a Tfsync request: look up the request fid with
+ * F_REQUIRE_OPEN and pass the request to the backend's fsync hook, or
+ * fail with ENOSYS if the backend has none.
+ */
+static int
+l9p_dispatch_tfsync(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rv;
+
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->fsync == NULL)
+		return (ENOSYS);
+
+	return (backend->fsync(backend->softc, req));
+}
+
+/*
+ * Dispatch a Tlock request: look up the request fid with
+ * F_REQUIRE_OPEN and pass the request to the backend's lock hook, or
+ * fail with ENOSYS if the backend has none.
+ */
+static int
+l9p_dispatch_tlock(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rv;
+
+	/* Forbid directories? */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->lock == NULL)
+		return (ENOSYS);
+
+	/*
+	 * TODO: multiple client handling; perhaps async locking.
+	 */
+	return (backend->lock(backend->softc, req));
+}
+
+/*
+ * Dispatch a Tgetlock request: look up the request fid with
+ * F_REQUIRE_OPEN and pass the request to the backend's getlock hook,
+ * or fail with ENOSYS if the backend has none.
+ */
+static int
+l9p_dispatch_tgetlock(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rv;
+
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->getlock == NULL)
+		return (ENOSYS);
+
+	/*
+	 * TODO: multiple client handling; perhaps async locking.
+	 */
+	return (backend->getlock(backend->softc, req));
+}
+
+/*
+ * Dispatch a Tlink request.  tlink.dfid is resolved first (into
+ * lr_fid2, with F_REQUIRE_DIR | F_FORBID_OPEN), then the request fid
+ * (into lr_fid, with F_FORBID_DIR | F_FORBID_XATTR).  The request is
+ * then passed to the backend's link hook, or fails with ENOSYS if the
+ * backend has none.
+ */
+static int
+l9p_dispatch_tlink(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rv;
+
+	/*
+	 * In the current scheme dfid is kept in fid2.
+	 *
+	 * Allow open dir?  Target dir fid is not modified...
+	 */
+	rv = fid_lookup(conn, req->lr_req.tlink.dfid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2);
+	if (rv != 0)
+		return (rv);
+
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_DIR | F_FORBID_XATTR, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->link == NULL)
+		return (ENOSYS);
+
+	return (backend->link(backend->softc, req));
+}
+
+/*
+ * Dispatch a Tmkdir request.  The request fid is looked up with
+ * F_REQUIRE_DIR | F_FORBID_OPEN, a name containing '/' is rejected
+ * with EINVAL, and the request is then passed to the backend's mkdir
+ * hook (ENOSYS if the backend has none).
+ */
+static int
+l9p_dispatch_tmkdir(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	int error;
+
+	error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (error)
+		return (error);
+
+	/*
+	 * Slashes embedded in the name are not allowed.
+	 *
+	 * NOTE(review): the name is read through the tlcreate member of
+	 * the request rather than a mkdir-specific one; presumably both
+	 * layouts keep the name in the same place -- confirm against the
+	 * request union's definition.
+	 */
+	if (strchr(req->lr_req.tlcreate.name, '/') != NULL)
+		return (EINVAL);
+
+	be = conn->lc_server->ls_backend;
+	error = be->mkdir != NULL ? be->mkdir(be->softc, req) : ENOSYS;
+	return (error);
+}
+
+/*
+ * Dispatch a Trenameat request.  Both the request fid (lr_fid) and
+ * trenameat.newdirfid (lr_fid2) are looked up with
+ * F_REQUIRE_DIR | F_FORBID_OPEN; the request is then passed to the
+ * backend's renameat hook, or fails with ENOSYS if the backend has
+ * none.
+ */
+static int
+l9p_dispatch_trenameat(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rv;
+
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	rv = fid_lookup(conn, req->lr_req.trenameat.newdirfid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2);
+	if (rv != 0)
+		return (rv);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->renameat == NULL)
+		return (ENOSYS);
+
+	/* TODO: check old and new names */
+	return (backend->renameat(backend->softc, req));
+}
+
+/*
+ * Dispatch a Tunlinkat request: look up the request fid with
+ * F_REQUIRE_DIR | F_FORBID_OPEN and pass the request to the backend's
+ * unlinkat hook, or fail with ENOSYS if the backend has none.
+ */
+static int
+l9p_dispatch_tunlinkat(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rv;
+
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	backend = conn->lc_server->ls_backend;
+	if (backend->unlinkat == NULL)
+		return (ENOSYS);
+
+	/* TODO: check dir-or-file name */
+	return (backend->unlinkat(backend->softc, req));
+}
diff --git a/usr/src/lib/lib9p/common/rfuncs.c b/usr/src/lib/lib9p/common/rfuncs.c
new file mode 100644
index 0000000000..f80e8c1541
--- /dev/null
+++ b/usr/src/lib/lib9p/common/rfuncs.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright 2016 Chris Torek <chris.torek@gmail.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#if defined(WITH_CASPER)
+#include <libcasper.h>
+#include <casper/cap_pwd.h>
+#include <casper/cap_grp.h>
+#endif
+
+#include "rfuncs.h"
+
+/*
+ * This is essentially a clone of the BSD basename_r function,
+ * which is like POSIX basename() but puts the result in a user
+ * supplied buffer.
+ *
+ * In BSD basename_r, the buffer must be at least MAXPATHLEN bytes
+ * long.  In our case we take the size of the buffer as an argument.
+ *
+ * Note that it's impossible in general to do this without
+ * a temporary buffer since basename("foo/bar") is "bar",
+ * but basename("foo/bar/") is still "bar" -- no trailing
+ * slash is allowed.
+ *
+ * The return value is your supplied buffer <buf>, or NULL if
+ * the length of the basename of the supplied <path> equals or
+ * exceeds your indicated <bufsize>.
+ *
+ * As a special but useful case, if you supply NULL for the <buf>
+ * argument, we allocate the buffer dynamically to match the
+ * basename, i.e., the result is basically strdup()ed for you.
+ * In this case <bufsize> is ignored (recommended: pass 0 here).
+ */
+char *
+r_basename(const char *path, char *buf, size_t bufsize)
+{
+	const char *first, *last;
+	size_t n;
+
+	if (path == NULL || path[0] == '\0') {
+		/*
+		 * Treat a NULL or empty path as ".", which is what libc
+		 * basename_r() does and keeps the pointer arithmetic
+		 * below away from an empty string.
+		 */
+		first = ".";
+		n = 1;
+	} else {
+		/*
+		 * Start at the final byte and skip trailing slashes,
+		 * never moving before the first byte of the path.
+		 */
+		last = path + strlen(path) - 1;
+		for (; last > path && *last == '/'; last--)
+			continue;
+
+		if (*last == '/') {
+			/*
+			 * Still on a slash, so last == path and the
+			 * path consisted of nothing but slashes.
+			 */
+			first = "/";
+			n = 1;
+		} else {
+			/*
+			 * last is the final byte of a non-empty
+			 * component.  Walk first backwards until it is
+			 * either the start of the path or the byte
+			 * right after a slash; the basename is then
+			 * the inclusive range [first, last].
+			 */
+			for (first = last; first > path && first[-1] != '/';
+			    first--)
+				continue;
+			n = (size_t)(last - first + 1);
+		}
+	}
+
+	if (buf != NULL) {
+		/* Caller's buffer: need room for n bytes plus the NUL. */
+		if (n >= bufsize) {
+			errno = ENAMETOOLONG;
+			return (NULL);
+		}
+	} else {
+		/* No buffer supplied: allocate one that fits exactly. */
+		buf = malloc(n + 1);
+		if (buf == NULL)
+			return (NULL);
+	}
+	memcpy(buf, first, n);
+	buf[n] = '\0';
+	return (buf);
+}
+
+/*
+ * This is much like POSIX dirname(), but is reentrant.
+ *
+ * We examine a path, find the directory portion, and copy that
+ * to a user supplied buffer <buf> of the given size <bufsize>.
+ *
+ * Note that dirname("/foo/bar/") is "/foo", dirname("/foo") is "/",
+ * and dirname("////") is "/". However, dirname("////foo/bar") is
+ * "////foo" (we do not resolve these leading slashes away -- this
+ * matches the BSD libc behavior).
+ *
+ * The return value is your supplied buffer <buf>, or NULL if
+ * the length of the dirname of the supplied <path> equals or
+ * exceeds your indicated <bufsize>.
+ *
+ * As a special but useful case, if you supply NULL for the <buf>
+ * argument, we allocate the buffer dynamically to match the
+ * dirname, i.e., the result is basically strdup()ed for you.
+ * In this case <bufsize> is ignored (recommended: pass 0 here).
+ */
+char *
+r_dirname(const char *path, char *buf, size_t bufsize)
+{
+	const char *endp, *dirpart;
+	size_t len;
+
+	/*
+	 * NULL or empty path means ".".  This is perhaps overly
+	 * forgiving but matches libc dirname(), and avoids breaking
+	 * the code below.
+	 */
+	if (path == NULL || *path == '\0') {
+		dirpart = ".";
+		len = 1;
+	} else {
+		/*
+		 * Back up over any trailing slashes, then back up
+		 * over the final path name component.  In all cases,
+		 * stop as soon as endp==path so that we do not back
+		 * out of the buffer entirely.
+		 *
+		 * The first loop takes care of trailing slashes in
+		 * names like "/foo/bar//" (where the dirname part is
+		 * to be "/foo"); the second strips out the
+		 * non-dir-name part.
+		 */
+		endp = path + strlen(path) - 1;
+		while (endp > path && *endp == '/')
+			endp--;
+		while (endp > path && *endp != '/')
+			endp--;
+
+		if (*endp != '/') {
+			/*
+			 * The second loop stopped because endp==path,
+			 * not because it found a slash: the name is of
+			 * the form "foo" (or "foo/", etc) and has no
+			 * directory part, so the dirname is ".".
+			 *
+			 * This must be decided here, before separating
+			 * slashes are skipped.  Otherwise a path such
+			 * as "a/b", whose directory is one character
+			 * long, also ends with endp==path on a
+			 * non-slash and is mistaken for this case.
+			 */
+			dirpart = ".";
+			len = 1;
+		} else {
+			/*
+			 * endp is on the slash (or run of slashes)
+			 * that separates the directory from the final
+			 * component.  Back up over those slashes so
+			 * endp is the last byte of the directory.  For
+			 * "/foo" or "//foo" we stay on a leading "/",
+			 * which is what we want.  The dirname is the
+			 * first (endp-path+1) bytes of path.
+			 */
+			while (endp > path && *endp == '/')
+				endp--;
+			dirpart = path;
+			len = (size_t)(endp - path + 1);
+		}
+	}
+	if (buf == NULL) {
+		/* Caller wants a freshly allocated result. */
+		buf = malloc(len + 1);
+		if (buf == NULL)
+			return (NULL);
+	} else {
+		/* Caller's buffer must hold len bytes plus the NUL. */
+		if (len >= bufsize) {
+			errno = ENAMETOOLONG;
+			return (NULL);
+		}
+	}
+	memcpy(buf, dirpart, len);
+	buf[len] = '\0';
+	return (buf);
+}
+
+/*
+ * Reset <pg> to its starting state: no buffer, and a recorded size of
+ * 512 bytes.
+ */
+static void
+r_pginit(struct r_pgdata *pg)
+{
+
+	/* realloc(NULL) behaves like malloc, so NULL is a valid start. */
+	pg->r_pgbuf = NULL;
+	/* Half the intended first size: r_pgexpand() doubles it first. */
+	pg->r_pgbufsize = 1 << 9;
+}
+
+/*
+ * Grow the lookup buffer in <pg> to twice its recorded size.
+ *
+ * Returns 0 on success, with r_pgbuf pointing at the larger buffer and
+ * r_pgbufsize updated to match.  Returns ENOMEM if the doubled size
+ * would reach 1 MiB (the buffer is then left untouched) or if
+ * reallocf() fails (r_pgbuf is then NULL).
+ */
+static int
+r_pgexpand(struct r_pgdata *pg)
+{
+	size_t nsize;
+
+	nsize = pg->r_pgbufsize << 1;
+	if (nsize >= (1 << 20) ||
+	    (pg->r_pgbuf = reallocf(pg->r_pgbuf, nsize)) == NULL)
+		return (ENOMEM);
+
+	/*
+	 * Record the new size.  Without this the size would stay at its
+	 * initial value forever: callers would pass a too-small length
+	 * to the get*_r functions and, on ERANGE, retry with the same
+	 * size without end.
+	 */
+	pg->r_pgbufsize = nsize;
+	return (0);
+}
+
+/*
+ * Release the lookup buffer held by <pg>.
+ */
+void
+r_pgfree(struct r_pgdata *pg)
+{
+	char *scratch = pg->r_pgbuf;
+
+	free(scratch);
+}
+
+/*
+ * Look up <uid> with getpwuid_r(), using <pg> for storage.  The buffer
+ * is (re)initialized here and expanded before every attempt; the call
+ * is retried for as long as it reports ERANGE.  Returns the entry, or
+ * NULL if the lookup or the buffer expansion fails.
+ */
+struct passwd *
+r_getpwuid(uid_t uid, struct r_pgdata *pg)
+{
+	struct passwd *found = NULL;
+	int rv;
+
+	r_pginit(pg);
+	for (;;) {
+		rv = r_pgexpand(pg);
+		if (rv == 0)
+			rv = getpwuid_r(uid, &pg->r_pgun.un_pw,
+			    pg->r_pgbuf, pg->r_pgbufsize, &found);
+		if (rv != ERANGE)
+			break;
+	}
+
+	if (rv != 0)
+		return (NULL);
+	return (found);
+}
+
+/*
+ * Look up <gid> with getgrgid_r(), using <pg> for storage.  The buffer
+ * is (re)initialized here and expanded before every attempt; the call
+ * is retried for as long as it reports ERANGE.  Returns the entry, or
+ * NULL if the lookup or the buffer expansion fails.
+ */
+struct group *
+r_getgrgid(gid_t gid, struct r_pgdata *pg)
+{
+	struct group *found = NULL;
+	int rv;
+
+	r_pginit(pg);
+	for (;;) {
+		rv = r_pgexpand(pg);
+		if (rv == 0)
+			rv = getgrgid_r(gid, &pg->r_pgun.un_gr,
+			    pg->r_pgbuf, pg->r_pgbufsize, &found);
+		if (rv != ERANGE)
+			break;
+	}
+
+	if (rv != 0)
+		return (NULL);
+	return (found);
+}
+
+#if defined(WITH_CASPER)
+/*
+ * Same retry loop as r_getpwuid(), but the lookup itself is made with
+ * cap_getpwuid_r() on the channel <cap>.
+ */
+struct passwd *
+r_cap_getpwuid(cap_channel_t *cap, uid_t uid, struct r_pgdata *pg)
+{
+	struct passwd *found = NULL;
+	int rv;
+
+	r_pginit(pg);
+	for (;;) {
+		rv = r_pgexpand(pg);
+		if (rv == 0)
+			rv = cap_getpwuid_r(cap, uid, &pg->r_pgun.un_pw,
+			    pg->r_pgbuf, pg->r_pgbufsize, &found);
+		if (rv != ERANGE)
+			break;
+	}
+
+	if (rv != 0)
+		return (NULL);
+	return (found);
+}
+
+/*
+ * Same retry loop as r_getgrgid(), but the lookup itself is made with
+ * cap_getgrgid_r() on the channel <cap>.
+ */
+struct group *
+r_cap_getgrgid(cap_channel_t *cap, gid_t gid, struct r_pgdata *pg)
+{
+	struct group *found = NULL;
+	int rv;
+
+	r_pginit(pg);
+	for (;;) {
+		rv = r_pgexpand(pg);
+		if (rv == 0)
+			rv = cap_getgrgid_r(cap, gid, &pg->r_pgun.un_gr,
+			    pg->r_pgbuf, pg->r_pgbufsize, &found);
+		if (rv != ERANGE)
+			break;
+	}
+
+	if (rv != 0)
+		return (NULL);
+	return (found);
+}
+#endif
diff --git a/usr/src/lib/lib9p/common/rfuncs.h b/usr/src/lib/lib9p/common/rfuncs.h
new file mode 100644
index 0000000000..5946f2e2b7
--- /dev/null
+++ b/usr/src/lib/lib9p/common/rfuncs.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2016 Chris Torek <chris.torek@gmail.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_RFUNCS_H
+#define LIB9P_RFUNCS_H
+
+#if defined(__illumos__) && !defined(_POSIX_PTHREAD_SEMANTICS)
+#define	_POSIX_PTHREAD_SEMANTICS 1
+#endif
+
+#include <grp.h>
+#include <pwd.h>
+#include <string.h>
+
+#if defined(WITH_CASPER)
+#include <libcasper.h>
+#endif
+
+/*
+ * Reentrant, optionally-malloc-ing versions of
+ * basename() and dirname().
+ */
+char	*r_basename(const char *, char *, size_t);
+char	*r_dirname(const char *, char *, size_t);
+
+/*
+ * Yuck: getpwuid, getgrgid are not thread-safe, and the
+ * POSIX replacements (getpwuid_r, getgrgid_r) are horrible.
+ * This is to allow us to loop over the get.*_r calls with ever
+ * increasing buffers until they succeed or get unreasonable
+ * (same idea as the libc code for the non-reentrant versions,
+ * although prettier).
+ *
+ * The getpwuid/getgrgid functions auto-init one of these,
+ * but the caller must call r_pgfree() when done with the
+ * return values.
+ *
+ * If we need more later, we may have to expose the init function.
+ */
+struct r_pgdata {
+	char	*r_pgbuf;	/* buffer handed to the get*_r call */
+	size_t	r_pgbufsize;	/* size passed along with r_pgbuf */
+	union {
+		struct passwd un_pw;	/* filled in by passwd lookups */
+		struct group un_gr;	/* filled in by group lookups */
+	} r_pgun;
+};
+
+/* void r_pginit(struct r_pgdata *); */
+void r_pgfree(struct r_pgdata *);
+struct passwd *r_getpwuid(uid_t, struct r_pgdata *);
+struct group *r_getgrgid(gid_t, struct r_pgdata *);
+
+#if defined(WITH_CASPER)
+struct passwd *r_cap_getpwuid(cap_channel_t *, uid_t, struct r_pgdata *);
+struct group *r_cap_getgrgid(cap_channel_t *, gid_t, struct r_pgdata *);
+#endif
+
+#endif	/* LIB9P_RFUNCS_H */
diff --git a/usr/src/lib/lib9p/common/sbuf/sbuf.c b/usr/src/lib/lib9p/common/sbuf/sbuf.c
new file mode 100644
index 0000000000..55e0f88650
--- /dev/null
+++ b/usr/src/lib/lib9p/common/sbuf/sbuf.c
@@ -0,0 +1,65 @@
+/*
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Minimal libsbuf wrapper around libcustr for illumos.
+ */
+
+#include <stdlib.h>
+#include "sbuf.h"
+
+/*
+ * Allocate a new sbuf together with the libcustr string that backs it.
+ * Returns NULL if either allocation fails; a successful result is
+ * released with sbuf_delete().
+ *
+ * The parameter list is spelled (void) so that this definition is a
+ * real prototype, matching the declaration in sbuf.h.
+ */
+struct sbuf *
+sbuf_new_auto(void)
+{
+	struct sbuf *s;
+
+	s = malloc(sizeof (*s));
+	if (s == NULL)
+		return (NULL);
+	if (custr_alloc(&s->s_custr) != 0) {
+		/* Do not leak the wrapper when the custr cannot be made. */
+		free(s);
+		return (NULL);
+	}
+	return (s);
+}
+
+/*
+ * Append printf-style formatted text to <s> by forwarding the variable
+ * arguments to custr_append_vprintf(), whose result is returned as is.
+ */
+int
+sbuf_printf(struct sbuf *s, const char *fmt, ...)
+{
+	va_list args;
+	int rc;
+
+	va_start(args, fmt);
+	rc = custr_append_vprintf(s->s_custr, fmt, args);
+	va_end(args);
+	return (rc);
+}
+
+/*
+ * Destroy <s>: free the backing custr, then the wrapper itself.
+ */
+void
+sbuf_delete(struct sbuf *s)
+{
+	custr_t *backing = s->s_custr;
+
+	custr_free(backing);
+	free(s);
+}
diff --git a/usr/src/lib/lib9p/common/sbuf/sbuf.h b/usr/src/lib/lib9p/common/sbuf/sbuf.h
new file mode 100644
index 0000000000..5b17b3113e
--- /dev/null
+++ b/usr/src/lib/lib9p/common/sbuf/sbuf.h
@@ -0,0 +1,51 @@
+/*
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Minimal libsbuf wrapper around libcustr for illumos.
+ */
+
+#ifndef LIB9P_SBUF_H
+#define LIB9P_SBUF_H
+
+#include <stdarg.h>
+#include <libcustr.h>
+
+/* An sbuf is a thin wrapper holding one libcustr string. */
+struct sbuf
+{
+	custr_t *s_custr;	/* backing string; all sbuf calls act on it */
+};
+
+/*
+ * Function interface.
+ *
+ * NOTE(review): sbuf_data() is also defined as a function-like macro
+ * below, which takes over every ordinary sbuf_data(s) call, and sbuf.c
+ * does not appear to define a function of that name -- confirm whether
+ * this prototype is still wanted.
+ */
+struct sbuf *sbuf_new_auto(void);
+char *sbuf_data(struct sbuf *s);
+int sbuf_printf(struct sbuf *s, const char *fmt, ...);
+void sbuf_delete(struct sbuf *s);
+
+/*
+ * The remaining operations map directly onto libcustr calls on the
+ * backing string.  sbuf_finish() expands to nothing.
+ */
+#define	sbuf_cat(s, str) custr_append((s)->s_custr, (str))
+#define	sbuf_vprintf(s, fmt, args) \
+    custr_append_vprintf((s)->s_custr, (fmt), (args))
+#define	sbuf_data(s) custr_cstr((s)->s_custr)
+#define	sbuf_finish(s)
+
+#endif /* LIB9P_SBUF_H */
diff --git a/usr/src/lib/lib9p/common/threadpool.c b/usr/src/lib/lib9p/common/threadpool.c
new file mode 100644
index 0000000000..a29f2315c5
--- /dev/null
+++ b/usr/src/lib/lib9p/common/threadpool.c
@@ -0,0 +1,469 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Copyright 2020 Joyent, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <pthread.h>
+#if defined(__FreeBSD__)
+#include <pthread_np.h>
+#endif
+#include <sys/queue.h>
+#include "lib9p.h"
+#include "threadpool.h"
+
+static void l9p_threadpool_rflush(struct l9p_threadpool *tp,
+    struct l9p_request *req);
+
+/*
+ * Body of the pool's responder thread; arg is its struct l9p_worker.
+ *
+ * Repeatedly takes the request at the head of ltp_replyq, marks it
+ * L9P_WS_REPLYING, moves any Tflush requests waiting on it onto the
+ * reply queue, and then sends the response with ltp_mtx released.
+ * The thread returns NULL when ltw_exiting is set or when a mutex
+ * lock/unlock fails.  The exit check happens before dequeueing, so
+ * replies still queued at that point are not sent by this thread.
+ */
+static void *
+l9p_responder(void *arg)
+{
+	struct l9p_threadpool *tp;
+	struct l9p_worker *worker = arg;
+	struct l9p_request *req;
+
+	tp = worker->ltw_tp;
+	for (;;) {
+		/* get next reply to send */
+
+		if (pthread_mutex_lock(&tp->ltp_mtx) != 0)
+			break;
+		while (STAILQ_EMPTY(&tp->ltp_replyq) && !worker->ltw_exiting) {
+			(void) pthread_cond_wait(&tp->ltp_reply_cv,
+			    &tp->ltp_mtx);
+		}
+		if (worker->ltw_exiting) {
+			(void) pthread_mutex_unlock(&tp->ltp_mtx);
+			break;
+		}
+
+		/* off reply queue */
+		req = STAILQ_FIRST(&tp->ltp_replyq);
+		STAILQ_REMOVE_HEAD(&tp->ltp_replyq, lr_worklink);
+
+		/* request is now in final glide path, can't be Tflush-ed */
+		req->lr_workstate = L9P_WS_REPLYING;
+
+		/* any flushers waiting for this request can go now */
+		if (req->lr_flushstate != L9P_FLUSH_NONE)
+			l9p_threadpool_rflush(tp, req);
+
+		if (pthread_mutex_unlock(&tp->ltp_mtx) != 0)
+			break;
+
+		/* send response */
+		l9p_respond(req, false, true);
+	}
+	return (NULL);
+}
+
+/*
+ * Body of a regular worker thread; arg is its struct l9p_worker.
+ *
+ * ltp_mtx is held at the top of every loop iteration.  The worker waits
+ * on ltp_work_cv until ltp_workq is non-empty or ltw_exiting is set,
+ * dequeues one request, runs it through l9p_dispatch_request() with the
+ * mutex released, then re-takes the mutex to append the request to
+ * ltp_replyq and wake the responder.
+ *
+ * Returns NULL on exit.  If the mutex cannot be re-taken after the
+ * dispatch, the thread returns without queueing that request's reply.
+ */
+static void *
+l9p_worker(void *arg)
+{
+	struct l9p_threadpool *tp;
+	struct l9p_worker *worker = arg;
+	struct l9p_request *req;
+
+	tp = worker->ltw_tp;
+	if (pthread_mutex_lock(&tp->ltp_mtx) != 0)
+		return (NULL);
+	for (;;) {
+		while (STAILQ_EMPTY(&tp->ltp_workq) && !worker->ltw_exiting) {
+			(void) pthread_cond_wait(&tp->ltp_work_cv,
+			    &tp->ltp_mtx);
+		}
+		if (worker->ltw_exiting)
+			break;
+
+		/* off work queue; now work-in-progress, by us */
+		req = STAILQ_FIRST(&tp->ltp_workq);
+		STAILQ_REMOVE_HEAD(&tp->ltp_workq, lr_worklink);
+		req->lr_workstate = L9P_WS_INPROGRESS;
+		req->lr_worker = worker;
+		(void) pthread_mutex_unlock(&tp->ltp_mtx);
+
+		/* actually try the request */
+		req->lr_error = l9p_dispatch_request(req);
+
+		/* move to responder queue, updating work-state */
+		if (pthread_mutex_lock(&tp->ltp_mtx) != 0)
+			return (NULL);
+		req->lr_workstate = L9P_WS_RESPQUEUED;
+		req->lr_worker = NULL;
+		STAILQ_INSERT_TAIL(&tp->ltp_replyq, req, lr_worklink);
+
+		/* signal the responder */
+		(void) pthread_cond_signal(&tp->ltp_reply_cv);
+	}
+	(void) pthread_mutex_unlock(&tp->ltp_mtx);
+	return (NULL);
+}
+
+/*
+ * Just before finally replying to a request that got touched by
+ * a Tflush request, we enqueue its flushers (requests of type
+ * Tflush, which are now on the flushee's lr_flushq) onto the
+ * response queue.
+ *
+ * The flushers are walked via lr_flushlink and appended to ltp_replyq
+ * via lr_worklink, so the flushee's own lr_flushq is left untouched.
+ * The only caller in this file, l9p_responder(), calls this with
+ * ltp_mtx held.  ltp_reply_cv is not signalled here.
+ */
+static void
+l9p_threadpool_rflush(struct l9p_threadpool *tp, struct l9p_request *req)
+{
+	struct l9p_request *flusher;
+
+	/*
+	 * https://swtch.com/plan9port/man/man9/flush.html says:
+	 *
+	 * "Should multiple Tflushes be received for a pending
+	 * request, they must be answered in order.  A Rflush for
+	 * any of the multiple Tflushes implies an answer for all
+	 * previous ones.  Therefore, should a server receive a
+	 * request and then multiple flushes for that request, it
+	 * need respond only to the last flush."  This means
+	 * we could march through the queue of flushers here,
+	 * marking all but the last one as "to be dropped" rather
+	 * than "to be replied-to".
+	 *
+	 * However, we'll leave that for later, if ever -- it
+	 * should be harmless to respond to each, in order.
+	 */
+	STAILQ_FOREACH(flusher, &req->lr_flushq, lr_flushlink) {
+		flusher->lr_workstate = L9P_WS_RESPQUEUED;
+#ifdef notdef
+		if (not the last) {
+			flusher->lr_flushstate = L9P_FLUSH_NOT_RUN;
+			/* or, flusher->lr_drop = true ? */
+		}
+#endif
+		STAILQ_INSERT_TAIL(&tp->ltp_replyq, flusher, lr_worklink);
+	}
+}
+
+/*
+ * Initialize a thread pool with one responder thread and up to `size'
+ * worker threads.
+ *
+ * Returns 0 once the responder and at least one worker are running; a
+ * pool that could only be partially populated is still considered usable.
+ * Returns EINVAL for a non-positive size, or the error from the failing
+ * pthread/allocation call otherwise.  On failure everything created so
+ * far is torn down again.
+ */
+int
+l9p_threadpool_init(struct l9p_threadpool *tp, int size)
+{
+	struct l9p_worker *worker;
+#if defined(__FreeBSD__)
+	char threadname[16];
+#endif
+	int error;
+	int i, nworkers, nresponders;
+
+	if (size <= 0)
+		return (EINVAL);
+#ifdef __illumos__
+	pthread_mutexattr_t attr;
+
+	if ((error = pthread_mutexattr_init(&attr)) != 0)
+		return (error);
+	if ((error = pthread_mutexattr_settype(&attr,
+	    PTHREAD_MUTEX_ERRORCHECK)) != 0) {
+		(void) pthread_mutexattr_destroy(&attr);
+		return (error);
+	}
+	error = pthread_mutex_init(&tp->ltp_mtx, &attr);
+	/* the attribute object is no longer needed once the mutex exists */
+	(void) pthread_mutexattr_destroy(&attr);
+#else
+	error = pthread_mutex_init(&tp->ltp_mtx, NULL);
+#endif
+	if (error)
+		return (error);
+	error = pthread_cond_init(&tp->ltp_work_cv, NULL);
+	if (error)
+		goto fail_work_cv;
+	error = pthread_cond_init(&tp->ltp_reply_cv, NULL);
+	if (error)
+		goto fail_reply_cv;
+
+	STAILQ_INIT(&tp->ltp_workq);
+	STAILQ_INIT(&tp->ltp_replyq);
+	LIST_INIT(&tp->ltp_workers);
+
+	nresponders = 0;
+	nworkers = 0;
+	/* iteration 0 creates the responder, 1..size create the workers */
+	for (i = 0; i <= size; i++) {
+		worker = calloc(1, sizeof(struct l9p_worker));
+		if (worker == NULL) {
+			/*
+			 * Record the failure: without this, `error' is
+			 * still 0 from the calls above and a pool with no
+			 * threads would be reported as a success.
+			 */
+			error = ENOMEM;
+			break;
+		}
+		worker->ltw_tp = tp;
+		worker->ltw_responder = i == 0;
+		error = pthread_create(&worker->ltw_thread, NULL,
+		    worker->ltw_responder ? l9p_responder : l9p_worker,
+		    (void *)worker);
+		if (error) {
+			free(worker);
+			break;
+		}
+		if (worker->ltw_responder)
+			nresponders++;
+		else
+			nworkers++;
+
+#if defined(__FreeBSD__)
+		if (worker->ltw_responder) {
+			pthread_set_name_np(worker->ltw_thread, "9p-responder");
+		} else {
+			(void) snprintf(threadname, sizeof (threadname),
+			    "9p-worker:%d", i - 1);
+			pthread_set_name_np(worker->ltw_thread, threadname);
+		}
+#elif defined(__illumos__)
+		if (worker->ltw_responder) {
+			(void) pthread_setname_np(worker->ltw_thread,
+			    "9p-responder");
+		} else {
+			char threadname[PTHREAD_MAX_NAMELEN_NP];
+
+			(void) snprintf(threadname, sizeof (threadname),
+			    "9p-worker:%d", i - 1);
+			(void) pthread_setname_np(worker->ltw_thread,
+			    threadname);
+		}
+#endif
+
+		LIST_INSERT_HEAD(&tp->ltp_workers, worker, ltw_link);
+	}
+	if (nresponders == 0 || nworkers == 0) {
+		/* need the one responder, and at least one worker */
+		l9p_threadpool_shutdown(tp);
+		return (error);
+	}
+	return (0);
+
+	/*
+	 * We could avoid these labels by having multiple destroy
+	 * paths (one for each error case), or by having booleans
+	 * for which variables were initialized.  Neither is very
+	 * appealing...
+	 */
+fail_reply_cv:
+	(void) pthread_cond_destroy(&tp->ltp_work_cv);
+fail_work_cv:
+	(void) pthread_mutex_destroy(&tp->ltp_mtx);
+
+	return (error);
+}
+
+/*
+ * Run a request, usually by queueing it.
+ *
+ * Tflush requests are dispatched directly on the calling thread with
+ * work-state L9P_WS_IMMEDIATE; the dispatch result is ignored here.
+ * Every other request is marked L9P_WS_NOTSTARTED, appended to
+ * ltp_workq and one worker is woken.  If ltp_mtx cannot be locked the
+ * request is not queued and nothing is reported to the caller.
+ */
+void
+l9p_threadpool_run(struct l9p_threadpool *tp, struct l9p_request *req)
+{
+
+	/*
+	 * Flush requests must be handled specially, since they
+	 * can cancel / kill off regular requests.  (But we can
+	 * run them through the regular dispatch mechanism.)
+	 */
+	if (req->lr_req.hdr.type == L9P_TFLUSH) {
+		/* not on a work queue yet so we can touch state */
+		req->lr_workstate = L9P_WS_IMMEDIATE;
+		(void) l9p_dispatch_request(req);
+	} else {
+		if (pthread_mutex_lock(&tp->ltp_mtx) != 0)
+			return;
+		req->lr_workstate = L9P_WS_NOTSTARTED;
+		STAILQ_INSERT_TAIL(&tp->ltp_workq, req, lr_worklink);
+		(void) pthread_cond_signal(&tp->ltp_work_cv);
+		(void) pthread_mutex_unlock(&tp->ltp_mtx);
+	}
+}
+
+/*
+ * Run a Tflush request.  Called via l9p_dispatch_request() since
+ * it has some debug code in it, but not called from worker thread.
+ *
+ * Looks up the request named by oldtag (the "flushee") in the
+ * connection's request table.  If it is missing or already in
+ * L9P_WS_REPLYING, this Tflush is put straight onto the reply queue.
+ * Otherwise the flushee's flush state is updated and this Tflush is
+ * appended to the flushee's lr_flushq, to be answered after the flushee.
+ *
+ * Lock order: the lc_requests write lock is taken first, then ltp_mtx,
+ * and lc_requests is dropped before the flushee is inspected.
+ *
+ * Returns 0 on success, or the error from a failed lock attempt.
+ */
+int
+l9p_threadpool_tflush(struct l9p_request *req)
+{
+	struct l9p_connection *conn;
+	struct l9p_threadpool *tp;
+	struct l9p_request *flushee;
+	uint16_t oldtag;
+	enum l9p_flushstate nstate = L9P_FLUSH_NONE;
+	int err;
+
+	/*
+	 * Find what we're supposed to flush (the flushee, as it were).
+	 */
+	req->lr_error = 0;	/* Tflush always succeeds */
+	conn = req->lr_conn;
+	tp = &conn->lc_tp;
+	oldtag = req->lr_req.tflush.oldtag;
+	if ((err = ht_wrlock(&conn->lc_requests)) != 0)
+		return (err);
+	flushee = ht_find_locked(&conn->lc_requests, oldtag);
+	if (flushee == NULL) {
+		/*
+		 * Nothing to flush!  The old request must have
+		 * been done and gone already.  Just queue this
+		 * Tflush for a success reply.
+		 */
+		(void) ht_unlock(&conn->lc_requests);
+		if ((err = pthread_mutex_lock(&tp->ltp_mtx)) != 0)
+			return (err);
+		goto done;
+	}
+
+	/*
+	 * Found the original request.  We'll need to inspect its
+	 * work-state to figure out what to do.
+	 */
+	if ((err = pthread_mutex_lock(&tp->ltp_mtx)) != 0) {
+		(void) ht_unlock(&conn->lc_requests);
+		return (err);
+	}
+	(void) ht_unlock(&conn->lc_requests);
+
+	switch (flushee->lr_workstate) {
+
+	case L9P_WS_NOTSTARTED:
+		/*
+		 * Flushee is on work queue, but not yet being
+		 * handled by a worker.
+		 *
+		 * The documentation -- see
+		 * http://ericvh.github.io/9p-rfc/rfc9p2000.html
+		 * https://swtch.com/plan9port/man/man9/flush.html
+		 * -- says that "the server should answer the
+		 * flush message immediately".  However, Linux
+		 * sends flush requests for operations that
+		 * must finish, such as Tclunk, and it's not
+		 * possible to *answer* the flush request until
+		 * it has been handled (if necessary) or aborted
+		 * (if allowed).
+		 *
+		 * We therefore now just mark the original request
+		 * and let the request-handler do whatever is
+		 * appropriate.  NOTE: we could have a table of
+		 * "requests that can be aborted without being
+		 * run" vs "requests that must be run to be
+		 * aborted", but for now that seems like an
+		 * unnecessary complication.
+		 */
+		nstate = L9P_FLUSH_REQUESTED_PRE_START;
+		break;
+
+	case L9P_WS_IMMEDIATE:
+		/*
+		 * This state only applies to Tflush requests, and
+		 * flushing a Tflush is illegal.  But we'll do nothing
+		 * special here, which will make us act like a flush
+		 * request for the flushee that arrived too late to
+		 * do anything about the flushee.
+		 */
+		nstate = L9P_FLUSH_REQUESTED_POST_START;
+		break;
+
+	case L9P_WS_INPROGRESS:
+		/*
+		 * Worker thread flushee->lr_worker is working on it.
+		 * Kick it to get it out of blocking system calls.
+		 * (This requires that it carefully set up some
+		 * signal handlers, and may be FreeBSD-dependent,
+		 * it probably cannot be handled this way on MacOS.)
+		 */
+#ifdef notyet
+		pthread_kill(...);
+#endif
+		nstate = L9P_FLUSH_REQUESTED_POST_START;
+		break;
+
+	case L9P_WS_RESPQUEUED:
+		/*
+		 * The flushee is already in the response queue.
+		 * We'll just mark it as having had some flush
+		 * action applied.
+		 */
+		nstate = L9P_FLUSH_TOOLATE;
+		break;
+
+	case L9P_WS_REPLYING:
+		/*
+		 * Although we found the flushee, it's too late to
+		 * make us depend on it: it's already heading out
+		 * the door as a reply.
+		 *
+		 * We don't want to do anything to the flushee.
+		 * Instead, we want to work the same way as if
+		 * we had never found the tag.
+		 */
+		goto done;
+	}
+
+	/*
+	 * Now add us to the list of Tflush-es that are waiting
+	 * for the flushee (creating the list if needed, i.e., if
+	 * this is the first Tflush for the flushee).  We (req)
+	 * will get queued for reply later, when the responder
+	 * processes the flushee and calls l9p_threadpool_rflush().
+	 */
+	if (flushee->lr_flushstate == L9P_FLUSH_NONE)
+		STAILQ_INIT(&flushee->lr_flushq);
+	flushee->lr_flushstate = nstate;
+	STAILQ_INSERT_TAIL(&flushee->lr_flushq, req, lr_flushlink);
+
+	(void) pthread_mutex_unlock(&tp->ltp_mtx);
+
+	return (0);
+
+done:
+	/*
+	 * This immediate op is ready to be replied-to now, so just
+	 * stick it onto the reply queue.
+	 */
+	req->lr_workstate = L9P_WS_RESPQUEUED;
+	STAILQ_INSERT_TAIL(&tp->ltp_replyq, req, lr_worklink);
+	(void) pthread_mutex_unlock(&tp->ltp_mtx);
+	(void) pthread_cond_signal(&tp->ltp_reply_cv);
+	return (0);
+}
+
+/*
+ * Stop and reap every thread in the pool, then destroy the pool's
+ * condition variables and mutex.
+ *
+ * Each worker is flagged as exiting under ltp_mtx, woken through the
+ * condition variable it waits on, joined, unlinked and freed.  A worker
+ * is skipped (neither joined nor freed) if ltp_mtx cannot be locked for
+ * it.  Always returns 0.
+ */
+int
+l9p_threadpool_shutdown(struct l9p_threadpool *tp)
+{
+	struct l9p_worker *worker, *tmp;
+
+	LIST_FOREACH_SAFE(worker, &tp->ltp_workers, ltw_link, tmp) {
+		if (pthread_mutex_lock(&tp->ltp_mtx) != 0)
+			continue;
+		worker->ltw_exiting = true;
+		/*
+		 * Regular workers all share ltp_work_cv, so every one of
+		 * them is woken to make sure the exiting one is.
+		 */
+		if (worker->ltw_responder)
+			(void) pthread_cond_signal(&tp->ltp_reply_cv);
+		else
+			(void) pthread_cond_broadcast(&tp->ltp_work_cv);
+		(void) pthread_mutex_unlock(&tp->ltp_mtx);
+		(void) pthread_join(worker->ltw_thread, NULL);
+		LIST_REMOVE(worker, ltw_link);
+		free(worker);
+	}
+	(void) pthread_cond_destroy(&tp->ltp_reply_cv);
+	(void) pthread_cond_destroy(&tp->ltp_work_cv);
+	(void) pthread_mutex_destroy(&tp->ltp_mtx);
+
+	return (0);
+}
diff --git a/usr/src/lib/lib9p/common/threadpool.h b/usr/src/lib/lib9p/common/threadpool.h
new file mode 100644
index 0000000000..2855c1c545
--- /dev/null
+++ b/usr/src/lib/lib9p/common/threadpool.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef	LIB9P_THREADPOOL_H
+#define	LIB9P_THREADPOOL_H
+
+#include <stdbool.h>
+#include <pthread.h>
+#include <sys/queue.h>
+#include "lib9p.h"
+
+STAILQ_HEAD(l9p_request_queue, l9p_request);
+
+/*
+ * Most of the workers in the threadpool run requests.
+ *
+ * One distinguished worker delivers responses from the
+ * response queue.  The reason this worker exists is to
+ * guarantee response order, so that flush responses go
+ * after their flushed requests.
+ */
+struct l9p_threadpool {
+    struct l9p_connection *	ltp_conn;	/* the connection */
+    struct l9p_request_queue	ltp_workq;	/* requests awaiting a worker */
+    struct l9p_request_queue	ltp_replyq;	/* requests that are done */
+    pthread_mutex_t		ltp_mtx;	/* locks queues and cond vars */
+    pthread_cond_t		ltp_work_cv;	/* to signal regular workers */
+    pthread_cond_t		ltp_reply_cv;	/* to signal reply-worker */
+    LIST_HEAD(, l9p_worker)	ltp_workers;	/* list of all workers */
+};
+
+/*
+ * All workers, including the responder, use this as their
+ * control structure.  (The only thing that distinguishes the
+ * responder is that it runs different code and waits on the
+ * reply_cv.)
+ */
+struct l9p_worker {
+    struct l9p_threadpool *	ltw_tp;		/* pool this worker belongs to */
+    pthread_t			ltw_thread;	/* thread running this worker */
+    bool			ltw_exiting;	/* set at shutdown to stop it */
+    bool			ltw_responder;	/* true for the reply-worker */
+    LIST_ENTRY(l9p_worker)	ltw_link;	/* entry in ltp_workers */
+};
+
+/*
+ * Each request has a "work state" telling where the request is,
+ * in terms of workers working on it.  That is, this tells us
+ * which threadpool queue, if any, the request is in now or would
+ * go in, or what's happening with it.
+ */
+enum l9p_workstate {
+	L9P_WS_NOTSTARTED,		/* not yet started */
+	L9P_WS_IMMEDIATE,		/* Tflush being done sans worker */
+	L9P_WS_INPROGRESS,		/* worker is working on it */
+	L9P_WS_RESPQUEUED,		/* worker is done, response queued */
+	L9P_WS_REPLYING,		/* responder is in final reply path */
+};
+
+/*
+ * Each request has a "flush state", initially NONE meaning no
+ * Tflush affected the request.
+ *
+ * If a Tflush comes in before we ever assign a work thread,
+ * the flush state goes to FLUSH_REQUESTED_PRE_START.
+ *
+ * If a Tflush comes in after we assign a work thread, the
+ * flush state goes to FLUSH_REQUESTED_POST_START.  The flush
+ * request may be too late: the request might finish anyway.
+ * Or it might be soon enough to abort.  In all cases, though, the
+ * operation requesting the flush (the "flusher") must wait for
+ * the other request (the "flushee") to go through the respond
+ * path.  The respond routine gets to decide whether to send a
+ * normal response, send an error, or drop the request
+ * entirely.
+ *
+ * There's one especially annoying case: what if a Tflush comes in
+ * *while* we're sending a response?  In this case it's too late:
+ * the flush just waits for the fully-composed response.
+ */
+enum l9p_flushstate {
+	L9P_FLUSH_NONE = 0,		/* must be zero */
+	L9P_FLUSH_REQUESTED_PRE_START,	/* not even started before flush */
+	L9P_FLUSH_REQUESTED_POST_START,	/* started, then someone said flush */
+	L9P_FLUSH_TOOLATE		/* too late, already responding */
+};
+
+/*
+ * Thread pool entry points.
+ * NOTE(review): l9p_threadpool_flushee_done() is declared here but
+ * threadpool.c contains no definition for it -- confirm whether this
+ * prototype is stale.
+ */
+void	l9p_threadpool_flushee_done(struct l9p_request *);
+int	l9p_threadpool_init(struct l9p_threadpool *, int);
+void	l9p_threadpool_run(struct l9p_threadpool *, struct l9p_request *);
+int	l9p_threadpool_shutdown(struct l9p_threadpool *);
+int	l9p_threadpool_tflush(struct l9p_request *);
+
+#endif	/* LIB9P_THREADPOOL_H  */
diff --git a/usr/src/lib/lib9p/common/transport/socket.c b/usr/src/lib/lib9p/common/transport/socket.c
new file mode 100644
index 0000000000..214a1c8d70
--- /dev/null
+++ b/usr/src/lib/lib9p/common/transport/socket.c
@@ -0,0 +1,593 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Copyright 2021 Joyent, Inc.
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sys/types.h>
+#ifdef __APPLE__
+# include "../apple_endian.h"
+#elif __illumos__
+# include <sys/param.h>
+# include <port.h>
+# include "../illumos_endian.h"
+#else
+# include <sys/endian.h>
+#endif
+#include <sys/socket.h>
+#ifndef __illumos__
+# include <sys/event.h>
+#endif
+#include <sys/uio.h>
+#include <netdb.h>
+#include "../lib9p.h"
+#include "../lib9p_impl.h"
+#include "../log.h"
+#include "socket.h"
+
+/*
+ * Per-connection state of the socket transport; handed to the transport
+ * callbacks as their aux argument.
+ */
+struct l9p_socket_softc
+{
+	struct l9p_connection *ls_conn;	/* connection served over ls_fd */
+	struct sockaddr ls_sockaddr;	/* never assigned in socket.c */
+	socklen_t ls_socklen;		/* never assigned in socket.c */
+	pthread_t ls_thread;		/* runs l9p_socket_thread() */
+	int ls_fd;			/* accepted client socket */
+};
+
+#ifdef __FreeBSD__
+struct event_svr {
+	struct kevent *ev_kev;
+	struct kevent *ev_event;
+	int ev_kq;
+};
+#elif __illumos__
+struct event_svr {
+	port_event_t *ev_pe;
+	int ev_port;
+};
+#else
+#error "No event server defined"
+#endif
+
+static int l9p_init_event_svr(struct event_svr *, uint_t);
+static uint_t l9p_get_server_addrs(const char *, const char *,
+    struct addrinfo **);
+static uint_t l9p_bind_addrs(struct event_svr *, struct addrinfo *, uint_t,
+    int **);
+static int l9p_event_get(struct l9p_server *, struct event_svr *, uint_t,
+    void (*cb)(struct l9p_server *, int));
+static int l9p_socket_readmsg(struct l9p_socket_softc *, void **, size_t *);
+static int l9p_socket_get_response_buffer(struct l9p_request *,
+    struct iovec *, size_t *, void *);
+static int l9p_socket_send_response(struct l9p_request *, const struct iovec *,
+    const size_t, const size_t, void *);
+static void l9p_socket_drop_response(struct l9p_request *, const struct iovec *,
+    size_t, void *);
+static void *l9p_socket_thread(void *);
+static ssize_t xread(int, void *, size_t);
+static ssize_t xwrite(int, void *, size_t);
+
+/*
+ * Listen on every address that host/port resolves to and serve
+ * connections until the event loop fails.
+ *
+ * Each readiness event on a listening socket is handed to
+ * l9p_socket_accept().  This function only returns after a failure
+ * (address lookup, event-server setup, binding, or event retrieval) and
+ * therefore always returns -1.
+ */
+int
+l9p_start_server(struct l9p_server *server, const char *host, const char *port)
+{
+	struct addrinfo *res = NULL;
+	int *sockets = NULL;
+	uint_t naddrs = 0;
+	uint_t nsockets = 0;
+	uint_t i;
+	struct event_svr esvr;
+
+	naddrs = l9p_get_server_addrs(host, port, &res);
+	if (naddrs == 0)
+		return (-1);
+
+	if (l9p_init_event_svr(&esvr, naddrs) != 0) {
+		freeaddrinfo(res);
+		return (-1);
+	}
+
+	nsockets = l9p_bind_addrs(&esvr, res, naddrs, &sockets);
+
+	/*
+	 * We don't need res, after this, so free it and NULL it to prevent
+	 * any possible use after free.
+	 */
+	freeaddrinfo(res);
+	res = NULL;
+
+	if (nsockets == 0)
+		goto fail;
+
+	/* dispatch events until l9p_event_get() reports a hard error */
+	for (;;) {
+		if (l9p_event_get(server, &esvr, nsockets,
+		    l9p_socket_accept) < 0)
+			break;
+	}
+
+	/* We get here if something failed */
+	for (i = 0; i < nsockets; i++)
+		close(sockets[i]);
+
+fail:
+	free(sockets);
+
+#ifdef __FreeBSD__
+	close(esvr.ev_kq);
+	free(esvr.ev_kev);
+	free(esvr.ev_event);
+#elif __illumos__
+	close(esvr.ev_port);
+	free(esvr.ev_pe);
+#else
+#error "Port me"
+#endif
+
+	return (-1);
+}
+
+/*
+ * Resolve host/port into a list of stream-socket addresses.
+ *
+ * On success the list is stored in *resp (to be released by the caller
+ * with freeaddrinfo()) and the number of entries in it is returned.
+ * Returns 0, after logging the reason, if the lookup failed or produced
+ * no addresses.
+ */
+static uint_t
+l9p_get_server_addrs(const char *host, const char *port, struct addrinfo **resp)
+{
+	struct addrinfo *res, hints;
+	uint_t naddrs;
+	int rc;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = PF_UNSPEC;
+	hints.ai_socktype = SOCK_STREAM;
+	rc = getaddrinfo(host, port, &hints, resp);
+	/*
+	 * getaddrinfo() signals failure with any non-zero value; the sign
+	 * of the EAI_* codes is platform dependent, so do not test "> 0".
+	 */
+	if (rc != 0) {
+		L9P_LOG(L9P_ERROR, "getaddrinfo(): %s", gai_strerror(rc));
+		return (0);
+	}
+
+	naddrs = 0;
+	for (res = *resp; res != NULL; res = res->ai_next)
+		naddrs++;
+
+	if (naddrs == 0) {
+		L9P_LOG(L9P_ERROR, "no addresses found for %s:%s", host, port);
+	}
+
+	return (naddrs);
+}
+
+#ifdef __FreeBSD__
+/*
+ * FreeBSD: allocate the kevent change/event arrays (one slot per
+ * potential listening socket) and create the kqueue.
+ *
+ * Returns 0 on success.  On failure returns -1 with everything that was
+ * allocated here released again.
+ */
+static int
+l9p_init_event_svr(struct event_svr *svr, uint_t nsockets)
+{
+	svr->ev_kev = calloc(nsockets, sizeof(struct kevent));
+	if (svr->ev_kev == NULL) {
+		L9P_LOG(L9P_ERROR, "calloc(): %s", strerror(errno));
+		return (-1);
+	}
+
+	svr->ev_event = calloc(nsockets, sizeof(struct kevent));
+	if (svr->ev_event == NULL) {
+		L9P_LOG(L9P_ERROR, "calloc(): %s", strerror(errno));
+		/* release the change list allocated above */
+		free(svr->ev_kev);
+		svr->ev_kev = NULL;
+		return (-1);
+	}
+
+	svr->ev_kq = kqueue();
+	if (svr->ev_kq == -1) {
+		L9P_LOG(L9P_ERROR, "kqueue(): %s", strerror(errno));
+		free(svr->ev_kev);
+		free(svr->ev_event);
+		svr->ev_kev = NULL;
+		svr->ev_event = NULL;
+		return (-1);
+	}
+
+	return (0);
+}
+#elif __illumos__
+/*
+ * illumos: allocate the port_event_t array (one slot per potential
+ * listening socket) and create the event port.
+ *
+ * Returns 0 on success.  On failure returns -1 with the event array
+ * released again, since the caller does not clean up after a failed
+ * initialization.
+ */
+static int
+l9p_init_event_svr(struct event_svr *svr, uint_t nsockets)
+{
+	svr->ev_pe = calloc(nsockets, sizeof(port_event_t));
+	if (svr->ev_pe == NULL) {
+		L9P_LOG(L9P_ERROR, "calloc(): %s", strerror(errno));
+		return (-1);
+	}
+
+	svr->ev_port = port_create();
+	if (svr->ev_port == -1) {
+		L9P_LOG(L9P_ERROR, "port_create(): %s", strerror(errno));
+		free(svr->ev_pe);
+		svr->ev_pe = NULL;
+		return (-1);
+	}
+
+	return (0);
+}
+#else
+#error "No event server defined"
+#endif
+
+/*
+ * Create a listening socket for each address in `addrs' and register it
+ * with the event server.
+ *
+ * *socketsp receives a calloc()ed array of naddrs descriptors: the first
+ * N entries are the listening sockets, the rest are set to -1, where N is
+ * the return value.  Addresses that cannot be set up are logged and
+ * skipped.  Returns 0 with *socketsp set to NULL if no socket at all
+ * could be set up.
+ */
+static uint_t
+l9p_bind_addrs(struct event_svr *svr, struct addrinfo *addrs, uint_t naddrs,
+    int **socketsp)
+{
+	struct addrinfo *addr;
+	int *sockets;
+	uint_t i, j;
+
+	/*
+	 * Work through a plain int pointer: "*socketsp[i]" parses as
+	 * "*(socketsp[i])", which is not the i'th element of the array.
+	 */
+	sockets = calloc(naddrs, sizeof(int));
+	*socketsp = sockets;
+	if (sockets == NULL) {
+		L9P_LOG(L9P_ERROR, "calloc(): %s", strerror(errno));
+		return (0);
+	}
+
+	for (i = 0, addr = addrs; addr != NULL; addr = addr->ai_next) {
+		int s;
+		int val = 1;
+
+		s = socket(addr->ai_family, addr->ai_socktype,
+		    addr->ai_protocol);
+		if (s == -1) {
+			L9P_LOG(L9P_ERROR, "socket(): %s", strerror(errno));
+			continue;
+		}
+
+		if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &val,
+		    sizeof(val)) < 0) {
+			L9P_LOG(L9P_ERROR, "setsockopt(): %s", strerror(errno));
+			close(s);
+			continue;
+		}
+
+		if (bind(s, addr->ai_addr, addr->ai_addrlen) < 0) {
+			L9P_LOG(L9P_ERROR, "bind(): %s", strerror(errno));
+			close(s);
+			continue;
+		}
+
+		if (listen(s, 10) < 0) {
+			L9P_LOG(L9P_ERROR, "listen(): %s", strerror(errno));
+			close(s);
+			continue;
+		}
+
+#ifdef __FreeBSD__
+		EV_SET(&svr->ev_kev[i], s, EVFILT_READ, EV_ADD | EV_ENABLE, 0,
+		    0, 0);
+#elif __illumos__
+		if (port_associate(svr->ev_port, PORT_SOURCE_FD, s,
+		    POLLIN|POLLHUP, NULL) < 0) {
+			L9P_LOG(L9P_ERROR, "port_associate(%d): %s", s,
+			    strerror(errno));
+			close(s);
+			continue;
+		}
+#else
+#error "Port me"
+#endif
+
+		sockets[i++] = s;
+	}
+
+	if (i < 1) {
+		free(sockets);
+		*socketsp = NULL;
+		return (0);
+	}
+
+	/* mark the unused tail of the array as "no socket" */
+	for (j = i; j < naddrs; j++)
+		sockets[j] = -1;
+
+#ifdef __FreeBSD__
+	if (kevent(svr->ev_kq, svr->ev_kev, i, NULL, 0, NULL) < 0) {
+		L9P_LOG(L9P_ERROR, "kevent(): %s", strerror(errno));
+
+		/* close the sockets themselves, not descriptors 0..i-1 */
+		for (j = 0; j < i; j++)
+			close(sockets[j]);
+
+		free(sockets);
+		*socketsp = NULL;
+
+		return (0);
+	}
+#endif
+
+	return (i);
+}
+
+#ifdef __FreeBSD__
+/*
+ * FreeBSD: wait for readiness events on the listening sockets and call
+ * cb(l9svr, fd) for each socket that has one.
+ *
+ * Returns 0 after dispatching (or when the wait was interrupted by a
+ * signal) and -1 if kevent() failed for any other reason.
+ */
+static int
+l9p_event_get(struct l9p_server *l9svr, struct event_svr *esvr, uint_t nsockets,
+    void (*cb)(struct l9p_server *, int))
+{
+	int i, evs;
+
+	evs = kevent(esvr->ev_kq, NULL, 0, esvr->ev_event, nsockets, NULL);
+	if (evs < 0) {
+		if (errno == EINTR)
+			return (0);
+		L9P_LOG(L9P_ERROR, "kevent(): %s", strerror(errno));
+		return (-1);
+	}
+
+	for (i = 0; i < evs; i++)
+		cb(l9svr, (int)esvr->ev_event[i].ident);
+
+	return (0);
+}
+#elif __illumos__
+/*
+ * illumos: wait for at least one event on the port and call
+ * cb(l9svr, fd) for each listening socket that has one.
+ *
+ * Returns 0 after dispatching (or when the wait was interrupted by a
+ * signal) and -1 if port_getn() failed for any other reason.
+ */
+static int
+l9p_event_get(struct l9p_server *l9svr, struct event_svr *esvr, uint_t nsockets,
+    void (*cb)(struct l9p_server *, int))
+{
+	uint_t evs = 1;
+	uint_t i;
+
+	if (port_getn(esvr->ev_port, esvr->ev_pe, nsockets, &evs, NULL) < 0) {
+		if (errno == EINTR)
+			return (0);
+		L9P_LOG(L9P_ERROR, "port_getn(): %s", strerror(errno));
+		return (-1);
+	}
+
+	for (i = 0; i < evs; i++) {
+		int fd;
+
+		if (esvr->ev_pe[i].portev_source != PORT_SOURCE_FD)
+			continue;
+
+		fd = (int)esvr->ev_pe[i].portev_object;
+		cb(l9svr, fd);
+
+		/*
+		 * A PORT_SOURCE_FD association is dissolved once its event
+		 * has been retrieved, so the descriptor has to be
+		 * associated again or this listener would never report
+		 * another connection.
+		 */
+		if (port_associate(esvr->ev_port, PORT_SOURCE_FD, fd,
+		    POLLIN|POLLHUP, NULL) < 0) {
+			L9P_LOG(L9P_ERROR, "port_associate(%d): %s", fd,
+			    strerror(errno));
+		}
+	}
+
+	return (0);
+}
+#else
+#error "Port me"
+#endif
+
+/*
+ * Accept one pending connection on the listening socket svr_fd, create
+ * an l9p connection for it with the socket transport callbacks
+ * installed, and start a thread that services it.
+ *
+ * Failures are logged; the accepted socket is closed again whenever the
+ * connection cannot be handed over to a service thread.
+ */
+void
+l9p_socket_accept(struct l9p_server *server, int svr_fd)
+{
+	struct l9p_socket_softc *sc;
+	struct l9p_connection *conn;
+	char host[NI_MAXHOST + 1];
+	char serv[NI_MAXSERV + 1];
+	/* sockaddr_storage: a plain struct sockaddr cannot hold IPv6 peers */
+	struct sockaddr_storage client_addr;
+	socklen_t client_addr_len = sizeof(client_addr);
+	int conn_fd, err;
+
+	conn_fd = accept(svr_fd, (struct sockaddr *)&client_addr,
+	    &client_addr_len);
+	if (conn_fd < 0) {
+		L9P_LOG(L9P_WARNING, "accept(): %s", strerror(errno));
+		return;
+	}
+
+	err = getnameinfo((struct sockaddr *)&client_addr, client_addr_len,
+	    host, NI_MAXHOST, serv, NI_MAXSERV,
+	    NI_NUMERICHOST | NI_NUMERICSERV);
+
+	if (err != 0) {
+		L9P_LOG(L9P_WARNING, "cannot look up client name: %s",
+		    gai_strerror(err));
+		/* keep host/serv printable for the error path below */
+		(void) strcpy(host, "unknown");
+		(void) strcpy(serv, "unknown");
+	} else {
+		L9P_LOG(L9P_INFO, "new connection from %s:%s", host, serv);
+	}
+
+	if (l9p_connection_init(server, &conn) != 0) {
+		L9P_LOG(L9P_ERROR, "cannot create new connection");
+		(void) close(conn_fd);
+		return;
+	}
+
+	sc = l9p_calloc(1, sizeof(*sc));
+	sc->ls_conn = conn;
+	sc->ls_fd = conn_fd;
+
+	/*
+	 * Fill in transport handler functions and aux argument.
+	 */
+	conn->lc_lt.lt_aux = sc;
+	conn->lc_lt.lt_get_response_buffer = l9p_socket_get_response_buffer;
+	conn->lc_lt.lt_send_response = l9p_socket_send_response;
+	conn->lc_lt.lt_drop_response = l9p_socket_drop_response;
+
+	err = pthread_create(&sc->ls_thread, NULL, l9p_socket_thread, sc);
+	if (err) {
+		L9P_LOG(L9P_ERROR,
+		    "pthread_create (for connection from %s:%s): error %s",
+		    host, serv, strerror(err));
+		l9p_connection_close(sc->ls_conn);
+		free(sc);
+		(void) close(conn_fd);
+	}
+}
+
+/*
+ * Per-connection service thread; arg is the connection's
+ * struct l9p_socket_softc.
+ *
+ * Reads one complete message at a time and hands it to
+ * l9p_connection_recv() until reading fails or the peer closes the
+ * socket, then closes the connection, closes the client socket and frees
+ * the softc.  Always returns NULL.
+ */
+static void *
+l9p_socket_thread(void *arg)
+{
+	struct l9p_socket_softc *sc = (struct l9p_socket_softc *)arg;
+	struct iovec iov;
+	void *buf;
+	size_t length;
+
+	for (;;) {
+		if (l9p_socket_readmsg(sc, &buf, &length) != 0)
+			break;
+
+		iov.iov_base = buf;
+		iov.iov_len = length;
+		l9p_connection_recv(sc->ls_conn, &iov, 1, NULL);
+		free(buf);
+	}
+
+	L9P_LOG(L9P_INFO, "connection closed");
+	l9p_connection_close(sc->ls_conn);
+	/*
+	 * Nothing else in this transport closes the accepted socket, so
+	 * release it here, after the connection has been closed.
+	 */
+	(void) close(sc->ls_fd);
+	free(sc);
+	return (NULL);
+}
+
+/*
+ * Read one complete message from the connection's socket.
+ *
+ * A message starts with a 32-bit little-endian length that counts the
+ * length field itself.  On success returns 0 and stores the allocated
+ * message (length field included) in *buf and its size in *size; the
+ * caller releases the buffer with free().  Returns -1 on EOF, read
+ * error, short read or an impossible length; nothing is allocated on
+ * return in that case.
+ */
+static int
+l9p_socket_readmsg(struct l9p_socket_softc *sc, void **buf, size_t *size)
+{
+	uint32_t msize;
+	size_t toread;
+	ssize_t ret;
+	void *buffer;
+	int fd = sc->ls_fd;
+
+	assert(fd > 0);
+
+	buffer = l9p_malloc(sizeof(uint32_t));
+
+	ret = xread(fd, buffer, sizeof(uint32_t));
+	if (ret < 0) {
+		L9P_LOG(L9P_ERROR, "read(): %s", strerror(errno));
+		goto fail;
+	}
+
+	if (ret != sizeof(uint32_t)) {
+		if (ret == 0) {
+			L9P_LOG(L9P_DEBUG, "%p: EOF", (void *)sc->ls_conn);
+		} else {
+			L9P_LOG(L9P_ERROR,
+			    "short read: %zd bytes of %zu expected",
+			    ret, sizeof(uint32_t));
+		}
+		goto fail;
+	}
+
+	msize = le32toh(*(uint32_t *)buffer);
+	/*
+	 * The length comes from the peer.  Anything smaller than the
+	 * length field itself would make "toread" wrap around and the
+	 * read below run far past the end of the buffer.
+	 */
+	if (msize < sizeof(uint32_t)) {
+		L9P_LOG(L9P_ERROR, "invalid message size %u", msize);
+		goto fail;
+	}
+	toread = msize - sizeof(uint32_t);
+	buffer = l9p_realloc(buffer, msize);
+
+	ret = xread(fd, (char *)buffer + sizeof(uint32_t), toread);
+	if (ret < 0) {
+		L9P_LOG(L9P_ERROR, "read(): %s", strerror(errno));
+		goto fail;
+	}
+
+	if (ret != (ssize_t)toread) {
+		L9P_LOG(L9P_ERROR, "short read: %zd bytes of %zu expected",
+		    ret, toread);
+		goto fail;
+	}
+
+	*size = msize;
+	*buf = buffer;
+	L9P_LOG(L9P_INFO, "%p: read complete message, buf=%p size=%u",
+	    (void *)sc->ls_conn, buffer, msize);
+
+	return (0);
+
+fail:
+	/* the buffer is not handed to the caller on any failure */
+	free(buffer);
+	return (-1);
+}
+
+/*
+ * Transport callback: provide the buffer a response is composed in.
+ * A single iovec covering lc_msize bytes is allocated; *niovp is set to
+ * 1 and 0 is returned.
+ */
+static int
+l9p_socket_get_response_buffer(struct l9p_request *req, struct iovec *iov,
+    size_t *niovp, void *arg __unused)
+{
+	const size_t msize = req->lr_conn->lc_msize;
+
+	iov->iov_base = l9p_malloc(msize);
+	iov->iov_len = msize;
+	*niovp = 1;
+
+	return (0);
+}
+
+/*
+ * Transport callback: write the first iolen bytes of the response buffer
+ * (iov[0]) to the connection's socket; arg is the connection's
+ * struct l9p_socket_softc.
+ *
+ * The buffer came from l9p_socket_get_response_buffer() and is released
+ * here whether or not the write succeeds.  Returns 0 on success and -1
+ * if the response could not be written completely.
+ */
+static int
+l9p_socket_send_response(struct l9p_request *req __unused,
+    const struct iovec *iov, const size_t niov __unused, const size_t iolen,
+    void *arg)
+{
+	struct l9p_socket_softc *sc = (struct l9p_socket_softc *)arg;
+	ssize_t written;
+	int saved_errno;
+
+	assert(sc->ls_fd >= 0);
+
+	L9P_LOG(L9P_DEBUG, "%p: sending reply, buf=%p, size=%zu", arg,
+	    iov[0].iov_base, iolen);
+
+	written = xwrite(sc->ls_fd, iov[0].iov_base, iolen);
+	/* free() may disturb errno, so capture it for the log first */
+	saved_errno = errno;
+	free(iov[0].iov_base);
+
+	if (written != (ssize_t)iolen) {
+		L9P_LOG(L9P_ERROR, "short write: %s", strerror(saved_errno));
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Transport callback: discard a response that will not be sent by
+ * releasing its buffer (iov[0]).
+ */
+static void
+l9p_socket_drop_response(struct l9p_request *req __unused,
+    const struct iovec *iov, size_t niov __unused, void *arg)
+{
+	void *respbuf = iov->iov_base;
+
+	L9P_LOG(L9P_DEBUG, "%p: drop buf=%p", arg, respbuf);
+	free(respbuf);
+}
+
+/*
+ * read(2) until `count' bytes have arrived, retrying when interrupted by
+ * a signal.  Returns the number of bytes read, which is less than
+ * `count' only if end-of-file was reached first, or -1 on a read error.
+ */
+static ssize_t
+xread(int fd, void *buf, size_t count)
+{
+	char *cursor = buf;
+	size_t remaining = count;
+
+	while (remaining > 0) {
+		ssize_t n = read(fd, cursor, remaining);
+
+		if (n > 0) {
+			cursor += n;
+			remaining -= (size_t)n;
+		} else if (n == 0) {
+			/* end of file: report what we have */
+			break;
+		} else if (errno != EINTR) {
+			return (-1);
+		}
+	}
+
+	return ((ssize_t)(count - remaining));
+}
+
+/*
+ * write(2) until `count' bytes have been written, retrying when
+ * interrupted by a signal.  Returns the number of bytes written, which
+ * is less than `count' only if a write reported zero bytes, or -1 on a
+ * write error.
+ */
+static ssize_t
+xwrite(int fd, void *buf, size_t count)
+{
+	char *cursor = buf;
+	size_t remaining = count;
+
+	while (remaining > 0) {
+		ssize_t n = write(fd, cursor, remaining);
+
+		if (n > 0) {
+			cursor += n;
+			remaining -= (size_t)n;
+		} else if (n == 0) {
+			/* no progress: report what was written so far */
+			break;
+		} else if (errno != EINTR) {
+			return (-1);
+		}
+	}
+
+	return ((ssize_t)(count - remaining));
+}
diff --git a/usr/src/lib/lib9p/common/transport/socket.h b/usr/src/lib/lib9p/common/transport/socket.h
new file mode 100644
index 0000000000..df950ffb7d
--- /dev/null
+++ b/usr/src/lib/lib9p/common/transport/socket.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_SOCKET_H
+#define LIB9P_SOCKET_H
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "../lib9p.h"
+
+int l9p_start_server(struct l9p_server *server, const char *host,
+    const char *port);
+void l9p_socket_accept(struct l9p_server *server, int serv_fd);
+
+#endif /* LIB9P_SOCKET_H */
diff --git a/usr/src/lib/lib9p/common/utils.c b/usr/src/lib/lib9p/common/utils.c
new file mode 100644
index 0000000000..10c9683c0a
--- /dev/null
+++ b/usr/src/lib/lib9p/common/utils.c
@@ -0,0 +1,1363 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#if defined(__FreeBSD__)
+#include <sys/sbuf.h>
+#else
+#include "sbuf/sbuf.h"
+#endif
+#include "lib9p.h"
+#include "fcall.h"
+#include "linux_errno.h"
+
+#ifdef __illumos__
+#include <sys/sysmacros.h>
+#include <grp.h>
+#endif
+
+#ifdef __APPLE__
+  #define GETGROUPS_GROUP_TYPE_IS_INT
+#endif
+
+/*
+ * Number of elements in a true array (not a pointer).  The argument is
+ * parenthesized so that any expression naming an array expands safely.
+ */
+#define N(ary)          (sizeof(ary) / sizeof(*(ary)))
+
+/*
+ * One row of a bit-description table; see l9p_describe_bits() below.
+ * A row applies to a value when (value & db_mask) == db_match.  Tables
+ * are terminated by a row whose db_name is NULL.
+ */
+struct descbits {
+	uint64_t	db_mask;	/* bits of the value to examine */
+	uint64_t	db_match;	/* required value of those bits */
+	const char	*db_name;	/* text printed when the row applies */
+};
+
+
+static bool l9p_describe_bits(const char *, uint64_t, const char *,
+    const struct descbits *, struct sbuf *);
+static void l9p_describe_fid(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_mode(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_name(const char *, char *, struct sbuf *);
+static void l9p_describe_perm(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_lperm(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_qid(const char *, struct l9p_qid *, struct sbuf *);
+static void l9p_describe_l9stat(const char *, struct l9p_stat *,
+    enum l9p_version, struct sbuf *);
+static void l9p_describe_statfs(const char *, struct l9p_statfs *,
+    struct sbuf *);
+static void l9p_describe_time(struct sbuf *, const char *, uint64_t, uint64_t);
+static void l9p_describe_readdir(struct sbuf *, struct l9p_f_io *);
+static void l9p_describe_size(const char *, uint64_t, struct sbuf *);
+static void l9p_describe_ugid(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_getattr_mask(uint64_t, struct sbuf *);
+static void l9p_describe_unlinkat_flags(const char *, uint32_t, struct sbuf *);
+static const char *lookup_linux_errno(uint32_t, char *, size_t);
+
+/*
+ * Printable names of the 9P message types, indexed by
+ * (type - L9P__FIRST).
+ *
+ * Using indexed initializers, we can have these occur in any order.
+ * Each X(NAME, "name") line fills two slots, the one for L9P_TNAME and
+ * the one for L9P_RNAME, relying on adjacent-string concatenation
+ * ("T" name, "R" name) to build both Tfoo and Rfoo from one copy of the
+ * lowercase name.
+ *
+ * Alas, there is no stupid cpp trick to lowercase-ify, so we
+ * have to write each name twice.  In which case we might as well
+ * make the second one a string in the first place and not bother
+ * with the stringizing.
+ *
+ * This table should have entries for each enum value in fcall.h.
+ * Slots that no initializer names are NULL.
+ */
+#define X(NAME, name)	[L9P_T##NAME - L9P__FIRST] = "T" name, \
+			[L9P_R##NAME - L9P__FIRST] = "R" name
+static const char *ftype_names[] = {
+	X(VERSION,	"version"),
+	X(AUTH,		"auth"),
+	X(ATTACH,	"attach"),
+	X(ERROR,	"error"),
+	X(LERROR,	"lerror"),
+	X(FLUSH,	"flush"),
+	X(WALK,		"walk"),
+	X(OPEN,		"open"),
+	X(CREATE,	"create"),
+	X(READ,		"read"),
+	X(WRITE,	"write"),
+	X(CLUNK,	"clunk"),
+	X(REMOVE,	"remove"),
+	X(STAT,		"stat"),
+	X(WSTAT,	"wstat"),
+	X(STATFS,	"statfs"),
+	X(LOPEN,	"lopen"),
+	X(LCREATE,	"lcreate"),
+	X(SYMLINK,	"symlink"),
+	X(MKNOD,	"mknod"),
+	X(RENAME,	"rename"),
+	X(READLINK,	"readlink"),
+	X(GETATTR,	"getattr"),
+	X(SETATTR,	"setattr"),
+	X(XATTRWALK,	"xattrwalk"),
+	X(XATTRCREATE,	"xattrcreate"),
+	X(READDIR,	"readdir"),
+	X(FSYNC,	"fsync"),
+	X(LOCK,		"lock"),
+	X(GETLOCK,	"getlock"),
+	X(LINK,		"link"),
+	X(MKDIR,	"mkdir"),
+	X(RENAMEAT,	"renameat"),
+	X(UNLINKAT,	"unlinkat"),
+};
+#undef X
+
+/*
+ * Build in <iov2> a view of <iov1> that begins <seek> bytes into the
+ * data described by <iov1>.
+ *
+ * Segments lying entirely before the seek point are dropped, the
+ * segment containing it is trimmed at the front, and all later
+ * segments are copied unchanged.  No data is copied; <iov2> points
+ * into the same buffers.  The number of entries written is stored in
+ * *niov2 (0 if <seek> reaches or passes the end), so <iov2> must have
+ * room for up to <niov1> entries.
+ */
+void
+l9p_seek_iov(const struct iovec *iov1, size_t niov1, struct iovec *iov2,
+    size_t *niov2, size_t seek)
+{
+	size_t first, skip, n;
+
+	assert(niov1 <= L9P_MAX_IOV);
+
+	/* Consume whole segments until the seek point lands inside one. */
+	skip = seek;
+	for (first = 0; first < niov1; first++) {
+		if (skip < iov1[first].iov_len)
+			break;
+		skip -= iov1[first].iov_len;
+	}
+
+	/* Only the first surviving segment is shortened by <skip>. */
+	for (n = 0; first + n < niov1; n++) {
+		const struct iovec *src = &iov1[first + n];
+
+		iov2[n].iov_base = (char *)src->iov_base + skip;
+		iov2[n].iov_len = src->iov_len - skip;
+		skip = 0;
+	}
+
+	*niov2 = n;
+}
+
+/*
+ * Shorten the iovec array <iov> in place so that it describes at most
+ * <length> bytes.
+ *
+ * The first segment that would run past <length> has its iov_len cut
+ * down (possibly to zero) and the count of segments up to and including
+ * it is returned.  If every segment fits, nothing is modified and
+ * <niov> is returned.
+ */
+size_t
+l9p_truncate_iov(struct iovec *iov, size_t niov, size_t length)
+{
+	size_t remaining = length;
+	size_t idx;
+
+	for (idx = 0; idx < niov; idx++) {
+		if (remaining < iov[idx].iov_len) {
+			iov[idx].iov_len = remaining;
+			return (idx + 1);
+		}
+		remaining -= iov[idx].iov_len;
+	}
+
+	return (niov);
+}
+
+/*
+ * This is a wrapper for getgrouplist() that returns calloc'ed memory,
+ * and papers over FreeBSD vs Mac differences in the getgrouplist()
+ * argument types.
+ *
+ * <name> and <basegid> are handed to getgrouplist() unchanged.  The
+ * result array has room for NGROUPS_MAX entries.
+ *
+ * Note that this function guarantees that *either*:
+ *     return value != NULL and *angroups has been set
+ * or: return value == NULL and *angroups is 0
+ *
+ * The second half of that guarantee also covers a getgrouplist()
+ * failure: *angroups is zeroed before anything can fail.
+ */
+gid_t *
+l9p_getgrlist(const char *name, gid_t basegid, int *angroups)
+{
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	int i, *int_groups;
+#endif
+	gid_t *groups;
+	int ngroups;
+
+	/* Every early return below reports "no groups". */
+	*angroups = 0;
+
+	/*
+	 * Todo, perhaps: while getgrouplist() returns -1, expand.
+	 * For now just use NGROUPS_MAX.
+	 */
+	ngroups = NGROUPS_MAX;
+	groups = calloc((size_t)ngroups, sizeof(*groups));
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	/* Scratch array in the element type this getgrouplist() expects. */
+	int_groups = groups ? calloc((size_t)ngroups, sizeof(*int_groups)) :
+	    NULL;
+	if (int_groups == NULL) {
+		free(groups);
+		groups = NULL;
+	}
+#endif
+	if (groups == NULL)
+		return (NULL);
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	if (getgrouplist(name, (int)basegid, int_groups, &ngroups) < 0) {
+		free(groups);
+		free(int_groups);
+		return (NULL);
+	}
+	for (i = 0; i < ngroups; i++)
+		groups[i] = (gid_t)int_groups[i];
+	free(int_groups);
+#else
+	if (getgrouplist(name, basegid, groups, &ngroups) < 0) {
+		free(groups);
+		return (NULL);
+	}
+#endif
+	*angroups = ngroups;
+	return (groups);
+}
+
+/*
+ * Shared decoder for the debug "describe" routines: renders a
+ * bit-field-like <value> symbolically, driven by the table <db>.
+ *
+ * Example: with FOO = 0x10, BAR = 0x08 and QUUX = 0x04 in the table,
+ * the value 0x3c comes out as
+ *     value=0x3c[FOO,BAR,QUUX,?0x20]
+ * Bits 4, 3 and 2 were decoded.  Bit 5 (0x20) has no table row, so it
+ * is reported as an undecoded remainder after a '?'.  Bits 0 and 1
+ * were clear on input and so produce nothing.
+ *
+ * <db> is an array of {mask, match, name} rows ending with
+ * {0, 0, NULL}.  A row applies when (value & mask) == match.  Its
+ * name is then printed and the mask bits are cleared from the value.
+ *
+ * A multi-bit field is decoded with several rows that share one mask
+ * and differ in match (e.g. mask 3 with matches 0, 1, 2 and 3 for a
+ * two-bit field).  Such rows must be adjacent: once one of them
+ * applies, the immediately following rows with the same mask are
+ * skipped, so that the now-cleared field cannot match a zero-valued
+ * row.
+ *
+ * If <str> is non-NULL, it is printed first, followed by the value in
+ * hex.  If <str> is NULL, neither is printed.
+ *
+ * <oc> supplies the open and close bracket characters, typically
+ * "[]".  oc[0] goes in front of the first item printed and oc[1]
+ * after the last.  With a NULL <oc> no brackets are added.
+ *
+ * Returns true if anything was printed besides the optional
+ * <str>-and-value prefix.
+ */
+static bool
+l9p_describe_bits(const char *str, uint64_t value, const char *oc,
+    const struct descbits *db, struct sbuf *sb)
+{
+	char bracket[2] = { '\0', '\0' };
+	const char *prefix = bracket;	/* open bracket, later "," */
+	const struct descbits *row;
+	bool emitted = false;
+
+	if (str != NULL)
+		sbuf_printf(sb, "%s0x%" PRIx64, str, value);
+	if (oc != NULL)
+		bracket[0] = oc[0];
+
+	for (row = db; row->db_name != NULL; row++) {
+		if ((value & row->db_mask) != row->db_match)
+			continue;
+
+		sbuf_printf(sb, "%s%s", prefix, row->db_name);
+		prefix = ",";
+		emitted = true;
+
+		/*
+		 * The field is decoded: drop it from the value and
+		 * step over the other rows describing the same field.
+		 */
+		value &= ~row->db_mask;
+		while (row[1].db_name != NULL &&
+		    row[1].db_mask == row->db_mask)
+			row++;
+	}
+
+	/* Whatever is left over had no row in the table. */
+	if (value != 0) {
+		sbuf_printf(sb, "%s?0x%" PRIx64, prefix, value);
+		emitted = true;
+	}
+
+	if (emitted && oc != NULL) {
+		bracket[0] = oc[1];
+		sbuf_cat(sb, bracket);
+	}
+	return (emitted);
+}
+
+/*
+ * Append a fid: the label <str> followed by the fid in decimal.
+ */
+static void
+l9p_describe_fid(const char *str, uint32_t fid, struct sbuf *sb)
+{
+	(void) sbuf_printf(sb, "%s%" PRIu32, str, fid);
+}
+
+/*
+ * Append a numeric user or group ID: the label <str> followed by the
+ * ID in decimal.
+ */
+static void
+l9p_describe_ugid(const char *str, uint32_t ugid, struct sbuf *sb)
+{
+	(void) sbuf_printf(sb, "%s%" PRIu32, str, ugid);
+}
+
+/*
+ * Describe a 9P open mode (an l9p_omode value): the access-mode field
+ * plus the OCEXEC, ODIRECT, ORCLOSE and OTRUNC modifier bits.  Linux
+ * open flags use a different encoding and are decoded by
+ * l9p_describe_lflags instead.
+ */
+static void
+l9p_describe_mode(const char *str, uint32_t mode, struct sbuf *sb)
+{
+	static const struct descbits omode_bits[] = {
+		/* One field, four values: these rows must stay adjacent. */
+		{ L9P_OACCMODE,	L9P_OREAD,	"OREAD" },
+		{ L9P_OACCMODE,	L9P_OWRITE,	"OWRITE" },
+		{ L9P_OACCMODE,	L9P_ORDWR,	"ORDWR" },
+		{ L9P_OACCMODE,	L9P_OEXEC,	"OEXEC" },
+
+		/* Independent single-bit modifiers. */
+		{ L9P_OCEXEC,	L9P_OCEXEC,	"OCEXEC" },
+		{ L9P_ODIRECT,	L9P_ODIRECT,	"ODIRECT" },
+		{ L9P_ORCLOSE,	L9P_ORCLOSE,	"ORCLOSE" },
+		{ L9P_OTRUNC,	L9P_OTRUNC,	"OTRUNC" },
+		{ 0, 0, NULL }
+	};
+	const uint64_t value = mode;
+
+	(void) l9p_describe_bits(str, value, "[]", omode_bits, sb);
+}
+
+/*
+ * Show Linux mode/flags: the access-mode field followed by the
+ * individual L9P_L_O_* flag bits.  Anything not covered by the table
+ * is reported by l9p_describe_bits as an undecoded remainder.
+ */
+static void
+l9p_describe_lflags(const char *str, uint32_t flags, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+	    /* One field, four values: these rows must stay adjacent. */
+	    { L9P_OACCMODE,	L9P_OREAD,		"O_READ" },
+	    { L9P_OACCMODE,	L9P_OWRITE,		"O_WRITE" },
+	    { L9P_OACCMODE,	L9P_ORDWR,		"O_RDWR" },
+	    { L9P_OACCMODE,	L9P_OEXEC,		"O_EXEC" },
+
+	    /*
+	     * Single-bit flags, one row each.  A second identical row
+	     * for the same flag could never match, which is why
+	     * O_TMPFILE is listed only once.
+	     */
+	    { L9P_L_O_APPEND,	L9P_L_O_APPEND,		"O_APPEND" },
+	    { L9P_L_O_CLOEXEC,	L9P_L_O_CLOEXEC,	"O_CLOEXEC" },
+	    { L9P_L_O_CREAT,	L9P_L_O_CREAT,		"O_CREAT" },
+	    { L9P_L_O_DIRECT,	L9P_L_O_DIRECT,		"O_DIRECT" },
+	    { L9P_L_O_DIRECTORY, L9P_L_O_DIRECTORY,	"O_DIRECTORY" },
+	    { L9P_L_O_DSYNC,	L9P_L_O_DSYNC,		"O_DSYNC" },
+	    { L9P_L_O_EXCL,	L9P_L_O_EXCL,		"O_EXCL" },
+	    { L9P_L_O_FASYNC,	L9P_L_O_FASYNC,		"O_FASYNC" },
+	    { L9P_L_O_LARGEFILE, L9P_L_O_LARGEFILE,	"O_LARGEFILE" },
+	    { L9P_L_O_NOATIME,	L9P_L_O_NOATIME,	"O_NOATIME" },
+	    { L9P_L_O_NOCTTY,	L9P_L_O_NOCTTY,		"O_NOCTTY" },
+	    { L9P_L_O_NOFOLLOW,	L9P_L_O_NOFOLLOW,	"O_NOFOLLOW" },
+	    { L9P_L_O_NONBLOCK,	L9P_L_O_NONBLOCK,	"O_NONBLOCK" },
+	    { L9P_L_O_PATH,	L9P_L_O_PATH,		"O_PATH" },
+	    { L9P_L_O_SYNC,	L9P_L_O_SYNC,		"O_SYNC" },
+	    { L9P_L_O_TMPFILE,	L9P_L_O_TMPFILE,	"O_TMPFILE" },
+	    { L9P_L_O_TRUNC,	L9P_L_O_TRUNC,		"O_TRUNC" },
+	    { 0, 0, NULL }
+	};
+
+	(void) l9p_describe_bits(str, flags, "[]", bits, sb);
+}
+
+/*
+ * Append a file name or other string that may be very long.
+ *
+ * A real string is printed in double quotes; one longer than 32
+ * characters is cut to its first 29 characters followed by "...".
+ * A NULL name is printed as <null> without quotes, so that it can be
+ * told apart from a string.
+ */
+static void
+l9p_describe_name(const char *str, char *name, struct sbuf *sb)
+{
+	size_t namelen;
+
+	if (name == NULL) {
+		sbuf_printf(sb, "%s<null>", str);
+		return;
+	}
+
+	namelen = strlen(name);
+	if (namelen <= 32) {
+		sbuf_printf(sb, "%s\"%.*s\"", str, (int)namelen, name);
+		return;
+	}
+
+	sbuf_printf(sb, "%s\"%.*s...\"", str, 32 - 3, name);
+}
+
+#define	STRMODE_SIZE 12
+
+#ifdef __illumos__
+/*
+ * Local strmode() for illumos builds: formats the permission bits of
+ * <mode> into <bp> as text.
+ *
+ * <bp> must have room for STRMODE_SIZE bytes.  The result is one
+ * file-type character, nine permission characters (user, group, other),
+ * a trailing space and the terminating NUL.
+ *
+ * In the group triplet, set-group-ID without group execute is shown as
+ * 'L' when the S_IFREG bit is set in <mode> and as 'S' otherwise.
+ * Group execute is reported whether or not S_IFREG is set.
+ */
+static void
+strmode(mode_t mode, char *bp)
+{
+	char *const sbp = bp;
+
+	/*
+	 * The single caller does not pass in the file type as part of 'mode',
+	 * and ignores the first character in the returned buffer anyway.
+	 */
+	*bp++ = '?';
+
+	/* Fully parenthesized so it is safe inside larger expressions. */
+#define	ONE(_cmp, _ch) (((mode & (_cmp)) != 0) ? (_ch) : '-')
+	*bp++ = ONE(S_IRUSR, 'r');
+	*bp++ = ONE(S_IWUSR, 'w');
+	switch (mode & (S_ISUID|S_IXUSR)) {
+	case S_ISUID|S_IXUSR:
+		*bp++ = 's';
+		break;
+	case S_ISUID:
+		*bp++ = 'S';
+		break;
+	case S_IXUSR:
+		*bp++ = 'x';
+		break;
+	default:
+		*bp++ = '-';
+	}
+
+	*bp++ = ONE(S_IRGRP, 'r');
+	*bp++ = ONE(S_IWGRP, 'w');
+	/*
+	 * Switch on the set-gid and execute bits only.  S_IFREG must not
+	 * be part of the switched value, or a regular file with group
+	 * execute would match no case and be shown as '-'.
+	 */
+	switch (mode & (S_ISGID|S_IXGRP)) {
+	case S_ISGID|S_IXGRP:
+		*bp++ = 's';
+		break;
+	case S_ISGID:
+		*bp++ = ((mode & S_IFREG) != 0) ? 'L' : 'S';
+		break;
+	case S_IXGRP:
+		*bp++ = 'x';
+		break;
+	default:
+		*bp++ = '-';
+	}
+
+	*bp++ = ONE(S_IROTH, 'r');
+	*bp++ = ONE(S_IWOTH, 'w');
+	switch (mode & (S_ISVTX|S_IXOTH)) {
+	case S_ISVTX|S_IXOTH:
+		*bp++ = 't';
+		break;
+	case S_ISVTX:
+		*bp++ = 'T';
+		break;
+	case S_IXOTH:
+		*bp++ = 'x';
+		break;
+	default:
+		*bp++ = '-';
+	}
+
+	*bp++ = ' ';
+	*bp = '\0';
+
+	/* bp is the index of the NUL, which must lie inside the buffer. */
+	assert(bp - sbp < STRMODE_SIZE);
+#undef ONE
+}
+#endif /* __illumos__ */
+
+/*
+ * Append permissions as <rwxrwxrwx>, preceded by the label <str>.
+ * When <mode> has bits set outside the low nine (0777), the whole
+ * value is also printed in hex in front of the bracketed text.
+ */
+static void
+l9p_describe_perm(const char *str, uint32_t mode, struct sbuf *sb)
+{
+	const uint32_t extra = mode & ~(uint32_t)0777;
+	char rwx[STRMODE_SIZE];
+
+	/* rwx[0] is the file-type character, which is never printed. */
+	strmode(mode & 0777, rwx);
+
+	if (extra == 0) {
+		sbuf_printf(sb, "%s<%.9s>", str, rwx + 1);
+		return;
+	}
+	sbuf_printf(sb, "%s0x%" PRIx32 "<%.9s>", str, mode, rwx + 1);
+}
+
+/*
+ * Append "extended" permissions: the DM* extension bits from 9P2000.u,
+ * if any are set, followed by the ordinary rwx permissions, all
+ * wrapped in square brackets after the label <str>.
+ */
+static void
+l9p_describe_ext_perm(const char *str, uint32_t mode, struct sbuf *sb)
+{
+	static const struct descbits dm_bits[] = {
+		{ L9P_DMDIR,	L9P_DMDIR,	"DMDIR" },
+		{ L9P_DMAPPEND,	L9P_DMAPPEND,	"DMAPPEND" },
+		{ L9P_DMEXCL,	L9P_DMEXCL,	"DMEXCL" },
+		{ L9P_DMMOUNT,	L9P_DMMOUNT,	"DMMOUNT" },
+		{ L9P_DMAUTH,	L9P_DMAUTH,	"DMAUTH" },
+		{ L9P_DMTMP,	L9P_DMTMP,	"DMTMP" },
+		{ L9P_DMSYMLINK, L9P_DMSYMLINK,	"DMSYMLINK" },
+		{ L9P_DMDEVICE,	L9P_DMDEVICE,	"DMDEVICE" },
+		{ L9P_DMNAMEDPIPE, L9P_DMNAMEDPIPE, "DMNAMEDPIPE" },
+		{ L9P_DMSOCKET,	L9P_DMSOCKET,	"DMSOCKET" },
+		{ L9P_DMSETUID,	L9P_DMSETUID,	"DMSETUID" },
+		{ L9P_DMSETGID,	L9P_DMSETGID,	"DMSETGID" },
+		{ 0, 0, NULL }
+	};
+	const uint32_t high = mode & ~(uint32_t)0777;
+	const char *sep;
+
+	sbuf_printf(sb, "%s[", str);
+
+	/* A comma is needed only if some extension bit was printed. */
+	if (l9p_describe_bits(NULL, high, NULL, dm_bits, sb))
+		sep = ",";
+	else
+		sep = "";
+
+	l9p_describe_perm(sep, mode & 0777, sb);
+	sbuf_cat(sb, "]");
+}
+
+/*
+ * Append Linux-style permissions: the file type from the S_IFMT
+ * field, if it decodes to anything, followed by the ordinary rwx
+ * permissions, all wrapped in square brackets after the label <str>.
+ */
+static void
+l9p_describe_lperm(const char *str, uint32_t mode, struct sbuf *sb)
+{
+	/* Every row decodes the same field, so they must stay adjacent. */
+	static const struct descbits ifmt_bits[] = {
+		{ S_IFMT,	S_IFIFO,	"S_IFIFO" },
+		{ S_IFMT,	S_IFCHR,	"S_IFCHR" },
+		{ S_IFMT,	S_IFDIR,	"S_IFDIR" },
+		{ S_IFMT,	S_IFBLK,	"S_IFBLK" },
+		{ S_IFMT,	S_IFREG,	"S_IFREG" },
+		{ S_IFMT,	S_IFLNK,	"S_IFLNK" },
+		{ S_IFMT,	S_IFSOCK,	"S_IFSOCK" },
+#ifdef __illumos__
+		{ S_IFMT,	S_IFDOOR,	"S_IFDOOR" },
+		{ S_IFMT,	S_IFPORT,	"S_IFPORT" },
+#endif
+		{ 0, 0, NULL }
+	};
+	const uint32_t high = mode & ~(uint32_t)0777;
+	const char *sep;
+
+	sbuf_printf(sb, "%s[", str);
+
+	/* A comma is needed only if something above 0777 was printed. */
+	if (l9p_describe_bits(NULL, high, NULL, ifmt_bits, sb))
+		sep = ",";
+	else
+		sep = "";
+
+	l9p_describe_perm(sep, mode & 0777, sb);
+	sbuf_cat(sb, "]");
+}
+
+/*
+ * Append a qid as <type,version,path>, preceded by the label <str>.
+ * The type is shown in hex with its bits decoded in square brackets,
+ * the version in decimal and the path as 16 hex digits.
+ */
+static void
+l9p_describe_qid(const char *str, struct l9p_qid *qid, struct sbuf *sb)
+{
+	static const struct descbits qt_bits[] = {
+		/*
+		 * L9P_QTFILE is 0: it means "none of the other type
+		 * bits".  It is recognized by comparing the whole low
+		 * byte against it, and that row has to come first so
+		 * that the comparison sees the original value rather
+		 * than one from which decoded bits were already
+		 * cleared.
+		 */
+		{ 0xff,		L9P_QTFILE,	"FILE" },
+		{ L9P_QTDIR,	L9P_QTDIR,	"DIR" },
+		{ L9P_QTAPPEND,	L9P_QTAPPEND,	"APPEND" },
+		{ L9P_QTEXCL,	L9P_QTEXCL,	"EXCL" },
+		{ L9P_QTMOUNT,	L9P_QTMOUNT,	"MOUNT" },
+		{ L9P_QTAUTH,	L9P_QTAUTH,	"AUTH" },
+		{ L9P_QTTMP,	L9P_QTTMP,	"TMP" },
+		{ L9P_QTSYMLINK, L9P_QTSYMLINK,	"SYMLINK" },
+		{ 0, 0, NULL }
+	};
+	uint64_t qtype;
+
+	assert(qid != NULL);
+
+	qtype = qid->type;
+	sbuf_cat(sb, str);
+	(void) l9p_describe_bits("<", qtype, "[]", qt_bits, sb);
+	sbuf_printf(sb, ",%" PRIu32 ",0x%016" PRIx64 ">",
+	    qid->version, qid->path);
+}
+
+/*
+ * Append a size: the label <str> followed by the value in decimal.
+ */
+static void
+l9p_describe_size(const char *str, uint64_t size, struct sbuf *sb)
+{
+	(void) sbuf_printf(sb, "%s%" PRIu64, str, size);
+}
+
+/*
+ * Append an l9p_stat, preceded by the label <str>.  The numeric
+ * 9P2000.u fields are included when <version> is L9P_2000U or later.
+ *
+ * atime, mtime and length are left out when they hold the all-ones
+ * value.
+ */
+static void
+l9p_describe_l9stat(const char *str, struct l9p_stat *st,
+    enum l9p_version version, struct sbuf *sb)
+{
+	const bool legacy = version < L9P_2000U;
+
+	assert(st != NULL);
+
+	sbuf_printf(sb, "%stype=0x%04" PRIx32 " dev=0x%08" PRIx32, str,
+	    st->type, st->dev);
+	l9p_describe_qid(" qid=", &st->qid, sb);
+	l9p_describe_ext_perm(" mode=", st->mode, sb);
+
+	if (st->atime != (uint32_t)-1)
+		sbuf_printf(sb, " atime=%" PRIu32, st->atime);
+	if (st->mtime != (uint32_t)-1)
+		sbuf_printf(sb, " mtime=%" PRIu32, st->mtime);
+	if (st->length != (uint64_t)-1)
+		sbuf_printf(sb, " length=%" PRIu64, st->length);
+
+	l9p_describe_name(" name=", st->name, sb);
+
+	/*
+	 * It's pretty common to have NULL uid+gid+muid.  With dot-u
+	 * those are just noise, so a NULL one is shown only for the
+	 * original protocol.
+	 */
+	if (legacy || st->uid != NULL)
+		l9p_describe_name(" uid=", st->uid, sb);
+	if (legacy || st->gid != NULL)
+		l9p_describe_name(" gid=", st->gid, sb);
+	if (legacy || st->muid != NULL)
+		l9p_describe_name(" muid=", st->muid, sb);
+
+	if (legacy)
+		return;
+
+	/* 9P2000.u additions. */
+	if (st->extension != NULL)
+		l9p_describe_name(" extension=", st->extension, sb);
+	sbuf_printf(sb,
+	    " n_uid=%" PRIu32 " n_gid=%" PRIu32 " n_muid=%" PRIu32,
+	    st->n_uid, st->n_gid, st->n_muid);
+}
+
+/*
+ * Append the fields of an l9p_statfs as "key=value" pairs, preceded
+ * by the label <str>.  type is shown in hex, fsid in hex, everything
+ * else in decimal.
+ *
+ * The text has no closing '>': nothing here prints a matching '<', so
+ * a trailing one would leave the output unbalanced.
+ */
+static void
+l9p_describe_statfs(const char *str, struct l9p_statfs *st, struct sbuf *sb)
+{
+
+	assert(st != NULL);
+
+	sbuf_printf(sb, "%stype=0x%04lx bsize=%lu blocks=%" PRIu64
+	    " bfree=%" PRIu64 " bavail=%" PRIu64 " files=%" PRIu64
+	    " ffree=%" PRIu64 " fsid=0x%" PRIx64 " namelen=%" PRIu32,
+	    str, (u_long)st->type, (u_long)st->bsize, st->blocks,
+	    st->bfree, st->bavail, st->files,
+	    st->ffree, st->fsid, st->namelen);
+}
+
+/*
+ * Append a <seconds,nsec> timestamp after the label <s>, as raw
+ * numbers: seconds, a dot, then nanoseconds zero-padded to nine
+ * digits.  A nanosecond value above 999999999 is reported as invalid
+ * instead.
+ *
+ * Perhaps this should use asctime_r some day.
+ *
+ * NOTE(review): the "invalid nsec" text ends in ")" although nothing
+ * here prints a "(" -- check the callers before changing it.
+ */
+static void
+l9p_describe_time(struct sbuf *sb, const char *s, uint64_t sec, uint64_t nsec)
+{
+	sbuf_cat(sb, s);
+
+	if (nsec <= 999999999) {
+		sbuf_printf(sb, "%" PRIu64 ".%09" PRIu64, sec, nsec);
+		return;
+	}
+
+	sbuf_printf(sb, "%" PRIu64 ".<invalid nsec %" PRIu64 ">)",
+	    sec, nsec);
+}
+
+/*
+ * Decode readdir data (.L format, variable length names).
+ *
+ * For now only the byte count is reported: " EOF (count=0)" when
+ * io->count is zero, " count=N" otherwise.  Decoding the individual
+ * directory entries needs the original request, which is not available
+ * here, so that code is kept under "notyet".
+ */
+static void
+l9p_describe_readdir(struct sbuf *sb, struct l9p_f_io *io)
+{
+	uint32_t count;
+#ifdef notyet
+	int i;
+	struct l9p_message msg;
+	struct l9p_dirent de;
+#endif
+
+	if ((count = io->count) == 0) {
+		sbuf_printf(sb, " EOF (count=0)");
+		return;
+	}
+
+	/*
+	 * Can't do this yet because we do not have the original
+	 * req.
+	 */
+#ifdef notyet
+	sbuf_printf(sb, " count=%" PRIu32 " [", count);
+
+	l9p_init_msg(&msg, req, L9P_UNPACK);
+	for (i = 0; msg.lm_size < count; i++) {
+		if (l9p_pudirent(&msg, &de) < 0) {
+			sbuf_printf(sb, " bad count");
+			break;
+		}
+
+		sbuf_printf(sb, i ? ", " : " ");
+		l9p_describe_qid(" qid=", &de.qid, sb);
+		sbuf_printf(sb, " offset=%" PRIu64 " type=%d",
+		    de.offset, de.type);
+		/* l9p_describe_name() takes the sbuf as its last argument. */
+		l9p_describe_name(" name=", de.name, sb);
+		free(de.name);
+	}
+	sbuf_printf(sb, "]=%d dir entries", i);
+#else /* notyet */
+	sbuf_printf(sb, " count=%" PRIu32, count);
+#endif
+}
+
+/*
+ * Decode the request_mask field of a Tgetattr, printed as
+ * " request_mask=0x...[...]".
+ */
+static void
+l9p_describe_getattr_mask(uint64_t request_mask, struct sbuf *sb)
+{
+	static const struct descbits mask_bits[] = {
+		/*
+		 * Row order matters: ALL first, BASIC second.  This
+		 * leans on l9p_describe_bits clearing the bits of a
+		 * row once it has matched.  A mask with everything set
+		 * matches ALL, which clears all of its bits, so
+		 * nothing after it matches and we print [ALL].
+		 * Otherwise, a mask that covers BASIC matches that
+		 * row, which clears the BASIC bits, and only the extra
+		 * bits are listed one by one, e.g. [BASIC,BTIME,GEN].
+		 *
+		 * Bits with no row at all (say, ones added to the
+		 * protocol after this table was written) come out as
+		 * an undecoded remainder.
+		 */
+		{ L9PL_GETATTR_ALL,	L9PL_GETATTR_ALL,	"ALL" },
+		{ L9PL_GETATTR_BASIC,	L9PL_GETATTR_BASIC,	"BASIC" },
+
+		/* The bits that make up BASIC. */
+		{ L9PL_GETATTR_MODE,	L9PL_GETATTR_MODE,	"MODE" },
+		{ L9PL_GETATTR_NLINK,	L9PL_GETATTR_NLINK,	"NLINK" },
+		{ L9PL_GETATTR_UID,	L9PL_GETATTR_UID,	"UID" },
+		{ L9PL_GETATTR_GID,	L9PL_GETATTR_GID,	"GID" },
+		{ L9PL_GETATTR_RDEV,	L9PL_GETATTR_RDEV,	"RDEV" },
+		{ L9PL_GETATTR_ATIME,	L9PL_GETATTR_ATIME,	"ATIME" },
+		{ L9PL_GETATTR_MTIME,	L9PL_GETATTR_MTIME,	"MTIME" },
+		{ L9PL_GETATTR_CTIME,	L9PL_GETATTR_CTIME,	"CTIME" },
+		{ L9PL_GETATTR_INO,	L9PL_GETATTR_INO,	"INO" },
+		{ L9PL_GETATTR_SIZE,	L9PL_GETATTR_SIZE,	"SIZE" },
+		{ L9PL_GETATTR_BLOCKS,	L9PL_GETATTR_BLOCKS,	"BLOCKS" },
+
+		/* The bits that ALL adds on top of BASIC. */
+		{ L9PL_GETATTR_BTIME,	L9PL_GETATTR_BTIME,	"BTIME" },
+		{ L9PL_GETATTR_GEN,	L9PL_GETATTR_GEN,	"GEN" },
+		{ L9PL_GETATTR_DATA_VERSION, L9PL_GETATTR_DATA_VERSION,
+							"DATA_VERSION" },
+		{ 0, 0, NULL }
+	};
+	static const char label[] = " request_mask=";
+
+	(void) l9p_describe_bits(label, request_mask, "[]", mask_bits, sb);
+}
+
+/*
+ * Decode the flags of a Tunlinkat, preceded by the label <str>.  The
+ * only flag with a name is AT_REMOVEDIR; any other bit is reported as
+ * an undecoded remainder.
+ */
+static void
+l9p_describe_unlinkat_flags(const char *str, uint32_t flags, struct sbuf *sb)
+{
+	static const struct descbits at_bits[] = {
+		{ L9PL_AT_REMOVEDIR, L9PL_AT_REMOVEDIR, "AT_REMOVEDIR" },
+		{ 0, 0, NULL }
+	};
+	const uint64_t value = flags;
+
+	(void) l9p_describe_bits(str, value, "[]", at_bits, sb);
+}
+
+/*
+ * Return a description of the Linux error number <linux_errno>.
+ *
+ * Numbers that have an entry in the table below yield that entry's
+ * string.  Other numbers up to ERANGE are passed to strerror().
+ * Anything else is formatted as "Unknown error N" into <buf>, which
+ * holds <len> bytes; that is the only case in which <buf> is written
+ * and returned.
+ */
+static const char *
+lookup_linux_errno(uint32_t linux_errno, char *buf, size_t len)
+{
+	/*
+	 * Error numbers in the "base" range (1..ERANGE) are common
+	 * across BSD, MacOS, Linux, and Plan 9.
+	 *
+	 * Error numbers outside that range require translation.
+	 *
+	 * The table is static so that it is built once, not on every
+	 * call.  Slots with no initializer are NULL.
+	 */
+	static const char *const table[] = {
+#define	X(name) [name] = name ## _STR
+		X(LINUX_EAGAIN),
+		X(LINUX_EDEADLK),
+		X(LINUX_ENAMETOOLONG),
+		X(LINUX_ENOLCK),
+		X(LINUX_ENOSYS),
+		X(LINUX_ENOTEMPTY),
+		X(LINUX_ELOOP),
+		X(LINUX_ENOMSG),
+		X(LINUX_EIDRM),
+		X(LINUX_ECHRNG),
+		X(LINUX_EL2NSYNC),
+		X(LINUX_EL3HLT),
+		X(LINUX_EL3RST),
+		X(LINUX_ELNRNG),
+		X(LINUX_EUNATCH),
+		X(LINUX_ENOCSI),
+		X(LINUX_EL2HLT),
+		X(LINUX_EBADE),
+		X(LINUX_EBADR),
+		X(LINUX_EXFULL),
+		X(LINUX_ENOANO),
+		X(LINUX_EBADRQC),
+		X(LINUX_EBADSLT),
+		X(LINUX_EBFONT),
+		X(LINUX_ENOSTR),
+		X(LINUX_ENODATA),
+		X(LINUX_ETIME),
+		X(LINUX_ENOSR),
+		X(LINUX_ENONET),
+		X(LINUX_ENOPKG),
+		X(LINUX_EREMOTE),
+		X(LINUX_ENOLINK),
+		X(LINUX_EADV),
+		X(LINUX_ESRMNT),
+		X(LINUX_ECOMM),
+		X(LINUX_EPROTO),
+		X(LINUX_EMULTIHOP),
+		X(LINUX_EDOTDOT),
+		X(LINUX_EBADMSG),
+		X(LINUX_EOVERFLOW),
+		X(LINUX_ENOTUNIQ),
+		X(LINUX_EBADFD),
+		X(LINUX_EREMCHG),
+		X(LINUX_ELIBACC),
+		X(LINUX_ELIBBAD),
+		X(LINUX_ELIBSCN),
+		X(LINUX_ELIBMAX),
+		X(LINUX_ELIBEXEC),
+		X(LINUX_EILSEQ),
+		X(LINUX_ERESTART),
+		X(LINUX_ESTRPIPE),
+		X(LINUX_EUSERS),
+		X(LINUX_ENOTSOCK),
+		X(LINUX_EDESTADDRREQ),
+		X(LINUX_EMSGSIZE),
+		X(LINUX_EPROTOTYPE),
+		X(LINUX_ENOPROTOOPT),
+		X(LINUX_EPROTONOSUPPORT),
+		X(LINUX_ESOCKTNOSUPPORT),
+		X(LINUX_EOPNOTSUPP),
+		X(LINUX_EPFNOSUPPORT),
+		X(LINUX_EAFNOSUPPORT),
+		X(LINUX_EADDRINUSE),
+		X(LINUX_EADDRNOTAVAIL),
+		X(LINUX_ENETDOWN),
+		X(LINUX_ENETUNREACH),
+		X(LINUX_ENETRESET),
+		X(LINUX_ECONNABORTED),
+		X(LINUX_ECONNRESET),
+		X(LINUX_ENOBUFS),
+		X(LINUX_EISCONN),
+		X(LINUX_ENOTCONN),
+		X(LINUX_ESHUTDOWN),
+		X(LINUX_ETOOMANYREFS),
+		X(LINUX_ETIMEDOUT),
+		X(LINUX_ECONNREFUSED),
+		X(LINUX_EHOSTDOWN),
+		X(LINUX_EHOSTUNREACH),
+		X(LINUX_EALREADY),
+		X(LINUX_EINPROGRESS),
+		X(LINUX_ESTALE),
+		X(LINUX_EUCLEAN),
+		X(LINUX_ENOTNAM),
+		X(LINUX_ENAVAIL),
+		X(LINUX_EISNAM),
+		X(LINUX_EREMOTEIO),
+		X(LINUX_EDQUOT),
+		X(LINUX_ENOMEDIUM),
+		X(LINUX_EMEDIUMTYPE),
+		X(LINUX_ECANCELED),
+		X(LINUX_ENOKEY),
+		X(LINUX_EKEYEXPIRED),
+		X(LINUX_EKEYREVOKED),
+		X(LINUX_EKEYREJECTED),
+		X(LINUX_EOWNERDEAD),
+		X(LINUX_ENOTRECOVERABLE),
+		X(LINUX_ERFKILL),
+		X(LINUX_EHWPOISON),
+#undef X
+	};
+
+	if ((size_t)linux_errno < N(table) && table[linux_errno] != NULL)
+		return (table[linux_errno]);
+	if (linux_errno <= ERANGE)
+		return (strerror((int)linux_errno));
+	/* linux_errno is unsigned, so it must not be printed with %d. */
+	(void) snprintf(buf, len, "Unknown error %" PRIu32, linux_errno);
+	return (buf);
+}
+
+void
+l9p_describe_fcall(union l9p_fcall *fcall, enum l9p_version version,
+    struct sbuf *sb)
+{
+	uint64_t mask;
+	uint8_t type;
+	int i;
+
+	assert(fcall != NULL);
+	assert(sb != NULL);
+	assert(version <= L9P_2000L);
+
+	type = fcall->hdr.type;
+
+	if (type < L9P__FIRST || type >= L9P__LAST_PLUS_1 ||
+	    ftype_names[type - L9P__FIRST] == NULL) {
+		const char *rr;
+
+		/*
+		 * Can't say for sure that this distinction --
+		 * an even number is a request, an odd one is
+		 * a response -- will be maintained forever,
+		 * but it's good enough for now.
+		 */
+		rr = (type & 1) != 0 ? "response" : "request";
+		sbuf_printf(sb, "<unknown %s %d> tag=%d", rr, type,
+		    fcall->hdr.tag);
+	} else {
+		sbuf_printf(sb, "%s tag=%d", ftype_names[type - L9P__FIRST],
+		    fcall->hdr.tag);
+	}
+
+	switch (type) {
+	case L9P_TVERSION:
+	case L9P_RVERSION:
+		sbuf_printf(sb, " version=\"%s\" msize=%d", fcall->version.version,
+		    fcall->version.msize);
+		return;
+
+	case L9P_TAUTH:
+		l9p_describe_fid(" afid=", fcall->hdr.fid, sb);
+		sbuf_printf(sb, " uname=\"%s\" aname=\"%s\"",
+		    fcall->tauth.uname, fcall->tauth.aname);
+		return;
+
+	case L9P_TATTACH:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" afid=", fcall->tattach.afid, sb);
+		sbuf_printf(sb, " uname=\"%s\" aname=\"%s\"",
+		    fcall->tattach.uname, fcall->tattach.aname);
+		if (version >= L9P_2000U)
+			sbuf_printf(sb, " n_uname=%d", fcall->tattach.n_uname);
+		return;
+
+	case L9P_RATTACH:
+		l9p_describe_qid(" ", &fcall->rattach.qid, sb);
+		return;
+
+	case L9P_RERROR:
+		sbuf_printf(sb, " ename=\"%s\" errnum=%d", fcall->error.ename,
+		    fcall->error.errnum);
+		return;
+
+	case L9P_RLERROR: {
+		char unknown[50];
+
+		sbuf_printf(sb, " errnum=%d (%s)", fcall->error.errnum,
+		    lookup_linux_errno(fcall->error.errnum,
+		    unknown, sizeof(unknown)));
+		return;
+	}
+
+	case L9P_TFLUSH:
+		sbuf_printf(sb, " oldtag=%d", fcall->tflush.oldtag);
+		return;
+
+	case L9P_RFLUSH:
+		return;
+
+	case L9P_TWALK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" newfid=", fcall->twalk.newfid, sb);
+		if (fcall->twalk.nwname) {
+			sbuf_cat(sb, " wname=\"");
+			for (i = 0; i < fcall->twalk.nwname; i++)
+				sbuf_printf(sb, "%s%s", i == 0 ? "" : "/",
+				    fcall->twalk.wname[i]);
+			sbuf_cat(sb, "\"");
+		}
+		return;
+
+	case L9P_RWALK:
+		sbuf_printf(sb, " wqid=[");
+		for (i = 0; i < fcall->rwalk.nwqid; i++)
+			l9p_describe_qid(i == 0 ? "" : ",",
+			    &fcall->rwalk.wqid[i], sb);
+		sbuf_cat(sb, "]");
+		return;
+
+	case L9P_TOPEN:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_mode(" mode=", fcall->tcreate.mode, sb);
+		return;
+
+	case L9P_ROPEN:
+		l9p_describe_qid(" qid=", &fcall->ropen.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->ropen.iounit);
+		return;
+
+	case L9P_TCREATE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tcreate.name, sb);
+		l9p_describe_ext_perm(" perm=", fcall->tcreate.perm, sb);
+		l9p_describe_mode(" mode=", fcall->tcreate.mode, sb);
+		if (version >= L9P_2000U && fcall->tcreate.extension != NULL)
+			l9p_describe_name(" extension=",
+			    fcall->tcreate.extension, sb);
+		return;
+
+	case L9P_RCREATE:
+		l9p_describe_qid(" qid=", &fcall->rcreate.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->rcreate.iounit);
+		return;
+
+	case L9P_TREAD:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		sbuf_printf(sb, " offset=%" PRIu64 " count=%" PRIu32,
+		    fcall->io.offset, fcall->io.count);
+		return;
+
+	case L9P_RREAD:
+	case L9P_RWRITE:
+		sbuf_printf(sb, " count=%" PRIu32, fcall->io.count);
+		return;
+
+	case L9P_TWRITE:
+	case L9P_TREADDIR:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		sbuf_printf(sb, " offset=%" PRIu64 " count=%" PRIu32,
+		    fcall->io.offset, fcall->io.count);
+		return;
+
+	case L9P_TCLUNK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RCLUNK:
+		return;
+
+	case L9P_TREMOVE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RREMOVE:
+		return;
+
+	case L9P_TSTAT:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RSTAT:
+		l9p_describe_l9stat(" ", &fcall->rstat.stat, version, sb);
+		return;
+
+	case L9P_TWSTAT:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_l9stat(" ", &fcall->twstat.stat, version, sb);
+		return;
+
+	case L9P_RWSTAT:
+		return;
+
+	case L9P_TSTATFS:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RSTATFS:
+		l9p_describe_statfs(" ", &fcall->rstatfs.statfs, sb);
+		return;
+
+	case L9P_TLOPEN:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_lflags(" flags=", fcall->tlcreate.flags, sb);
+		return;
+
+	case L9P_RLOPEN:
+		l9p_describe_qid(" qid=", &fcall->rlopen.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->rlopen.iounit);
+		return;
+
+	case L9P_TLCREATE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tlcreate.name, sb);
+		/* confusing: "flags" is open-mode, "mode" is permissions */
+		l9p_describe_lflags(" flags=", fcall->tlcreate.flags, sb);
+		/* TLCREATE mode/permissions have S_IFREG (0x8000) set */
+		l9p_describe_lperm(" mode=", fcall->tlcreate.mode, sb);
+		l9p_describe_ugid(" gid=", fcall->tlcreate.gid, sb);
+		return;
+
+	case L9P_RLCREATE:
+		l9p_describe_qid(" qid=", &fcall->rlcreate.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->rlcreate.iounit);
+		return;
+
+	case L9P_TSYMLINK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tsymlink.name, sb);
+		l9p_describe_name(" symtgt=", fcall->tsymlink.symtgt, sb);
+		l9p_describe_ugid(" gid=", fcall->tsymlink.gid, sb);
+		return;
+
+	case L9P_RSYMLINK:
+		l9p_describe_qid(" qid=", &fcall->ropen.qid, sb);
+		return;
+
+	case L9P_TMKNOD:
+		l9p_describe_fid(" dfid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tmknod.name, sb);
+		/*
+		 * TMKNOD mode/permissions have S_IFBLK/S_IFCHR/S_IFIFO
+		 * bits.  The major and minor values are only meaningful
+		 * for S_IFBLK and S_IFCHR, but just decode always here.
+		 */
+		l9p_describe_lperm(" mode=", fcall->tmknod.mode, sb);
+		sbuf_printf(sb, " major=%u minor=%u",
+		    fcall->tmknod.major, fcall->tmknod.minor);
+		l9p_describe_ugid(" gid=", fcall->tmknod.gid, sb);
+		return;
+
+	case L9P_RMKNOD:
+		l9p_describe_qid(" qid=", &fcall->rmknod.qid, sb);
+		return;
+
+	case L9P_TRENAME:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" dfid=", fcall->trename.dfid, sb);
+		l9p_describe_name(" name=", fcall->trename.name, sb);
+		return;
+
+	case L9P_RRENAME:
+		return;
+
+	case L9P_TREADLINK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RREADLINK:
+		l9p_describe_name(" target=", fcall->rreadlink.target, sb);
+		return;
+
+	case L9P_TGETATTR:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_getattr_mask(fcall->tgetattr.request_mask, sb);
+		return;
+
+	case L9P_RGETATTR:
+		/* Don't need to decode bits: they're implied by the output */
+		mask = fcall->rgetattr.valid;
+		sbuf_printf(sb, " valid=0x%016" PRIx64, mask);
+		l9p_describe_qid(" qid=", &fcall->rgetattr.qid, sb);
+		if (mask & L9PL_GETATTR_MODE)
+			l9p_describe_lperm(" mode=", fcall->rgetattr.mode, sb);
+		if (mask & L9PL_GETATTR_UID)
+			l9p_describe_ugid(" uid=", fcall->rgetattr.uid, sb);
+		if (mask & L9PL_GETATTR_GID)
+			l9p_describe_ugid(" gid=", fcall->rgetattr.gid, sb);
+		if (mask & L9PL_GETATTR_NLINK)
+			sbuf_printf(sb, " nlink=%" PRIu64,
+			    fcall->rgetattr.nlink);
+		if (mask & L9PL_GETATTR_RDEV)
+			sbuf_printf(sb, " rdev=0x%" PRIx64,
+			    fcall->rgetattr.rdev);
+		if (mask & L9PL_GETATTR_SIZE)
+			l9p_describe_size(" size=", fcall->rgetattr.size, sb);
+		if (mask & L9PL_GETATTR_BLOCKS)
+			sbuf_printf(sb, " blksize=%" PRIu64 " blocks=%" PRIu64,
+			    fcall->rgetattr.blksize, fcall->rgetattr.blocks);
+		if (mask & L9PL_GETATTR_ATIME)
+			l9p_describe_time(sb, " atime=",
+			    fcall->rgetattr.atime_sec,
+			    fcall->rgetattr.atime_nsec);
+		if (mask & L9PL_GETATTR_MTIME)
+			l9p_describe_time(sb, " mtime=",
+			    fcall->rgetattr.mtime_sec,
+			    fcall->rgetattr.mtime_nsec);
+		if (mask & L9PL_GETATTR_CTIME)
+			l9p_describe_time(sb, " ctime=",
+			    fcall->rgetattr.ctime_sec,
+			    fcall->rgetattr.ctime_nsec);
+		if (mask & L9PL_GETATTR_BTIME)
+			l9p_describe_time(sb, " btime=",
+			    fcall->rgetattr.btime_sec,
+			    fcall->rgetattr.btime_nsec);
+		if (mask & L9PL_GETATTR_GEN)
+			sbuf_printf(sb, " gen=0x%" PRIx64, fcall->rgetattr.gen);
+		if (mask & L9PL_GETATTR_DATA_VERSION)
+			sbuf_printf(sb, " data_version=0x%" PRIx64,
+			    fcall->rgetattr.data_version);
+		return;
+
+	case L9P_TSETATTR:
+		/* As with RGETATTR, mask bits are implied by the output. */
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		mask = fcall->tsetattr.valid;
+		/* NB: tsetattr valid mask is only 32 bits, hence %08x */
+		sbuf_printf(sb, " valid=0x%08" PRIx64, mask);
+		if (mask & L9PL_SETATTR_MODE)
+			l9p_describe_lperm(" mode=", fcall->tsetattr.mode, sb);
+		if (mask & L9PL_SETATTR_UID)
+			l9p_describe_ugid(" uid=", fcall->tsetattr.uid, sb);
+		if (mask & L9PL_SETATTR_GID)
+			l9p_describe_ugid(" gid=", fcall->tsetattr.gid, sb);
+		if (mask & L9PL_SETATTR_SIZE)
+			l9p_describe_size(" size=", fcall->tsetattr.size, sb);
+		if (mask & L9PL_SETATTR_ATIME) {
+			if (mask & L9PL_SETATTR_ATIME_SET)
+				l9p_describe_time(sb, " atime=",
+				    fcall->tsetattr.atime_sec,
+				    fcall->tsetattr.atime_nsec);
+			else
+				sbuf_cat(sb, " atime=now");
+		}
+		if (mask & L9PL_SETATTR_MTIME) {
+			if (mask & L9PL_SETATTR_MTIME_SET)
+				l9p_describe_time(sb, " mtime=",
+				    fcall->tsetattr.mtime_sec,
+				    fcall->tsetattr.mtime_nsec);
+			else
+				sbuf_cat(sb, " mtime=now");
+		}
+		if (mask & L9PL_SETATTR_CTIME)
+			sbuf_cat(sb, " ctime=now");
+		return;
+
+	case L9P_RSETATTR:
+		return;
+
+	case L9P_TXATTRWALK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" newfid=", fcall->txattrwalk.newfid, sb);
+		l9p_describe_name(" name=", fcall->txattrwalk.name, sb);
+		return;
+
+	case L9P_RXATTRWALK:
+		l9p_describe_size(" size=", fcall->rxattrwalk.size, sb);
+		return;
+
+	case L9P_TXATTRCREATE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->txattrcreate.name, sb);
+		l9p_describe_size(" size=", fcall->txattrcreate.attr_size, sb);
+		sbuf_printf(sb, " flags=%" PRIu32, fcall->txattrcreate.flags);
+		return;
+
+	case L9P_RXATTRCREATE:
+		return;
+
+	case L9P_RREADDIR:
+		l9p_describe_readdir(sb, &fcall->io);
+		return;
+
+	case L9P_TFSYNC:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RFSYNC:
+		return;
+
+	case L9P_TLOCK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		/* decode better later */
+		sbuf_printf(sb, " type=%d flags=0x%" PRIx32
+		    " start=%" PRIu64 " length=%" PRIu64
+		    " proc_id=0x%" PRIx32 " client_id=\"%s\"",
+		    fcall->tlock.type, fcall->tlock.flags,
+		    fcall->tlock.start, fcall->tlock.length,
+		    fcall->tlock.proc_id, fcall->tlock.client_id);
+		return;
+
+	case L9P_RLOCK:
+		sbuf_printf(sb, " status=%d", fcall->rlock.status);
+		return;
+
+	case L9P_TGETLOCK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		/* FALLTHROUGH */
+
+	case L9P_RGETLOCK:
+		/* decode better later */
+		sbuf_printf(sb, " type=%d"
+		    " start=%" PRIu64 " length=%" PRIu64
+		    " proc_id=0x%" PRIx32 " client_id=\"%s\"",
+		    fcall->getlock.type,
+		    fcall->getlock.start, fcall->getlock.length,
+		    fcall->getlock.proc_id, fcall->getlock.client_id);
+		return;
+
+	case L9P_TLINK:
+		l9p_describe_fid(" dfid=", fcall->tlink.dfid, sb);
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tlink.name, sb);
+		return;
+
+	case L9P_RLINK:
+		return;
+
+	case L9P_TMKDIR:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tmkdir.name, sb);
+		/* TMKDIR mode/permissions have S_IFDIR set */
+		l9p_describe_lperm(" mode=", fcall->tmkdir.mode, sb);
+		l9p_describe_ugid(" gid=", fcall->tmkdir.gid, sb);
+		return;
+
+	case L9P_RMKDIR:
+		l9p_describe_qid(" qid=", &fcall->rmkdir.qid, sb);
+		return;
+
+	case L9P_TRENAMEAT:
+		l9p_describe_fid(" olddirfid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" oldname=", fcall->trenameat.oldname,
+		    sb);
+		l9p_describe_fid(" newdirfid=", fcall->trenameat.newdirfid, sb);
+		l9p_describe_name(" newname=", fcall->trenameat.newname,
+		    sb);
+		return;
+
+	case L9P_RRENAMEAT:
+		return;
+
+	case L9P_TUNLINKAT:
+		l9p_describe_fid(" dirfd=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tunlinkat.name, sb);
+		l9p_describe_unlinkat_flags(" flags=",
+		    fcall->tunlinkat.flags, sb);
+		return;
+
+	case L9P_RUNLINKAT:
+		return;
+
+	default:
+		sbuf_printf(sb, " <missing case in %s()>", __func__);
+	}
+}
diff --git a/usr/src/lib/lib9p/mapfile-vers b/usr/src/lib/lib9p/mapfile-vers
new file mode 100644
index 0000000000..9bf38cc847
--- /dev/null
+++ b/usr/src/lib/lib9p/mapfile-vers
@@ -0,0 +1,58 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING:  STOP NOW.  DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+#	usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION ILLUMOSprivate {
+    global:
+	l9p_backend_fs_init;
+	l9p_connection_alloc_fid;
+	l9p_connection_close;
+	l9p_connection_free;
+	l9p_connection_init;
+	l9p_connection_recv;
+	l9p_connection_remove_fid;
+	l9p_describe_fcall;
+	l9p_dispatch_request;
+	l9p_freefcall;
+	l9p_freestat;
+	l9p_getgrlist;
+	l9p_init_msg;
+	l9p_pack_stat;
+	l9p_pudirent;
+	l9p_pufcall;
+	l9p_pustat;
+	l9p_respond;
+	l9p_seek_iov;
+	l9p_server_init;
+	l9p_sizeof_stat;
+	l9p_truncate_iov;
+    local:
+	*;
+};
diff --git a/usr/src/man/man1m/bhyve.1m b/usr/src/man/man1m/bhyve.1m
index a6c4637538..cab588665e 100644
--- a/usr/src/man/man1m/bhyve.1m
+++ b/usr/src/man/man1m/bhyve.1m
@@ -24,7 +24,7 @@
 .\"
 .\" Portions Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
 .\"
-.Dd March 18, 2021
+.Dd April 20, 2021
 .Dt BHYVE 1M
 .Os
 .Sh NAME
@@ -263,6 +263,8 @@ Accelerated Virtio network interface.
 Legacy Virtio network interface.
 .It Li virtio-blk
 Virtio block storage interface.
+.It Li virtio-9p
+Virtio 9p (VirtFS) interface.
 .It Li virtio-rnd
 Virtio random number generator interface.
 .It Li virtio-console
@@ -390,6 +392,24 @@ Disable emulation of guest trim requests via
 requests.
 .El
 .Pp
+9P devices:
+.Bl -tag -width 10n
+.It Xo
+.Sm off
+.Cm sharename Sy = Pa /path/to/share
+.Op Cm \&, Ar 9p-device-options
+.Sm on
+.Xc
+.El
+.Pp
+The
+.Ar 9p-device-options
+are:
+.Bl -tag -width 10n
+.It Cm ro
+Expose the share in read-only mode.
+.El
+.Pp
 TTY devices:
 .Bl -tag -width 10n
 .It Cm stdio
diff --git a/usr/src/man/man4/bhyve_config.4 b/usr/src/man/man4/bhyve_config.4
index 23e1e33c5a..668b363115 100644
--- a/usr/src/man/man4/bhyve_config.4
+++ b/usr/src/man/man4/bhyve_config.4
@@ -25,7 +25,7 @@
 .\"
 .\" Portions Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
 .\"
-.Dd May 6, 2021
+.Dd May 7, 2021
 .Dt BHYVE_CONFIG 4
 .Os
 .Sh NAME
@@ -217,6 +217,8 @@ NVM Express (NVMe) controller.
 PCI pass-through device.
 .It Li uart
 PCI 16550 serial device.
+.It Li virtio-9p
+VirtIO 9p (VirtFS) interface.
 .It Li virtio-blk
 VirtIO block storage interface.
 .It Li virtio-console
@@ -474,6 +476,17 @@ where
 .Ar N
 is the device number.
 .El
+.Ss VirtIO 9p Settings
+Each VirtIO 9p device exposes a single filesystem from a host path.
+.Bl -column "sharename" "Format" "Default"
+.It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description
+.It Va sharename Ta string Ta Ta
+The share name exposed to the guest.
+.It Va path Ta path Ta Ta
+The path of a directory on the host to export to the guest.
+.It Va ro Ta bool Ta false Ta
+If true, the guest filesystem is read-only.
+.El
 .Ss VirtIO Console Device Settings
 Each VirtIO Console device contains one or more console ports.
 Each port stores its settings in a node named
diff --git a/usr/src/pkg/manifests/system-library-bhyve.mf b/usr/src/pkg/manifests/system-library-bhyve.mf
index f425c83034..c29a0ab1cf 100644
--- a/usr/src/pkg/manifests/system-library-bhyve.mf
+++ b/usr/src/pkg/manifests/system-library-bhyve.mf
@@ -14,7 +14,7 @@
 #
 
 #
-# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+# Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
 #
 
 set name=pkg.fmri value=pkg:/system/library/bhyve@$(PKGVERS)
@@ -30,8 +30,10 @@ dir path=usr/lib group=bin
 dir path=usr/lib/$(ARCH64) group=bin
 file path=lib/$(ARCH64)/libvmm.so.1
 file path=lib/$(ARCH64)/libvmmapi.so.1
+file path=usr/lib/$(ARCH64)/lib9p.so.1
 file path=usr/lib/$(ARCH64)/libppt.so.1
 file path=usr/lib/libppt.so.1
 license lic_CDDL license=lic_CDDL
+license usr/src/lib/lib9p/COPYRIGHT license=usr/src/lib/lib9p/COPYRIGHT
 license usr/src/lib/libvmmapi/THIRDPARTYLICENSE \
     license=usr/src/lib/libvmmapi/THIRDPARTYLICENSE
author	Jason King <jason.brian.king@gmail.com>	2021-04-17 09:08:24 +0000
committer	Andy Fiddaman <omnios@citrus-it.co.uk>	2021-10-07 09:11:03 +0000
commit	aa693e996c2928c92cccd8a3efe91373e85a6967 (patch)
tree	23d7431e48a5194bf8ae93968c3caedc6c8bc7a6
parent	2d2dd8359f765a17f6caaa2d37d86837c0c40915 (diff)
download	illumos-gate-aa693e996c2928c92cccd8a3efe91373e85a6967.tar.gz