summaryrefslogtreecommitdiff
path: root/usr/src/lib/varpd
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/lib/varpd')
-rw-r--r--usr/src/lib/varpd/Makefile33
-rw-r--r--usr/src/lib/varpd/Makefile.plugin19
-rw-r--r--usr/src/lib/varpd/direct/Makefile40
-rw-r--r--usr/src/lib/varpd/direct/Makefile.com35
-rw-r--r--usr/src/lib/varpd/direct/amd64/Makefile19
-rw-r--r--usr/src/lib/varpd/direct/common/libvarpd_direct.c395
-rw-r--r--usr/src/lib/varpd/direct/common/llib-lvarpd_direct18
-rw-r--r--usr/src/lib/varpd/direct/common/mapfile-vers35
-rw-r--r--usr/src/lib/varpd/direct/i386/Makefile18
-rw-r--r--usr/src/lib/varpd/direct/sparc/Makefile18
-rw-r--r--usr/src/lib/varpd/direct/sparcv9/Makefile19
-rw-r--r--usr/src/lib/varpd/files/Makefile40
-rw-r--r--usr/src/lib/varpd/files/Makefile.com36
-rw-r--r--usr/src/lib/varpd/files/amd64/Makefile19
-rw-r--r--usr/src/lib/varpd/files/common/libvarpd_files.c598
-rw-r--r--usr/src/lib/varpd/files/common/libvarpd_files_json.c744
-rw-r--r--usr/src/lib/varpd/files/common/libvarpd_files_json.h40
-rw-r--r--usr/src/lib/varpd/files/common/llib-lvarpd_files18
-rw-r--r--usr/src/lib/varpd/files/common/mapfile-vers35
-rw-r--r--usr/src/lib/varpd/files/i386/Makefile18
-rw-r--r--usr/src/lib/varpd/files/sparc/Makefile18
-rw-r--r--usr/src/lib/varpd/files/sparcv9/Makefile19
-rw-r--r--usr/src/lib/varpd/libvarpd/Makefile54
-rw-r--r--usr/src/lib/varpd/libvarpd/Makefile.com44
-rw-r--r--usr/src/lib/varpd/libvarpd/amd64/Makefile19
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd.c355
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd.h78
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c645
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_client.c626
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_client.h94
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_door.c457
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h247
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c574
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c48
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c590
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c233
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c238
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h226
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_util.c98
-rw-r--r--usr/src/lib/varpd/libvarpd/common/llib-lvarpd19
-rw-r--r--usr/src/lib/varpd/libvarpd/common/mapfile-plugin47
-rw-r--r--usr/src/lib/varpd/libvarpd/common/mapfile-vers113
-rw-r--r--usr/src/lib/varpd/libvarpd/i386/Makefile18
-rw-r--r--usr/src/lib/varpd/libvarpd/sparc/Makefile18
-rw-r--r--usr/src/lib/varpd/libvarpd/sparcv9/Makefile19
-rw-r--r--usr/src/lib/varpd/svp/Makefile40
-rw-r--r--usr/src/lib/varpd/svp/Makefile.com48
-rw-r--r--usr/src/lib/varpd/svp/amd64/Makefile19
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp.c755
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp.h377
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c945
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c50
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_host.c173
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c206
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h172
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c596
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c144
-rw-r--r--usr/src/lib/varpd/svp/common/llib-lvarpd_svp18
-rw-r--r--usr/src/lib/varpd/svp/common/mapfile-vers35
-rw-r--r--usr/src/lib/varpd/svp/i386/Makefile18
-rw-r--r--usr/src/lib/varpd/svp/sparc/Makefile18
-rw-r--r--usr/src/lib/varpd/svp/sparcv9/Makefile19
62 files changed, 10727 insertions, 0 deletions
diff --git a/usr/src/lib/varpd/Makefile b/usr/src/lib/varpd/Makefile
new file mode 100644
index 0000000000..5fb179c1fe
--- /dev/null
+++ b/usr/src/lib/varpd/Makefile
@@ -0,0 +1,33 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+# libvarpd is listed first; the direct and files plug-ins link against
+# -lvarpd (see their Makefile.com).
+# NOTE(review): .WAIT is presumably the make barrier that keeps a parallel
+# build from entering the plug-in directories before libvarpd is done --
+# confirm against the make in use.
+SUBDIRS = libvarpd .WAIT direct files svp
+
+# Conditional macros: TARGET takes the name of whichever goal is being built
+# so the $(SUBDIRS) recipe below can hand the same goal to each sub-make.
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+check := TARGET = check
+install := TARGET = install
+install_h := TARGET = install_h
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install install_h check lint: $(SUBDIRS)
+
+# FRC has no prerequisites and no recipe, so depending on it forces the
+# recursion into every subdirectory on each invocation.
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/varpd/Makefile.plugin b/usr/src/lib/varpd/Makefile.plugin
new file mode 100644
index 0000000000..67410742df
--- /dev/null
+++ b/usr/src/lib/varpd/Makefile.plugin
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+# Shared settings for varpd plug-ins; included from each plug-in's
+# Makefile.com after Makefile.lib.
+# Plug-ins are placed under usr/lib/varpd in the proto area rather than in
+# the default library directory.
+ROOTLIBDIR = $(ROOT)/usr/lib/varpd
+ROOTLIBDIR64 = $(ROOT)/usr/lib/varpd/$(MACH64)
+
+# This path is relative to the per-architecture build directory (for example
+# direct/amd64), which is where the including Makefile.com is evaluated.
+MAPFILES += ../../libvarpd/common/mapfile-plugin
diff --git a/usr/src/lib/varpd/direct/Makefile b/usr/src/lib/varpd/direct/Makefile
new file mode 100644
index 0000000000..f026c620e6
--- /dev/null
+++ b/usr/src/lib/varpd/direct/Makefile
@@ -0,0 +1,40 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../../Makefile.lib
+
+# Always build the native $(MACH) directory.
+# NOTE(review): $(BUILD64) presumably expands to nothing when 64-bit builds
+# are enabled and to a comment character otherwise -- confirm in the shared
+# makefiles.
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+# TARGET carries the current goal down into the per-architecture sub-make.
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+# The parent Makefile invokes install_h and check in every subdirectory, so
+# the targets must exist here even though they have nothing to do.
+install_h:
+
+check:
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/varpd/direct/Makefile.com b/usr/src/lib/varpd/direct/Makefile.com
new file mode 100644
index 0000000000..e48efcfcc0
--- /dev/null
+++ b/usr/src/lib/varpd/direct/Makefile.com
@@ -0,0 +1,35 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+# Common build description for the "direct" plug-in; included by each
+# per-architecture Makefile, so relative paths are resolved from there.
+# LIBRARY, VERS and OBJECTS are set before Makefile.lib is included.
+LIBRARY = libvarpd_direct.a
+VERS = .1
+OBJECTS = libvarpd_direct.o
+
+include ../../../Makefile.lib
+include ../../Makefile.plugin
+
+# Only the shared object is built.
+LIBS = $(DYNLIB)
+LDLIBS += -lc -lvarpd -lumem -lnvpair -lnsl
+CPPFLAGS += -I../common
+
+SRCDIR = ../common
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../../Makefile.targ
diff --git a/usr/src/lib/varpd/direct/amd64/Makefile b/usr/src/lib/varpd/direct/amd64/Makefile
new file mode 100644
index 0000000000..5c586c1d40
--- /dev/null
+++ b/usr/src/lib/varpd/direct/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+# 64-bit x86 build of the direct plug-in.
+# NOTE(review): Makefile.lib.64 is included after Makefile.com, presumably so
+# that it can override the 32-bit defaults -- confirm.
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/direct/common/libvarpd_direct.c b/usr/src/lib/varpd/direct/common/libvarpd_direct.c
new file mode 100644
index 0000000000..fd2ee0154a
--- /dev/null
+++ b/usr/src/lib/varpd/direct/common/libvarpd_direct.c
@@ -0,0 +1,395 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Point to point plug-in for varpd.
+ */
+
+#include <libvarpd_provider.h>
+#include <umem.h>
+#include <errno.h>
+#include <thread.h>
+#include <synch.h>
+#include <strings.h>
+#include <assert.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <libnvpair.h>
+
+/*
+ * Per-instance state of the direct plug-in: one destination address and port,
+ * which varpd_direct_default() hands back as the target.
+ */
+typedef struct varpd_direct {
+ overlay_plugin_dest_t vad_dest; /* RO */
+ mutex_t vad_lock; /* Protects the rest */
+ boolean_t vad_hip; /* B_TRUE once vad_ip has been set */
+ boolean_t vad_hport; /* B_TRUE once vad_port has been set */
+ struct in6_addr vad_ip;
+ uint16_t vad_port;
+} varpd_direct_t;
+
+/*
+ * Property names. The rest of this file refers to them by position: index 0
+ * is the destination IP and index 1 is the destination port.
+ */
+static const char *varpd_direct_props[] = {
+ "direct/dest_ip",
+ "direct/dest_port"
+};
+
+/*
+ * Decide whether this plug-in can serve the requested destination type. The
+ * request may name nothing other than an IP and/or a port, and it must name
+ * at least one of the two.
+ */
+static boolean_t
+varpd_direct_valid_dest(overlay_plugin_dest_t dest)
+{
+	const int supported = OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT;
+
+	if ((dest & ~supported) != 0 || (dest & supported) == 0)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Create a new, unconfigured instance of the direct plug-in.
+ *
+ * hdl is not retained by this plug-in. On success *outp is set to the new
+ * instance and 0 is returned. Otherwise an errno value is returned: ENOTSUP
+ * if dest is not a supported destination type, ENOMEM if the allocation
+ * fails, or whatever mutex_init() reports.
+ */
+static int
+varpd_direct_create(varpd_provider_handle_t *hdl, void **outp,
+    overlay_plugin_dest_t dest)
+{
+	int ret;
+	varpd_direct_t *vdp;
+
+	if (varpd_direct_valid_dest(dest) == B_FALSE)
+		return (ENOTSUP);
+
+	vdp = umem_alloc(sizeof (varpd_direct_t), UMEM_DEFAULT);
+	if (vdp == NULL)
+		return (ENOMEM);
+
+	/*
+	 * umem_alloc() does not clear memory. Zero the structure so that
+	 * vad_ip and vad_port never hold stack or heap garbage that
+	 * varpd_direct_default() could copy out before they are configured.
+	 */
+	bzero(vdp, sizeof (varpd_direct_t));
+
+	if ((ret = mutex_init(&vdp->vad_lock, USYNC_THREAD, NULL)) != 0) {
+		umem_free(vdp, sizeof (varpd_direct_t));
+		return (ret);
+	}
+
+	vdp->vad_dest = dest;
+	vdp->vad_hip = B_FALSE;
+	vdp->vad_hport = B_FALSE;
+	*outp = vdp;
+	return (0);
+}
+
+/*
+ * Check that the instance has everything it needs to run.
+ *
+ * Returns 0 when the destination IP has been set and, if the destination
+ * type includes a port, the port has been set as well. Returns EAGAIN while
+ * configuration is still incomplete.
+ */
+static int
+varpd_direct_start(void *arg)
+{
+	varpd_direct_t *vdp = arg;
+	boolean_t ready;
+
+	mutex_lock(&vdp->vad_lock);
+	ready = vdp->vad_hip;
+	/*
+	 * The port is only required when the destination type asks for one;
+	 * an IP-only instance does not even advertise the port property.
+	 */
+	if ((vdp->vad_dest & OVERLAY_PLUGIN_D_PORT) &&
+	    vdp->vad_hport == B_FALSE)
+		ready = B_FALSE;
+	mutex_unlock(&vdp->vad_lock);
+
+	return (ready == B_TRUE ? 0 : EAGAIN);
+}
+
+/*
+ * Stop entry point. varpd_direct_start() only inspects state and acquires
+ * nothing, so there is nothing to undo here.
+ */
+static void
+varpd_direct_stop(void *arg)
+{
+}
+
+/*
+ * Tear down an instance: destroy its lock and release its memory. A failure
+ * to destroy the lock is treated as fatal.
+ */
+static void
+varpd_direct_destroy(void *arg)
+{
+	varpd_direct_t *vdp = arg;
+	int ret;
+
+	ret = mutex_destroy(&vdp->vad_lock);
+	if (ret != 0)
+		abort();
+
+	umem_free(vdp, sizeof (*vdp));
+}
+
+/*
+ * Fill in otp with the configured destination address and port. The copy is
+ * made under vad_lock so the pair is read consistently. Always returns
+ * VARPD_LOOKUP_OK.
+ */
+static int
+varpd_direct_default(void *arg, overlay_target_point_t *otp)
+{
+	varpd_direct_t *vdp = arg;
+
+	mutex_lock(&vdp->vad_lock);
+	otp->otp_port = vdp->vad_port;
+	bcopy(&vdp->vad_ip, &otp->otp_ip, sizeof (vdp->vad_ip));
+	mutex_unlock(&vdp->vad_lock);
+
+	return (VARPD_LOOKUP_OK);
+}
+
+/*
+ * Report how many properties this instance exposes: one for each destination
+ * component present in vad_dest. Always returns 0.
+ */
+static int
+varpd_direct_nprops(void *arg, uint_t *nprops)
+{
+	const varpd_direct_t *vdp = arg;
+	uint_t count = 0;
+
+	if (vdp->vad_dest & OVERLAY_PLUGIN_D_ETHERNET)
+		count++;
+
+	if (vdp->vad_dest & OVERLAY_PLUGIN_D_IP)
+		count++;
+
+	if (vdp->vad_dest & OVERLAY_PLUGIN_D_PORT)
+		count++;
+
+	*nprops = count;
+	assert(count == 1 || count == 2);
+
+	return (0);
+}
+
+/*
+ * Describe property number propid through vph. Property 0 is the destination
+ * IP; property 1 is the destination port and exists only when the destination
+ * type includes a port. Returns 0 on success and EINVAL for any other id.
+ */
+static int
+varpd_direct_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
+{
+	varpd_direct_t *vdp = arg;
+
+	/*
+	 * Only IP + port combinations are supported at present, so an IP is
+	 * always part of the destination and always occupies slot 0. A port
+	 * without an IP is not supported.
+	 */
+	assert(vdp->vad_dest & OVERLAY_PLUGIN_D_IP);
+
+	switch (propid) {
+	case 0:
+		libvarpd_prop_set_name(vph, varpd_direct_props[0]);
+		libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+		libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP);
+		libvarpd_prop_set_nodefault(vph);
+		return (0);
+	case 1:
+		if (!(vdp->vad_dest & OVERLAY_PLUGIN_D_PORT))
+			break;
+		libvarpd_prop_set_name(vph, varpd_direct_props[1]);
+		libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+		libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
+		libvarpd_prop_set_nodefault(vph);
+		libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
+		return (0);
+	default:
+		break;
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Copy the current value of property pname into buf.
+ *
+ * On entry *sizep is the capacity of buf; on success it is updated to the
+ * number of bytes written, which is 0 when the property has not been set
+ * yet. The IP is returned as a struct in6_addr and the port as a uint64_t.
+ * Returns EOVERFLOW if buf is too small and EINVAL for an unknown name.
+ */
+static int
+varpd_direct_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
+{
+	varpd_direct_t *vdp = arg;
+
+	/* direct/dest_ip */
+	if (strcmp(pname, varpd_direct_props[0]) == 0) {
+		const uint32_t need = sizeof (struct in6_addr);
+
+		if (*sizep < need)
+			return (EOVERFLOW);
+
+		mutex_lock(&vdp->vad_lock);
+		if (vdp->vad_hip != B_FALSE) {
+			bcopy(&vdp->vad_ip, buf, need);
+			*sizep = need;
+		} else {
+			*sizep = 0;
+		}
+		mutex_unlock(&vdp->vad_lock);
+		return (0);
+	}
+
+	/* direct/dest_port */
+	if (strcmp(pname, varpd_direct_props[1]) == 0) {
+		const uint32_t need = sizeof (uint64_t);
+		uint64_t port;
+
+		if (*sizep < need)
+			return (EOVERFLOW);
+
+		mutex_lock(&vdp->vad_lock);
+		if (vdp->vad_hport != B_FALSE) {
+			port = vdp->vad_port;
+			bcopy(&port, buf, need);
+			*sizep = need;
+		} else {
+			*sizep = 0;
+		}
+		mutex_unlock(&vdp->vad_lock);
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Set property pname from the size bytes at buf.
+ *
+ * The IP must be supplied as a struct in6_addr and must not be an
+ * IPv4-compatible address; the port must be supplied as a uint64_t in the
+ * range 1 to UINT16_MAX. Returns 0 on success, EOVERFLOW if buf is smaller
+ * than the expected type, and EINVAL for a rejected value or unknown name.
+ */
+static int
+varpd_direct_setprop(void *arg, const char *pname, const void *buf,
+    const uint32_t size)
+{
+	varpd_direct_t *vdp = arg;
+
+	/* direct/dest_ip */
+	if (strcmp(pname, varpd_direct_props[0]) == 0) {
+		const struct in6_addr *addr = buf;
+
+		if (size < sizeof (*addr))
+			return (EOVERFLOW);
+
+		/*
+		 * XXX What else should be disallowed?
+		 */
+		if (IN6_IS_ADDR_V4COMPAT(addr))
+			return (EINVAL);
+
+		mutex_lock(&vdp->vad_lock);
+		bcopy(buf, &vdp->vad_ip, sizeof (vdp->vad_ip));
+		vdp->vad_hip = B_TRUE;
+		mutex_unlock(&vdp->vad_lock);
+		return (0);
+	}
+
+	/* direct/dest_port */
+	if (strcmp(pname, varpd_direct_props[1]) == 0) {
+		const uint64_t *valp = buf;
+		uint64_t port;
+
+		if (size < sizeof (uint64_t))
+			return (EOVERFLOW);
+
+		port = *valp;
+		if (port == 0 || port > UINT16_MAX)
+			return (EINVAL);
+
+		mutex_lock(&vdp->vad_lock);
+		vdp->vad_port = (uint16_t)port;
+		vdp->vad_hport = B_TRUE;
+		mutex_unlock(&vdp->vad_lock);
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Persist the instance's configuration into nvp. The port, if set, is stored
+ * as a uint16 and the IP, if set, as its textual IPv6 form, both under their
+ * property names. Returns 0 on success or the error from the nvlist call
+ * that failed.
+ */
+static int
+varpd_direct_save(void *arg, nvlist_t *nvp)
+{
+	int ret = 0;
+	varpd_direct_t *vdp = arg;
+	char addr[INET6_ADDRSTRLEN];
+
+	mutex_lock(&vdp->vad_lock);
+
+	if (vdp->vad_hport == B_TRUE) {
+		ret = nvlist_add_uint16(nvp, varpd_direct_props[1],
+		    vdp->vad_port);
+		if (ret != 0)
+			goto out;
+	}
+
+	if (vdp->vad_hip == B_TRUE) {
+		/* Formatting into a buffer of this size is not expected to fail. */
+		if (inet_ntop(AF_INET6, &vdp->vad_ip, addr, sizeof (addr)) ==
+		    NULL)
+			abort();
+		ret = nvlist_add_string(nvp, varpd_direct_props[0], addr);
+	}
+
+out:
+	mutex_unlock(&vdp->vad_lock);
+	return (ret);
+}
+
+/*
+ * Recreate an instance from the configuration that varpd_direct_save() wrote
+ * into nvp.
+ *
+ * Either property may be absent, in which case it is simply left unset. On
+ * success *outp is set to the new instance and 0 is returned. Otherwise an
+ * errno value is returned: ENOTSUP for an unsupported dest, ENOMEM if the
+ * allocation fails, EINVAL if the stored address cannot be parsed, or the
+ * error from mutex_init() or an nvlist lookup.
+ */
+static int
+varpd_direct_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
+    overlay_plugin_dest_t dest, void **outp)
+{
+	int ret;
+	char *ipstr;
+	varpd_direct_t *vdp;
+
+	if (varpd_direct_valid_dest(dest) == B_FALSE)
+		return (ENOTSUP);
+
+	vdp = umem_alloc(sizeof (varpd_direct_t), UMEM_DEFAULT);
+	if (vdp == NULL)
+		return (ENOMEM);
+
+	/* umem_alloc() does not clear memory; start from a known state. */
+	bzero(vdp, sizeof (varpd_direct_t));
+
+	if ((ret = mutex_init(&vdp->vad_lock, USYNC_THREAD, NULL)) != 0) {
+		umem_free(vdp, sizeof (varpd_direct_t));
+		return (ret);
+	}
+
+	/*
+	 * Record the destination type exactly as varpd_direct_create() does;
+	 * nprops, propinfo and start all depend on it.
+	 */
+	vdp->vad_dest = dest;
+	vdp->vad_hip = B_FALSE;
+	vdp->vad_hport = B_FALSE;
+
+	ret = nvlist_lookup_uint16(nvp, varpd_direct_props[1],
+	    &vdp->vad_port);
+	if (ret == 0) {
+		vdp->vad_hport = B_TRUE;
+	} else if (ret != ENOENT) {
+		goto fail;
+	}
+
+	ret = nvlist_lookup_string(nvp, varpd_direct_props[0], &ipstr);
+	if (ret == 0) {
+		ret = inet_pton(AF_INET6, ipstr, &vdp->vad_ip);
+		/*
+		 * inet_pton is only defined to return -1 with errno set to
+		 * EAFNOSUPPORT, which really, shouldn't happen.
+		 */
+		if (ret == -1) {
+			assert(errno == EAFNOSUPPORT);
+			abort();
+		}
+		if (ret == 0) {
+			ret = EINVAL;
+			goto fail;
+		}
+		/* The address parsed, so it is now valid to hand out. */
+		vdp->vad_hip = B_TRUE;
+	} else if (ret != ENOENT) {
+		goto fail;
+	}
+
+	*outp = vdp;
+	return (0);
+
+fail:
+	if (mutex_destroy(&vdp->vad_lock) != 0)
+		abort();
+	umem_free(vdp, sizeof (varpd_direct_t));
+	return (ret);
+}
+
+/*
+ * Operations vector registered in varpd_direct_init(). The initializers are
+ * positional, so their order must match the declaration of
+ * varpd_plugin_ops_t.
+ * NOTE(review): the leading 0 and the NULL between the default and nprops
+ * entries fill slots this plug-in does not supply -- presumably a flags word
+ * and the lookup entry point; confirm against the type's declaration.
+ */
+static const varpd_plugin_ops_t varpd_direct_ops = {
+ 0,
+ varpd_direct_create,
+ varpd_direct_start,
+ varpd_direct_stop,
+ varpd_direct_destroy,
+ varpd_direct_default,
+ NULL,
+ varpd_direct_nprops,
+ varpd_direct_propinfo,
+ varpd_direct_getprop,
+ varpd_direct_setprop,
+ varpd_direct_save,
+ varpd_direct_restore
+};
+
+/*
+ * Registers the "direct" plug-in with libvarpd as an OVERLAY_TARGET_POINT
+ * provider. The #pragma init directive names this function as an
+ * initialization routine for the object. Both failure paths are currently
+ * silent: an allocation failure returns early and the result of the
+ * registration call is discarded.
+ */
+#pragma init(varpd_direct_init)
+static void
+varpd_direct_init(void)
+{
+ int err;
+ varpd_plugin_register_t *vpr;
+
+ vpr = libvarpd_plugin_alloc(VARPD_VERSION_ONE, &err);
+ /* XXX How should we communicate this failure? */
+ if (vpr == NULL)
+ return;
+
+ vpr->vpr_mode = OVERLAY_TARGET_POINT;
+ vpr->vpr_name = "direct";
+ vpr->vpr_ops = &varpd_direct_ops;
+ /* XXX We care about failure, but what do we do? */
+ (void) libvarpd_plugin_register(vpr);
+ /* The registration structure is released whether or not it succeeded. */
+ libvarpd_plugin_free(vpr);
+}
diff --git a/usr/src/lib/varpd/direct/common/llib-lvarpd_direct b/usr/src/lib/varpd/direct/common/llib-lvarpd_direct
new file mode 100644
index 0000000000..31b3d36fbe
--- /dev/null
+++ b/usr/src/lib/varpd/direct/common/llib-lvarpd_direct
@@ -0,0 +1,18 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
diff --git a/usr/src/lib/varpd/direct/common/mapfile-vers b/usr/src/lib/varpd/direct/common/mapfile-vers
new file mode 100644
index 0000000000..642ef72adc
--- /dev/null
+++ b/usr/src/lib/varpd/direct/common/mapfile-vers
@@ -0,0 +1,35 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+# Every symbol is reduced to local scope, so this object exports nothing.
+SYMBOL_VERSION SUNWprivate {
+ local:
+ *;
+};
diff --git a/usr/src/lib/varpd/direct/i386/Makefile b/usr/src/lib/varpd/direct/i386/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/varpd/direct/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+# 32-bit x86 build of the direct plug-in; everything else comes from
+# Makefile.com.
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/direct/sparc/Makefile b/usr/src/lib/varpd/direct/sparc/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/varpd/direct/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+# 32-bit SPARC build of the direct plug-in; everything else comes from
+# Makefile.com.
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/direct/sparcv9/Makefile b/usr/src/lib/varpd/direct/sparcv9/Makefile
new file mode 100644
index 0000000000..5c586c1d40
--- /dev/null
+++ b/usr/src/lib/varpd/direct/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+# 64-bit SPARC build of the direct plug-in.
+# NOTE(review): Makefile.lib.64 is included after Makefile.com, presumably so
+# that it can override the 32-bit defaults -- confirm.
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/files/Makefile b/usr/src/lib/varpd/files/Makefile
new file mode 100644
index 0000000000..f026c620e6
--- /dev/null
+++ b/usr/src/lib/varpd/files/Makefile
@@ -0,0 +1,40 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../../Makefile.lib
+
+# Always build the native $(MACH) directory.
+# NOTE(review): $(BUILD64) presumably expands to nothing when 64-bit builds
+# are enabled and to a comment character otherwise -- confirm in the shared
+# makefiles.
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+# TARGET carries the current goal down into the per-architecture sub-make.
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+# These two goals have no prerequisites and no recipe: nothing is done for
+# them in this directory.
+install_h:
+
+check:
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/varpd/files/Makefile.com b/usr/src/lib/varpd/files/Makefile.com
new file mode 100644
index 0000000000..5433f79fd1
--- /dev/null
+++ b/usr/src/lib/varpd/files/Makefile.com
@@ -0,0 +1,36 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+# Common build description for the "files" plug-in; included by each
+# per-architecture Makefile, so relative paths are resolved from there.
+# LIBRARY, VERS and OBJECTS are set before Makefile.lib is included.
+LIBRARY = libvarpd_files.a
+VERS = .1
+OBJECTS = libvarpd_files.o \
+ libvarpd_files_json.o
+
+include ../../../Makefile.lib
+include ../../Makefile.plugin
+
+# Only the shared object is built.
+LIBS = $(DYNLIB)
+LDLIBS += -lc -lvarpd -lumem -lnvpair -lsocket -lnsl
+CPPFLAGS += -I../common
+
+SRCDIR = ../common
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../../Makefile.targ
diff --git a/usr/src/lib/varpd/files/amd64/Makefile b/usr/src/lib/varpd/files/amd64/Makefile
new file mode 100644
index 0000000000..5c586c1d40
--- /dev/null
+++ b/usr/src/lib/varpd/files/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+# 64-bit x86 build of the files plug-in.
+# NOTE(review): Makefile.lib.64 is included after Makefile.com, presumably so
+# that it can override the 32-bit defaults -- confirm.
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/files/common/libvarpd_files.c b/usr/src/lib/varpd/files/common/libvarpd_files.c
new file mode 100644
index 0000000000..0b6812bfc0
--- /dev/null
+++ b/usr/src/lib/varpd/files/common/libvarpd_files.c
@@ -0,0 +1,598 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Files based plug in for varpd
+ *
+ * This is a dynamic varpd plug-in that has a static backing store. In this
+ * case, the idea here is that the full set of mappings is fixed at creation
+ * time and specified in a single file which is currently expected to be in a
+ * JSON format of the following form:
+ *
+ * {
+ * "aa:bb:cc:dd:ee:ff": {
+ * "arp": "10.23.69.1",
+ * "ndp": "2600:3c00::f03c:91ff:fe96:a264",
+ * "ip": "192.168.1.1",
+ * "port": 8080
+ * }
+ * }
+ */
+
+#include <libvarpd_provider.h>
+#include <umem.h>
+#include <errno.h>
+#include <thread.h>
+#include <synch.h>
+#include <strings.h>
+#include <assert.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <libnvpair.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/ethernet.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include <libvarpd_files_json.h>
+
+/*
+ * Per-instance state of the files plug-in. vaf_path is the configured
+ * location of the JSON mapping file; vaf_nvl is the table built from that
+ * file by varpd_files_start() and released by varpd_files_stop().
+ */
+typedef struct varpd_files {
+ overlay_plugin_dest_t vaf_dest; /* RO */
+ varpd_provider_handle_t *vaf_hdl; /* RO */
+ char *vaf_path; /* WO */
+ nvlist_t *vaf_nvl; /* WO */
+ uint64_t vaf_nmisses; /* Atomic */
+ uint64_t vaf_narp; /* Atomic */
+} varpd_files_t;
+
+/*
+ * Property names. The only property, at index 0, is the path of the mapping
+ * file.
+ */
+static const char *varpd_files_props[] = {
+ "files/config"
+};
+
+/*
+ * Decide whether this plug-in can serve the requested destination type: it
+ * must contain an IP and/or a port, and nothing else.
+ */
+static boolean_t
+varpd_files_valid_dest(overlay_plugin_dest_t dest)
+{
+	boolean_t extra, wanted;
+
+	extra = (dest & ~(OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT)) ?
+	    B_TRUE : B_FALSE;
+	wanted = (dest & (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT)) ?
+	    B_TRUE : B_FALSE;
+
+	if (extra == B_FALSE && wanted == B_TRUE)
+		return (B_TRUE);
+
+	return (B_FALSE);
+}
+
+/*
+ * Create a new, unconfigured instance of the files plug-in. On success *outp
+ * is set to the instance and 0 is returned; otherwise ENOTSUP is returned
+ * for an unsupported dest or ENOMEM if the allocation fails.
+ */
+static int
+varpd_files_create(varpd_provider_handle_t *hdl, void **outp,
+    overlay_plugin_dest_t dest)
+{
+	varpd_files_t *vaf;
+
+	if (varpd_files_valid_dest(dest) == B_FALSE)
+		return (ENOTSUP);
+
+	if ((vaf = umem_alloc(sizeof (*vaf), UMEM_DEFAULT)) == NULL)
+		return (ENOMEM);
+
+	/* Start from an all-zero structure, then fill in what is known. */
+	bzero(vaf, sizeof (*vaf));
+	vaf->vaf_hdl = hdl;
+	vaf->vaf_dest = dest;
+	vaf->vaf_nvl = NULL;
+	vaf->vaf_path = NULL;
+
+	*outp = vaf;
+	return (0);
+}
+
+/*
+ * Build the lookup table for this instance from the parsed mapping file.
+ *
+ * Every top-level pair in nvl must be an nvlist whose name parses as an
+ * Ethernet address. Each one is re-added to a fresh nvlist under the name
+ * that ether_ntoa_r() produces for that address, so that later lookups can
+ * use the same formatting. On success the new list is stored in vaf_nvl and
+ * 0 is returned; on failure nothing is stored and an errno value is
+ * returned.
+ */
+static int
+varpd_files_normalize_nvlist(varpd_files_t *vaf, nvlist_t *nvl)
+{
+	int ret;
+	nvlist_t *out;
+	nvpair_t *pair;
+
+	if ((ret = nvlist_alloc(&out, NV_UNIQUE_NAME, 0)) != 0)
+		return (ret);
+
+	pair = nvlist_next_nvpair(nvl, NULL);
+	while (pair != NULL) {
+		char canon[ETHERADDRSTRL];
+		struct ether_addr mac;
+		nvlist_t *entry;
+
+		/* Unless stated otherwise, a bad entry is reported as EINVAL. */
+		ret = EINVAL;
+
+		if (nvpair_type(pair) != DATA_TYPE_NVLIST)
+			goto fail;
+
+		if (nvpair_value_nvlist(pair, &entry) != 0)
+			goto fail;
+
+		if (ether_aton_r(nvpair_name(pair), &mac) == NULL)
+			goto fail;
+
+		if (ether_ntoa_r(&mac, canon) == NULL) {
+			ret = ENOMEM;
+			goto fail;
+		}
+
+		if (nvlist_add_nvlist(out, canon, entry) != 0)
+			goto fail;
+
+		pair = nvlist_next_nvpair(nvl, pair);
+	}
+
+	vaf->vaf_nvl = out;
+	return (0);
+
+fail:
+	nvlist_free(out);
+	return (ret);
+}
+
+/*
+ * Load the mapping file named by vaf_path and build the lookup table.
+ *
+ * Returns EAGAIN if no path has been configured yet, the errno from open(),
+ * fstat() or mmap() if the file cannot be read, or the error from parsing or
+ * normalizing its contents. Returns 0 once vaf_nvl has been populated.
+ */
+static int
+varpd_files_start(void *arg)
+{
+	int fd, ret;
+	void *maddr;
+	struct stat st;
+	nvlist_t *nvl;
+	varpd_files_t *vaf = arg;
+
+	if (vaf->vaf_path == NULL)
+		return (EAGAIN);
+
+	if ((fd = open(vaf->vaf_path, O_RDONLY)) < 0)
+		return (errno);
+
+	if (fstat(fd, &st) != 0) {
+		ret = errno;
+		if (close(fd) != 0)
+			abort();
+		return (ret);
+	}
+
+	maddr = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+	    fd, 0);
+	/*
+	 * mmap() reports failure with MAP_FAILED, not NULL. This is also the
+	 * path taken for an empty file, since a zero-length mapping is
+	 * rejected.
+	 */
+	if (maddr == MAP_FAILED) {
+		ret = errno;
+		if (close(fd) != 0)
+			abort();
+		return (ret);
+	}
+
+	ret = nvlist_parse_json(maddr, st.st_size, &nvl,
+	    NVJSON_FORCE_INTEGER);
+	if (ret == 0) {
+		/* The normalized copy is what is kept; the parse result is not. */
+		ret = varpd_files_normalize_nvlist(vaf, nvl);
+		nvlist_free(nvl);
+	}
+	if (munmap(maddr, st.st_size) != 0)
+		abort();
+	if (close(fd) != 0)
+		abort();
+
+	return (ret);
+}
+
+/*
+ * Stop the instance by discarding the lookup table that
+ * varpd_files_start() built.
+ */
+static void
+varpd_files_stop(void *arg)
+{
+	varpd_files_t *vaf = arg;
+	nvlist_t *table = vaf->vaf_nvl;
+
+	vaf->vaf_nvl = NULL;
+	nvlist_free(table);
+}
+
+/*
+ * Release an instance. The lookup table must already have been dropped by
+ * varpd_files_stop(); the configured path, if any, is freed here along with
+ * the instance itself.
+ */
+static void
+varpd_files_destroy(void *arg)
+{
+	varpd_files_t *vaf = arg;
+	char *path = vaf->vaf_path;
+
+	assert(vaf->vaf_nvl == NULL);
+
+	if (path != NULL) {
+		vaf->vaf_path = NULL;
+		umem_free(path, strlen(path) + 1);
+	}
+
+	umem_free(vaf, sizeof (*vaf));
+}
+
+/*
+ * Resolve the lookup described by otl and answer it through qh.
+ *
+ * Every path ends either in a call to libvarpd_plugin_query_reply() or in a
+ * hand-off to one of libvarpd's proxy helpers:
+ *
+ *  - ARP frames go to libvarpd_plugin_proxy_arp().
+ *  - IPv6 frames whose destination MAC begins 33:33 go to
+ *    libvarpd_plugin_proxy_ndp().
+ *  - IPv4 frames sent to the broadcast MAC go to
+ *    libvarpd_plugin_proxy_dhcp(), provided the source MAC has an entry with
+ *    a well-formed "dhcp-proxy" address.
+ *  - Anything else is looked up by destination MAC; the entry's "port" and
+ *    "ip" are written to otp and VARPD_LOOKUP_OK is returned.
+ *
+ * Any failure along the way is answered with VARPD_LOOKUP_DROP.
+ */
+static void
+varpd_files_lookup(void *arg, varpd_query_handle_t *qh,
+ const overlay_targ_lookup_t *otl, overlay_target_point_t *otp)
+{
+ char macstr[ETHERADDRSTRL], *ipstr;
+ nvlist_t *nvl;
+ varpd_files_t *vaf = arg;
+ int32_t port;
+ static const uint8_t bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+ /* We don't support a default */
+ if (otl == NULL) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (otl->otl_sap == ETHERTYPE_ARP) {
+ libvarpd_plugin_proxy_arp(vaf->vaf_hdl, qh, otl);
+ return;
+ }
+
+ if (otl->otl_sap == ETHERTYPE_IPV6 &&
+ otl->otl_dstaddr[0] == 0x33 &&
+ otl->otl_dstaddr[1] == 0x33) {
+ libvarpd_plugin_proxy_ndp(vaf->vaf_hdl, qh, otl);
+ return;
+ }
+
+ if (otl->otl_sap == ETHERTYPE_IP &&
+ bcmp(otl->otl_dstaddr, bcast, ETHERADDRL) == 0) {
+ char *mac;
+ struct ether_addr a, *addr;
+
+ addr = &a;
+ /* The table is keyed by the sender here, not the destination. */
+ if (ether_ntoa_r((struct ether_addr *)otl->otl_srcaddr,
+ macstr) == NULL) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_nvlist(vaf->vaf_nvl, macstr, &nvl) != 0) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_string(nvl, "dhcp-proxy", &mac) != 0) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ /*
+ * The parsed address is not used further in this function; the
+ * call only checks that the entry is a well-formed MAC address.
+ */
+ if (ether_aton_r(mac, addr) == NULL) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ libvarpd_plugin_proxy_dhcp(vaf->vaf_hdl, qh, otl);
+ return;
+ }
+
+ /* Ordinary traffic: find the entry for the destination MAC. */
+ if (ether_ntoa_r((struct ether_addr *)otl->otl_dstaddr,
+ macstr) == NULL) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_nvlist(vaf->vaf_nvl, macstr, &nvl) != 0) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_int32(nvl, "port", &port) != 0) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ /* Only values in the range 1 to UINT16_MAX are accepted as a port. */
+ if (port <= 0 || port > UINT16_MAX) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+ otp->otp_port = port;
+
+ if (nvlist_lookup_string(nvl, "ip", &ipstr) != 0) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ /*
+ * Try to parse it as a v6 address and then if it's not, try to
+ * transform it into a v4 address which we'll then wrap it into a v4
+ * mapped address.
+ */
+ if (inet_pton(AF_INET6, ipstr, &otp->otp_ip) != 1) {
+ uint32_t v4;
+ if (inet_pton(AF_INET, ipstr, &v4) != 1) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+ IN6_IPADDR_TO_V4MAPPED(v4, &otp->otp_ip);
+ }
+
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_OK);
+}
+
+ /*
+  * Report the number of properties this plugin exposes. There is exactly
+  * one; the other property entry points in this file identify it as index 0
+  * and by the name stored in varpd_files_props[0].
+  */
+ static int
+ varpd_files_nprops(void *arg, uint_t *nprops)
+ {
+ *nprops = 1;
+ return (0);
+ }
+
+ /*
+  * Fill in the metadata for property 'propid' through the property handle.
+  * Only property 0 exists; any other id yields EINVAL.
+  */
+ static int
+ varpd_files_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
+ {
+ 	if (propid == 0) {
+ 		/* Read/write string property with no default value. */
+ 		libvarpd_prop_set_name(vph, varpd_files_props[0]);
+ 		libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+ 		libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING);
+ 		libvarpd_prop_set_nodefault(vph);
+ 		return (0);
+ 	}
+
+ 	return (EINVAL);
+ }
+
+ /*
+  * Copy the current value of the path property into 'buf'.
+  *
+  * On entry *sizep is the capacity of 'buf'; on success it is updated to the
+  * number of bytes written including the terminating NUL, or to 0 when no
+  * path has been set. Returns EINVAL for an unknown property name and
+  * EOVERFLOW when 'buf' is too small.
+  */
+ static int
+ varpd_files_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
+ {
+ 	varpd_files_t *vaf = arg;
+ 	size_t need;
+
+ 	if (strcmp(pname, varpd_files_props[0]) != 0)
+ 		return (EINVAL);
+
+ 	/* No path configured yet: report an empty value. */
+ 	if (vaf->vaf_path == NULL) {
+ 		*sizep = 0;
+ 		return (0);
+ 	}
+
+ 	need = strlen(vaf->vaf_path) + 1;
+ 	if (*sizep < need)
+ 		return (EOVERFLOW);
+
+ 	*sizep = need;
+ 	(void) strlcpy(buf, vaf->vaf_path, need);
+ 	return (0);
+ }
+
+ /*
+  * Set the path property from the 'size' bytes at 'buf'.
+  *
+  * The value is copied into a buffer of exactly strlen + 1 bytes, because
+  * every umem_free() of vaf_path in this file passes strlen(vaf_path) + 1 and
+  * umem requires the free size to match the allocation size. The input is
+  * only scanned within its first 'size' bytes, so an unterminated buffer is
+  * truncated to size - 1 characters rather than read past. The previous
+  * value is released only once the replacement has been allocated, so a
+  * failed allocation leaves the old path in place.
+  *
+  * Returns EINVAL for an unknown property name or an empty/NULL buffer, and
+  * ENOMEM if the copy cannot be allocated.
+  */
+ static int
+ varpd_files_setprop(void *arg, const char *pname, const void *buf,
+     const uint32_t size)
+ {
+ 	varpd_files_t *vaf = arg;
+ 	char *path;
+ 	size_t len;
+
+ 	if (strcmp(pname, varpd_files_props[0]) != 0)
+ 		return (EINVAL);
+
+ 	if (buf == NULL || size == 0)
+ 		return (EINVAL);
+
+ 	len = strnlen(buf, size);
+ 	if (len == size)
+ 		len = size - 1;
+
+ 	path = umem_alloc(len + 1, UMEM_DEFAULT);
+ 	if (path == NULL)
+ 		return (ENOMEM);
+ 	bcopy(buf, path, len);
+ 	path[len] = '\0';
+
+ 	if (vaf->vaf_path != NULL)
+ 		umem_free(vaf->vaf_path, strlen(vaf->vaf_path) + 1);
+ 	vaf->vaf_path = path;
+ 	return (0);
+ }
+
+ /*
+  * Persist this instance's state into 'nvp'.
+  *
+  * The path is written only when one has been set, but the two counters are
+  * always written: varpd_files_restore() tolerates a missing path yet fails
+  * with EINVAL when either counter is absent, so skipping them would produce
+  * state that can never be restored.
+  *
+  * Returns 0 on success or the error from the failing nvlist_add_* call.
+  */
+ static int
+ varpd_files_save(void *arg, nvlist_t *nvp)
+ {
+ 	int ret;
+ 	varpd_files_t *vaf = arg;
+
+ 	if (vaf->vaf_path != NULL) {
+ 		if ((ret = nvlist_add_string(nvp, varpd_files_props[0],
+ 		    vaf->vaf_path)) != 0)
+ 			return (ret);
+ 	}
+
+ 	if ((ret = nvlist_add_uint64(nvp, "files/vaf_nmisses",
+ 	    vaf->vaf_nmisses)) != 0)
+ 		return (ret);
+
+ 	if ((ret = nvlist_add_uint64(nvp, "files/vaf_narp",
+ 	    vaf->vaf_narp)) != 0)
+ 		return (ret);
+
+ 	return (0);
+ }
+
+ /*
+  * Rebuild an instance from the state written by varpd_files_save().
+  *
+  * The path is optional (ENOENT is tolerated); both counters are required.
+  * On success the new instance is returned through 'outp'.
+  *
+  * Returns EINVAL for an unsupported destination or missing counters, ENOMEM
+  * on allocation failure, or the nvlist lookup error for the path.
+  */
+ static int
+ varpd_files_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
+     overlay_plugin_dest_t dest, void **outp)
+ {
+ 	varpd_files_t *vaf;
+ 	char *str;
+ 	int ret;
+ 	uint64_t nmisses, narp;
+
+ 	if (varpd_files_valid_dest(dest) == B_FALSE)
+ 		return (EINVAL);
+
+ 	ret = nvlist_lookup_string(nvp, varpd_files_props[0], &str);
+ 	if (ret != 0 && ret != ENOENT)
+ 		return (ret);
+ 	else if (ret == ENOENT)
+ 		str = NULL;
+
+ 	if (nvlist_lookup_uint64(nvp, "files/vaf_nmisses", &nmisses) != 0)
+ 		return (EINVAL);
+ 	if (nvlist_lookup_uint64(nvp, "files/vaf_narp", &narp) != 0)
+ 		return (EINVAL);
+
+ 	vaf = umem_alloc(sizeof (varpd_files_t), UMEM_DEFAULT);
+ 	if (vaf == NULL)
+ 		return (ENOMEM);
+
+ 	bzero(vaf, sizeof (varpd_files_t));
+ 	vaf->vaf_dest = dest;
+ 	if (str != NULL) {
+ 		/* Sized as strlen + 1 to match how vaf_path is freed. */
+ 		size_t len = strlen(str) + 1;
+ 		vaf->vaf_path = umem_alloc(len, UMEM_DEFAULT);
+ 		if (vaf->vaf_path == NULL) {
+ 			umem_free(vaf, sizeof (varpd_files_t));
+ 			return (ENOMEM);
+ 		}
+ 		(void) strlcpy(vaf->vaf_path, str, len);
+ 	}
+
+ 	/* Carry the persisted counters over instead of discarding them. */
+ 	vaf->vaf_nmisses = nmisses;
+ 	vaf->vaf_narp = narp;
+
+ 	vaf->vaf_hdl = hdl;
+ 	*outp = vaf;
+ 	return (0);
+ }
+
+ /*
+  * Answer a proxied ARP (IPv4) or NDP (IPv6) query.
+  *
+  * Every nvlist-valued entry of vaf_nvl is keyed by a MAC address string.
+  * The entry's "arp" (IPv4) or "ndp" (IPv6) string is compared against the
+  * queried address; on the first match the entry's MAC is written to 'out'
+  * and the query is answered with VARPD_LOOKUP_OK. Anything else, including
+  * a matching entry whose key is not a parseable MAC, is answered with
+  * VARPD_LOOKUP_DROP.
+  */
+ static void
+ varpd_files_proxy_arp(void *arg, varpd_arp_handle_t *vah, int kind,
+     const struct sockaddr *sock, uint8_t *out)
+ {
+ 	varpd_files_t *vaf = arg;
+ 	const struct sockaddr_in *sin = (const struct sockaddr_in *)sock;
+ 	const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sock;
+ 	nvpair_t *nvp = NULL;
+ 	boolean_t is_v4;
+
+ 	if (kind != VARPD_QTYPE_ETHERNET ||
+ 	    (sock->sa_family != AF_INET && sock->sa_family != AF_INET6)) {
+ 		libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+ 		return;
+ 	}
+ 	is_v4 = (sock->sa_family == AF_INET) ? B_TRUE : B_FALSE;
+
+ 	while ((nvp = nvlist_next_nvpair(vaf->vaf_nvl, nvp)) != NULL) {
+ 		struct ether_addr ea;
+ 		nvlist_t *entry;
+ 		char *addrstr;
+
+ 		if (nvpair_type(nvp) != DATA_TYPE_NVLIST ||
+ 		    nvpair_value_nvlist(nvp, &entry) != 0)
+ 			continue;
+
+ 		if (is_v4 == B_TRUE) {
+ 			struct in_addr v4;
+
+ 			if (nvlist_lookup_string(entry, "arp", &addrstr) != 0 ||
+ 			    inet_pton(AF_INET, addrstr, &v4) != 1 ||
+ 			    bcmp(&v4, &sin->sin_addr,
+ 			    sizeof (struct in_addr)) != 0)
+ 				continue;
+ 		} else {
+ 			struct in6_addr v6;
+
+ 			if (nvlist_lookup_string(entry, "ndp", &addrstr) != 0 ||
+ 			    inet_pton(AF_INET6, addrstr, &v6) != 1 ||
+ 			    bcmp(&v6, &sin6->sin6_addr,
+ 			    sizeof (struct in6_addr)) != 0)
+ 				continue;
+ 		}
+
+ 		/* XXX Crappy errno */
+ 		if (ether_aton_r(nvpair_name(nvp), &ea) == NULL) {
+ 			libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+ 			return;
+ 		}
+
+ 		bcopy(&ea, out, ETHERADDRL);
+ 		libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_OK);
+ 		return;
+ 	}
+
+ 	/* No entry advertises the requested address. */
+ 	libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+ }
+
+ /*
+  * Answer a proxied DHCP query.
+  *
+  * The source MAC of the lookup selects an entry in vaf_nvl; that entry's
+  * "dhcp-proxy" string is parsed as a MAC address and written to 'out'. Any
+  * failure along the way (wrong query type, unknown source, missing or
+  * malformed "dhcp-proxy" value) results in VARPD_LOOKUP_DROP.
+  */
+ static void
+ varpd_files_proxy_dhcp(void *arg, varpd_dhcp_handle_t *vdh, int type,
+     const overlay_targ_lookup_t *otl, uint8_t *out)
+ {
+ 	varpd_files_t *vaf = arg;
+ 	nvlist_t *entry;
+ 	char srcstr[ETHERADDRSTRL], *proxystr;
+ 	struct ether_addr proxy;
+
+ 	/* The checks short-circuit in the same order they depend on. */
+ 	if (type != VARPD_QTYPE_ETHERNET ||
+ 	    ether_ntoa_r((struct ether_addr *)otl->otl_srcaddr,
+ 	    srcstr) == NULL ||
+ 	    nvlist_lookup_nvlist(vaf->vaf_nvl, srcstr, &entry) != 0 ||
+ 	    nvlist_lookup_string(entry, "dhcp-proxy", &proxystr) != 0 ||
+ 	    ether_aton_r(proxystr, &proxy) == NULL) {
+ 		libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP);
+ 		return;
+ 	}
+
+ 	bcopy(&proxy, out, ETHERADDRL);
+ 	libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_OK);
+ }
+
+ /*
+  * Operations vector handed to libvarpd through vpr_ops in
+  * varpd_files_init(). The initializer is positional, so the entries must
+  * stay in the order in which varpd_plugin_ops_t declares its members.
+  * NOTE(review): the meaning of the leading 0 and of the NULL slot between
+  * destroy and lookup is not visible from this file -- confirm against the
+  * varpd_plugin_ops_t definition.
+  */
+ static const varpd_plugin_ops_t varpd_files_ops = {
+ 0,
+ varpd_files_create,
+ varpd_files_start,
+ varpd_files_stop,
+ varpd_files_destroy,
+ NULL,
+ varpd_files_lookup,
+ varpd_files_nprops,
+ varpd_files_propinfo,
+ varpd_files_getprop,
+ varpd_files_setprop,
+ varpd_files_save,
+ varpd_files_restore,
+ varpd_files_proxy_arp,
+ varpd_files_proxy_dhcp
+ };
+
+ /*
+  * Library initializer (run via #pragma init): registers the "files" plugin
+  * with libvarpd as a dynamic overlay target provider. Failures are silently
+  * ignored because there is currently no way to report them from here.
+  */
+ #pragma init(varpd_files_init)
+ static void
+ varpd_files_init(void)
+ {
+ 	int err;
+ 	varpd_plugin_register_t *vpr;
+
+ 	/* XXX How should we communicate this failure? */
+ 	if ((vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err)) == NULL)
+ 		return;
+
+ 	vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC;
+ 	vpr->vpr_name = "files";
+ 	vpr->vpr_ops = &varpd_files_ops;
+
+ 	/* XXX We care about failure, but what do we do? */
+ 	(void) libvarpd_plugin_register(vpr);
+ 	libvarpd_plugin_free(vpr);
+ }
diff --git a/usr/src/lib/varpd/files/common/libvarpd_files_json.c b/usr/src/lib/varpd/files/common/libvarpd_files_json.c
new file mode 100644
index 0000000000..ed1f34b9fe
--- /dev/null
+++ b/usr/src/lib/varpd/files/common/libvarpd_files_json.c
@@ -0,0 +1,744 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <strings.h>
+#include <errno.h>
+#include <libnvpair.h>
+
+#include <libvarpd_files_json.h>
+
+ /*
+  * Kind of value held in a parse frame's pf_value. JSON_TYPE_NOTHING is what
+  * pushstate() assigns to a freshly created frame, before any value has been
+  * parsed into it.
+  */
+ typedef enum json_type {
+ JSON_TYPE_NOTHING,
+ JSON_TYPE_STRING = 1,
+ JSON_TYPE_INTEGER,
+ JSON_TYPE_DOUBLE,
+ JSON_TYPE_BOOLEAN,
+ JSON_TYPE_NULL,
+ JSON_TYPE_OBJECT,
+ JSON_TYPE_ARRAY
+ } json_type_t;
+
+ /*
+  * States of the parser's state machine. The non-negative values are used
+  * directly as indices into the hdlrs[] table, so the order here must match
+  * the order of that table. PARSE_ERROR is negative so nvlist_parse_json()
+  * can detect failure with a sign test, and PARSE_DONE has no handler: it
+  * marks the current frame as complete.
+  */
+ typedef enum parse_state {
+ PARSE_ERROR = -1,
+ PARSE_DONE = 0,
+ PARSE_REST,
+ PARSE_OBJECT,
+ PARSE_KEY_STRING,
+ PARSE_COLON,
+ PARSE_STRING,
+ PARSE_OBJECT_COMMA,
+ PARSE_ARRAY,
+ PARSE_BAREWORD,
+ PARSE_NUMBER,
+ PARSE_ARRAY_VALUE,
+ PARSE_ARRAY_COMMA,
+ } parse_state_t;
+
+#define JSON_MARKER ".__json_"
+#define JSON_MARKER_ARRAY JSON_MARKER "array"
+
+ /*
+  * One entry of the parser stack. A frame is pushed for every value being
+  * parsed (see pushstate()) and popped by nvlist_parse_json() once it reaches
+  * PARSE_DONE.
+  */
+ typedef struct parse_frame {
+ /* Current state; for a parent frame, the state to resume in after a pop. */
+ parse_state_t pf_ps;
+ /* nvlist backing the object or array built in this frame. */
+ nvlist_t *pf_nvl;
+
+ /* Name under which the next child value is stored into pf_nvl. */
+ char *pf_key;
+ /*
+ * Scalar value produced by this frame. Strings are heap pointers;
+ * integers and booleans are stored directly in the pointer value.
+ */
+ void *pf_value;
+ json_type_t pf_value_type;
+ /* Index used to name the next array element. */
+ int pf_array_index;
+
+ /* Parent frame, or NULL for the bottom of the stack. */
+ struct parse_frame *pf_next;
+ } parse_frame_t;
+
+ /*
+  * Overall parser state: the input buffer with a read cursor, the stack of
+  * parse frames and the caller-supplied flags.
+  */
+ typedef struct state {
+ /* Input buffer, current read offset into it, and its length. */
+ char *s_in;
+ off_t s_pos;
+ size_t s_len;
+
+ /* Top of the parse frame stack. */
+ parse_frame_t *s_top;
+
+ nvlist_parse_json_flags_t s_flags;
+ } state_t;
+
+typedef void (*parse_handler_t)(state_t *);
+
+ /*
+  * Switch the top-most parse frame to state 'ps'.
+  */
+ static void
+ movestate(state_t *s, parse_state_t ps)
+ {
+ 	parse_frame_t *top = s->s_top;
+
+ #ifdef DEBUG
+ 	fprintf(stderr, "move state %d -> %d\n", top->pf_ps, ps);
+ #endif
+ 	top->pf_ps = ps;
+ }
+
+ /*
+  * Push a new frame that starts in state 'ps'. The current top frame is
+  * first switched to 'retps', the state it resumes in once the new frame has
+  * been popped.
+  *
+  * If the frame cannot be allocated, the current frame is put into
+  * PARSE_ERROR so that the main loop stops instead of dereferencing NULL.
+  */
+ static void
+ pushstate(state_t *s, parse_state_t ps, parse_state_t retps)
+ {
+ 	parse_frame_t *n = calloc(1, sizeof (*n));
+
+ 	if (n == NULL) {
+ 		movestate(s, PARSE_ERROR);
+ 		return;
+ 	}
+
+ #ifdef DEBUG
+ 	fprintf(stderr, "push state %d -> %d (ret %d)\n", s->s_top->pf_ps, ps,
+ 	    retps);
+ #endif
+
+ 	/*
+ 	 * Store the state we'll return to when popping this
+ 	 * frame:
+ 	 */
+ 	s->s_top->pf_ps = retps;
+
+ 	/*
+ 	 * Store the initial state for the new frame, and
+ 	 * put it on top of the stack:
+ 	 */
+ 	n->pf_ps = ps;
+ 	n->pf_value_type = JSON_TYPE_NOTHING;
+
+ 	n->pf_next = s->s_top;
+ 	s->s_top = n;
+ }
+
+ /*
+  * Record a parse error: optionally report 'error' together with the current
+  * input offset on stderr (only when NVJSON_ERRORS_TO_STDERR is set), then
+  * put the top frame into PARSE_ERROR.
+  */
+ static void
+ posterror(state_t *s, const char *error)
+ {
+ 	/*
+ 	 * XXX do something better here.
+ 	 */
+ 	if (s->s_flags & NVJSON_ERRORS_TO_STDERR) {
+ 		/* off_t is not necessarily long long; cast to match %lld. */
+ 		fprintf(stderr, "error (pos %lld): %s\n",
+ 		    (long long)s->s_pos, error);
+ 	}
+ 	movestate(s, PARSE_ERROR);
+ }
+
+ /*
+  * Consume and return the next input character, or return 0 without
+  * advancing once the end of the buffer has been reached.
+  *
+  * Valid offsets are 0 .. s_len - 1, so the end test must be '>='; testing
+  * with '>' would read one byte past the end of the caller's buffer.
+  */
+ static char
+ popchar(state_t *s)
+ {
+ 	if ((size_t)s->s_pos >= s->s_len) {
+ 		return (0);
+ 	}
+ 	return (s->s_in[s->s_pos++]);
+ }
+
+ /*
+  * Return the next input character without consuming it, or 0 at the end of
+  * the buffer.
+  *
+  * Valid offsets are 0 .. s_len - 1, so the end test must be '>='; testing
+  * with '>' would read one byte past the end of the caller's buffer.
+  */
+ static char
+ peekchar(state_t *s)
+ {
+ 	if ((size_t)s->s_pos >= s->s_len) {
+ 		return (0);
+ 	}
+ 	return (s->s_in[s->s_pos]);
+ }
+
+ /*
+  * Skip over any run of whitespace at the current input position.
+  */
+ static void
+ discard_whitespace(state_t *s)
+ {
+ 	/*
+ 	 * The <ctype.h> classifiers require a value representable as
+ 	 * unsigned char; plain char may be negative for bytes >= 0x80.
+ 	 */
+ 	while (isspace((unsigned char)peekchar(s)))
+ 		(void) popchar(s);
+ }
+
+ /*
+  * Single-character escapes, consulted by collect_string_escape(). Each
+  * entry is a two-character string: the character that follows the backslash
+  * in the input, then the character it stands for. The list is
+  * NULL-terminated.
+  */
+ static char *escape_pairs[] = {
+ "\"\"", "\\\\", "//", "b\b", "f\f", "n\n", "r\r", "t\t", NULL
+ };
+
+ /*
+  * Decode the escape sequence that follows a backslash inside a string and
+  * return the character it denotes. Returns '\0' on any error, which callers
+  * treat as failure.
+  *
+  * A \uXXXX escape consists of four hexadecimal digits; only code points
+  * 0x01 - 0x7f are supported because the result is a single char and '\0'
+  * is reserved as the error indicator.
+  */
+ static char
+ collect_string_escape(state_t *s)
+ {
+ 	int i;
+ 	char c = popchar(s);
+
+ 	if (c == '\0') {
+ 		fprintf(stderr, "ERROR: eof mid-escape\n");
+ 		return ('\0');
+ 	} else if (c == 'u') {
+ 		long res;
+ 		int ndigs;
+ 		char digs[5];
+
+ 		/*
+ 		 * Deal with 4-digit unicode escape.
+ 		 */
+ 		for (ndigs = 0; ndigs < 4; ndigs++) {
+ 			char d = popchar(s);
+
+ 			if (d == '\0') {
+ 				fprintf(stderr, "ERROR: eof mid-escape\n");
+ 				return ('\0');
+ 			}
+ 			if (!isxdigit((unsigned char)d)) {
+ 				fprintf(stderr,
+ 				    "ERROR: invalid unicode escape\n");
+ 				return ('\0');
+ 			}
+ 			digs[ndigs] = d;
+ 		}
+ 		digs[4] = '\0';
+
+ 		/* The digits are hexadecimal, not decimal. */
+ 		res = strtol(digs, NULL, 16);
+ 		if (res > 127) {
+ 			fprintf(stderr, "ERROR: unicode escape above 0x7f\n");
+ 			return ('\0');
+ 		}
+ 		if (res == 0) {
+ 			fprintf(stderr,
+ 			    "ERROR: unicode escape of NUL not supported\n");
+ 			return ('\0');
+ 		}
+ 		return ((char)res);
+ 	}
+
+ 	for (i = 0; escape_pairs[i] != NULL; i++) {
+ 		char *ep = escape_pairs[i];
+ 		if (ep[0] == c)
+ 			return (ep[1]);
+ 	}
+
+ 	fprintf(stderr, "ERROR: unrecognised escape char %c\n", c);
+ 	return ('\0');
+ }
+
+ /*
+  * Collect the body of a string whose opening quote has already been
+  * consumed, up to and including the closing quote, decoding escapes along
+  * the way.
+  *
+  * The result is accumulated in a heap buffer that grows on demand, so the
+  * length of the input string is not limited by a fixed-size buffer.
+  *
+  * Returns a NUL-terminated heap string that the caller must free(), or NULL
+  * on premature end of input, a bad escape, or allocation failure.
+  */
+ static char *
+ collect_string(state_t *s)
+ {
+ 	size_t cap = 64;
+ 	size_t len = 0;
+ 	char *buf = malloc(cap);
+
+ 	if (buf == NULL)
+ 		return (NULL);
+
+ 	for (;;) {
+ 		char c = popchar(s);
+
+ 		if (c == '\0') {
+ 			/*
+ 			 * Unexpected EOF
+ 			 */
+ 			fprintf(stderr, "ERROR: unexpected EOF mid-string\n");
+ 			free(buf);
+ 			return (NULL);
+ 		} else if (c == '"') {
+ 			/*
+ 			 * Legal End of String.
+ 			 */
+ 			break;
+ 		} else if (c == '\\') {
+ 			/*
+ 			 * Escape Character.
+ 			 *
+ 			 * XXX better error handling here.
+ 			 */
+ 			if ((c = collect_string_escape(s)) == '\0') {
+ 				free(buf);
+ 				return (NULL);
+ 			}
+ 		}
+
+ 		/* Always keep room for the terminating NUL. */
+ 		if (len + 1 >= cap) {
+ 			char *nbuf = realloc(buf, cap * 2);
+
+ 			if (nbuf == NULL) {
+ 				free(buf);
+ 				return (NULL);
+ 			}
+ 			buf = nbuf;
+ 			cap *= 2;
+ 		}
+ 		buf[len++] = c;
+ 	}
+
+ 	buf[len] = '\0';
+ 	return (buf);
+ }
+
+ /*
+  * Collect a run of lower-case letters (a candidate for 'true', 'false' or
+  * 'null') and return it as a heap string the caller must free().
+  *
+  * The whole run is consumed from the input, but only as much as fits in
+  * the local buffer is kept. A truncated word is far longer than any valid
+  * keyword, so it still fails the caller's comparison.
+  *
+  * Returns NULL if the copy cannot be allocated.
+  */
+ static char *
+ collect_bareword(state_t *s)
+ {
+ 	char out[100];
+ 	size_t len = 0;
+
+ 	while (islower((unsigned char)peekchar(s))) {
+ 		char c = popchar(s);
+
+ 		if (len < sizeof (out) - 1)
+ 			out[len++] = c;
+ 	}
+
+ 	/*
+ 	 * We're done...
+ 	 */
+ 	out[len] = '\0';
+ 	return (strdup(out));
+ }
+
+ /*
+  * PARSE_BAREWORD handler: parse one of the literals 'true', 'false' or
+  * 'null' into the current frame and mark the frame done. Anything else, or
+  * a failure to allocate the collected word, is a parse error.
+  */
+ static void
+ hdlr_bareword(state_t *s)
+ {
+ 	char *str = collect_bareword(s);
+
+ 	if (str == NULL) {
+ 		posterror(s, "could not allocate memory");
+ 		return;
+ 	}
+
+ 	if (strcmp(str, "true") == 0) {
+ 		s->s_top->pf_value_type = JSON_TYPE_BOOLEAN;
+ 		s->s_top->pf_value = (void *) B_TRUE;
+ 	} else if (strcmp(str, "false") == 0) {
+ 		s->s_top->pf_value_type = JSON_TYPE_BOOLEAN;
+ 		s->s_top->pf_value = (void *) B_FALSE;
+ 	} else if (strcmp(str, "null") == 0) {
+ 		s->s_top->pf_value_type = JSON_TYPE_NULL;
+ 	} else {
+ 		free(str);
+ 		posterror(s, "expected 'true', 'false' or 'null'");
+ 		return;
+ 	}
+
+ 	free(str);
+ 	movestate(s, PARSE_DONE);
+ }
+
+ /*
+  * Parse an optionally negative decimal integer at the current position.
+  *
+  * On success *isint is set to B_TRUE, *result receives the value and 0 is
+  * returned. Fractions and exponents are not supported; they, a missing
+  * digit, and values outside the int32_t range all yield -1.
+  *
+  * The digits are accumulated arithmetically as they are consumed, so there
+  * is no intermediate buffer to overflow and out-of-range input is detected
+  * instead of silently wrapping.
+  */
+ static int
+ collect_number(state_t *s, boolean_t *isint, int32_t *result,
+     double *fresult __UNUSED)
+ {
+ 	boolean_t neg = B_FALSE;
+ 	long long val = 0;
+ 	char c;
+
+ 	if (peekchar(s) == '-') {
+ 		neg = B_TRUE;
+ 		(void) popchar(s);
+ 	}
+
+ 	/*
+ 	 * Read the 'int' portion:
+ 	 */
+ 	if (!isdigit((unsigned char)peekchar(s))) {
+ 		fprintf(stderr, "expected a digit (0-9)\n");
+ 		return (-1);
+ 	}
+ 	while (isdigit((unsigned char)(c = peekchar(s)))) {
+ 		(void) popchar(s);
+ 		val = val * 10 + (c - '0');
+ 		/* 2147483648 is only valid as the magnitude of INT32_MIN. */
+ 		if (val > 2147483648LL) {
+ 			fprintf(stderr, "integer out of range\n");
+ 			return (-1);
+ 		}
+ 	}
+
+ 	c = peekchar(s);
+ 	if (c == '.' || c == 'e' || c == 'E') {
+ 		fprintf(stderr, "do not yet support FRACs or EXPs\n");
+ 		return (-1);
+ 	}
+
+ 	if (neg == B_TRUE)
+ 		val = -val;
+ 	if (val > 2147483647LL) {
+ 		fprintf(stderr, "integer out of range\n");
+ 		return (-1);
+ 	}
+
+ 	*isint = B_TRUE;
+ 	*result = (int32_t)val;
+ 	return (0);
+ }
+
+ /*
+  * PARSE_NUMBER handler: parse a number into the current frame and mark the
+  * frame done. Integers are stored directly in the pf_value pointer; doubles
+  * would be stored in a heap allocation (collect_number() currently only
+  * ever produces integers).
+  */
+ static void
+ hdlr_number(state_t *s)
+ {
+ 	boolean_t isint;
+ 	int32_t result;
+ 	double fresult = 0;
+
+ 	if (collect_number(s, &isint, &result, &fresult) != 0) {
+ 		posterror(s, "malformed number");
+ 		return;
+ 	}
+
+ 	if (isint == B_TRUE) {
+ 		s->s_top->pf_value = (void *)(uintptr_t)result;
+ 		s->s_top->pf_value_type = JSON_TYPE_INTEGER;
+ 	} else {
+ 		double *dp = malloc(sizeof (fresult));
+
+ 		if (dp == NULL) {
+ 			posterror(s, "could not allocate memory");
+ 			return;
+ 		}
+ 		bcopy(&fresult, dp, sizeof (fresult));
+ 		s->s_top->pf_value = dp;
+ 		s->s_top->pf_value_type = JSON_TYPE_DOUBLE;
+ 	}
+
+ 	movestate(s, PARSE_DONE);
+ }
+
+ /*
+  * PARSE_REST handler: the initial state. The document must start (after
+  * optional whitespace) with an object or an array; anything else is an
+  * error.
+  */
+ static void
+ hdlr_rest(state_t *s)
+ {
+ 	char first;
+
+ 	discard_whitespace(s);
+ 	first = popchar(s);
+
+ 	if (first == '{')
+ 		movestate(s, PARSE_OBJECT);
+ 	else if (first == '[')
+ 		movestate(s, PARSE_ARRAY);
+ 	else
+ 		posterror(s, "eof before object or array");
+ }
+
+ /*
+  * Create the empty nvlist that represents the object or array being parsed
+  * in the current frame and record it in pf_nvl.
+  *
+  * When the frame has a parent, the new list is added to the parent's nvlist
+  * under the parent's pending key. nvlist_add_nvlist() stores a copy, so the
+  * local list is freed and pf_nvl is pointed at the copy held by the parent.
+  *
+  * Returns 0 on success and -1 on any nvlist failure.
+  */
+ static int
+ add_empty_child(state_t *s)
+ {
+ 	parse_frame_t *self = s->s_top;
+ 	parse_frame_t *parent = self->pf_next;
+ 	nvlist_t *child;
+
+ 	if (nvlist_alloc(&child, NV_UNIQUE_NAME, 0) != 0)
+ 		return (-1);
+
+ 	if (parent != NULL) {
+ 		int err = nvlist_add_nvlist(parent->pf_nvl, parent->pf_key,
+ 		    child);
+
+ 		/* Our local list is no longer needed either way. */
+ 		nvlist_free(child);
+ 		if (err != 0)
+ 			return (-1);
+
+ 		if (nvlist_lookup_nvlist(parent->pf_nvl, parent->pf_key,
+ 		    &child) != 0)
+ 			return (-1);
+ 	}
+
+ 	self->pf_nvl = child;
+ 	return (0);
+ }
+
+ /*
+  * Finish an array: tag its nvlist with the internal array marker and store
+  * the number of elements under "length". Returns 0 on success, -1 if either
+  * nvlist operation fails.
+  */
+ static int
+ decorate_array(state_t *s)
+ {
+ 	parse_frame_t *top = s->s_top;
+
+ 	if (nvlist_add_boolean(top->pf_nvl, JSON_MARKER_ARRAY) != 0)
+ 		return (-1);
+
+ 	if (nvlist_add_uint32(top->pf_nvl, "length",
+ 	    top->pf_array_index) != 0)
+ 		return (-1);
+
+ 	return (0);
+ }
+
+ /*
+  * PARSE_ARRAY handler: entered just after a '[' has been consumed. Creates
+  * the nvlist for the array, then either finishes immediately for an empty
+  * array or moves on to parse the first element.
+  */
+ static void
+ hdlr_array(state_t *s)
+ {
+ 	s->s_top->pf_value_type = JSON_TYPE_ARRAY;
+
+ 	if (add_empty_child(s) == -1) {
+ 		posterror(s, "nvlist error");
+ 		return;
+ 	}
+
+ 	discard_whitespace(s);
+ 	if (peekchar(s) != ']') {
+ 		movestate(s, PARSE_ARRAY_VALUE);
+ 		return;
+ 	}
+
+ 	/* Empty array: consume the ']' and finish the frame. */
+ 	(void) popchar(s);
+ 	if (decorate_array(s) != 0) {
+ 		/* Don't hand back an array without its marker and length. */
+ 		posterror(s, "nvlist error");
+ 		return;
+ 	}
+ 	movestate(s, PARSE_DONE);
+ }
+
+ /*
+  * PARSE_ARRAY_COMMA handler: entered after an array element has been
+  * stored. Expects either ']' (end of array) or ',' (another element).
+  */
+ static void
+ hdlr_array_comma(state_t *s)
+ {
+ 	discard_whitespace(s);
+
+ 	switch (popchar(s)) {
+ 	case ']':
+ 		if (decorate_array(s) != 0) {
+ 			/* Don't hand back an array missing its metadata. */
+ 			posterror(s, "nvlist error");
+ 			return;
+ 		}
+ 		movestate(s, PARSE_DONE);
+ 		return;
+ 	case ',':
+ 		movestate(s, PARSE_ARRAY_VALUE);
+ 		return;
+ 	default:
+ 		posterror(s, "expected ',' or ']'");
+ 		return;
+ 	}
+ }
+
+ /*
+  * PARSE_ARRAY_VALUE handler: generate the key for the next array element
+  * (its decimal index) and push a frame for the handler matching the type
+  * of the value that follows.
+  */
+ static void
+ hdlr_array_value(state_t *s)
+ {
+ 	char c;
+
+ 	discard_whitespace(s);
+
+ 	/*
+ 	 * Generate keyname from the next array index:
+ 	 */
+ 	if (s->s_top->pf_key != NULL) {
+ 		fprintf(stderr, "pf_key not null! was %s\n", s->s_top->pf_key);
+ 		abort();
+ 	}
+ 	s->s_top->pf_key = malloc(11); /* 10 digits in uint32_t */
+ 	if (s->s_top->pf_key == NULL) {
+ 		posterror(s, "could not allocate memory");
+ 		return;
+ 	}
+ 	(void) snprintf(s->s_top->pf_key, 11, "%d", s->s_top->pf_array_index++);
+
+ 	/*
+ 	 * Select which type handler we need for the next value:
+ 	 */
+ 	switch (c = peekchar(s)) {
+ 	case '"':
+ 		(void) popchar(s);
+ 		pushstate(s, PARSE_STRING, PARSE_ARRAY_COMMA);
+ 		return;
+ 	case '{':
+ 		(void) popchar(s);
+ 		pushstate(s, PARSE_OBJECT, PARSE_ARRAY_COMMA);
+ 		return;
+ 	case '[':
+ 		(void) popchar(s);
+ 		pushstate(s, PARSE_ARRAY, PARSE_ARRAY_COMMA);
+ 		return;
+ 	default:
+ 		/*
+ 		 * The <ctype.h> classifiers need an unsigned char value;
+ 		 * plain char may be negative for bytes >= 0x80.
+ 		 */
+ 		if (islower((unsigned char)c))
+ 			pushstate(s, PARSE_BAREWORD, PARSE_ARRAY_COMMA);
+ 		else if (c == '-' || isdigit((unsigned char)c))
+ 			pushstate(s, PARSE_NUMBER, PARSE_ARRAY_COMMA);
+ 		else
+ 			posterror(s, "unexpected character at start "
+ 			    "of value");
+ 		return;
+ 	}
+ }
+
+ /*
+  * PARSE_OBJECT handler: entered just after a '{' has been consumed. Creates
+  * the nvlist for the object, then expects either '}' (empty object) or the
+  * opening quote of the first key.
+  */
+ static void
+ hdlr_object(state_t *s)
+ {
+ 	char next;
+
+ 	s->s_top->pf_value_type = JSON_TYPE_OBJECT;
+
+ 	if (add_empty_child(s) == -1) {
+ 		posterror(s, "nvlist error");
+ 		return;
+ 	}
+
+ 	discard_whitespace(s);
+ 	next = popchar(s);
+
+ 	if (next == '}')
+ 		movestate(s, PARSE_DONE);
+ 	else if (next == '"')
+ 		movestate(s, PARSE_KEY_STRING);
+ 	else
+ 		posterror(s, "expected key or '}'");
+ }
+
+ /*
+  * PARSE_KEY_STRING handler: collect an object key (its opening quote has
+  * already been consumed) and remember it in the current frame.
+  */
+ static void
+ hdlr_key_string(state_t *s)
+ {
+ 	char *key = collect_string(s);
+
+ 	if (key != NULL) {
+ 		/*
+ 		 * Record the name of the next value; it is consumed when
+ 		 * that value is stored.
+ 		 */
+ 		s->s_top->pf_key = key;
+ 		movestate(s, PARSE_COLON);
+ 	} else {
+ 		posterror(s, "could not collect key string");
+ 	}
+ }
+
+ /*
+  * PARSE_COLON handler: consume the ':' that separates a key from its value
+  * and push a frame for the handler matching the type of the value.
+  */
+ static void
+ hdlr_colon(state_t *s)
+ {
+ 	char c;
+
+ 	discard_whitespace(s);
+
+ 	if ((c = popchar(s)) != ':') {
+ 		posterror(s, "expected ':'");
+ 		return;
+ 	}
+
+ 	discard_whitespace(s);
+
+ 	/*
+ 	 * Select which type handler we need for the value after the colon:
+ 	 */
+ 	switch (c = peekchar(s)) {
+ 	case '"':
+ 		(void) popchar(s);
+ 		pushstate(s, PARSE_STRING, PARSE_OBJECT_COMMA);
+ 		return;
+ 	case '{':
+ 		(void) popchar(s);
+ 		pushstate(s, PARSE_OBJECT, PARSE_OBJECT_COMMA);
+ 		return;
+ 	case '[':
+ 		(void) popchar(s);
+ 		pushstate(s, PARSE_ARRAY, PARSE_OBJECT_COMMA);
+ 		return;
+ 	default:
+ 		/*
+ 		 * The <ctype.h> classifiers need an unsigned char value;
+ 		 * plain char may be negative for bytes >= 0x80.
+ 		 */
+ 		if (islower((unsigned char)c))
+ 			pushstate(s, PARSE_BAREWORD, PARSE_OBJECT_COMMA);
+ 		else if (c == '-' || isdigit((unsigned char)c))
+ 			pushstate(s, PARSE_NUMBER, PARSE_OBJECT_COMMA);
+ 		else
+ 			posterror(s, "unexpected character at start "
+ 			    "of value");
+ 		return;
+ 	}
+ }
+
+ /*
+  * PARSE_OBJECT_COMMA handler: entered after an object member has been
+  * stored. Expects '}' (end of object) or ',' followed by the opening quote
+  * of the next key.
+  */
+ static void
+ hdlr_object_comma(state_t *s)
+ {
+ 	char sep;
+
+ 	discard_whitespace(s);
+ 	sep = popchar(s);
+
+ 	if (sep == '}') {
+ 		movestate(s, PARSE_DONE);
+ 		return;
+ 	}
+
+ 	if (sep != ',') {
+ 		posterror(s, "expected ',' or '}'");
+ 		return;
+ 	}
+
+ 	discard_whitespace(s);
+ 	if (popchar(s) != '"') {
+ 		posterror(s, "expected '\"'");
+ 		return;
+ 	}
+ 	movestate(s, PARSE_KEY_STRING);
+ }
+
+ /*
+  * PARSE_STRING handler: collect a string value (its opening quote has
+  * already been consumed) into the current frame and mark the frame done.
+  */
+ static void
+ hdlr_string(state_t *s)
+ {
+ 	char *str = collect_string(s);
+
+ 	/*
+ 	 * It is the collected string that may be NULL, not the parser
+ 	 * state; a failed collection must not be treated as a value.
+ 	 */
+ 	if (str == NULL) {
+ 		posterror(s, "could not collect string");
+ 		return;
+ 	}
+
+ 	s->s_top->pf_value = str;
+ 	s->s_top->pf_value_type = JSON_TYPE_STRING;
+ 	movestate(s, PARSE_DONE);
+ }
+
+ /*
+  * Store the value produced by the finished top frame into its parent's
+  * nvlist under the parent's pending key, then release that key.
+  *
+  * Objects and arrays need no work here because add_empty_child() already
+  * linked them into the parent when they were created. An unknown value type
+  * aborts the process.
+  *
+  * Returns 0 or the error from the nvlist_add_* call.
+  */
+ static int
+ store_value(state_t *s)
+ {
+ 	parse_frame_t *child = s->s_top;
+ 	parse_frame_t *parent = child->pf_next;
+ 	nvlist_t *targ = parent->pf_nvl;
+ 	char *key = parent->pf_key;
+ 	int ret = 0;
+
+ 	switch (child->pf_value_type) {
+ 	case JSON_TYPE_STRING:
+ 		ret = nvlist_add_string(targ, key, child->pf_value);
+ 		/* The nvlist keeps its own copy of the string. */
+ 		free(child->pf_value);
+ 		break;
+ 	case JSON_TYPE_BOOLEAN:
+ 		ret = nvlist_add_boolean_value(targ, key,
+ 		    (boolean_t)child->pf_value);
+ 		break;
+ 	case JSON_TYPE_NULL:
+ 		ret = nvlist_add_boolean(targ, key);
+ 		break;
+ 	case JSON_TYPE_INTEGER:
+ 		ret = nvlist_add_int32(targ, key,
+ 		    (int32_t)(uintptr_t)child->pf_value);
+ 		break;
+ 	case JSON_TYPE_ARRAY:
+ 	case JSON_TYPE_OBJECT:
+ 		/*
+ 		 * Objects and arrays are already 'stored' in their target
+ 		 * nvlist on creation. See: hdlr_object, hdlr_array.
+ 		 */
+ 		break;
+ 	default:
+ 		fprintf(stderr, "ERROR: could not store unknown type %d\n",
+ 		    child->pf_value_type);
+ 		abort();
+ 	}
+
+ 	child->pf_value = NULL;
+ 	free(parent->pf_key);
+ 	parent->pf_key = NULL;
+ 	return (ret);
+ }
+
+ /*
+  * Release a parse frame and return the frame below it on the stack.
+  *
+  * The frame's key is freed, and its nvlist too when 'free_nvl' is set. A
+  * frame that still carries an unstored value indicates a parser bug and
+  * aborts the process.
+  */
+ static parse_frame_t *
+ parse_frame_free(parse_frame_t *pf, boolean_t free_nvl)
+ {
+ 	parse_frame_t *below = pf->pf_next;
+
+ 	/* free(NULL) is a no-op, so no guard is needed. */
+ 	free(pf->pf_key);
+
+ 	if (pf->pf_value != NULL)
+ 		abort();
+
+ 	if (free_nvl && pf->pf_nvl != NULL)
+ 		nvlist_free(pf->pf_nvl);
+
+ 	free(pf);
+ 	return (below);
+ }
+
+ /*
+  * Handler dispatch table, indexed by parse_state_t; the entry order must
+  * match the enum. PARSE_DONE has no handler because nvlist_parse_json()
+  * deals with it inline by popping the finished frame.
+  */
+ static parse_handler_t hdlrs[] = {
+ NULL, /* PARSE_DONE */
+ hdlr_rest, /* PARSE_REST */
+ hdlr_object, /* PARSE_OBJECT */
+ hdlr_key_string, /* PARSE_KEY_STRING */
+ hdlr_colon, /* PARSE_COLON */
+ hdlr_string, /* PARSE_STRING */
+ hdlr_object_comma, /* PARSE_OBJECT_COMMA */
+ hdlr_array, /* PARSE_ARRAY */
+ hdlr_bareword, /* PARSE_BAREWORD */
+ hdlr_number, /* PARSE_NUMBER */
+ hdlr_array_value, /* PARSE_ARRAY_VALUE */
+ hdlr_array_comma, /* PARSE_ARRAY_COMMA */
+ };
+#define NUM_PARSE_HANDLERS (int)(sizeof (hdlrs) / sizeof (hdlrs[0]))
+
+ /*
+  * Parse the 'buflen' bytes of JSON text at 'buf' into a newly allocated
+  * nvlist, returned through 'nvlp' on success. The document must be an
+  * object or an array; arrays are represented as nvlists keyed by decimal
+  * index.
+  *
+  * Returns 0 on success, EINVAL for NULL arguments or when both FORCE flags
+  * are set, EFAULT for a parse error, the errno from a failed frame
+  * allocation, or the error from a failed nvlist_add_* call.
+  */
+ int
+ nvlist_parse_json(char *buf, size_t buflen, nvlist_t **nvlp,
+     nvlist_parse_json_flags_t flag)
+ {
+ 	int ret = 0;
+ 	state_t s;
+
+ 	if (buf == NULL || nvlp == NULL)
+ 		return (EINVAL);
+
+ 	/*
+ 	 * Check for valid flags:
+ 	 */
+ 	if ((flag & (NVJSON_FORCE_INTEGER | NVJSON_FORCE_DOUBLE)) ==
+ 	    (NVJSON_FORCE_INTEGER | NVJSON_FORCE_DOUBLE))
+ 		return (EINVAL);
+
+ 	/*
+ 	 * Initialise parsing state structure:
+ 	 */
+ 	bzero(&s, sizeof (s));
+ 	s.s_in = buf;
+ 	s.s_pos = 0;
+ 	s.s_len = buflen;
+ 	s.s_flags = flag;
+
+ 	/*
+ 	 * Allocate top-most stack frame:
+ 	 */
+ 	s.s_top = calloc(1, sizeof (*s.s_top));
+ 	if (s.s_top == NULL) {
+ 		ret = errno;
+ 		goto out;
+ 	}
+
+ 	s.s_top->pf_ps = PARSE_REST;
+ 	for (;;) {
+ 		if (s.s_top->pf_ps < 0) {
+ 			/*
+ 			 * The parser reported an error.
+ 			 */
+ 			ret = EFAULT;
+ 			goto out;
+ 		} else if (s.s_top->pf_ps == PARSE_DONE) {
+ 			if (s.s_top->pf_next == NULL) {
+ 				/*
+ 				 * Last frame, so we're really
+ 				 * done.
+ 				 */
+ 				*nvlp = s.s_top->pf_nvl;
+ 				goto out;
+ 			} else {
+ 				/*
+ 				 * Otherwise, copy out the value we created
+ 				 * in the finished frame, then pop it and
+ 				 * continue in the previous frame's state.
+ 				 */
+ 				if ((ret = store_value(&s)) != 0)
+ 					goto out;
+ 				s.s_top = parse_frame_free(s.s_top, B_FALSE);
+ 			}
+ 		}
+ 		/*
+ 		 * Dispatch to parser handler routine for this state:
+ 		 */
+ 		if (s.s_top->pf_ps >= NUM_PARSE_HANDLERS ||
+ 		    hdlrs[s.s_top->pf_ps] == NULL) {
+ 			fprintf(stderr, "no handler for state %d\n",
+ 			    s.s_top->pf_ps);
+ 			abort();
+ 		}
+ 		hdlrs[s.s_top->pf_ps](&s);
+ 	}
+
+ out:
+ 	while (s.s_top != NULL) {
+ 		/*
+ 		 * Only the bottom frame owns its nvlist. Every other frame's
+ 		 * pf_nvl was obtained by add_empty_child() looking it up in
+ 		 * the parent's list, so it is released when the root list is
+ 		 * freed. On success the root is handed to the caller and
+ 		 * nothing is freed here.
+ 		 */
+ 		boolean_t owned = (ret != 0 && s.s_top->pf_next == NULL) ?
+ 		    B_TRUE : B_FALSE;
+
+ 		s.s_top = parse_frame_free(s.s_top, owned);
+ 	}
+ 	return (ret);
+ }
diff --git a/usr/src/lib/varpd/files/common/libvarpd_files_json.h b/usr/src/lib/varpd/files/common/libvarpd_files_json.h
new file mode 100644
index 0000000000..2a96e55206
--- /dev/null
+++ b/usr/src/lib/varpd/files/common/libvarpd_files_json.h
@@ -0,0 +1,40 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LIBVARPD_FILES_JSON_H
+#define _LIBVARPD_FILES_JSON_H
+
+#include <libnvpair.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /*
+  * Flags accepted by nvlist_parse_json(). NVJSON_FORCE_INTEGER and
+  * NVJSON_FORCE_DOUBLE are mutually exclusive: passing both is rejected with
+  * EINVAL. NVJSON_ERRORS_TO_STDERR makes the parser print a message with the
+  * input offset to stderr when it reports a parse error.
+  */
+ typedef enum nvlist_parse_json_flags {
+ NVJSON_FORCE_INTEGER = 0x01,
+ NVJSON_FORCE_DOUBLE = 0x02,
+ NVJSON_ERRORS_TO_STDERR = 0x04
+ } nvlist_parse_json_flags_t;
+
+ /*
+  * Parse a buffer of JSON text (pointer and length) into a newly allocated
+  * nvlist returned through the third argument. Returns 0 on success or an
+  * errno value on failure.
+  */
+ extern int nvlist_parse_json(char *, size_t, nvlist_t **,
+ nvlist_parse_json_flags_t);
+
+#define __UNUSED __attribute__((unused))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_FILES_JSON_H */
diff --git a/usr/src/lib/varpd/files/common/llib-lvarpd_files b/usr/src/lib/varpd/files/common/llib-lvarpd_files
new file mode 100644
index 0000000000..31b3d36fbe
--- /dev/null
+++ b/usr/src/lib/varpd/files/common/llib-lvarpd_files
@@ -0,0 +1,18 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
diff --git a/usr/src/lib/varpd/files/common/mapfile-vers b/usr/src/lib/varpd/files/common/mapfile-vers
new file mode 100644
index 0000000000..642ef72adc
--- /dev/null
+++ b/usr/src/lib/varpd/files/common/mapfile-vers
@@ -0,0 +1,35 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION SUNWprivate {
+ local:
+ *;
+};
diff --git a/usr/src/lib/varpd/files/i386/Makefile b/usr/src/lib/varpd/files/i386/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/varpd/files/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+ # Definitions shared by every architecture build of this library.
+ include ../Makefile.com
+
+ # Installing requires a completed build plus the installed library, its
+ # links and its lint library.
+ install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/files/sparc/Makefile b/usr/src/lib/varpd/files/sparc/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/varpd/files/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+ # Definitions shared by every architecture build of this library.
+ include ../Makefile.com
+
+ # Installing requires a completed build plus the installed library, its
+ # links and its lint library.
+ install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/files/sparcv9/Makefile b/usr/src/lib/varpd/files/sparcv9/Makefile
new file mode 100644
index 0000000000..5c586c1d40
--- /dev/null
+++ b/usr/src/lib/varpd/files/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+ # Definitions shared by every architecture build of this library, followed
+ # by the 64-bit library build settings.
+ include ../Makefile.com
+ include ../../../Makefile.lib.64
+
+ # Installing requires a completed build plus the installed 64-bit library,
+ # its links and its lint library.
+ install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/libvarpd/Makefile b/usr/src/lib/varpd/libvarpd/Makefile
new file mode 100644
index 0000000000..60c9cfa07f
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/Makefile
@@ -0,0 +1,54 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+ include ../../Makefile.lib
+
+ # Public headers, found in $(HDRDIR), that install_h installs.
+ HDRS = libvarpd.h libvarpd_client.h libvarpd_provider.h
+ HDRDIR = common
+ # Build in the native architecture directory, plus the 64-bit one when
+ # $(BUILD64) enables that line.
+ SUBDIRS = $(MACH)
+ $(BUILD64)SUBDIRS += $(MACH64)
+
+ # Library and type names consumed by the 'check' target through $(TYPECHECK).
+ TYPECHECK_LIB = libvarpd.so.1
+ TYPELIST = \
+ varpd_client_instance_arg_t \
+ varpd_client_nprops_arg_t \
+ varpd_client_propinfo_arg_t \
+ varpd_client_eresp_t \
+ overlay_targ_cache_entry_t \
+ overlay_targ_cache_t \
+ overlay_targ_cache_iter_t
+
+ # Each top-level target sets TARGET to its own name; the $(SUBDIRS) rule
+ # below passes it to the recursive $(MAKE).
+ all := TARGET = all
+ clean := TARGET = clean
+ clobber := TARGET = clobber
+ install := TARGET = install
+ lint := TARGET = lint
+
+ .KEEP_STATE:
+
+ all clean clobber lint: $(SUBDIRS)
+
+ install: $(SUBDIRS) $(VARPD_MAPFILES) install_h
+
+ install_h: $(ROOTHDRS)
+
+ check: $(CHECKHDRS) $(TYPECHECK)
+
+ # Recurse into each architecture directory; FRC forces the rule to run on
+ # every invocation.
+ $(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+ FRC:
+
+ include ../../Makefile.targ
diff --git a/usr/src/lib/varpd/libvarpd/Makefile.com b/usr/src/lib/varpd/libvarpd/Makefile.com
new file mode 100644
index 0000000000..e0378f3229
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/Makefile.com
@@ -0,0 +1,44 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+ # Library name, version suffix and the objects that make it up. These are
+ # set before Makefile.lib is included below.
+ LIBRARY = libvarpd.a
+ VERS = .1
+ OBJECTS = libvarpd.o \
+ libvarpd_arp.o \
+ libvarpd_client.o \
+ libvarpd_door.o \
+ libvarpd_overlay.o \
+ libvarpd_panic.o \
+ libvarpd_persist.o \
+ libvarpd_prop.o \
+ libvarpd_plugin.o \
+ libvarpd_util.o
+
+ include ../../../Makefile.lib
+
+ # Only the shared object is built.
+ LIBS = $(DYNLIB)
+ LDLIBS += -lc -lavl -lumem -lidspace -lnvpair -lmd5 -lrename \
+ -lbunyan
+ # Headers are looked up in the shared source directory.
+ CPPFLAGS += -I../common
+
+ SRCDIR = ../common
+
+ .KEEP_STATE:
+
+ all: $(LIBS)
+
+ lint: lintcheck
+
+ include ../../../Makefile.targ
diff --git a/usr/src/lib/varpd/libvarpd/amd64/Makefile b/usr/src/lib/varpd/libvarpd/amd64/Makefile
new file mode 100644
index 0000000000..5c586c1d40
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+ # Definitions shared by every architecture build of this library, followed
+ # by the 64-bit library build settings.
+ include ../Makefile.com
+ include ../../../Makefile.lib.64
+
+ # Installing requires a completed build plus the installed 64-bit library,
+ # its links and its lint library.
+ install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd.c b/usr/src/lib/varpd/libvarpd/common/libvarpd.c
new file mode 100644
index 0000000000..3c08a3316b
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd.c
@@ -0,0 +1,355 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * varpd library
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <umem.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/avl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <strings.h>
+
+#include <libvarpd_impl.h>
+
+/*
+ * AVL comparator for the tree of instances keyed by instance id (vri_id).
+ * Returns -1, 0 or 1 as required by avl_create().
+ */
+static int
+libvarpd_instance_comparator(const void *lp, const void *rp)
+{
+	const varpd_instance_t *left = lp;
+	const varpd_instance_t *right = rp;
+
+	if (left->vri_id == right->vri_id)
+		return (0);
+
+	return (left->vri_id < right->vri_id ? -1 : 1);
+}
+
+/*
+ * AVL comparator for the tree of instances keyed by datalink id
+ * (vri_linkid). Returns -1, 0 or 1 as required by avl_create().
+ */
+static int
+libvarpd_instance_lcomparator(const void *lp, const void *rp)
+{
+	const varpd_instance_t *left = lp;
+	const varpd_instance_t *right = rp;
+
+	if (left->vri_linkid == right->vri_linkid)
+		return (0);
+
+	return (left->vri_linkid < right->vri_linkid ? -1 : 1);
+}
+
+
+/*
+ * Allocate and initialize a varpd handle.
+ *
+ * On success, stores the new handle in *vphp and returns 0. On failure,
+ * *vphp is left as NULL, everything that was set up is torn down again and
+ * an errno value is returned. EINVAL is returned when vphp itself is NULL.
+ */
+int
+libvarpd_create(varpd_handle_t **vphp)
+{
+	int ret;
+	varpd_impl_t *vip;
+	char buf[32];
+
+	if (vphp == NULL)
+		return (EINVAL);
+
+	*vphp = NULL;
+	vip = umem_alloc(sizeof (varpd_impl_t), UMEM_DEFAULT);
+	if (vip == NULL) {
+		/*
+		 * Report ENOMEM explicitly, as libvarpd_instance_create()
+		 * does, rather than trusting errno, which could be zero or
+		 * stale and make a failure look like success.
+		 */
+		return (ENOMEM);
+	}
+
+	bzero(vip, sizeof (varpd_impl_t));
+	(void) snprintf(buf, sizeof (buf), "varpd_%p", vip);
+	vip->vdi_idspace = id_space_create(buf, LIBVARPD_ID_MIN,
+	    LIBVARPD_ID_MAX);
+	if (vip->vdi_idspace == NULL) {
+		ret = errno;
+		umem_free(vip, sizeof (varpd_impl_t));
+		return (ret);
+	}
+
+	vip->vdi_qcache = umem_cache_create("query", sizeof (varpd_query_t), 0,
+	    NULL, NULL, NULL, NULL, NULL, 0);
+	if (vip->vdi_qcache == NULL) {
+		ret = errno;
+		id_space_destroy(vip->vdi_idspace);
+		umem_free(vip, sizeof (varpd_impl_t));
+		return (ret);
+	}
+
+	if ((ret = libvarpd_overlay_init(vip)) != 0) {
+		umem_cache_destroy(vip->vdi_qcache);
+		id_space_destroy(vip->vdi_idspace);
+		umem_free(vip, sizeof (varpd_impl_t));
+		return (ret);
+	}
+
+	if ((ret = bunyan_init("varpd", &vip->vdi_bunyan)) != 0) {
+		libvarpd_overlay_fini(vip);
+		umem_cache_destroy(vip->vdi_qcache);
+		id_space_destroy(vip->vdi_idspace);
+		umem_free(vip, sizeof (varpd_impl_t));
+		return (ret);
+	}
+
+	libvarpd_persist_init(vip);
+
+	avl_create(&vip->vdi_plugins, libvarpd_plugin_comparator,
+	    sizeof (varpd_plugin_t), offsetof(varpd_plugin_t, vpp_node));
+
+	/* Instances are indexed twice: by instance id and by datalink id. */
+	avl_create(&vip->vdi_instances, libvarpd_instance_comparator,
+	    sizeof (varpd_instance_t), offsetof(varpd_instance_t, vri_inode));
+	avl_create(&vip->vdi_linstances, libvarpd_instance_lcomparator,
+	    sizeof (varpd_instance_t), offsetof(varpd_instance_t, vri_lnode));
+
+	/* mutex_init() returns the error number; it does not set errno. */
+	if ((ret = mutex_init(&vip->vdi_lock, USYNC_THREAD, NULL)) != 0)
+		libvarpd_panic("failed to create mutex: %d", ret);
+
+	/* No door server has been created yet. */
+	vip->vdi_doorfd = -1;
+	*vphp = (varpd_handle_t *)vip;
+	return (0);
+}
+
+/*
+ * Tear down a handle created by libvarpd_create() and free it. The handle
+ * must not be used afterwards.
+ */
+void
+libvarpd_destroy(varpd_handle_t *vhp)
+{
+	int ret;
+	varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+	/* mutex_destroy() returns the error number; it does not set errno. */
+	if ((ret = mutex_destroy(&vip->vdi_lock)) != 0)
+		libvarpd_panic("failed to destroy mutex: %d", ret);
+	libvarpd_persist_fini(vip);
+	libvarpd_overlay_fini(vip);
+	umem_cache_destroy(vip->vdi_qcache);
+	id_space_destroy(vip->vdi_idspace);
+	umem_free(vip, sizeof (varpd_impl_t));
+}
+
+/*
+ * Create a new, not yet activated, instance of the plugin named pname for
+ * the overlay device identified by linkid.
+ *
+ * Returns 0 and stores the instance handle in *outp on success. Returns
+ * ENOENT if no such plugin is loaded, ENOMEM if the instance cannot be
+ * allocated, or the error from libvarpd_overlay_info() or from the plugin's
+ * vpo_create entry point.
+ */
+int
+libvarpd_instance_create(varpd_handle_t *vhp, datalink_id_t linkid,
+    const char *pname, varpd_instance_handle_t **outp)
+{
+	int ret;
+	varpd_impl_t *vip = (varpd_impl_t *)vhp;
+	varpd_plugin_t *plugin;
+	varpd_instance_t *inst, lookup;
+	overlay_plugin_dest_t dest;
+	uint64_t vid;
+
+	/* XXX Really want our own errnos */
+	plugin = libvarpd_plugin_lookup(vip, pname);
+	if (plugin == NULL)
+		return (ENOENT);
+
+	if ((ret = libvarpd_overlay_info(vip, linkid, &dest, NULL, &vid)) != 0)
+		return (ret);
+
+	inst = umem_alloc(sizeof (varpd_instance_t), UMEM_DEFAULT);
+	if (inst == NULL)
+		return (ENOMEM);
+
+	inst->vri_id = id_alloc(vip->vdi_idspace);
+	if (inst->vri_id == -1)
+		libvarpd_panic("failed to allocate id from vdi_idspace: %d",
+		    errno);
+	inst->vri_linkid = linkid;
+	inst->vri_vnetid = vid;
+	inst->vri_mode = plugin->vpp_mode;
+	inst->vri_dest = dest;
+	inst->vri_plugin = plugin;
+	inst->vri_impl = vip;
+	inst->vri_flags = 0;
+	if ((ret = plugin->vpp_ops->vpo_create((varpd_provider_handle_t *)inst,
+	    &inst->vri_private, dest)) != 0) {
+		id_free(vip->vdi_idspace, inst->vri_id);
+		umem_free(inst, sizeof (varpd_instance_t));
+		return (ret);
+	}
+
+	/* mutex_init() returns the error number; it does not set errno. */
+	if ((ret = mutex_init(&inst->vri_lock, USYNC_THREAD, NULL)) != 0)
+		libvarpd_panic("failed to create mutex: %d", ret);
+
+	/*
+	 * Publish the instance in both lookup trees. A duplicate in either
+	 * one is treated as a fatal inconsistency.
+	 */
+	mutex_lock(&vip->vdi_lock);
+	lookup.vri_id = inst->vri_id;
+	if (avl_find(&vip->vdi_instances, &lookup, NULL) != NULL)
+		libvarpd_panic("found duplicate instance with id %d",
+		    lookup.vri_id);
+	avl_add(&vip->vdi_instances, inst);
+	lookup.vri_linkid = inst->vri_linkid;
+	if (avl_find(&vip->vdi_linstances, &lookup, NULL) != NULL)
+		libvarpd_panic("found duplicate linstance with id %d",
+		    lookup.vri_linkid);
+	avl_add(&vip->vdi_linstances, inst);
+	mutex_unlock(&vip->vdi_lock);
+	*outp = (varpd_instance_handle_t *)inst;
+	return (0);
+}
+
+/*
+ * Return the identifier of the instance behind the handle.
+ */
+uint64_t
+libvarpd_instance_id(varpd_instance_handle_t *ihp)
+{
+	return (((varpd_instance_t *)ihp)->vri_id);
+}
+
+/*
+ * Return the virtual network id recorded for the instance behind the
+ * provider handle.
+ */
+uint64_t
+libvarpd_plugin_vnetid(varpd_provider_handle_t *vhp)
+{
+	return (((varpd_instance_t *)vhp)->vri_vnetid);
+}
+
+/*
+ * Find an instance by its id. Returns the instance handle, or NULL when no
+ * instance with that id is registered.
+ */
+varpd_instance_handle_t *
+libvarpd_instance_lookup(varpd_handle_t *vhp, uint64_t id)
+{
+	varpd_impl_t *impl = (varpd_impl_t *)vhp;
+	varpd_instance_t key;
+	varpd_instance_t *found;
+
+	/* Only the key field is consulted by the tree's comparator. */
+	key.vri_id = id;
+
+	mutex_lock(&impl->vdi_lock);
+	found = avl_find(&impl->vdi_instances, &key, NULL);
+	mutex_unlock(&impl->vdi_lock);
+
+	return ((varpd_instance_handle_t *)found);
+}
+
+/*
+ * Find an instance by the datalink id of its overlay device; NULL is returned
+ * when there is none.
+ *
+ * This returns the internal varpd_instance_t. Should the function ever be
+ * exposed outside of varpd, it has to hand back a varpd_instance_handle_t
+ * instead.
+ */
+varpd_instance_t *
+libvarpd_instance_lookup_by_dlid(varpd_impl_t *vip, datalink_id_t linkid)
+{
+	varpd_instance_t key;
+	varpd_instance_t *found;
+
+	/* Only the key field is consulted by the tree's comparator. */
+	key.vri_linkid = linkid;
+
+	mutex_lock(&vip->vdi_lock);
+	found = avl_find(&vip->vdi_linstances, &key, NULL);
+	mutex_unlock(&vip->vdi_lock);
+
+	return (found);
+}
+
+/*
+ * Destroy an instance. The instance is first removed from the lookup trees.
+ * If it had been activated it is then deactivated: its persisted state is
+ * removed and the plugin is stopped. Finally the plugin's private state is
+ * destroyed and the instance itself is freed.
+ */
+void
+libvarpd_instance_destroy(varpd_instance_handle_t *ihp)
+{
+	int ret;
+	varpd_instance_t *inst = (varpd_instance_t *)ihp;
+	varpd_impl_t *vip = inst->vri_impl;
+
+	/*
+	 * First things first, remove it from global visibility.
+	 */
+	mutex_lock(&vip->vdi_lock);
+	avl_remove(&vip->vdi_instances, inst);
+	avl_remove(&vip->vdi_linstances, inst);
+	mutex_unlock(&vip->vdi_lock);
+
+	/*
+	 * XXX We probably need a reference counting strategy here so we know
+	 * it's safe to remove.
+	 */
+	mutex_lock(&inst->vri_lock);
+
+	/*
+	 * An activated instance has to be removed from persistence and
+	 * stopped before its state can be destroyed.
+	 */
+	if (inst->vri_flags & VARPD_INSTANCE_F_ACTIVATED) {
+		inst->vri_flags &= ~VARPD_INSTANCE_F_ACTIVATED;
+		libvarpd_torch_instance(vip, inst);
+		inst->vri_plugin->vpp_ops->vpo_stop(inst->vri_private);
+	}
+
+	/*
+	 * The plugin's private state is set up by vpo_create when the
+	 * instance is created, whether or not the instance was ever
+	 * activated, so it must always be released here. Doing this only for
+	 * activated instances leaks it for instances that were never started.
+	 */
+	inst->vri_plugin->vpp_ops->vpo_destroy(inst->vri_private);
+	inst->vri_private = NULL;
+	mutex_unlock(&inst->vri_lock);
+
+	/* Do the full clean up of the instance */
+	if ((ret = mutex_destroy(&inst->vri_lock)) != 0)
+		libvarpd_panic("failed to destroy instance vri_lock: %d", ret);
+	id_free(vip->vdi_idspace, inst->vri_id);
+	umem_free(inst, sizeof (varpd_instance_t));
+}
+
+/*
+ * Activate an instance: start the plugin, persist the instance and associate
+ * it with its overlay device.
+ *
+ * Returns 0 on success, EEXIST if the instance is already activated, or the
+ * error of the step that failed. When a later step fails, the earlier ones
+ * are undone so the plugin is not left running for an inactive instance.
+ */
+int
+libvarpd_instance_activate(varpd_instance_handle_t *ihp)
+{
+	int ret;
+	varpd_instance_t *inst = (varpd_instance_t *)ihp;
+
+	mutex_lock(&inst->vri_lock);
+
+	if (inst->vri_flags & VARPD_INSTANCE_F_ACTIVATED) {
+		ret = EEXIST;
+		goto out;
+	}
+
+	if ((ret = inst->vri_plugin->vpp_ops->vpo_start(inst->vri_private)) !=
+	    0)
+		goto out;
+
+	if ((ret = libvarpd_persist_instance(inst->vri_impl, inst)) != 0) {
+		/* Nothing was persisted; only the start has to be undone. */
+		inst->vri_plugin->vpp_ops->vpo_stop(inst->vri_private);
+		goto out;
+	}
+
+	if ((ret = libvarpd_overlay_associate(inst)) != 0) {
+		/*
+		 * Undo in reverse order, mirroring what
+		 * libvarpd_instance_destroy() does for an active instance.
+		 */
+		libvarpd_torch_instance(inst->vri_impl, inst);
+		inst->vri_plugin->vpp_ops->vpo_stop(inst->vri_private);
+		goto out;
+	}
+
+	inst->vri_flags |= VARPD_INSTANCE_F_ACTIVATED;
+
+out:
+	mutex_unlock(&inst->vri_lock);
+	return (ret);
+}
+
+/*
+ * Return the logger of the varpd handle that owns the instance behind the
+ * provider handle.
+ */
+const bunyan_logger_t *
+libvarpd_plugin_bunyan(varpd_provider_handle_t *vhp)
+{
+	const varpd_instance_t *inst = (varpd_instance_t *)vhp;
+	const varpd_impl_t *owner = inst->vri_impl;
+
+	return (owner->vdi_bunyan);
+}
+
+/*
+ * Fork handlers; both simply defer to the plugin subsystem.
+ *
+ * NOTE(review): this assumes libvarpd_plugin_prefork() and
+ * libvarpd_plugin_postfork() bracket the fork as an acquire/release pair --
+ * confirm against libvarpd_plugin.c.
+ */
+static void
+libvarpd_prefork(void)
+{
+	libvarpd_plugin_prefork();
+}
+
+static void
+libvarpd_postfork(void)
+{
+	libvarpd_plugin_postfork();
+}
+
+/*
+ * Library constructor: initialize the plugin subsystem and register the fork
+ * handlers.
+ */
+#pragma init(libvarpd_init)
+static void
+libvarpd_init(void)
+{
+	int ret;
+
+	libvarpd_plugin_init();
+
+	/*
+	 * pthread_atfork() takes its handlers in the order prepare, parent,
+	 * child. The pre-fork hook has to be the prepare handler and the
+	 * post-fork hook has to run in both the parent and the child;
+	 * registering them as (NULL, prefork, postfork) would run the
+	 * pre-fork hook after the fork in the parent and the post-fork hook
+	 * only in the child. Errors are returned rather than stored in errno.
+	 */
+	ret = pthread_atfork(libvarpd_prefork, libvarpd_postfork,
+	    libvarpd_postfork);
+	if (ret != 0)
+		libvarpd_panic("failed to create varpd atfork: %d", ret);
+}
+
+/*
+ * Library destructor, registered through the fini pragma below: tears down
+ * the plugin subsystem that libvarpd_init() set up.
+ */
+#pragma fini(libvarpd_fini)
+static void
+libvarpd_fini(void)
+{
+ libvarpd_plugin_fini();
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd.h b/usr/src/lib/varpd/libvarpd/common/libvarpd.h
new file mode 100644
index 0000000000..b2dc57dd4e
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd.h
@@ -0,0 +1,78 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_H
+#define _LIBVARPD_H
+
+/*
+ * varpd interfaces
+ */
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <sys/mac.h>
+#include <libvarpd_client.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Opaque handle types. The structures are never defined; the library casts
+ * the handles to its internal types.
+ */
+typedef struct __varpd_handle varpd_handle_t;
+typedef struct __varpd_prop_handle varpd_prop_handle_t;
+typedef struct __varpd_instance_handle varpd_instance_handle_t;
+
+/*
+ * Creation and destruction of the library handle. libvarpd_create() returns
+ * 0 or an errno value.
+ */
+extern int libvarpd_create(varpd_handle_t **);
+extern void libvarpd_destroy(varpd_handle_t *);
+
+/* Persistence of instances. */
+extern int libvarpd_persist_enable(varpd_handle_t *, const char *);
+extern int libvarpd_persist_restore(varpd_handle_t *);
+extern int libvarpd_persist_disable(varpd_handle_t *);
+
+/*
+ * Instance management. An instance binds a plugin (named by the string
+ * argument) to a datalink id.
+ */
+extern int libvarpd_instance_create(varpd_handle_t *, datalink_id_t,
+ const char *, varpd_instance_handle_t **);
+extern uint64_t libvarpd_instance_id(varpd_instance_handle_t *);
+extern varpd_instance_handle_t *libvarpd_instance_lookup(varpd_handle_t *,
+ uint64_t);
+extern void libvarpd_instance_destroy(varpd_instance_handle_t *);
+extern int libvarpd_instance_activate(varpd_instance_handle_t *);
+
+/* Plugin loading and enumeration. */
+extern int libvarpd_plugin_load(varpd_handle_t *, const char *);
+typedef int (*libvarpd_plugin_walk_f)(varpd_handle_t *, const char *, void *);
+extern int libvarpd_plugin_walk(varpd_handle_t *, libvarpd_plugin_walk_f,
+ void *);
+
+/* Instance properties. */
+extern int libvarpd_prop_handle_alloc(varpd_handle_t *,
+ varpd_instance_handle_t *, varpd_prop_handle_t **);
+extern void libvarpd_prop_handle_free(varpd_prop_handle_t *);
+extern int libvarpd_prop_nprops(varpd_instance_handle_t *, uint_t *);
+/* XXX Do we need the next two from a server perspective? */
+extern int libvarpd_prop_info_fill(varpd_prop_handle_t *, uint_t);
+extern int libvarpd_prop_info(varpd_prop_handle_t *, const char **, uint_t *,
+ uint_t *, const void **, uint32_t *, const mac_propval_range_t **);
+extern int libvarpd_prop_get(varpd_prop_handle_t *, void *, uint32_t *);
+extern int libvarpd_prop_set(varpd_prop_handle_t *, const void *, uint32_t);
+
+/* Door server; the string argument presumably names the door path. */
+extern int libvarpd_door_server_create(varpd_handle_t *, const char *);
+extern void libvarpd_door_server_destroy(varpd_handle_t *);
+
+/* Overlay lookup processing. */
+extern void libvarpd_overlay_lookup_run(varpd_handle_t *);
+extern void libvarpd_overlay_lookup_quiesce(varpd_handle_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_H */
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c
new file mode 100644
index 0000000000..04fbe0e05b
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c
@@ -0,0 +1,645 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Common routines for implementing proxy arp
+ */
+
+#include <sys/types.h>
+#include <net/if.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet/udp.h>
+#include <netinet/dhcp.h>
+#include <libvarpd_impl.h>
+#include <sys/vlan.h>
+#include <strings.h>
+#include <assert.h>
+
+#define IPV6_VERSION 6
+
+/*
+ * State of one proxied ARP or NDP request. It is allocated when the request
+ * is accepted, handed to the plugin as a varpd_arp_handle_t and freed once
+ * the plugin's answer has been processed.
+ */
+typedef struct varpd_arp_query {
+ int vaq_type; /* AF_INET for ARP, AF_INET6 for NDP */
+ char vaq_buf[ETHERMAX + VLAN_TAGSZ]; /* copy of the request packet */
+ size_t vaq_bsize; /* size of vaq_buf / of the data in it */
+ uint8_t vaq_lookup[ETHERADDRL]; /* MAC address filled in by the plugin */
+ struct sockaddr_storage vaq_sock; /* address being resolved */
+ varpd_instance_t *vaq_inst; /* instance the request belongs to */
+ struct ether_arp *vaq_ea; /* ARP payload in vaq_buf, NULL for NDP */
+ varpd_query_handle_t *vaq_query; /* query to reply to when done */
+ const overlay_targ_lookup_t *vaq_otl; /* lookup request from the caller */
+ ip6_t *vaq_ip6; /* IPv6 header in vaq_buf, NDP only */
+ nd_neighbor_solicit_t *vaq_ns; /* solicitation in vaq_buf, NDP only */
+} varpd_arp_query_t;
+
+/*
+ * State of one proxied DHCP request. It is handed to the plugin as a
+ * varpd_dhcp_handle_t and freed once the plugin's answer has been processed.
+ */
+typedef struct varpd_dhcp_query {
+ char vdq_buf[ETHERMAX + VLAN_TAGSZ]; /* copy of the request packet */
+ size_t vdq_bsize; /* size of vdq_buf / of the data in it */
+ uint8_t vdq_lookup[ETHERADDRL]; /* MAC address filled in by the plugin */
+ const overlay_targ_lookup_t *vdq_otl; /* lookup request from the caller */
+ varpd_instance_t *vdq_inst; /* instance the request belongs to */
+ varpd_query_handle_t *vdq_query; /* query to reply to when done */
+ struct ether_header *vdq_ether; /* Ethernet header at start of vdq_buf */
+} varpd_dhcp_query_t;
+
+/* The Ethernet broadcast address. */
+static const uint8_t libvarpd_arp_bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff };
+
+/*
+ * Answer the query behind vqh with a drop and release the per-request state.
+ */
+static void
+libvarpd_proxy_arp_drop(varpd_query_handle_t *vqh, varpd_arp_query_t *vaq)
+{
+	libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+	umem_free(vaq, sizeof (varpd_arp_query_t));
+}
+
+/*
+ * Start proxying an ARP request on behalf of a plugin.
+ *
+ * The packet described by otl is fetched and validated. Anything that is not
+ * a well-formed, non-gratuitous Ethernet/IPv4 ARP request is dropped.
+ * Otherwise the plugin's vpo_arp entry point is asked to resolve the target
+ * address; it is expected to answer through libvarpd_plugin_arp_reply().
+ */
+void
+libvarpd_plugin_proxy_arp(varpd_provider_handle_t *hdl,
+    varpd_query_handle_t *vqh, const overlay_targ_lookup_t *otl)
+{
+	varpd_arp_query_t *vaq;
+	varpd_instance_t *inst = (varpd_instance_t *)hdl;
+	struct ether_arp *ea;
+	struct sockaddr_in *ip;
+
+	vaq = umem_alloc(sizeof (varpd_arp_query_t), UMEM_DEFAULT);
+	if (vaq == NULL) {
+		libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+		return;
+	}
+	vaq->vaq_bsize = sizeof (vaq->vaq_buf);
+
+	if (otl->otl_sap != ETHERTYPE_ARP) {
+		libvarpd_proxy_arp_drop(vqh, vaq);
+		return;
+	}
+
+	/*
+	 * An ARP packet should not be very large because it's defined to only
+	 * be allowed to have a single entry at a given time. But our data must
+	 * be at least as large as an ether_arp and our header must be at least
+	 * as large as a standard ethernet header.
+	 */
+	if (otl->otl_hdrsize + otl->otl_pktsize > vaq->vaq_bsize ||
+	    otl->otl_pktsize < sizeof (struct ether_arp) ||
+	    otl->otl_hdrsize < sizeof (struct ether_header)) {
+		libvarpd_proxy_arp_drop(vqh, vaq);
+		return;
+	}
+
+	if (libvarpd_overlay_packet(inst->vri_impl, otl, vaq->vaq_buf,
+	    &vaq->vaq_bsize) != 0) {
+		libvarpd_proxy_arp_drop(vqh, vaq);
+		return;
+	}
+
+	/*
+	 * Make sure that we were actually handed enough bytes to cover the
+	 * header and the ARP payload before looking at them.
+	 * NOTE(review): this relies on libvarpd_overlay_packet() updating
+	 * vaq_bsize to the number of bytes copied, as the NDP and DHCP
+	 * paths in this file also assume.
+	 */
+	if (vaq->vaq_bsize < otl->otl_hdrsize + sizeof (struct ether_arp)) {
+		libvarpd_proxy_arp_drop(vqh, vaq);
+		return;
+	}
+
+	ea = (void *)((uintptr_t)vaq->vaq_buf + (uintptr_t)otl->otl_hdrsize);
+
+	/*
+	 * Make sure it matches something that we know about.
+	 */
+	if (ntohs(ea->ea_hdr.ar_hrd) != ARPHRD_ETHER ||
+	    ntohs(ea->ea_hdr.ar_pro) != ETHERTYPE_IP ||
+	    ea->ea_hdr.ar_hln != ETHERADDRL ||
+	    ea->ea_hdr.ar_pln != sizeof (ea->arp_spa) ||
+	    ntohs(ea->ea_hdr.ar_op) != ARPOP_REQUEST) {
+		libvarpd_proxy_arp_drop(vqh, vaq);
+		return;
+	}
+
+	/*
+	 * Now that we've verified that our data is sane, see if we're doing a
+	 * gratuitous arp and if so, drop it. Otherwise, we may end up
+	 * triggering duplicate address detection.
+	 */
+	if (bcmp(ea->arp_spa, ea->arp_tpa, sizeof (ea->arp_spa)) == 0) {
+		libvarpd_proxy_arp_drop(vqh, vaq);
+		return;
+	}
+
+	/* The address the plugin has to resolve is the ARP target address. */
+	bzero(&vaq->vaq_sock, sizeof (struct sockaddr_storage));
+	ip = (struct sockaddr_in *)&vaq->vaq_sock;
+	ip->sin_family = AF_INET;
+	bcopy(ea->arp_tpa, &ip->sin_addr, sizeof (ea->arp_tpa));
+
+	vaq->vaq_type = AF_INET;
+	vaq->vaq_inst = inst;
+	vaq->vaq_ea = ea;
+	vaq->vaq_query = vqh;
+	vaq->vaq_otl = otl;
+
+	if (inst->vri_plugin->vpp_ops->vpo_arp == NULL)
+		libvarpd_panic("%s plugin asked to do arp, but has no method",
+		    inst->vri_plugin->vpp_name);
+
+	inst->vri_plugin->vpp_ops->vpo_arp(inst->vri_private,
+	    (varpd_arp_handle_t *)vaq, VARPD_QTYPE_ETHERNET,
+	    (struct sockaddr *)ip, vaq->vaq_lookup);
+}
+
+/*
+ * Finish a proxied ARP request after the plugin has stored the looked-up MAC
+ * address in vaq_lookup: turn the saved request into a reply in place, inject
+ * it, drop the original query and free vaq.
+ */
+static void
+libvarpd_proxy_arp_fini(varpd_arp_query_t *vaq)
+{
+ struct ether_header *ether;
+ struct sockaddr_in *ip;
+
+ ip = (struct sockaddr_in *)&vaq->vaq_sock;
+ /*
+ * Modify our packet in place for a reply. We need to swap around the
+ * sender and target addresses.
+ */
+ vaq->vaq_ea->ea_hdr.ar_op = htons(ARPOP_REPLY);
+ bcopy(vaq->vaq_ea->arp_sha, vaq->vaq_ea->arp_tha, ETHERADDRL);
+ bcopy(vaq->vaq_lookup, vaq->vaq_ea->arp_sha, ETHERADDRL);
+ /* Swap the protocol addresses, using ip->sin_addr as scratch space. */
+ bcopy(vaq->vaq_ea->arp_spa, &ip->sin_addr,
+ sizeof (vaq->vaq_ea->arp_spa));
+ bcopy(vaq->vaq_ea->arp_tpa, vaq->vaq_ea->arp_spa,
+ sizeof (vaq->vaq_ea->arp_spa));
+ bcopy(&ip->sin_addr, vaq->vaq_ea->arp_tpa,
+ sizeof (vaq->vaq_ea->arp_spa));
+
+ /*
+ * Finally go ahead and fix up the mac header and reply to the sender
+ * explicitly.
+ */
+ ether = (struct ether_header *)vaq->vaq_buf;
+ bcopy(&ether->ether_shost, &ether->ether_dhost, ETHERADDRL);
+ bcopy(vaq->vaq_lookup, &ether->ether_shost, ETHERADDRL);
+
+ /* A failure to inject the reply is deliberately ignored. */
+ (void) libvarpd_overlay_inject(vaq->vaq_inst->vri_impl, vaq->vaq_otl,
+ vaq->vaq_buf, vaq->vaq_bsize);
+
+ /* The original request itself is always dropped. */
+ libvarpd_plugin_query_reply(vaq->vaq_query, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+}
+
+/*
+ * Sum the IPv6 pseudo-header (source, destination, upper-layer length and
+ * next header) together with mlen bytes of ICMPv6 data at buf and fold the
+ * result down to 16 bits. The folded sum is returned without being
+ * complemented; that is left to the caller. mlen has to be even.
+ */
+static uint16_t
+libvarpd_icmpv6_checksum(const ip6_t *v6hdr, const uint16_t *buf, uint16_t mlen)
+{
+	const uint16_t *srcw = (uint16_t *)&v6hdr->ip6_src;
+	const uint16_t *dstw = (uint16_t *)&v6hdr->ip6_dst;
+	const size_t addrwords = sizeof (struct in6_addr) / sizeof (uint16_t);
+	const size_t datawords = ((size_t)mlen + 1) / sizeof (uint16_t);
+	uint32_t acc = 0;
+	size_t w;
+
+	assert(mlen % 2 == 0);
+
+	/* Pseudo-header: both addresses, one 16-bit word at a time. */
+	for (w = 0; w < addrwords; w++)
+		acc += srcw[w];
+	for (w = 0; w < addrwords; w++)
+		acc += dstw[w];
+
+	/* Pseudo-header: payload length and next header, in network order. */
+	acc += htons(mlen);
+#ifdef _BIG_ENDIAN
+	acc += IPPROTO_ICMPV6;
+#else
+	acc += IPPROTO_ICMPV6 << 8;
+#endif /* _BIG_ENDIAN */
+
+	/* The ICMPv6 message itself. */
+	for (w = 0; w < datawords; w++)
+		acc += buf[w];
+
+	/* Fold the carries back in until the sum fits into 16 bits. */
+	while ((acc >> 16) != 0)
+		acc = (acc & 0xffff) + (acc >> 16);
+
+	return (acc & 0xffff);
+}
+
+/*
+ * Answer the query behind vqh with a drop and release the per-request state.
+ */
+static void
+libvarpd_proxy_ndp_drop(varpd_query_handle_t *vqh, varpd_arp_query_t *vaq)
+{
+	libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+	umem_free(vaq, sizeof (varpd_arp_query_t));
+}
+
+/*
+ * Proxying NDP is much more involved than proxying ARP. For starters, NDP
+ * neighbor solicitations are implemented in terms of IPv6 ICMP as opposed to
+ * its own Ethertype. Therefore, we're going to have to grab a packet if it's a
+ * multicast packet and then determine if we actually want to do anything with
+ * it.
+ *
+ * Anything that is not a well-formed neighbor solicitation carrying a source
+ * link-layer address option is dropped. Otherwise the plugin's vpo_arp entry
+ * point is asked to resolve the solicited target; it is expected to answer
+ * through libvarpd_plugin_arp_reply().
+ */
+void
+libvarpd_plugin_proxy_ndp(varpd_provider_handle_t *hdl,
+    varpd_query_handle_t *vqh, const overlay_targ_lookup_t *otl)
+{
+	size_t bsize, plen, optlen;
+	varpd_arp_query_t *vaq;
+	ip6_t *v6hdr;
+	nd_neighbor_solicit_t *ns;
+	nd_opt_hdr_t *opt;
+	struct sockaddr_in6 *s6;
+
+	varpd_instance_t *inst = (varpd_instance_t *)hdl;
+	uint8_t *eth = NULL;
+
+	vaq = umem_alloc(sizeof (varpd_arp_query_t), UMEM_DEFAULT);
+	if (vaq == NULL) {
+		libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+		return;
+	}
+	vaq->vaq_bsize = sizeof (vaq->vaq_buf);
+
+	/* IPv6 multicast is always sent to a 33:33:xx:xx:xx:xx MAC address. */
+	if (otl->otl_dstaddr[0] != 0x33 ||
+	    otl->otl_dstaddr[1] != 0x33) {
+		libvarpd_proxy_ndp_drop(vqh, vaq);
+		return;
+	}
+
+	/*
+	 * If we have more than a standard frame size for the ICMP neighbor
+	 * solicitation, drop it. Similarly if there isn't enough data present
+	 * for us, or the header is smaller than a standard ethernet header
+	 * (which the reply path relies on), drop it.
+	 */
+	if (otl->otl_hdrsize + otl->otl_pktsize > vaq->vaq_bsize ||
+	    otl->otl_pktsize < sizeof (ip6_t) +
+	    sizeof (nd_neighbor_solicit_t) ||
+	    otl->otl_hdrsize < sizeof (struct ether_header)) {
+		libvarpd_proxy_ndp_drop(vqh, vaq);
+		return;
+	}
+
+	if (libvarpd_overlay_packet(inst->vri_impl, otl, vaq->vaq_buf,
+	    &vaq->vaq_bsize) != 0) {
+		libvarpd_proxy_ndp_drop(vqh, vaq);
+		return;
+	}
+
+	/*
+	 * Verify at run time, rather than with an assertion, that we were
+	 * handed enough bytes for everything we are about to dereference.
+	 * This also keeps the subtractions below from wrapping around.
+	 */
+	bsize = vaq->vaq_bsize;
+	if (bsize < otl->otl_hdrsize + sizeof (ip6_t) +
+	    sizeof (nd_neighbor_solicit_t)) {
+		libvarpd_proxy_ndp_drop(vqh, vaq);
+		return;
+	}
+	bsize -= otl->otl_hdrsize;
+
+	v6hdr = (ip6_t *)(vaq->vaq_buf + otl->otl_hdrsize);
+	if (((v6hdr->ip6_vfc & 0xf0) >> 4) != IPV6_VERSION) {
+		libvarpd_proxy_ndp_drop(vqh, vaq);
+		return;
+	}
+
+	if (v6hdr->ip6_nxt != IPPROTO_ICMPV6) {
+		libvarpd_proxy_ndp_drop(vqh, vaq);
+		return;
+	}
+
+	/*
+	 * In addition to getting these requests on the multicast address for
+	 * node solicitation, we may also end up getting them on a generic
+	 * multicast address due to timeouts or other choices by various OSes.
+	 * We should be fairly liberal and accept both, even though the
+	 * standard wants them sent to a solicitation address.
+	 */
+	if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&v6hdr->ip6_dst) &&
+	    !IN6_IS_ADDR_MC_LINKLOCAL(&v6hdr->ip6_dst)) {
+		libvarpd_proxy_ndp_drop(vqh, vaq);
+		return;
+	}
+
+	/*
+	 * The advertised payload has to fit into what we received and has to
+	 * be large enough to hold the neighbor solicitation itself.
+	 */
+	bsize -= sizeof (ip6_t);
+	plen = ntohs(v6hdr->ip6_plen);
+	if (bsize < plen || plen < sizeof (nd_neighbor_solicit_t)) {
+		libvarpd_proxy_ndp_drop(vqh, vaq);
+		return;
+	}
+
+	/*
+	 * Now we know that this is an ICMPv6 request targeting the right
+	 * IPv6 multicast prefix. Let's go through and verify that ICMPv6
+	 * indicates that we have the real thing and ensure that per RFC 4861
+	 * the target address is not a multicast address. Further, because this
+	 * is a multicast on Ethernet, we must have a source link-layer address.
+	 *
+	 * XXX We should probably validate the checksum here...
+	 */
+	ns = (nd_neighbor_solicit_t *)(vaq->vaq_buf + otl->otl_hdrsize +
+	    sizeof (ip6_t));
+	/* Both the type and the code have to match, so reject on either. */
+	if (ns->nd_ns_type != ND_NEIGHBOR_SOLICIT || ns->nd_ns_code != 0) {
+		libvarpd_proxy_ndp_drop(vqh, vaq);
+		return;
+	}
+
+	if (IN6_IS_ADDR_MULTICAST(&ns->nd_ns_target) ||
+	    IN6_IS_ADDR_V4MAPPED(&ns->nd_ns_target) ||
+	    IN6_IS_ADDR_LOOPBACK(&ns->nd_ns_target)) {
+		libvarpd_proxy_ndp_drop(vqh, vaq);
+		return;
+	}
+
+	/* Walk the options; nd_opt_len is in units of eight bytes. */
+	plen -= sizeof (nd_neighbor_solicit_t);
+	opt = (nd_opt_hdr_t *)(ns + 1);
+	while (plen >= sizeof (struct nd_opt_hdr)) {
+		optlen = (size_t)opt->nd_opt_len * 8;
+
+		/*
+		 * An option with no length is clearly bogus, as is one that
+		 * claims to extend beyond the end of the payload. Accepting
+		 * the latter would wrap plen and walk off of the buffer.
+		 */
+		if (optlen == 0 || optlen > plen) {
+			libvarpd_proxy_ndp_drop(vqh, vaq);
+			return;
+		}
+
+		if (opt->nd_opt_type == ND_OPT_SOURCE_LINKADDR) {
+			eth = (uint8_t *)((uintptr_t)opt +
+			    sizeof (nd_opt_hdr_t));
+		}
+		plen -= optlen;
+		opt = (nd_opt_hdr_t *)((uintptr_t)opt + optlen);
+	}
+
+	/* Without a source link-layer address option there's nothing to do. */
+	if (eth == NULL) {
+		libvarpd_proxy_ndp_drop(vqh, vaq);
+		return;
+	}
+
+	/* The address the plugin has to resolve is the solicited target. */
+	bzero(&vaq->vaq_sock, sizeof (struct sockaddr_storage));
+	s6 = (struct sockaddr_in6 *)&vaq->vaq_sock;
+	s6->sin6_family = AF_INET6;
+	bcopy(&ns->nd_ns_target, &s6->sin6_addr, sizeof (s6->sin6_addr));
+
+	if (inst->vri_plugin->vpp_ops->vpo_arp == NULL)
+		libvarpd_panic("%s plugin asked to do arp, but has no method",
+		    inst->vri_plugin->vpp_name);
+
+	vaq->vaq_type = AF_INET6;
+	vaq->vaq_inst = inst;
+	vaq->vaq_ea = NULL;
+	vaq->vaq_query = vqh;
+	vaq->vaq_otl = otl;
+	vaq->vaq_ns = ns;
+	vaq->vaq_ip6 = v6hdr;
+	inst->vri_plugin->vpp_ops->vpo_arp(inst->vri_private,
+	    (varpd_arp_handle_t *)vaq, VARPD_QTYPE_ETHERNET,
+	    (struct sockaddr *)s6, vaq->vaq_lookup);
+}
+
+/*
+ * Finish a proxied NDP request after the plugin has stored the looked-up MAC
+ * address in vaq_lookup: build a neighbor advertisement in a fresh buffer,
+ * inject it, drop the original query and free vaq.
+ */
+static void
+libvarpd_proxy_ndp_fini(varpd_arp_query_t *vaq)
+{
+ char resp[ETHERMAX + VLAN_TAGSZ];
+ struct ether_header *ether;
+ nd_neighbor_advert_t *na;
+ nd_opt_hdr_t *opt;
+ ip6_t *v6hdr;
+ size_t roff = 0;
+
+ /*
+ * Now we need to assemble a neighbor advertisement as a response.
+ * Unlike with arp, we opt to use a new packet just to make things a
+ * bit simpler and saner here.
+ */
+ v6hdr = vaq->vaq_ip6;
+ /* Link-layer header: reply to the sender from the looked-up address. */
+ bcopy(vaq->vaq_buf, resp, vaq->vaq_otl->otl_hdrsize);
+ ether = (struct ether_header *)resp;
+ bcopy(&ether->ether_shost, &ether->ether_dhost, ETHERADDRL);
+ bcopy(vaq->vaq_lookup, &ether->ether_shost, ETHERADDRL);
+ roff += vaq->vaq_otl->otl_hdrsize;
+ /* IPv6 header: copied from the request, then re-addressed. */
+ bcopy(v6hdr, resp + roff, sizeof (ip6_t));
+ v6hdr = (ip6_t *)(resp + roff);
+ bcopy(&v6hdr->ip6_src, &v6hdr->ip6_dst, sizeof (struct in6_addr));
+ bcopy(&vaq->vaq_ns->nd_ns_target, &v6hdr->ip6_src,
+ sizeof (struct in6_addr));
+ roff += sizeof (ip6_t);
+ na = (nd_neighbor_advert_t *)(resp + roff);
+ na->nd_na_type = ND_NEIGHBOR_ADVERT;
+ na->nd_na_code = 0;
+ /*
+ * RFC 4443 defines that we should set the checksum to zero before we
+ * calculate it.
+ */
+ na->nd_na_cksum = 0;
+ /*
+ * Nota bene, the header <netinet/icmp6.h> has already transformed this
+ * into the appropriate host order. Don't use htonl.
+ */
+ na->nd_na_flags_reserved = ND_NA_FLAG_SOLICITED | ND_NA_FLAG_OVERRIDE;
+ bcopy(&vaq->vaq_ns->nd_ns_target, &na->nd_na_target,
+ sizeof (struct in6_addr));
+ roff += sizeof (nd_neighbor_advert_t);
+
+ /* Target link-layer address option: one unit of eight bytes. */
+ opt = (nd_opt_hdr_t *)(resp + roff);
+ opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
+ opt->nd_opt_len = 1;
+ roff += sizeof (nd_opt_hdr_t);
+ bcopy(vaq->vaq_lookup, resp + roff, ETHERADDRL);
+ roff += ETHERADDRL;
+
+ /*
+ * Now that we've filled in the packet, go back and compute the checksum
+ * and fill in the IPv6 payload size.
+ */
+ v6hdr->ip6_plen = htons(roff - sizeof (ip6_t) -
+ vaq->vaq_otl->otl_hdrsize);
+ na->nd_na_cksum = ~libvarpd_icmpv6_checksum(v6hdr, (uint16_t *)na,
+ ntohs(v6hdr->ip6_plen)) & 0xffff;
+
+ /* A failure to inject the reply is deliberately ignored. */
+ (void) libvarpd_overlay_inject(vaq->vaq_inst->vri_impl, vaq->vaq_otl,
+ resp, roff);
+
+ /* The original request itself is always dropped. */
+ libvarpd_plugin_query_reply(vaq->vaq_query, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+}
+
+/*
+ * Called by a plugin to answer a proxy ARP/NDP request. With
+ * VARPD_LOOKUP_DROP the request is dropped and its state freed; with
+ * VARPD_LOOKUP_OK the reply is generated by the ARP or NDP completion
+ * routine, depending on the kind of request. Anything else is a plugin bug.
+ */
+void
+libvarpd_plugin_arp_reply(varpd_arp_handle_t *vah, int action)
+{
+	varpd_arp_query_t *query = (varpd_arp_query_t *)vah;
+
+	if (query == NULL)
+		libvarpd_panic("unknown plugin passed invalid "
+		    "varpd_arp_handle_t");
+
+	switch (action) {
+	case VARPD_LOOKUP_DROP:
+		libvarpd_plugin_query_reply(query->vaq_query,
+		    VARPD_LOOKUP_DROP);
+		umem_free(query, sizeof (varpd_arp_query_t));
+		return;
+	case VARPD_LOOKUP_OK:
+		break;
+	default:
+		libvarpd_panic("%s plugin returned invalid action %d",
+		    query->vaq_inst->vri_plugin->vpp_name, action);
+	}
+
+	/* Hand off to the completion routine for the request's family. */
+	if (query->vaq_type == AF_INET) {
+		libvarpd_proxy_arp_fini(query);
+	} else if (query->vaq_type == AF_INET6) {
+		libvarpd_proxy_ndp_fini(query);
+	} else {
+		libvarpd_panic("encountered unknown vaq_type: %d",
+		    query->vaq_type);
+	}
+}
+
+/*
+ * Answer the query behind vqh with a drop and release the per-request state.
+ */
+static void
+libvarpd_proxy_dhcp_drop(varpd_query_handle_t *vqh, varpd_dhcp_query_t *vdq)
+{
+	libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+	umem_free(vdq, sizeof (varpd_dhcp_query_t));
+}
+
+/*
+ * Start proxying a DHCP request on behalf of a plugin.
+ *
+ * The packet described by otl is fetched and validated. Anything that is not
+ * a broadcast IPv4 UDP packet from the BOOTP client port to the BOOTP server
+ * port is dropped. Otherwise the plugin's vpo_dhcp entry point is asked for
+ * the MAC address to send the request to; it is expected to answer through
+ * libvarpd_plugin_dhcp_reply().
+ */
+void
+libvarpd_plugin_proxy_dhcp(varpd_provider_handle_t *hdl,
+    varpd_query_handle_t *vqh, const overlay_targ_lookup_t *otl)
+{
+	varpd_dhcp_query_t *vdq;
+	struct ether_header *ether;
+	struct ip *ip;
+	struct udphdr *udp;
+	varpd_instance_t *inst = (varpd_instance_t *)hdl;
+
+	vdq = umem_alloc(sizeof (varpd_dhcp_query_t), UMEM_DEFAULT);
+	if (vdq == NULL) {
+		libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+		return;
+	}
+	vdq->vdq_bsize = sizeof (vdq->vdq_buf);
+
+	if (otl->otl_sap != ETHERTYPE_IP) {
+		libvarpd_proxy_dhcp_drop(vqh, vdq);
+		return;
+	}
+
+	/* Only broadcast requests are proxied. */
+	if (bcmp(otl->otl_dstaddr, libvarpd_arp_bcast, ETHERADDRL) != 0) {
+		libvarpd_proxy_dhcp_drop(vqh, vdq);
+		return;
+	}
+
+	/*
+	 * The packet has to fit into our buffer and be large enough to hold
+	 * the IP, UDP and DHCP headers behind a standard ethernet header.
+	 */
+	if (otl->otl_hdrsize + otl->otl_pktsize > vdq->vdq_bsize ||
+	    otl->otl_pktsize < sizeof (struct ip) + sizeof (struct udphdr) +
+	    sizeof (struct dhcp) ||
+	    otl->otl_hdrsize < sizeof (struct ether_header)) {
+		libvarpd_proxy_dhcp_drop(vqh, vdq);
+		return;
+	}
+
+	if (libvarpd_overlay_packet(inst->vri_impl, otl, vdq->vdq_buf,
+	    &vdq->vdq_bsize) != 0) {
+		libvarpd_proxy_dhcp_drop(vqh, vdq);
+		return;
+	}
+
+	/* We need the whole packet, as it is going to be resent later. */
+	if (vdq->vdq_bsize != otl->otl_hdrsize + otl->otl_pktsize) {
+		libvarpd_proxy_dhcp_drop(vqh, vdq);
+		return;
+	}
+
+	ether = (struct ether_header *)vdq->vdq_buf;
+	ip = (struct ip *)(vdq->vdq_buf + otl->otl_hdrsize);
+
+	/*
+	 * The packet has to be both IPv4 and UDP, so reject it when either
+	 * test fails. A header length below the minimum is bogus as well and
+	 * would make us look for the UDP header inside of the IP header.
+	 */
+	if (ip->ip_v != IPVERSION || ip->ip_p != IPPROTO_UDP ||
+	    ip->ip_hl * 4 < sizeof (struct ip)) {
+		libvarpd_proxy_dhcp_drop(vqh, vdq);
+		return;
+	}
+
+	if (otl->otl_hdrsize + ip->ip_hl * 4 + sizeof (struct udphdr) >
+	    vdq->vdq_bsize) {
+		libvarpd_proxy_dhcp_drop(vqh, vdq);
+		return;
+	}
+
+	udp = (struct udphdr *)(vdq->vdq_buf + otl->otl_hdrsize +
+	    ip->ip_hl * 4);
+
+	if (ntohs(udp->uh_sport) != IPPORT_BOOTPC ||
+	    ntohs(udp->uh_dport) != IPPORT_BOOTPS) {
+		libvarpd_proxy_dhcp_drop(vqh, vdq);
+		return;
+	}
+
+	vdq->vdq_ether = ether;
+	vdq->vdq_inst = inst;
+	vdq->vdq_query = vqh;
+	vdq->vdq_otl = otl;
+
+	if (inst->vri_plugin->vpp_ops->vpo_dhcp == NULL)
+		libvarpd_panic("%s plugin asked to do dhcp, but has no method",
+		    inst->vri_plugin->vpp_name);
+
+	inst->vri_plugin->vpp_ops->vpo_dhcp(inst->vri_private,
+	    (varpd_dhcp_handle_t *)vdq, VARPD_QTYPE_ETHERNET, otl,
+	    vdq->vdq_lookup);
+}
+
+/*
+ * Called by a plugin to answer a proxy DHCP request. With VARPD_LOOKUP_DROP
+ * the request is dropped and its state freed. With VARPD_LOOKUP_OK the saved
+ * packet is resent with its destination MAC address replaced by the one the
+ * plugin stored in vdq_lookup; the original request is then dropped as well.
+ * Anything else is a plugin bug.
+ */
+void
+libvarpd_plugin_dhcp_reply(varpd_dhcp_handle_t *vdh, int action)
+{
+	varpd_dhcp_query_t *query = (varpd_dhcp_query_t *)vdh;
+
+	if (query == NULL)
+		libvarpd_panic("unknown plugin passed invalid "
+		    "varpd_dhcp_handle_t");
+
+	switch (action) {
+	case VARPD_LOOKUP_DROP:
+		libvarpd_plugin_query_reply(query->vdq_query,
+		    VARPD_LOOKUP_DROP);
+		umem_free(query, sizeof (varpd_dhcp_query_t));
+		return;
+	case VARPD_LOOKUP_OK:
+		break;
+	default:
+		libvarpd_panic("%s plugin returned invalid action %d",
+		    query->vdq_inst->vri_plugin->vpp_name, action);
+	}
+
+	/* Redirect the frame to the looked-up MAC address and send it on. */
+	bcopy(query->vdq_lookup, &query->vdq_ether->ether_dhost, ETHERADDRL);
+	(void) libvarpd_overlay_resend(query->vdq_inst->vri_impl,
+	    query->vdq_otl, query->vdq_buf, query->vdq_bsize);
+
+	libvarpd_plugin_query_reply(query->vdq_query, VARPD_LOOKUP_DROP);
+	umem_free(query, sizeof (varpd_dhcp_query_t));
+}
+
+/*
+ * Build a gratuitous ARP request announcing srcip at srcmac and inject it,
+ * addressed to dstmac, through the given instance. A non-zero vlan results
+ * in a VLAN-tagged frame.
+ */
+void
+libvarpd_inject_arp(varpd_provider_handle_t *vph, const uint16_t vlan,
+    const uint8_t *srcmac, const struct in_addr *srcip, const uint8_t *dstmac)
+{
+	char frame[1500];
+	size_t off;
+	struct ether_arp *arp;
+	varpd_instance_t *inst = (varpd_instance_t *)vph;
+
+	/* Link-layer header, with or without a VLAN tag. */
+	if (vlan == 0) {
+		struct ether_header *hdr = (struct ether_header *)frame;
+
+		off = sizeof (struct ether_header);
+		bcopy(dstmac, &hdr->ether_dhost, ETHERADDRL);
+		bcopy(srcmac, &hdr->ether_shost, ETHERADDRL);
+		hdr->ether_type = htons(ETHERTYPE_ARP);
+	} else {
+		struct ether_vlan_header *vhdr =
+		    (struct ether_vlan_header *)frame;
+
+		off = sizeof (struct ether_vlan_header);
+		bcopy(dstmac, &vhdr->ether_dhost, ETHERADDRL);
+		bcopy(srcmac, &vhdr->ether_shost, ETHERADDRL);
+		vhdr->ether_tpid = htons(ETHERTYPE_VLAN);
+		vhdr->ether_tci = htons(VLAN_TCI(0, ETHER_CFI, vlan));
+		vhdr->ether_type = htons(ETHERTYPE_ARP);
+	}
+
+	/*
+	 * ARP payload. Sender and target protocol address are the same,
+	 * which is what makes the request gratuitous.
+	 */
+	arp = (struct ether_arp *)(frame + off);
+	arp->ea_hdr.ar_hrd = htons(ARPHRD_ETHER);
+	arp->ea_hdr.ar_pro = htons(ETHERTYPE_IP);
+	arp->ea_hdr.ar_hln = ETHERADDRL;
+	arp->ea_hdr.ar_pln = sizeof (struct in_addr);
+	arp->ea_hdr.ar_op = htons(ARPOP_REQUEST);
+	bcopy(srcmac, arp->arp_sha, ETHERADDRL);
+	bcopy(srcip, arp->arp_spa, sizeof (struct in_addr));
+	bcopy(libvarpd_arp_bcast, arp->arp_tha, ETHERADDRL);
+	bcopy(srcip, arp->arp_tpa, sizeof (struct in_addr));
+
+	(void) libvarpd_overlay_instance_inject(inst, frame,
+	    off + sizeof (struct ether_arp));
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_client.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.c
new file mode 100644
index 0000000000..0f616b9533
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.c
@@ -0,0 +1,626 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * varpd client interfaces
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <umem.h>
+#include <unistd.h>
+#include <string.h>
+#include <strings.h>
+#include <door.h>
+
+#include <libvarpd_impl.h>
+
+/*
+ * Client-side state hidden behind the opaque varpd_client_handle_t.
+ * libvarpd_c_create() allocates it and libvarpd_c_destroy() releases it.
+ */
+typedef struct varpd_client {
+ /* Descriptor from open(2) on the door path; target of every door_call(). */
+ int vcl_doorfd;
+} varpd_client_t;
+
+/*
+ * Client-side state hidden behind the opaque varpd_client_prop_handle_t. It
+ * caches the description of one property of one instance, as copied out of a
+ * VARPD_CLIENT_PROPINFO door reply by libvarpd_c_prop_info_from_door().
+ */
+typedef struct varpd_client_prop_info {
+ /* Client whose door is used for requests made through this handle. */
+ varpd_client_t *vcprop_client;
+ /* Instance id given to libvarpd_c_prop_handle_alloc(). */
+ uint64_t vcprop_instance;
+ /* Property id; UINT_MAX until one of the fill routines succeeds. */
+ uint_t vcprop_propid;
+ uint_t vcprop_type;
+ uint_t vcprop_prot;
+ /* Size reported for vcprop_default (copied from vcfa_defsize). */
+ uint32_t vcprop_defsize;
+ /* Size reported for vcprop_poss (copied from vcfa_psize). */
+ uint32_t vcprop_psize;
+ char vcprop_name[LIBVARPD_PROP_NAMELEN];
+ uint8_t vcprop_default[LIBVARPD_PROP_SIZEMAX];
+ /* Handed back by libvarpd_c_prop_info() as a mac_propval_range_t. */
+ uint8_t vcprop_poss[LIBVARPD_PROP_SIZEMAX];
+} varpd_client_prop_info_t;
+
+/*
+ * Perform one door call to varpd using argp as both the request and the reply
+ * buffer. The transfer size is altsize when that is non-zero and
+ * sizeof (varpd_client_arg_t) otherwise. Calls interrupted with EINTR are
+ * retried. Returns 0 on success or the errno left by door_call(); errno values
+ * that indicate a programming error trigger libvarpd_panic().
+ */
+static int
+libvarpd_c_door_call(varpd_client_t *client, varpd_client_arg_t *argp,
+    size_t altsize)
+{
+	door_arg_t darg;
+	size_t len = (altsize == 0) ? sizeof (varpd_client_arg_t) : altsize;
+
+	darg.data_ptr = (char *)argp;
+	darg.data_size = len;
+	darg.desc_ptr = NULL;
+	darg.desc_num = 0;
+	darg.rbuf = (char *)argp;
+	darg.rsize = len;
+
+	for (;;) {
+		if (door_call(client->vcl_doorfd, &darg) == 0)
+			return (0);
+		if (errno != EINTR)
+			break;
+	}
+
+	switch (errno) {
+	case E2BIG:
+	case EFAULT:
+	case EINVAL:
+	case ENOTSUP:
+	case EOVERFLOW:
+	case ENFILE:
+		libvarpd_panic("unhandalable errno from door_call: %d",
+		    errno);
+	}
+
+	return (errno);
+}
+
+/*
+ * Allocate a client handle and open the door at doorname read/write. On
+ * success the handle is stored in *chpp and 0 is returned. On failure *chpp is
+ * left untouched and ENOMEM or the errno from open(2) is returned.
+ */
+int
+libvarpd_c_create(varpd_client_handle_t **chpp, const char *doorname)
+{
+	int fd, err;
+	varpd_client_t *vcl;
+
+	vcl = umem_alloc(sizeof (varpd_client_t), UMEM_DEFAULT);
+	if (vcl == NULL)
+		return (ENOMEM);
+
+	fd = open(doorname, O_RDWR);
+	vcl->vcl_doorfd = fd;
+	if (fd < 0) {
+		err = errno;
+		umem_free(vcl, sizeof (varpd_client_t));
+		return (err);
+	}
+
+	*chpp = (varpd_client_handle_t *)vcl;
+	return (0);
+}
+
+/*
+ * Tear down a client handle created by libvarpd_c_create(): close the door
+ * descriptor (panicking if close(2) fails) and free the handle. Always
+ * returns 0.
+ */
+int
+libvarpd_c_destroy(varpd_client_handle_t *chp)
+{
+	varpd_client_t *client = (varpd_client_t *)chp;
+
+	if (close(client->vcl_doorfd) != 0)
+		libvarpd_panic("failed to close door fd %d: %d",
+		    client->vcl_doorfd, errno);
+
+	/*
+	 * umem_free() must be given the size used at allocation time, which
+	 * is sizeof (varpd_client_t) in libvarpd_c_create(), not the size of
+	 * a pointer to the opaque handle type.
+	 */
+	umem_free(client, sizeof (varpd_client_t));
+	return (0);
+}
+
+/*
+ * Ask varpd to create an instance for the datalink linkid using the plugin
+ * named by search. Returns EINVAL if the name does not fit in
+ * LIBVARPD_PROP_NAMELEN, a door-call errno, or the error reported by the
+ * server. On success stores the new instance id in *cidp and returns 0.
+ */
+int
+libvarpd_c_instance_create(varpd_client_handle_t *chp, datalink_id_t linkid,
+    const char *search, uint64_t *cidp)
+{
+	varpd_client_arg_t req;
+	varpd_client_create_arg_t *create = &req.vca_un.vca_create;
+	int err;
+
+	if (strlen(search) >= LIBVARPD_PROP_NAMELEN)
+		return (EINVAL);
+
+	req.vca_command = VARPD_CLIENT_CREATE;
+	req.vca_errno = 0;
+	create->vcca_linkid = linkid;
+	(void) strlcpy(create->vcca_plugin, search, LIBVARPD_PROP_NAMELEN);
+
+	err = libvarpd_c_door_call((varpd_client_t *)chp, &req, 0);
+	if (err != 0)
+		return (err);
+	if (req.vca_errno != 0)
+		return (req.vca_errno);
+
+	*cidp = create->vcca_id;
+	return (0);
+}
+
+/*
+ * Send VARPD_CLIENT_ACTIVATE for instance cid. Returns 0 on success, otherwise
+ * the door-call errno or the error reported by the server.
+ */
+int
+libvarpd_c_instance_activate(varpd_client_handle_t *chp, uint64_t cid)
+{
+	varpd_client_arg_t req;
+	int err;
+
+	req.vca_command = VARPD_CLIENT_ACTIVATE;
+	req.vca_errno = 0;
+	req.vca_un.vca_instance.vcia_id = cid;
+
+	err = libvarpd_c_door_call((varpd_client_t *)chp, &req, 0);
+	if (err == 0)
+		err = req.vca_errno;
+
+	return (err);
+}
+
+/*
+ * Send VARPD_CLIENT_DESTROY for instance cid. Returns 0 on success, otherwise
+ * the door-call errno or the error reported by the server.
+ */
+int
+libvarpd_c_instance_destroy(varpd_client_handle_t *chp, uint64_t cid)
+{
+	varpd_client_arg_t req;
+	int err;
+
+	req.vca_command = VARPD_CLIENT_DESTROY;
+	req.vca_errno = 0;
+	req.vca_un.vca_instance.vcia_id = cid;
+
+	err = libvarpd_c_door_call((varpd_client_t *)chp, &req, 0);
+	if (err == 0)
+		err = req.vca_errno;
+
+	return (err);
+}
+
+/*
+ * Query how many properties instance cid has. On success stores the count in
+ * *nprops and returns 0; otherwise returns the door-call errno or the error
+ * reported by the server and leaves *nprops untouched.
+ */
+int
+libvarpd_c_prop_nprops(varpd_client_handle_t *chp, uint64_t cid, uint_t *nprops)
+{
+	varpd_client_arg_t req;
+	varpd_client_nprops_arg_t *np = &req.vca_un.vca_nprops;
+	int err;
+
+	req.vca_command = VARPD_CLIENT_NPROPS;
+	req.vca_errno = 0;
+	np->vcna_id = cid;
+	np->vcna_nprops = 0;
+
+	err = libvarpd_c_door_call((varpd_client_t *)chp, &req, 0);
+	if (err != 0)
+		return (err);
+	if (req.vca_errno != 0)
+		return (req.vca_errno);
+
+	*nprops = np->vcna_nprops;
+	return (0);
+}
+
+/*
+ * Allocate a zeroed property handle bound to client chp and instance cid. The
+ * property id starts out as UINT_MAX, meaning "not filled in yet". Returns 0
+ * and stores the handle in *phdlp, or ENOMEM.
+ */
+int
+libvarpd_c_prop_handle_alloc(varpd_client_handle_t *chp, uint64_t cid,
+    varpd_client_prop_handle_t **phdlp)
+{
+	varpd_client_prop_info_t *vcprop;
+
+	vcprop = umem_alloc(sizeof (varpd_client_prop_info_t), UMEM_DEFAULT);
+	if (vcprop == NULL)
+		return (ENOMEM);
+
+	bzero(vcprop, sizeof (varpd_client_prop_info_t));
+	vcprop->vcprop_propid = UINT_MAX;
+	vcprop->vcprop_instance = cid;
+	vcprop->vcprop_client = (varpd_client_t *)chp;
+
+	*phdlp = (varpd_client_prop_handle_t *)vcprop;
+	return (0);
+}
+
+/*
+ * Release a property handle obtained from libvarpd_c_prop_handle_alloc(). The
+ * caller's own pointer is not modified (it is passed by value) and must not be
+ * used afterwards.
+ */
+void
+libvarpd_c_prop_handle_free(varpd_client_prop_handle_t *phdl)
+{
+	umem_free(phdl, sizeof (varpd_client_prop_info_t));
+}
+
+/*
+ * Copy every property-description field of a PROPINFO door reply (vcfap) into
+ * the client's cached property information (infop).
+ */
+static void
+libvarpd_c_prop_info_from_door(varpd_client_prop_info_t *infop,
+    const varpd_client_propinfo_arg_t *vcfap)
+{
+	/* Fixed-size buffers are copied whole, whatever the reported sizes. */
+	bcopy(vcfap->vcfa_name, infop->vcprop_name, LIBVARPD_PROP_NAMELEN);
+	bcopy(vcfap->vcfa_default, infop->vcprop_default,
+	    LIBVARPD_PROP_SIZEMAX);
+	bcopy(vcfap->vcfa_poss, infop->vcprop_poss, LIBVARPD_PROP_SIZEMAX);
+
+	infop->vcprop_defsize = vcfap->vcfa_defsize;
+	infop->vcprop_psize = vcfap->vcfa_psize;
+	infop->vcprop_prot = vcfap->vcfa_prot;
+	infop->vcprop_type = vcfap->vcfa_type;
+	infop->vcprop_propid = vcfap->vcfa_propid;
+}
+
+/*
+ * Fill phdl with the description of the property called name. The request
+ * carries a property id of UINT_MAX so that the server searches by name.
+ * Returns EINVAL if name does not fit in LIBVARPD_PROP_NAMELEN, a door-call
+ * errno, the error reported by the server, or 0 on success.
+ */
+int
+libvarpd_c_prop_info_fill_by_name(varpd_client_prop_handle_t *phdl,
+    const char *name)
+{
+	varpd_client_prop_info_t *vcprop = (varpd_client_prop_info_t *)phdl;
+	varpd_client_arg_t req;
+	varpd_client_propinfo_arg_t *info = &req.vca_un.vca_info;
+	int err;
+
+	if (strlen(name) >= LIBVARPD_PROP_NAMELEN)
+		return (EINVAL);
+
+	bzero(&req, sizeof (varpd_client_arg_t));
+	req.vca_command = VARPD_CLIENT_PROPINFO;
+	req.vca_errno = 0;
+	info->vcfa_id = vcprop->vcprop_instance;
+	info->vcfa_propid = UINT_MAX;
+	(void) strlcpy(info->vcfa_name, name, LIBVARPD_PROP_NAMELEN);
+
+	err = libvarpd_c_door_call(vcprop->vcprop_client, &req, 0);
+	if (err != 0)
+		return (err);
+	if (req.vca_errno != 0)
+		return (req.vca_errno);
+
+	libvarpd_c_prop_info_from_door(vcprop, info);
+	return (0);
+}
+
+/*
+ * Fill phdl with the description of the property whose id is propid. Returns
+ * 0 on success, otherwise the door-call errno or the error reported by the
+ * server; the handle is only updated on success.
+ */
+int
+libvarpd_c_prop_info_fill(varpd_client_prop_handle_t *phdl, uint_t propid)
+{
+	varpd_client_prop_info_t *vcprop = (varpd_client_prop_info_t *)phdl;
+	varpd_client_arg_t req;
+	varpd_client_propinfo_arg_t *info = &req.vca_un.vca_info;
+	int err;
+
+	bzero(&req, sizeof (varpd_client_arg_t));
+	req.vca_command = VARPD_CLIENT_PROPINFO;
+	req.vca_errno = 0;
+	info->vcfa_propid = propid;
+	info->vcfa_id = vcprop->vcprop_instance;
+
+	err = libvarpd_c_door_call(vcprop->vcprop_client, &req, 0);
+	if (err != 0)
+		return (err);
+	if (req.vca_errno != 0)
+		return (req.vca_errno);
+
+	libvarpd_c_prop_info_from_door(vcprop, info);
+	return (0);
+}
+
+/*
+ * Hand out the cached description held by phdl. Each output pointer is
+ * optional and skipped when NULL. The name, default value and possible-value
+ * pointers refer to storage inside the handle itself. Returns EINVAL if the
+ * handle has not been filled in yet (property id still UINT_MAX), otherwise 0.
+ */
+int
+libvarpd_c_prop_info(varpd_client_prop_handle_t *phdl, const char **namep,
+    uint_t *typep, uint_t *protp, const void **defp, uint32_t *defsizep,
+    const mac_propval_range_t **possp)
+{
+	varpd_client_prop_info_t *vcprop = (varpd_client_prop_info_t *)phdl;
+
+	if (vcprop->vcprop_propid == UINT_MAX)
+		return (EINVAL);
+
+	/* Scalar attributes. */
+	if (typep != NULL)
+		*typep = vcprop->vcprop_type;
+	if (protp != NULL)
+		*protp = vcprop->vcprop_prot;
+	if (defsizep != NULL)
+		*defsizep = vcprop->vcprop_defsize;
+
+	/* Pointers into the handle's own buffers. */
+	if (namep != NULL)
+		*namep = vcprop->vcprop_name;
+	if (defp != NULL)
+		*defp = vcprop->vcprop_default;
+	if (possp != NULL)
+		*possp = (const mac_propval_range_t *)vcprop->vcprop_poss;
+
+	return (0);
+}
+
+/*
+ * Fetch the current value of the property described by phdl into buf. On
+ * entry *len is the capacity of buf and must be at least
+ * LIBVARPD_PROP_SIZEMAX; on success it is set to the number of bytes copied.
+ * Returns EINVAL for NULL arguments or an unfilled handle, EOVERFLOW for a
+ * buffer that is too small, a door-call errno, the error reported by the
+ * server, or E2BIG if the reply claims more than LIBVARPD_PROP_SIZEMAX bytes.
+ */
+int
+libvarpd_c_prop_get(varpd_client_prop_handle_t *phdl, void *buf, uint32_t *len)
+{
+	varpd_client_prop_info_t *vcprop = (varpd_client_prop_info_t *)phdl;
+	varpd_client_arg_t req;
+	varpd_client_prop_arg_t *prop = &req.vca_un.vca_prop;
+	int err;
+
+	if (len == NULL)
+		return (EINVAL);
+	if (buf == NULL)
+		return (EINVAL);
+	if (vcprop->vcprop_propid == UINT_MAX)
+		return (EINVAL);
+	if (*len < LIBVARPD_PROP_SIZEMAX)
+		return (EOVERFLOW);
+
+	bzero(&req, sizeof (varpd_client_arg_t));
+	req.vca_command = VARPD_CLIENT_GETPROP;
+	req.vca_errno = 0;
+	prop->vcpa_propid = vcprop->vcprop_propid;
+	prop->vcpa_id = vcprop->vcprop_instance;
+
+	err = libvarpd_c_door_call(vcprop->vcprop_client, &req, 0);
+	if (err != 0)
+		return (err);
+	if (req.vca_errno != 0)
+		return (req.vca_errno);
+
+	/*
+	 * XXX The server should never send an oversized reply, and aborting
+	 * would be the honest response, but a library should not kill its
+	 * client. Report the inconsistency as an error instead.
+	 */
+	if (prop->vcpa_bufsize > LIBVARPD_PROP_SIZEMAX)
+		return (E2BIG);
+
+	bcopy(prop->vcpa_buf, buf, prop->vcpa_bufsize);
+	*len = prop->vcpa_bufsize;
+	return (0);
+}
+
+/*
+ * Set the property described by phdl to the len bytes at buf. Returns EINVAL
+ * for a zero length, a NULL buffer or an unfilled handle, EOVERFLOW when len
+ * exceeds LIBVARPD_PROP_SIZEMAX, a door-call errno, the error reported by the
+ * server, or 0 on success.
+ */
+int
+libvarpd_c_prop_set(varpd_client_prop_handle_t *phdl, const void *buf,
+    uint32_t len)
+{
+	int ret;
+	varpd_client_arg_t carg;
+	varpd_client_prop_arg_t *vcpap = &carg.vca_un.vca_prop;
+	varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl;
+
+	/* len is an integer, so compare it against 0 rather than NULL. */
+	if (len == 0 || buf == NULL || infop->vcprop_propid == UINT_MAX)
+		return (EINVAL);
+	if (len > LIBVARPD_PROP_SIZEMAX)
+		return (EOVERFLOW);
+
+	/*
+	 * Zero the request, as libvarpd_c_prop_get() does, so that no
+	 * uninitialized stack bytes are sent through the door.
+	 */
+	bzero(&carg, sizeof (varpd_client_arg_t));
+	carg.vca_command = VARPD_CLIENT_SETPROP;
+	carg.vca_errno = 0;
+	vcpap->vcpa_id = infop->vcprop_instance;
+	vcpap->vcpa_propid = infop->vcprop_propid;
+	vcpap->vcpa_bufsize = len;
+	bcopy(buf, vcpap->vcpa_buf, len);
+
+	ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0);
+	if (ret != 0)
+		return (ret);
+
+	if (carg.vca_errno != 0)
+		return (carg.vca_errno);
+
+	return (0);
+}
+
+/*
+ * Look up the varpd instance for datalink linkid. On success the instance id
+ * is stored in *instp (when instp is non-NULL) and 0 is returned; otherwise
+ * the door-call errno or the error reported by the server is returned.
+ */
+int
+libvarpd_c_instance_lookup(varpd_client_handle_t *chp, datalink_id_t linkid,
+    uint64_t *instp)
+{
+	varpd_client_arg_t req;
+	varpd_client_lookup_arg_t *lookup = &req.vca_un.vca_lookup;
+	int err;
+
+	req.vca_command = VARPD_CLIENT_LOOKUP;
+	req.vca_errno = 0;
+	lookup->vcla_linkid = linkid;
+
+	err = libvarpd_c_door_call((varpd_client_t *)chp, &req, 0);
+	if (err != 0)
+		return (err);
+	if (req.vca_errno != 0)
+		return (req.vca_errno);
+
+	if (instp != NULL)
+		*instp = lookup->vcla_id;
+	return (0);
+}
+
+/*
+ * Retrieve the destination type and target mode of instance cid. Either
+ * output pointer may be NULL. Returns 0 on success, otherwise the door-call
+ * errno or the error reported by the server, leaving the outputs untouched.
+ */
+int
+libvarpd_c_instance_target_mode(varpd_client_handle_t *chp, uint64_t cid,
+    uint_t *dtype, uint_t *mtype)
+{
+	varpd_client_arg_t req;
+	varpd_client_target_mode_arg_t *mode = &req.vca_un.vca_mode;
+	int err;
+
+	req.vca_command = VARPD_CLIENT_TARGET_MODE;
+	req.vca_errno = 0;
+	mode->vtma_id = cid;
+
+	err = libvarpd_c_door_call((varpd_client_t *)chp, &req, 0);
+	if (err != 0)
+		return (err);
+	if (req.vca_errno != 0)
+		return (req.vca_errno);
+
+	if (mtype != NULL)
+		*mtype = mode->vtma_mode;
+	if (dtype != NULL)
+		*dtype = mode->vtma_dest;
+
+	return (0);
+}
+
+/*
+ * Send VARPD_CLIENT_CACHE_FLUSH for instance cid. Returns 0 on success,
+ * otherwise the door-call errno or the error reported by the server.
+ */
+int
+libvarpd_c_instance_cache_flush(varpd_client_handle_t *chp, uint64_t cid)
+{
+	varpd_client_arg_t req;
+	int err;
+
+	req.vca_command = VARPD_CLIENT_CACHE_FLUSH;
+	req.vca_errno = 0;
+	req.vca_un.vca_cache.vtca_id = cid;
+
+	err = libvarpd_c_door_call((varpd_client_t *)chp, &req, 0);
+	if (err == 0)
+		err = req.vca_errno;
+
+	return (err);
+}
+
+/*
+ * Send VARPD_CLIENT_CACHE_DELETE for the MAC address key in instance cid.
+ * Returns EINVAL for a NULL key, a door-call errno, the error reported by the
+ * server, or 0 on success.
+ */
+int
+libvarpd_c_instance_cache_delete(varpd_client_handle_t *chp, uint64_t cid,
+    const struct ether_addr *key)
+{
+	varpd_client_arg_t req;
+	varpd_client_target_cache_arg_t *cache = &req.vca_un.vca_cache;
+	int err;
+
+	if (key == NULL)
+		return (EINVAL);
+
+	req.vca_command = VARPD_CLIENT_CACHE_DELETE;
+	req.vca_errno = 0;
+	bcopy(key, cache->vtca_key, ETHERADDRL);
+	cache->vtca_id = cid;
+
+	err = libvarpd_c_door_call((varpd_client_t *)chp, &req, 0);
+	if (err == 0)
+		err = req.vca_errno;
+
+	return (err);
+}
+
+/*
+ * Fetch the cache entry for the MAC address key in instance cid. On success
+ * the entry returned by the server is copied to *entry and 0 is returned.
+ * Returns EINVAL for NULL arguments, otherwise the door-call errno or the
+ * error reported by the server.
+ */
+int
+libvarpd_c_instance_cache_get(varpd_client_handle_t *chp, uint64_t cid,
+    const struct ether_addr *key, varpd_client_cache_entry_t *entry)
+{
+	varpd_client_arg_t req;
+	varpd_client_target_cache_arg_t *cache = &req.vca_un.vca_cache;
+	int err;
+
+	if (key == NULL || entry == NULL)
+		return (EINVAL);
+
+	req.vca_command = VARPD_CLIENT_CACHE_GET;
+	req.vca_errno = 0;
+	cache->vtca_id = cid;
+	/* The reply slot is cleared before the key goes out. */
+	bzero(&cache->vtca_entry, sizeof (varpd_client_cache_entry_t));
+	bcopy(key, cache->vtca_key, ETHERADDRL);
+
+	err = libvarpd_c_door_call((varpd_client_t *)chp, &req, 0);
+	if (err != 0)
+		return (err);
+	if (req.vca_errno != 0)
+		return (req.vca_errno);
+
+	bcopy(&cache->vtca_entry, entry, sizeof (varpd_client_cache_entry_t));
+	return (0);
+}
+
+/*
+ * Install *entry as the cache entry for the MAC address key in instance cid.
+ * Returns EINVAL for NULL arguments, a door-call errno, the error reported by
+ * the server, or 0 on success.
+ */
+int
+libvarpd_c_instance_cache_set(varpd_client_handle_t *chp, uint64_t cid,
+    const struct ether_addr *key, const varpd_client_cache_entry_t *entry)
+{
+	varpd_client_arg_t req;
+	varpd_client_target_cache_arg_t *cache = &req.vca_un.vca_cache;
+	int err;
+
+	if (key == NULL || entry == NULL)
+		return (EINVAL);
+
+	req.vca_command = VARPD_CLIENT_CACHE_SET;
+	req.vca_errno = 0;
+	cache->vtca_id = cid;
+	bcopy(entry, &cache->vtca_entry, sizeof (varpd_client_cache_entry_t));
+	bcopy(key, cache->vtca_key, ETHERADDRL);
+
+	err = libvarpd_c_door_call((varpd_client_t *)chp, &req, 0);
+	if (err == 0)
+		err = req.vca_errno;
+
+	return (err);
+}
+
+/*
+ * Walk the target cache of instance cid, calling func once per entry with the
+ * entry's MAC address, a varpd_client_cache_entry_t built from it, and arg.
+ * Entries are fetched from the server in batches through one dynamically
+ * allocated door buffer. That buffer is reused unchanged between calls, so
+ * whatever the server leaves in vtcw_marker is sent back on the next request.
+ *
+ * The walk ends when the server returns zero entries or when func returns
+ * non-zero; both cases return 0. Returns errno if the buffer cannot be
+ * allocated, a door-call errno, the error reported by the server, or E2BIG if
+ * the server claims to have returned more entries than were requested.
+ */
+int
+libvarpd_c_instance_cache_walk(varpd_client_handle_t *chp, uint64_t cid,
+    varpd_client_cache_f func, void *arg)
+{
+	int ret = 0;
+	/* Number of entries requested per door call. */
+	const uint_t batch = 100;
+	/*
+	 * NOTE(review): the space is sized with varpd_client_cache_entry_t
+	 * while vtcw_ents[] holds a different (otce_*) entry type; confirm the
+	 * buffer really has room for `batch' entries of that type.
+	 */
+	size_t bufsize = sizeof (varpd_client_arg_t) +
+	    batch * sizeof (varpd_client_cache_entry_t);
+	varpd_client_t *client = (varpd_client_t *)chp;
+	varpd_client_arg_t *cargp;
+	varpd_client_target_walk_arg_t *vctwap;
+
+	/*
+	 * Because the number of entries involved in a walk may be large, we
+	 * dynamically allocate room for a batch of entries per request. This
+	 * also means that the average door request doesn't inflate by the
+	 * number of entries we want.
+	 */
+	cargp = umem_zalloc(bufsize, UMEM_DEFAULT);
+	if (cargp == NULL)
+		return (errno);
+	vctwap = &cargp->vca_un.vca_walk;
+	for (;;) {
+		uint_t i;
+
+		cargp->vca_command = VARPD_CLIENT_CACHE_WALK;
+		cargp->vca_errno = 0;
+		vctwap->vtcw_id = cid;
+		vctwap->vtcw_count = batch;
+
+		ret = libvarpd_c_door_call(client, cargp, bufsize);
+		if (ret != 0)
+			break;
+
+		if (cargp->vca_errno != 0) {
+			ret = cargp->vca_errno;
+			break;
+		}
+
+		if (vctwap->vtcw_count == 0) {
+			ret = 0;
+			break;
+		}
+
+		/*
+		 * Never trust the reply to stay within what we asked for;
+		 * reading more than `batch' entries would run off the end of
+		 * our buffer.
+		 */
+		if (vctwap->vtcw_count > batch) {
+			ret = E2BIG;
+			break;
+		}
+
+		for (i = 0; i < vctwap->vtcw_count; i++) {
+			varpd_client_cache_entry_t ent;
+
+			ent.vcp_flags = vctwap->vtcw_ents[i].otce_flags;
+			bcopy(vctwap->vtcw_ents[i].otce_dest.otp_mac,
+			    &ent.vcp_mac, ETHERADDRL);
+			ent.vcp_ip = vctwap->vtcw_ents[i].otce_dest.otp_ip;
+			ent.vcp_port = vctwap->vtcw_ents[i].otce_dest.otp_port;
+			ret = func(chp, cid,
+			    (struct ether_addr *)vctwap->vtcw_ents[i].otce_mac,
+			    &ent, arg);
+			/* A non-zero callback result stops the walk early. */
+			if (ret != 0) {
+				ret = 0;
+				goto done;
+			}
+		}
+	}
+
+done:
+	umem_free(cargp, bufsize);
+	return (ret);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_client.h b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.h
new file mode 100644
index 0000000000..b794472bc1
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.h
@@ -0,0 +1,94 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_CLIENT_H
+#define _LIBVARPD_CLIENT_H
+
+/*
+ * varpd interfaces
+ */
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <sys/mac.h>
+/* XXX Should we have our own, but compatible types? */
+#include <sys/overlay_target.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct __varpd_client_handle varpd_client_handle_t;
+typedef struct __varpd_client_prop_handle varpd_client_prop_handle_t;
+
+/*
+ * One target cache entry as seen by library clients. It is passed to and from
+ * the libvarpd_c_instance_cache_get/set routines and handed to the
+ * varpd_client_cache_f walk callback. The MAC address the entry is keyed by
+ * is passed separately and is not part of this structure.
+ */
+typedef struct varpd_client_cache_entry {
+ struct ether_addr vcp_mac; /* destination MAC address */
+ uint16_t vcp_flags; /* entry flags as reported by varpd */
+ struct in6_addr vcp_ip; /* destination IP address */
+ uint16_t vcp_port; /* destination port */
+} varpd_client_cache_entry_t;
+
+/*
+ * This should stay in sync with OVERLAY_PROP_*
+ */
+#define LIBVARPD_PROP_SIZEMAX 256
+#define LIBVARPD_PROP_NAMELEN 32
+
+extern int libvarpd_c_create(varpd_client_handle_t **, const char *);
+extern int libvarpd_c_destroy(varpd_client_handle_t *);
+extern int libvarpd_c_instance_create(varpd_client_handle_t *, datalink_id_t,
+ const char *, uint64_t *);
+extern int libvarpd_c_instance_activate(varpd_client_handle_t *, uint64_t);
+extern int libvarpd_c_instance_destroy(varpd_client_handle_t *, uint64_t);
+
+extern int libvarpd_c_prop_nprops(varpd_client_handle_t *, uint64_t, uint_t *);
+extern int libvarpd_c_prop_handle_alloc(varpd_client_handle_t *, uint64_t,
+ varpd_client_prop_handle_t **);
+extern void libvarpd_c_prop_handle_free(varpd_client_prop_handle_t *);
+extern int libvarpd_c_prop_info_fill(varpd_client_prop_handle_t *, uint_t);
+extern int libvarpd_c_prop_info_fill_by_name(varpd_client_prop_handle_t *,
+ const char *);
+extern int libvarpd_c_prop_info(varpd_client_prop_handle_t *, const char **,
+ uint_t *, uint_t *, const void **, uint32_t *,
+ const mac_propval_range_t **);
+extern int libvarpd_c_prop_get(varpd_client_prop_handle_t *, void *,
+ uint32_t *);
+extern int libvarpd_c_prop_set(varpd_client_prop_handle_t *, const void *,
+ uint32_t);
+
+/* XXX Strawman */
+extern int libvarpd_c_instance_lookup(varpd_client_handle_t *, datalink_id_t,
+ uint64_t *);
+extern int libvarpd_c_instance_target_mode(varpd_client_handle_t *, uint64_t,
+ uint_t *, uint_t *);
+extern int libvarpd_c_instance_cache_flush(varpd_client_handle_t *, uint64_t);
+extern int libvarpd_c_instance_cache_delete(varpd_client_handle_t *, uint64_t,
+ const struct ether_addr *);
+extern int libvarpd_c_instance_cache_get(varpd_client_handle_t *, uint64_t,
+ const struct ether_addr *, varpd_client_cache_entry_t *);
+extern int libvarpd_c_instance_cache_set(varpd_client_handle_t *, uint64_t,
+ const struct ether_addr *, const varpd_client_cache_entry_t *);
+
+typedef int (*varpd_client_cache_f)(varpd_client_handle_t *, uint64_t,
+ const struct ether_addr *, const varpd_client_cache_entry_t *, void *);
+extern int libvarpd_c_instance_cache_walk(varpd_client_handle_t *, uint64_t,
+ varpd_client_cache_f, void *);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_CLIENT_H */
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_door.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_door.c
new file mode 100644
index 0000000000..f2f93562ac
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_door.c
@@ -0,0 +1,457 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/*
+ * varpd door server logic
+ */
+
+#include <door.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <priv.h>
+#include <libvarpd_impl.h>
+
+typedef int (libvarpd_door_f)(varpd_impl_t *, varpd_client_arg_t *, ucred_t *);
+
+/*
+ * Report whether the caller identified by credp holds PRIV_SYS_NET_CONFIG in
+ * its effective privilege set. A credential whose effective set cannot be
+ * obtained is treated as unprivileged.
+ */
+static boolean_t
+libvarpd_door_privileged(ucred_t *credp)
+{
+	const priv_set_t *eff = ucred_getprivset(credp, PRIV_EFFECTIVE);
+
+	if (eff != NULL)
+		return (priv_ismember(eff, PRIV_SYS_NET_CONFIG));
+
+	return (B_FALSE);
+}
+
+/*
+ * Door handler for VARPD_CLIENT_CREATE: create an instance for the requested
+ * datalink and plugin and, on success, hand its id back in vcca_id. The
+ * plugin name comes from the client, so it is forcibly NUL-terminated first.
+ */
+static int
+libvarpd_door_f_create(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+    ucred_t *credp)
+{
+	varpd_client_create_arg_t *create = &vcap->vca_un.vca_create;
+	varpd_instance_handle_t *inst;
+	int err;
+
+	create->vcca_plugin[LIBVARPD_PROP_NAMELEN-1] = '\0';
+	err = libvarpd_instance_create((varpd_handle_t *)vip,
+	    create->vcca_linkid, create->vcca_plugin, &inst);
+	if (err != 0)
+		return (err);
+
+	create->vcca_id = libvarpd_instance_id(inst);
+	return (0);
+}
+
+/*
+ * Door handler for VARPD_CLIENT_ACTIVATE: activate the instance named by
+ * vcia_id. Returns ENOENT if no such instance exists, otherwise the result of
+ * libvarpd_instance_activate().
+ */
+static int
+libvarpd_door_f_activate(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+    ucred_t *credp)
+{
+	varpd_client_instance_arg_t *req = &vcap->vca_un.vca_instance;
+	varpd_instance_handle_t *inst;
+
+	inst = libvarpd_instance_lookup((varpd_handle_t *)vip, req->vcia_id);
+	if (inst != NULL)
+		return (libvarpd_instance_activate(inst));
+
+	return (ENOENT);
+}
+
+/*
+ * Door handler for VARPD_CLIENT_DESTROY: destroy the instance named by
+ * vcia_id. Returns ENOENT if no such instance exists, otherwise 0.
+ */
+static int
+libvarpd_door_f_destroy(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+    ucred_t *credp)
+{
+	varpd_client_instance_arg_t *req = &vcap->vca_un.vca_instance;
+	varpd_instance_handle_t *inst;
+
+	inst = libvarpd_instance_lookup((varpd_handle_t *)vip, req->vcia_id);
+	if (inst != NULL) {
+		libvarpd_instance_destroy(inst);
+		return (0);
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * Door handler for VARPD_CLIENT_NPROPS: store the property count of the
+ * instance named by vcna_id in vcna_nprops. Returns ENOENT if no such
+ * instance exists, otherwise the result of libvarpd_prop_nprops().
+ */
+static int
+libvarpd_door_f_nprops(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+    ucred_t *credp)
+{
+	varpd_client_nprops_arg_t *req = &vcap->vca_un.vca_nprops;
+	varpd_instance_handle_t *inst;
+
+	inst = libvarpd_instance_lookup((varpd_handle_t *)vip, req->vcna_id);
+	if (inst != NULL)
+		return (libvarpd_prop_nprops(inst, &req->vcna_nprops));
+
+	return (ENOENT);
+}
+
+/*
+ * Door handler for VARPD_CLIENT_PROPINFO: describe one property of the
+ * instance named by vcfa_id. If vcfa_propid is not UINT_MAX the property is
+ * selected by id. Otherwise every property is examined in turn until one
+ * whose name equals vcfa_name is found, and vcfa_propid is set to its index.
+ * The description is written back into the request through
+ * libvarpd_prop_door_convert().
+ *
+ * Returns ENOENT for an unknown instance or property name, any error from the
+ * libvarpd_prop_* routines, or 0 on success.
+ */
+static int
+libvarpd_door_f_propinfo(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+    ucred_t *credp)
+{
+	varpd_client_propinfo_arg_t *req = &vcap->vca_un.vca_info;
+	varpd_instance_handle_t *inst;
+	varpd_prop_handle_t *prop;
+	int err;
+
+	inst = libvarpd_instance_lookup((varpd_handle_t *)vip, req->vcfa_id);
+	if (inst == NULL)
+		return (ENOENT);
+
+	err = libvarpd_prop_handle_alloc((varpd_handle_t *)vip, inst, &prop);
+	if (err != 0)
+		return (err);
+
+	if (req->vcfa_propid == UINT_MAX) {
+		const char *cur;
+		uint_t idx, count;
+
+		/* The name comes from the client; make sure it terminates. */
+		req->vcfa_name[LIBVARPD_PROP_NAMELEN-1] = '\0';
+		if ((err = libvarpd_prop_nprops(inst, &count)) != 0)
+			goto out;
+
+		for (idx = 0; idx < count; idx++) {
+			if ((err = libvarpd_prop_info_fill(prop, idx)) != 0)
+				goto out;
+			err = libvarpd_prop_info(prop, &cur, NULL, NULL, NULL,
+			    NULL, NULL);
+			if (err != 0)
+				goto out;
+			if (strcmp(req->vcfa_name, cur) == 0)
+				break;
+		}
+
+		if (idx == count) {
+			err = ENOENT;
+			goto out;
+		}
+		req->vcfa_propid = idx;
+	} else {
+		err = libvarpd_prop_info_fill(prop, req->vcfa_propid);
+		if (err != 0)
+			goto out;
+	}
+
+	libvarpd_prop_door_convert(prop, req);
+	err = 0;
+out:
+	libvarpd_prop_handle_free(prop);
+	return (err);
+}
+
+/*
+ * Door handler for VARPD_CLIENT_GETPROP: fetch the value of property
+ * vcpa_propid of the instance named by vcpa_id into vcpa_buf and record its
+ * length in vcpa_bufsize.
+ *
+ * Returns ENOENT for an unknown instance, any error from the libvarpd_prop_*
+ * routines (including a failed libvarpd_prop_get(), which previously went
+ * unreported), or 0 on success.
+ */
+static int
+libvarpd_door_f_getprop(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+    ucred_t *credp)
+{
+	int ret;
+	/*
+	 * Never hand the callee an indeterminate size. The client copies up
+	 * to LIBVARPD_PROP_SIZEMAX bytes through vcpa_buf, so that is used as
+	 * the capacity here.
+	 * NOTE(review): confirm libvarpd_prop_get() treats this as in/out.
+	 */
+	uint32_t size = LIBVARPD_PROP_SIZEMAX;
+	varpd_instance_handle_t *ihp;
+	varpd_prop_handle_t *phdl;
+	varpd_client_prop_arg_t *vcpap = &vcap->vca_un.vca_prop;
+
+	ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcpap->vcpa_id);
+	if (ihp == NULL)
+		return (ENOENT);
+	ret = libvarpd_prop_handle_alloc((varpd_handle_t *)vip, ihp, &phdl);
+	if (ret != 0)
+		return (ret);
+
+	ret = libvarpd_prop_info_fill(phdl, vcpap->vcpa_propid);
+	if (ret != 0) {
+		libvarpd_prop_handle_free(phdl);
+		return (ret);
+	}
+
+	ret = libvarpd_prop_get(phdl, vcpap->vcpa_buf, &size);
+	if (ret == 0)
+		vcpap->vcpa_bufsize = size;
+	libvarpd_prop_handle_free(phdl);
+
+	/* Propagate a failed get instead of reporting success. */
+	return (ret);
+}
+
+/*
+ * Door handler for VARPD_CLIENT_SETPROP: set property vcpa_propid of the
+ * instance named by vcpa_id to the vcpa_bufsize bytes in vcpa_buf.
+ *
+ * Returns EOVERFLOW if the client-supplied size exceeds
+ * LIBVARPD_PROP_SIZEMAX, ENOENT for an unknown instance, or the result of the
+ * libvarpd_prop_* routines.
+ */
+static int
+libvarpd_door_f_setprop(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+    ucred_t *credp)
+{
+	int ret;
+	varpd_instance_handle_t *ihp;
+	varpd_prop_handle_t *phdl;
+	varpd_client_prop_arg_t *vcpap = &vcap->vca_un.vca_prop;
+
+	/*
+	 * The size is client-controlled. Refuse anything beyond the limit the
+	 * client library itself enforces, so that nothing downstream reads
+	 * past the property buffer.
+	 */
+	if (vcpap->vcpa_bufsize > LIBVARPD_PROP_SIZEMAX)
+		return (EOVERFLOW);
+
+	ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcpap->vcpa_id);
+	if (ihp == NULL)
+		return (ENOENT);
+	ret = libvarpd_prop_handle_alloc((varpd_handle_t *)vip, ihp, &phdl);
+	if (ret != 0)
+		return (ret);
+
+	ret = libvarpd_prop_info_fill(phdl, vcpap->vcpa_propid);
+	if (ret != 0) {
+		libvarpd_prop_handle_free(phdl);
+		return (ret);
+	}
+
+	ret = libvarpd_prop_set(phdl, vcpap->vcpa_buf, vcpap->vcpa_bufsize);
+	libvarpd_prop_handle_free(phdl);
+	return (ret);
+}
+
+/*
+ * Door handler for VARPD_CLIENT_LOOKUP: translate the datalink id vcla_linkid
+ * into an instance id, returned in vcla_id. Returns ENOENT if no instance is
+ * associated with that datalink.
+ */
+static int
+libvarpd_door_f_lookup(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+    ucred_t *credp)
+{
+	varpd_client_lookup_arg_t *req = &vcap->vca_un.vca_lookup;
+	varpd_instance_t *found;
+
+	found = libvarpd_instance_lookup_by_dlid(vip, req->vcla_linkid);
+	if (found != NULL) {
+		req->vcla_id = found->vri_id;
+		return (0);
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * Door handler for VARPD_CLIENT_TARGET_MODE: report the destination type
+ * (vri_dest) and mode (vri_mode) of the instance named by vtma_id. Returns
+ * ENOENT if no such instance exists.
+ */
+static int
+libvarpd_door_f_target(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+    ucred_t *credp)
+{
+	varpd_client_target_mode_arg_t *req = &vcap->vca_un.vca_mode;
+	varpd_instance_t *inst;
+
+	inst = (varpd_instance_t *)libvarpd_instance_lookup(
+	    (varpd_handle_t *)vip, req->vtma_id);
+	if (inst == NULL)
+		return (ENOENT);
+
+	req->vtma_mode = inst->vri_mode;
+	req->vtma_dest = inst->vri_dest;
+	return (0);
+}
+
+/*
+ * Door handler for VARPD_CLIENT_CACHE_FLUSH: flush the target cache of the
+ * instance named by vtca_id. The caller must hold PRIV_SYS_NET_CONFIG (EPERM
+ * otherwise); ENOENT is returned for an unknown instance.
+ */
+static int
+libvarpd_door_f_flush(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+    ucred_t *credp)
+{
+	varpd_client_target_cache_arg_t *req = &vcap->vca_un.vca_cache;
+	varpd_instance_handle_t *inst;
+
+	if (!libvarpd_door_privileged(credp))
+		return (EPERM);
+
+	inst = libvarpd_instance_lookup((varpd_handle_t *)vip, req->vtca_id);
+	if (inst != NULL)
+		return (libvarpd_overlay_cache_flush((varpd_instance_t *)inst));
+
+	return (ENOENT);
+}
+
+/*
+ * Door handler for VARPD_CLIENT_CACHE_DELETE: remove the entry keyed by
+ * vtca_key from the target cache of the instance named by vtca_id. The caller
+ * must hold PRIV_SYS_NET_CONFIG (EPERM otherwise); ENOENT is returned for an
+ * unknown instance.
+ */
+static int
+libvarpd_door_f_delete(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+    ucred_t *credp)
+{
+	varpd_client_target_cache_arg_t *req = &vcap->vca_un.vca_cache;
+	varpd_instance_handle_t *inst;
+
+	if (!libvarpd_door_privileged(credp))
+		return (EPERM);
+
+	inst = libvarpd_instance_lookup((varpd_handle_t *)vip, req->vtca_id);
+	if (inst == NULL)
+		return (ENOENT);
+
+	return (libvarpd_overlay_cache_delete((varpd_instance_t *)inst,
+	    req->vtca_key));
+}
+
+/*
+ * Door handler for VARPD_CLIENT_CACHE_GET: look up the entry keyed by
+ * vtca_key in the target cache of the instance named by vtca_id and return it
+ * in vtca_entry. No privilege check is made here. Returns ENOENT for an
+ * unknown instance.
+ */
+static int
+libvarpd_door_f_get(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+    ucred_t *credp)
+{
+	varpd_client_target_cache_arg_t *req = &vcap->vca_un.vca_cache;
+	varpd_instance_handle_t *inst;
+
+	/* XXX Should this be privileged? */
+	inst = libvarpd_instance_lookup((varpd_handle_t *)vip, req->vtca_id);
+	if (inst == NULL)
+		return (ENOENT);
+
+	return (libvarpd_overlay_cache_get((varpd_instance_t *)inst,
+	    req->vtca_key, &req->vtca_entry));
+}
+
+/*
+ * Door handler for VARPD_CLIENT_CACHE_SET: install vtca_entry under vtca_key
+ * in the target cache of the instance named by vtca_id. The caller must hold
+ * PRIV_SYS_NET_CONFIG (EPERM otherwise); ENOENT is returned for an unknown
+ * instance.
+ */
+static int
+libvarpd_door_f_set(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+    ucred_t *credp)
+{
+	varpd_client_target_cache_arg_t *req = &vcap->vca_un.vca_cache;
+	varpd_instance_handle_t *inst;
+
+	if (!libvarpd_door_privileged(credp))
+		return (EPERM);
+
+	inst = libvarpd_instance_lookup((varpd_handle_t *)vip, req->vtca_id);
+	if (inst == NULL)
+		return (ENOENT);
+
+	return (libvarpd_overlay_cache_set((varpd_instance_t *)inst,
+	    req->vtca_key, &req->vtca_entry));
+}
+
+/*
+ * Door handler for VARPD_CLIENT_CACHE_WALK: fill vtcw_ents with the next
+ * batch of cache entries for the instance named by vtcw_id, using vtcw_marker
+ * and vtcw_count as in/out parameters of
+ * libvarpd_overlay_cache_walk_fill(). Returns ENOENT for an unknown instance.
+ *
+ * NOTE(review): the client-supplied vtcw_count is not checked here against
+ * the size of the door buffer; confirm that the callee bounds it.
+ */
+static int
+libvarpd_door_f_walk(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+    ucred_t *credp)
+{
+	varpd_client_target_walk_arg_t *req = &vcap->vca_un.vca_walk;
+	varpd_instance_handle_t *inst;
+
+	inst = libvarpd_instance_lookup((varpd_handle_t *)vip, req->vtcw_id);
+	if (inst == NULL)
+		return (ENOENT);
+
+	return (libvarpd_overlay_cache_walk_fill((varpd_instance_t *)inst,
+	    &req->vtcw_marker, &req->vtcw_count, req->vtcw_ents));
+}
+
+/*
+ * Dispatch table for door requests. libvarpd_door_server() indexes it with
+ * (vca_command - 1) after checking that the command is greater than zero and
+ * below VARPD_CLIENT_MAX, so the order of the entries must match the
+ * numbering of the VARPD_CLIENT_* commands (presumably CREATE is 1; confirm
+ * against the enum definition whenever a command is added).
+ */
+static libvarpd_door_f *libvarpd_door_table[] = {
+ libvarpd_door_f_create,
+ libvarpd_door_f_activate,
+ libvarpd_door_f_destroy,
+ libvarpd_door_f_nprops,
+ libvarpd_door_f_propinfo,
+ libvarpd_door_f_getprop,
+ libvarpd_door_f_setprop,
+ libvarpd_door_f_lookup,
+ libvarpd_door_f_target,
+ libvarpd_door_f_flush,
+ libvarpd_door_f_delete,
+ libvarpd_door_f_get,
+ libvarpd_door_f_set,
+ libvarpd_door_f_walk
+};
+
+/*
+ * Door server procedure. Validates the request size and command, obtains the
+ * caller's credentials, dispatches through libvarpd_door_table and returns
+ * the (possibly updated) request buffer to the client, with any handler error
+ * stored in vca_errno.
+ *
+ * A request that cannot be dispatched is answered with a
+ * varpd_client_eresp_t whose command is VARPD_CLIENT_INVALID and whose errno
+ * describes the problem.
+ */
+static void
+libvarpd_door_server(void *cookie, char *argp, size_t argsz, door_desc_t *dp,
+    uint_t ndesc)
+{
+	int ret;
+	varpd_client_eresp_t err;
+	ucred_t *credp = NULL;
+	varpd_impl_t *vip = cookie;
+	varpd_client_arg_t *vcap = (varpd_client_arg_t *)argp;
+
+	/* Don't hand uninitialized stack bytes back to the client. */
+	bzero(&err, sizeof (err));
+	err.vce_command = VARPD_CLIENT_INVALID;
+	/* XXX Get a check for max size */
+	if (argsz < sizeof (varpd_client_arg_t)) {
+		err.vce_errno = EINVAL;
+		goto errout;
+	}
+
+	/* door_ucred() returns -1 on failure; the reason is in errno. */
+	if (door_ucred(&credp) != 0) {
+		err.vce_errno = errno;
+		goto errout;
+	}
+
+	if (vcap->vca_command <= 0 || vcap->vca_command >= VARPD_CLIENT_MAX) {
+		err.vce_errno = EINVAL;
+		goto errout;
+	}
+
+	vcap->vca_errno = 0;
+	ret = libvarpd_door_table[vcap->vca_command - 1](vip, vcap, credp);
+	if (ret != 0)
+		vcap->vca_errno = ret;
+
+	ucred_free(credp);
+	(void) door_return(argp, argsz, NULL, 0);
+	return;
+
+errout:
+	/* The credential may already be held when the command was invalid. */
+	if (credp != NULL)
+		ucred_free(credp);
+	/* XXX Should we do something here? */
+	(void) door_return((char *)&err, sizeof (err), NULL, 0);
+}
+
+/*
+ * Create the varpd door server and attach it to the file system at path. The
+ * file at path is created if necessary and its ownership is set to
+ * UID_NETADM/GID_NETADM. Anything previously attached there is detached
+ * first.
+ *
+ * Returns EEXIST if a door already exists for this handle, the errno of the
+ * failing call otherwise, or 0 on success. On failure the door is revoked
+ * and vdi_doorfd is reset to -1 so that a later attempt can succeed.
+ */
+int
+libvarpd_door_server_create(varpd_handle_t *vhp, const char *path)
+{
+	int fd, ret;
+	varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+	mutex_lock(&vip->vdi_lock);
+	if (vip->vdi_doorfd >= 0) {
+		mutex_unlock(&vip->vdi_lock);
+		return (EEXIST);
+	}
+
+	vip->vdi_doorfd = door_create(libvarpd_door_server, vip,
+	    DOOR_REFUSE_DESC | DOOR_NO_CANCEL);
+	if (vip->vdi_doorfd == -1) {
+		/* Capture errno before anything else can disturb it. */
+		ret = errno;
+		mutex_unlock(&vip->vdi_lock);
+		return (ret);
+	}
+
+	if ((fd = open(path, O_CREAT | O_RDWR, 0666)) == -1) {
+		ret = errno;
+		goto fail;
+	}
+	/* XXX Really? */
+	if (fchown(fd, UID_NETADM, GID_NETADM) != 0) {
+		ret = errno;
+		/* Don't leak the descriptor for the attach point. */
+		(void) close(fd);
+		goto fail;
+	}
+
+	if (close(fd) != 0)
+		libvarpd_panic("failed to close door fd %d: %d",
+		    fd, errno);
+	(void) fdetach(path);
+	if (fattach(vip->vdi_doorfd, path) != 0) {
+		ret = errno;
+		goto fail;
+	}
+
+	mutex_unlock(&vip->vdi_lock);
+	return (0);
+
+fail:
+	/* Undo door_create() and mark the handle as having no door. */
+	if (door_revoke(vip->vdi_doorfd) != 0)
+		libvarpd_panic("failed to revoke door: %d",
+		    errno);
+	vip->vdi_doorfd = -1;
+	mutex_unlock(&vip->vdi_lock);
+	return (ret);
+}
+
+/*
+ * Revoke the door created by libvarpd_door_server_create(), if there is one,
+ * and mark the handle as having no door. A failure to revoke is fatal.
+ */
+void
+libvarpd_door_server_destroy(varpd_handle_t *vhp)
+{
+	varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+	mutex_lock(&vip->vdi_lock);
+	/*
+	 * "No door" is represented by -1 (see the >= 0 test in
+	 * libvarpd_door_server_create() and the reset below), so test for a
+	 * valid descriptor rather than for non-zero.
+	 */
+	if (vip->vdi_doorfd >= 0) {
+		if (door_revoke(vip->vdi_doorfd) != 0)
+			libvarpd_panic("failed to revoke door: %d",
+			    errno);
+		vip->vdi_doorfd = -1;
+	}
+	mutex_unlock(&vip->vdi_lock);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h b/usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h
new file mode 100644
index 0000000000..44bee7d92a
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h
@@ -0,0 +1,247 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_IMPL_H
+#define _LIBVARPD_IMPL_H
+
+/*
+ * varpd internal interfaces
+ */
+
+#include <libvarpd.h>
+#include <libvarpd_provider.h>
+#include <sys/avl.h>
+#include <thread.h>
+#include <synch.h>
+#include <limits.h>
+#include <libidspace.h>
+#include <umem.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Bounds of the instance ID range.
+ * NOTE(review): presumably these size vdi_idspace; the consumer is not visible
+ * in this header -- confirm.
+ */
+#define LIBVARPD_ID_MIN 1
+#define LIBVARPD_ID_MAX INT32_MAX
+
+/*
+ * State kept for a plugin. vpp_name is the name that instances persist and
+ * that is used to look the plugin up again; vpp_mode is copied into an
+ * instance's vri_mode; vpp_ops is the plugin's operations vector.
+ * NOTE(review): vpp_node is presumably the linkage for the vdi_plugins AVL
+ * tree, and vpp_lock presumably protects vpp_active -- confirm against the
+ * plugin code.
+ */
+typedef struct varpd_plugin {
+ avl_node_t vpp_node;
+ const char *vpp_name;
+ overlay_target_mode_t vpp_mode;
+ const varpd_plugin_ops_t *vpp_ops;
+ mutex_t vpp_lock;
+ uint_t vpp_active;
+} varpd_plugin_t;
+
+/*
+ * The implementation behind a varpd_handle_t. The trailing comment on each
+ * member names the lock that protects it; RO marks members that are annotated
+ * as read-only.
+ */
+typedef struct varpd_impl {
+ mutex_t vdi_lock;
+ mutex_t vdi_loglock;
+ rwlock_t vdi_pfdlock;
+ avl_tree_t vdi_plugins; /* vdi_lock */
+ avl_tree_t vdi_instances; /* vdi_lock */
+ avl_tree_t vdi_linstances; /* vdi_lock */
+ id_space_t *vdi_idspace; /* RO */
+ umem_cache_t *vdi_qcache; /* RO */
+ bunyan_logger_t *vdi_bunyan; /* RO */
+ int vdi_overlayfd; /* RO */
+ int vdi_doorfd; /* vdi_lock */
+ int vdi_persistfd; /* vdi_pfdlock */
+ cond_t vdi_lthr_cv; /* vdi_lock */
+ boolean_t vdi_lthr_quiesce; /* vdi_lock */
+ uint_t vdi_lthr_count; /* vdi_lock */
+} varpd_impl_t;
+
+/*
+ * Flags stored in vri_flags.
+ */
+typedef enum varpd_instance_flags {
+ VARPD_INSTANCE_F_ACTIVATED = 0x01
+} varpd_instance_flags_t;
+
+/*
+ * A single varpd instance. An instance is linked into both the vdi_instances
+ * and vdi_linstances AVL trees of its varpd_impl_t.
+ * NOTE(review): vri_inode presumably serves the by-ID tree and vri_lnode the
+ * by-linkid tree -- confirm against the comparators.
+ */
+typedef struct varpd_instance {
+ avl_node_t vri_inode;
+ avl_node_t vri_lnode;
+ uint64_t vri_id; /* RO */
+ uint64_t vri_vnetid; /* RO */
+ datalink_id_t vri_linkid; /* RO */
+ overlay_target_mode_t vri_mode; /* RO */
+ overlay_plugin_dest_t vri_dest; /* RO */
+ varpd_impl_t *vri_impl; /* RO */
+ varpd_plugin_t *vri_plugin; /* RO */
+ void *vri_private; /* RO */
+ mutex_t vri_lock;
+ varpd_instance_flags_t vri_flags; /* vri_lock */
+} varpd_instance_t;
+
+/*
+ * State for one outstanding lookup. vq_lookup is filled in by the
+ * OVERLAY_TARG_LOOKUP ioctl, vq_response is what is handed back to the kernel
+ * when the lookup is answered, and vq_instance is the instance the request was
+ * routed to. Plugins see this as an opaque varpd_query_handle_t.
+ */
+typedef struct varpd_query {
+ overlay_targ_lookup_t vq_lookup;
+ overlay_targ_resp_t vq_response;
+ varpd_instance_t *vq_instance;
+} varpd_query_t;
+
+/*
+ * Per-command argument payloads; each is a member of the vca_un union in
+ * varpd_client_arg_t.
+ * NOTE(review): these are presumably exchanged with clients over the door, so
+ * member order, sizes and the explicit pad members would be part of that
+ * contract -- confirm before changing any layout.
+ */
+typedef struct varpd_client_create_arg {
+ datalink_id_t vcca_linkid;
+ uint64_t vcca_id;
+ char vcca_plugin[LIBVARPD_PROP_NAMELEN];
+} varpd_client_create_arg_t;
+
+typedef struct varpd_client_instance_arg {
+ uint64_t vcia_id;
+} varpd_client_instance_arg_t;
+
+typedef struct varpd_client_nprops_arg {
+ uint64_t vcna_id;
+ uint_t vcna_nprops;
+} varpd_client_nprops_arg_t;
+
+typedef struct varpd_client_propinfo_arg {
+ uint64_t vcfa_id;
+ uint_t vcfa_propid;
+ uint_t vcfa_type;
+ uint_t vcfa_prot;
+ uint32_t vcfa_defsize;
+ uint32_t vcfa_psize;
+ char vcfa_name[LIBVARPD_PROP_NAMELEN];
+ uint8_t vcfa_default[LIBVARPD_PROP_SIZEMAX];
+ uint8_t vcfa_poss[LIBVARPD_PROP_SIZEMAX];
+} varpd_client_propinfo_arg_t;
+
+typedef struct varpd_client_prop_arg {
+ uint64_t vcpa_id;
+ uint_t vcpa_propid;
+ uint8_t vcpa_buf[LIBVARPD_PROP_SIZEMAX];
+ size_t vcpa_bufsize;
+} varpd_client_prop_arg_t;
+
+typedef struct varpd_client_lookup_arg {
+ datalink_id_t vcla_linkid;
+ uint32_t vcla_pad;
+ uint64_t vcla_id;
+} varpd_client_lookup_arg_t;
+
+typedef struct varpd_client_target_mode_arg {
+ uint64_t vtma_id;
+ uint32_t vtma_dest;
+ uint32_t vtma_mode;
+} varpd_client_target_mode_arg_t;
+
+typedef struct varpd_client_target_cache_arg {
+ uint64_t vtca_id;
+ uint8_t vtca_key[ETHERADDRL];
+ uint8_t vtca_pad[2];
+ varpd_client_cache_entry_t vtca_entry;
+} varpd_client_target_cache_arg_t;
+
+/*
+ * vtcw_ents is a flexible array member, so the size of this structure does not
+ * include any entries.
+ */
+typedef struct varpd_client_target_walk_arg {
+ uint64_t vtcw_id;
+ uint64_t vtcw_marker;
+ uint64_t vtcw_count;
+ overlay_targ_cache_entry_t vtcw_ents[];
+} varpd_client_target_walk_arg_t;
+
+/*
+ * Client command codes. VARPD_CLIENT_INVALID is explicitly zero and the
+ * remaining values are assigned sequentially, ending with VARPD_CLIENT_MAX,
+ * which is one past the last real command.
+ * NOTE(review): these values are presumably carried in vca_command, so
+ * inserting in the middle would renumber later commands -- confirm.
+ */
+typedef enum varpd_client_command {
+ VARPD_CLIENT_INVALID = 0x0,
+ VARPD_CLIENT_CREATE,
+ VARPD_CLIENT_ACTIVATE,
+ VARPD_CLIENT_DESTROY,
+ VARPD_CLIENT_NPROPS,
+ VARPD_CLIENT_PROPINFO,
+ VARPD_CLIENT_GETPROP,
+ VARPD_CLIENT_SETPROP,
+ VARPD_CLIENT_LOOKUP,
+ VARPD_CLIENT_TARGET_MODE,
+ VARPD_CLIENT_CACHE_FLUSH,
+ VARPD_CLIENT_CACHE_DELETE,
+ VARPD_CLIENT_CACHE_GET,
+ VARPD_CLIENT_CACHE_SET,
+ VARPD_CLIENT_CACHE_WALK,
+ VARPD_CLIENT_MAX
+} varpd_client_command_t;
+
+/*
+ * A client request: a command, an errno slot, and a union holding the
+ * command-specific payload.
+ * NOTE(review): vca_walk ends in a flexible array member; its entries would
+ * extend past the end of this structure -- confirm how walk buffers are sized.
+ */
+typedef struct varpd_client_arg {
+ uint_t vca_command;
+ uint_t vca_errno;
+ union {
+ varpd_client_create_arg_t vca_create;
+ varpd_client_instance_arg_t vca_instance;
+ varpd_client_nprops_arg_t vca_nprops;
+ varpd_client_propinfo_arg_t vca_info;
+ varpd_client_prop_arg_t vca_prop;
+ varpd_client_lookup_arg_t vca_lookup;
+ varpd_client_target_mode_arg_t vca_mode;
+ varpd_client_target_cache_arg_t vca_cache;
+ varpd_client_target_walk_arg_t vca_walk;
+ } vca_un;
+} varpd_client_arg_t;
+
+/*
+ * Command and errno only; the members mirror the first two members of
+ * varpd_client_arg_t.
+ * NOTE(review): presumably the short reply used when a request fails --
+ * confirm against the door server.
+ */
+typedef struct varpd_client_eresp {
+ uint_t vce_command;
+ uint_t vce_errno;
+} varpd_client_eresp_t;
+
+/*
+ * Plugin management.
+ */
+extern void libvarpd_plugin_init(void);
+extern void libvarpd_plugin_prefork(void);
+extern void libvarpd_plugin_postfork(void);
+extern void libvarpd_plugin_fini(void);
+extern int libvarpd_plugin_comparator(const void *, const void *);
+extern varpd_plugin_t *libvarpd_plugin_lookup(varpd_impl_t *, const char *);
+extern varpd_instance_t *libvarpd_instance_lookup_by_dlid(varpd_impl_t *,
+ datalink_id_t);
+
+extern void libvarpd_prop_door_convert(const varpd_prop_handle_t *,
+ varpd_client_propinfo_arg_t *);
+
+/*
+ * Directory walking helpers.
+ */
+extern const char *libvarpd_isaext(void);
+typedef int (*libvarpd_dirwalk_f)(varpd_impl_t *, const char *, void *);
+extern int libvarpd_dirwalk(varpd_impl_t *, const char *, const char *,
+ libvarpd_dirwalk_f, void *);
+
+/*
+ * Interactions with /dev/overlay. Unless noted otherwise in the
+ * implementation, these return 0 on success and an errno value on failure.
+ */
+extern int libvarpd_overlay_init(varpd_impl_t *);
+extern void libvarpd_overlay_fini(varpd_impl_t *);
+extern int libvarpd_overlay_info(varpd_impl_t *, datalink_id_t,
+ overlay_plugin_dest_t *, uint64_t *, uint64_t *);
+extern int libvarpd_overlay_associate(varpd_instance_t *);
+extern int libvarpd_overlay_disassociate(varpd_instance_t *);
+extern int libvarpd_overlay_degrade(varpd_instance_t *, const char *);
+extern int libvarpd_overlay_degrade_datalink(varpd_impl_t *, datalink_id_t,
+ const char *);
+extern int libvarpd_overlay_restore(varpd_instance_t *);
+extern int libvarpd_overlay_packet(varpd_impl_t *,
+ const overlay_targ_lookup_t *, void *, size_t *);
+extern int libvarpd_overlay_inject(varpd_impl_t *,
+ const overlay_targ_lookup_t *, void *, size_t);
+extern int libvarpd_overlay_instance_inject(varpd_instance_t *, void *, size_t);
+extern int libvarpd_overlay_resend(varpd_impl_t *,
+ const overlay_targ_lookup_t *, void *, size_t);
+typedef int (*libvarpd_overlay_iter_f)(varpd_impl_t *, datalink_id_t, void *);
+extern int libvarpd_overlay_iter(varpd_impl_t *, libvarpd_overlay_iter_f,
+ void *);
+extern int libvarpd_overlay_cache_flush(varpd_instance_t *);
+extern int libvarpd_overlay_cache_delete(varpd_instance_t *, const uint8_t *);
+extern int libvarpd_overlay_cache_get(varpd_instance_t *, const uint8_t *,
+ varpd_client_cache_entry_t *);
+extern int libvarpd_overlay_cache_set(varpd_instance_t *, const uint8_t *,
+ const varpd_client_cache_entry_t *);
+extern int libvarpd_overlay_cache_walk_fill(varpd_instance_t *, uint64_t *,
+ uint64_t *, overlay_targ_cache_entry_t *);
+
+/*
+ * Persistence backend.
+ */
+extern void libvarpd_persist_init(varpd_impl_t *);
+extern void libvarpd_persist_fini(varpd_impl_t *);
+extern int libvarpd_persist_instance(varpd_impl_t *, varpd_instance_t *);
+extern void libvarpd_torch_instance(varpd_impl_t *, varpd_instance_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_IMPL_H */
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c
new file mode 100644
index 0000000000..f2cad47394
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c
@@ -0,0 +1,574 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc.
+ */
+
+/*
+ * Interactions with /dev/overlay
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stropts.h>
+#include <strings.h>
+#include <umem.h>
+
+#include <libvarpd_impl.h>
+#include <sys/overlay_target.h>
+
+#define OVERLAY_PATH "/dev/overlay"
+
+/*
+ * Open /dev/overlay read-write and record the descriptor in the handle.
+ * Returns 0 on success or the errno from open(); on failure vdi_overlayfd is
+ * left at -1.
+ */
+int
+libvarpd_overlay_init(varpd_impl_t *vip)
+{
+	int ofd = open(OVERLAY_PATH, O_RDWR);
+
+	vip->vdi_overlayfd = ofd;
+	return (ofd == -1 ? errno : 0);
+}
+
+/*
+ * Close the /dev/overlay descriptor held by the handle. Panics if the close
+ * fails.
+ */
+void
+libvarpd_overlay_fini(varpd_impl_t *vip)
+{
+	assert(vip->vdi_overlayfd > 0);
+	if (close(vip->vdi_overlayfd) == 0)
+		return;
+
+	libvarpd_panic("failed to close /dev/overlay fd %d: %d",
+	    vip->vdi_overlayfd, errno);
+}
+
+/*
+ * Query the overlay device about 'linkid' with OVERLAY_TARG_INFO. Each of
+ * destp, flags and vnetid is optional; those that are non-NULL receive the
+ * corresponding field of the reply. Returns 0 on success or the ioctl's errno.
+ */
+int
+libvarpd_overlay_info(varpd_impl_t *vip, datalink_id_t linkid,
+    overlay_plugin_dest_t *destp, uint64_t *flags, uint64_t *vnetid)
+{
+	overlay_targ_info_t info;
+
+	info.oti_linkid = linkid;
+	if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_INFO, &info) != 0)
+		return (errno);
+
+	/* Only hand back what the caller asked for. */
+	if (vnetid != NULL)
+		*vnetid = info.oti_vnetid;
+	if (flags != NULL)
+		*flags = info.oti_flags;
+	if (destp != NULL)
+		*destp = info.oti_needs;
+
+	return (0);
+}
+
+/*
+ * Associate the instance with its datalink via OVERLAY_TARG_ASSOCIATE. For
+ * point mode the plugin's vpo_default entry point is asked to fill in the
+ * destination first; if that does not return VARPD_LOOKUP_OK its return value
+ * is passed straight back. Otherwise returns 0 or the ioctl's errno.
+ */
+int
+libvarpd_overlay_associate(varpd_instance_t *inst)
+{
+	overlay_targ_associate_t assoc;
+	varpd_impl_t *vip = inst->vri_impl;
+
+	bzero(&assoc, sizeof (overlay_targ_associate_t));
+	assoc.ota_provides = inst->vri_dest;
+	assoc.ota_id = inst->vri_id;
+	assoc.ota_mode = inst->vri_mode;
+	assoc.ota_linkid = inst->vri_linkid;
+
+	if (assoc.ota_mode == OVERLAY_TARGET_POINT) {
+		int pret = inst->vri_plugin->vpp_ops->vpo_default(
+		    inst->vri_private, &assoc.ota_point);
+
+		if (pret != VARPD_LOOKUP_OK)
+			return (pret);
+	}
+
+	if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_ASSOCIATE, &assoc) == 0)
+		return (0);
+
+	return (errno);
+}
+
+/*
+ * Issue OVERLAY_TARG_DISASSOCIATE for the instance's datalink. Returns 0 on
+ * success or the ioctl's errno.
+ */
+int
+libvarpd_overlay_disassociate(varpd_instance_t *inst)
+{
+	overlay_targ_id_t tid;
+	int ofd = inst->vri_impl->vdi_overlayfd;
+
+	tid.otid_linkid = inst->vri_linkid;
+	if (ioctl(ofd, OVERLAY_TARG_DISASSOCIATE, &tid) == 0)
+		return (0);
+
+	return (errno);
+}
+
+/*
+ * Mark 'linkid' degraded via OVERLAY_TARG_DEGRADE, passing along 'msg'
+ * (truncated to OVERLAY_STATUS_BUFLEN if necessary). Returns 0 on success or
+ * the ioctl's errno.
+ */
+int
+libvarpd_overlay_degrade_datalink(varpd_impl_t *vip, datalink_id_t linkid,
+    const char *msg)
+{
+	overlay_targ_degrade_t deg;
+
+	deg.otd_linkid = linkid;
+	(void) strlcpy(deg.otd_buf, msg, OVERLAY_STATUS_BUFLEN);
+
+	if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_DEGRADE, &deg) == 0)
+		return (0);
+
+	return (errno);
+}
+
+/*
+ * Instance flavored wrapper around libvarpd_overlay_degrade_datalink().
+ */
+int
+libvarpd_overlay_degrade(varpd_instance_t *inst, const char *msg)
+{
+	varpd_impl_t *vip = inst->vri_impl;
+	datalink_id_t linkid = inst->vri_linkid;
+
+	return (libvarpd_overlay_degrade_datalink(vip, linkid, msg));
+}
+
+/*
+ * Issue OVERLAY_TARG_RESTORE for the instance's datalink. Returns 0 on
+ * success or the ioctl's errno.
+ */
+int
+libvarpd_overlay_restore(varpd_instance_t *inst)
+{
+	overlay_targ_id_t tid;
+	int ofd = inst->vri_impl->vdi_overlayfd;
+
+	tid.otid_linkid = inst->vri_linkid;
+	if (ioctl(ofd, OVERLAY_TARG_RESTORE, &tid) == 0)
+		return (0);
+
+	return (errno);
+}
+
+/*
+ * Fetch the packet that belongs to the lookup request 'otl' into 'buf' using
+ * OVERLAY_TARG_PKT. On entry *buflen is the size of 'buf'; on success it is
+ * updated from the size reported by the ioctl. The ioctl is retried on EINTR,
+ * EFAULT is treated as fatal, and any other failure is returned as an errno.
+ */
+int
+libvarpd_overlay_packet(varpd_impl_t *vip, const overlay_targ_lookup_t *otl,
+    void *buf, size_t *buflen)
+{
+	int ret;
+	overlay_targ_pkt_t pkt;
+
+	pkt.otp_buf = buf;
+	pkt.otp_size = *buflen;
+	pkt.otp_reqid = otl->otl_reqid;
+	pkt.otp_linkid = UINT64_MAX;
+
+	for (;;) {
+		ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_PKT, &pkt);
+		if (ret == 0 || errno != EINTR)
+			break;
+	}
+
+	if (ret != 0) {
+		if (errno == EFAULT)
+			libvarpd_panic("OVERLAY_TARG_PKT ioctl efault");
+		else
+			ret = errno;
+	}
+
+	if (ret == 0)
+		*buflen = pkt.otp_size;
+
+	return (ret);
+}
+
+/*
+ * Shared implementation of the inject and resend operations; 'cmd' selects the
+ * ioctl. When 'otl' is NULL the packet is addressed by the instance's link ID
+ * with a request ID of zero; otherwise it is addressed by the request ID of
+ * 'otl' and the link ID is set to UINT64_MAX. The ioctl is retried on EINTR,
+ * EFAULT is fatal, and any other failure is returned as an errno.
+ */
+static int
+libvarpd_overlay_inject_common(varpd_impl_t *vip, varpd_instance_t *inst,
+    const overlay_targ_lookup_t *otl, void *buf, size_t buflen, int cmd)
+{
+	int ret;
+	overlay_targ_pkt_t pkt;
+
+	pkt.otp_buf = buf;
+	pkt.otp_size = buflen;
+	if (otl != NULL) {
+		pkt.otp_linkid = UINT64_MAX;
+		pkt.otp_reqid = otl->otl_reqid;
+	} else {
+		pkt.otp_linkid = inst->vri_linkid;
+		pkt.otp_reqid = 0;
+	}
+
+	for (;;) {
+		ret = ioctl(vip->vdi_overlayfd, cmd, &pkt);
+		if (ret == 0 || errno != EINTR)
+			break;
+	}
+
+	if (ret != 0) {
+		if (errno == EFAULT)
+			libvarpd_panic("overlay_inject_common ioctl efault");
+		else
+			ret = errno;
+	}
+
+	return (ret);
+}
+
+/*
+ * Inject 'buf' on behalf of the lookup request 'otl' (OVERLAY_TARG_INJECT).
+ */
+int
+libvarpd_overlay_inject(varpd_impl_t *vip, const overlay_targ_lookup_t *otl,
+    void *buf, size_t buflen)
+{
+	int ret;
+
+	ret = libvarpd_overlay_inject_common(vip, NULL, otl, buf, buflen,
+	    OVERLAY_TARG_INJECT);
+	return (ret);
+}
+
+/*
+ * Inject 'buf' on the instance's own datalink (OVERLAY_TARG_INJECT with no
+ * lookup request).
+ */
+int
+libvarpd_overlay_instance_inject(varpd_instance_t *inst, void *buf,
+    size_t buflen)
+{
+	varpd_impl_t *vip = inst->vri_impl;
+
+	return (libvarpd_overlay_inject_common(vip, inst, NULL, buf, buflen,
+	    OVERLAY_TARG_INJECT));
+}
+
+/*
+ * Resend 'buf' on behalf of the lookup request 'otl' (OVERLAY_TARG_RESEND).
+ */
+int
+libvarpd_overlay_resend(varpd_impl_t *vip, const overlay_targ_lookup_t *otl,
+    void *buf, size_t buflen)
+{
+	int ret;
+
+	ret = libvarpd_overlay_inject_common(vip, NULL, otl, buf, buflen,
+	    OVERLAY_TARG_RESEND);
+	return (ret);
+}
+
+/*
+ * Answer the lookup request 'otl' by issuing 'cmd' with the response 'otr',
+ * after stamping the response with the request ID. The ioctl is retried on
+ * EINTR; EINVAL is tolerated and every other failure is fatal.
+ */
+static void
+libvarpd_overlay_lookup_reply(varpd_impl_t *vip,
+    const overlay_targ_lookup_t *otl, overlay_targ_resp_t *otr, int cmd)
+{
+	int ret;
+
+	otr->otr_reqid = otl->otl_reqid;
+	for (;;) {
+		ret = ioctl(vip->vdi_overlayfd, cmd, otr);
+		if (ret == 0 || errno != EINTR)
+			break;
+	}
+
+	if (ret == 0 || errno == EINVAL)
+		return;
+
+	/* XXX abort feels wrong here */
+	libvarpd_panic("receieved bad errno from lookup_reply "
+	    "(cmd %d): %d\n", cmd, errno);
+}
+
+/*
+ * Service a single lookup: allocate a query from vdi_qcache, ask the kernel
+ * for a request with OVERLAY_TARG_LOOKUP and hand it to the plugin of the
+ * instance named by the request.
+ *
+ * If the ioctl fails with ETIME or EINTR the query is released and we simply
+ * return; any other ioctl failure is fatal. If the request names an instance
+ * we do not know, the request is dropped and the query released. Otherwise the
+ * query is passed to the plugin's vpo_lookup entry point and is not released
+ * here.
+ */
+static void
+libvarpd_overlay_lookup_handle(varpd_impl_t *vip)
+{
+	int ret;
+	varpd_query_t *vqp;
+	overlay_targ_lookup_t *otl;
+	overlay_targ_resp_t *otr;
+	varpd_instance_t *inst;
+
+	vqp = umem_cache_alloc(vip->vdi_qcache, UMEM_DEFAULT);
+	/*
+	 * XXX abort doesn't really help here, we should figure out what to do.
+	 * Try and force a reap, then some?
+	 */
+	if (vqp == NULL)
+		libvarpd_panic("failed to allocate memory for lookup "
+		    "handle..., we should not panic()");
+
+	/* Only form member addresses once we know vqp is valid. */
+	otl = &vqp->vq_lookup;
+	otr = &vqp->vq_response;
+
+	ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_LOOKUP, otl);
+	if (ret != 0 && errno != ETIME && errno != EINTR)
+		libvarpd_panic("received bad errno from OVERLAY_TARG_LOOKUP: "
+		    "%d", errno);
+
+	if (ret != 0) {
+		umem_cache_free(vip->vdi_qcache, vqp);
+		return;
+	}
+
+	inst = (varpd_instance_t *)libvarpd_instance_lookup(
+	    (varpd_handle_t *)vip, otl->otl_varpdid);
+	if (inst == NULL) {
+		libvarpd_overlay_lookup_reply(vip, otl, otr,
+		    OVERLAY_TARG_DROP);
+		umem_cache_free(vip->vdi_qcache, vqp);
+		return;
+	}
+	vqp->vq_instance = inst;
+
+	inst->vri_plugin->vpp_ops->vpo_lookup(inst->vri_private,
+	    (varpd_query_handle_t *)vqp, otl, &otr->otr_answer);
+}
+
+/*
+ * Body of a lookup thread. Unless a quiesce is already in progress, count
+ * ourselves in vdi_lthr_count and service lookups, with vdi_lock dropped
+ * around each one, until vdi_lthr_quiesce is set. On the way out drop our
+ * count and signal vdi_lthr_cv.
+ */
+void
+libvarpd_overlay_lookup_run(varpd_handle_t *vhp)
+{
+	varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+	mutex_lock(&vip->vdi_lock);
+	if (vip->vdi_lthr_quiesce != B_TRUE) {
+		vip->vdi_lthr_count++;
+
+		do {
+			mutex_unlock(&vip->vdi_lock);
+			libvarpd_overlay_lookup_handle(vip);
+			mutex_lock(&vip->vdi_lock);
+		} while (vip->vdi_lthr_quiesce != B_TRUE);
+
+		assert(vip->vdi_lthr_count > 0);
+		vip->vdi_lthr_count--;
+		cond_signal(&vip->vdi_lthr_cv);
+	}
+	mutex_unlock(&vip->vdi_lock);
+}
+
+/*
+ * Stop the lookup threads. If any are running, set vdi_lthr_quiesce and wait
+ * on vdi_lthr_cv until vdi_lthr_count reaches zero, then clear the flag again.
+ */
+void
+libvarpd_overlay_lookup_quiesce(varpd_handle_t *vhp)
+{
+	varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+	mutex_lock(&vip->vdi_lock);
+	if (vip->vdi_lthr_count != 0) {
+		vip->vdi_lthr_quiesce = B_TRUE;
+		do {
+			(void) cond_wait(&vip->vdi_lthr_cv, &vip->vdi_lock);
+		} while (vip->vdi_lthr_count > 0);
+		vip->vdi_lthr_quiesce = B_FALSE;
+	}
+	mutex_unlock(&vip->vdi_lock);
+}
+
+/*
+ * Call 'func' once for every datalink reported by OVERLAY_TARG_LIST, stopping
+ * early if 'func' returns non-zero.
+ *
+ * The list is fetched by repeatedly asking with a buffer sized for the entry
+ * count reported by the previous attempt, until the reported count matches
+ * what we asked for. Returns 0 once the walk has run (even if 'func' ended it
+ * early), ENOMEM if a buffer could not be allocated, or the ioctl's errno.
+ * EINTR is retried and EFAULT is fatal.
+ */
+int
+libvarpd_overlay_iter(varpd_impl_t *vip, libvarpd_overlay_iter_f func,
+    void *arg)
+{
+	uint32_t curents = 0, i;
+	size_t size;
+	overlay_targ_list_t *otl;
+
+	for (;;) {
+		size = sizeof (overlay_targ_list_t) +
+		    sizeof (uint32_t) * curents;
+		otl = umem_alloc(size, UMEM_DEFAULT);
+		if (otl == NULL)
+			return (ENOMEM);
+
+		otl->otl_nents = curents;
+		if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_LIST, otl) != 0) {
+			/* Save errno; umem_free() is free to change it. */
+			int err = errno;
+
+			if (err == EFAULT)
+				libvarpd_panic("OVERLAY_TARG_LIST ioctl "
+				    "efault");
+			umem_free(otl, size);
+			if (err == EINTR)
+				continue;
+			return (err);
+		}
+
+		if (otl->otl_nents == curents)
+			break;
+
+		/* The count changed underneath us; resize and try again. */
+		curents = otl->otl_nents;
+		umem_free(otl, size);
+	}
+
+	for (i = 0; i < otl->otl_nents; i++) {
+		if (func(vip, otl->otl_ents[i], arg) != 0)
+			break;
+	}
+	umem_free(otl, size);
+	return (0);
+}
+
+/*
+ * Issue OVERLAY_TARG_CACHE_FLUSH for the instance's datalink. Returns 0 on
+ * success or the ioctl's errno; EFAULT is fatal.
+ */
+int
+libvarpd_overlay_cache_flush(varpd_instance_t *inst)
+{
+	int ret;
+	overlay_targ_cache_t otc;
+	int ofd = inst->vri_impl->vdi_overlayfd;
+
+	bzero(&otc, sizeof (overlay_targ_cache_t));
+	otc.otc_linkid = inst->vri_linkid;
+
+	ret = ioctl(ofd, OVERLAY_TARG_CACHE_FLUSH, &otc);
+	if (ret != 0) {
+		if (errno == EFAULT)
+			libvarpd_panic("OVERLAY_TARG_CACHE_FLUSH ioctl efault");
+		else
+			ret = errno;
+	}
+
+	return (ret);
+}
+
+/*
+ * Issue OVERLAY_TARG_CACHE_REMOVE for the MAC address 'key' (ETHERADDRL
+ * bytes) on the instance's datalink. Returns 0 on success or the ioctl's
+ * errno; EFAULT is fatal.
+ */
+int
+libvarpd_overlay_cache_delete(varpd_instance_t *inst, const uint8_t *key)
+{
+	int ret;
+	overlay_targ_cache_t otc;
+	int ofd = inst->vri_impl->vdi_overlayfd;
+
+	bzero(&otc, sizeof (overlay_targ_cache_t));
+	bcopy(key, otc.otc_entry.otce_mac, ETHERADDRL);
+	otc.otc_linkid = inst->vri_linkid;
+
+	ret = ioctl(ofd, OVERLAY_TARG_CACHE_REMOVE, &otc);
+	if (ret != 0) {
+		if (errno == EFAULT)
+			libvarpd_panic("OVERLAY_TARG_CACHE_REMOVE ioctl "
+			    "efault");
+		else
+			ret = errno;
+	}
+
+	return (ret);
+}
+
+/*
+ * Look up the entry for the MAC address 'key' with OVERLAY_TARG_CACHE_GET and
+ * copy its flags and destination (MAC, IP, port) into 'entry'. Returns 0 on
+ * success or the ioctl's errno, in which case 'entry' is untouched; EFAULT is
+ * fatal.
+ */
+int
+libvarpd_overlay_cache_get(varpd_instance_t *inst, const uint8_t *key,
+    varpd_client_cache_entry_t *entry)
+{
+	overlay_targ_cache_t otc;
+	int ofd = inst->vri_impl->vdi_overlayfd;
+
+	bzero(&otc, sizeof (overlay_targ_cache_t));
+	bcopy(key, otc.otc_entry.otce_mac, ETHERADDRL);
+	otc.otc_linkid = inst->vri_linkid;
+
+	if (ioctl(ofd, OVERLAY_TARG_CACHE_GET, &otc) != 0) {
+		if (errno == EFAULT)
+			libvarpd_panic("OVERLAY_TARG_CACHE_GET ioctl efault");
+		else
+			return (errno);
+	}
+
+	entry->vcp_flags = otc.otc_entry.otce_flags;
+	bcopy(otc.otc_entry.otce_dest.otp_mac, &entry->vcp_mac, ETHERADDRL);
+	entry->vcp_ip = otc.otc_entry.otce_dest.otp_ip;
+	entry->vcp_port = otc.otc_entry.otce_dest.otp_port;
+
+	return (0);
+}
+
+/*
+ * Install 'entry' (flags plus destination MAC, IP and port) for the MAC
+ * address 'key' with OVERLAY_TARG_CACHE_SET. Returns 0 on success or the
+ * ioctl's errno; EFAULT is fatal.
+ */
+int
+libvarpd_overlay_cache_set(varpd_instance_t *inst, const uint8_t *key,
+    const varpd_client_cache_entry_t *entry)
+{
+	overlay_targ_cache_t otc;
+	int ofd = inst->vri_impl->vdi_overlayfd;
+
+	bzero(&otc, sizeof (overlay_targ_cache_t));
+	otc.otc_linkid = inst->vri_linkid;
+	otc.otc_entry.otce_flags = entry->vcp_flags;
+	bcopy(key, otc.otc_entry.otce_mac, ETHERADDRL);
+	bcopy(&entry->vcp_mac, otc.otc_entry.otce_dest.otp_mac, ETHERADDRL);
+	otc.otc_entry.otce_dest.otp_ip = entry->vcp_ip;
+	otc.otc_entry.otce_dest.otp_port = entry->vcp_port;
+
+	if (ioctl(ofd, OVERLAY_TARG_CACHE_SET, &otc) != 0) {
+		if (errno == EFAULT)
+			libvarpd_panic("OVERLAY_TARG_CACHE_SET ioctl efault");
+		else
+			return (errno);
+	}
+
+	return (0);
+}
+
+/*
+ * Fetch the next batch of cache entries with OVERLAY_TARG_CACHE_ITER.
+ *
+ * On entry *markerp is the walk position and *countp the number of entries
+ * 'ents' can hold (at most 200, otherwise E2BIG). On success both are updated
+ * from the ioctl's reply and *countp entries are copied into 'ents'. Returns
+ * 0, E2BIG, ENOMEM, or the ioctl's errno; EFAULT is fatal.
+ */
+int
+libvarpd_overlay_cache_walk_fill(varpd_instance_t *inst, uint64_t *markerp,
+    uint64_t *countp, overlay_targ_cache_entry_t *ents)
+{
+	int ret;
+	size_t len;
+	overlay_targ_cache_iter_t *otci;
+	int ofd = inst->vri_impl->vdi_overlayfd;
+
+	if (*countp > 200)
+		return (E2BIG);
+
+	len = sizeof (overlay_targ_cache_iter_t) +
+	    *countp * sizeof (overlay_targ_cache_entry_t);
+	if ((otci = umem_alloc(len, UMEM_DEFAULT)) == NULL)
+		return (ENOMEM);
+
+	otci->otci_count = *countp;
+	otci->otci_marker = *markerp;
+	otci->otci_linkid = inst->vri_linkid;
+
+	ret = ioctl(ofd, OVERLAY_TARG_CACHE_ITER, otci);
+	if (ret == 0) {
+		*markerp = otci->otci_marker;
+		*countp = otci->otci_count;
+		bcopy(otci->otci_ents, ents,
+		    *countp * sizeof (overlay_targ_cache_entry_t));
+	} else if (errno == EFAULT) {
+		libvarpd_panic("OVERLAY_TARG_CACHE_ITER ioctl efault");
+	} else {
+		ret = errno;
+	}
+
+	umem_free(otci, len);
+	return (ret);
+}
+
+/*
+ * Called by a plugin to answer a lookup. VARPD_LOOKUP_DROP drops the request
+ * and VARPD_LOOKUP_OK responds with the answer stored in the query; a NULL
+ * handle or any other action is fatal.
+ *
+ * NOTE(review): nothing here returns the query to vdi_qcache; confirm where a
+ * query that was handed to a plugin gets released.
+ */
+void
+libvarpd_plugin_query_reply(varpd_query_handle_t *vqh, int action)
+{
+	int cmd;
+	varpd_query_t *vqp = (varpd_query_t *)vqh;
+
+	if (vqp == NULL)
+		libvarpd_panic("unkonwn plugin passed invalid "
+		    "varpd_query_handle_t");
+
+	if (action == VARPD_LOOKUP_DROP) {
+		cmd = OVERLAY_TARG_DROP;
+	} else if (action == VARPD_LOOKUP_OK) {
+		cmd = OVERLAY_TARG_RESPOND;
+	} else {
+		libvarpd_panic("plugin %s passed in an invalid action: %d",
+		    vqp->vq_instance->vri_plugin->vpp_name, action);
+		return;
+	}
+
+	libvarpd_overlay_lookup_reply(vqp->vq_instance->vri_impl,
+	    &vqp->vq_lookup, &vqp->vq_response, cmd);
+}
+
+/*
+ * Called by a plugin to install a mapping from 'mac' (ETHERADDRL bytes) to
+ * the destination 'otp' for the instance's datalink, using
+ * OVERLAY_TARG_CACHE_SET with no flags. EBADF, EFAULT and ENOTSUP are fatal;
+ * any other failure is silently ignored.
+ */
+void
+libvarpd_inject_varp(varpd_provider_handle_t *vph, const uint8_t *mac,
+    const overlay_target_point_t *otp)
+{
+	int ret;
+	overlay_targ_cache_t otc;
+	varpd_instance_t *inst = (varpd_instance_t *)vph;
+	varpd_impl_t *vip = inst->vri_impl;
+
+	/*
+	 * Zero the request first, as the other cache routines do, so that no
+	 * uninitialized stack contents are handed to the ioctl.
+	 */
+	bzero(&otc, sizeof (overlay_targ_cache_t));
+	otc.otc_linkid = inst->vri_linkid;
+	otc.otc_entry.otce_flags = 0;
+	bcopy(mac, otc.otc_entry.otce_mac, ETHERADDRL);
+	bcopy(otp, &otc.otc_entry.otce_dest, sizeof (overlay_target_point_t));
+
+	ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_SET, &otc);
+	if (ret != 0) {
+		switch (errno) {
+		case EBADF:
+		case EFAULT:
+		case ENOTSUP:
+			libvarpd_panic("received bad errno from "
+			    "OVERLAY_TARG_CACHE_SET: %d", errno);
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ * Called by a plugin to mark its instance's datalink degraded with the
+ * message 'msg'. ENOENT and EFAULT are fatal; any other failure is ignored.
+ */
+void
+libvarpd_fma_degrade(varpd_provider_handle_t *vph, const char *msg)
+{
+	int ret;
+	varpd_instance_t *inst = (varpd_instance_t *)vph;
+
+	ret = libvarpd_overlay_degrade(inst, msg);
+	switch (ret) {
+	case ENOENT:
+	case EFAULT:
+		/*
+		 * Report the value that was returned to us rather than
+		 * whatever errno happens to hold by now.
+		 */
+		libvarpd_panic("received bad errno from degrade ioctl: %d",
+		    ret);
+	default:
+		break;
+	}
+}
+
+/*
+ * Called by a plugin to restore its instance's datalink. ENOENT and EFAULT
+ * are fatal; any other failure is ignored.
+ */
+void
+libvarpd_fma_restore(varpd_provider_handle_t *vph)
+{
+	int ret;
+	varpd_instance_t *inst = (varpd_instance_t *)vph;
+
+	ret = libvarpd_overlay_restore(inst);
+	switch (ret) {
+	case ENOENT:
+	case EFAULT:
+		/*
+		 * Report the value that was returned to us rather than
+		 * whatever errno happens to hold by now.
+		 */
+		libvarpd_panic("received bad errno from restore ioctl: %d",
+		    ret);
+	default:
+		break;
+	}
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c
new file mode 100644
index 0000000000..9d02504139
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c
@@ -0,0 +1,48 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+/*
+ * No, 'tis not so deep as a well, nor so wide as a church door; but 'tis
+ * enough,'twill serve. Ask for me tomorrow, and you shall find me a grave man.
+ *
+ * This file maintains various routines for handling when we die.
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <thread.h>
+#include <stdlib.h>
+
+/*
+ * State recorded by libvarpd_panic() just before it aborts: the errno at the
+ * time of the call, the formatted message, and the calling thread.
+ * NOTE(review): nothing in this file reads these back; presumably they exist
+ * to be inspected in the resulting core file -- confirm.
+ */
+static int varpd_panic_errno;
+static char varpd_panic_buf[1024];
+static thread_t varpd_panic_thread;
+
+/*
+ * Record why we are dying and abort. The caller's errno and thread ID are
+ * saved and, if 'fmt' is non-NULL, the printf-style message is formatted into
+ * varpd_panic_buf (truncated to fit). This function does not return.
+ */
+void
+libvarpd_panic(const char *fmt, ...)
+{
+	va_list ap;
+
+	/* Always save errno first! */
+	varpd_panic_errno = errno;
+	varpd_panic_thread = thr_self();
+
+	if (fmt != NULL) {
+		va_start(ap, fmt);
+		(void) vsnprintf(varpd_panic_buf, sizeof (varpd_panic_buf), fmt,
+		    ap);
+		/* Every va_start() must be paired with a va_end(). */
+		va_end(ap);
+	}
+	abort();
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c
new file mode 100644
index 0000000000..255622b63b
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c
@@ -0,0 +1,590 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * varpd persistence backend
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <strings.h>
+#include <librename.h>
+#include <md5.h>
+#include <sys/sysmacros.h>
+#include <dirent.h>
+#include <sys/mman.h>
+#include <umem.h>
+
+#include <libvarpd_impl.h>
+
+/*
+ * Magic bytes that open every persisted instance file.
+ */
+static uint8_t varpd_persist_magic[4] = {
+ 'v',
+ 'a',
+ 'r',
+ 'p',
+};
+
+/*
+ * VARPD_PERSIST_MAXWRITE caps the size of a single write(2) call when a file
+ * is written out, VARPD_PERSIST_VERSION_ONE is the only on-disk version, and
+ * VARPD_PERSIST_SUFFIX is the file name suffix of a persisted instance.
+ */
+#define VARPD_PERSIST_MAXWRITE 4096
+#define VARPD_PERSIST_VERSION_ONE 1
+#define VARPD_PERSIST_SUFFIX ".varpd"
+
+/*
+ * On-disk header that precedes the packed nvlist in a persisted instance
+ * file: the magic bytes, the format version, and the MD5 digest of the packed
+ * nvlist that follows the header.
+ *
+ * XXX ctfdiff this structure
+ */
+typedef struct varpd_persist_header {
+ uint8_t vph_magic[4];
+ uint32_t vph_version;
+ uint8_t vph_md5[16];
+} varpd_persist_header_t;
+
+/*
+ * Set up persistence state for a handle: create vdi_pfdlock and mark
+ * persistence as disabled (vdi_persistfd of -1). Failure to create the lock
+ * is fatal.
+ */
+void
+libvarpd_persist_init(varpd_impl_t *vip)
+{
+	vip->vdi_persistfd = -1;
+	if (rwlock_init(&vip->vdi_pfdlock, USYNC_THREAD, NULL) == 0)
+		return;
+
+	libvarpd_panic("failed to create rw vdi_pfdlock");
+}
+
+/*
+ * Tear down persistence state for a handle: close the persistence directory
+ * if it is still open and destroy vdi_pfdlock. Any failure is fatal.
+ */
+void
+libvarpd_persist_fini(varpd_impl_t *vip)
+{
+	int pfd = vip->vdi_persistfd;
+
+	/*
+	 * Clean up for someone that left something behind.
+	 */
+	if (pfd != -1) {
+		if (close(pfd) != 0)
+			libvarpd_panic("failed to close persist fd %d: %d",
+			    vip->vdi_persistfd, errno);
+		vip->vdi_persistfd = -1;
+	}
+
+	if (rwlock_destroy(&vip->vdi_pfdlock) == 0)
+		return;
+
+	libvarpd_panic("failed to destroy rw vdi_pfdlock");
+}
+
+/*
+ * Enable persistence using the directory 'rootdir'. The directory is opened
+ * and, once verified to be a directory, its descriptor is stored in
+ * vdi_persistfd under vdi_pfdlock.
+ *
+ * Returns 0 on success, the errno from open() or fstat(), EINVAL if 'rootdir'
+ * is not a directory, or EEXIST if persistence was already enabled. On every
+ * failure after the open the descriptor is closed again; a failing close is
+ * fatal.
+ */
+int
+libvarpd_persist_enable(varpd_handle_t *vhp, const char *rootdir)
+{
+	int dfd, err;
+	struct stat sb;
+	varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+	if ((dfd = open(rootdir, O_RDONLY)) < 0)
+		return (errno);
+
+	if (fstat(dfd, &sb) != 0) {
+		err = errno;
+		goto fail;
+	}
+
+	if (!S_ISDIR(sb.st_mode)) {
+		err = EINVAL;
+		goto fail;
+	}
+
+	rw_wrlock(&vip->vdi_pfdlock);
+	if (vip->vdi_persistfd != -1) {
+		rw_unlock(&vip->vdi_pfdlock);
+		err = EEXIST;
+		goto fail;
+	}
+	vip->vdi_persistfd = dfd;
+	rw_unlock(&vip->vdi_pfdlock);
+
+	return (0);
+
+fail:
+	if (close(dfd) != 0)
+		libvarpd_panic("failed to close rootdir fd (%s) %d: %d",
+		    rootdir, dfd, errno);
+	return (err);
+}
+
+/*
+ * Write all 'buflen' bytes of 'buf' to 'fd', in chunks of at most
+ * VARPD_PERSIST_MAXWRITE bytes, continuing after short writes and retrying on
+ * EINTR. Returns 0 once everything has been written or the errno of the
+ * write() that failed.
+ */
+static int
+libvarpd_persist_write(int fd, const void *buf, size_t buflen)
+{
+	/* Walk the buffer as bytes; arithmetic on void * is not portable C. */
+	const char *cur = buf;
+
+	while (buflen > 0) {
+		/* write() returns ssize_t; -1 signals an error. */
+		ssize_t ret = write(fd, cur,
+		    MIN(buflen, VARPD_PERSIST_MAXWRITE));
+
+		if (ret == -1) {
+			if (errno == EINTR)
+				continue;
+			return (errno);
+		}
+
+		cur += ret;
+		buflen -= (size_t)ret;
+	}
+
+	return (0);
+}
+
+/*
+ * Atomically write 'nvl' to the file "<id>.varpd" in the directory 'dirfd'.
+ *
+ * The nvlist is packed with XDR encoding and written after a
+ * varpd_persist_header_t holding the magic, the version and the MD5 digest of
+ * the packed data. The file is created through librename so that it only
+ * appears under its final name once it has been completely written. Returns 0
+ * on success or an errno value from whichever step failed.
+ */
+static int
+libvarpd_persist_nvlist(int dirfd, uint64_t id, nvlist_t *nvl)
+{
+	int err, fd;
+	size_t size;
+	varpd_persist_header_t hdr;
+	librename_atomic_t *lrap = NULL;
+	char *buf = NULL, *name = NULL;
+
+	if ((err = nvlist_pack(nvl, &buf, &size, NV_ENCODE_XDR, 0)) != 0)
+		return (err);
+
+	/* The ID is unsigned, so format it as such. */
+	if (asprintf(&name, "%llu%s", (unsigned long long)id,
+	    VARPD_PERSIST_SUFFIX) == -1) {
+		err = errno;
+		/* Don't trust the pointer after a failed asprintf(). */
+		name = NULL;
+		goto out;
+	}
+
+	if ((err = librename_atomic_fdinit(dirfd, name, NULL, 0600, 0,
+	    &lrap)) != 0) {
+		/* Nothing to tear down if initialization failed. */
+		lrap = NULL;
+		goto out;
+	}
+
+	fd = librename_atomic_fd(lrap);
+
+	bzero(&hdr, sizeof (varpd_persist_header_t));
+	bcopy(varpd_persist_magic, hdr.vph_magic, sizeof (varpd_persist_magic));
+	hdr.vph_version = VARPD_PERSIST_VERSION_ONE;
+	md5_calc(hdr.vph_md5, buf, size);
+
+	if ((err = libvarpd_persist_write(fd, &hdr,
+	    sizeof (varpd_persist_header_t))) != 0)
+		goto out;
+
+	if ((err = libvarpd_persist_write(fd, buf, size)) != 0)
+		goto out;
+
+	do {
+		err = librename_atomic_commit(lrap);
+	} while (err == EINTR);
+
+out:
+	if (lrap != NULL)
+		librename_atomic_fini(lrap);
+	free(name);
+	free(buf);
+	return (err);
+}
+
+/*
+ * Persist 'inst' if persistence is enabled; if it is not, this is a no-op
+ * that returns 0.
+ *
+ * The instance's ID, link ID, destination, mode and plugin name are stored in
+ * an nvlist together with a nested "vri_private" nvlist filled in by the
+ * plugin's vpo_save entry point, and the result is written out with
+ * libvarpd_persist_nvlist(). vdi_pfdlock is held as a reader throughout.
+ * Returns 0 on success or the first error encountered.
+ */
+int
+libvarpd_persist_instance(varpd_impl_t *vip, varpd_instance_t *inst)
+{
+	int err = 0;
+	nvlist_t *nvl = NULL, *cvl = NULL;
+
+	rw_rdlock(&vip->vdi_pfdlock);
+
+	/* Check if persistence exists */
+	if (vip->vdi_persistfd == -1)
+		goto out;
+
+	/* Each step only runs if all of the preceding ones succeeded. */
+	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0 ||
+	    (err = nvlist_alloc(&cvl, NV_UNIQUE_NAME, 0)) != 0 ||
+	    (err = nvlist_add_uint64(nvl, "vri_id", inst->vri_id)) != 0 ||
+	    (err = nvlist_add_uint32(nvl, "vri_linkid",
+	    inst->vri_linkid)) != 0 ||
+	    (err = nvlist_add_uint32(nvl, "vri_dest",
+	    (uint32_t)inst->vri_dest)) != 0 ||
+	    (err = nvlist_add_uint32(nvl, "vri_mode",
+	    (uint32_t)inst->vri_mode)) != 0 ||
+	    (err = nvlist_add_string(nvl, "vri_plugin",
+	    inst->vri_plugin->vpp_name)) != 0 ||
+	    (err = inst->vri_plugin->vpp_ops->vpo_save(inst->vri_private,
+	    cvl)) != 0 ||
+	    (err = nvlist_add_nvlist(nvl, "vri_private", cvl)) != 0)
+		goto out;
+
+	err = libvarpd_persist_nvlist(vip->vdi_persistfd, inst->vri_id, nvl);
+
+out:
+	nvlist_free(nvl);
+	nvlist_free(cvl);
+	rw_unlock(&vip->vdi_pfdlock);
+	return (err);
+}
+
+/*
+ * Remove the persisted file for 'inst', if persistence is enabled. A file
+ * that does not exist is not an error; any other unlinkat() failure is fatal.
+ * vdi_pfdlock is held as a reader while the file is removed.
+ */
+void
+libvarpd_torch_instance(varpd_impl_t *vip, varpd_instance_t *inst)
+{
+	char buf[32];
+	int ret;
+
+	rw_rdlock(&vip->vdi_pfdlock);
+	if (vip->vdi_persistfd == -1) {
+		rw_unlock(&vip->vdi_pfdlock);
+		return;
+	}
+
+	/*
+	 * The ID is unsigned, so format it as such. A uint64_t is at most 20
+	 * digits, so the name always fits; check against the real buffer size
+	 * anyway, and treat an snprintf() failure the same way.
+	 */
+	ret = snprintf(buf, sizeof (buf), "%llu%s",
+	    (unsigned long long)inst->vri_id, VARPD_PERSIST_SUFFIX);
+	if (ret < 0 || (size_t)ret >= sizeof (buf))
+		libvarpd_panic("somehow exceeded static value for "
+		    "libvarpd_torch_instance buffer");
+
+	do {
+		ret = unlinkat(vip->vdi_persistfd, buf, 0);
+	} while (ret == -1 && errno == EINTR);
+	if (ret != 0) {
+		switch (errno) {
+		case ENOENT:
+			break;
+		default:
+			libvarpd_panic("failed to unlinkat %d`%s: %s",
+			    vip->vdi_persistfd, buf, strerror(errno));
+		}
+	}
+
+	rw_unlock(&vip->vdi_pfdlock);
+}
+
+/*
+ * Recreate an instance from the persisted nvlist 'nvl'.
+ *
+ * The nvlist must carry vri_id, vri_linkid, vri_dest, vri_mode, vri_plugin
+ * and vri_private. The named plugin must exist and its mode must match the
+ * persisted one, and the persisted destination must match what the overlay
+ * device currently reports for the datalink. The instance is then allocated
+ * with its original ID, restored and started through the plugin, inserted
+ * into both instance trees and (re)associated with the overlay device.
+ *
+ * Returns 0 on success, ENOMEM if the instance could not be allocated, and
+ * EINVAL for every other failure. A duplicate ID or link ID in the instance
+ * trees is fatal.
+ */
+static int
+libvarpd_persist_restore_instance(varpd_impl_t *vip, nvlist_t *nvl)
+{
+	nvlist_t *pvl;
+	uint64_t id, flags, vid;
+	uint32_t linkid, dest, mode;
+	char *pluginstr;
+	varpd_plugin_t *plugin;
+	overlay_plugin_dest_t adest;
+	varpd_instance_t *inst, lookup;
+
+	if (nvlist_lookup_uint64(nvl, "vri_id", &id) != 0)
+		return (EINVAL);
+
+	if (nvlist_lookup_uint32(nvl, "vri_linkid", &linkid) != 0)
+		return (EINVAL);
+
+	if (nvlist_lookup_uint32(nvl, "vri_dest", &dest) != 0)
+		return (EINVAL);
+
+	if (nvlist_lookup_uint32(nvl, "vri_mode", &mode) != 0)
+		return (EINVAL);
+
+	if (nvlist_lookup_string(nvl, "vri_plugin", &pluginstr) != 0)
+		return (EINVAL);
+
+	if (nvlist_lookup_nvlist(nvl, "vri_private", &pvl) != 0)
+		return (EINVAL);
+
+	plugin = libvarpd_plugin_lookup(vip, pluginstr);
+	if (plugin == NULL)
+		return (EINVAL);
+
+	if (plugin->vpp_mode != mode)
+		return (EINVAL);
+
+	if (libvarpd_overlay_info(vip, linkid, &adest, &flags, &vid) != 0)
+		return (EINVAL);
+
+	if (dest != adest)
+		return (EINVAL);
+
+	/* XXX This failure shouldn't cause us to unlink... */
+	inst = umem_alloc(sizeof (varpd_instance_t), UMEM_DEFAULT);
+	if (inst == NULL)
+		return (ENOMEM);
+
+	/* The instance has to come back with the ID it was persisted with. */
+	inst->vri_id = id_alloc_specific(vip->vdi_idspace, id);
+	if (inst->vri_id != id) {
+		umem_free(inst, sizeof (varpd_instance_t));
+		return (EINVAL);
+	}
+
+	inst->vri_linkid = linkid;
+	inst->vri_vnetid = vid;
+	inst->vri_mode = plugin->vpp_mode;
+	inst->vri_dest = dest;
+	inst->vri_plugin = plugin;
+	inst->vri_impl = vip;
+	inst->vri_flags = 0;
+	if (plugin->vpp_ops->vpo_restore(pvl, (varpd_provider_handle_t *)inst,
+	    dest, &inst->vri_private) != 0) {
+		id_free(vip->vdi_idspace, id);
+		umem_free(inst, sizeof (varpd_instance_t));
+		return (EINVAL);
+	}
+
+	if (mutex_init(&inst->vri_lock, USYNC_THREAD, NULL) != 0)
+		libvarpd_panic("failed to create vri_lock mutex");
+
+	mutex_lock(&vip->vdi_lock);
+	lookup.vri_id = inst->vri_id;
+	/* vri_id is 64 bits wide; it must not be formatted with %d. */
+	if (avl_find(&vip->vdi_instances, &lookup, NULL) != NULL)
+		libvarpd_panic("found duplicate instance with id %llu",
+		    (unsigned long long)lookup.vri_id);
+	avl_add(&vip->vdi_instances, inst);
+	lookup.vri_linkid = inst->vri_linkid;
+	if (avl_find(&vip->vdi_linstances, &lookup, NULL) != NULL)
+		libvarpd_panic("found duplicate linstance with id %d",
+		    lookup.vri_linkid);
+	avl_add(&vip->vdi_linstances, inst);
+	mutex_unlock(&vip->vdi_lock);
+
+	if (plugin->vpp_ops->vpo_start(inst->vri_private) != 0) {
+		libvarpd_instance_destroy((varpd_instance_handle_t *)inst);
+		return (EINVAL);
+	}
+
+	/*
+	 * If the device still has an association for this datalink, tear it
+	 * down first. The result is deliberately ignored; the associate that
+	 * follows is what decides whether the restore succeeded.
+	 */
+	if (flags & OVERLAY_TARG_INFO_F_ACTIVE)
+		(void) libvarpd_overlay_disassociate(inst);
+
+	if (libvarpd_overlay_associate(inst) != 0) {
+		libvarpd_instance_destroy((varpd_instance_handle_t *)inst);
+		return (EINVAL);
+	}
+
+	/* Best effort: clear a degraded state left on the datalink. */
+	if (flags & OVERLAY_TARG_INFO_F_DEGRADED)
+		(void) libvarpd_overlay_restore(inst);
+
+	mutex_lock(&inst->vri_lock);
+	inst->vri_flags |= VARPD_INSTANCE_F_ACTIVATED;
+	mutex_unlock(&inst->vri_lock);
+
+	return (0);
+}
+
+/*
+ * Validate a single persisted instance file and restore the instance that it
+ * describes.
+ *
+ * The file is mapped read-only and must consist of a varpd_persist_header_t
+ * followed by a packed nvlist. The header's magic, version and MD5 digest of
+ * the payload are all verified before the nvlist is unpacked.
+ *
+ * Returns the errno from fstat() or mmap() if either fails, EINVAL if the file
+ * is too small or fails validation or unpacking, and otherwise the result of
+ * libvarpd_persist_restore_instance().
+ */
+static int
+libvarpd_persist_restore_one(varpd_impl_t *vip, int fd)
+{
+	int err = EINVAL;
+	size_t paylen;
+	struct stat sb;
+	void *map, *payload;
+	varpd_persist_header_t *hdr;
+	uint8_t digest[16];
+	nvlist_t *nvl;
+
+	if (fstat(fd, &sb) != 0)
+		return (errno);
+
+	/* There has to be at least one byte of payload after the header. */
+	if (sb.st_size <= sizeof (varpd_persist_header_t))
+		return (EINVAL);
+	paylen = sb.st_size - sizeof (varpd_persist_header_t);
+
+	map = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (map == MAP_FAILED)
+		return (errno);
+
+	hdr = map;
+	payload = (void *)((uintptr_t)map + sizeof (varpd_persist_header_t));
+
+	/*
+	 * err only becomes zero if every check passes; the mapping is torn
+	 * down in exactly one place below regardless of the outcome.
+	 */
+	if (bcmp(varpd_persist_magic, hdr->vph_magic,
+	    sizeof (varpd_persist_magic)) == 0 &&
+	    hdr->vph_version == VARPD_PERSIST_VERSION_ONE) {
+		md5_calc(digest, payload, paylen);
+		if (bcmp(digest, hdr->vph_md5, sizeof (uint8_t) * 16) == 0 &&
+		    nvlist_unpack(payload, paylen, &nvl, 0) == 0)
+			err = 0;
+	}
+
+	if (munmap(map, sb.st_size) != 0)
+		libvarpd_panic("failed to munmap %p: %d", map, errno);
+
+	if (err != 0)
+		return (EINVAL);
+
+	err = libvarpd_persist_restore_instance(vip, nvl);
+	nvlist_free(nvl);
+	return (err);
+}
+
+/*
+ * Overlay iteration callback: if no instance in vip is bound to linkid, ask
+ * for that datalink to be degraded. Always returns 0.
+ *
+ * XXX ew, O(n^2): every link visited triggers a linear scan of the instance
+ * tree.
+ */
+static int
+libvarpd_check_degrade_cb(varpd_impl_t *vip, datalink_id_t linkid, void *arg)
+{
+	int known = 0;
+	varpd_instance_t *cur;
+
+	mutex_lock(&vip->vdi_lock);
+	cur = avl_first(&vip->vdi_instances);
+	while (cur != NULL) {
+		if (cur->vri_linkid == linkid) {
+			known = 1;
+			break;
+		}
+		cur = AVL_NEXT(&vip->vdi_instances, cur);
+	}
+	mutex_unlock(&vip->vdi_lock);
+
+	/* The degrade request is issued after vdi_lock has been dropped. */
+	if (!known) {
+		(void) libvarpd_overlay_degrade_datalink(vip, linkid,
+		    "no varpd instance exists");
+	}
+
+	return (0);
+}
+
+/*
+ * Run libvarpd_check_degrade_cb() over the overlay devices that
+ * libvarpd_overlay_iter() visits, degrading any for which vip has no instance.
+ */
+static void
+libvarpd_check_degrade(varpd_impl_t *vip)
+{
+ libvarpd_overlay_iter(vip, libvarpd_check_degrade_cb, NULL);
+}
+
+/*
+ * Restore every persisted instance found in the persistence directory.
+ *
+ * Each regular file named `<decimal id>.varpd` is handed to
+ * libvarpd_persist_restore_one(). Files that fail to restore are unlinked so
+ * that they are not retried; anything that does not match the naming scheme
+ * is left alone. Once the walk ends, overlay devices that have no instance are
+ * degraded via libvarpd_check_degrade().
+ *
+ * Returns 0 if the whole directory was walked, otherwise the errno of the
+ * operation (dup, fdopendir, readdir, openat, fstat or unlinkat) that stopped
+ * the walk.
+ *
+ * XXX We need to go through and mark any kernel devices that we don't know
+ * about as degraded.
+ */
+int
+libvarpd_persist_restore(varpd_handle_t *vhp)
+{
+	int dirfd;
+	int ret = 0;
+	DIR *dirp = NULL;
+	struct dirent *dp;
+	varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+	rw_rdlock(&vip->vdi_pfdlock);
+	/*
+	 * closedir() closes the descriptor underneath the DIR, so walk a
+	 * duplicate and leave vdi_persistfd itself open.
+	 */
+	if ((dirfd = dup(vip->vdi_persistfd)) < 0) {
+		ret = errno;
+		goto out;
+	}
+
+	if ((dirp = fdopendir(dirfd)) == NULL) {
+		ret = errno;
+		if (close(dirfd) != 0)
+			libvarpd_panic("failed to close dirfd %d: %d",
+			    dirfd, errno);
+		goto out;
+	}
+
+	for (;;) {
+		int fd;
+		uint64_t id;
+		char *eptr;
+		struct stat st;
+
+		/* readdir() signals both EOF and error with NULL. */
+		errno = 0;
+		dp = readdir(dirp);
+		if (dp == NULL) {
+			ret = errno;
+			break;
+		}
+
+		if (strcmp(dp->d_name, ".") == 0 ||
+		    strcmp(dp->d_name, "..") == 0)
+			continue;
+
+		/*
+		 * Leave files that we don't recognize alone. A valid file has
+		 * the format `%llu.varpd`. A name with no leading digits at
+		 * all (eptr == d_name) does not match that format either.
+		 */
+		errno = 0;
+		id = strtoull(dp->d_name, &eptr, 10);
+		if (eptr == dp->d_name ||
+		    (id == 0 && errno == EINVAL) ||
+		    (id == ULLONG_MAX && errno == ERANGE))
+			continue;
+
+		if (strcmp(eptr, VARPD_PERSIST_SUFFIX) != 0)
+			continue;
+
+		fd = openat(vip->vdi_persistfd, dp->d_name, O_RDONLY);
+		if (fd < 0) {
+			ret = errno;
+			break;
+		}
+
+		if (fstat(fd, &st) != 0) {
+			/* Save errno first; do not leak fd on this path. */
+			ret = errno;
+			if (close(fd) != 0)
+				libvarpd_panic("failed to close fd (%s) %d: "
+				    "%d\n", dp->d_name, fd, errno);
+			break;
+		}
+
+		if (!S_ISREG(st.st_mode)) {
+			if (close(fd) != 0)
+				libvarpd_panic("failed to close fd (%s) %d: "
+				    "%d\n", dp->d_name, fd, errno);
+			continue;
+		}
+
+		ret = libvarpd_persist_restore_one(vip, fd);
+		if (close(fd) != 0)
+			libvarpd_panic("failed to close fd (%s) %d: "
+			    "%d\n", dp->d_name, fd, errno);
+		/*
+		 * This is an invalid file. We'll unlink it to save us this
+		 * trouble in the future. XXX We shouldn't unlink on all
+		 * failures presumably...
+		 */
+		if (ret != 0) {
+			if (unlinkat(vip->vdi_persistfd, dp->d_name, 0) != 0) {
+				ret = errno;
+				break;
+			}
+		}
+	}
+
+	libvarpd_check_degrade(vip);
+
+out:
+	if (dirp != NULL)
+		(void) closedir(dirp);
+	rw_unlock(&vip->vdi_pfdlock);
+	return (ret);
+}
+
+/*
+ * Turn persistence off by closing the persistence directory descriptor.
+ *
+ * Returns 0 on success, or ENOENT if persistence was not enabled. A failure
+ * to close the descriptor is fatal.
+ */
+int
+libvarpd_persist_disable(varpd_handle_t *vhp)
+{
+	varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+	rw_wrlock(&vip->vdi_pfdlock);
+	if (vip->vdi_persistfd == -1) {
+		/*
+		 * Only vdi_pfdlock is held here. vdi_lock was never taken by
+		 * this function, so it must not be released.
+		 */
+		rw_unlock(&vip->vdi_pfdlock);
+		return (ENOENT);
+	}
+	if (close(vip->vdi_persistfd) != 0)
+		libvarpd_panic("failed to close persist fd %d: %d",
+		    vip->vdi_persistfd, errno);
+	vip->vdi_persistfd = -1;
+	rw_unlock(&vip->vdi_pfdlock);
+	return (0);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c
new file mode 100644
index 0000000000..df53ee5d1d
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c
@@ -0,0 +1,233 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/*
+ * varpd plugin management
+ */
+
+#include <libvarpd_impl.h>
+#include <errno.h>
+#include <umem.h>
+#include <assert.h>
+#include <strings.h>
+#include <dlfcn.h>
+#include <link.h>
+#include <stdio.h>
+
+/*
+ * State used while plugins are being loaded. libvarpd_plugin_load() points
+ * varpd_load_handle at the handle it is loading into for the duration of its
+ * directory walk, and libvarpd_plugin_register() adds plugins to whichever
+ * handle is recorded here. varpd_load_lock protects varpd_load_handle, and
+ * varpd_load_cv is signalled when a load finishes so that a waiter can go.
+ */
+static varpd_impl_t *varpd_load_handle;
+static mutex_t varpd_load_lock;
+static cond_t varpd_load_cv;
+
+/*
+ * Comparator for varpd_plugin_t entries: orders plugins by vpp_name and
+ * normalizes the strcmp() result to exactly -1, 0 or 1.
+ */
+int
+libvarpd_plugin_comparator(const void *lp, const void *rp)
+{
+	const varpd_plugin_t *left = lp;
+	const varpd_plugin_t *right = rp;
+	int cmp = strcmp(left->vpp_name, right->vpp_name);
+
+	return ((cmp > 0) - (cmp < 0));
+}
+
+/*
+ * Allocate a registration structure for a plugin of the given interface
+ * version. Only vpr_version is initialized; the caller fills in the rest.
+ *
+ * Returns the structure, or NULL on failure. When errp is non-NULL the reason
+ * is stored through it: EINVAL for an unsupported version, ENOMEM if the
+ * allocation fails.
+ */
+varpd_plugin_register_t *
+libvarpd_plugin_alloc(uint_t version, int *errp)
+{
+	int scratch;
+	int *ep = (errp != NULL) ? errp : &scratch;
+	varpd_plugin_register_t *reg;
+
+	if (version != VARPD_VERSION_ONE) {
+		*ep = EINVAL;
+		return (NULL);
+	}
+
+	reg = umem_alloc(sizeof (varpd_plugin_register_t), UMEM_DEFAULT);
+	if (reg == NULL) {
+		*ep = ENOMEM;
+		return (NULL);
+	}
+
+	reg->vpr_version = VARPD_VERSION_ONE;
+	return (reg);
+}
+
+/*
+ * Release a registration structure that is sized as a
+ * varpd_plugin_register_t, such as one from libvarpd_plugin_alloc().
+ */
+void
+libvarpd_plugin_free(varpd_plugin_register_t *vprp)
+{
+ umem_free(vprp, sizeof (varpd_plugin_register_t));
+}
+
+/*
+ * Register a plugin with the handle that is currently loading plugins
+ * (varpd_load_handle). It is fatal for no load to be in progress.
+ *
+ * The plugin's name is copied; vpr_ops is referenced, not copied, so it must
+ * outlive the registration.
+ *
+ * Returns 0 on success, EINVAL if the registration is malformed (NULL, wrong
+ * version, or missing name or ops), EEXIST if a plugin of that name is already
+ * registered, and ENOMEM if memory cannot be allocated.
+ */
+int
+libvarpd_plugin_register(varpd_plugin_register_t *vprp)
+{
+	varpd_plugin_t *vpp;
+	varpd_plugin_t lookup;
+
+	/*
+	 * Watch out for an evil plugin. Validate before allocating anything
+	 * so that a rejected registration cannot leak memory.
+	 */
+	if (vprp == NULL || vprp->vpr_version != VARPD_VERSION_ONE ||
+	    vprp->vpr_name == NULL || vprp->vpr_ops == NULL)
+		return (EINVAL);
+
+	vpp = umem_alloc(sizeof (varpd_plugin_t), UMEM_DEFAULT);
+	if (vpp == NULL)
+		return (ENOMEM);
+
+	mutex_lock(&varpd_load_lock);
+	if (varpd_load_handle == NULL)
+		libvarpd_panic("varpd_load_handle was unexpectedly null");
+
+	mutex_lock(&varpd_load_handle->vdi_lock);
+	lookup.vpp_name = vprp->vpr_name;
+	if (avl_find(&varpd_load_handle->vdi_plugins, &lookup, NULL) != NULL) {
+		mutex_unlock(&varpd_load_handle->vdi_lock);
+		mutex_unlock(&varpd_load_lock);
+		umem_free(vpp, sizeof (varpd_plugin_t));
+		return (EEXIST);
+	}
+	vpp->vpp_name = strdup(vprp->vpr_name);
+	if (vpp->vpp_name == NULL) {
+		/* Out of memory, not a name collision. */
+		mutex_unlock(&varpd_load_handle->vdi_lock);
+		mutex_unlock(&varpd_load_lock);
+		umem_free(vpp, sizeof (varpd_plugin_t));
+		return (ENOMEM);
+	}
+
+	vpp->vpp_mode = vprp->vpr_mode;
+	vpp->vpp_ops = vprp->vpr_ops;
+	if (mutex_init(&vpp->vpp_lock, USYNC_THREAD, NULL) != 0)
+		libvarpd_panic("failed to create plugin's vpp_lock");
+	vpp->vpp_active = 0;
+	avl_add(&varpd_load_handle->vdi_plugins, vpp);
+	mutex_unlock(&varpd_load_handle->vdi_lock);
+	mutex_unlock(&varpd_load_lock);
+
+	return (0);
+}
+
+/*
+ * Find the plugin registered with vip under the given name. Returns the
+ * plugin, or NULL if there is none. The search runs under vdi_lock, which is
+ * dropped again before returning.
+ */
+varpd_plugin_t *
+libvarpd_plugin_lookup(varpd_impl_t *vip, const char *name)
+{
+	varpd_plugin_t key;
+	varpd_plugin_t *found;
+
+	/* Only the name is filled in; it is the field the lookup is keyed on. */
+	key.vpp_name = name;
+
+	mutex_lock(&vip->vdi_lock);
+	found = avl_find(&vip->vdi_plugins, &key, NULL);
+	mutex_unlock(&vip->vdi_lock);
+
+	return (found);
+}
+
+/*
+ * Directory-walk callback that dlopen()s one candidate plugin object. A
+ * failed dlopen() is reported on stderr but does not stop the walk: this
+ * always returns 0.
+ */
+static int
+libvarpd_plugin_load_cb(varpd_impl_t *vip, const char *path, void *unused)
+{
+	/* The returned handle is intentionally not kept. */
+	if (dlopen(path, RTLD_NOW | RTLD_LOCAL) == NULL) {
+		/* XXX This should be a real error */
+		fprintf(stderr, "dlopen failed: %s\n", dlerror());
+	}
+
+	return (0);
+}
+
+/*
+ * Load every ".so" object that libvarpd_dirwalk() finds under path into the
+ * given handle.
+ *
+ * Only one load runs at a time: the caller waits until varpd_load_handle is
+ * free, claims it for the duration of the walk, then releases it and wakes a
+ * waiter.
+ *
+ * Returns EINVAL if either argument is NULL, otherwise the result of the
+ * directory walk.
+ */
+int
+libvarpd_plugin_load(varpd_handle_t *vph, const char *path)
+{
+	int ret;
+	varpd_impl_t *vip = (varpd_impl_t *)vph;
+
+	if (vip == NULL || path == NULL)
+		return (EINVAL);
+
+	/* Claim the load slot. */
+	mutex_lock(&varpd_load_lock);
+	for (;;) {
+		if (varpd_load_handle == NULL)
+			break;
+		cond_wait(&varpd_load_cv, &varpd_load_lock);
+	}
+	varpd_load_handle = vip;
+	mutex_unlock(&varpd_load_lock);
+
+	ret = libvarpd_dirwalk(vip, path, ".so", libvarpd_plugin_load_cb, NULL);
+
+	/* Give the slot back and let one waiter proceed. */
+	mutex_lock(&varpd_load_lock);
+	varpd_load_handle = NULL;
+	cond_signal(&varpd_load_cv);
+	mutex_unlock(&varpd_load_lock);
+
+	return (ret);
+}
+
+/*
+ * Invoke func once for each plugin registered with the handle, passing the
+ * handle, the plugin's name and arg. The walk holds vdi_lock throughout and
+ * stops at the first non-zero return from func.
+ *
+ * Returns 1 if func stopped the walk early and 0 otherwise.
+ */
+int
+libvarpd_plugin_walk(varpd_handle_t *vph, libvarpd_plugin_walk_f func,
+    void *arg)
+{
+	int stopped = 0;
+	varpd_impl_t *vip = (varpd_impl_t *)vph;
+	varpd_plugin_t *cur;
+
+	mutex_lock(&vip->vdi_lock);
+	cur = avl_first(&vip->vdi_plugins);
+	while (cur != NULL) {
+		if (func(vph, cur->vpp_name, arg) != 0) {
+			stopped = 1;
+			break;
+		}
+		cur = AVL_NEXT(&vip->vdi_plugins, cur);
+	}
+	mutex_unlock(&vip->vdi_lock);
+
+	return (stopped);
+}
+
+/*
+ * Initialize the plugin-loading state: the load lock, its condition variable
+ * and the (initially empty) load handle. Failure to create either
+ * synchronization object is fatal.
+ */
+void
+libvarpd_plugin_init(void)
+{
+	const int locktype = USYNC_THREAD | LOCK_RECURSIVE | LOCK_ERRORCHECK;
+
+	if (mutex_init(&varpd_load_lock, locktype, NULL) != 0) {
+		libvarpd_panic("failed to create varpd_load_lock");
+	}
+
+	if (cond_init(&varpd_load_cv, USYNC_THREAD, NULL) != 0) {
+		libvarpd_panic("failed to create varpd_load_cv");
+	}
+
+	varpd_load_handle = NULL;
+}
+
+/*
+ * Tear down the plugin-loading state. No load may be in progress when this is
+ * called (asserted); failing to destroy the lock or the condition variable is
+ * fatal.
+ */
+void
+libvarpd_plugin_fini(void)
+{
+ assert(varpd_load_handle == NULL);
+ if (mutex_destroy(&varpd_load_lock) != 0)
+ libvarpd_panic("failed to destroy varpd_load_lock");
+ if (cond_destroy(&varpd_load_cv) != 0)
+ libvarpd_panic("failed to destroy varpd_load_cv");
+}
+
+/*
+ * Wait until no plugin load is in progress and return with varpd_load_lock
+ * still held. The lock is released by libvarpd_plugin_postfork().
+ * NOTE(review): presumably called around fork() so that a child never starts
+ * with a load in flight; confirm against the caller.
+ */
+void
+libvarpd_plugin_prefork(void)
+{
+ mutex_lock(&varpd_load_lock);
+ while (varpd_load_handle != NULL)
+ cond_wait(&varpd_load_cv, &varpd_load_lock);
+}
+
+/*
+ * Counterpart of libvarpd_plugin_prefork(): wake one waiter on varpd_load_cv
+ * and release varpd_load_lock, which the caller must hold on entry.
+ */
+void
+libvarpd_plugin_postfork(void)
+{
+ cond_signal(&varpd_load_cv);
+ mutex_unlock(&varpd_load_lock);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c
new file mode 100644
index 0000000000..32ab9e81ab
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c
@@ -0,0 +1,238 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/*
+ * varpd property management
+ */
+
+#include <libvarpd_impl.h>
+#include <errno.h>
+#include <strings.h>
+#include <sys/mac.h>
+#include <umem.h>
+
+/*
+ * Backing storage for a varpd_prop_handle_t.
+ *
+ * vprop_vip and vprop_instance record the handle and instance the property
+ * handle was allocated for. vprop_type, vprop_prot and vprop_name are filled
+ * in through the libvarpd_prop_set_*() routines. vprop_default holds the
+ * default value and vprop_defsize the number of bytes of it that are valid.
+ * vprop_poss is interpreted as a mac_propval_range_t describing the possible
+ * values, and vprop_psize is the number of bytes of it that are in use.
+ */
+typedef struct varpd_prop_info {
+ varpd_impl_t *vprop_vip;
+ varpd_instance_t *vprop_instance;
+ uint_t vprop_type;
+ uint_t vprop_prot;
+ uint32_t vprop_defsize;
+ uint32_t vprop_psize;
+ char vprop_name[LIBVARPD_PROP_NAMELEN];
+ uint8_t vprop_default[LIBVARPD_PROP_SIZEMAX];
+ uint8_t vprop_poss[LIBVARPD_PROP_SIZEMAX];
+} varpd_prop_info_t;
+
+/*
+ * Record the property's name in the handle. The name is truncated if it does
+ * not fit in vprop_name; the result is always NUL-terminated.
+ */
+void
+libvarpd_prop_set_name(varpd_prop_handle_t *phdl, const char *name)
+{
+	varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+
+	/*
+	 * Bound the copy by the destination itself rather than by a constant
+	 * from a different header, so the two can never drift apart.
+	 */
+	(void) strlcpy(infop->vprop_name, name, sizeof (infop->vprop_name));
+}
+
+/*
+ * Record the property's protection (permission) value in the handle.
+ */
+void
+libvarpd_prop_set_prot(varpd_prop_handle_t *phdl, overlay_prop_prot_t perm)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ infop->vprop_prot = perm;
+}
+
+/*
+ * Record the property's type in the handle.
+ */
+void
+libvarpd_prop_set_type(varpd_prop_handle_t *phdl, overlay_prop_type_t type)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ infop->vprop_type = type;
+}
+
+/*
+ * Copy len bytes from buf into the handle as the property's default value and
+ * record that length.
+ *
+ * Returns 0 on success, E2BIG if len exceeds LIBVARPD_PROP_SIZEMAX and
+ * EOVERFLOW if len is negative.
+ */
+int
+libvarpd_prop_set_default(varpd_prop_handle_t *phdl, void *buf, ssize_t len)
+{
+	varpd_prop_info_t *pi = (varpd_prop_info_t *)phdl;
+
+	if (len > LIBVARPD_PROP_SIZEMAX) {
+		return (E2BIG);
+	}
+
+	if (len < 0) {
+		return (EOVERFLOW);
+	}
+
+	pi->vprop_defsize = len;
+	bcopy(buf, pi->vprop_default, len);
+
+	return (0);
+}
+
+/*
+ * Mark the property as having no default value: the default size is set to
+ * zero and the first byte of the default buffer is cleared.
+ */
+void
+libvarpd_prop_set_nodefault(varpd_prop_handle_t *phdl)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+
+ infop->vprop_default[0] = '\0';
+ infop->vprop_defsize = 0;
+}
+
+/*
+ * Append a [min, max] uint32 range to the property's set of possible values.
+ *
+ * The request is silently dropped if ranges of a different type have already
+ * been recorded, or if another range would not fit in vprop_poss.
+ */
+void
+libvarpd_prop_set_range_uint32(varpd_prop_handle_t *phdl, uint32_t min,
+    uint32_t max)
+{
+	varpd_prop_info_t *pi = (varpd_prop_info_t *)phdl;
+	mac_propval_range_t *rangep = (mac_propval_range_t *)pi->vprop_poss;
+	const size_t need = sizeof (mac_propval_uint32_range_t);
+
+	/* XXX We should probably set some kind of error here... */
+	if (rangep->mpr_count != 0 && rangep->mpr_type != MAC_PROPVAL_UINT32)
+		return;
+
+	/* XXX We should probably set some kind of error here... */
+	if (pi->vprop_psize + need > sizeof (pi->vprop_poss))
+		return;
+
+	/* Fill in the next free slot, then account for it. */
+	rangep->u.mpr_uint32[rangep->mpr_count].mpur_min = min;
+	rangep->u.mpr_uint32[rangep->mpr_count].mpur_max = max;
+	rangep->mpr_type = MAC_PROPVAL_UINT32;
+	rangep->mpr_count++;
+	pi->vprop_psize += need;
+}
+
+/*
+ * Append a string to the property's set of possible values. Strings are
+ * stored back to back, each with its terminating NUL, starting at
+ * mpur_nextbyte.
+ *
+ * The request is silently dropped if values of a different type have already
+ * been recorded, or if the string would not fit in vprop_poss.
+ */
+void
+libvarpd_prop_set_range_str(varpd_prop_handle_t *phdl, const char *str)
+{
+	varpd_prop_info_t *pi = (varpd_prop_info_t *)phdl;
+	mac_propval_range_t *rangep = (mac_propval_range_t *)pi->vprop_poss;
+	mac_propval_str_range_t *strs = &rangep->u.mpr_str;
+	size_t need = strlen(str) + 1; /* Account for a null terminator */
+	char *dst;
+
+	/* XXX Errors */
+	if (rangep->mpr_count != 0 && rangep->mpr_type != MAC_PROPVAL_STR)
+		return;
+
+	/* XXX Errors */
+	if (pi->vprop_psize + need > sizeof (pi->vprop_poss))
+		return;
+
+	dst = (char *)&strs->mpur_data[strs->mpur_nextbyte];
+	strlcpy(dst, str, sizeof (pi->vprop_poss) - pi->vprop_psize);
+
+	strs->mpur_nextbyte += need;
+	pi->vprop_psize += need;
+	rangep->mpr_type = MAC_PROPVAL_STR;
+	rangep->mpr_count++;
+}
+
+/*
+ * Allocate a zeroed property handle bound to the given varpd handle and
+ * instance, and return it through phdlp.
+ *
+ * Returns 0 on success or ENOMEM if the allocation fails, in which case
+ * *phdlp is left untouched.
+ */
+int
+libvarpd_prop_handle_alloc(varpd_handle_t *vph, varpd_instance_handle_t *inst,
+    varpd_prop_handle_t **phdlp)
+{
+	varpd_prop_info_t *pi;
+
+	/* umem_zalloc() hands back memory that is already zero-filled. */
+	pi = umem_zalloc(sizeof (varpd_prop_info_t), UMEM_DEFAULT);
+	if (pi == NULL)
+		return (ENOMEM);
+
+	pi->vprop_instance = (varpd_instance_t *)inst;
+	pi->vprop_vip = (varpd_impl_t *)vph;
+
+	*phdlp = (varpd_prop_handle_t *)pi;
+	return (0);
+}
+
+/*
+ * Release a property handle that is backed by a varpd_prop_info_t, such as
+ * one from libvarpd_prop_handle_alloc().
+ */
+void
+libvarpd_prop_handle_free(varpd_prop_handle_t *phdl)
+{
+ umem_free(phdl, sizeof (varpd_prop_info_t));
+}
+
+/*
+ * Ask the instance's plugin how many properties it has. The count is stored
+ * through np and the plugin's return value is passed back unchanged.
+ */
+int
+libvarpd_prop_nprops(varpd_instance_handle_t *ihdl, uint_t *np)
+{
+	varpd_instance_t *inst = (varpd_instance_t *)ihdl;
+	const varpd_plugin_ops_t *ops = inst->vri_plugin->vpp_ops;
+
+	return (ops->vpo_nprops(inst->vri_private, np));
+}
+
+/*
+ * Populate the handle with the description of property number propid.
+ *
+ * The possible-values header is reset first (an empty mac_propval_range_t),
+ * then the plugin's vpo_propinfo entry point fills the handle in, typically
+ * through the libvarpd_prop_set_*() routines. The plugin's return value is
+ * passed back unchanged.
+ */
+int
+libvarpd_prop_info_fill(varpd_prop_handle_t *phdl, uint_t propid)
+{
+	varpd_prop_info_t *pi = (varpd_prop_info_t *)phdl;
+	varpd_instance_t *inst = pi->vprop_instance;
+	const varpd_plugin_ops_t *ops = inst->vri_plugin->vpp_ops;
+
+	bzero(pi->vprop_poss, sizeof (mac_propval_range_t));
+	pi->vprop_psize = sizeof (mac_propval_range_t);
+
+	return (ops->vpo_propinfo(inst->vri_private, propid, phdl));
+}
+
+/*
+ * Read back what a property handle describes. Every output pointer is
+ * optional; NULL ones are skipped. The returned pointers refer to storage
+ * inside the handle itself.
+ *
+ * Note that sizep receives vprop_psize, the number of bytes in use in the
+ * possible-values buffer, not the size of the default value.
+ *
+ * Always returns 0.
+ */
+int
+libvarpd_prop_info(varpd_prop_handle_t *phdl, const char **namep,
+    uint_t *typep, uint_t *protp, const void **defp, uint32_t *sizep,
+    const mac_propval_range_t **possp)
+{
+	varpd_prop_info_t *pi = (varpd_prop_info_t *)phdl;
+
+	if (namep != NULL) {
+		*namep = pi->vprop_name;
+	}
+	if (typep != NULL) {
+		*typep = pi->vprop_type;
+	}
+	if (protp != NULL) {
+		*protp = pi->vprop_prot;
+	}
+	if (defp != NULL) {
+		*defp = pi->vprop_default;
+	}
+	if (sizep != NULL) {
+		*sizep = pi->vprop_psize;
+	}
+	if (possp != NULL) {
+		*possp = (mac_propval_range_t *)pi->vprop_poss;
+	}
+
+	return (0);
+}
+
+/*
+ * Fetch the current value of the property the handle names, via the plugin's
+ * vpo_getprop entry point.
+ *
+ * Returns EINVAL if the handle has no name set, otherwise whatever the plugin
+ * returns.
+ */
+int
+libvarpd_prop_get(varpd_prop_handle_t *phdl, void *buf, uint32_t *sizep)
+{
+	varpd_prop_info_t *pi = (varpd_prop_info_t *)phdl;
+	varpd_instance_t *inst = pi->vprop_instance;
+	const varpd_plugin_ops_t *ops;
+
+	/* XXX We should maybe keep a boolean to keep track of this? */
+	if (pi->vprop_name[0] == '\0')
+		return (EINVAL);
+
+	ops = inst->vri_plugin->vpp_ops;
+	return (ops->vpo_getprop(inst->vri_private, pi->vprop_name, buf,
+	    sizep));
+}
+
+/*
+ * Set the property the handle names to the size bytes at buf, via the
+ * plugin's vpo_setprop entry point.
+ *
+ * Returns EINVAL if the handle has no name set, otherwise whatever the plugin
+ * returns.
+ */
+int
+libvarpd_prop_set(varpd_prop_handle_t *phdl, const void *buf, uint32_t size)
+{
+	varpd_prop_info_t *pi = (varpd_prop_info_t *)phdl;
+	varpd_instance_t *inst = pi->vprop_instance;
+	const varpd_plugin_ops_t *ops;
+
+	/* XXX We should maybe keep a boolean to keep track of this? */
+	if (pi->vprop_name[0] == '\0')
+		return (EINVAL);
+
+	ops = inst->vri_plugin->vpp_ops;
+	return (ops->vpo_setprop(inst->vri_private, pi->vprop_name, buf,
+	    size));
+}
+
+/*
+ * Copy a property handle's contents into the argument structure used to
+ * answer a client's property-info request. The name, default and
+ * possible-values buffers are copied in full, regardless of how much of each
+ * is in use.
+ */
+void
+libvarpd_prop_door_convert(const varpd_prop_handle_t *phdl,
+    varpd_client_propinfo_arg_t *vcfap)
+{
+	const varpd_prop_info_t *pi = (const varpd_prop_info_t *)phdl;
+
+	/* Fixed-size buffers first... */
+	bcopy(pi->vprop_name, vcfap->vcfa_name, LIBVARPD_PROP_NAMELEN);
+	bcopy(pi->vprop_default, vcfap->vcfa_default, LIBVARPD_PROP_SIZEMAX);
+	bcopy(pi->vprop_poss, vcfap->vcfa_poss, LIBVARPD_PROP_SIZEMAX);
+
+	/* ...then the scalars that describe them. */
+	vcfap->vcfa_type = pi->vprop_type;
+	vcfap->vcfa_prot = pi->vprop_prot;
+	vcfap->vcfa_defsize = pi->vprop_defsize;
+	vcfap->vcfa_psize = pi->vprop_psize;
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h b/usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h
new file mode 100644
index 0000000000..232d92e82e
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h
@@ -0,0 +1,226 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_PROVIDER_H
+#define _LIBVARPD_PROVIDER_H
+
+/*
+ * varpd provider interface
+ *
+ * This header file defines all the structures and functions that a given plugin
+ * should register.
+ */
+
+#include <bunyan.h>
+#include <libvarpd.h>
+#include <libnvpair.h>
+#include <sys/socket.h>
+#include <sys/overlay_target.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Versions of the plugin interface. A plugin passes the version it was built
+ * against to libvarpd_plugin_alloc().
+ */
+#define VARPD_VERSION_ONE 1
+#define VARPD_CURRENT_VERSION VARPD_VERSION_ONE
+
+/*
+ * Opaque handles passed between libvarpd and plugins. Their structure tags
+ * are never defined in this header; plugins only pass the pointers back to
+ * the libvarpd routines declared below.
+ */
+typedef struct __varpd_provier_handle varpd_provider_handle_t;
+typedef struct __varpd_query_handle varpd_query_handle_t;
+typedef struct __varpd_arp_handle varpd_arp_handle_t;
+typedef struct __varpd_dhcp_handle varpd_dhcp_handle_t;
+
+/*
+ * Create a new instance of a plugin. The plugin is given its provider handle
+ * and the destination type, and hands back its private data through the
+ * void ** argument. That private data is the void * passed to the other entry
+ * points.
+ */
+typedef int (*varpd_plugin_create_f)(varpd_provider_handle_t *, void **,
+ overlay_plugin_dest_t);
+
+/*
+ * Once this returns, the lookup function may be called.
+ */
+typedef int (*varpd_plugin_start_f)(void *);
+
+/*
+ * Once this function has been entered, the lookup function will not be called.
+ */
+typedef void (*varpd_plugin_stop_f)(void *);
+
+/*
+ * Destroy an instance of a plugin.
+ */
+typedef void (*varpd_plugin_destroy_f)(void *);
+
+/*
+ * The varpd_plugin_default_f and varpd_plugin_lookup_f both look up
+ * destinations and should have them written into the overlay_target_point_t.
+ * The varpd_plugin_default_f should only be implemented for plugins which are
+ * of type OVERLAY_TARGET_POINT, whereas only the lookup function should be
+ * implemented by plugins that are of type OVERLAY_TARGET_DYNAMIC.
+ *
+ * In both cases, the answer should be filled into the overlay_target_point_t.
+ * In the case of the varpd_plugin_default_f, one of the VARPD_LOOKUP_* values
+ * should be returned by the function.
+ *
+ * In the case of the varpd_plugin_lookup_f, no value is returned. Instead, this
+ * is allowed to be an asynchronous operation and therefore any thread may
+ * report the status by calling libvarpd_plugin_query_reply() with the query
+ * handle and the appropriate VARPD_LOOKUP_* value.
+ *
+ * The value VARPD_LOOKUP_OK indicates that the overlay_target_point_t has been
+ * filled in completely. The value VARPD_LOOKUP_DROP indicates that the packet
+ * in question should be dropped.
+ */
+#define VARPD_LOOKUP_OK (0)
+#define VARPD_LOOKUP_DROP (-1)
+typedef int (*varpd_plugin_default_f)(void *, overlay_target_point_t *);
+typedef void (*varpd_plugin_lookup_f)(void *, varpd_query_handle_t *,
+ const overlay_targ_lookup_t *, overlay_target_point_t *);
+
+/*
+ * Do a proxy ARP/NDP lookup. No value is returned; the answer is presumably
+ * delivered through libvarpd_plugin_arp_reply() (TODO confirm).
+ */
+#define VARPD_QTYPE_ETHERNET 0x0
+typedef void (*varpd_plugin_arp_f)(void *, varpd_arp_handle_t *, int,
+ const struct sockaddr *, uint8_t *);
+
+/*
+ * Do a proxy DHCP lookup. No value is returned; the answer is presumably
+ * delivered through libvarpd_plugin_dhcp_reply() (TODO confirm).
+ */
+typedef void (*varpd_plugin_dhcp_f)(void *, varpd_dhcp_handle_t *, int,
+ const overlay_targ_lookup_t *, uint8_t *);
+
+/*
+ * The following four functions all revolve around properties that exist for
+ * varpd. A plugin should strive to have a uniform set of properties that exist,
+ * however a given plugin may not always support every property. For example, in
+ * a vxlan world, the target IP address and port are both required; however,
+ * there are other encapsulation protocols which only require an IP address, or
+ * maybe require something else.
+ */
+
+/*
+ * Obtain a total number of properties.
+ */
+typedef int (*varpd_plugin_nprops_f)(void *, uint_t *);
+
+/*
+ * Obtain information about a property. The plugin describes the property by
+ * calling the libvarpd_prop_set_*() routines on the supplied handle.
+ */
+typedef int (*varpd_plugin_propinfo_f)(void *, const uint_t,
+ varpd_prop_handle_t *);
+
+/*
+ * Get the value for a single property.
+ */
+typedef int (*varpd_plugin_getprop_f)(void *, const char *, void *, uint32_t *);
+
+/*
+ * Set the value for a single property.
+ */
+typedef int (*varpd_plugin_setprop_f)(void *, const char *, const void *,
+ const uint32_t);
+
+/*
+ * Save a plugin's private data into an nvlist.
+ */
+typedef int (*varpd_plugin_save_f)(void *, nvlist_t *);
+
+/*
+ * Restore a plugin's private data from an nvlist. The rebuilt private data is
+ * handed back through the void ** argument.
+ */
+typedef int (*varpd_plugin_restore_f)(nvlist_t *, varpd_provider_handle_t *,
+ overlay_plugin_dest_t, void **);
+
+/*
+ * The set of entry points that a plugin provides. Each member other than
+ * vpo_callbacks is one of the function types declared above.
+ * NOTE(review): the meaning of vpo_callbacks is not visible in this header;
+ * confirm against the plugins before relying on it.
+ */
+typedef struct varpd_plugin_ops {
+ uint_t vpo_callbacks;
+ varpd_plugin_create_f vpo_create;
+ varpd_plugin_start_f vpo_start;
+ varpd_plugin_stop_f vpo_stop;
+ varpd_plugin_destroy_f vpo_destroy;
+ varpd_plugin_default_f vpo_default;
+ varpd_plugin_lookup_f vpo_lookup;
+ varpd_plugin_nprops_f vpo_nprops;
+ varpd_plugin_propinfo_f vpo_propinfo;
+ varpd_plugin_getprop_f vpo_getprop;
+ varpd_plugin_setprop_f vpo_setprop;
+ varpd_plugin_save_f vpo_save;
+ varpd_plugin_restore_f vpo_restore;
+ varpd_plugin_arp_f vpo_arp;
+ varpd_plugin_dhcp_f vpo_dhcp;
+} varpd_plugin_ops_t;
+
+/*
+ * What a plugin hands to libvarpd_plugin_register(): the interface version it
+ * implements, its mode, its name and its entry points. Obtain one with
+ * libvarpd_plugin_alloc(), which sets vpr_version, and release it with
+ * libvarpd_plugin_free().
+ */
+typedef struct varpd_plugin_register {
+ uint_t vpr_version;
+ uint_t vpr_mode;
+ const char *vpr_name;
+ const varpd_plugin_ops_t *vpr_ops;
+} varpd_plugin_register_t;
+
+/*
+ * Plugin registration
+ */
+extern varpd_plugin_register_t *libvarpd_plugin_alloc(uint_t, int *);
+extern void libvarpd_plugin_free(varpd_plugin_register_t *);
+extern int libvarpd_plugin_register(varpd_plugin_register_t *);
+
+/*
+ * Blowing up and logging
+ */
+extern void libvarpd_panic(const char *, ...) __NORETURN;
+extern const bunyan_logger_t *libvarpd_plugin_bunyan(varpd_provider_handle_t *);
+
+/*
+ * Misc. Information APIs
+ */
+extern uint64_t libvarpd_plugin_vnetid(varpd_provider_handle_t *);
+
+/*
+ * Replying to lookup queries and proxying
+ */
+extern void libvarpd_plugin_query_reply(varpd_query_handle_t *, int);
+
+extern void libvarpd_plugin_proxy_arp(varpd_provider_handle_t *,
+ varpd_query_handle_t *, const overlay_targ_lookup_t *);
+extern void libvarpd_plugin_proxy_ndp(varpd_provider_handle_t *,
+ varpd_query_handle_t *, const overlay_targ_lookup_t *);
+extern void libvarpd_plugin_arp_reply(varpd_arp_handle_t *, int);
+
+extern void libvarpd_plugin_proxy_dhcp(varpd_provider_handle_t *,
+ varpd_query_handle_t *, const overlay_targ_lookup_t *);
+extern void libvarpd_plugin_dhcp_reply(varpd_dhcp_handle_t *, int);
+
+
+/*
+ * Property information callbacks
+ */
+extern void libvarpd_prop_set_name(varpd_prop_handle_t *, const char *);
+extern void libvarpd_prop_set_prot(varpd_prop_handle_t *, overlay_prop_prot_t);
+extern void libvarpd_prop_set_type(varpd_prop_handle_t *, overlay_prop_type_t);
+extern int libvarpd_prop_set_default(varpd_prop_handle_t *, void *, ssize_t);
+extern void libvarpd_prop_set_nodefault(varpd_prop_handle_t *);
+extern void libvarpd_prop_set_range_uint32(varpd_prop_handle_t *, uint32_t,
+ uint32_t);
+extern void libvarpd_prop_set_range_str(varpd_prop_handle_t *, const char *);
+
+/*
+ * Various injecting and invalidation routines
+ */
+extern void libvarpd_inject_varp(varpd_provider_handle_t *, const uint8_t *,
+ const overlay_target_point_t *);
+extern void libvarpd_inject_arp(varpd_provider_handle_t *, const uint16_t,
+ const uint8_t *, const struct in_addr *, const uint8_t *);
+extern void libvarpd_fma_degrade(varpd_provider_handle_t *, const char *);
+extern void libvarpd_fma_restore(varpd_provider_handle_t *);
+/* TODO NDP */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_PROVIDER_H */
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_util.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_util.c
new file mode 100644
index 0000000000..f5d8f3c796
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_util.c
@@ -0,0 +1,98 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#include <libvarpd_impl.h>
+#include <assert.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Return the ISA-specific directory suffix for the running binary: "64" for
+ * the 64-bit targets (sparcv9, amd64) and the empty string for the 32-bit
+ * ones (sparc, i386). Building for any other ISA is a compile-time error.
+ */
+const char *
+libvarpd_isaext(void)
+{
+#if defined(__sparc) && defined(__sparcv9)
+	return ("64");
+#elif defined(__sparc)
+	return ("");
+#elif defined(__amd64)
+	return ("64");
+#elif defined(__i386)
+	return ("");
+#else
+#error "unkonwn ISA"
+#endif
+}
+
+/*
+ * Call func on every entry of `<path>/<isaext>` whose name ends in suffix
+ * (and is longer than the suffix itself), passing vip, the entry's full path
+ * and arg. The walk stops at the first non-zero return from func.
+ *
+ * Returns 0 when the directory was fully walked or func stopped the walk, and
+ * otherwise the errno of the asprintf(), opendir() or readdir() failure.
+ */
+int
+libvarpd_dirwalk(varpd_impl_t *vip, const char *path, const char *suffix,
+    libvarpd_dirwalk_f func, void *arg)
+{
+	int ret;
+	size_t slen;
+	char *dirpath, *filepath;
+	DIR *dirp;
+	struct dirent *dp;
+	assert(vip != NULL && path != NULL);
+
+	if (asprintf(&dirpath, "%s/%s", path, libvarpd_isaext()) == -1)
+		return (errno);
+
+	if ((dirp = opendir(dirpath)) == NULL) {
+		ret = errno;
+		/*
+		 * Only dirpath has been allocated at this point; filepath is
+		 * still uninitialized and must not be freed.
+		 */
+		free(dirpath);
+		return (ret);
+	}
+
+	slen = strlen(suffix);
+	for (;;) {
+		size_t len;
+
+		/* readdir() signals both EOF and error with NULL. */
+		errno = 0;
+		dp = readdir(dirp);
+		if (dp == NULL) {
+			ret = errno;
+			break;
+		}
+
+		len = strlen(dp->d_name);
+		if (len <= slen)
+			continue;
+
+		if (strcmp(suffix, dp->d_name + (len - slen)) != 0)
+			continue;
+
+		if (asprintf(&filepath, "%s/%s", dirpath, dp->d_name) == -1) {
+			ret = errno;
+			break;
+		}
+
+		/* A non-zero return asks us to stop; it is not an error. */
+		if (func(vip, filepath, arg) != 0) {
+			free(filepath);
+			ret = 0;
+			break;
+		}
+
+		free(filepath);
+	}
+
+	(void) closedir(dirp);
+	free(dirpath);
+	return (ret);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/llib-lvarpd b/usr/src/lib/varpd/libvarpd/common/llib-lvarpd
new file mode 100644
index 0000000000..24d819d290
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/llib-lvarpd
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+#include <libvarpd.h>
diff --git a/usr/src/lib/varpd/libvarpd/common/mapfile-plugin b/usr/src/lib/varpd/libvarpd/common/mapfile-plugin
new file mode 100644
index 0000000000..7d208168e8
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/mapfile-plugin
@@ -0,0 +1,47 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+#
+# Symbols that plugins reference but that are defined externally, by libvarpd.
+# Every name here must match the corresponding definition in libvarpd exactly.
+#
+SYMBOL_SCOPE {
+	global:
+		libvarpd_plugin_alloc		{ FLAGS = EXTERN };
+		libvarpd_plugin_free		{ FLAGS = EXTERN };
+		libvarpd_plugin_proxy_arp	{ FLAGS = EXTERN };
+		libvarpd_plugin_proxy_dhcp	{ FLAGS = EXTERN };
+		libvarpd_plugin_proxy_ndp	{ FLAGS = EXTERN };
+		libvarpd_plugin_register	{ FLAGS = EXTERN };
+		libvarpd_prop_set_name		{ FLAGS = EXTERN };
+		libvarpd_prop_set_prot		{ FLAGS = EXTERN };
+		libvarpd_prop_set_type		{ FLAGS = EXTERN };
+		libvarpd_prop_set_default	{ FLAGS = EXTERN };
+		libvarpd_prop_set_nodefault	{ FLAGS = EXTERN };
+		libvarpd_prop_set_range_uint32	{ FLAGS = EXTERN };
+		libvarpd_prop_set_range_str	{ FLAGS = EXTERN };
+};
diff --git a/usr/src/lib/varpd/libvarpd/common/mapfile-vers b/usr/src/lib/varpd/libvarpd/common/mapfile-vers
new file mode 100644
index 0000000000..62fbb5e879
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/mapfile-vers
@@ -0,0 +1,113 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+#
+# Everything libvarpd exports is placed in the single SUNWprivate version.
+# Symbols are grouped by name prefix; anything not listed under "global"
+# is reduced to local scope by the trailing "local: *" entry.
+#
+SYMBOL_VERSION SUNWprivate {
+ global:
+ libvarpd_c_create;
+ libvarpd_c_destroy;
+ libvarpd_c_instance_activate;
+ libvarpd_c_instance_create;
+ libvarpd_c_instance_destroy;
+ libvarpd_c_prop_nprops;
+ libvarpd_c_prop_handle_alloc;
+ libvarpd_c_prop_handle_free;
+ libvarpd_c_prop_info_fill;
+ libvarpd_c_prop_info_fill_by_name;
+ libvarpd_c_prop_info;
+ libvarpd_c_prop_get;
+ libvarpd_c_prop_set;
+
+ libvarpd_c_instance_lookup;
+ libvarpd_c_instance_target_mode;
+ libvarpd_c_instance_cache_flush;
+ libvarpd_c_instance_cache_delete;
+ libvarpd_c_instance_cache_get;
+ libvarpd_c_instance_cache_set;
+ libvarpd_c_instance_cache_walk;
+
+ libvarpd_create;
+ libvarpd_destroy;
+
+ libvarpd_door_server_create;
+ libvarpd_door_server_destroy;
+
+ libvarpd_fma_degrade;
+ libvarpd_fma_restore;
+
+ libvarpd_inject_varp;
+ libvarpd_inject_arp;
+
+ libvarpd_instance_activate;
+ libvarpd_instance_create;
+ libvarpd_instance_destroy;
+ libvarpd_instance_lookup;
+ libvarpd_instance_id;
+
+ libvarpd_panic;
+
+ libvarpd_persist_disable;
+ libvarpd_persist_enable;
+ libvarpd_persist_restore;
+
+ libvarpd_plugin_alloc;
+ libvarpd_plugin_load;
+ libvarpd_plugin_free;
+ libvarpd_plugin_arp_reply;
+ libvarpd_plugin_dhcp_reply;
+ libvarpd_plugin_query_reply;
+ libvarpd_plugin_proxy_arp;
+ libvarpd_plugin_proxy_dhcp;
+ libvarpd_plugin_proxy_ndp;
+ libvarpd_plugin_register;
+ libvarpd_plugin_walk;
+ libvarpd_plugin_vnetid;
+
+ libvarpd_prop_set_default;
+ libvarpd_prop_set_nodefault;
+ libvarpd_prop_set_name;
+ libvarpd_prop_set_prot;
+ libvarpd_prop_set_range_uint32;
+ libvarpd_prop_set_range_str;
+ libvarpd_prop_set_type;
+
+ libvarpd_prop_handle_alloc;
+ libvarpd_prop_handle_free;
+ libvarpd_prop_nprops;
+ libvarpd_prop_info_fill;
+ libvarpd_prop_info;
+ libvarpd_prop_get;
+ libvarpd_prop_set;
+
+ libvarpd_overlay_lookup_quiesce;
+ libvarpd_overlay_lookup_run;
+ local:
+ *;
+};
diff --git a/usr/src/lib/varpd/libvarpd/i386/Makefile b/usr/src/lib/varpd/libvarpd/i386/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+# Pull in the build definitions shared by every architecture of libvarpd.
+include ../Makefile.com
+
+#
+# install: build everything ("all") and then the targets named by ROOTLIBS,
+# ROOTLINKS and ROOTLINT. Those macros are not defined in this file; they
+# are presumably supplied by the makefiles included above.
+#
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/libvarpd/sparc/Makefile b/usr/src/lib/varpd/libvarpd/sparc/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+# Pull in the build definitions shared by every architecture of libvarpd.
+include ../Makefile.com
+
+#
+# install: build everything ("all") and then the targets named by ROOTLIBS,
+# ROOTLINKS and ROOTLINT. Those macros are not defined in this file; they
+# are presumably supplied by the makefiles included above.
+#
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/libvarpd/sparcv9/Makefile b/usr/src/lib/varpd/libvarpd/sparcv9/Makefile
new file mode 100644
index 0000000000..5c586c1d40
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+# Shared libvarpd definitions first, then the 64-bit library overrides.
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+#
+# install: build everything ("all") and then the 64-bit install targets
+# named by ROOTLIBS64, ROOTLINKS64 and ROOTLINT64. Those macros are not
+# defined in this file; presumably they come from the included makefiles.
+#
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/svp/Makefile b/usr/src/lib/varpd/svp/Makefile
new file mode 100644
index 0000000000..f026c620e6
--- /dev/null
+++ b/usr/src/lib/varpd/svp/Makefile
@@ -0,0 +1,40 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Top-level makefile for the svp plugin: it builds nothing itself and only
+# dispatches the requested target into the per-architecture directories.
+#
+include ../../Makefile.lib
+
+# Always descend into $(MACH); $(MACH64) is appended on the BUILD64 line
+# (BUILD64 is presumably empty or a comment character -- it is set outside
+# this file).
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+# Record, per top-level target, the target name to pass to the sub-makes.
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+# No headers to install and nothing to check for this plugin.
+install_h:
+
+check:
+
+# FRC has no recipe and is never a file, so each subdirectory is always
+# considered out of date and the sub-make always runs.
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/varpd/svp/Makefile.com b/usr/src/lib/varpd/svp/Makefile.com
new file mode 100644
index 0000000000..814b70354e
--- /dev/null
+++ b/usr/src/lib/varpd/svp/Makefile.com
@@ -0,0 +1,48 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Definitions common to every architecture build of the svp plugin:
+# library name and version, and the objects that make it up.
+#
+LIBRARY = libvarpd_svp.a
+VERS = .1
+OBJECTS = libvarpd_svp.o \
+ libvarpd_svp_conn.o \
+ libvarpd_svp_crc.o \
+ libvarpd_svp_host.o \
+ libvarpd_svp_loop.o \
+ libvarpd_svp_remote.o \
+ libvarpd_svp_timer.o
+
+include ../../../Makefile.lib
+include ../../Makefile.plugin
+
+# Only the shared object named by DYNLIB is built.
+LIBS = $(DYNLIB)
+
+#
+# Yes, this isn't a command, but libcmdutils does have the list(9F)
+# functions and better to use that than compile list.o yet again
+# ourselves... probably.
+#
+LDLIBS += -lc -lvarpd -lumem -lnvpair -lsocket -lnsl -lavl \
+ -lcmdutils -lidspace -lbunyan
+CPPFLAGS += -I../common
+
+# Sources live in ../common relative to each architecture directory.
+SRCDIR = ../common
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../../Makefile.targ
diff --git a/usr/src/lib/varpd/svp/amd64/Makefile b/usr/src/lib/varpd/svp/amd64/Makefile
new file mode 100644
index 0000000000..b64b830ddd
--- /dev/null
+++ b/usr/src/lib/varpd/svp/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc.
+#
+
+# Shared svp plugin definitions first, then the 64-bit library overrides.
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+#
+# install: build everything ("all") and then the 64-bit install targets
+# named by ROOTLIBS64, ROOTLINKS64 and ROOTLINT64. Those macros are not
+# defined in this file; presumably they come from the included makefiles.
+#
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp.c b/usr/src/lib/varpd/svp/common/libvarpd_svp.c
new file mode 100644
index 0000000000..23f9586ba6
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp.c
@@ -0,0 +1,755 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+/*
+ * This plugin implements the SDC VXLAN Protocol (SVP).
+ *
+ * XXX Expand on everything.
+ */
+
+#include <umem.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <libnvpair.h>
+#include <strings.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+
+#include <libvarpd_provider.h>
+#include "libvarpd_svp.h"
+
+/*
+ * Plugin-wide state.
+ *
+ * svp_bunyan is the logger created by svp_bunyan_init(); it is not static,
+ * so it is visible to the other files of this plugin.
+ *
+ * svp_defport and svp_defuport seed svp_port and svp_uport in
+ * varpd_svp_create() and are reported as the defaults for the "svp/port"
+ * and "svp/underlay_port" properties by varpd_svp_propinfo().
+ *
+ * svp_lookup_cache is the umem cache, created in varpd_svp_init(), from
+ * which every svp_lookup_t is allocated.
+ */
+bunyan_logger_t *svp_bunyan;
+static int svp_defport = 1296;
+static int svp_defuport = 1339;
+static umem_cache_t *svp_lookup_cache;
+
+/*
+ * Discriminator for the union inside svp_lookup_t. varpd_svp_lookup() tags
+ * its requests SVP_L_VL2 and varpd_svp_arp() tags its requests SVP_L_VL3;
+ * nothing in this file assigns SVP_L_UNKNOWN.
+ */
+typedef enum svp_lookup_type {
+ SVP_L_UNKNOWN = 0x0,
+ SVP_L_VL2 = 0x1,
+ SVP_L_VL3 = 0x2
+} svp_lookup_type_t;
+
+/*
+ * Per-request state for an outstanding lookup, allocated from
+ * svp_lookup_cache and freed by the completion callback.
+ *
+ * svl_type holds an svp_lookup_type_t value (it is declared int) and says
+ * which arm of svl_u is in use:
+ *
+ *   svl_vl2  filled in by varpd_svp_lookup(); svp_vl2_lookup_cb() writes
+ *            the result into svl_point and replies on svl_handle.
+ *   svl_vl3  filled in by varpd_svp_arp(); svp_vl3_lookup_cb() copies the
+ *            MAC address into svl_out and replies on svl_vah.
+ *
+ * svl_query is the embedded query object handed to svp_remote_vl2_lookup()
+ * or svp_remote_vl3_lookup().
+ */
+typedef struct svp_lookup {
+ int svl_type;
+ union {
+ struct svl_lookup_vl2 {
+ varpd_query_handle_t *svl_handle;
+ overlay_target_point_t *svl_point;
+ } svl_vl2;
+ struct svl_lookup_vl3 {
+ varpd_arp_handle_t *svl_vah;
+ uint8_t *svl_out;
+ } svl_vl3;
+ } svl_u;
+ svp_query_t svl_query;
+} svp_lookup_t;
+
+/*
+ * Names of the properties this plugin exposes. The array index doubles as
+ * the property id in varpd_svp_propinfo(), and the get/set/save/restore
+ * routines select names by the same fixed indices (0-3), so entries must
+ * not be reordered without updating those routines.
+ */
+static const char *varpd_svp_props[] = {
+ "svp/host",
+ "svp/port",
+ "svp/underlay_ip",
+ "svp/underlay_port"
+};
+
+/*
+ * Three-way comparison of two svp_t instances, keyed only on svp_vid.
+ * Yields 0 when the ids match, 1 when the left id is larger and -1 when it
+ * is smaller.
+ */
+int
+svp_comparator(const void *l, const void *r)
+{
+	const svp_t *lhs = l;
+	const svp_t *rhs = r;
+
+	if (lhs->svp_vid == rhs->svp_vid)
+		return (0);
+
+	return (lhs->svp_vid > rhs->svp_vid ? 1 : -1);
+}
+
+/*
+ * Completion callback for a VL2 lookup started by varpd_svp_lookup().
+ *
+ * arg is the svp_lookup_t that was queued with the request. On SVP_S_OK the
+ * underlay address and port are stored into the caller's target point and
+ * the query is answered with VARPD_LOOKUP_OK; any other status answers with
+ * VARPD_LOOKUP_DROP. Either way the lookup is returned to svp_lookup_cache.
+ */
+static void
+svp_vl2_lookup_cb(svp_t *svp, svp_status_t status, const struct in6_addr *uip,
+    const uint16_t uport, void *arg)
+{
+	svp_lookup_t *lookup = arg;
+	varpd_query_handle_t *vqh;
+
+	assert(svp != NULL);
+	assert(arg != NULL);
+
+	vqh = lookup->svl_u.svl_vl2.svl_handle;
+	if (status == SVP_S_OK) {
+		overlay_target_point_t *dest = lookup->svl_u.svl_vl2.svl_point;
+
+		bcopy(uip, &dest->otp_ip, sizeof (struct in6_addr));
+		dest->otp_port = uport;
+		libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_OK);
+	} else {
+		libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+	}
+
+	umem_cache_free(svp_lookup_cache, lookup);
+}
+
+/*
+ * Completion callback for a VL3 lookup started by varpd_svp_arp().
+ *
+ * arg is the svp_lookup_t that was queued with the request. On SVP_S_OK the
+ * VL2 -> underlay mapping is injected first, then the MAC address is copied
+ * to the caller's output buffer and the ARP request is answered with
+ * VARPD_LOOKUP_OK. Any other status answers with VARPD_LOOKUP_DROP. The
+ * lookup is always returned to svp_lookup_cache.
+ */
+static void
+svp_vl3_lookup_cb(svp_t *svp, svp_status_t status, const uint8_t *vl2mac,
+    const struct in6_addr *uip, const uint16_t uport, void *arg)
+{
+	svp_lookup_t *lookup = arg;
+	varpd_arp_handle_t *vah;
+
+	assert(svp != NULL);
+	assert(lookup != NULL);
+
+	vah = lookup->svl_u.svl_vl3.svl_vah;
+	if (status == SVP_S_OK) {
+		overlay_target_point_t ul3;
+
+		/* Inject the L2 mapping before the L3 */
+		bcopy(uip, &ul3.otp_ip, sizeof (struct in6_addr));
+		ul3.otp_port = uport;
+		libvarpd_inject_varp(svp->svp_hdl, vl2mac, &ul3);
+
+		bcopy(vl2mac, lookup->svl_u.svl_vl3.svl_out, ETHERADDRL);
+		libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_OK);
+	} else {
+		libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+	}
+
+	umem_cache_free(svp_lookup_cache, lookup);
+}
+
+/*
+ * VL2 invalidation callback: re-inject the MAC address with a NULL target
+ * point (presumably how libvarpd is told to forget the mapping -- confirm
+ * against libvarpd_inject_varp()).
+ */
+static void
+svp_vl2_invalidate_cb(svp_t *svp, const uint8_t *vl2mac)
+{
+	varpd_provider_handle_t *hdl = svp->svp_hdl;
+
+	libvarpd_inject_varp(hdl, vl2mac, NULL);
+}
+
+/*
+ * VL3 injection callback. Only IPv4-mapped addresses are handled: the
+ * embedded IPv4 address is extracted and passed on as an ARP injection.
+ * Any other address triggers libvarpd_panic(), since NDP injection has not
+ * been implemented.
+ */
+static void
+svp_vl3_inject_cb(svp_t *svp, const uint16_t vlan, const struct in6_addr *vl3ip,
+    const uint8_t *vl2mac, const uint8_t *targmac)
+{
+	struct in_addr ip4;
+
+	if (!IN6_IS_ADDR_V4MAPPED(vl3ip))
+		libvarpd_panic("implement libvarpd_inject_ndp");
+
+	IN6_V4MAPPED_TO_INADDR(vl3ip, &ip4);
+	libvarpd_inject_arp(svp->svp_hdl, vlan, vl2mac, &ip4, targmac);
+}
+
+/*
+ * Shootdown callback. The underlay address and port that accompany the
+ * shootdown are currently ignored; the MAC address is re-injected with a
+ * NULL target point unconditionally.
+ */
+static void
+svp_shootdown_cb(svp_t *svp, const uint8_t *vl2mac, const struct in6_addr *uip,
+    const uint16_t uport)
+{
+	varpd_provider_handle_t *hdl = svp->svp_hdl;
+
+	/*
+	 * XXX We should probably do a conditional invalidation here.
+	 */
+	libvarpd_inject_varp(hdl, vl2mac, NULL);
+}
+
+/*
+ * Default callback table, copied into svp_cb by varpd_svp_create(). The
+ * initializer is positional, so the entries must stay in the member order
+ * of svp_cb_t (declared in libvarpd_svp.h).
+ */
+static svp_cb_t svp_defops = {
+ svp_vl2_lookup_cb,
+ svp_vl3_lookup_cb,
+ svp_vl2_invalidate_cb,
+ svp_vl3_inject_cb,
+ svp_shootdown_cb
+};
+
+/*
+ * The plugin only supports destinations made up of exactly an IP address
+ * and a port; every other combination is rejected.
+ */
+static boolean_t
+varpd_svp_valid_dest(overlay_plugin_dest_t dest)
+{
+	const overlay_plugin_dest_t wanted =
+	    OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT;
+
+	return (dest == wanted ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Allocate and initialize a new svp_t for the given provider handle.
+ *
+ * Returns 0 and stores the instance in *outp on success. Returns ENOTSUP
+ * when dest is not an IP + port destination, ENOMEM when the allocation
+ * fails, or the error reported by mutex_init().
+ */
+static int
+varpd_svp_create(varpd_provider_handle_t *hdl, void **outp,
+    overlay_plugin_dest_t dest)
+{
+	int ret;
+	svp_t *svp;
+
+	if (varpd_svp_valid_dest(dest) == B_FALSE)
+		return (ENOTSUP);
+
+	svp = umem_zalloc(sizeof (svp_t), UMEM_DEFAULT);
+	if (svp == NULL)
+		return (ENOMEM);
+
+	/*
+	 * Keep the assignment inside its own parentheses so that ret holds
+	 * mutex_init()'s error code rather than the result of the comparison.
+	 */
+	if ((ret = mutex_init(&svp->svp_lock, USYNC_THREAD, NULL)) != 0) {
+		umem_free(svp, sizeof (svp_t));
+		return (ret);
+	}
+
+	/* Both ports start out at their defaults; the host and IP are unset. */
+	svp->svp_port = svp_defport;
+	svp->svp_uport = svp_defuport;
+	svp->svp_cb = svp_defops;
+	svp->svp_hdl = hdl;
+	svp->svp_vid = libvarpd_plugin_vnetid(svp->svp_hdl);
+	*outp = svp;
+	return (0);
+}
+
+/*
+ * Start an instance: look up the remote for the configured host and port
+ * and attach this instance to it.
+ *
+ * Returns EAGAIN while any of host, port, underlay IP or underlay port is
+ * still unset; otherwise the result of svp_remote_find() or
+ * svp_remote_attach(), with 0 meaning success.
+ */
+static int
+varpd_svp_start(void *arg)
+{
+	svp_t *svp = arg;
+	svp_remote_t *remote;
+	boolean_t ready = B_TRUE;
+	int err;
+
+	/* The configuration is only inspected while holding svp_lock. */
+	mutex_lock(&svp->svp_lock);
+	if (svp->svp_host == NULL || svp->svp_port == 0 ||
+	    svp->svp_huip == B_FALSE || svp->svp_uport == 0)
+		ready = B_FALSE;
+	mutex_unlock(&svp->svp_lock);
+
+	if (ready == B_FALSE)
+		return (EAGAIN);
+
+	err = svp_remote_find(svp->svp_host, svp->svp_port, &remote);
+	if (err != 0)
+		return (err);
+
+	err = svp_remote_attach(remote, svp);
+	if (err != 0)
+		svp_remote_release(remote);
+
+	return (err);
+}
+
+/*
+ * Stop an instance by detaching it from its remote.
+ */
+static void
+varpd_svp_stop(void *arg)
+{
+	svp_t *instance = arg;
+
+	svp_remote_detach(instance);
+}
+
+/*
+ * Release everything owned by an instance: the host name string (sized as
+ * strlen + 1), the lock, and finally the svp_t itself. Failure to destroy
+ * the lock is fatal.
+ */
+static void
+varpd_svp_destroy(void *arg)
+{
+	svp_t *svp = arg;
+	char *host = svp->svp_host;
+
+	if (host != NULL)
+		umem_free(host, strlen(host) + 1);
+
+	if (mutex_destroy(&svp->svp_lock) != 0)
+		libvarpd_panic("failed to destroy svp_t`svp_lock");
+
+	umem_free(svp, sizeof (svp_t));
+}
+
+/*
+ * Lookup entry point: resolve the destination of a packet described by otl.
+ *
+ * ARP traffic and IPv6 traffic whose destination MAC starts with 33:33 are
+ * handed to libvarpd's proxy routines, which take over the query handle.
+ * Everything else becomes an asynchronous VL2 lookup that is completed by
+ * svp_vl2_lookup_cb(), which fills in otp and answers vqh.
+ */
+static void
+varpd_svp_lookup(void *arg, varpd_query_handle_t *vqh,
+    const overlay_targ_lookup_t *otl, overlay_target_point_t *otp)
+{
+	svp_lookup_t *slp;
+	svp_t *svp = arg;
+
+	/*
+	 * Check if this is something that we need to proxy, eg. arp or ndp.
+	 */
+	if (otl->otl_sap == ETHERTYPE_ARP) {
+		libvarpd_plugin_proxy_arp(svp->svp_hdl, vqh, otl);
+		return;
+	}
+
+	if (otl->otl_sap == ETHERTYPE_IPV6 &&
+	    otl->otl_dstaddr[0] == 0x33 &&
+	    otl->otl_dstaddr[1] == 0x33) {
+		libvarpd_plugin_proxy_ndp(svp->svp_hdl, vqh, otl);
+		/*
+		 * As with ARP above, the proxy now owns vqh. Falling through
+		 * would issue a second, VL2, lookup against the same handle.
+		 */
+		return;
+	}
+
+	/* XXX CACHES */
+
+	/*
+	 * If we have a failure to allocate memory for this, that's not good.
+	 * However, telling the kernel to just drop this packet is much better
+	 * than the alternative at this moment. At least we'll try again and we
+	 * may have something more available to us in a little bit.
+	 *
+	 * TODO We need to have observability around this case.
+	 */
+	slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
+	if (slp == NULL) {
+		libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+		return;
+	}
+
+	slp->svl_type = SVP_L_VL2;
+	slp->svl_u.svl_vl2.svl_handle = vqh;
+	slp->svl_u.svl_vl2.svl_point = otp;
+
+	svp_remote_vl2_lookup(svp, &slp->svl_query, otl->otl_dstaddr, slp);
+}
+
+/*
+ * Report how many properties the plugin exposes, i.e. the number of
+ * entries in varpd_svp_props. Always succeeds.
+ */
+static int
+varpd_svp_nprops(void *arg, uint_t *nprops)
+{
+	const size_t count =
+	    sizeof (varpd_svp_props) / sizeof (varpd_svp_props[0]);
+
+	*nprops = count;
+	return (0);
+}
+
+/*
+ * Describe the property with index propid (an index into varpd_svp_props)
+ * by filling in vph. Returns EINVAL for an index outside the table and 0
+ * otherwise.
+ */
+static int
+varpd_svp_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
+{
+	if (propid >= sizeof (varpd_svp_props) / sizeof (varpd_svp_props[0]))
+		return (EINVAL);
+
+	/* The name lookup and the permissions are common to all properties. */
+	libvarpd_prop_set_name(vph, varpd_svp_props[propid]);
+	libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+
+	switch (propid) {
+	case 0:
+		/* svp/host */
+		libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING);
+		libvarpd_prop_set_nodefault(vph);
+		break;
+	case 2:
+		/* svp/underlay_ip */
+		libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP);
+		libvarpd_prop_set_nodefault(vph);
+		break;
+	case 1:
+		/* svp/port */
+		libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
+		libvarpd_prop_set_default(vph, &svp_defport,
+		    sizeof (svp_defport));
+		libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
+		break;
+	case 3:
+		/* svp/underlay_port */
+		libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
+		libvarpd_prop_set_default(vph, &svp_defuport,
+		    sizeof (svp_defuport));
+		libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * Copy the current value of property pname into buf.
+ *
+ * On entry *sizep is the capacity of buf; on success it is updated to the
+ * number of bytes written, or to 0 when the property has no value. The two
+ * ports are reported as uint64_t, the host as a NUL-terminated string and
+ * the underlay IP as a struct in6_addr.
+ *
+ * Returns 0 on success, EOVERFLOW when buf is too small for the value and
+ * EINVAL for an unknown property name. svp_lock is never held on return.
+ */
+static int
+varpd_svp_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
+{
+	svp_t *svp = arg;
+
+	/* svp/host */
+	if (strcmp(pname, varpd_svp_props[0]) == 0) {
+		size_t len;
+
+		mutex_lock(&svp->svp_lock);
+		if (svp->svp_host == NULL) {
+			*sizep = 0;
+		} else {
+			len = strlen(svp->svp_host) + 1;
+			if (*sizep < len) {
+				mutex_unlock(&svp->svp_lock);
+				return (EOVERFLOW);
+			}
+			*sizep = len;
+			(void) strlcpy(buf, svp->svp_host, *sizep);
+		}
+		mutex_unlock(&svp->svp_lock);
+		return (0);
+	}
+
+	/* svp/port */
+	if (strcmp(pname, varpd_svp_props[1]) == 0) {
+		uint64_t val;
+
+		if (*sizep < sizeof (uint64_t))
+			return (EOVERFLOW);
+
+		mutex_lock(&svp->svp_lock);
+		if (svp->svp_port == 0) {
+			*sizep = 0;
+		} else {
+			val = svp->svp_port;
+			bcopy(&val, buf, sizeof (uint64_t));
+			*sizep = sizeof (uint64_t);
+		}
+
+		mutex_unlock(&svp->svp_lock);
+		return (0);
+	}
+
+	/* svp/underlay_ip */
+	if (strcmp(pname, varpd_svp_props[2]) == 0) {
+		/*
+		 * As for the other properties, the buffer has to be at least
+		 * as large as the value that is copied into it below.
+		 */
+		if (*sizep < sizeof (struct in6_addr))
+			return (EOVERFLOW);
+
+		mutex_lock(&svp->svp_lock);
+		if (svp->svp_huip == B_FALSE) {
+			*sizep = 0;
+		} else {
+			bcopy(&svp->svp_uip, buf, sizeof (struct in6_addr));
+			*sizep = sizeof (struct in6_addr);
+		}
+
+		/* Drop the lock before returning, like every other branch. */
+		mutex_unlock(&svp->svp_lock);
+		return (0);
+	}
+
+	/* svp/underlay_port */
+	if (strcmp(pname, varpd_svp_props[3]) == 0) {
+		uint64_t val;
+
+		if (*sizep < sizeof (uint64_t))
+			return (EOVERFLOW);
+
+		mutex_lock(&svp->svp_lock);
+		if (svp->svp_uport == 0) {
+			*sizep = 0;
+		} else {
+			val = svp->svp_uport;
+			bcopy(&val, buf, sizeof (uint64_t));
+			*sizep = sizeof (uint64_t);
+		}
+
+		mutex_unlock(&svp->svp_lock);
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Set property pname from the size bytes at buf.
+ *
+ * The host is taken as a string of at most size - 1 characters; the ports
+ * are read as uint64_t and must lie in 1..UINT16_MAX; the underlay IP is a
+ * struct in6_addr and may not be a v4-compatible address.
+ *
+ * Returns 0 on success, EOVERFLOW when buf is too small for the property's
+ * type, EINVAL for an unknown property, an out-of-range value or an empty
+ * host buffer, and ENOMEM when the host string cannot be copied.
+ */
+static int
+varpd_svp_setprop(void *arg, const char *pname, const void *buf,
+    const uint32_t size)
+{
+	svp_t *svp = arg;
+
+	/* svp/host */
+	if (strcmp(pname, varpd_svp_props[0]) == 0) {
+		char *dup;
+		size_t len;
+
+		/* A zero-length buffer cannot even hold the terminator. */
+		if (size == 0)
+			return (EINVAL);
+
+		/*
+		 * The string is always released with strlen() + 1 as its
+		 * size, so it has to be allocated with exactly that size,
+		 * however large the caller's buffer is. An unterminated
+		 * buffer is truncated to size - 1 characters.
+		 */
+		len = strnlen(buf, size);
+		if (len == size)
+			len = size - 1;
+
+		/* XXX Validate hostname characters, maybe grab a C locale */
+		dup = umem_alloc(len + 1, UMEM_DEFAULT);
+		if (dup == NULL)
+			return (ENOMEM);
+		bcopy(buf, dup, len);
+		dup[len] = '\0';
+
+		mutex_lock(&svp->svp_lock);
+		if (svp->svp_host != NULL)
+			umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
+		svp->svp_host = dup;
+		mutex_unlock(&svp->svp_lock);
+		return (0);
+	}
+
+	/* svp/port */
+	if (strcmp(pname, varpd_svp_props[1]) == 0) {
+		const uint64_t *valp = buf;
+		if (size < sizeof (uint64_t))
+			return (EOVERFLOW);
+
+		if (*valp == 0 || *valp > UINT16_MAX)
+			return (EINVAL);
+
+		mutex_lock(&svp->svp_lock);
+		svp->svp_port = (uint16_t)*valp;
+		mutex_unlock(&svp->svp_lock);
+		return (0);
+	}
+
+	/* svp/underlay_ip */
+	if (strcmp(pname, varpd_svp_props[2]) == 0) {
+		const struct in6_addr *ipv6 = buf;
+
+		if (size < sizeof (struct in6_addr))
+			return (EOVERFLOW);
+
+		/*
+		 * XXX Is anything else disallowed?
+		 */
+		if (IN6_IS_ADDR_V4COMPAT(ipv6))
+			return (EINVAL);
+		mutex_lock(&svp->svp_lock);
+		bcopy(buf, &svp->svp_uip, sizeof (struct in6_addr));
+		svp->svp_huip = B_TRUE;
+		mutex_unlock(&svp->svp_lock);
+		return (0);
+	}
+
+	/* svp/underlay_port */
+	if (strcmp(pname, varpd_svp_props[3]) == 0) {
+		const uint64_t *valp = buf;
+		if (size < sizeof (uint64_t))
+			return (EOVERFLOW);
+
+		if (*valp == 0 || *valp > UINT16_MAX)
+			return (EINVAL);
+
+		mutex_lock(&svp->svp_lock);
+		svp->svp_uport = (uint16_t)*valp;
+		mutex_unlock(&svp->svp_lock);
+
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Persist the instance's configuration into nvp. Only properties that are
+ * currently set are written: the host as a string, the two ports as
+ * uint16 values and the underlay IP in its textual IPv6 form.
+ *
+ * Returns 0 on success or the first error reported by an nvlist_add_*()
+ * call. svp_lock is held for the duration and released on every path.
+ */
+static int
+varpd_svp_save(void *arg, nvlist_t *nvp)
+{
+	svp_t *svp = arg;
+	int err = 0;
+
+	mutex_lock(&svp->svp_lock);
+
+	if (svp->svp_host != NULL) {
+		err = nvlist_add_string(nvp, varpd_svp_props[0],
+		    svp->svp_host);
+		if (err != 0)
+			goto done;
+	}
+
+	if (svp->svp_port != 0) {
+		err = nvlist_add_uint16(nvp, varpd_svp_props[1],
+		    svp->svp_port);
+		if (err != 0)
+			goto done;
+	}
+
+	if (svp->svp_huip == B_TRUE) {
+		char addr[INET6_ADDRSTRLEN];
+
+		if (inet_ntop(AF_INET6, &svp->svp_uip, addr,
+		    sizeof (addr)) == NULL) {
+			libvarpd_panic("unexpected inet_ntop failure: %d",
+			    errno);
+		}
+
+		err = nvlist_add_string(nvp, varpd_svp_props[2], addr);
+		if (err != 0)
+			goto done;
+	}
+
+	if (svp->svp_uport != 0) {
+		err = nvlist_add_uint16(nvp, varpd_svp_props[3],
+		    svp->svp_uport);
+	}
+
+done:
+	mutex_unlock(&svp->svp_lock);
+	return (err);
+}
+
+/*
+ * Rebuild an instance from the nvlist written by varpd_svp_save().
+ *
+ * A property missing from nvp (ENOENT) is left unset: NULL host, port 0,
+ * or no underlay IP. Any other lookup error, an unparsable underlay IP
+ * (EINVAL) or a failed allocation (ENOMEM) destroys the half-built
+ * instance and is returned. On success *outp receives the instance and 0
+ * is returned.
+ */
+static int
+varpd_svp_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
+    overlay_plugin_dest_t dest, void **outp)
+{
+	int ret;
+	svp_t *svp;
+	char *ipstr, *hstr;
+
+	if (varpd_svp_valid_dest(dest) == B_FALSE)
+		return (ENOTSUP);
+
+	if ((ret = varpd_svp_create(hdl, (void **)&svp, dest)) != 0)
+		return (ret);
+
+	/* XXX Validate hostname */
+	if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[0],
+	    &hstr)) != 0) {
+		if (ret != ENOENT) {
+			varpd_svp_destroy(svp);
+			return (ret);
+		}
+		svp->svp_host = NULL;
+	} else {
+		size_t blen = strlen(hstr) + 1;
+		svp->svp_host = umem_alloc(blen, UMEM_DEFAULT);
+		/* Do not copy into a failed allocation. */
+		if (svp->svp_host == NULL) {
+			varpd_svp_destroy(svp);
+			return (ENOMEM);
+		}
+		(void) strlcpy(svp->svp_host, hstr, blen);
+	}
+
+	if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[1],
+	    &svp->svp_port)) != 0) {
+		if (ret != ENOENT) {
+			varpd_svp_destroy(svp);
+			return (ret);
+		}
+		svp->svp_port = 0;
+	}
+
+	if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[2],
+	    &ipstr)) != 0) {
+		if (ret != ENOENT) {
+			varpd_svp_destroy(svp);
+			return (ret);
+		}
+		svp->svp_huip = B_FALSE;
+	} else {
+		ret = inet_pton(AF_INET6, ipstr, &svp->svp_uip);
+		if (ret == -1) {
+			assert(errno == EAFNOSUPPORT);
+			libvarpd_panic("unexpected inet_pton failure: %d",
+			    errno);
+		}
+
+		/* inet_pton() returns 0 for a string it cannot parse. */
+		if (ret == 0) {
+			varpd_svp_destroy(svp);
+			return (EINVAL);
+		}
+		svp->svp_huip = B_TRUE;
+	}
+
+	if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[3],
+	    &svp->svp_uport)) != 0) {
+		if (ret != ENOENT) {
+			varpd_svp_destroy(svp);
+			return (ret);
+		}
+		svp->svp_uport = 0;
+	}
+
+	svp->svp_hdl = hdl;
+	*outp = svp;
+	return (0);
+}
+
+/*
+ * ARP entry point: start an asynchronous VL3 lookup for the address in
+ * sock. The request is answered with VARPD_LOOKUP_DROP straight away when
+ * type is not VARPD_QTYPE_ETHERNET or when no lookup structure can be
+ * allocated; otherwise svp_vl3_lookup_cb() completes it, writing the MAC
+ * address to out.
+ */
+static void
+varpd_svp_arp(void *arg, varpd_arp_handle_t *vah, int type,
+    const struct sockaddr *sock, uint8_t *out)
+{
+	svp_t *svp = arg;
+	svp_lookup_t *lookup = NULL;
+
+	/* XXX CACHES */
+
+	/* Only Ethernet queries get as far as allocating a lookup. */
+	if (type == VARPD_QTYPE_ETHERNET)
+		lookup = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
+
+	if (lookup == NULL) {
+		libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+		return;
+	}
+
+	lookup->svl_type = SVP_L_VL3;
+	lookup->svl_u.svl_vl3.svl_vah = vah;
+	lookup->svl_u.svl_vl3.svl_out = out;
+	svp_remote_vl3_lookup(svp, &lookup->svl_query, sock, lookup);
+}
+
+/*
+ * Operations vector handed to libvarpd through vpr_ops in varpd_svp_init().
+ * The initializer is positional, so the entries must follow the member
+ * order of varpd_plugin_ops_t (declared in libvarpd_provider.h). The
+ * leading 0 and the two NULL slots are members this plugin leaves unset;
+ * NOTE(review): confirm against the header which members those are.
+ */
+static const varpd_plugin_ops_t varpd_svp_ops = {
+ 0,
+ varpd_svp_create,
+ varpd_svp_start,
+ varpd_svp_stop,
+ varpd_svp_destroy,
+ NULL,
+ varpd_svp_lookup,
+ varpd_svp_nprops,
+ varpd_svp_propinfo,
+ varpd_svp_getprop,
+ varpd_svp_setprop,
+ varpd_svp_save,
+ varpd_svp_restore,
+ varpd_svp_arp,
+ NULL
+};
+
+/*
+ * Create the plugin's "svp" logger and attach a stderr stream at
+ * BUNYAN_L_INFO. Returns 0 on success or the error from bunyan. When the
+ * stream cannot be added the logger is destroyed again and svp_bunyan is
+ * reset to NULL, so the global never refers to a finalized logger.
+ */
+static int
+svp_bunyan_init(void)
+{
+	int ret;
+
+	if ((ret = bunyan_init("svp", &svp_bunyan)) != 0)
+		return (ret);
+	ret = bunyan_stream_add(svp_bunyan, "stderr", BUNYAN_L_INFO,
+	    bunyan_stream_fd, (void *)STDERR_FILENO);
+	if (ret != 0) {
+		bunyan_fini(svp_bunyan);
+		svp_bunyan = NULL;
+	}
+	return (ret);
+}
+
+/*
+ * Tear down the plugin's logger, if one exists.
+ */
+static void
+svp_bunyan_fini(void)
+{
+	if (svp_bunyan == NULL)
+		return;
+
+	bunyan_fini(svp_bunyan);
+}
+
+/*
+ * Library initializer, run via #pragma init when the plugin is loaded.
+ *
+ * Brings up the subsystems in order -- logger, host, lookup cache, event,
+ * timer, remote -- and then registers the plugin with libvarpd under the
+ * name "svp". If any step fails, the steps that are torn down below are
+ * undone and the plugin is simply not registered; nothing is reported to
+ * the caller.
+ *
+ * NOTE(review): the failure paths never undo svp_host_init() or
+ * svp_timer_init(). Whether matching fini routines exist cannot be seen
+ * from this file; confirm and add them if so.
+ */
+#pragma init(varpd_svp_init)
+static void
+varpd_svp_init(void)
+{
+	int err;
+	varpd_plugin_register_t *vpr;
+
+	/* XXX Revisit and make sure we have proper clean up */
+	if (svp_bunyan_init() != 0)
+		return;
+
+	/* Assign, not compare: err used to be read before it was ever set. */
+	if ((err = svp_host_init()) != 0) {
+		svp_bunyan_fini();
+		return;
+	}
+
+	/* XXX Communicate failure */
+	svp_lookup_cache = umem_cache_create("svp_lookup",
+	    sizeof (svp_lookup_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+	if (svp_lookup_cache == NULL) {
+		svp_bunyan_fini();
+		return;
+	}
+
+	if ((err = svp_event_init()) != 0) {
+		svp_bunyan_fini();
+		umem_cache_destroy(svp_lookup_cache);
+		return;
+	}
+
+	if ((err = svp_timer_init()) != 0) {
+		svp_event_fini();
+		umem_cache_destroy(svp_lookup_cache);
+		svp_bunyan_fini();
+		return;
+	}
+
+	if ((err = svp_remote_init()) != 0) {
+		svp_event_fini();
+		umem_cache_destroy(svp_lookup_cache);
+		svp_bunyan_fini();
+		return;
+	}
+
+	/* XXX Revisit failure semantics here */
+	vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err);
+	if (vpr == NULL) {
+		svp_remote_fini();
+		svp_event_fini();
+		umem_cache_destroy(svp_lookup_cache);
+		/* Release the logger here too, as every other path does. */
+		svp_bunyan_fini();
+		return;
+	}
+
+	vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC;
+	vpr->vpr_name = "svp";
+	vpr->vpr_ops = &varpd_svp_ops;
+
+	/* The registration handle is only needed for the call itself. */
+	(void) libvarpd_plugin_register(vpr);
+	libvarpd_plugin_free(vpr);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp.h b/usr/src/lib/varpd/svp/common/libvarpd_svp.h
new file mode 100644
index 0000000000..90acf325aa
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp.h
@@ -0,0 +1,377 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_SVP_H
+#define _LIBVARPD_SVP_H
+
+/*
+ * Implementation details of the SVP plugin and the SVP protocol.
+ */
+
+#include <netinet/in.h>
+#include <sys/ethernet.h>
+#include <thread.h>
+#include <synch.h>
+#include <libvarpd_provider.h>
+#include <sys/avl.h>
+#include <port.h>
+#include <sys/list.h>
+#include <bunyan.h>
+
+#include <libvarpd_svp_prot.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct svp svp_t;
+typedef struct svp_remote svp_remote_t;
+typedef struct svp_conn svp_conn_t;
+typedef struct svp_query svp_query_t;
+
+/*
+ * An event registration: a callback taking a port_event_t and an opaque
+ * pointer, the argument stored alongside it (presumably what is passed as
+ * that pointer), and an events value (presumably the event mask of
+ * interest -- confirm against the event loop implementation).
+ */
+typedef void (*svp_event_f)(port_event_t *, void *);
+
+typedef struct svp_event {
+ svp_event_f se_func;
+ void *se_arg;
+ int se_events;
+} svp_event_t;
+
+/*
+ * A timer: the first four members are filled in by the consumer (callback,
+ * its argument, a one-shot flag and a value whose units are not stated
+ * here); the remaining members belong to the timer implementation.
+ */
+typedef void (*svp_timer_f)(void *);
+
+typedef struct svp_timer {
+ svp_timer_f st_func;
+ void *st_arg;
+ boolean_t st_oneshot;
+ uint32_t st_value;
+ /* Fields below here are private to the svp_timer implementation */
+ uint64_t st_expire;
+ boolean_t st_delivering;
+ avl_node_t st_link;
+} svp_timer_t;
+
+/*
+ * Storage large enough for any of the fixed-size VL2/VL3 request and
+ * acknowledgement structures.
+ *
+ * NOTE(review): the VL2 members use the prefix "sqd_" while the VL3
+ * members use "sdq_". This looks like a transposition, but the names may
+ * be referenced from other files, so they are left untouched here.
+ */
+typedef union svp_query_data {
+ svp_vl2_req_t sqd_vl2r;
+ svp_vl2_ack_t sqd_vl2a;
+ svp_vl3_req_t sdq_vl3r;
+ svp_vl3_ack_t sdq_vl3a;
+} svp_query_data_t;
+
+/*
+ * Callback type for a query (receives the query and an opaque argument),
+ * and the states a query can be in. The state values are consecutive
+ * starting at SVP_QUERY_INIT; judging by the names they run from
+ * initialized, through writing and reading, to finished.
+ */
+typedef void (*svp_query_f)(svp_query_t *, void *);
+
+typedef enum svp_query_state {
+ SVP_QUERY_INIT = 0x00,
+ SVP_QUERY_WRITING = 0x01,
+ SVP_QUERY_READING = 0x02,
+ SVP_QUERY_FINISHED = 0x03
+} svp_query_state_t;
+
+/*
+ * The query structure is usable for all forms of svp queries that end up
+ * getting passed across. Right now it's optimized for the fixed size data
+ * requests as opposed to requests whose responses will always be streaming in
+ * nature. Though, the streaming requests are the less common ones we have.
+ *
+ * XXX Fix that and make this streaming friendly
+ */
+/*
+ * A single query. It carries a list linkage, its callback and argument,
+ * its state, the owning svp_t, a request header, two svp_query_data_t
+ * unions (sq_rdun and sq_wdun), a status, two pointer/size pairs
+ * (sq_rdata/sq_rsize and sq_wdata/sq_wsize) and a timestamp.
+ * NOTE(review): the "r" and "w" members presumably hold the read (reply)
+ * and write (request) sides respectively -- confirm against the
+ * connection code.
+ */
+struct svp_query {
+ list_node_t sq_lnode;
+ svp_query_f sq_func;
+ svp_query_state_t sq_state;
+ void *sq_arg;
+ svp_t *sq_svp;
+ svp_req_t sq_header;
+ svp_query_data_t sq_rdun;
+ svp_query_data_t sq_wdun;
+ svp_status_t sq_status;
+ void *sq_rdata;
+ size_t sq_rsize;
+ void *sq_wdata;
+ size_t sq_wsize;
+ hrtime_t sq_acttime;
+};
+
+/*
+ * XXX Centralize this somewhere more generally, big theory statement, where are
+ * you?
+ *
+ * We have a connection pool that's built upon DNS records. DNS describes the
+ * membership of the set of remote peers that make up our pool and we maintain
+ * one connection to each of them. In addition, we maintain an exponential
+ * backoff for each peer and will attempt to reconnect immediately before backing
+ * off. The following are the valid states that a connection can be in:
+ *
+ * SVP_CS_INITIAL This is the initial state of a connection, all
+ * that should exist is an unbound socket.
+ *
+ * SVP_CS_CONNECTING A call to connect has been made and we are
+ * polling for it to complete.
+ *
+ * SVP_CS_BACKOFF A connect attempt has failed and we are
+ * currently backing off, waiting to try again.
+ *
+ * SVP_CS_ACTIVE We have successfully connected to the remote
+ * system.
+ *
+ * SVP_CS_WINDDOWN This connection is going to valhalla. In other
+ * words, a previously active connection is no
+ * longer valid in DNS, so we should curb our use
+ * of it, and reap it as soon as we have other
+ * active connections.
+ *
+ * SVP_CS_REAPING This connection object will be freed and reaped.
+ * It will no longer be used.
+ *
+ * The following diagram attempts to describe our state transition scheme, and
+ * when we transition from one state to the next.
+ *
+ * |
+ * * New remote IP from DNS resolution,
+ * | not currently active in the system.
+ * |
+ * v Socket Error,
+ * +----------------+ still in DNS
+ * | SVP_CS_INITIAL |<----------------------*-----+
+ * +----------------+ |
+ * | |
+ * Connection failed .. Always * Successful |
+ * backoff limit . | connect() |
+ * not exceeded +----*---------+ | +-----------*--+ |
+ * | | | | | |
+ * V ^ v ^ V ^
+ * +----------------+ +-------------------+ +---------------+
+ * +-<-| SVP_CS_BACKOFF | | SVP_CS_CONNECTING | | SVP_CS_ACTIVE |
+ * | +----------------+ +-------------------+ +---------------+
+ * | V ^ | ^ V
+ * | Backoff wait * | v | * Removed
+ * | interval +--------------+ | Added to * | from DNS
+ * | finished | DNS | |
+ * | | | |
+ * | | ^ V
+ * | | +-----------------+
+ * +---->---------------+-----<-------+ +-<-| SVP_CS_WINDDOWN |
+ * v Conn * +-----------------+
+ * | Error | V
+ * Removed from * v |
+ * DNS | +----------------+ * Connection
+ * +----------->| SVP_CS_REAPING |<--------+ Quiesced
+ * +----------------+
+ *
+ */
+/*
+ * Connection states. NOTE(review): the state description preceding this
+ * enum documents an SVP_CS_REAPING state that is not defined here, and
+ * does not describe SVP_CS_ERROR, which is; one of the two needs updating.
+ */
+typedef enum svp_conn_state {
+ SVP_CS_ERROR = 0x00,
+ SVP_CS_INITIAL = 0x01,
+ SVP_CS_CONNECTING = 0x02,
+ SVP_CS_BACKOFF = 0x03,
+ SVP_CS_ACTIVE = 0x04,
+ SVP_CS_WINDDOWN = 0x05
+} svp_conn_state_t;
+
+/*
+ * Error classification for a connection; stored in svp_conn's sc_error
+ * member. SVP_CE_NONE (0) means no error. The meaning of the remaining
+ * values is defined by the connection code, which is not part of this
+ * header.
+ */
+typedef enum svp_conn_error {
+ SVP_CE_NONE = 0x00,
+ SVP_CE_ASSOCIATE = 0x01,
+ SVP_CE_NOPOLLOUT = 0x02,
+ SVP_CE_SOCKET = 0x03
+} svp_conn_error_t;
+
+/*
+ * Connection flags; stored in svp_conn's sc_flags member. All values are
+ * single bits except SVP_CF_UFLAG, whose value 0x0c equals
+ * SVP_CF_REAP | SVP_CF_TEARDOWN (presumably a mask covering those two --
+ * confirm against its users).
+ */
+typedef enum svp_conn_flags {
+ SVP_CF_ADDED = 0x01,
+ SVP_CF_DEGRADED = 0x02,
+ SVP_CF_REAP = 0x04,
+ SVP_CF_TEARDOWN = 0x08,
+ SVP_CF_UFLAG = 0x0c,
+ SVP_CF_USER = 0x10
+} svp_conn_flags_t;
+
+/*
+ * Per-direction progress records embedded in svp_conn as sc_output and
+ * sc_input. Each pairs a query pointer with a byte offset (presumably how
+ * far that query has been written or read so far); the input side also
+ * holds a request header.
+ */
+typedef struct svp_conn_out {
+ svp_query_t *sco_query;
+ size_t sco_offset;
+} svp_conn_out_t;
+
+typedef struct svp_conn_in {
+ svp_query_t *sci_query;
+ svp_req_t sci_req;
+ size_t sci_offset;
+} svp_conn_in_t;
+
+/*
+ * A single connection to a remote peer. The trailing comments on the first
+ * three members give their access rules: "RO" members are marked
+ * read-only, and sc_rlist is covered by the owning remote's sr_lock.
+ * NOTE(review): the remaining members are presumably protected by sc_lock,
+ * but that is not stated here.
+ */
+struct svp_conn {
+ svp_remote_t *sc_remote; /* RO */
+ struct in6_addr sc_addr; /* RO */
+ list_node_t sc_rlist; /* svp_remote_t`sr_lock */
+ mutex_t sc_lock;
+ svp_event_t sc_event;
+ svp_timer_t sc_btimer;
+ svp_timer_t sc_qtimer;
+ int sc_socket;
+ uint_t sc_gen;
+ uint_t sc_nbackoff;
+ svp_conn_flags_t sc_flags;
+ svp_conn_state_t sc_cstate;
+ svp_conn_error_t sc_error;
+ int sc_errno;
+ hrtime_t sc_lastact;
+ list_t sc_queries;
+ svp_conn_out_t sc_output;
+ svp_conn_in_t sc_input;
+};
+
+/*
+ * Bit flags describing the DNS lookup state of a remote host, stored in
+ * svp_remote_t`sr_state.
+ */
+typedef enum svp_remote_state {
+ SVP_RS_LOOKUP_SCHEDULED = 0x01, /* On the DNS Queue */
+ SVP_RS_LOOKUP_INPROGRESS = 0x02, /* Doing a DNS lookup */
+ SVP_RS_LOOKUP_VALID = 0x04 /* addrinfo valid */
+} svp_remote_state_t;
+
+/*
+ * These series of bit-based flags should be ordered such that the most severe
+ * is first. We only can set one message that user land can see, so if more than
+ * one is set we want to make sure that one is there.
+ */
+typedef enum svp_degrade_state {
+ SVP_RD_DNS_FAIL = 0x01, /* DNS Resolution Failure */
+ SVP_RD_REMOTE_FAIL = 0x02, /* cannot reach any remote peers */
+ /* Both bits above: SVP_RD_DNS_FAIL | SVP_RD_REMOTE_FAIL */
+ SVP_RD_ALL = 0x03 /* Only suitable for restore */
+} svp_degrade_state_t;
+
+/*
+ * A remote host name and port, together with the connections made to the
+ * addresses it resolves to.
+ */
+struct svp_remote {
+ char *sr_hostname; /* RO */
+ uint16_t sr_rport; /* RO */
+ avl_node_t sr_gnode; /* svp_remote_lock */
+ svp_remote_t *sr_nexthost; /* svp_host_lock */
+ mutex_t sr_lock;
+ svp_remote_state_t sr_state;
+ svp_degrade_state_t sr_degrade;
+ struct addrinfo *sr_addrinfo;
+ avl_tree_t sr_tree;
+ uint_t sr_count; /* active count */
+ uint_t sr_gen;
+ uint_t sr_tconns; /* total conns + dconns */
+ uint_t sr_ndconns; /* number of degraded conns */
+ list_t sr_conns; /* all conns */
+};
+
+/*
+ * We have a bunch of different things that we get back from the API at the
+ * plug-in layer. These include:
+ *
+ * o OOB Shootdowns
+ * o VL3->VL2 Lookups
+ * o VL2->UL3 Lookups
+ * o VL2 Log invalidations
+ * o VL3 Log injections
+ */
+/*
+ * Callback signatures; one of each is stored in svp_cb_t.
+ * NOTE(review): apart from the final uport argument of svp_shootdown_f, the
+ * parameters are unnamed here, so their meaning must be confirmed against the
+ * implementations.
+ */
+typedef void (*svp_vl2_lookup_f)(svp_t *, svp_status_t, const struct in6_addr *,
+ const uint16_t, void *);
+typedef void (*svp_vl3_lookup_f)(svp_t *, svp_status_t, const uint8_t *,
+ const struct in6_addr *, const uint16_t, void *);
+typedef void (*svp_vl2_invalidation_f)(svp_t *, const uint8_t *);
+typedef void (*svp_vl3_inject_f)(svp_t *, const uint16_t,
+ const struct in6_addr *, const uint8_t *, const uint8_t *);
+typedef void (*svp_shootdown_f)(svp_t *, const uint8_t *,
+ const struct in6_addr *, const uint16_t uport);
+
+/*
+ * Table of the callbacks listed above; embedded in struct svp as svp_cb.
+ */
+typedef struct svp_cb {
+ svp_vl2_lookup_f scb_vl2_lookup;
+ svp_vl3_lookup_f scb_vl3_lookup;
+ svp_vl2_invalidation_f scb_vl2_invalidate;
+ svp_vl3_inject_f scb_vl3_inject;
+ svp_shootdown_f scb_shootdown;
+} svp_cb_t;
+
+/*
+ * Core implementation structure.
+ */
+/*
+ * NOTE(review): the members that follow svp_lock carry no locking
+ * annotation; presumably they are protected by svp_lock -- confirm.
+ */
+struct svp {
+ overlay_plugin_dest_t svp_dest; /* RO */
+ varpd_provider_handle_t *svp_hdl; /* RO */
+ svp_cb_t svp_cb; /* RO */
+ uint64_t svp_vid; /* RO? */
+ avl_node_t svp_rlink; /* Owned by svp_remote */
+ svp_remote_t *svp_remote; /* ROish XXX */
+ mutex_t svp_lock;
+ char *svp_host;
+ uint16_t svp_port;
+ uint16_t svp_uport;
+ boolean_t svp_huip;
+ struct in6_addr svp_uip;
+};
+
+extern bunyan_logger_t *svp_bunyan;
+
+/*
+ * XXX Strawman backend APIs
+ */
+extern int svp_remote_find(char *, uint16_t, svp_remote_t **);
+extern int svp_remote_attach(svp_remote_t *, svp_t *);
+extern void svp_remote_detach(svp_t *);
+extern void svp_remote_release(svp_remote_t *);
+extern void svp_remote_vl3_lookup(svp_t *, svp_query_t *,
+ const struct sockaddr *, void *);
+extern void svp_remote_vl2_lookup(svp_t *, svp_query_t *, const uint8_t *,
+ void *);
+
+/*
+ * Init functions
+ */
+extern int svp_remote_init(void);
+extern void svp_remote_fini(void);
+extern int svp_event_init(void);
+extern int svp_event_timer_init(svp_event_t *);
+extern void svp_event_fini(void);
+extern int svp_host_init(void);
+extern int svp_timer_init(void);
+
+/*
+ * Timers
+ */
+extern int svp_tickrate;
+extern void svp_timer_add(svp_timer_t *);
+extern void svp_timer_remove(svp_timer_t *);
+
+/*
+ * Event loop management
+ */
+extern int svp_event_associate(svp_event_t *, int);
+extern int svp_event_dissociate(svp_event_t *, int);
+extern int svp_event_inject(void *);
+
+/*
+ * Connection manager
+ */
+extern int svp_conn_create(svp_remote_t *, const struct in6_addr *);
+extern void svp_conn_destroy(svp_conn_t *);
+extern void svp_conn_fallout(svp_conn_t *);
+extern void svp_conn_queue(svp_conn_t *, svp_query_t *);
+
+/*
+ * FMA related
+ */
+extern void svp_remote_degrade(svp_remote_t *, svp_degrade_state_t);
+extern void svp_remote_restore(svp_remote_t *, svp_degrade_state_t);
+
+/*
+ * Misc.
+ */
+extern int svp_comparator(const void *, const void *);
+extern void svp_remote_reassign(svp_remote_t *, svp_conn_t *);
+extern void svp_remote_resolved(svp_remote_t *, struct addrinfo *);
+extern void svp_host_queue(svp_remote_t *);
+extern void svp_query_release(svp_query_t *);
+extern void svp_query_crc32(svp_req_t *, void *, size_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_SVP_H */
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c
new file mode 100644
index 0000000000..b9d3925b64
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c
@@ -0,0 +1,945 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/*
+ * Logic to manage an individual connection to a remote host.
+ *
+ * Individual connections always are associated with an svp_remote_t from their
+ * creation to their destruction.
+ */
+
+#include <assert.h>
+#include <umem.h>
+#include <errno.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/uio.h>
+
+#include <libvarpd_svp.h>
+
+/*
+ * Query timeout in seconds. It is both the period of the per-connection query
+ * timer and the age threshold checked by svp_conn_querytimer().
+ */
+static int svp_conn_query_timeout = 30;
+/*
+ * Backoff timer values, indexed by the number of consecutive backoffs. Once
+ * the table is exhausted the last entry is reused.
+ * NOTE(review): presumably in seconds, like the query timer -- confirm.
+ */
+static int svp_conn_backoff_tbl[] = { 1, 2, 4, 8, 16, 32 };
+/* Number of entries in svp_conn_backoff_tbl. */
+static int svp_conn_nbackoff = sizeof (svp_conn_backoff_tbl) / sizeof (int);
+
+/*
+ * Follow-up action that a state-machine step asks svp_conn_handler() to take
+ * once it holds both the remote's lock and the connection's lock.
+ */
+typedef enum svp_conn_act {
+ SVP_RA_NONE = 0x00, /* nothing further to do */
+ SVP_RA_DEGRADE = 0x01, /* call svp_conn_degrade() */
+ SVP_RA_RESTORE = 0x02, /* call svp_conn_restore() */
+ SVP_RA_ERROR = 0x03 /* call svp_conn_reset() */
+} svp_conn_act_t;
+
+/*
+ * Post a user event for this connection to the event loop, unless one is
+ * already outstanding (tracked by SVP_CF_USER). The caller must hold sc_lock.
+ */
+static void
+svp_conn_inject(svp_conn_t *scp)
+{
+	int err;
+
+	assert(MUTEX_HELD(&scp->sc_lock));
+
+	if ((scp->sc_flags & SVP_CF_USER) == 0) {
+		scp->sc_flags |= SVP_CF_USER;
+		err = svp_event_inject(scp);
+		if (err != 0)
+			libvarpd_panic("failed to inject event: %d\n", err);
+	}
+}
+
+/*
+ * Mark this connection degraded. If that leaves every connection of the
+ * remote degraded, degrade the remote itself. Both the remote's lock and the
+ * connection's lock must be held.
+ */
+static void
+svp_conn_degrade(svp_conn_t *scp)
+{
+	svp_remote_t *remote = scp->sc_remote;
+
+	assert(MUTEX_HELD(&remote->sr_lock));
+	assert(MUTEX_HELD(&scp->sc_lock));
+
+	if ((scp->sc_flags & SVP_CF_DEGRADED) == 0) {
+		scp->sc_flags |= SVP_CF_DEGRADED;
+		remote->sr_ndconns++;
+		if (remote->sr_tconns == remote->sr_ndconns)
+			svp_remote_degrade(remote, SVP_RD_REMOTE_FAIL);
+	}
+}
+
+/*
+ * Clear the degraded mark on this connection. If every connection of the
+ * remote had been degraded, restore the remote before dropping the degraded
+ * count. Both the remote's lock and the connection's lock must be held.
+ */
+static void
+svp_conn_restore(svp_conn_t *scp)
+{
+	svp_remote_t *remote = scp->sc_remote;
+	boolean_t alldegraded;
+
+	assert(MUTEX_HELD(&remote->sr_lock));
+	assert(MUTEX_HELD(&scp->sc_lock));
+
+	if ((scp->sc_flags & SVP_CF_DEGRADED) == 0)
+		return;
+
+	scp->sc_flags &= ~SVP_CF_DEGRADED;
+	alldegraded = (remote->sr_ndconns == remote->sr_tconns) ?
+	    B_TRUE : B_FALSE;
+	if (alldegraded == B_TRUE)
+		svp_remote_restore(remote, SVP_RD_REMOTE_FAIL);
+	remote->sr_ndconns--;
+}
+
+/*
+ * Put the connection on its remote's list of connections and count it, unless
+ * that has already been done. Both the remote's lock and the connection's
+ * lock must be held.
+ */
+static void
+svp_conn_add(svp_conn_t *scp)
+{
+	svp_remote_t *remote = scp->sc_remote;
+
+	assert(MUTEX_HELD(&remote->sr_lock));
+	assert(MUTEX_HELD(&scp->sc_lock));
+
+	if ((scp->sc_flags & SVP_CF_ADDED) == 0) {
+		list_insert_tail(&remote->sr_conns, scp);
+		scp->sc_flags |= SVP_CF_ADDED;
+		remote->sr_tconns++;
+	}
+}
+
+/*
+ * Stop counting this connection against its remote. If the connections that
+ * remain are all degraded, degrade the remote. Both the remote's lock and the
+ * connection's lock must be held.
+ *
+ * NOTE(review): this only adjusts the flag and the counters; it does not take
+ * the connection off sr_conns. Confirm the list removal happens elsewhere.
+ */
+static void
+svp_conn_remove(svp_conn_t *scp)
+{
+	svp_remote_t *remote = scp->sc_remote;
+
+	assert(MUTEX_HELD(&remote->sr_lock));
+	assert(MUTEX_HELD(&scp->sc_lock));
+
+	if ((scp->sc_flags & SVP_CF_ADDED) != 0) {
+		scp->sc_flags &= ~SVP_CF_ADDED;
+		if ((scp->sc_flags & SVP_CF_DEGRADED) != 0)
+			remote->sr_ndconns--;
+		remote->sr_tconns--;
+		if (remote->sr_ndconns == remote->sr_tconns)
+			svp_remote_degrade(remote, SVP_RD_REMOTE_FAIL);
+	}
+}
+
+/*
+ * Return the query on this connection whose header id equals id, or NULL if
+ * there is none. The id is compared exactly as stored, with no byte-order
+ * conversion. The caller must hold sc_lock.
+ */
+static svp_query_t *
+svp_conn_query_find(svp_conn_t *scp, uint32_t id)
+{
+	svp_query_t *cur;
+
+	assert(MUTEX_HELD(&scp->sc_lock));
+
+	cur = list_head(&scp->sc_queries);
+	while (cur != NULL && cur->sq_header.svp_id != id)
+		cur = list_next(&scp->sc_queries, cur);
+
+	return (cur);
+}
+
+/*
+ * Close the socket, enter SVP_CS_BACKOFF and arm the backoff timer with the
+ * next value from svp_conn_backoff_tbl; the last entry is reused once the
+ * table runs out. Returns SVP_RA_DEGRADE once the number of backoffs exceeds
+ * the table size and SVP_RA_NONE otherwise. The caller must hold sc_lock.
+ */
+static svp_conn_act_t
+svp_conn_backoff(svp_conn_t *scp)
+{
+	uint_t idx;
+
+	assert(MUTEX_HELD(&scp->sc_lock));
+
+	if (close(scp->sc_socket) != 0)
+		libvarpd_panic("failed to close socket %d: %d\n",
+		    scp->sc_socket, errno);
+	scp->sc_socket = -1;
+
+	scp->sc_cstate = SVP_CS_BACKOFF;
+	scp->sc_nbackoff++;
+
+	/* Clamp the table index to the final entry. */
+	if (scp->sc_nbackoff >= svp_conn_nbackoff)
+		idx = svp_conn_nbackoff - 1;
+	else
+		idx = scp->sc_nbackoff - 1;
+	scp->sc_btimer.st_value = svp_conn_backoff_tbl[idx];
+	svp_timer_add(&scp->sc_btimer);
+
+	return (scp->sc_nbackoff > svp_conn_nbackoff ?
+	    SVP_RA_DEGRADE : SVP_RA_NONE);
+}
+
+/*
+ * Open a non-blocking socket and start connecting to the remote address.
+ * Only valid from SVP_CS_INITIAL or SVP_CS_BACKOFF with no socket open. The
+ * caller must hold sc_lock.
+ *
+ * Outcomes:
+ *   - connect() completed at once: SVP_CS_ACTIVE, returns SVP_RA_RESTORE.
+ *   - connect() is in progress: SVP_CS_CONNECTING, returns SVP_RA_NONE.
+ *   - connect() failed with an anticipated error: handed to
+ *     svp_conn_backoff().
+ *   - socket() or svp_event_associate() failed: SVP_CS_ERROR with sc_error
+ *     and sc_errno filled in, returns SVP_RA_DEGRADE.
+ */
+static svp_conn_act_t
+svp_conn_connect(svp_conn_t *scp)
+{
+	int ret;
+	struct sockaddr_in6 in6;
+
+	assert(MUTEX_HELD(&scp->sc_lock));
+	assert(scp->sc_cstate == SVP_CS_BACKOFF ||
+	    scp->sc_cstate == SVP_CS_INITIAL);
+	assert(scp->sc_socket == -1);
+	/* A fresh start forgets any earlier backoff history. */
+	if (scp->sc_cstate == SVP_CS_INITIAL)
+		scp->sc_nbackoff = 0;
+
+	scp->sc_socket = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
+	if (scp->sc_socket == -1) {
+		scp->sc_error = SVP_CE_SOCKET;
+		scp->sc_errno = errno;
+		scp->sc_cstate = SVP_CS_ERROR;
+		return (SVP_RA_DEGRADE);
+	}
+
+	bzero(&in6, sizeof (struct sockaddr_in6));
+	in6.sin6_family = AF_INET6;
+	in6.sin6_port = htons(scp->sc_remote->sr_rport);
+	bcopy(&scp->sc_addr, &in6.sin6_addr, sizeof (struct in6_addr));
+	ret = connect(scp->sc_socket, (struct sockaddr *)&in6,
+	    sizeof (struct sockaddr_in6));
+	if (ret != 0) {
+		boolean_t async = B_FALSE;
+
+		switch (errno) {
+		case EACCES:
+		case EADDRINUSE:
+		case EAFNOSUPPORT:
+		case EALREADY:
+		case EBADF:
+		case EISCONN:
+		case ELOOP:
+		case ENOENT:
+		case ENOSR:
+		case EWOULDBLOCK:
+			/* presumably libvarpd_panic() does not return */
+			libvarpd_panic("unanticipated connect errno %d", errno);
+		case EINPROGRESS:
+		case EINTR:
+			async = B_TRUE;
+		default:
+			break;
+		}
+
+		/*
+		 * So, we will be connecting to this in the future, advance our
+		 * state and make sure that we poll for the next round.
+		 */
+		if (async == B_TRUE) {
+			scp->sc_cstate = SVP_CS_CONNECTING;
+			scp->sc_event.se_events = POLLOUT | POLLHUP;
+			ret = svp_event_associate(&scp->sc_event,
+			    scp->sc_socket);
+			if (ret == 0)
+				return (SVP_RA_NONE);
+			scp->sc_error = SVP_CE_ASSOCIATE;
+			scp->sc_errno = ret;
+			scp->sc_cstate = SVP_CS_ERROR;
+			return (SVP_RA_DEGRADE);
+		} else {
+			/*
+			 * This call failed, which means that we obtained one of
+			 * the following:
+			 *
+			 * EADDRNOTAVAIL
+			 * ECONNREFUSED
+			 * EIO
+			 * ENETUNREACH
+			 * EHOSTUNREACH
+			 * ENXIO
+			 * ETIMEDOUT
+			 *
+			 * Therefore we need to set ourselves into backoff and
+			 * wait for that to clear up.
+			 */
+			return (svp_conn_backoff(scp));
+		}
+	}
+
+	/*
+	 * We've connected. Successfully move ourselves to the bound
+	 * state and start polling.
+	 */
+	scp->sc_cstate = SVP_CS_ACTIVE;
+	scp->sc_event.se_events = POLLIN | POLLRDNORM | POLLHUP;
+	ret = svp_event_associate(&scp->sc_event, scp->sc_socket);
+	if (ret == 0)
+		return (SVP_RA_RESTORE);
+	scp->sc_error = SVP_CE_ASSOCIATE;
+	/* Record the cause, as every other associate failure path does. */
+	scp->sc_errno = ret;
+	scp->sc_cstate = SVP_CS_ERROR;
+
+	return (SVP_RA_DEGRADE);
+}
+
+/*
+ * This should be the first call we get after a connect. If we have successfully
+ * connected, we should see a writeable event. We may also see an error or a
+ * hang up. In either of these cases, we transition to error mode. If there is
+ * also a readable event, we ignore it at the moment and just let a
+ * reassociation pick it up so we can simplify the set of state transitions that
+ * we have.
+ */
+/*
+ * Finish a non-blocking connect. Without POLLOUT the connection goes to
+ * SVP_CS_ERROR. Otherwise SO_ERROR decides between backing off and becoming
+ * active, in which case the socket is re-armed for reading.
+ */
+static svp_conn_act_t
+svp_conn_poll_connect(port_event_t *pe, svp_conn_t *scp)
+{
+	int rv, soerr;
+	socklen_t solen = sizeof (soerr);
+
+	if ((pe->portev_events & POLLOUT) == 0) {
+		scp->sc_errno = 0;
+		scp->sc_error = SVP_CE_NOPOLLOUT;
+		scp->sc_cstate = SVP_CS_ERROR;
+		return (SVP_RA_DEGRADE);
+	}
+
+	rv = getsockopt(scp->sc_socket, SOL_SOCKET, SO_ERROR, &soerr, &solen);
+	/* XXX Really none of these? */
+	if (rv != 0)
+		libvarpd_panic("unanticipated getsockopt error");
+	if (soerr != 0)
+		return (svp_conn_backoff(scp));
+
+	scp->sc_cstate = SVP_CS_ACTIVE;
+	scp->sc_event.se_events = POLLIN | POLLRDNORM | POLLHUP;
+	rv = svp_event_associate(&scp->sc_event, scp->sc_socket);
+	if (rv != 0) {
+		scp->sc_error = SVP_CE_ASSOCIATE;
+		scp->sc_errno = rv;
+		scp->sc_cstate = SVP_CS_ERROR;
+		return (SVP_RA_DEGRADE);
+	}
+
+	return (SVP_RA_RESTORE);
+}
+
+/*
+ * Write as much as possible of the current outgoing query, first choosing the
+ * next query in SVP_QUERY_INIT state if none is in flight. The caller must
+ * hold sc_lock.
+ *
+ * Returns SVP_RA_NONE when the caller should simply re-arm the socket, or
+ * SVP_RA_ERROR when the connection must be reset.
+ *
+ * NOTE(review): nothing here stamps sq_acttime, which svp_conn_querytimer()
+ * uses to detect stalled queries -- confirm where that is meant to happen.
+ */
+static svp_conn_act_t
+svp_conn_pollout(svp_conn_t *scp)
+{
+	svp_query_t *sqp;
+	svp_req_t *req;
+	size_t off;
+	struct iovec iov[2];
+	int nvecs = 0;
+	ssize_t ret;
+
+	assert(MUTEX_HELD(&scp->sc_lock));
+
+	/*
+	 * We need to find a query and start writing it out.
+	 */
+	if (scp->sc_output.sco_query == NULL) {
+		for (sqp = list_head(&scp->sc_queries); sqp != NULL;
+		    sqp = list_next(&scp->sc_queries, sqp)) {
+			if (sqp->sq_state == SVP_QUERY_INIT)
+				break;
+		}
+
+		/* Nothing left to send: stop asking for POLLOUT. */
+		if (sqp == NULL) {
+			scp->sc_event.se_events &= ~POLLOUT;
+			return (SVP_RA_NONE);
+		}
+
+		scp->sc_output.sco_query = sqp;
+		scp->sc_output.sco_offset = 0;
+		sqp->sq_state = SVP_QUERY_WRITING;
+		svp_query_crc32(&sqp->sq_header, sqp->sq_rdata, sqp->sq_rsize);
+	}
+
+	/*
+	 * sco_offset counts bytes already written across the header and the
+	 * data that follows it. Build an iovec for whatever remains.
+	 */
+	sqp = scp->sc_output.sco_query;
+	req = &sqp->sq_header;
+	off = scp->sc_output.sco_offset;
+	if (off < sizeof (svp_req_t)) {
+		iov[nvecs].iov_base = (void *)((uintptr_t)req + off);
+		iov[nvecs].iov_len = sizeof (svp_req_t) - off;
+		nvecs++;
+		off = 0;
+	} else {
+		off -= sizeof (svp_req_t);
+	}
+
+	iov[nvecs].iov_base = (void *)((uintptr_t)sqp->sq_rdata + off);
+	iov[nvecs].iov_len = sqp->sq_rsize - off;
+	nvecs++;
+
+	/*
+	 * Retry only when interrupted. EAGAIN on this non-blocking socket
+	 * means we must wait for the next POLLOUT rather than spin.
+	 */
+	do {
+		ret = writev(scp->sc_socket, iov, nvecs);
+	} while (ret == -1 && errno == EINTR);
+	if (ret == -1) {
+		switch (errno) {
+		case EAGAIN:
+			scp->sc_event.se_events |= POLLOUT;
+			return (SVP_RA_NONE);
+		case EIO:
+		case ENXIO:
+		case ECONNRESET:
+			return (SVP_RA_ERROR);
+		default:
+			libvarpd_panic("unexpected errno: %d", errno);
+		}
+	}
+
+	/*
+	 * Judge completion by the cumulative offset, not by this call's
+	 * return value, so that a query finished over several partial writes
+	 * is still recognized as done.
+	 */
+	scp->sc_output.sco_offset += ret;
+	if (scp->sc_output.sco_offset >= sizeof (svp_req_t) + sqp->sq_rsize) {
+		sqp->sq_state = SVP_QUERY_READING;
+		scp->sc_output.sco_query = NULL;
+		scp->sc_output.sco_offset = 0;
+		/* Come back to look for another query to send. */
+		scp->sc_event.se_events |= POLLOUT;
+	}
+	return (SVP_RA_NONE);
+}
+
+/*
+ * Validate a fully received response header held in sc_input.sci_req: the
+ * version, the operation, that its id matches a query on this connection
+ * which is waiting for a response, and that the advertised payload size is
+ * exactly the size of the matching ack structure. On success the matched
+ * query becomes sc_input.sci_query and its write buffer is pointed at its own
+ * sq_wdun storage. Returns B_TRUE on success; otherwise logs a warning and
+ * returns B_FALSE. The caller must hold sc_lock.
+ */
+static boolean_t
+svp_conn_pollin_validate(svp_conn_t *scp)
+{
+	svp_query_t *sqp;
+	uint32_t nsize;
+	uint16_t nvers, nop;
+	svp_req_t *resp = &scp->sc_input.sci_req;
+
+	assert(MUTEX_HELD(&scp->sc_lock));
+
+	nvers = ntohs(resp->svp_ver);
+	nop = ntohs(resp->svp_op);
+	nsize = ntohl(resp->svp_size);
+
+	/* XXX Best practice around spaces in key names */
+	if (nvers != SVP_CURRENT_VERSION) {
+		bunyan_warn(svp_bunyan, "unsupported version",
+		    BUNYAN_T_IP, "remote ip", &scp->sc_addr,
+		    BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport,
+		    BUNYAN_T_INT32, "version", nvers,
+		    BUNYAN_T_INT32, "operation", nop,
+		    BUNYAN_T_INT32, "response id", resp->svp_id,
+		    BUNYAN_T_END);
+		return (B_FALSE);
+	}
+
+	if (nop != SVP_R_VL2_ACK && nop != SVP_R_VL3_ACK) {
+		bunyan_warn(svp_bunyan, "unsupported operation",
+		    BUNYAN_T_IP, "remote ip", &scp->sc_addr,
+		    BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport,
+		    BUNYAN_T_INT32, "version", nvers,
+		    BUNYAN_T_INT32, "operation", nop,
+		    BUNYAN_T_INT32, "response id", resp->svp_id,
+		    BUNYAN_T_END);
+		return (B_FALSE);
+	}
+
+	/* The id is matched exactly as it appears on the wire. */
+	sqp = svp_conn_query_find(scp, resp->svp_id);
+	if (sqp == NULL) {
+		bunyan_warn(svp_bunyan, "unknown response id",
+		    BUNYAN_T_IP, "remote ip", &scp->sc_addr,
+		    BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport,
+		    BUNYAN_T_INT32, "version", nvers,
+		    BUNYAN_T_INT32, "operation", nop,
+		    BUNYAN_T_INT32, "response id", resp->svp_id,
+		    BUNYAN_T_END);
+		return (B_FALSE);
+	}
+
+	if (sqp->sq_state != SVP_QUERY_READING) {
+		bunyan_warn(svp_bunyan, "got response for unexpecting query",
+		    BUNYAN_T_IP, "remote ip", &scp->sc_addr,
+		    BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport,
+		    BUNYAN_T_INT32, "version", nvers,
+		    BUNYAN_T_INT32, "operation", nop,
+		    BUNYAN_T_INT32, "response id", resp->svp_id,
+		    BUNYAN_T_INT32, "query state", sqp->sq_state,
+		    BUNYAN_T_END);
+		return (B_FALSE);
+	}
+
+	if ((nop == SVP_R_VL2_ACK && nsize != sizeof (svp_vl2_ack_t)) ||
+	    (nop == SVP_R_VL3_ACK && nsize != sizeof (svp_vl3_ack_t))) {
+		/*
+		 * The check is for any mismatch, not just an oversized
+		 * payload. The expected size is cast because a size_t must
+		 * not be passed through varargs for an INT32 key.
+		 */
+		bunyan_warn(svp_bunyan, "response size mismatch",
+		    BUNYAN_T_IP, "remote ip", &scp->sc_addr,
+		    BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport,
+		    BUNYAN_T_INT32, "version", nvers,
+		    BUNYAN_T_INT32, "operation", nop,
+		    BUNYAN_T_INT32, "response id", resp->svp_id,
+		    BUNYAN_T_INT32, "response size", nsize,
+		    BUNYAN_T_INT32, "expected size",
+		    (int32_t)(nop == SVP_R_VL2_ACK ?
+		    sizeof (svp_vl2_ack_t) : sizeof (svp_vl3_ack_t)),
+		    BUNYAN_T_INT32, "query state", sqp->sq_state,
+		    BUNYAN_T_END);
+		return (B_FALSE);
+	}
+
+	scp->sc_input.sci_query = sqp;
+	sqp->sq_wdata = &sqp->sq_wdun;
+	sqp->sq_wsize = sizeof (svp_query_data_t);
+
+	return (B_TRUE);
+}
+
+/*
+ * Read a response from the socket. A response is a svp_req_t header followed
+ * by a payload whose size the header advertises. While sc_input.sci_query is
+ * NULL we are still collecting the header and sci_offset counts header
+ * bytes. Once the header validates, sci_query is set and sci_offset counts
+ * payload bytes. When the payload is complete and its CRC matches, the query
+ * is taken off the connection and its callback is run with sc_lock dropped.
+ * The caller must hold sc_lock.
+ *
+ * Returns SVP_RA_NONE when the caller should re-arm the socket, or
+ * SVP_RA_ERROR when the connection must be reset.
+ */
+static svp_conn_act_t
+svp_conn_pollin(svp_conn_t *scp)
+{
+	size_t off, total;
+	ssize_t ret;
+	svp_query_t *sqp;
+	uint32_t crc;
+	uint16_t nop;
+
+	assert(MUTEX_HELD(&scp->sc_lock));
+
+	/*
+	 * No query implies that we're reading in the header and that the offset
+	 * is associated with it.
+	 */
+	off = scp->sc_input.sci_offset;
+	if (scp->sc_input.sci_query == NULL) {
+		svp_req_t *resp = &scp->sc_input.sci_req;
+
+		assert(off < sizeof (svp_req_t));
+
+		do {
+			ret = read(scp->sc_socket,
+			    (void *)((uintptr_t)resp + off),
+			    sizeof (svp_req_t) - off);
+		} while (ret == -1 && errno == EINTR);
+		if (ret == -1) {
+			switch (errno) {
+			case EAGAIN:
+				scp->sc_event.se_events |= POLLIN | POLLRDNORM;
+				return (SVP_RA_NONE);
+			case EIO:
+			case ECONNRESET:
+				return (SVP_RA_ERROR);
+			default:
+				libvarpd_panic("unexpeted read errno: %d",
+				    errno);
+			}
+		} else if (ret == 0) {
+			/* Try to reconnect to the remote host */
+			return (SVP_RA_ERROR);
+		}
+
+		/* Didn't get all the data we need */
+		if (off + ret < sizeof (svp_req_t)) {
+			scp->sc_input.sci_offset += ret;
+			scp->sc_event.se_events |= POLLIN | POLLRDNORM;
+			return (SVP_RA_NONE);
+		}
+
+		/*
+		 * The header is complete. From here on the offset describes
+		 * the payload, so it must restart at zero; otherwise a header
+		 * that arrived in pieces would leave a stale offset behind
+		 * and the payload would be read into the wrong place.
+		 */
+		scp->sc_input.sci_offset = 0;
+		off = 0;
+
+		if (svp_conn_pollin_validate(scp) != B_TRUE)
+			return (SVP_RA_ERROR);
+	}
+
+	sqp = scp->sc_input.sci_query;
+	assert(sqp != NULL);
+	total = ntohl(scp->sc_input.sci_req.svp_size);
+	do {
+		ret = read(scp->sc_socket,
+		    (void *)((uintptr_t)sqp->sq_wdata + off), total - off);
+	} while (ret == -1 && errno == EINTR);
+
+	if (ret == -1) {
+		switch (errno) {
+		case EAGAIN:
+			scp->sc_event.se_events |= POLLIN | POLLRDNORM;
+			return (SVP_RA_NONE);
+		case EIO:
+		case ECONNRESET:
+			return (SVP_RA_ERROR);
+		default:
+			libvarpd_panic("unexpeted read errno: %d", errno);
+		}
+	} else if (ret == 0) {
+		/* Try to reconnect to the remote host */
+		return (SVP_RA_ERROR);
+	}
+
+	if (ret + off < total) {
+		scp->sc_input.sci_offset += ret;
+		return (SVP_RA_NONE);
+	}
+
+	/*
+	 * svp_query_crc32() rewrites svp_crc32 in the header, so save the
+	 * value we received before recomputing and comparing.
+	 */
+	nop = ntohs(scp->sc_input.sci_req.svp_op);
+	crc = scp->sc_input.sci_req.svp_crc32;
+	svp_query_crc32(&scp->sc_input.sci_req, sqp->sq_wdata, total);
+	if (crc != scp->sc_input.sci_req.svp_crc32) {
+		bunyan_info(svp_bunyan, "crc32 mismatch",
+		    BUNYAN_T_IP, "remote ip", &scp->sc_addr,
+		    BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport,
+		    BUNYAN_T_INT32, "version",
+		    ntohs(scp->sc_input.sci_req.svp_ver),
+		    BUNYAN_T_INT32, "operation", nop,
+		    BUNYAN_T_INT32, "response id",
+		    ntohl(scp->sc_input.sci_req.svp_id),
+		    BUNYAN_T_INT32, "query state", sqp->sq_state,
+		    BUNYAN_T_UINT32, "msg_crc", ntohl(crc),
+		    BUNYAN_T_UINT32, "calc_crc",
+		    ntohl(scp->sc_input.sci_req.svp_crc32),
+		    BUNYAN_T_END);
+		return (SVP_RA_ERROR);
+	}
+	scp->sc_input.sci_query = NULL;
+	scp->sc_input.sci_offset = 0;
+
+	if (nop == SVP_R_VL2_ACK) {
+		svp_vl2_ack_t *sl2a = sqp->sq_wdata;
+		sqp->sq_status = ntohs(sl2a->sl2a_status);
+	} else if (nop == SVP_R_VL3_ACK) {
+		svp_vl3_ack_t *sl3a = sqp->sq_wdata;
+		sqp->sq_status = ntohs(sl3a->sl3a_status);
+	} else {
+		libvarpd_panic("unhandled nop: %d", nop);
+	}
+
+	/*
+	 * XXX What assumptions can now be violated?
+	 */
+	list_remove(&scp->sc_queries, sqp);
+	mutex_unlock(&scp->sc_lock);
+
+	/*
+	 * We have to release all of our resources associated with this entry
+	 * before we call the callback. After we call it, the memory will be
+	 * lost to time.
+	 */
+	svp_query_release(sqp);
+	sqp->sq_func(sqp, sqp->sq_arg);
+	mutex_lock(&scp->sc_lock);
+	scp->sc_event.se_events |= POLLIN | POLLRDNORM;
+
+	return (SVP_RA_NONE);
+}
+
+/*
+ * Tear down the current socket and start the connection over from
+ * SVP_CS_INITIAL: forget any partially transferred query, hand the
+ * connection to svp_remote_reassign(), and reconnect. Both the remote's lock
+ * and the connection's lock must be held. Returns whatever
+ * svp_conn_connect() returns.
+ */
+static svp_conn_act_t
+svp_conn_reset(svp_conn_t *scp)
+{
+	int ret;
+	svp_remote_t *srp = scp->sc_remote;
+
+	assert(MUTEX_HELD(&srp->sr_lock));
+	assert(MUTEX_HELD(&scp->sc_lock));
+
+	/*
+	 * Keep the call itself outside of assert() so that it still happens
+	 * when assertions are compiled out.
+	 */
+	ret = svp_event_dissociate(&scp->sc_event, scp->sc_socket);
+	assert(ret == ENOENT);
+	if (close(scp->sc_socket) != 0)
+		libvarpd_panic("failed to close socket %d: %d", scp->sc_socket,
+		    errno);
+
+	/*
+	 * The teardown request has now been honored. Leaving the flag set
+	 * would make every later pass through the handler reset us again.
+	 */
+	scp->sc_flags &= ~SVP_CF_TEARDOWN;
+	scp->sc_socket = -1;
+	scp->sc_cstate = SVP_CS_INITIAL;
+
+	/* Drop partial I/O state so the new socket starts on a clean slate. */
+	scp->sc_input.sci_query = NULL;
+	scp->sc_input.sci_offset = 0;
+	scp->sc_output.sco_query = NULL;
+	scp->sc_output.sco_offset = 0;
+
+	svp_remote_reassign(srp, scp);
+
+	return (svp_conn_connect(scp));
+}
+
+/*
+ * This is our general state transition function. We're called here when we want
+ * to advance part of our state machine as well as to re-arm ourselves. We can
+ * also end up here from the standard event loop as a result of having a user
+ * event posted.
+ */
+/*
+ * Event and timer entry point for a connection.
+ *
+ * pe is the port event that woke us, or NULL when we are called from the
+ * backoff timer. arg is the svp_conn_t. The function runs one step of the
+ * state machine under sc_lock. If that step asks for a follow-up action, it
+ * then takes the remote's lock followed by sc_lock and performs the reset,
+ * degrade or restore.
+ */
+static void
+svp_conn_handler(port_event_t *pe, void *arg)
+{
+	svp_conn_t *scp = arg;
+	svp_remote_t *srp = scp->sc_remote;
+	svp_conn_act_t ret = SVP_RA_NONE;
+
+	mutex_lock(&scp->sc_lock);
+
+	/*
+	 * Check if one of our event interrupts is set. An event interrupt, such
+	 * as having to be reaped or be torn down, is notified by a
+	 * PORT_SOURCE_USER event that tries to take care of this. However,
+	 * because of the fact that the event loop can be ongoing despite this,
+	 * we may get here before the PORT_SOURCE_USER has caused us to get
+	 * here. In such a case, if the PORT_SOURCE_USER event is tagged, then
+	 * we're going to opt to do nothing here and wait for it to come and
+	 * tear us down. That will also indicate to us that we have nothing to
+	 * worry about as far as general timing and the like goes.
+	 */
+
+	if ((scp->sc_flags & SVP_CF_UFLAG) != 0 &&
+	    (scp->sc_flags & SVP_CF_USER) != 0 &&
+	    pe != NULL &&
+	    pe->portev_source != PORT_SOURCE_USER) {
+		mutex_unlock(&scp->sc_lock);
+		return;
+	}
+
+	/* A user event with neither reap nor teardown pending is a no-op. */
+	if (pe != NULL && pe->portev_source == PORT_SOURCE_USER) {
+		scp->sc_flags &= ~SVP_CF_USER;
+		if ((scp->sc_flags & SVP_CF_UFLAG) == 0) {
+			mutex_unlock(&scp->sc_lock);
+			return;
+		}
+	}
+
+	/* Check if this needs to be freed */
+	if (scp->sc_flags & SVP_CF_REAP) {
+		mutex_unlock(&scp->sc_lock);
+		svp_conn_destroy(scp);
+		return;
+	}
+
+	/* Check if this needs to be reset */
+	if (scp->sc_flags & SVP_CF_TEARDOWN) {
+		ret = SVP_RA_ERROR;
+		goto out;
+	}
+
+	switch (scp->sc_cstate) {
+	case SVP_CS_INITIAL:
+	case SVP_CS_BACKOFF:
+		/* These states are only driven by the backoff timer. */
+		assert(pe == NULL);
+		ret = svp_conn_connect(scp);
+		break;
+	case SVP_CS_CONNECTING:
+		assert(pe != NULL);
+		ret = svp_conn_poll_connect(pe, scp);
+		break;
+	case SVP_CS_ACTIVE:
+		assert(pe != NULL);
+		if (pe->portev_events & POLLOUT)
+			ret = svp_conn_pollout(scp);
+		if (ret == SVP_RA_NONE && (pe->portev_events & POLLIN))
+			ret = svp_conn_pollin(scp);
+		/* Re-arm the socket with whatever events are now wanted. */
+		if (ret == SVP_RA_NONE) {
+			int err;
+			if ((err = svp_event_associate(&scp->sc_event,
+			    scp->sc_socket)) != 0) {
+				scp->sc_error = SVP_CE_ASSOCIATE;
+				/* Record the failure, not the action code. */
+				scp->sc_errno = err;
+				scp->sc_cstate = SVP_CS_ERROR;
+				ret = SVP_RA_DEGRADE;
+			}
+		}
+		break;
+	default:
+		libvarpd_panic("svp_conn_handler encountered unexpected "
+		    "state: %d", scp->sc_cstate);
+	}
+out:
+	mutex_unlock(&scp->sc_lock);
+
+	if (ret == SVP_RA_NONE)
+		return;
+
+	/* The follow-up actions need the remote's lock taken first. */
+	mutex_lock(&srp->sr_lock);
+	mutex_lock(&scp->sc_lock);
+	if (ret == SVP_RA_ERROR)
+		ret = svp_conn_reset(scp);
+
+	if (ret == SVP_RA_DEGRADE)
+		svp_conn_degrade(scp);
+	else if (ret == SVP_RA_RESTORE)
+		svp_conn_restore(scp);
+	mutex_unlock(&scp->sc_lock);
+	mutex_unlock(&srp->sr_lock);
+}
+
+/*
+ * Backoff timer callback: run the connection's state machine with no port
+ * event.
+ */
+static void
+svp_conn_backtimer(void *arg)
+{
+	svp_conn_handler(NULL, (svp_conn_t *)arg);
+}
+
+/*
+ * This fires every svp_conn_query_timeout seconds. Its purpose is to determine
+ * if we haven't heard back on a request within svp_conn_query_timeout seconds.
+ * If any of the svp_query_t's have been started (indicated by
+ * svp_query_t`sq_acttime != -1) and more than svp_conn_query_timeout seconds
+ * have passed, we basically tear this connection down and reassign outstanding
+ * queries.
+ */
+/*
+ * Query timer callback. arg is the svp_conn_t. If the connection is active
+ * and any query that has been started (sq_acttime != -1) has been outstanding
+ * for more than svp_conn_query_timeout seconds, flag the connection for
+ * teardown and inject a user event so that the handler performs it.
+ */
+static void
+svp_conn_querytimer(void *arg)
+{
+	svp_query_t *sqp;
+	svp_conn_t *scp = arg;
+	hrtime_t now = gethrtime();
+
+	mutex_lock(&scp->sc_lock);
+
+	/*
+	 * If we're not in the active state, then we don't care about this as
+	 * we're already either going to die or we have no connections to worry
+	 * about.
+	 */
+	if (scp->sc_cstate != SVP_CS_ACTIVE) {
+		mutex_unlock(&scp->sc_lock);
+		return;
+	}
+
+	for (sqp = list_head(&scp->sc_queries); sqp != NULL;
+	    sqp = list_next(&scp->sc_queries, sqp)) {
+		if (sqp->sq_acttime == -1)
+			continue;
+		/*
+		 * Elapsed time is now minus the start time. The reverse is
+		 * never positive and so could never exceed the timeout.
+		 */
+		if ((now - sqp->sq_acttime) / NANOSEC > svp_conn_query_timeout)
+			break;
+	}
+
+	/* Nothing timed out, we're good here */
+	if (sqp == NULL) {
+		mutex_unlock(&scp->sc_lock);
+		return;
+	}
+
+	scp->sc_flags |= SVP_CF_TEARDOWN;
+	svp_conn_inject(scp);
+
+	mutex_unlock(&scp->sc_lock);
+}
+
+/*
+ * This connection has fallen out of DNS, figure out what we need to do with it.
+ */
+/*
+ * Handle a connection whose address is no longer returned by DNS. The caller
+ * must hold the remote's sr_lock.
+ *
+ * NOTE(review): this function releases sr_lock before returning even though
+ * it did not acquire it. Callers therefore must not unlock it again --
+ * confirm that this hand-off is intended.
+ */
+void
+svp_conn_fallout(svp_conn_t *scp)
+{
+	svp_remote_t *srp = scp->sc_remote;
+
+	assert(MUTEX_HELD(&srp->sr_lock));
+
+	mutex_lock(&scp->sc_lock);
+	switch (scp->sc_cstate) {
+	case SVP_CS_ERROR:
+	case SVP_CS_INITIAL:
+	case SVP_CS_BACKOFF:
+	case SVP_CS_CONNECTING:
+		/*
+		 * Either the connection is already inactive, or it has
+		 * something going on but carries no queries. In both cases we
+		 * take it out of the remote's accounting now and mark it to
+		 * be reaped. The injected user event makes the handler
+		 * perform the teardown the next time it runs.
+		 */
+		svp_conn_remove(scp);
+		scp->sc_flags |= SVP_CF_REAP;
+		svp_conn_inject(scp);
+		break;
+	case SVP_CS_ACTIVE:
+		scp->sc_cstate = SVP_CS_WINDDOWN;
+		/*
+		 * XXX We need to look at what's currently outstanding. If
+		 * nothing is going on at the moment, we should try to
+		 * port dissociate, and if successful, e.g. not ENOENT, clean
+		 * up right here and now.
+		 */
+		break;
+	case SVP_CS_WINDDOWN:
+		/*
+		 * Nothing specific to do here, we're finishing up with this,
+		 * just haven't finished yet.
+		 */
+		break;
+	default:
+		libvarpd_panic("svp_conn_fallout encountered "
+		    "unknown state");
+	}
+	mutex_unlock(&scp->sc_lock);
+	mutex_unlock(&srp->sr_lock);
+}
+
+/*
+ * Allocate a connection to addr on behalf of the remote srp, add it to the
+ * remote, and start its query and backoff timers. The caller must hold the
+ * remote's sr_lock. Returns 0 on success or ENOMEM if allocation fails.
+ */
+int
+svp_conn_create(svp_remote_t *srp, const struct in6_addr *addr)
+{
+	svp_conn_t *conn;
+
+	assert(MUTEX_HELD(&srp->sr_lock));
+
+	if ((conn = umem_zalloc(sizeof (svp_conn_t), UMEM_DEFAULT)) == NULL)
+		return (ENOMEM);
+
+	/* Identity and initial state. */
+	conn->sc_remote = srp;
+	conn->sc_socket = -1;
+
+	/* Event loop callback. */
+	conn->sc_event.se_func = svp_conn_handler;
+	conn->sc_event.se_arg = conn;
+
+	/* One-shot backoff timer. */
+	conn->sc_btimer.st_func = svp_conn_backtimer;
+	conn->sc_btimer.st_arg = conn;
+	conn->sc_btimer.st_oneshot = B_TRUE;
+	conn->sc_btimer.st_value = 1;
+
+	/* Periodic query timeout timer. */
+	conn->sc_qtimer.st_func = svp_conn_querytimer;
+	conn->sc_qtimer.st_arg = conn;
+	conn->sc_qtimer.st_oneshot = B_FALSE;
+	conn->sc_qtimer.st_value = svp_conn_query_timeout;
+
+	list_create(&conn->sc_queries, sizeof (svp_query_t),
+	    offsetof(svp_query_t, sq_lnode));
+	conn->sc_gen = srp->sr_gen;
+	bcopy(addr, &conn->sc_addr, sizeof (struct in6_addr));
+	conn->sc_cstate = SVP_CS_INITIAL;
+
+	mutex_lock(&conn->sc_lock);
+	svp_conn_add(conn);
+	mutex_unlock(&conn->sc_lock);
+
+	/* Now that we're locked and loaded, add our timers */
+	svp_timer_add(&conn->sc_qtimer);
+	svp_timer_add(&conn->sc_btimer);
+
+	return (0);
+}
+
+/*
+ * At the time of calling, the entry has been removed from all lists. In
+ * addition, the entry's state should be SVP_CS_ERROR; therefore, we know that
+ * the fd should not be associated with the event loop. We'll double check that
+ * just in case. We should also have already been removed from the remote's
+ * list.
+ */
+/*
+ * Free a connection. Panics unless the connection is in SVP_CS_ERROR, is no
+ * longer counted on its remote, has no queries queued, and its socket is not
+ * associated with the event loop. Both timers are removed and any open socket
+ * is closed before the memory is released.
+ */
+void
+svp_conn_destroy(svp_conn_t *scp)
+{
+	int err;
+
+	mutex_lock(&scp->sc_lock);
+	if (scp->sc_cstate != SVP_CS_ERROR)
+		libvarpd_panic("asked to tear down an active connection");
+	if ((scp->sc_flags & SVP_CF_ADDED) != 0)
+		libvarpd_panic("asked to remove a connection still in "
+		    "the remote list\n");
+	if (!list_is_empty(&scp->sc_queries))
+		libvarpd_panic("asked to remove a connection with non-empty "
+		    "query list");
+
+	err = svp_event_dissociate(&scp->sc_event, scp->sc_socket);
+	if (err != ENOENT) {
+		libvarpd_panic("dissociate failed or was actually "
+		    "associated: %d", err);
+	}
+	mutex_unlock(&scp->sc_lock);
+
+	/* Verify our timers are killed */
+	svp_timer_remove(&scp->sc_btimer);
+	svp_timer_remove(&scp->sc_qtimer);
+
+	if (scp->sc_socket != -1) {
+		if (close(scp->sc_socket) != 0)
+			libvarpd_panic("failed to close svp_conn_t`scp_socket "
+			    "fd %d: %d", scp->sc_socket, errno);
+	}
+
+	list_destroy(&scp->sc_queries);
+	umem_free(scp, sizeof (svp_conn_t));
+}
+
+/*
+ * Queue a query on an active connection and make sure the socket is armed for
+ * POLLOUT so that it gets written. The query's sq_acttime is set to -1, which
+ * marks it as not yet started. The caller must hold sc_lock.
+ */
+void
+svp_conn_queue(svp_conn_t *scp, svp_query_t *sqp)
+{
+	assert(MUTEX_HELD(&scp->sc_lock));
+	assert(scp->sc_cstate == SVP_CS_ACTIVE);
+
+	sqp->sq_acttime = -1;
+	list_insert_tail(&scp->sc_queries, sqp);
+
+	/* Already waiting for POLLOUT; nothing more to arm. */
+	if ((scp->sc_event.se_events & POLLOUT) != 0)
+		return;
+
+	scp->sc_event.se_events |= POLLOUT;
+	/*
+	 * XXX If this fails, we should give up this set of conns or
+	 * something... For now, abort.
+	 */
+	if (svp_event_associate(&scp->sc_event, scp->sc_socket) != 0)
+		libvarpd_panic("svp_event_associate failed somehow");
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c
new file mode 100644
index 0000000000..43d064d64d
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c
@@ -0,0 +1,50 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+/*
+ * Perform standard crc32 functions.
+ *
+ * XXX This should probably be a library
+ */
+
+#include <sys/crc32.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <inttypes.h>
+#include <libvarpd_svp.h>
+
+/* Lookup table handed to the CRC32() macro. */
+static uint32_t svp_crc32_tab[] = { CRC32_TABLE };
+
+/*
+ * Continue a CRC32 computation: fold len bytes at buf into the running value
+ * old and return the new running value.
+ */
+static uint32_t
+svp_crc32(uint32_t old, const uint8_t *buf, size_t len)
+{
+	uint32_t result;
+
+	CRC32(result, buf, len, old, svp_crc32_tab);
+
+	return (result);
+}
+
+/*
+ * Compute the CRC32 over the header shp, with its svp_crc32 field zeroed,
+ * followed by data bytes at buf, and store the inverted result in
+ * shp->svp_crc32 in network byte order.
+ */
+void
+svp_query_crc32(svp_req_t *shp, void *buf, size_t data)
+{
+	uint32_t sum;
+
+	/* The checksum field itself is covered as zero. */
+	shp->svp_crc32 = 0;
+	sum = svp_crc32(-1U, (uint8_t *)shp, sizeof (svp_req_t));
+	sum = svp_crc32(sum, buf, data);
+	shp->svp_crc32 = htonl(~sum);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_host.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_host.c
new file mode 100644
index 0000000000..2c80de097e
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_host.c
@@ -0,0 +1,173 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/*
+ * DNS Host-name related functions.
+ *
+ * Every backend is stored in DNS. To find out membership, we query DNS and use
+ * that to update our world. We update our DNS records on both a timer
+ * granularity and immediately after creation. We'll also XXX go through and do
+ * this after all of our valid entries have disappeared.
+ *
+ * Unfortunately, doing host name resolution in a way that allows us to leverage
+ * the system resolvers and the system's caching, requires us to use blocking
+ * calls in libc. If we can't reach a given server, that will tie up a thread
+ * for quite some time. To work around that fact, we're going to create a fixed
+ * number of threads and we'll use them to service this kind of work. While not
+ * great, we don't have many better options.
+ */
+
+#include <sys/socket.h>
+#include <netdb.h>
+#include <thread.h>
+#include <synch.h>
+#include <assert.h>
+#include <errno.h>
+
+#include <libvarpd_svp.h>
+
+int svp_host_nthreads = 8;
+
+static mutex_t svp_host_lock = DEFAULTMUTEX;
+static cond_t svp_host_cv = DEFAULTCV;
+static svp_remote_t *svp_host_head;
+
+static void *
+svp_host_loop(void *unused)
+{
+ for (;;) {
+ int err;
+ svp_remote_t *srp;
+ struct addrinfo *addrs;
+
+ mutex_lock(&svp_host_lock);
+ while (svp_host_head == NULL)
+ cond_wait(&svp_host_cv, &svp_host_lock);
+ srp = svp_host_head;
+ svp_host_head = srp->sr_nexthost;
+ if (svp_host_head != NULL)
+ cond_signal(&svp_host_cv);
+ mutex_unlock(&svp_host_lock);
+
+ mutex_lock(&srp->sr_lock);
+ assert(srp->sr_state & SVP_RS_LOOKUP_SCHEDULED);
+ srp->sr_state &= ~SVP_RS_LOOKUP_SCHEDULED;
+ if (srp->sr_state & SVP_RS_LOOKUP_INPROGRESS) {
+ mutex_unlock(&srp->sr_lock);
+ continue;
+ }
+ srp->sr_state |= SVP_RS_LOOKUP_INPROGRESS;
+ mutex_unlock(&srp->sr_lock);
+
+ for (;;) {
+ err = getaddrinfo(srp->sr_hostname, NULL, NULL, &addrs);
+ if (err == 0)
+ break;
+ if (err != 0) {
+ switch (err) {
+ case EAI_ADDRFAMILY:
+ case EAI_BADFLAGS:
+ case EAI_FAMILY:
+ case EAI_SERVICE:
+ case EAI_SOCKTYPE:
+ case EAI_OVERFLOW:
+ default:
+ libvarpd_panic("unexpected getaddrinfo "
+ "failure: %d", err);
+ case EAI_AGAIN:
+ case EAI_MEMORY:
+ case EAI_SYSTEM:
+ continue;
+ case EAI_FAIL:
+ case EAI_NODATA:
+ case EAI_NONAME:
+ /*
+ * XXX At this point in time we have
+ * something which isn't very good. This
+ * may have been a typo or something may
+ * have been destroyed. We should go
+ * ahead and degrade this overall
+ * instance, because we're not going to
+ * make much forward progress... It'd be
+ * great if we could actually issue more
+ * of an EREPORT to describe what
+ * happened...
+ */
+ mutex_lock(&srp->sr_lock);
+ svp_remote_degrade(srp, SVP_RD_DNS_FAIL);
+ mutex_unlock(&srp->sr_lock);
+ break;
+ }
+ }
+ break;
+ }
+
+ if (err == 0) {
+ /*
+ * We've successfully resolved something, mark this
+ * degradation over for now.
+ */
+ mutex_lock(&srp->sr_lock);
+ svp_remote_restore(srp, SVP_RD_DNS_FAIL);
+ mutex_unlock(&srp->sr_lock);
+ svp_remote_resolved(srp, addrs);
+ }
+
+ mutex_lock(&srp->sr_lock);
+ srp->sr_state &= ~SVP_RS_LOOKUP_INPROGRESS;
+ mutex_unlock(&srp->sr_lock);
+ }
+}
+
+void
+svp_host_queue(svp_remote_t *srp)
+{
+ svp_remote_t *s;
+ mutex_lock(&svp_host_lock);
+ mutex_lock(&srp->sr_lock);
+ if (srp->sr_state & SVP_RS_LOOKUP_SCHEDULED) {
+ mutex_unlock(&srp->sr_lock);
+ mutex_unlock(&svp_host_lock);
+ return;
+ }
+ srp->sr_state |= SVP_RS_LOOKUP_SCHEDULED;
+ s = svp_host_head;
+ while (s != NULL && s->sr_nexthost != NULL)
+ s = s->sr_nexthost;
+ if (s == NULL) {
+ assert(s == svp_host_head);
+ svp_host_head = srp;
+ } else {
+ s->sr_nexthost = srp;
+ }
+ srp->sr_nexthost = NULL;
+ cond_signal(&svp_host_cv);
+ mutex_unlock(&srp->sr_lock);
+ mutex_unlock(&svp_host_lock);
+}
+
+/*
+ * Start the fixed pool of svp_host_nthreads DNS lookup threads, each running
+ * svp_host_loop(). Returns 0 on success, or the error number from
+ * thr_create() if a thread could not be created.
+ */
+int
+svp_host_init(void)
+{
+ int i, ret;
+
+ for (i = 0; i < svp_host_nthreads; i++) {
+ /*
+ * thr_create() hands back the error number as its return value
+ * and does not set errno, so that is what we must propagate.
+ */
+ ret = thr_create(NULL, 0, svp_host_loop, NULL,
+ THR_DETACHED | THR_DAEMON, NULL);
+ if (ret != 0)
+ return (ret);
+ }
+
+ return (0);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c
new file mode 100644
index 0000000000..a3579320ff
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c
@@ -0,0 +1,206 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/*
+ * Event loop mechanism for our backend.
+ */
+
+#include <unistd.h>
+#include <thread.h>
+#include <port.h>
+#include <signal.h>
+#include <time.h>
+#include <errno.h>
+#include <umem.h>
+
+#include <libvarpd_svp.h>
+
+typedef struct svp_event_loop {
+ int sel_port; /* RO */
+ int sel_nthread; /* RO */
+ thread_t *sel_threads; /* RO */
+ boolean_t sel_stop; /* svp_elock */
+ timer_t sel_hosttimer;
+} svp_event_loop_t;
+
+static svp_event_loop_t svp_event;
+static mutex_t svp_elock = DEFAULTMUTEX;
+
+/*
+ * Body of an event loop thread: repeatedly pull an event off of the shared
+ * event port and dispatch it to the svp_event_t callback stored in the
+ * event's portev_user. The loop exits once svp_event.sel_stop has been set.
+ */
+static void *
+svp_event_thr(void *arg)
+{
+ for (;;) {
+ int ret;
+ port_event_t pe;
+ svp_event_t *sep;
+
+ mutex_lock(&svp_elock);
+ if (svp_event.sel_stop == B_TRUE) {
+ mutex_unlock(&svp_elock);
+ break;
+ }
+ mutex_unlock(&svp_elock);
+
+ ret = port_get(svp_event.sel_port, &pe, NULL);
+ if (ret != 0) {
+ switch (errno) {
+ case EFAULT:
+ case EBADF:
+ case EINVAL:
+ libvarpd_panic("unexpected port_get errno: %d",
+ errno);
+ default:
+ /*
+ * No event was retrieved (for example, EINTR),
+ * so pe has not been filled in. Go around
+ * again rather than dispatching garbage.
+ */
+ continue;
+ }
+ }
+
+ /* Hand the event to the handler registered for it. */
+ if (pe.portev_user == NULL)
+ libvarpd_panic("received event (%p) without "
+ "portev_user set", &pe);
+ sep = (svp_event_t *)pe.portev_user;
+ sep->se_func(&pe, sep->se_arg);
+ }
+
+ return (NULL);
+}
+
+int
+svp_event_associate(svp_event_t *sep, int fd)
+{
+ int ret;
+
+ ret = port_associate(svp_event.sel_port, PORT_SOURCE_FD, fd,
+ sep->se_events, sep);
+ if (ret != 0) {
+ switch (errno) {
+ case EBADF:
+ case EBADFD:
+ case EINVAL:
+ case EAGAIN:
+ libvarpd_panic("unexpected port_associate error: %d",
+ errno);
+ default:
+ ret = errno;
+ break;
+ }
+ }
+
+ return (ret);
+}
+
+int
+svp_event_dissociate(svp_event_t *sep, int fd)
+{
+ int ret;
+
+ ret = port_dissociate(svp_event.sel_port, PORT_SOURCE_FD, fd);
+ if (ret != 0) {
+ if (errno != ENOENT)
+ libvarpd_panic("unexpected port_dissociate error: %d",
+ errno);
+ ret = errno;
+ }
+ return (ret);
+}
+
+int
+svp_event_inject(void *user)
+{
+ return (port_send(svp_event.sel_port, 0, user));
+}
+
+int
+svp_event_timer_init(svp_event_t *sep)
+{
+ port_notify_t pn;
+ struct sigevent evp;
+ struct itimerspec ts;
+
+ pn.portnfy_port = svp_event.sel_port;
+ pn.portnfy_user = sep;
+ evp.sigev_notify = SIGEV_PORT;
+ evp.sigev_value.sival_ptr = &pn;
+
+ if (timer_create(CLOCK_REALTIME, &evp, &svp_event.sel_hosttimer) != 0)
+ return (errno);
+
+ ts.it_value.tv_sec = svp_tickrate;
+ ts.it_value.tv_nsec = 0;
+ ts.it_interval.tv_sec = svp_tickrate;
+ ts.it_interval.tv_nsec = 0;
+
+ if (timer_settime(svp_event.sel_hosttimer, TIMER_RELTIME, &ts,
+ NULL) != 0) {
+ int ret = errno;
+ (void) timer_delete(svp_event.sel_hosttimer);
+ return (ret);
+ }
+
+ return (0);
+}
+
+/*
+ * Create the event port and start the event loop threads, two per online CPU
+ * plus one. Returns 0 on success or an error number on failure; on failure the
+ * event port is closed again.
+ */
+int
+svp_event_init(void)
+{
+ long i, ncpus;
+
+ svp_event.sel_port = port_create();
+ if (svp_event.sel_port == -1)
+ return (errno);
+
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN) * 2 + 1;
+ if (ncpus <= 0)
+ libvarpd_panic("sysconf for nprocs failed... %ld/%d",
+ ncpus, errno);
+
+ svp_event.sel_threads = umem_alloc(sizeof (thread_t) * ncpus,
+ UMEM_DEFAULT);
+ if (svp_event.sel_threads == NULL) {
+ int ret = errno;
+ (void) timer_delete(svp_event.sel_hosttimer);
+ (void) close(svp_event.sel_port);
+ svp_event.sel_port = -1;
+ return (ret);
+ }
+
+ for (i = 0; i < ncpus; i++) {
+ int ret;
+ thread_t *thr = &svp_event.sel_threads[i];
+
+ /*
+ * thr_create() returns the error number directly and does not
+ * set errno, so its return value is what we report. Reading
+ * errno here, particularly after the cleanup calls below, would
+ * give us an unrelated value.
+ */
+ ret = thr_create(NULL, 0, svp_event_thr, NULL,
+ THR_DETACHED | THR_DAEMON, thr);
+ if (ret != 0) {
+ (void) timer_delete(svp_event.sel_hosttimer);
+ (void) close(svp_event.sel_port);
+ svp_event.sel_port = -1;
+ umem_free(svp_event.sel_threads,
+ sizeof (thread_t) * ncpus);
+ svp_event.sel_threads = NULL;
+ return (ret);
+ }
+ }
+
+ return (0);
+}
+
+void
+svp_event_fini(void)
+{
+ mutex_lock(&svp_elock);
+ svp_event.sel_stop = B_TRUE;
+ mutex_unlock(&svp_elock);
+
+ (void) timer_delete(svp_event.sel_hosttimer);
+ (void) close(svp_event.sel_port);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h b/usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h
new file mode 100644
index 0000000000..e95f3e4c61
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h
@@ -0,0 +1,172 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_SVP_PROT_H
+#define _LIBVARPD_SVP_PROT_H
+
+/*
+ * SVP protocol Definitions
+ */
+
+#include <sys/types.h>
+#include <inttypes.h>
+#include <sys/ethernet.h>
+#include <netinet/in.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * SDC VXLAN Protocol Definitions
+ */
+
+#define SVP_VERSION_ONE 1
+#define SVP_CURRENT_VERSION SVP_VERSION_ONE
+
+typedef struct svp_req {
+ uint16_t svp_ver;
+ uint16_t svp_op;
+ uint32_t svp_size;
+ uint32_t svp_id;
+ uint32_t svp_crc32;
+} svp_req_t;
+
+typedef enum svp_op {
+ SVP_R_UNKNOWN = 0x00,
+ SVP_R_PING = 0x01,
+ SVP_R_PONG = 0x02,
+ SVP_R_VL2_REQ = 0x03,
+ SVP_R_VL2_ACK = 0x04,
+ SVP_R_VL3_REQ = 0x05,
+ SVP_R_VL3_ACK = 0x06,
+ SVP_R_BULK_REQ = 0x07,
+ SVP_R_BULK_ACK = 0x08,
+ SVP_R_LOG_REQ = 0x09,
+ SVP_R_LOG_ACK = 0x0A,
+ SVP_R_LOG_RM = 0x0B,
+ SVP_R_LOG_RACK = 0x0C,
+ SVP_R_SHOOTDOWN = 0x0D
+} svp_op_t;
+
+typedef enum svp_status {
+ SVP_S_OK = 0x00, /* Everything OK */
+ SVP_S_FATAL = 0x01, /* Fatal error, close connection */
+ SVP_S_NOTFOUND = 0x02, /* Entry not found */
+ SVP_S_BADL3TYPE = 0x03, /* Unknown svp_vl3_type_t */
+ SVP_S_BADBULK = 0x04, /* Unknown svp_bulk_type_t */
+ SVP_S_BADLOG = 0x05, /* Unknown svp_log_type_t */
+ SVP_S_LOGAGAIN = 0x06 /* Nothing in the log yet */
+} svp_status_t;
+
+typedef struct svp_vl2_req {
+ uint8_t sl2r_mac[ETHERADDRL];
+ uint8_t sl2r_pad[2];
+ uint32_t sl2r_vnetid;
+} svp_vl2_req_t;
+
+typedef struct svp_vl2_ack {
+ uint16_t sl2a_status;
+ uint16_t sl2a_port;
+ uint8_t sl2a_addr[16];
+} svp_vl2_ack_t;
+
+typedef enum svp_vl3_type {
+ SVP_VL3_IP = 0x01,
+ SVP_VL3_IPV6 = 0x02
+} svp_vl3_type_t;
+
+typedef struct svp_vl3_req {
+ uint8_t sl3r_ip[16];
+ uint32_t sl3r_type;
+ uint32_t sl3r_vnetid;
+} svp_vl3_req_t;
+
+typedef struct svp_vl3_ack {
+ uint32_t sl3a_status;
+ uint8_t sl3a_mac[ETHERADDRL];
+ uint16_t sl3a_uport;
+ uint8_t sl3a_uip[16];
+} svp_vl3_ack_t;
+
+typedef enum svp_bulk_type {
+ SVP_BULK_VL2 = 0x01,
+ SVP_BULK_VL3 = 0x02
+} svp_bulk_type_t;
+
+typedef struct svp_bulk_req {
+ uint32_t svbr_type;
+} svp_bulk_req_t;
+
+typedef struct svp_bulk_ack {
+ uint32_t svba_status;
+ uint32_t svba_type;
+ uint8_t svba_data[];
+} svp_bulk_ack_t;
+
+typedef enum svp_log_type {
+ SVP_LOG_VL2 = 0x01,
+ SVP_LOG_VL3 = 0x02
+} svp_log_type_t;
+
+typedef struct svp_log_req {
+ uint32_t svlr_type;
+ uint32_t svlr_count;
+} svp_log_req_t;
+
+typedef struct svp_log_vl2 {
+ uint8_t svl2_id[16]; /* 16-byte UUID */
+ uint8_t svl2_mac[ETHERADDRL];
+ uint8_t svl2_pad[2];
+ uint32_t svl2_vnetid;
+} svp_log_vl2_t;
+
+typedef struct svp_log_vl3 {
+ uint8_t svl3_id[16]; /* 16-byte UUID */
+ uint8_t slv3_ip[16];
+ uint8_t svl3_mac[ETHERADDRL];
+ uint16_t svl3_vlan;
+ uint8_t svl3_tmac[ETHERADDRL];
+ uint8_t svl3_tpad[2];
+ uint32_t svl3_vnetid;
+} svp_log_vl3_t;
+
+typedef struct svp_log_ack {
+ uint32_t svla_status;
+ uint32_t svla_type;
+ uint8_t svla_data[];
+} svp_log_ack_t;
+
+typedef struct svp_lrm_req {
+ uint32_t svrr_type;
+ uint32_t svrr_pad;
+ uint8_t svrr_ids[];
+} svp_lrm_req_t;
+
+typedef struct svp_lrm_ack {
+ uint32_t svra_status;
+} svp_lrm_ack_t;
+
+typedef struct svp_shootdown {
+ uint8_t svsd_mac[ETHERADDRL];
+ uint8_t svsd_pad[2];
+ uint32_t svsd_vnetid;
+} svp_shootdown_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_SVP_PROT_H */
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c
new file mode 100644
index 0000000000..6da565246c
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c
@@ -0,0 +1,596 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/*
+ * This file encapsulates all of the logic for dealing with a given remote host
+ * that is being used to service requests. Multiple different overlay devices
+ * all share the same single device here.
+ */
+
+#include <umem.h>
+#include <strings.h>
+#include <string.h>
+#include <stddef.h>
+#include <thread.h>
+#include <synch.h>
+#include <assert.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <errno.h>
+#include <libidspace.h>
+
+#include <libvarpd_provider.h>
+#include <libvarpd_svp.h>
+
+static mutex_t svp_remote_lock = DEFAULTMUTEX;
+static avl_tree_t svp_remote_tree;
+static svp_timer_t svp_dns_timer;
+static id_space_t *svp_idspace;
+static int svp_dns_timer_rate = 30; /* seconds */
+
+/*
+ * Format a human readable description of the degraded state 'state' of the
+ * remote 'srp' into buf, which is buflen bytes long.
+ */
+static void
+svp_remote_mkfmamsg(svp_remote_t *srp, svp_degrade_state_t state, char *buf,
+ size_t buflen)
+{
+ switch (state) {
+ case SVP_RD_DNS_FAIL:
+ (void) snprintf(buf, buflen, "failed to resolve or find "
+ "entries for hostname %s", srp->sr_hostname);
+ break;
+ case SVP_RD_REMOTE_FAIL:
+ (void) snprintf(buf, buflen, "cannot reach any remote peers");
+ break;
+ default:
+ (void) snprintf(buf, buflen, "unknown error state: %d", state);
+ break;
+ }
+}
+
+static int
+svp_remote_comparator(const void *l, const void *r)
+{
+ int ret;
+ const svp_remote_t *lr = l, *rr = r;
+
+ ret = strcmp(lr->sr_hostname, rr->sr_hostname);
+ if (ret > 0)
+ return (1);
+ else if (ret < 0)
+ return (-1);
+
+ if (lr->sr_rport > rr->sr_rport)
+ return (1);
+ else if (lr->sr_rport < rr->sr_rport)
+ return (-1);
+ else
+ return (0);
+}
+
+void
+svp_query_release(svp_query_t *sqp)
+{
+ id_free(svp_idspace, sqp->sq_header.svp_id);
+}
+
+static void
+svp_remote_destroy(svp_remote_t *srp)
+{
+ size_t len;
+
+ /*
+ * XXX Clean up DNS related information, eg. make sure we're not in the
+ * queue. Likely need a flag cv...
+ */
+
+ if (mutex_destroy(&srp->sr_lock) != 0)
+ libvarpd_panic("failed to destroy mutex sr_lock");
+
+ if (srp->sr_addrinfo != NULL)
+ freeaddrinfo(srp->sr_addrinfo);
+ len = strlen(srp->sr_hostname) + 1;
+ umem_free(srp->sr_hostname, len);
+ umem_free(srp, sizeof (svp_remote_t));
+}
+
+/*
+ * Allocate and initialize a new svp_remote_t for the given host and port and
+ * return it through outp with a reference count of one. Returns 0 on success
+ * and ENOMEM if memory could not be allocated.
+ *
+ * The caller must hold svp_remote_lock. The lock is still held when we return,
+ * on both success and failure; the caller is always the one that drops it.
+ * Dropping it here on failure would lead to the caller unlocking it a second
+ * time.
+ */
+static int
+svp_remote_create(const char *host, uint16_t port, svp_remote_t **outp)
+{
+ size_t hlen;
+ svp_remote_t *remote;
+
+ assert(MUTEX_HELD(&svp_remote_lock));
+
+ remote = umem_zalloc(sizeof (svp_remote_t), UMEM_DEFAULT);
+ if (remote == NULL)
+ return (ENOMEM);
+
+ hlen = strlen(host) + 1;
+ remote->sr_hostname = umem_alloc(hlen, UMEM_DEFAULT);
+ if (remote->sr_hostname == NULL) {
+ umem_free(remote, sizeof (svp_remote_t));
+ return (ENOMEM);
+ }
+ remote->sr_rport = port;
+ if (mutex_init(&remote->sr_lock, USYNC_THREAD, NULL) != 0)
+ libvarpd_panic("failed to create mutex sr_lock");
+ list_create(&remote->sr_conns, sizeof (svp_conn_t),
+ offsetof(svp_conn_t, sc_rlist));
+ avl_create(&remote->sr_tree, svp_comparator, sizeof (svp_t),
+ offsetof(svp_t, svp_rlink));
+ (void) strlcpy(remote->sr_hostname, host, hlen);
+ remote->sr_count = 1;
+
+ *outp = remote;
+ return (0);
+}
+
+int
+svp_remote_find(char *host, uint16_t port, svp_remote_t **outp)
+{
+ int ret;
+ svp_remote_t lookup, *remote;
+
+ lookup.sr_hostname = host;
+ lookup.sr_rport = port;
+ mutex_lock(&svp_remote_lock);
+ remote = avl_find(&svp_remote_tree, &lookup, NULL);
+ if (remote != NULL) {
+ assert(remote->sr_count > 0);
+ remote->sr_count++;
+ *outp = remote;
+ mutex_unlock(&svp_remote_lock);
+ return (0);
+ }
+
+ if ((ret = svp_remote_create(host, port, outp)) != 0) {
+ mutex_unlock(&svp_remote_lock);
+ return (ret);
+ }
+
+ avl_add(&svp_remote_tree, *outp);
+ mutex_unlock(&svp_remote_lock);
+
+ /* Make sure DNS is up to date */
+ svp_host_queue(*outp);
+
+ return (0);
+}
+
+void
+svp_remote_release(svp_remote_t *srp)
+{
+ mutex_lock(&svp_remote_lock);
+ mutex_lock(&srp->sr_lock);
+ srp->sr_count--;
+ if (srp->sr_count != 0) {
+ mutex_unlock(&srp->sr_lock);
+ mutex_unlock(&svp_remote_lock);
+ return;
+ }
+ mutex_unlock(&srp->sr_lock);
+
+ avl_remove(&svp_remote_tree, srp);
+ mutex_unlock(&svp_remote_lock);
+ svp_remote_destroy(srp);
+}
+
+/*
+ * Attach the svp_t 'svp' to the remote 'srp': check that svp is not already
+ * attached and that all of its mandatory callbacks are present, insert it into
+ * the remote's tree of users, and record srp in svp->svp_remote. Any violation
+ * is treated as a programming error and panics. Always returns 0.
+ */
+int
+svp_remote_attach(svp_remote_t *srp, svp_t *svp)
+{
+ svp_t check;
+ avl_index_t where;
+
+ mutex_lock(&srp->sr_lock);
+ if (svp->svp_remote != NULL)
+ libvarpd_panic("svp is already attached to a remote");
+
+ /*
+ * We require everything except shootdowns
+ */
+ if (svp->svp_cb.scb_vl2_lookup == NULL)
+ libvarpd_panic("missing callback scb_vl2_lookup");
+ if (svp->svp_cb.scb_vl3_lookup == NULL)
+ libvarpd_panic("missing callback scb_vl3_lookup");
+ if (svp->svp_cb.scb_vl2_invalidate == NULL)
+ libvarpd_panic("missing callback scb_vl2_invalidate");
+ if (svp->svp_cb.scb_vl3_inject == NULL)
+ libvarpd_panic("missing callback scb_vl3_inject");
+
+ /* Look for an existing entry with the same svp_vid before inserting. */
+ check.svp_vid = svp->svp_vid;
+ if (avl_find(&srp->sr_tree, &check, &where) != NULL)
+ libvarpd_panic("found duplicate entry with vid %ld",
+ svp->svp_vid);
+ avl_insert(&srp->sr_tree, svp, where);
+ svp->svp_remote = srp;
+ mutex_unlock(&srp->sr_lock);
+
+ return (0);
+}
+
+/*
+ * Detach 'svp' from the remote it is attached to: remove it from the remote's
+ * tree, clear svp->svp_remote, and drop the reference on the remote via
+ * svp_remote_release(). Panics if svp has no remote or is not the entry found
+ * in the remote's tree.
+ */
+void
+svp_remote_detach(svp_t *svp)
+{
+ svp_t *lookup;
+ svp_remote_t *srp = svp->svp_remote;
+
+ if (srp == NULL)
+ libvarpd_panic("trying to detach remote when none exists");
+
+ mutex_lock(&srp->sr_lock);
+ lookup = avl_find(&srp->sr_tree, svp, NULL);
+ if (lookup == NULL || lookup != svp)
+ libvarpd_panic("inconsistent remote avl tree...");
+ avl_remove(&srp->sr_tree, svp);
+ svp->svp_remote = NULL;
+ mutex_unlock(&srp->sr_lock);
+ svp_remote_release(srp);
+}
+
+/*
+ * Walk the list of connections and find the first one that's available, then
+ * move it to the back of the list so it's less likely to be used again.
+ */
+static boolean_t
+svp_remote_conn_queue(svp_remote_t *srp, svp_query_t *sqp)
+{
+ svp_conn_t *scp;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ for (scp = list_head(&srp->sr_conns); scp != NULL;
+ scp = list_next(&srp->sr_conns, scp)) {
+ mutex_lock(&scp->sc_lock);
+ if (scp->sc_cstate != SVP_CS_ACTIVE) {
+ mutex_unlock(&scp->sc_lock);
+ continue;
+ }
+ svp_conn_queue(scp, sqp);
+ mutex_unlock(&scp->sc_lock);
+ list_remove(&srp->sr_conns, scp);
+ list_insert_tail(&srp->sr_conns, scp);
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+static void
+svp_remote_vl2_lookup_cb(svp_query_t *sqp, void *arg)
+{
+ svp_t *svp = sqp->sq_svp;
+ svp_vl2_ack_t *vl2a = (svp_vl2_ack_t *)sqp->sq_wdata;
+
+ if (sqp->sq_status == SVP_S_OK)
+ svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status,
+ (struct in6_addr *)vl2a->sl2a_addr, ntohs(vl2a->sl2a_port),
+ arg);
+ else
+ svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, NULL, 0, arg);
+}
+
+/*
+ * Fill in 'sqp' as a VL2 lookup request for the MAC address 'mac' on svp's
+ * virtual network and queue it on an active connection of svp's remote. The
+ * answer is delivered through svp's scb_vl2_lookup callback together with
+ * 'arg'. If no connection is currently active, the callback is invoked right
+ * away with SVP_S_FATAL.
+ */
+void
+svp_remote_vl2_lookup(svp_t *svp, svp_query_t *sqp, const uint8_t *mac,
+ void *arg)
+{
+ svp_remote_t *srp;
+ svp_vl2_req_t *vl2r = &sqp->sq_rdun.sqd_vl2r;
+
+ srp = svp->svp_remote;
+ sqp->sq_func = svp_remote_vl2_lookup_cb;
+ sqp->sq_arg = arg;
+ sqp->sq_svp = svp;
+ sqp->sq_state = SVP_QUERY_INIT;
+ sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
+ sqp->sq_header.svp_op = htons(SVP_R_VL2_REQ);
+ sqp->sq_header.svp_size = htonl(sizeof (svp_vl2_req_t));
+ /*
+ * XXX ID, crc32 need real values
+ */
+ sqp->sq_header.svp_id = id_alloc(svp_idspace);
+ if (sqp->sq_header.svp_id == -1)
+ libvarpd_panic("failed to allocate from svp_idspace: %d",
+ errno);
+ sqp->sq_header.svp_crc32 = htonl(0);
+ sqp->sq_rdata = vl2r;
+ sqp->sq_rsize = sizeof (svp_vl2_req_t);
+ sqp->sq_wdata = NULL;
+ sqp->sq_wsize = 0;
+
+ bcopy(mac, vl2r->sl2r_mac, ETHERADDRL);
+ /* The request goes out on the wire, so convert host to network order. */
+ vl2r->sl2r_vnetid = htonl(svp->svp_vid);
+
+ mutex_lock(&srp->sr_lock);
+ /* The fourth callback argument is a port number, not a pointer. */
+ if (svp_remote_conn_queue(srp, sqp) == B_FALSE)
+ svp->svp_cb.scb_vl2_lookup(svp, SVP_S_FATAL, NULL, 0, arg);
+ mutex_unlock(&srp->sr_lock);
+}
+
+static void
+svp_remote_vl3_lookup_cb(svp_query_t *sqp, void *arg)
+{
+ svp_t *svp = sqp->sq_svp;
+ svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
+
+ if (sqp->sq_status == SVP_S_OK)
+ svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, vl3a->sl3a_mac,
+ (struct in6_addr *)vl3a->sl3a_uip, ntohs(vl3a->sl3a_uport),
+ arg);
+ else
+ svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, NULL, NULL, 0,
+ arg);
+}
+
+/*
+ * Fill in 'sqp' as a VL3 lookup request for the IPv4 or IPv6 address 'addr' on
+ * svp's virtual network and queue it on an active connection of svp's remote.
+ * IPv4 addresses are sent as v4-mapped IPv6 addresses. The answer is delivered
+ * through svp's scb_vl3_lookup callback together with 'arg'. If no connection
+ * is currently active, the callback is invoked right away with SVP_S_FATAL.
+ * Panics if addr is of any other address family.
+ */
+void
+svp_remote_vl3_lookup(svp_t *svp, svp_query_t *sqp,
+ const struct sockaddr *addr, void *arg)
+{
+ svp_remote_t *srp;
+ svp_vl3_req_t *vl3r = &sqp->sq_rdun.sdq_vl3r;
+
+ if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
+ libvarpd_panic("unexpected sa_family for the vl3 lookup");
+
+ srp = svp->svp_remote;
+ sqp->sq_func = svp_remote_vl3_lookup_cb;
+ sqp->sq_arg = arg;
+ sqp->sq_svp = svp;
+ sqp->sq_state = SVP_QUERY_INIT;
+ sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
+ sqp->sq_header.svp_op = htons(SVP_R_VL3_REQ);
+ /* svp_size is a 32-bit field, so it needs htonl() and not htons(). */
+ sqp->sq_header.svp_size = htonl(sizeof (svp_vl3_req_t));
+ /*
+ * XXX ID, crc32 need real values
+ */
+ sqp->sq_header.svp_id = id_alloc(svp_idspace);
+ if (sqp->sq_header.svp_id == -1)
+ libvarpd_panic("failed to allocate from svp_idspace: %d",
+ errno);
+ sqp->sq_header.svp_crc32 = htonl(0);
+ sqp->sq_rdata = vl3r;
+ sqp->sq_rsize = sizeof (svp_vl3_req_t);
+ sqp->sq_wdata = NULL;
+ sqp->sq_wsize = 0;
+
+ /* Like the other request fields, the type goes out in network order. */
+ if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)addr;
+ vl3r->sl3r_type = htonl(SVP_VL3_IPV6);
+ bcopy(&s6->sin6_addr, vl3r->sl3r_ip,
+ sizeof (struct in6_addr));
+ } else {
+ struct sockaddr_in *s4 = (struct sockaddr_in *)addr;
+ struct in6_addr v6;
+
+ vl3r->sl3r_type = htonl(SVP_VL3_IP);
+ IN6_INADDR_TO_V4MAPPED(&s4->sin_addr, &v6);
+ bcopy(&v6, vl3r->sl3r_ip, sizeof (struct in6_addr));
+ }
+ vl3r->sl3r_vnetid = htonl(svp->svp_vid);
+
+ mutex_lock(&srp->sr_lock);
+ /* The fifth callback argument is a port number, not a pointer. */
+ if (svp_remote_conn_queue(srp, sqp) == B_FALSE)
+ svp->svp_cb.scb_vl3_lookup(svp, SVP_S_FATAL, NULL, NULL, 0,
+ arg);
+ mutex_unlock(&srp->sr_lock);
+}
+
+void
+svp_remote_dns_timer(void *unused)
+{
+ svp_remote_t *s;
+ mutex_lock(&svp_remote_lock);
+ for (s = avl_first(&svp_remote_tree); s != NULL;
+ s = AVL_NEXT(&svp_remote_tree, s)) {
+ svp_host_queue(s);
+ }
+ mutex_unlock(&svp_remote_lock);
+}
+
+void
+svp_remote_resolved(svp_remote_t *srp, struct addrinfo *newaddrs)
+{
+ struct addrinfo *a;
+ svp_conn_t *scp;
+ int ngen;
+
+ mutex_lock(&srp->sr_lock);
+ srp->sr_gen++;
+ ngen = srp->sr_gen;
+ mutex_unlock(&srp->sr_lock);
+
+ for (a = newaddrs; a != NULL; a = a->ai_next) {
+ struct in6_addr in6;
+ struct in6_addr *addrp;
+
+ if (a->ai_family != AF_INET && a->ai_family != AF_INET6)
+ continue;
+
+ if (a->ai_family == AF_INET) {
+ struct sockaddr_in *v4;
+ v4 = (struct sockaddr_in *)a->ai_addr;
+ addrp = &in6;
+ IN6_INADDR_TO_V4MAPPED(&v4->sin_addr, addrp);
+ } else {
+ struct sockaddr_in6 *v6;
+ v6 = (struct sockaddr_in6 *)a->ai_addr;
+ addrp = &v6->sin6_addr;
+ }
+
+ mutex_lock(&srp->sr_lock);
+ for (scp = list_head(&srp->sr_conns); scp != NULL;
+ scp = list_next(&srp->sr_conns, scp)) {
+ mutex_lock(&scp->sc_lock);
+ if (bcmp(addrp, &scp->sc_addr,
+ sizeof (struct in6_addr)) == 0) {
+ scp->sc_gen = ngen;
+ mutex_unlock(&scp->sc_lock);
+ break;
+ }
+ mutex_unlock(&scp->sc_lock);
+ }
+
+ /*
+ * We need to be careful in the assumptions that we make here,
+ * as there's a good chance that svp_conn_create will
+ * drop the svp_remote_t`sr_lock to kick off its effective event
+ * loop.
+ */
+ if (scp == NULL)
+ svp_conn_create(srp, addrp);
+ mutex_unlock(&srp->sr_lock);
+ }
+
+ /*
+ * Now it's time to clean things up. We do not actively clean up the
+ * current connections that we have, instead allowing them to stay
+ * around assuming that they're still useful. Instead, we go through and
+ * purge the degraded list for anything that's from an older generation.
+ */
+ mutex_lock(&srp->sr_lock);
+ for (scp = list_head(&srp->sr_conns); scp != NULL;
+ scp = list_next(&srp->sr_conns, scp)) {
+ boolean_t fall = B_FALSE;
+ mutex_lock(&scp->sc_lock);
+ if (scp->sc_gen < srp->sr_gen)
+ fall = B_TRUE;
+ mutex_unlock(&scp->sc_lock);
+ if (fall == B_TRUE)
+ svp_conn_fallout(scp);
+ }
+ mutex_unlock(&srp->sr_lock);
+}
+
+/*
+ * This connection is in the process of being reset, we need to reassign all of
+ * its queries to other places or mark them as fatal.
+ */
+void
+svp_remote_reassign(svp_remote_t *srp, svp_conn_t *scp)
+{
+ assert(MUTEX_HELD(&srp->sr_lock));
+ svp_query_t *sqp;
+
+ /*
+ * As we try to reassign all of its queries, remove it from the list.
+ */
+ list_remove(&srp->sr_conns, scp);
+
+ while ((sqp = list_remove_head(&scp->sc_queries)) != NULL) {
+ sqp->sq_wdata = NULL;
+ sqp->sq_wsize = 0;
+ sqp->sq_acttime = -1;
+
+ /*
+ * XXX We probably want to maintain a queue of these for some
+ * time.
+ */
+ if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
+ sqp->sq_status = SVP_S_FATAL;
+ sqp->sq_func(sqp, sqp->sq_arg);
+ }
+ }
+
+ /*
+ * Now that we're done, go ahead and re-insert.
+ */
+ list_insert_tail(&srp->sr_conns, scp);
+}
+
+void
+svp_remote_degrade(svp_remote_t *srp, svp_degrade_state_t flag)
+{
+ int sf, nf;
+ char buf[256];
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+
+ if (flag == SVP_RD_ALL || flag == 0)
+ libvarpd_panic("invalid flag passed to degrade");
+
+ if ((flag & srp->sr_degrade) != 0) {
+ return;
+ }
+
+ sf = ffs(srp->sr_degrade);
+ nf = ffs(flag);
+ srp->sr_degrade |= flag;
+ if (sf == 0 || sf > nf) {
+ svp_t *svp;
+ svp_remote_mkfmamsg(srp, flag, buf, sizeof (buf));
+
+ for (svp = avl_first(&srp->sr_tree); svp != NULL;
+ svp = AVL_NEXT(&srp->sr_tree, svp)) {
+ libvarpd_fma_degrade(svp->svp_hdl, buf);
+ }
+ }
+}
+
+void
+svp_remote_restore(svp_remote_t *srp, svp_degrade_state_t flag)
+{
+ int sf, nf;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ sf = ffs(srp->sr_degrade);
+ if ((srp->sr_degrade & flag) != flag)
+ return;
+ srp->sr_degrade &= ~flag;
+ nf = ffs(srp->sr_degrade);
+
+ /*
+ * If we're now empty, restore the device. If we still are degraded, but
+ * we now have a higher base than we used to, change the message.
+ */
+ if (srp->sr_degrade == 0) {
+ svp_t *svp;
+ for (svp = avl_first(&srp->sr_tree); svp != NULL;
+ svp = AVL_NEXT(&srp->sr_tree, svp)) {
+ libvarpd_fma_restore(svp->svp_hdl);
+ }
+ } else if (nf != sf) {
+ svp_t *svp;
+ char buf[256];
+
+ svp_remote_mkfmamsg(srp, 1U << (nf - 1), buf, sizeof (buf));
+ for (svp = avl_first(&srp->sr_tree); svp != NULL;
+ svp = AVL_NEXT(&srp->sr_tree, svp)) {
+ libvarpd_fma_degrade(svp->svp_hdl, buf);
+ }
+ }
+}
+
+int
+svp_remote_init(void)
+{
+ svp_idspace = id_space_create("svp_req_ids", 1, INT32_MAX);
+ if (svp_idspace == NULL)
+ return (errno);
+ avl_create(&svp_remote_tree, svp_remote_comparator,
+ sizeof (svp_remote_t), offsetof(svp_remote_t, sr_gnode));
+ svp_dns_timer.st_func = svp_remote_dns_timer;
+ svp_dns_timer.st_arg = NULL;
+ svp_dns_timer.st_oneshot = B_FALSE;
+ svp_dns_timer.st_value = svp_dns_timer_rate;
+ svp_timer_add(&svp_dns_timer);
+ return (0);
+}
+
+/*
+ * Undo svp_remote_init(): remove the periodic DNS timer, destroy the tree of
+ * remotes, and destroy the request id space if one was created.
+ */
+void
+svp_remote_fini(void)
+{
+ svp_timer_remove(&svp_dns_timer);
+ avl_destroy(&svp_remote_tree);
+ /* Only destroy the id space if it exists; the check used to be inverted. */
+ if (svp_idspace != NULL)
+ id_space_destroy(svp_idspace);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c
new file mode 100644
index 0000000000..f1fb2908c7
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c
@@ -0,0 +1,144 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+#include <stddef.h>
+#include <libvarpd_svp.h>
+
+/*
+ * This implements all of the logic of maintaining a timer for the svp backend.
+ * We have a timer that fires at a one second tick. We maintain all of our
+ * events in an avl tree, sorted by the tick that they need to be processed at.
+ */
+
+int svp_tickrate = 1;
+static svp_event_t svp_timer_event;
+static mutex_t svp_timer_lock = DEFAULTMUTEX;
+static cond_t svp_timer_cv = DEFAULTCV;
+static avl_tree_t svp_timer_tree;
+static uint64_t svp_timer_nticks;
+
+static int
+svp_timer_comparator(const void *l, const void *r)
+{
+ const svp_timer_t *lt, *rt;
+
+ lt = l;
+ rt = r;
+
+ if (lt->st_expire > rt->st_expire)
+ return (1);
+ else if (lt->st_expire < rt->st_expire)
+ return (-1);
+
+ /*
+ * Multiple timers can have the same delivery time, so sort within that
+ * by the address of the timer itself.
+ */
+ if ((uintptr_t)lt > (uintptr_t)rt)
+ return (1);
+ else if ((uintptr_t)lt < (uintptr_t)rt)
+ return (-1);
+
+ return (0);
+}
+
+static void
+svp_timer_tick(port_event_t *pe, void *arg)
+{
+ mutex_lock(&svp_timer_lock);
+ svp_timer_nticks++;
+
+ for (;;) {
+ svp_timer_t *t;
+
+ t = avl_first(&svp_timer_tree);
+ if (t == NULL || t->st_expire > svp_timer_nticks)
+ break;
+
+ avl_remove(&svp_timer_tree, t);
+
+ /*
+ * We drop this while performing an operation so that way state
+ * can advance in the face of a long-running callback.
+ */
+ t->st_delivering = B_TRUE;
+ mutex_unlock(&svp_timer_lock);
+ t->st_func(t->st_arg);
+ mutex_lock(&svp_timer_lock);
+ t->st_delivering = B_FALSE;
+ cond_broadcast(&svp_timer_cv);
+ if (t->st_oneshot == B_FALSE) {
+ t->st_expire += t->st_value;
+ avl_add(&svp_timer_tree, t);
+ }
+ }
+ mutex_unlock(&svp_timer_lock);
+}
+
+void
+svp_timer_add(svp_timer_t *stp)
+{
+ if (stp->st_value == 0)
+ libvarpd_panic("tried to add svp timer with zero value");
+
+ mutex_lock(&svp_timer_lock);
+ stp->st_delivering = B_FALSE;
+ stp->st_expire = svp_timer_nticks + stp->st_value;
+ avl_add(&svp_timer_tree, stp);
+ mutex_unlock(&svp_timer_lock);
+}
+
+void
+svp_timer_remove(svp_timer_t *stp)
+{
+ mutex_lock(&svp_timer_lock);
+
+ /*
+ * If the event in question is not currently being delivered, then we
+ * can stop it before it next fires. If it is currently being delivered,
+ * we need to wait for that to finish. Because we hold the timer lock,
+ * we know that it cannot be rearmed. Therefore, we make sure the one
+ * shot flag is set to B_TRUE, and wait until it's no longer delivering.
+ */
+ if (stp->st_delivering == B_FALSE) {
+ avl_remove(&svp_timer_tree, stp);
+ mutex_unlock(&svp_timer_lock);
+ return;
+ }
+
+ stp->st_oneshot = B_TRUE;
+ while (stp->st_delivering == B_TRUE)
+ cond_wait(&svp_timer_cv, &svp_timer_lock);
+
+ mutex_unlock(&svp_timer_lock);
+}
+
+int
+svp_timer_init(void)
+{
+ int ret;
+
+ svp_timer_event.se_func = svp_timer_tick;
+ svp_timer_event.se_arg = NULL;
+
+ avl_create(&svp_timer_tree, svp_timer_comparator, sizeof (svp_timer_t),
+ offsetof(svp_timer_t, st_link));
+
+ if ((ret = svp_event_timer_init(&svp_timer_event)) != 0) {
+ avl_destroy(&svp_timer_tree);
+ }
+
+ return (ret);
+}
diff --git a/usr/src/lib/varpd/svp/common/llib-lvarpd_svp b/usr/src/lib/varpd/svp/common/llib-lvarpd_svp
new file mode 100644
index 0000000000..31b3d36fbe
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/llib-lvarpd_svp
@@ -0,0 +1,18 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
diff --git a/usr/src/lib/varpd/svp/common/mapfile-vers b/usr/src/lib/varpd/svp/common/mapfile-vers
new file mode 100644
index 0000000000..642ef72adc
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/mapfile-vers
@@ -0,0 +1,35 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+#
+# The SUNWprivate version names no global symbols. Its only entry is the
+# "local: *" catch-all, which gives every symbol not otherwise listed
+# local scope.
+#
+SYMBOL_VERSION SUNWprivate {
+ local:
+ *;
+};
diff --git a/usr/src/lib/varpd/svp/i386/Makefile b/usr/src/lib/varpd/svp/i386/Makefile
new file mode 100644
index 0000000000..cf2f2487af
--- /dev/null
+++ b/usr/src/lib/varpd/svp/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc.
+#
+
+# Pull in the definitions shared by every build of this library.
+include ../Makefile.com
+
+# install builds "all" first, then the files named by ROOTLIBS, ROOTLINKS
+# and ROOTLINT. Those variables are not set in this file; presumably they
+# come from the included makefile -- confirm there.
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/svp/sparc/Makefile b/usr/src/lib/varpd/svp/sparc/Makefile
new file mode 100644
index 0000000000..cf2f2487af
--- /dev/null
+++ b/usr/src/lib/varpd/svp/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc.
+#
+
+# Pull in the definitions shared by every build of this library.
+include ../Makefile.com
+
+# install builds "all" first, then the files named by ROOTLIBS, ROOTLINKS
+# and ROOTLINT. Those variables are not set in this file; presumably they
+# come from the included makefile -- confirm there.
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/svp/sparcv9/Makefile b/usr/src/lib/varpd/svp/sparcv9/Makefile
new file mode 100644
index 0000000000..b64b830ddd
--- /dev/null
+++ b/usr/src/lib/varpd/svp/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc.
+#
+
+# Pull in the definitions shared by every build of this library, followed by
+# Makefile.lib.64 from the top of the lib tree (presumably the 64-bit build
+# settings -- confirm in that file).
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+# install builds "all" first, then the files named by the 64-suffixed
+# ROOTLIBS64, ROOTLINKS64 and ROOTLINT64 variables. Those variables are not
+# set in this file; presumably they come from the included makefiles.
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)