diff options
Diffstat (limited to 'usr/src/lib/varpd')
63 files changed, 12531 insertions, 0 deletions
diff --git a/usr/src/lib/varpd/Makefile b/usr/src/lib/varpd/Makefile new file mode 100644 index 0000000000..daa849572e --- /dev/null +++ b/usr/src/lib/varpd/Makefile @@ -0,0 +1,33 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +SUBDIRS = libvarpd .WAIT direct files svp + +all := TARGET = all +clean := TARGET = clean +clobber := TARGET = clobber +check := TARGET = check +install := TARGET = install +install_h := TARGET = install_h +lint := TARGET = lint + +.KEEP_STATE: + +all clean clobber install install_h check lint: $(SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/lib/varpd/Makefile.plugin b/usr/src/lib/varpd/Makefile.plugin new file mode 100644 index 0000000000..48f188500c --- /dev/null +++ b/usr/src/lib/varpd/Makefile.plugin @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +ROOTLIBDIR = $(ROOT)/usr/lib/varpd +ROOTLIBDIR64 = $(ROOT)/usr/lib/varpd/$(MACH64) + +MAPFILES += ../../libvarpd/common/mapfile-plugin diff --git a/usr/src/lib/varpd/direct/Makefile b/usr/src/lib/varpd/direct/Makefile new file mode 100644 index 0000000000..275f07bf8b --- /dev/null +++ b/usr/src/lib/varpd/direct/Makefile @@ -0,0 +1,40 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../../Makefile.lib + +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET = all +clean := TARGET = clean +clobber := TARGET = clobber +install := TARGET = install +lint := TARGET = lint + +.KEEP_STATE: + +all clean clobber install lint: $(SUBDIRS) + +install_h: + +check: + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include ../../Makefile.targ diff --git a/usr/src/lib/varpd/direct/Makefile.com b/usr/src/lib/varpd/direct/Makefile.com new file mode 100644 index 0000000000..4072c9a0ab --- /dev/null +++ b/usr/src/lib/varpd/direct/Makefile.com @@ -0,0 +1,38 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +LIBRARY = libvarpd_direct.a +VERS = .1 +OBJECTS = libvarpd_direct.o + +include ../../../Makefile.lib +include ../../Makefile.plugin + +LIBS = $(DYNLIB) +LDLIBS += -lc -lumem -lnvpair -lnsl +CPPFLAGS += -I../common + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all + +SRCDIR = ../common + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +include ../../../Makefile.targ diff --git a/usr/src/lib/varpd/direct/amd64/Makefile b/usr/src/lib/varpd/direct/amd64/Makefile new file mode 100644 index 0000000000..d552642882 --- /dev/null +++ b/usr/src/lib/varpd/direct/amd64/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/varpd/direct/common/libvarpd_direct.c b/usr/src/lib/varpd/direct/common/libvarpd_direct.c new file mode 100644 index 0000000000..018cdf641c --- /dev/null +++ b/usr/src/lib/varpd/direct/common/libvarpd_direct.c @@ -0,0 +1,411 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2016 Joyent, Inc. + */ + +/* + * Point to point plug-in for varpd. + * + * This plugin implements a simple point to point plugin for a packet. It + * represents the traditional tunnel, just in overlay form. As such, the only + * properties it needs are those to determine where to send everything. At this + * time, we don't allow a mulicast address; however, there's no reason that the + * direct plugin shouldn't in theory support multicast, though when implementing + * it the best path will become clear. + * + * In general this module has been designed to make it easy to support a + * destination of either IP or IP and port; however, we restrict it to the + * latter as we don't currently have an implementation that would allow us to + * test that. + */ + +#include <libvarpd_provider.h> +#include <umem.h> +#include <errno.h> +#include <thread.h> +#include <synch.h> +#include <strings.h> +#include <assert.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <libnvpair.h> + +typedef struct varpd_direct { + overlay_plugin_dest_t vad_dest; /* RO */ + mutex_t vad_lock; /* Protects the rest */ + boolean_t vad_hip; + boolean_t vad_hport; + struct in6_addr vad_ip; + uint16_t vad_port; +} varpd_direct_t; + +static const char *varpd_direct_props[] = { + "direct/dest_ip", + "direct/dest_port" +}; + +static boolean_t +varpd_direct_valid_dest(overlay_plugin_dest_t dest) +{ + if (dest & ~(OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT)) + return (B_FALSE); + + if (!(dest & (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))) + return (B_FALSE); + + return (B_TRUE); +} + +/* ARGSUSED */ +static int +varpd_direct_create(varpd_provider_handle_t *hdl, void **outp, + overlay_plugin_dest_t dest) +{ + int ret; + varpd_direct_t *vdp; + + if (varpd_direct_valid_dest(dest) == B_FALSE) + return (ENOTSUP); + + vdp = umem_alloc(sizeof (varpd_direct_t), UMEM_DEFAULT); + if (vdp == NULL) + return (ENOMEM); + + if ((ret = mutex_init(&vdp->vad_lock, USYNC_THREAD | LOCK_ERRORCHECK, + NULL)) != 0) { + umem_free(vdp, sizeof (varpd_direct_t)); + return (ret); + } + + vdp->vad_dest = dest; + vdp->vad_hip = B_FALSE; + vdp->vad_hport = B_FALSE; + *outp = vdp; + return (0); +} + +static int +varpd_direct_start(void *arg) +{ + varpd_direct_t *vdp = arg; + + mutex_enter(&vdp->vad_lock); + if (vdp->vad_hip == B_FALSE ||((vdp->vad_dest & OVERLAY_PLUGIN_D_IP) && + vdp->vad_hport == B_FALSE)) { + mutex_exit(&vdp->vad_lock); + return (EAGAIN); + } + mutex_exit(&vdp->vad_lock); + + return (0); +} + +/* ARGSUSED */ +static void +varpd_direct_stop(void *arg) +{ +} + +static void +varpd_direct_destroy(void *arg) +{ + varpd_direct_t *vdp = arg; + + if (mutex_destroy(&vdp->vad_lock) != 0) + abort(); + umem_free(vdp, sizeof (varpd_direct_t)); +} + +static int +varpd_direct_default(void *arg, overlay_target_point_t *otp) +{ + varpd_direct_t *vdp = arg; + + mutex_enter(&vdp->vad_lock); + bcopy(&vdp->vad_ip, &otp->otp_ip, sizeof (struct in6_addr)); + otp->otp_port = vdp->vad_port; + mutex_exit(&vdp->vad_lock); + + return (VARPD_LOOKUP_OK); +} + +static int +varpd_direct_nprops(void *arg, uint_t *nprops) +{ + const varpd_direct_t *vdp = arg; + + *nprops = 0; + if (vdp->vad_dest & OVERLAY_PLUGIN_D_ETHERNET) + *nprops += 1; + + if (vdp->vad_dest & OVERLAY_PLUGIN_D_IP) + *nprops += 1; + + if (vdp->vad_dest & OVERLAY_PLUGIN_D_PORT) + *nprops += 1; + + assert(*nprops == 1 || *nprops == 2); + + return (0); +} + +static int +varpd_direct_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph) +{ + varpd_direct_t *vdp = arg; + + /* + * Because we only support IP + port combos right now, prop 0 should + * always be the IP. We don't support a port without an IP. + */ + assert(vdp->vad_dest & OVERLAY_PLUGIN_D_IP); + if (propid == 0) { + libvarpd_prop_set_name(vph, varpd_direct_props[0]); + libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW); + libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP); + libvarpd_prop_set_nodefault(vph); + return (0); + } + + if (propid == 1 && vdp->vad_dest & OVERLAY_PLUGIN_D_PORT) { + libvarpd_prop_set_name(vph, varpd_direct_props[1]); + libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW); + libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT); + libvarpd_prop_set_nodefault(vph); + libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX); + return (0); + } + + return (EINVAL); +} + +static int +varpd_direct_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep) +{ + varpd_direct_t *vdp = arg; + + /* direct/dest_ip */ + if (strcmp(pname, varpd_direct_props[0]) == 0) { + if (*sizep < sizeof (struct in6_addr)) + return (EOVERFLOW); + mutex_enter(&vdp->vad_lock); + if (vdp->vad_hip == B_FALSE) { + *sizep = 0; + } else { + bcopy(&vdp->vad_ip, buf, sizeof (struct in6_addr)); + *sizep = sizeof (struct in6_addr); + } + mutex_exit(&vdp->vad_lock); + return (0); + } + + /* direct/dest_port */ + if (strcmp(pname, varpd_direct_props[1]) == 0) { + uint64_t val; + + if (*sizep < sizeof (uint64_t)) + return (EOVERFLOW); + mutex_enter(&vdp->vad_lock); + if (vdp->vad_hport == B_FALSE) { + *sizep = 0; + } else { + val = vdp->vad_port; + bcopy(&val, buf, sizeof (uint64_t)); + *sizep = sizeof (uint64_t); + } + mutex_exit(&vdp->vad_lock); + return (0); + } + + return (EINVAL); +} + +static int +varpd_direct_setprop(void *arg, const char *pname, const void *buf, + const uint32_t size) +{ + varpd_direct_t *vdp = arg; + + /* direct/dest_ip */ + if (strcmp(pname, varpd_direct_props[0]) == 0) { + const struct in6_addr *ipv6 = buf; + + if (size < sizeof (struct in6_addr)) + return (EOVERFLOW); + + if (IN6_IS_ADDR_V4COMPAT(ipv6)) + return (EINVAL); + + if (IN6_IS_ADDR_6TO4(ipv6)) + return (EINVAL); + + mutex_enter(&vdp->vad_lock); + bcopy(buf, &vdp->vad_ip, sizeof (struct in6_addr)); + vdp->vad_hip = B_TRUE; + mutex_exit(&vdp->vad_lock); + return (0); + } + + /* direct/dest_port */ + if (strcmp(pname, varpd_direct_props[1]) == 0) { + const uint64_t *valp = buf; + if (size < sizeof (uint64_t)) + return (EOVERFLOW); + + if (*valp == 0 || *valp > UINT16_MAX) + return (EINVAL); + + mutex_enter(&vdp->vad_lock); + vdp->vad_port = (uint16_t)*valp; + vdp->vad_hport = B_TRUE; + mutex_exit(&vdp->vad_lock); + return (0); + } + + return (EINVAL); +} + +static int +varpd_direct_save(void *arg, nvlist_t *nvp) +{ + int ret; + varpd_direct_t *vdp = arg; + + mutex_enter(&vdp->vad_lock); + if (vdp->vad_hport == B_TRUE) { + if ((ret = nvlist_add_uint16(nvp, varpd_direct_props[1], + vdp->vad_port)) != 0) { + mutex_exit(&vdp->vad_lock); + return (ret); + } + } + + if (vdp->vad_hip == B_TRUE) { + char buf[INET6_ADDRSTRLEN]; + + if (inet_ntop(AF_INET6, &vdp->vad_ip, buf, sizeof (buf)) == + NULL) + abort(); + if ((ret = nvlist_add_string(nvp, varpd_direct_props[0], + buf)) != 0) { + mutex_exit(&vdp->vad_lock); + return (ret); + } + } + mutex_exit(&vdp->vad_lock); + + return (0); +} + +/* ARGSUSED */ +static int +varpd_direct_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl, + overlay_plugin_dest_t dest, void **outp) +{ + int ret; + char *ipstr; + varpd_direct_t *vdp; + + if (varpd_direct_valid_dest(dest) == B_FALSE) + return (ENOTSUP); + + vdp = umem_alloc(sizeof (varpd_direct_t), UMEM_DEFAULT); + if (vdp == NULL) + return (ENOMEM); + + if ((ret = mutex_init(&vdp->vad_lock, USYNC_THREAD | LOCK_ERRORCHECK, + NULL)) != 0) { + umem_free(vdp, sizeof (varpd_direct_t)); + return (ret); + } + + if ((ret = nvlist_lookup_uint16(nvp, varpd_direct_props[1], + &vdp->vad_port)) != 0) { + if (ret != ENOENT) { + if (mutex_destroy(&vdp->vad_lock) != 0) + abort(); + umem_free(vdp, sizeof (varpd_direct_t)); + return (ret); + } + vdp->vad_hport = B_FALSE; + } else { + vdp->vad_hport = B_TRUE; + } + + if ((ret = nvlist_lookup_string(nvp, varpd_direct_props[0], + &ipstr)) != 0) { + if (ret != ENOENT) { + if (mutex_destroy(&vdp->vad_lock) != 0) + abort(); + umem_free(vdp, sizeof (varpd_direct_t)); + return (ret); + } + vdp->vad_hip = B_FALSE; + } else { + ret = inet_pton(AF_INET6, ipstr, &vdp->vad_ip); + /* + * inet_pton is only defined to return -1 with errno set to + * EAFNOSUPPORT, which really, shouldn't happen. + */ + if (ret == -1) { + assert(errno == EAFNOSUPPORT); + abort(); + } + if (ret == 0) { + if (mutex_destroy(&vdp->vad_lock) != 0) + abort(); + umem_free(vdp, sizeof (varpd_direct_t)); + return (EINVAL); + } + } + + *outp = vdp; + return (0); +} + +static const varpd_plugin_ops_t varpd_direct_ops = { + 0, + varpd_direct_create, + varpd_direct_start, + varpd_direct_stop, + varpd_direct_destroy, + varpd_direct_default, + NULL, + varpd_direct_nprops, + varpd_direct_propinfo, + varpd_direct_getprop, + varpd_direct_setprop, + varpd_direct_save, + varpd_direct_restore +}; + +#pragma init(varpd_direct_init) +static void +varpd_direct_init(void) +{ + int err; + varpd_plugin_register_t *vpr; + + vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err); + if (vpr == NULL) + return; + + vpr->vpr_mode = OVERLAY_TARGET_POINT; + vpr->vpr_name = "direct"; + vpr->vpr_ops = &varpd_direct_ops; + (void) libvarpd_plugin_register(vpr); + libvarpd_plugin_free(vpr); +} diff --git a/usr/src/lib/varpd/direct/common/llib-lvarpd_direct b/usr/src/lib/varpd/direct/common/llib-lvarpd_direct new file mode 100644 index 0000000000..03c34f4fcb --- /dev/null +++ b/usr/src/lib/varpd/direct/common/llib-lvarpd_direct @@ -0,0 +1,18 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + diff --git a/usr/src/lib/varpd/direct/common/mapfile-vers b/usr/src/lib/varpd/direct/common/mapfile-vers new file mode 100644 index 0000000000..6b7c5a5067 --- /dev/null +++ b/usr/src/lib/varpd/direct/common/mapfile-vers @@ -0,0 +1,35 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION SUNWprivate { + local: + *; +}; diff --git a/usr/src/lib/varpd/direct/i386/Makefile b/usr/src/lib/varpd/direct/i386/Makefile new file mode 100644 index 0000000000..f2b4f63da5 --- /dev/null +++ b/usr/src/lib/varpd/direct/i386/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/direct/sparc/Makefile b/usr/src/lib/varpd/direct/sparc/Makefile new file mode 100644 index 0000000000..f2b4f63da5 --- /dev/null +++ b/usr/src/lib/varpd/direct/sparc/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/direct/sparcv9/Makefile b/usr/src/lib/varpd/direct/sparcv9/Makefile new file mode 100644 index 0000000000..d552642882 --- /dev/null +++ b/usr/src/lib/varpd/direct/sparcv9/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/varpd/files/Makefile b/usr/src/lib/varpd/files/Makefile new file mode 100644 index 0000000000..275f07bf8b --- /dev/null +++ b/usr/src/lib/varpd/files/Makefile @@ -0,0 +1,40 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../../Makefile.lib + +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET = all +clean := TARGET = clean +clobber := TARGET = clobber +install := TARGET = install +lint := TARGET = lint + +.KEEP_STATE: + +all clean clobber install lint: $(SUBDIRS) + +install_h: + +check: + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include ../../Makefile.targ diff --git a/usr/src/lib/varpd/files/Makefile.com b/usr/src/lib/varpd/files/Makefile.com new file mode 100644 index 0000000000..1f6a7c03b1 --- /dev/null +++ b/usr/src/lib/varpd/files/Makefile.com @@ -0,0 +1,42 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +LIBRARY = libvarpd_files.a +VERS = .1 +OBJECTS = libvarpd_files.o \ + libvarpd_files_json.o + +include ../../../Makefile.lib +include ../../Makefile.plugin + +LIBS = $(DYNLIB) +LDLIBS += -lc -lumem -lnvpair -lsocket -lnsl -lcmdutils +CPPFLAGS += -I../common + +LINTFLAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTFLAGS64 += -erroff=E_BAD_PTR_CAST_ALIGN + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all + +SRCDIR = ../common + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +include ../../../Makefile.targ diff --git a/usr/src/lib/varpd/files/amd64/Makefile b/usr/src/lib/varpd/files/amd64/Makefile new file mode 100644 index 0000000000..d552642882 --- /dev/null +++ b/usr/src/lib/varpd/files/amd64/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/varpd/files/common/libvarpd_files.c b/usr/src/lib/varpd/files/common/libvarpd_files.c new file mode 100644 index 0000000000..812919a07d --- /dev/null +++ b/usr/src/lib/varpd/files/common/libvarpd_files.c @@ -0,0 +1,605 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015, Joyent, Inc. + */ + +/* + * Files based plug-in for varpd + * + * This is a dynamic varpd plug-in that has a static backing store. It's really + * nothing more than a glorified version of /etc/ethers, though it facilitiates + * a bit more. The files module allows for the full set of mappings to be fixed + * at creation time. In addition, it also provides support for proxying ARP, + * NDP, and DHCP. + * + * At this time, the plugin requires that the destination type involve both an + * IP address and a port; however, there's no reason that this cannot be made + * more flexible as we have additional encapsulation algorithms that support it. + * The plug-in only has a single property, which is the location of the JSON + * file. The JSON file itself looks something like: + * + * { + * "aa:bb:cc:dd:ee:ff": { + * "arp": "10.23.69.1", + * "ndp": "2600:3c00::f03c:91ff:fe96:a264", + * "ip": "192.168.1.1", + * "port": 8080 + * }, + * ... + * } + */ + +#include <libvarpd_provider.h> +#include <umem.h> +#include <errno.h> +#include <thread.h> +#include <synch.h> +#include <strings.h> +#include <assert.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <libnvpair.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/ethernet.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include <libvarpd_files_json.h> + +typedef struct varpd_files { + overlay_plugin_dest_t vaf_dest; /* RO */ + varpd_provider_handle_t *vaf_hdl; /* RO */ + char *vaf_path; /* WO */ + nvlist_t *vaf_nvl; /* WO */ + uint64_t vaf_nmisses; /* Atomic */ + uint64_t vaf_narp; /* Atomic */ +} varpd_files_t; + +static const char *varpd_files_props[] = { + "files/config" +}; + +static boolean_t +varpd_files_valid_dest(overlay_plugin_dest_t dest) +{ + if (dest & ~(OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT)) + return (B_FALSE); + + if (!(dest & (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))) + return (B_FALSE); + + return (B_TRUE); +} + +static int +varpd_files_create(varpd_provider_handle_t *hdl, void **outp, + overlay_plugin_dest_t dest) +{ + varpd_files_t *vaf; + + if (varpd_files_valid_dest(dest) == B_FALSE) + return (ENOTSUP); + + vaf = umem_alloc(sizeof (varpd_files_t), UMEM_DEFAULT); + if (vaf == NULL) + return (ENOMEM); + + bzero(vaf, sizeof (varpd_files_t)); + vaf->vaf_dest = dest; + vaf->vaf_path = NULL; + vaf->vaf_nvl = NULL; + vaf->vaf_hdl = hdl; + *outp = vaf; + return (0); +} + +static int +varpd_files_normalize_nvlist(varpd_files_t *vaf, nvlist_t *nvl) +{ + int ret; + nvlist_t *out; + nvpair_t *pair; + + if ((ret = nvlist_alloc(&out, NV_UNIQUE_NAME, 0)) != 0) + return (ret); + + for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; + pair = nvlist_next_nvpair(nvl, pair)) { + char *name, fname[ETHERADDRSTRL]; + nvlist_t *data; + struct ether_addr ether, *e; + e = ðer; + + if (nvpair_type(pair) != DATA_TYPE_NVLIST) { + nvlist_free(out); + return (EINVAL); + } + + name = nvpair_name(pair); + if ((ret = nvpair_value_nvlist(pair, &data)) != 0) { + nvlist_free(out); + return (EINVAL); + } + + if (ether_aton_r(name, e) == NULL) { + nvlist_free(out); + return (EINVAL); + } + + if (ether_ntoa_r(e, fname) == NULL) { + nvlist_free(out); + return (ENOMEM); + } + + if ((ret = nvlist_add_nvlist(out, fname, data)) != 0) { + nvlist_free(out); + return (EINVAL); + } + } + + vaf->vaf_nvl = out; + return (0); +} + +static int +varpd_files_start(void *arg) +{ + int fd, ret; + void *maddr; + struct stat st; + nvlist_t *nvl; + varpd_files_t *vaf = arg; + + if (vaf->vaf_path == NULL) + return (EAGAIN); + + if ((fd = open(vaf->vaf_path, O_RDONLY)) < 0) + return (errno); + + if (fstat(fd, &st) != 0) { + ret = errno; + if (close(fd) != 0) + abort(); + return (ret); + } + + maddr = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd, 0); + if (maddr == NULL) { + ret = errno; + if (close(fd) != 0) + abort(); + return (ret); + } + + ret = nvlist_parse_json(maddr, st.st_size, &nvl, + NVJSON_FORCE_INTEGER, NULL); + if (ret == 0) { + ret = varpd_files_normalize_nvlist(vaf, nvl); + nvlist_free(nvl); + } + if (munmap(maddr, st.st_size) != 0) + abort(); + if (close(fd) != 0) + abort(); + + return (ret); +} + +static void +varpd_files_stop(void *arg) +{ + varpd_files_t *vaf = arg; + + nvlist_free(vaf->vaf_nvl); + vaf->vaf_nvl = NULL; +} + +static void +varpd_files_destroy(void *arg) +{ + varpd_files_t *vaf = arg; + + assert(vaf->vaf_nvl == NULL); + if (vaf->vaf_path != NULL) { + umem_free(vaf->vaf_path, strlen(vaf->vaf_path) + 1); + vaf->vaf_path = NULL; + } + umem_free(vaf, sizeof (varpd_files_t)); +} + +static void +varpd_files_lookup(void *arg, varpd_query_handle_t *qh, + const overlay_targ_lookup_t *otl, overlay_target_point_t *otp) +{ + char macstr[ETHERADDRSTRL], *ipstr; + nvlist_t *nvl; + varpd_files_t *vaf = arg; + int32_t port; + static const uint8_t bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + + /* We don't support a default */ + if (otl == NULL) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + if (otl->otl_sap == ETHERTYPE_ARP) { + libvarpd_plugin_proxy_arp(vaf->vaf_hdl, qh, otl); + return; + } + + if (otl->otl_sap == ETHERTYPE_IPV6 && + otl->otl_dstaddr[0] == 0x33 && + otl->otl_dstaddr[1] == 0x33) { + libvarpd_plugin_proxy_ndp(vaf->vaf_hdl, qh, otl); + return; + } + + if (otl->otl_sap == ETHERTYPE_IP && + bcmp(otl->otl_dstaddr, bcast, ETHERADDRL) == 0) { + char *mac; + struct ether_addr a, *addr; + + addr = &a; + if (ether_ntoa_r((struct ether_addr *)otl->otl_srcaddr, + macstr) == NULL) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + if (nvlist_lookup_nvlist(vaf->vaf_nvl, macstr, &nvl) != 0) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + if (nvlist_lookup_string(nvl, "dhcp-proxy", &mac) != 0) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + if (ether_aton_r(mac, addr) == NULL) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + libvarpd_plugin_proxy_dhcp(vaf->vaf_hdl, qh, otl); + return; + } + + if (ether_ntoa_r((struct ether_addr *)otl->otl_dstaddr, + macstr) == NULL) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + if (nvlist_lookup_nvlist(vaf->vaf_nvl, macstr, &nvl) != 0) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + if (nvlist_lookup_int32(nvl, "port", &port) != 0) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + if (port <= 0 || port > UINT16_MAX) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + otp->otp_port = port; + + if (nvlist_lookup_string(nvl, "ip", &ipstr) != 0) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + /* + * Try to parse it as a v6 address and then if it's not, try to + * transform it into a v4 address which we'll then wrap it into a v4 + * mapped address. + */ + if (inet_pton(AF_INET6, ipstr, &otp->otp_ip) != 1) { + uint32_t v4; + if (inet_pton(AF_INET, ipstr, &v4) != 1) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + IN6_IPADDR_TO_V4MAPPED(v4, &otp->otp_ip); + } + + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_OK); +} + +/* ARGSUSED */ +static int +varpd_files_nprops(void *arg, uint_t *nprops) +{ + *nprops = 1; + return (0); +} + +/* ARGSUSED */ +static int +varpd_files_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph) +{ + if (propid != 0) + return (EINVAL); + + libvarpd_prop_set_name(vph, varpd_files_props[0]); + libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW); + libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING); + libvarpd_prop_set_nodefault(vph); + return (0); +} + +static int +varpd_files_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep) +{ + varpd_files_t *vaf = arg; + + if (strcmp(pname, varpd_files_props[0]) != 0) + return (EINVAL); + + if (vaf->vaf_path != NULL) { + size_t len = strlen(vaf->vaf_path) + 1; + if (*sizep < len) + return (EOVERFLOW); + *sizep = len; + (void) strlcpy(buf, vaf->vaf_path, *sizep); + + } else { + *sizep = 0; + } + + return (0); +} + +static int +varpd_files_setprop(void *arg, const char *pname, const void *buf, + const uint32_t size) +{ + varpd_files_t *vaf = arg; + + if (strcmp(pname, varpd_files_props[0]) != 0) + return (EINVAL); + + if (vaf->vaf_path != NULL) + umem_free(vaf->vaf_path, strlen(vaf->vaf_path) + 1); + + vaf->vaf_path = umem_alloc(size, UMEM_DEFAULT); + if (vaf->vaf_path == NULL) + return (ENOMEM); + (void) strlcpy(vaf->vaf_path, buf, size); + return (0); +} + +static int +varpd_files_save(void *arg, nvlist_t *nvp) +{ + int ret; + varpd_files_t *vaf = arg; + + if (vaf->vaf_path == NULL) + return (0); + + if ((ret = nvlist_add_string(nvp, varpd_files_props[0], + vaf->vaf_path)) != 0) + return (ret); + + if ((ret = nvlist_add_uint64(nvp, "files/vaf_nmisses", + vaf->vaf_nmisses)) != 0) + return (ret); + + if ((ret = nvlist_add_uint64(nvp, "files/vaf_narp", + vaf->vaf_narp)) != 0) + return (ret); + return (0); +} + +static int +varpd_files_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl, + overlay_plugin_dest_t dest, void **outp) +{ + varpd_files_t *vaf; + char *str; + int ret; + uint64_t nmisses, narp; + + if (varpd_files_valid_dest(dest) == B_FALSE) + return (EINVAL); + + ret = nvlist_lookup_string(nvp, varpd_files_props[0], &str); + if (ret != 0 && ret != ENOENT) + return (ret); + else if (ret == ENOENT) + str = NULL; + + if (nvlist_lookup_uint64(nvp, "files/vaf_nmisses", &nmisses) != 0) + return (EINVAL); + if (nvlist_lookup_uint64(nvp, "files/vaf_narp", &narp) != 0) + return (EINVAL); + + vaf = umem_alloc(sizeof (varpd_files_t), UMEM_DEFAULT); + if (vaf == NULL) + return (ENOMEM); + + bzero(vaf, sizeof (varpd_files_t)); + vaf->vaf_dest = dest; + if (str != NULL) { + size_t len = strlen(str) + 1; + vaf->vaf_path = umem_alloc(len, UMEM_DEFAULT); + if (vaf->vaf_path == NULL) { + umem_free(vaf, sizeof (varpd_files_t)); + return (ENOMEM); + } + (void) strlcpy(vaf->vaf_path, str, len); + } + + vaf->vaf_hdl = hdl; + *outp = vaf; + return (0); +} + +static void +varpd_files_proxy_arp(void *arg, varpd_arp_handle_t *vah, int kind, + const struct sockaddr *sock, uint8_t *out) +{ + varpd_files_t *vaf = arg; + const struct sockaddr_in *ip; + const struct sockaddr_in6 *ip6; + nvpair_t *pair; + + if (kind != VARPD_QTYPE_ETHERNET) { + libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP); + return; + } + + if (sock->sa_family != AF_INET && sock->sa_family != AF_INET6) { + libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP); + return; + } + + ip = (const struct sockaddr_in *)sock; + ip6 = (const struct sockaddr_in6 *)sock; + for (pair = nvlist_next_nvpair(vaf->vaf_nvl, NULL); pair != NULL; + pair = nvlist_next_nvpair(vaf->vaf_nvl, pair)) { + char *mac, *ipstr; + nvlist_t *data; + struct in_addr ia; + struct in6_addr ia6; + struct ether_addr ether, *e; + e = ðer; + + if (nvpair_type(pair) != DATA_TYPE_NVLIST) + continue; + + mac = nvpair_name(pair); + if (nvpair_value_nvlist(pair, &data) != 0) + continue; + + + if (sock->sa_family == AF_INET) { + if (nvlist_lookup_string(data, "arp", &ipstr) != 0) + continue; + + if (inet_pton(AF_INET, ipstr, &ia) != 1) + continue; + + if (bcmp(&ia, &ip->sin_addr, + sizeof (struct in_addr)) != 0) + continue; + } else { + if (nvlist_lookup_string(data, "ndp", &ipstr) != 0) + continue; + + if (inet_pton(AF_INET6, ipstr, &ia6) != 1) + continue; + + if (bcmp(&ia6, &ip6->sin6_addr, + sizeof (struct in6_addr)) != 0) + continue; + } + + if (ether_aton_r(mac, e) == NULL) { + libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP); + return; + } + + bcopy(e, out, ETHERADDRL); + libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_OK); + return; + } + + libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP); +} + +static void +varpd_files_proxy_dhcp(void *arg, varpd_dhcp_handle_t *vdh, int type, + const overlay_targ_lookup_t *otl, uint8_t *out) +{ + varpd_files_t *vaf = arg; + nvlist_t *nvl; + char macstr[ETHERADDRSTRL], *mac; + struct ether_addr a, *addr; + + addr = &a; + if (type != VARPD_QTYPE_ETHERNET) { + libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP); + return; + } + + if (ether_ntoa_r((struct ether_addr *)otl->otl_srcaddr, + macstr) == NULL) { + libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP); + return; + } + + if (nvlist_lookup_nvlist(vaf->vaf_nvl, macstr, &nvl) != 0) { + libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP); + return; + } + + if (nvlist_lookup_string(nvl, "dhcp-proxy", &mac) != 0) { + libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP); + return; + } + + if (ether_aton_r(mac, addr) == NULL) { + libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP); + return; + } + + bcopy(addr, out, ETHERADDRL); + libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_OK); +} + +static const varpd_plugin_ops_t varpd_files_ops = { + 0, + varpd_files_create, + varpd_files_start, + varpd_files_stop, + varpd_files_destroy, + NULL, + varpd_files_lookup, + varpd_files_nprops, + varpd_files_propinfo, + varpd_files_getprop, + varpd_files_setprop, + varpd_files_save, + varpd_files_restore, + varpd_files_proxy_arp, + varpd_files_proxy_dhcp +}; + +#pragma init(varpd_files_init) +static void +varpd_files_init(void) +{ + int err; + varpd_plugin_register_t *vpr; + + vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err); + if (vpr == NULL) + return; + + vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC; + vpr->vpr_name = "files"; + vpr->vpr_ops = &varpd_files_ops; + (void) libvarpd_plugin_register(vpr); + libvarpd_plugin_free(vpr); +} diff --git a/usr/src/lib/varpd/files/common/libvarpd_files_json.c b/usr/src/lib/varpd/files/common/libvarpd_files_json.c new file mode 100644 index 0000000000..53e63c6244 --- /dev/null +++ b/usr/src/lib/varpd/files/common/libvarpd_files_json.c @@ -0,0 +1,936 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#include <strings.h> +#include <errno.h> +#include <libnvpair.h> +#include <sys/ccompile.h> + +#include "libvarpd_files_json.h" + +typedef enum json_type { + JSON_TYPE_NOTHING = 0, + JSON_TYPE_STRING = 1, + JSON_TYPE_INTEGER, + JSON_TYPE_DOUBLE, + JSON_TYPE_BOOLEAN, + JSON_TYPE_NULL, + JSON_TYPE_OBJECT, + JSON_TYPE_ARRAY +} json_type_t; + +typedef enum parse_state { + PARSE_ERROR = -1, + PARSE_DONE = 0, + PARSE_REST, + PARSE_OBJECT, + PARSE_KEY_STRING, + PARSE_COLON, + PARSE_STRING, + PARSE_OBJECT_COMMA, + PARSE_ARRAY, + PARSE_BAREWORD, + PARSE_NUMBER, + PARSE_ARRAY_VALUE, + PARSE_ARRAY_COMMA +} parse_state_t; + +#define JSON_MARKER ".__json_" +#define JSON_MARKER_ARRAY JSON_MARKER "array" + +typedef struct parse_frame { + parse_state_t pf_ps; + nvlist_t *pf_nvl; + + char *pf_key; + void *pf_value; + json_type_t pf_value_type; + int pf_array_index; + + struct parse_frame *pf_next; +} parse_frame_t; + +typedef struct state { + const char *s_in; + unsigned long s_pos; + unsigned long s_len; + + parse_frame_t *s_top; + + nvlist_parse_json_flags_t s_flags; + + /* + * This string buffer is used for temporary storage by the + * "collect_*()" family of functions. + */ + custr_t *s_collect; + + int s_errno; + custr_t *s_errstr; +} state_t; + +typedef void (*parse_handler_t)(state_t *); + +static void +movestate(state_t *s, parse_state_t ps) +{ + if (s->s_flags & NVJSON_DEBUG) { + (void) fprintf(stderr, "nvjson: move state %d -> %d\n", + s->s_top->pf_ps, ps); + } + s->s_top->pf_ps = ps; +} + +static void +posterror(state_t *s, int erno, const char *error) +{ + /* + * If the caller wants error messages printed to stderr, do that + * first. + */ + if (s->s_flags & NVJSON_ERRORS_TO_STDERR) { + (void) fprintf(stderr, "nvjson error (pos %ld, errno %d): %s\n", + s->s_pos, erno, error); + } + + /* + * Try and store the error message for the caller. This may fail if + * the error was related to memory pressure, and that condition still + * exists. + */ + s->s_errno = erno; + if (s->s_errstr != NULL) { + (void) custr_append(s->s_errstr, error); + } + + movestate(s, PARSE_ERROR); +} + +static int +pushstate(state_t *s, parse_state_t ps, parse_state_t retps) +{ + parse_frame_t *n; + + if (s->s_flags & NVJSON_DEBUG) { + (void) fprintf(stderr, "nvjson: push state %d -> %d (ret %d)\n", + s->s_top->pf_ps, ps, retps); + } + + if ((n = calloc(1, sizeof (*n))) == NULL) { + posterror(s, errno, "pushstate calloc failure"); + return (-1); + } + + /* + * Store the state we'll return to when popping this + * frame: + */ + s->s_top->pf_ps = retps; + + /* + * Store the initial state for the new frame, and + * put it on top of the stack: + */ + n->pf_ps = ps; + n->pf_value_type = JSON_TYPE_NOTHING; + + n->pf_next = s->s_top; + s->s_top = n; + + return (0); +} + +static char +popchar(state_t *s) +{ + if (s->s_pos > s->s_len) { + return (0); + } + return (s->s_in[s->s_pos++]); +} + +static char +peekchar(state_t *s) +{ + if (s->s_pos > s->s_len) { + return (0); + } + return (s->s_in[s->s_pos]); +} + +static void +discard_whitespace(state_t *s) +{ + while (isspace(peekchar(s))) { + (void) popchar(s); + } +} + +static char *escape_pairs[] = { + "\"\"", "\\\\", "//", "b\b", "f\f", "n\n", "r\r", "t\t", NULL +}; + +static char +collect_string_escape(state_t *s) +{ + int i; + char c = popchar(s); + + if (c == '\0') { + posterror(s, EPROTO, "EOF mid-escape sequence"); + return (-1); + } + + /* + * Handle four-digit Unicode escapes up to and including \u007f. + * Strings that cannot be represented as 7-bit clean ASCII are not + * currently supported. + */ + if (c == 'u') { + int res; + int ndigs = 0; + char digs[5]; + + /* + * Deal with 4-digit unicode escape. + */ + while (ndigs < 4) { + if ((digs[ndigs++] = popchar(s)) == '\0') { + posterror(s, EPROTO, "EOF mid-escape " + "sequence"); + return (-1); + } + } + digs[4] = '\0'; + if ((res = atoi(digs)) > 127) { + posterror(s, EPROTO, "unicode escape above 0x7f"); + return (-1); + } + + if (custr_appendc(s->s_collect, res) != 0) { + posterror(s, errno, "custr_appendc failure"); + return (-1); + } + return (0); + } + + /* + * See if this is a C-style escape character we recognise. + */ + for (i = 0; escape_pairs[i] != NULL; i++) { + char *ep = escape_pairs[i]; + if (ep[0] == c) { + if (custr_appendc(s->s_collect, ep[1]) != 0) { + posterror(s, errno, "custr_appendc failure"); + return (-1); + } + return (0); + } + } + + posterror(s, EPROTO, "unrecognised escape sequence"); + return (-1); +} + +static int +collect_string(state_t *s) +{ + custr_reset(s->s_collect); + + for (;;) { + char c; + + switch (c = popchar(s)) { + case '"': + /* + * Legal End of String. + */ + return (0); + + case '\0': + posterror(s, EPROTO, "EOF mid-string"); + return (-1); + + case '\\': + /* + * Escape Characters and Sequences. + */ + if (collect_string_escape(s) != 0) { + return (-1); + } + break; + + default: + if (custr_appendc(s->s_collect, c) != 0) { + posterror(s, errno, "custr_appendc failure"); + return (-1); + } + break; + } + } +} + +static int +collect_bareword(state_t *s) +{ + custr_reset(s->s_collect); + + for (;;) { + if (!islower(peekchar(s))) { + return (0); + } + + if (custr_appendc(s->s_collect, popchar(s)) != 0) { + posterror(s, errno, "custr_appendc failure"); + return (-1); + } + } +} + +static void +hdlr_bareword(state_t *s) +{ + const char *str; + + if (collect_bareword(s) != 0) { + return; + } + + str = custr_cstr(s->s_collect); + if (strcmp(str, "true") == 0) { + s->s_top->pf_value_type = JSON_TYPE_BOOLEAN; + s->s_top->pf_value = (void *)B_TRUE; + } else if (strcmp(str, "false") == 0) { + s->s_top->pf_value_type = JSON_TYPE_BOOLEAN; + s->s_top->pf_value = (void *)B_FALSE; + } else if (strcmp(str, "null") == 0) { + s->s_top->pf_value_type = JSON_TYPE_NULL; + } else { + posterror(s, EPROTO, "expected 'true', 'false' or 'null'"); + return; + } + + movestate(s, PARSE_DONE); +} + +/* ARGSUSED */ +static int +collect_number(state_t *s, boolean_t *isint, int32_t *result, + double *fresult __unused) +{ + boolean_t neg = B_FALSE; + int t; + + custr_reset(s->s_collect); + + if (peekchar(s) == '-') { + neg = B_TRUE; + (void) popchar(s); + } + /* + * Read the 'int' portion: + */ + if (!isdigit(peekchar(s))) { + posterror(s, EPROTO, "malformed number: expected digit (0-9)"); + return (-1); + } + for (;;) { + if (!isdigit(peekchar(s))) { + break; + } + if (custr_appendc(s->s_collect, popchar(s)) != 0) { + posterror(s, errno, "custr_append failure"); + return (-1); + } + } + if (peekchar(s) == '.' || peekchar(s) == 'e' || peekchar(s) == 'E') { + posterror(s, ENOTSUP, "do not yet support FRACs or EXPs"); + return (-1); + } + + t = atoi(custr_cstr(s->s_collect)); + + *isint = B_TRUE; + *result = (neg == B_TRUE) ? (-t) : t; + return (0); +} + +static void +hdlr_number(state_t *s) +{ + boolean_t isint; + int32_t result; + double fresult; + + if (collect_number(s, &isint, &result, &fresult) != 0) { + return; + } + + if (isint == B_TRUE) { + s->s_top->pf_value = (void *)(uintptr_t)result; + s->s_top->pf_value_type = JSON_TYPE_INTEGER; + } else { + s->s_top->pf_value = malloc(sizeof (fresult)); + bcopy(&fresult, s->s_top->pf_value, sizeof (fresult)); + s->s_top->pf_value_type = JSON_TYPE_DOUBLE; + } + + movestate(s, PARSE_DONE); +} + +static void +hdlr_rest(state_t *s) +{ + char c; + discard_whitespace(s); + c = popchar(s); + switch (c) { + case '{': + movestate(s, PARSE_OBJECT); + return; + + case '[': + movestate(s, PARSE_ARRAY); + return; + + default: + posterror(s, EPROTO, "EOF before object or array"); + return; + } +} + +static int +add_empty_child(state_t *s) +{ + /* + * Here, we create an empty nvlist to represent this object + * or array: + */ + nvlist_t *empty; + if (nvlist_alloc(&empty, NV_UNIQUE_NAME, 0) != 0) { + posterror(s, errno, "nvlist_alloc failure"); + return (-1); + } + if (s->s_top->pf_next != NULL) { + /* + * If we're a child of the frame above, we store ourselves in + * that frame's nvlist: + */ + nvlist_t *nvl = s->s_top->pf_next->pf_nvl; + char *key = s->s_top->pf_next->pf_key; + + if (nvlist_add_nvlist(nvl, key, empty) != 0) { + posterror(s, errno, "nvlist_add_nvlist failure"); + nvlist_free(empty); + return (-1); + } + nvlist_free(empty); + if (nvlist_lookup_nvlist(nvl, key, &empty) != 0) { + posterror(s, errno, "nvlist_lookup_nvlist failure"); + return (-1); + } + } + s->s_top->pf_nvl = empty; + return (0); +} + +static int +decorate_array(state_t *s) +{ + int idx = s->s_top->pf_array_index; + /* + * When we are done creating an array, we store a 'length' + * property on it, as well as an internal-use marker value. + */ + if (nvlist_add_boolean(s->s_top->pf_nvl, JSON_MARKER_ARRAY) != 0 || + nvlist_add_uint32(s->s_top->pf_nvl, "length", idx) != 0) { + posterror(s, errno, "nvlist_add failure"); + return (-1); + } + + return (0); +} + +static void +hdlr_array(state_t *s) +{ + s->s_top->pf_value_type = JSON_TYPE_ARRAY; + + if (add_empty_child(s) != 0) { + return; + } + + discard_whitespace(s); + + switch (peekchar(s)) { + case ']': + (void) popchar(s); + + if (decorate_array(s) != 0) { + return; + } + + movestate(s, PARSE_DONE); + return; + + default: + movestate(s, PARSE_ARRAY_VALUE); + return; + } +} + +static void +hdlr_array_comma(state_t *s) +{ + discard_whitespace(s); + + switch (popchar(s)) { + case ']': + if (decorate_array(s) != 0) { + return; + } + + movestate(s, PARSE_DONE); + return; + case ',': + movestate(s, PARSE_ARRAY_VALUE); + return; + default: + posterror(s, EPROTO, "expected ',' or ']'"); + return; + } +} + +static void +hdlr_array_value(state_t *s) +{ + char c; + + /* + * Generate keyname from the next array index: + */ + if (s->s_top->pf_key != NULL) { + (void) fprintf(stderr, "pf_key not null! was %s\n", + s->s_top->pf_key); + abort(); + } + + if (asprintf(&s->s_top->pf_key, "%d", s->s_top->pf_array_index++) < 0) { + posterror(s, errno, "asprintf failure"); + return; + } + + discard_whitespace(s); + + /* + * Select which type handler we need for the next value: + */ + switch (c = peekchar(s)) { + case '"': + (void) popchar(s); + (void) pushstate(s, PARSE_STRING, PARSE_ARRAY_COMMA); + return; + + case '{': + (void) popchar(s); + (void) pushstate(s, PARSE_OBJECT, PARSE_ARRAY_COMMA); + return; + + case '[': + (void) popchar(s); + (void) pushstate(s, PARSE_ARRAY, PARSE_ARRAY_COMMA); + return; + + default: + if (islower(c)) { + (void) pushstate(s, PARSE_BAREWORD, + PARSE_ARRAY_COMMA); + return; + } else if (c == '-' || isdigit(c)) { + (void) pushstate(s, PARSE_NUMBER, PARSE_ARRAY_COMMA); + return; + } else { + posterror(s, EPROTO, "unexpected character at start " + "of value"); + return; + } + } +} + +static void +hdlr_object(state_t *s) +{ + s->s_top->pf_value_type = JSON_TYPE_OBJECT; + + if (add_empty_child(s) != 0) { + return; + } + + discard_whitespace(s); + + switch (popchar(s)) { + case '}': + movestate(s, PARSE_DONE); + return; + + case '"': + movestate(s, PARSE_KEY_STRING); + return; + + default: + posterror(s, EPROTO, "expected key or '}'"); + return; + } +} + +static void +hdlr_key_string(state_t *s) +{ + if (collect_string(s) != 0) { + return; + } + + /* + * Record the key name of the next value. + */ + if ((s->s_top->pf_key = strdup(custr_cstr(s->s_collect))) == NULL) { + posterror(s, errno, "strdup failure"); + return; + } + + movestate(s, PARSE_COLON); +} + +static void +hdlr_colon(state_t *s) +{ + char c; + discard_whitespace(s); + + if ((c = popchar(s)) != ':') { + posterror(s, EPROTO, "expected ':'"); + return; + } + + discard_whitespace(s); + + /* + * Select which type handler we need for the value after the colon: + */ + switch (c = peekchar(s)) { + case '"': + (void) popchar(s); + (void) pushstate(s, PARSE_STRING, PARSE_OBJECT_COMMA); + return; + + case '{': + (void) popchar(s); + (void) pushstate(s, PARSE_OBJECT, PARSE_OBJECT_COMMA); + return; + + case '[': + (void) popchar(s); + (void) pushstate(s, PARSE_ARRAY, PARSE_OBJECT_COMMA); + return; + + default: + if (islower(c)) { + (void) pushstate(s, PARSE_BAREWORD, PARSE_OBJECT_COMMA); + return; + } else if (c == '-' || isdigit(c)) { + (void) pushstate(s, PARSE_NUMBER, PARSE_OBJECT_COMMA); + return; + } else { + (void) posterror(s, EPROTO, "unexpected character at " + "start of value"); + return; + } + } +} + +static void +hdlr_object_comma(state_t *s) +{ + discard_whitespace(s); + + switch (popchar(s)) { + case '}': + movestate(s, PARSE_DONE); + return; + + case ',': + discard_whitespace(s); + if (popchar(s) != '"') { + posterror(s, EPROTO, "expected '\"'"); + return; + } + movestate(s, PARSE_KEY_STRING); + return; + + default: + posterror(s, EPROTO, "expected ',' or '}'"); + return; + } +} + +static void +hdlr_string(state_t *s) +{ + if (collect_string(s) != 0) { + return; + } + + s->s_top->pf_value_type = JSON_TYPE_STRING; + if ((s->s_top->pf_value = strdup(custr_cstr(s->s_collect))) == NULL) { + posterror(s, errno, "strdup failure"); + return; + } + + movestate(s, PARSE_DONE); +} + +static int +store_value(state_t *s) +{ + nvlist_t *targ = s->s_top->pf_next->pf_nvl; + char *key = s->s_top->pf_next->pf_key; + json_type_t type = s->s_top->pf_value_type; + int ret = 0; + + switch (type) { + case JSON_TYPE_STRING: + if (nvlist_add_string(targ, key, s->s_top->pf_value) != 0) { + posterror(s, errno, "nvlist_add_string failure"); + ret = -1; + } + free(s->s_top->pf_value); + break; + + case JSON_TYPE_BOOLEAN: + if (nvlist_add_boolean_value(targ, key, + (boolean_t)s->s_top->pf_value) != 0) { + posterror(s, errno, "nvlist_add_boolean_value " + "failure"); + ret = -1; + } + break; + + case JSON_TYPE_NULL: + if (nvlist_add_boolean(targ, key) != 0) { + posterror(s, errno, "nvlist_add_boolean failure"); + ret = -1; + } + break; + + case JSON_TYPE_INTEGER: + if (nvlist_add_int32(targ, key, + (int32_t)(uintptr_t)s->s_top->pf_value) != 0) { + posterror(s, errno, "nvlist_add_int32 failure"); + ret = -1; + } + break; + + case JSON_TYPE_ARRAY: + case JSON_TYPE_OBJECT: + /* + * Objects and arrays are already 'stored' in their target + * nvlist on creation. See: hdlr_object, hdlr_array. + */ + break; + + default: + (void) fprintf(stderr, "ERROR: could not store unknown " + "type %d\n", type); + abort(); + } + + s->s_top->pf_value = NULL; + free(s->s_top->pf_next->pf_key); + s->s_top->pf_next->pf_key = NULL; + return (ret); +} + +static parse_frame_t * +parse_frame_free(parse_frame_t *pf, boolean_t free_nvl) +{ + parse_frame_t *next = pf->pf_next; + if (pf->pf_key != NULL) { + free(pf->pf_key); + } + if (pf->pf_value != NULL) { + abort(); + } + if (free_nvl && pf->pf_nvl != NULL) { + nvlist_free(pf->pf_nvl); + } + free(pf); + return (next); +} + +static parse_handler_t hdlrs[] = { + NULL, /* PARSE_DONE */ + hdlr_rest, /* PARSE_REST */ + hdlr_object, /* PARSE_OBJECT */ + hdlr_key_string, /* PARSE_KEY_STRING */ + hdlr_colon, /* PARSE_COLON */ + hdlr_string, /* PARSE_STRING */ + hdlr_object_comma, /* PARSE_OBJECT_COMMA */ + hdlr_array, /* PARSE_ARRAY */ + hdlr_bareword, /* PARSE_BAREWORD */ + hdlr_number, /* PARSE_NUMBER */ + hdlr_array_value, /* PARSE_ARRAY_VALUE */ + hdlr_array_comma /* PARSE_ARRAY_COMMA */ +}; +#define NUM_PARSE_HANDLERS (int)(sizeof (hdlrs) / sizeof (hdlrs[0])) + +int +nvlist_parse_json(const char *buf, size_t buflen, nvlist_t **nvlp, + nvlist_parse_json_flags_t flag, nvlist_parse_json_error_t *errout) +{ + state_t s; + + /* + * Check for valid flags: + */ + if ((flag & NVJSON_FORCE_INTEGER) && (flag & NVJSON_FORCE_DOUBLE)) { + errno = EINVAL; + return (-1); + } + if ((flag & ~NVJSON_ALL) != 0) { + errno = EINVAL; + return (-1); + } + + /* + * Initialise parsing state structure: + */ + bzero(&s, sizeof (s)); + s.s_in = buf; + s.s_pos = 0; + s.s_len = buflen; + s.s_flags = flag; + + /* + * Allocate the collect buffer string. + */ + if (custr_alloc(&s.s_collect) != 0) { + s.s_errno = errno; + if (errout != NULL) { + (void) snprintf(errout->nje_message, + sizeof (errout->nje_message), + "custr alloc failure: %s", + strerror(errno)); + } + goto out; + } + + /* + * If the caller has requested error information, allocate the error + * string now. + */ + if (errout != NULL) { + if (custr_alloc_buf(&s.s_errstr, errout->nje_message, + sizeof (errout->nje_message)) != 0) { + s.s_errno = errno; + (void) snprintf(errout->nje_message, + sizeof (errout->nje_message), + "custr alloc failure: %s", + strerror(errno)); + goto out; + } + custr_reset(s.s_errstr); + } + + /* + * Allocate top-most stack frame: + */ + if ((s.s_top = calloc(1, sizeof (*s.s_top))) == NULL) { + s.s_errno = errno; + goto out; + } + + s.s_top->pf_ps = PARSE_REST; + for (;;) { + if (s.s_top->pf_ps < 0) { + /* + * The parser reported an error. + */ + goto out; + } + + if (s.s_top->pf_ps == PARSE_DONE) { + if (s.s_top->pf_next == NULL) { + /* + * Last frame, so we're really + * done. + */ + *nvlp = s.s_top->pf_nvl; + goto out; + } else { + /* + * Otherwise, pop a frame and continue in + * previous state. Copy out the value we + * created in the old frame: + */ + if (store_value(&s) != 0) { + goto out; + } + + /* + * Free old frame: + */ + s.s_top = parse_frame_free(s.s_top, B_FALSE); + } + } + + /* + * Dispatch to parser handler routine for this state: + */ + if (s.s_top->pf_ps >= NUM_PARSE_HANDLERS || + hdlrs[s.s_top->pf_ps] == NULL) { + (void) fprintf(stderr, "no handler for state %d\n", + s.s_top->pf_ps); + abort(); + } + hdlrs[s.s_top->pf_ps](&s); + } + +out: + if (errout != NULL) { + /* + * Copy out error number and parse position. The custr_t for + * the error message was backed by the buffer in the error + * object, so no copying is required. + */ + errout->nje_errno = s.s_errno; + errout->nje_pos = s.s_pos; + } + + /* + * Free resources: + */ + while (s.s_top != NULL) { + s.s_top = parse_frame_free(s.s_top, s.s_errno == 0 ? B_FALSE : + B_TRUE); + } + custr_free(s.s_collect); + custr_free(s.s_errstr); + + errno = s.s_errno; + return (s.s_errno == 0 ? 0 : -1); +} diff --git a/usr/src/lib/varpd/files/common/libvarpd_files_json.h b/usr/src/lib/varpd/files/common/libvarpd_files_json.h new file mode 100644 index 0000000000..27506e22d6 --- /dev/null +++ b/usr/src/lib/varpd/files/common/libvarpd_files_json.h @@ -0,0 +1,52 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#ifndef _LIBVARPD_FILES_JSON_H +#define _LIBVARPD_FILES_JSON_H + +#include <libnvpair.h> +#include <libcmdutils.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum nvlist_parse_json_flags { + NVJSON_FORCE_INTEGER = 0x01, + NVJSON_FORCE_DOUBLE = 0x02, + NVJSON_ERRORS_TO_STDERR = 0x04, + NVJSON_DEBUG = 0x08 +} nvlist_parse_json_flags_t; + +typedef struct nvlist_parse_json_error { + int nje_errno; + long nje_pos; + char nje_message[512]; +} nvlist_parse_json_error_t; + +#define NVJSON_ALL \ + (NVJSON_FORCE_INTEGER | \ + NVJSON_FORCE_DOUBLE | \ + NVJSON_ERRORS_TO_STDERR | \ + NVJSON_DEBUG) + +extern int nvlist_parse_json(const char *, size_t, nvlist_t **, + nvlist_parse_json_flags_t, nvlist_parse_json_error_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBVARPD_FILES_JSON_H */ diff --git a/usr/src/lib/varpd/files/common/llib-lvarpd_files b/usr/src/lib/varpd/files/common/llib-lvarpd_files new file mode 100644 index 0000000000..03c34f4fcb --- /dev/null +++ b/usr/src/lib/varpd/files/common/llib-lvarpd_files @@ -0,0 +1,18 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + diff --git a/usr/src/lib/varpd/files/common/mapfile-vers b/usr/src/lib/varpd/files/common/mapfile-vers new file mode 100644 index 0000000000..6b7c5a5067 --- /dev/null +++ b/usr/src/lib/varpd/files/common/mapfile-vers @@ -0,0 +1,35 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION SUNWprivate { + local: + *; +}; diff --git a/usr/src/lib/varpd/files/i386/Makefile b/usr/src/lib/varpd/files/i386/Makefile new file mode 100644 index 0000000000..f2b4f63da5 --- /dev/null +++ b/usr/src/lib/varpd/files/i386/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/files/sparc/Makefile b/usr/src/lib/varpd/files/sparc/Makefile new file mode 100644 index 0000000000..f2b4f63da5 --- /dev/null +++ b/usr/src/lib/varpd/files/sparc/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/files/sparcv9/Makefile b/usr/src/lib/varpd/files/sparcv9/Makefile new file mode 100644 index 0000000000..d552642882 --- /dev/null +++ b/usr/src/lib/varpd/files/sparcv9/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/varpd/libvarpd/Makefile b/usr/src/lib/varpd/libvarpd/Makefile new file mode 100644 index 0000000000..2a4f8f070c --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/Makefile @@ -0,0 +1,55 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../../Makefile.lib + +HDRS = libvarpd.h libvarpd_client.h libvarpd_provider.h +HDRDIR = common +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +TYPECHECK_LIB = libvarpd.so.1 +TYPELIST = \ + varpd_client_instance_arg_t \ + varpd_client_nprops_arg_t \ + varpd_client_propinfo_arg_t \ + varpd_client_eresp_t \ + varpd_persist_header_t \ + overlay_targ_cache_entry_t \ + overlay_targ_cache_t \ + overlay_targ_cache_iter_t + +all := TARGET = all +clean := TARGET = clean +clobber := TARGET = clobber +install := TARGET = install +lint := TARGET = lint + +.KEEP_STATE: + +all clean clobber lint: $(SUBDIRS) + +install: $(SUBDIRS) $(VARPD_MAPFILES) install_h + +install_h: $(ROOTHDRS) + +check: $(CHECKHDRS) $(TYPECHECK) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include ../../Makefile.targ diff --git a/usr/src/lib/varpd/libvarpd/Makefile.com b/usr/src/lib/varpd/libvarpd/Makefile.com new file mode 100644 index 0000000000..1521f83f8f --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/Makefile.com @@ -0,0 +1,53 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +LIBRARY = libvarpd.a +VERS = .1 +OBJECTS = libvarpd.o \ + libvarpd_arp.o \ + libvarpd_client.o \ + libvarpd_door.o \ + libvarpd_overlay.o \ + libvarpd_panic.o \ + libvarpd_persist.o \ + libvarpd_prop.o \ + libvarpd_plugin.o \ + libvarpd_util.o + +include ../../../Makefile.lib + +LIBS = $(DYNLIB) $(LINTLIB) +LDLIBS += -lc -lavl -lumem -lidspace -lnvpair -lmd5 -lrename \ + -lbunyan +CPPFLAGS += -I../common + +CERRWARN += -erroff=E_STRUCT_DERIVED_FROM_FLEX_MBR +LINTFLAGS += -erroff=E_STRUCT_DERIVED_FROM_FLEX_MBR \ + -erroff=E_BAD_PTR_CAST_ALIGN +LINTFLAGS64 += -erroff=E_STRUCT_DERIVED_FROM_FLEX_MBR \ + -erroff=E_BAD_PTR_CAST_ALIGN + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all + +SRCDIR = ../common + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +include ../../../Makefile.targ diff --git a/usr/src/lib/varpd/libvarpd/amd64/Makefile b/usr/src/lib/varpd/libvarpd/amd64/Makefile new file mode 100644 index 0000000000..d552642882 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/amd64/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd.c b/usr/src/lib/varpd/libvarpd/common/libvarpd.c new file mode 100644 index 0000000000..e4460089cc --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd.c @@ -0,0 +1,360 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * varpd library + */ + +#include <stdlib.h> +#include <errno.h> +#include <umem.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/avl.h> +#include <stddef.h> +#include <stdio.h> +#include <strings.h> + +#include <libvarpd_impl.h> + +static int +libvarpd_instance_comparator(const void *lp, const void *rp) +{ + const varpd_instance_t *lpp, *rpp; + lpp = lp; + rpp = rp; + + if (lpp->vri_id > rpp->vri_id) + return (1); + if (lpp->vri_id < rpp->vri_id) + return (-1); + return (0); +} + +static int +libvarpd_instance_lcomparator(const void *lp, const void *rp) +{ + const varpd_instance_t *lpp, *rpp; + lpp = lp; + rpp = rp; + + if (lpp->vri_linkid > rpp->vri_linkid) + return (1); + if (lpp->vri_linkid < rpp->vri_linkid) + return (-1); + return (0); +} + +int +libvarpd_create(varpd_handle_t **vphp) +{ + int ret; + varpd_impl_t *vip; + char buf[32]; + + if (vphp == NULL) + return (EINVAL); + + *vphp = NULL; + vip = umem_alloc(sizeof (varpd_impl_t), UMEM_DEFAULT); + if (vip == NULL) + return (errno); + + bzero(vip, sizeof (varpd_impl_t)); + (void) snprintf(buf, sizeof (buf), "varpd_%p", vip); + vip->vdi_idspace = id_space_create(buf, LIBVARPD_ID_MIN, + LIBVARPD_ID_MAX); + if (vip->vdi_idspace == NULL) { + int ret = errno; + umem_free(vip, sizeof (varpd_impl_t)); + return (ret); + } + + vip->vdi_qcache = umem_cache_create("query", sizeof (varpd_query_t), 0, + NULL, NULL, NULL, NULL, NULL, 0); + if (vip->vdi_qcache == NULL) { + int ret = errno; + id_space_destroy(vip->vdi_idspace); + umem_free(vip, sizeof (varpd_impl_t)); + return (ret); + } + + if ((ret = libvarpd_overlay_init(vip)) != 0) { + umem_cache_destroy(vip->vdi_qcache); + id_space_destroy(vip->vdi_idspace); + umem_free(vip, sizeof (varpd_impl_t)); + return (ret); + } + + if ((ret = bunyan_init("varpd", &vip->vdi_bunyan)) != 0) { + libvarpd_overlay_fini(vip); + umem_cache_destroy(vip->vdi_qcache); + id_space_destroy(vip->vdi_idspace); + umem_free(vip, sizeof (varpd_impl_t)); + return (ret); + } + + libvarpd_persist_init(vip); + + avl_create(&vip->vdi_plugins, libvarpd_plugin_comparator, + sizeof (varpd_plugin_t), offsetof(varpd_plugin_t, vpp_node)); + + avl_create(&vip->vdi_instances, libvarpd_instance_comparator, + sizeof (varpd_instance_t), offsetof(varpd_instance_t, vri_inode)); + avl_create(&vip->vdi_linstances, libvarpd_instance_lcomparator, + sizeof (varpd_instance_t), offsetof(varpd_instance_t, vri_lnode)); + + if (mutex_init(&vip->vdi_lock, USYNC_THREAD | LOCK_ERRORCHECK, + NULL) != 0) + libvarpd_panic("failed to create mutex: %d", errno); + + vip->vdi_doorfd = -1; + *vphp = (varpd_handle_t *)vip; + return (0); +} + +void +libvarpd_destroy(varpd_handle_t *vhp) +{ + varpd_impl_t *vip = (varpd_impl_t *)vhp; + + libvarpd_overlay_lookup_quiesce(vhp); + if (mutex_destroy(&vip->vdi_lock) != 0) + libvarpd_panic("failed to destroy mutex: %d", errno); + libvarpd_persist_fini(vip); + libvarpd_overlay_fini(vip); + umem_cache_destroy(vip->vdi_qcache); + id_space_destroy(vip->vdi_idspace); + umem_free(vip, sizeof (varpd_impl_t)); +} + +int +libvarpd_instance_create(varpd_handle_t *vhp, datalink_id_t linkid, + const char *pname, varpd_instance_handle_t **outp) +{ + int ret; + varpd_impl_t *vip = (varpd_impl_t *)vhp; + varpd_plugin_t *plugin; + varpd_instance_t *inst, lookup; + overlay_plugin_dest_t dest; + uint64_t vid; + + /* + * We should really have our own errnos. + */ + plugin = libvarpd_plugin_lookup(vip, pname); + if (plugin == NULL) + return (ENOENT); + + if ((ret = libvarpd_overlay_info(vip, linkid, &dest, NULL, &vid)) != 0) + return (ret); + + inst = umem_alloc(sizeof (varpd_instance_t), UMEM_DEFAULT); + if (inst == NULL) + return (ENOMEM); + + inst->vri_id = id_alloc(vip->vdi_idspace); + if (inst->vri_id == -1) + libvarpd_panic("failed to allocate id from vdi_idspace: %d", + errno); + inst->vri_linkid = linkid; + inst->vri_vnetid = vid; + inst->vri_mode = plugin->vpp_mode; + inst->vri_dest = dest; + inst->vri_plugin = plugin; + inst->vri_impl = vip; + inst->vri_flags = 0; + if ((ret = plugin->vpp_ops->vpo_create((varpd_provider_handle_t *)inst, + &inst->vri_private, dest)) != 0) { + id_free(vip->vdi_idspace, inst->vri_id); + umem_free(inst, sizeof (varpd_instance_t)); + return (ret); + } + + if (mutex_init(&inst->vri_lock, USYNC_THREAD | LOCK_ERRORCHECK, + NULL) != 0) + libvarpd_panic("failed to create mutex: %d", errno); + + mutex_enter(&vip->vdi_lock); + lookup.vri_id = inst->vri_id; + if (avl_find(&vip->vdi_instances, &lookup, NULL) != NULL) + libvarpd_panic("found duplicate instance with id %d", + lookup.vri_id); + avl_add(&vip->vdi_instances, inst); + lookup.vri_linkid = inst->vri_linkid; + if (avl_find(&vip->vdi_linstances, &lookup, NULL) != NULL) + libvarpd_panic("found duplicate linstance with id %d", + lookup.vri_linkid); + avl_add(&vip->vdi_linstances, inst); + mutex_exit(&vip->vdi_lock); + *outp = (varpd_instance_handle_t *)inst; + return (0); +} + +uint64_t +libvarpd_instance_id(varpd_instance_handle_t *ihp) +{ + varpd_instance_t *inst = (varpd_instance_t *)ihp; + return (inst->vri_id); +} + +uint64_t +libvarpd_plugin_vnetid(varpd_provider_handle_t *vhp) +{ + varpd_instance_t *inst = (varpd_instance_t *)vhp; + return (inst->vri_vnetid); +} + +varpd_instance_handle_t * +libvarpd_instance_lookup(varpd_handle_t *vhp, uint64_t id) +{ + varpd_impl_t *vip = (varpd_impl_t *)vhp; + varpd_instance_t lookup, *retp; + + lookup.vri_id = id; + mutex_enter(&vip->vdi_lock); + retp = avl_find(&vip->vdi_instances, &lookup, NULL); + mutex_exit(&vip->vdi_lock); + return ((varpd_instance_handle_t *)retp); +} + +/* + * If this function becomes external to varpd, we need to change it to return a + * varpd_instance_handle_t. + */ +varpd_instance_t * +libvarpd_instance_lookup_by_dlid(varpd_impl_t *vip, datalink_id_t linkid) +{ + varpd_instance_t lookup, *retp; + + lookup.vri_linkid = linkid; + mutex_enter(&vip->vdi_lock); + retp = avl_find(&vip->vdi_linstances, &lookup, NULL); + mutex_exit(&vip->vdi_lock); + return (retp); +} + +/* + * When an instance is being destroyed, that means we should deactivate it, as + * well as clean it up. That means here, the proper order is calling the plug-in + * stop and then the destroy function. + */ +void +libvarpd_instance_destroy(varpd_instance_handle_t *ihp) +{ + varpd_instance_t *inst = (varpd_instance_t *)ihp; + varpd_impl_t *vip = inst->vri_impl; + + /* + * First things first, remove it from global visibility. + */ + mutex_enter(&vip->vdi_lock); + avl_remove(&vip->vdi_instances, inst); + avl_remove(&vip->vdi_linstances, inst); + mutex_exit(&vip->vdi_lock); + + mutex_enter(&inst->vri_lock); + + /* + * We need to clean up this instance, that means remove it from + * persistence and stopping it. Then finally we'll have to clean it up + * entirely. + */ + if (inst->vri_flags & VARPD_INSTANCE_F_ACTIVATED) { + inst->vri_flags &= ~VARPD_INSTANCE_F_ACTIVATED; + libvarpd_torch_instance(vip, inst); + inst->vri_plugin->vpp_ops->vpo_stop(inst->vri_private); + inst->vri_plugin->vpp_ops->vpo_destroy(inst->vri_private); + inst->vri_private = NULL; + } + mutex_exit(&inst->vri_lock); + + /* Do the full clean up of the instance */ + if (mutex_destroy(&inst->vri_lock) != 0) + libvarpd_panic("failed to destroy instance vri_lock"); + id_free(vip->vdi_idspace, inst->vri_id); + umem_free(inst, sizeof (varpd_instance_t)); +} + +int +libvarpd_instance_activate(varpd_instance_handle_t *ihp) +{ + int ret; + varpd_instance_t *inst = (varpd_instance_t *)ihp; + + mutex_enter(&inst->vri_lock); + + if (inst->vri_flags & VARPD_INSTANCE_F_ACTIVATED) { + ret = EEXIST; + goto out; + } + + if ((ret = inst->vri_plugin->vpp_ops->vpo_start(inst->vri_private)) != + 0) + goto out; + + if ((ret = libvarpd_persist_instance(inst->vri_impl, inst)) != 0) + goto out; + + /* + * If this fails, we don't need to call stop, as the caller should end + * up calling destroy on the instance, which takes care of calling stop + * and destroy. + */ + if ((ret = libvarpd_overlay_associate(inst)) != 0) + goto out; + + inst->vri_flags |= VARPD_INSTANCE_F_ACTIVATED; + +out: + mutex_exit(&inst->vri_lock); + return (ret); +} + +const bunyan_logger_t * +libvarpd_plugin_bunyan(varpd_provider_handle_t *vhp) +{ + varpd_instance_t *inst = (varpd_instance_t *)vhp; + return (inst->vri_impl->vdi_bunyan); +} + +static void +libvarpd_prefork(void) +{ + libvarpd_plugin_prefork(); +} + +static void +libvarpd_postfork(void) +{ + libvarpd_plugin_postfork(); +} + +#pragma init(libvarpd_init) +static void +libvarpd_init(void) +{ + libvarpd_plugin_init(); + if (pthread_atfork(libvarpd_prefork, libvarpd_postfork, + libvarpd_postfork) != 0) + libvarpd_panic("failed to create varpd atfork: %d", errno); +} + +#pragma fini(libvarpd_fini) +static void +libvarpd_fini(void) +{ + libvarpd_plugin_fini(); +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd.h b/usr/src/lib/varpd/libvarpd/common/libvarpd.h new file mode 100644 index 0000000000..aaf78e2ca6 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd.h @@ -0,0 +1,77 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#ifndef _LIBVARPD_H +#define _LIBVARPD_H + +/* + * varpd interfaces + */ + +#include <sys/types.h> +#include <stdint.h> +#include <sys/mac.h> +#include <libvarpd_client.h> +#include <stdio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct __varpd_handle varpd_handle_t; +typedef struct __varpd_prop_handle varpd_prop_handle_t; +typedef struct __varpd_instance_handle varpd_instance_handle_t; + +extern int libvarpd_create(varpd_handle_t **); +extern void libvarpd_destroy(varpd_handle_t *); + +extern int libvarpd_persist_enable(varpd_handle_t *, const char *); +extern int libvarpd_persist_restore(varpd_handle_t *); +extern int libvarpd_persist_disable(varpd_handle_t *); + +extern int libvarpd_instance_create(varpd_handle_t *, datalink_id_t, + const char *, varpd_instance_handle_t **); +extern uint64_t libvarpd_instance_id(varpd_instance_handle_t *); +extern varpd_instance_handle_t *libvarpd_instance_lookup(varpd_handle_t *, + uint64_t); +extern void libvarpd_instance_destroy(varpd_instance_handle_t *); +extern int libvarpd_instance_activate(varpd_instance_handle_t *); + +extern int libvarpd_plugin_load(varpd_handle_t *, const char *); +typedef int (*libvarpd_plugin_walk_f)(varpd_handle_t *, const char *, void *); +extern int libvarpd_plugin_walk(varpd_handle_t *, libvarpd_plugin_walk_f, + void *); + +extern int libvarpd_prop_handle_alloc(varpd_handle_t *, + varpd_instance_handle_t *, varpd_prop_handle_t **); +extern void libvarpd_prop_handle_free(varpd_prop_handle_t *); +extern int libvarpd_prop_nprops(varpd_instance_handle_t *, uint_t *); +extern int libvarpd_prop_info_fill(varpd_prop_handle_t *, uint_t); +extern int libvarpd_prop_info(varpd_prop_handle_t *, const char **, uint_t *, + uint_t *, const void **, uint32_t *, const mac_propval_range_t **); +extern int libvarpd_prop_get(varpd_prop_handle_t *, void *, uint32_t *); +extern int libvarpd_prop_set(varpd_prop_handle_t *, const void *, uint32_t); + +extern int libvarpd_door_server_create(varpd_handle_t *, const char *); +extern void libvarpd_door_server_destroy(varpd_handle_t *); + +extern void libvarpd_overlay_lookup_run(varpd_handle_t *); +extern void libvarpd_overlay_lookup_quiesce(varpd_handle_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBVARPD_H */ diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c new file mode 100644 index 0000000000..df69207fe0 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c @@ -0,0 +1,650 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * Common routines for implmeenting proxy arp + */ + +#include <sys/types.h> +#include <net/if.h> +#include <netinet/if_ether.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/icmp6.h> +#include <netinet/udp.h> +#include <netinet/dhcp.h> +#include <libvarpd_impl.h> +#include <sys/vlan.h> +#include <strings.h> +#include <assert.h> + +#define IPV6_VERSION 6 + +typedef struct varpd_arp_query { + int vaq_type; + char vaq_buf[ETHERMAX + VLAN_TAGSZ]; + size_t vaq_bsize; + uint8_t vaq_lookup[ETHERADDRL]; + struct sockaddr_storage vaq_sock; + varpd_instance_t *vaq_inst; + struct ether_arp *vaq_ea; + varpd_query_handle_t *vaq_query; + const overlay_targ_lookup_t *vaq_otl; + ip6_t *vaq_ip6; + nd_neighbor_solicit_t *vaq_ns; +} varpd_arp_query_t; + +typedef struct varpd_dhcp_query { + char vdq_buf[ETHERMAX + VLAN_TAGSZ]; + size_t vdq_bsize; + uint8_t vdq_lookup[ETHERADDRL]; + const overlay_targ_lookup_t *vdq_otl; + varpd_instance_t *vdq_inst; + varpd_query_handle_t *vdq_query; + struct ether_header *vdq_ether; +} varpd_dhcp_query_t; + +static const uint8_t libvarpd_arp_bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff }; + +void +libvarpd_plugin_proxy_arp(varpd_provider_handle_t *hdl, + varpd_query_handle_t *vqh, const overlay_targ_lookup_t *otl) +{ + varpd_arp_query_t *vaq; + varpd_instance_t *inst = (varpd_instance_t *)hdl; + struct ether_arp *ea; + struct sockaddr_in *ip; + + vaq = umem_alloc(sizeof (varpd_arp_query_t), UMEM_DEFAULT); + if (vaq == NULL) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + return; + } + vaq->vaq_bsize = sizeof (vaq->vaq_buf); + + if (otl->otl_sap != ETHERTYPE_ARP) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + /* + * An ARP packet should not be very large because it's definited to only + * be allowed to have a single entry at a given time. But our data must + * be at least as large as an ether_arp and our header must be at least + * as large as a standard ethernet header. + */ + if (otl->otl_hdrsize + otl->otl_pktsize > vaq->vaq_bsize || + otl->otl_pktsize < sizeof (struct ether_arp) || + otl->otl_hdrsize < sizeof (struct ether_header)) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + if (libvarpd_overlay_packet(inst->vri_impl, otl, vaq->vaq_buf, + &vaq->vaq_bsize) != 0) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + if (otl->otl_hdrsize + otl->otl_pktsize < vaq->vaq_bsize) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + ea = (void *)((uintptr_t)vaq->vaq_buf + (uintptr_t)otl->otl_hdrsize); + + /* + * Make sure it matches something that we know about. + */ + if (ntohs(ea->ea_hdr.ar_hrd) != ARPHRD_ETHER || + ntohs(ea->ea_hdr.ar_pro) != ETHERTYPE_IP || + ea->ea_hdr.ar_hln != ETHERADDRL || + ea->ea_hdr.ar_pln != sizeof (ea->arp_spa) || + ntohs(ea->ea_hdr.ar_op) != ARPOP_REQUEST) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + /* + * Now that we've verified that our data is sane, see if we're doing a + * gratuitous arp and if so, drop it. Otherwise, we may end up + * triggering duplicate address detection. + */ + if (bcmp(ea->arp_spa, ea->arp_tpa, sizeof (ea->arp_spa)) == 0) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + bzero(&vaq->vaq_sock, sizeof (struct sockaddr_storage)); + ip = (struct sockaddr_in *)&vaq->vaq_sock; + ip->sin_family = AF_INET; + bcopy(ea->arp_tpa, &ip->sin_addr, sizeof (ea->arp_tpa)); + + vaq->vaq_type = AF_INET; + vaq->vaq_inst = inst; + vaq->vaq_ea = ea; + vaq->vaq_query = vqh; + vaq->vaq_otl = otl; + + if (inst->vri_plugin->vpp_ops->vpo_arp == NULL) + libvarpd_panic("%s plugin asked to do arp, but has no method", + inst->vri_plugin->vpp_name); + + inst->vri_plugin->vpp_ops->vpo_arp(inst->vri_private, + (varpd_arp_handle_t *)vaq, VARPD_QTYPE_ETHERNET, + (struct sockaddr *)ip, vaq->vaq_lookup); +} + +static void +libvarpd_proxy_arp_fini(varpd_arp_query_t *vaq) +{ + struct ether_header *ether; + struct sockaddr_in *ip; + + ip = (struct sockaddr_in *)&vaq->vaq_sock; + /* + * Modify our packet in place for a reply. We need to swap around the + * sender and target addresses. + */ + vaq->vaq_ea->ea_hdr.ar_op = htons(ARPOP_REPLY); + bcopy(vaq->vaq_ea->arp_sha, vaq->vaq_ea->arp_tha, ETHERADDRL); + bcopy(vaq->vaq_lookup, vaq->vaq_ea->arp_sha, ETHERADDRL); + bcopy(vaq->vaq_ea->arp_spa, &ip->sin_addr, + sizeof (vaq->vaq_ea->arp_spa)); + bcopy(vaq->vaq_ea->arp_tpa, vaq->vaq_ea->arp_spa, + sizeof (vaq->vaq_ea->arp_spa)); + bcopy(&ip->sin_addr, vaq->vaq_ea->arp_tpa, + sizeof (vaq->vaq_ea->arp_spa)); + + /* + * Finally go ahead and fix up the mac header and reply to the sender + * explicitly. + */ + ether = (struct ether_header *)vaq->vaq_buf; + bcopy(ðer->ether_shost, ðer->ether_dhost, ETHERADDRL); + bcopy(vaq->vaq_lookup, ðer->ether_shost, ETHERADDRL); + + (void) libvarpd_overlay_inject(vaq->vaq_inst->vri_impl, vaq->vaq_otl, + vaq->vaq_buf, vaq->vaq_bsize); + + libvarpd_plugin_query_reply(vaq->vaq_query, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); +} + +static uint16_t +libvarpd_icmpv6_checksum(const ip6_t *v6hdr, const uint16_t *buf, uint16_t mlen) +{ + int i; + uint16_t *v; + uint32_t sum = 0; + + assert(mlen % 2 == 0); + v = (uint16_t *)&v6hdr->ip6_src; + for (i = 0; i < sizeof (struct in6_addr); i += 2, v++) + sum += *v; + v = (uint16_t *)&v6hdr->ip6_dst; + for (i = 0; i < sizeof (struct in6_addr); i += 2, v++) + sum += *v; + sum += htons(mlen); +#ifdef _BIG_ENDIAN + sum += IPPROTO_ICMPV6; +#else + sum += IPPROTO_ICMPV6 << 8; +#endif /* _BIG_ENDIAN */ + + for (i = 0; i < mlen; i += 2, buf++) + sum += *buf; + + while ((sum >> 16) != 0) + sum = (sum & 0xffff) + (sum >> 16); + + return (sum & 0xffff); +} + +/* + * Proxying NDP is much more involved than proxying ARP. For starters, NDP + * neighbor solicitations are implemented in terms of IPv6 ICMP as opposed to + * its own Ethertype. Therefore, we're going to have to grab a packet if it's a + * multicast packet and then determine if we actually want to do anything with + * it. + */ +void +libvarpd_plugin_proxy_ndp(varpd_provider_handle_t *hdl, + varpd_query_handle_t *vqh, const overlay_targ_lookup_t *otl) +{ + size_t bsize, plen; + varpd_arp_query_t *vaq; + ip6_t *v6hdr; + nd_neighbor_solicit_t *ns; + nd_opt_hdr_t *opt; + struct sockaddr_in6 *s6; + + varpd_instance_t *inst = (varpd_instance_t *)hdl; + uint8_t *eth = NULL; + + vaq = umem_alloc(sizeof (varpd_arp_query_t), UMEM_DEFAULT); + if (vaq == NULL) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + return; + } + vaq->vaq_bsize = sizeof (vaq->vaq_buf); + + if (otl->otl_dstaddr[0] != 0x33 || + otl->otl_dstaddr[1] != 0x33) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + /* + * If we have more than a standard frame size for the ICMP neighbor + * solicitation, drop it. Similarly if there isn't enough data present + * for us, drop it. + */ + if (otl->otl_hdrsize + otl->otl_pktsize > vaq->vaq_bsize) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + if (otl->otl_pktsize < sizeof (ip6_t) + + sizeof (nd_neighbor_solicit_t)) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + if (libvarpd_overlay_packet(inst->vri_impl, otl, vaq->vaq_buf, + &vaq->vaq_bsize) != 0) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + bsize = vaq->vaq_bsize; + bsize -= otl->otl_hdrsize; + assert(bsize > sizeof (ip6_t)); + + v6hdr = (ip6_t *)(vaq->vaq_buf + otl->otl_hdrsize); + if (((v6hdr->ip6_vfc & 0xf0) >> 4) != IPV6_VERSION) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + if (v6hdr->ip6_nxt != IPPROTO_ICMPV6) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + /* + * In addition to getting these requests on the multicast address for + * node solicitation, we may also end up getting them on a generic + * multicast address due to timeouts or other choices by various OSes. + * We should fairly liberal and accept both, even though the standard + * wants them to a solicitation address. + */ + if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&v6hdr->ip6_dst) && + !IN6_IS_ADDR_MC_LINKLOCAL(&v6hdr->ip6_dst)) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + bsize -= sizeof (ip6_t); + plen = ntohs(v6hdr->ip6_plen); + if (bsize < plen) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + /* + * Now we know that this is an ICMPv6 request targetting the right + * IPv6 multicast prefix. Let's go through and verify that ICMPv6 + * indicates that we have the real thing and ensure that per RFC 4861 + * the target address is not a multicast address. Further, because this + * is a multicast on Ethernet, we must have a source link-layer address. + * + * We should probably enforce that we have a valid ICMP checksum at some + * point. + */ + ns = (nd_neighbor_solicit_t *)(vaq->vaq_buf + otl->otl_hdrsize + + sizeof (ip6_t)); + if (ns->nd_ns_type != ND_NEIGHBOR_SOLICIT && ns->nd_ns_code != 0) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + if (IN6_IS_ADDR_MULTICAST(&ns->nd_ns_target) || + IN6_IS_ADDR_V4MAPPED(&ns->nd_ns_target) || + IN6_IS_ADDR_LOOPBACK(&ns->nd_ns_target)) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + plen -= sizeof (nd_neighbor_solicit_t); + opt = (nd_opt_hdr_t *)(ns+1); + while (plen >= sizeof (struct nd_opt_hdr)) { + /* If we have an option with no lenght, that's clear bogus */ + if (opt->nd_opt_len == 0) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + if (opt->nd_opt_type == ND_OPT_SOURCE_LINKADDR) { + eth = (uint8_t *)((uintptr_t)opt + + sizeof (nd_opt_hdr_t)); + } + plen -= opt->nd_opt_len * 8; + opt = (nd_opt_hdr_t *)((uintptr_t)opt + + opt->nd_opt_len * 8); + } + + if (eth == NULL) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + bzero(&vaq->vaq_sock, sizeof (struct sockaddr_storage)); + s6 = (struct sockaddr_in6 *)&vaq->vaq_sock; + s6->sin6_family = AF_INET6; + bcopy(&ns->nd_ns_target, &s6->sin6_addr, sizeof (s6->sin6_addr)); + + if (inst->vri_plugin->vpp_ops->vpo_arp == NULL) + libvarpd_panic("%s plugin asked to do arp, but has no method", + inst->vri_plugin->vpp_name); + + vaq->vaq_type = AF_INET6; + vaq->vaq_inst = inst; + vaq->vaq_ea = NULL; + vaq->vaq_query = vqh; + vaq->vaq_otl = otl; + vaq->vaq_ns = ns; + vaq->vaq_ip6 = v6hdr; + inst->vri_plugin->vpp_ops->vpo_arp(inst->vri_private, + (varpd_arp_handle_t *)vaq, VARPD_QTYPE_ETHERNET, + (struct sockaddr *)s6, vaq->vaq_lookup); +} + +static void +libvarpd_proxy_ndp_fini(varpd_arp_query_t *vaq) +{ + char resp[ETHERMAX + VLAN_TAGSZ]; + struct ether_header *ether; + nd_neighbor_advert_t *na; + nd_opt_hdr_t *opt; + ip6_t *v6hdr; + size_t roff = 0; + + /* + * Now we need to assemble an RA as a response. Unlike with arp, we opt + * to use a new packet just to make things a bit simpler saner here. + */ + v6hdr = vaq->vaq_ip6; + bcopy(vaq->vaq_buf, resp, vaq->vaq_otl->otl_hdrsize); + ether = (struct ether_header *)resp; + bcopy(ðer->ether_shost, ðer->ether_dhost, ETHERADDRL); + bcopy(vaq->vaq_lookup, ðer->ether_shost, ETHERADDRL); + roff += vaq->vaq_otl->otl_hdrsize; + bcopy(v6hdr, resp + roff, sizeof (ip6_t)); + v6hdr = (ip6_t *)(resp + roff); + bcopy(&v6hdr->ip6_src, &v6hdr->ip6_dst, sizeof (struct in6_addr)); + bcopy(&vaq->vaq_ns->nd_ns_target, &v6hdr->ip6_src, + sizeof (struct in6_addr)); + roff += sizeof (ip6_t); + na = (nd_neighbor_advert_t *)(resp + roff); + na->nd_na_type = ND_NEIGHBOR_ADVERT; + na->nd_na_code = 0; + /* + * RFC 4443 defines that we should set the checksum to zero before we + * calculate the checksumat we should set the checksum to zero before we + * calculate it. + */ + na->nd_na_cksum = 0; + /* + * Nota bene, the header <netinet/icmp6.h> has already transformed this + * into the appropriate host order. Don't use htonl. + */ + na->nd_na_flags_reserved = ND_NA_FLAG_SOLICITED | ND_NA_FLAG_OVERRIDE; + bcopy(&vaq->vaq_ns->nd_ns_target, &na->nd_na_target, + sizeof (struct in6_addr)); + roff += sizeof (nd_neighbor_advert_t); + + opt = (nd_opt_hdr_t *)(resp + roff); + opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; + opt->nd_opt_len = 1; + roff += sizeof (nd_opt_hdr_t); + bcopy(vaq->vaq_lookup, resp + roff, ETHERADDRL); + roff += ETHERADDRL; + + /* + * Now that we've filled in the packet, go back and compute the checksum + * and fill in the IPv6 payload size. + */ + v6hdr->ip6_plen = htons(roff - sizeof (ip6_t) - + vaq->vaq_otl->otl_hdrsize); + na->nd_na_cksum = ~libvarpd_icmpv6_checksum(v6hdr, (uint16_t *)na, + ntohs(v6hdr->ip6_plen)) & 0xffff; + + (void) libvarpd_overlay_inject(vaq->vaq_inst->vri_impl, vaq->vaq_otl, + resp, roff); + + libvarpd_plugin_query_reply(vaq->vaq_query, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); +} + +void +libvarpd_plugin_arp_reply(varpd_arp_handle_t *vah, int action) +{ + varpd_arp_query_t *vaq = (varpd_arp_query_t *)vah; + + if (vaq == NULL) + libvarpd_panic("unknown plugin passed invalid " + "varpd_arp_handle_t"); + + if (action == VARPD_LOOKUP_DROP) { + libvarpd_plugin_query_reply(vaq->vaq_query, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } else if (action != VARPD_LOOKUP_OK) + libvarpd_panic("%s plugin returned invalid action %d", + vaq->vaq_inst->vri_plugin->vpp_name, action); + + switch (vaq->vaq_type) { + case AF_INET: + libvarpd_proxy_arp_fini(vaq); + break; + case AF_INET6: + libvarpd_proxy_ndp_fini(vaq); + break; + default: + libvarpd_panic("encountered unknown vaq_type: %d", + vaq->vaq_type); + } +} + +void +libvarpd_plugin_proxy_dhcp(varpd_provider_handle_t *hdl, + varpd_query_handle_t *vqh, const overlay_targ_lookup_t *otl) +{ + varpd_dhcp_query_t *vdq; + struct ether_header *ether; + struct ip *ip; + struct udphdr *udp; + varpd_instance_t *inst = (varpd_instance_t *)hdl; + + vdq = umem_alloc(sizeof (varpd_dhcp_query_t), UMEM_DEFAULT); + if (vdq == NULL) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + return; + } + vdq->vdq_bsize = sizeof (vdq->vdq_buf); + + if (otl->otl_sap != ETHERTYPE_IP) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + if (bcmp(otl->otl_dstaddr, libvarpd_arp_bcast, ETHERADDRL) != 0) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + if (otl->otl_hdrsize + otl->otl_pktsize > vdq->vdq_bsize || + otl->otl_pktsize < sizeof (struct ip) + sizeof (struct udphdr) + + sizeof (struct dhcp) || + otl->otl_hdrsize < sizeof (struct ether_header)) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + if (libvarpd_overlay_packet(inst->vri_impl, otl, vdq->vdq_buf, + &vdq->vdq_bsize) != 0) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + if (vdq->vdq_bsize != otl->otl_hdrsize + otl->otl_pktsize) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + ether = (struct ether_header *)vdq->vdq_buf; + ip = (struct ip *)(vdq->vdq_buf + otl->otl_hdrsize); + + if (ip->ip_v != IPVERSION && ip->ip_p != IPPROTO_UDP) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + if (otl->otl_hdrsize + ip->ip_hl * 4 + sizeof (struct udphdr) > + vdq->vdq_bsize) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + udp = (struct udphdr *)(vdq->vdq_buf + otl->otl_hdrsize + + ip->ip_hl * 4); + + if (ntohs(udp->uh_sport) != IPPORT_BOOTPC || + ntohs(udp->uh_dport) != IPPORT_BOOTPS) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + vdq->vdq_ether = ether; + vdq->vdq_inst = inst; + vdq->vdq_query = vqh; + vdq->vdq_otl = otl; + + if (inst->vri_plugin->vpp_ops->vpo_dhcp == NULL) + libvarpd_panic("%s plugin asked to do dhcp, but has no method", + inst->vri_plugin->vpp_name); + + inst->vri_plugin->vpp_ops->vpo_dhcp(inst->vri_private, + (varpd_dhcp_handle_t *)vdq, VARPD_QTYPE_ETHERNET, otl, + vdq->vdq_lookup); +} + +void +libvarpd_plugin_dhcp_reply(varpd_dhcp_handle_t *vdh, int action) +{ + varpd_dhcp_query_t *vdq = (varpd_dhcp_query_t *)vdh; + + if (vdq == NULL) + libvarpd_panic("unknown plugin passed invalid " + "varpd_dhcp_handle_t"); + + if (action == VARPD_LOOKUP_DROP) { + libvarpd_plugin_query_reply(vdq->vdq_query, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } else if (action != VARPD_LOOKUP_OK) + libvarpd_panic("%s plugin returned invalid action %d", + vdq->vdq_inst->vri_plugin->vpp_name, action); + + bcopy(vdq->vdq_lookup, &vdq->vdq_ether->ether_dhost, ETHERADDRL); + (void) libvarpd_overlay_resend(vdq->vdq_inst->vri_impl, vdq->vdq_otl, + vdq->vdq_buf, vdq->vdq_bsize); + + libvarpd_plugin_query_reply(vdq->vdq_query, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); +} + +/* + * Inject a gratuitious ARP packet to the specified mac address. + */ +void +libvarpd_inject_arp(varpd_provider_handle_t *vph, const uint16_t vlan, + const uint8_t *srcmac, const struct in_addr *srcip, const uint8_t *dstmac) +{ + char buf[500]; + size_t bsize = 0; + struct ether_arp *ea; + varpd_instance_t *inst = (varpd_instance_t *)vph; + + if (vlan != 0) { + struct ether_vlan_header *eh; + eh = (struct ether_vlan_header *)(buf + bsize); + bsize += sizeof (struct ether_vlan_header); + bcopy(dstmac, &eh->ether_dhost, ETHERADDRL); + bcopy(srcmac, &eh->ether_shost, ETHERADDRL); + eh->ether_tpid = htons(ETHERTYPE_VLAN); + eh->ether_tci = htons(VLAN_TCI(0, ETHER_CFI, vlan)); + eh->ether_type = htons(ETHERTYPE_ARP); + } else { + struct ether_header *eh; + eh = (struct ether_header *)(buf + bsize); + bsize += sizeof (struct ether_header); + bcopy(dstmac, &eh->ether_dhost, ETHERADDRL); + bcopy(srcmac, &eh->ether_shost, ETHERADDRL); + eh->ether_type = htons(ETHERTYPE_ARP); + } + + ea = (struct ether_arp *)(buf + bsize); + bsize += sizeof (struct ether_arp); + ea->ea_hdr.ar_hrd = htons(ARPHRD_ETHER); + ea->ea_hdr.ar_pro = htons(ETHERTYPE_IP); + ea->ea_hdr.ar_hln = ETHERADDRL; + ea->ea_hdr.ar_pln = sizeof (struct in_addr); + ea->ea_hdr.ar_op = htons(ARPOP_REQUEST); + bcopy(srcmac, ea->arp_sha, ETHERADDRL); + bcopy(srcip, ea->arp_spa, sizeof (struct in_addr)); + bcopy(libvarpd_arp_bcast, ea->arp_tha, ETHERADDRL); + bcopy(srcip, ea->arp_tpa, sizeof (struct in_addr)); + + (void) libvarpd_overlay_instance_inject(inst, buf, bsize); +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_client.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.c new file mode 100644 index 0000000000..b0fa907386 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.c @@ -0,0 +1,626 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * varpd client interfaces + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <umem.h> +#include <unistd.h> +#include <string.h> +#include <strings.h> +#include <door.h> + +#include <libvarpd_impl.h> + +typedef struct varpd_client { + int vcl_doorfd; +} varpd_client_t; + +typedef struct varpd_client_prop_info { + varpd_client_t *vcprop_client; + uint64_t vcprop_instance; + uint_t vcprop_propid; + uint_t vcprop_type; + uint_t vcprop_prot; + uint32_t vcprop_defsize; + uint32_t vcprop_psize; + char vcprop_name[LIBVARPD_PROP_NAMELEN]; + uint8_t vcprop_default[LIBVARPD_PROP_SIZEMAX]; + uint8_t vcprop_poss[LIBVARPD_PROP_SIZEMAX]; +} varpd_client_prop_info_t; + +static int +libvarpd_c_door_call(varpd_client_t *client, varpd_client_arg_t *argp, + size_t altsize) +{ + int ret; + door_arg_t darg; + + darg.data_ptr = (char *)argp; + darg.desc_ptr = NULL; + darg.desc_num = 0; + darg.rbuf = (char *)argp; + if (altsize != 0) { + darg.data_size = altsize; + darg.rsize = altsize; + } else { + darg.data_size = sizeof (varpd_client_arg_t); + darg.rsize = sizeof (varpd_client_arg_t); + } + + do { + ret = door_call(client->vcl_doorfd, &darg); + } while (ret != 0 && errno == EINTR); + if (ret != 0) { + switch (errno) { + case E2BIG: + case EFAULT: + case EINVAL: + case ENOTSUP: + case EOVERFLOW: + case ENFILE: + libvarpd_panic("unhandalable errno from door_call: %d", + errno); + } + ret = errno; + } + + return (ret); +} + +int +libvarpd_c_create(varpd_client_handle_t **chpp, const char *doorname) +{ + varpd_client_t *client; + + client = umem_alloc(sizeof (varpd_client_t), UMEM_DEFAULT); + if (client == NULL) + return (ENOMEM); + + client->vcl_doorfd = open(doorname, O_RDWR); + if (client->vcl_doorfd < 0) { + int ret = errno; + umem_free(client, sizeof (varpd_client_t)); + return (ret); + } + + *chpp = (varpd_client_handle_t *)client; + return (0); +} + +void +libvarpd_c_destroy(varpd_client_handle_t *chp) +{ + varpd_client_t *client = (varpd_client_t *)chp; + if (close(client->vcl_doorfd) != 0) + libvarpd_panic("failed to close door fd %d: %d", + client->vcl_doorfd, errno); + + umem_free(chp, sizeof (varpd_client_handle_t *)); +} + +int +libvarpd_c_instance_create(varpd_client_handle_t *chp, datalink_id_t linkid, + const char *search, uint64_t *cidp) +{ + int ret; + varpd_client_t *client = (varpd_client_t *)chp; + varpd_client_arg_t carg; + varpd_client_create_arg_t *cap = &carg.vca_un.vca_create; + + if (strlen(search) >= LIBVARPD_PROP_NAMELEN) + return (EINVAL); + carg.vca_command = VARPD_CLIENT_CREATE; + carg.vca_errno = 0; + cap->vcca_linkid = linkid; + (void) strlcpy(cap->vcca_plugin, search, LIBVARPD_PROP_NAMELEN); + + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + *cidp = cap->vcca_id; + + return (0); +} + +int +libvarpd_c_instance_activate(varpd_client_handle_t *chp, uint64_t cid) +{ + int ret; + varpd_client_t *client = (varpd_client_t *)chp; + varpd_client_arg_t carg; + varpd_client_instance_arg_t *vciap = &carg.vca_un.vca_instance; + + carg.vca_command = VARPD_CLIENT_ACTIVATE; + carg.vca_errno = 0; + vciap->vcia_id = cid; + + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + return (0); +} + +int +libvarpd_c_instance_destroy(varpd_client_handle_t *chp, uint64_t cid) +{ + int ret; + varpd_client_t *client = (varpd_client_t *)chp; + varpd_client_arg_t carg; + varpd_client_instance_arg_t *vciap = &carg.vca_un.vca_instance; + + carg.vca_command = VARPD_CLIENT_DESTROY; + carg.vca_errno = 0; + vciap->vcia_id = cid; + + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + return (0); +} + +int +libvarpd_c_prop_nprops(varpd_client_handle_t *chp, uint64_t cid, uint_t *nprops) +{ + int ret; + varpd_client_t *client = (varpd_client_t *)chp; + varpd_client_arg_t carg; + varpd_client_nprops_arg_t *vcnap = &carg.vca_un.vca_nprops; + + carg.vca_command = VARPD_CLIENT_NPROPS; + carg.vca_errno = 0; + vcnap->vcna_id = cid; + vcnap->vcna_nprops = 0; + + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + *nprops = vcnap->vcna_nprops; + return (0); +} + +int +libvarpd_c_prop_handle_alloc(varpd_client_handle_t *chp, uint64_t cid, + varpd_client_prop_handle_t **phdlp) +{ + varpd_client_prop_info_t *infop; + + infop = umem_alloc(sizeof (varpd_client_prop_info_t), UMEM_DEFAULT); + if (infop == NULL) + return (ENOMEM); + + bzero(infop, sizeof (varpd_client_prop_info_t)); + infop->vcprop_client = (varpd_client_t *)chp; + infop->vcprop_instance = cid; + infop->vcprop_propid = UINT_MAX; + *phdlp = (varpd_client_prop_handle_t *)infop; + return (0); +} + +void +libvarpd_c_prop_handle_free(varpd_client_prop_handle_t *phdl) +{ + umem_free(phdl, sizeof (varpd_client_prop_info_t)); + phdl = NULL; +} + +static void +libvarpd_c_prop_info_from_door(varpd_client_prop_info_t *infop, + const varpd_client_propinfo_arg_t *vcfap) +{ + infop->vcprop_propid = vcfap->vcfa_propid; + infop->vcprop_type = vcfap->vcfa_type; + infop->vcprop_prot = vcfap->vcfa_prot; + infop->vcprop_defsize = vcfap->vcfa_defsize; + infop->vcprop_psize = vcfap->vcfa_psize; + bcopy(vcfap->vcfa_name, infop->vcprop_name, LIBVARPD_PROP_NAMELEN); + bcopy(vcfap->vcfa_default, infop->vcprop_default, + LIBVARPD_PROP_SIZEMAX); + bcopy(vcfap->vcfa_poss, infop->vcprop_poss, LIBVARPD_PROP_SIZEMAX); +} + +int +libvarpd_c_prop_info_fill_by_name(varpd_client_prop_handle_t *phdl, + const char *name) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_propinfo_arg_t *vcfap = &carg.vca_un.vca_info; + varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl; + + if (strlen(name) >= LIBVARPD_PROP_NAMELEN) + return (EINVAL); + bzero(&carg, sizeof (varpd_client_arg_t)); + carg.vca_command = VARPD_CLIENT_PROPINFO; + carg.vca_errno = 0; + vcfap->vcfa_id = infop->vcprop_instance; + vcfap->vcfa_propid = UINT_MAX; + (void) strlcpy(vcfap->vcfa_name, name, LIBVARPD_PROP_NAMELEN); + + ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + libvarpd_c_prop_info_from_door(infop, vcfap); + return (0); +} + +int +libvarpd_c_prop_info_fill(varpd_client_prop_handle_t *phdl, uint_t propid) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_propinfo_arg_t *vcfap = &carg.vca_un.vca_info; + varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl; + + bzero(&carg, sizeof (varpd_client_arg_t)); + carg.vca_command = VARPD_CLIENT_PROPINFO; + carg.vca_errno = 0; + vcfap->vcfa_id = infop->vcprop_instance; + vcfap->vcfa_propid = propid; + + ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + libvarpd_c_prop_info_from_door(infop, vcfap); + return (0); +} + +int +libvarpd_c_prop_info(varpd_client_prop_handle_t *phdl, const char **namep, + uint_t *typep, uint_t *protp, const void **defp, uint32_t *defsizep, + const mac_propval_range_t **possp) +{ + varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl; + if (infop->vcprop_propid == UINT_MAX) + return (EINVAL); + + if (namep != NULL) + *namep = infop->vcprop_name; + if (typep != NULL) + *typep = infop->vcprop_type; + if (protp != NULL) + *protp = infop->vcprop_prot; + if (defp != NULL) + *defp = infop->vcprop_default; + if (defsizep != NULL) + *defsizep = infop->vcprop_defsize; + if (possp != NULL) + *possp = (const mac_propval_range_t *)infop->vcprop_poss; + return (0); +} + +int +libvarpd_c_prop_get(varpd_client_prop_handle_t *phdl, void *buf, uint32_t *len) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_prop_arg_t *vcpap = &carg.vca_un.vca_prop; + varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl; + + if (len == NULL || buf == NULL || infop->vcprop_propid == UINT_MAX) + return (EINVAL); + if (*len < LIBVARPD_PROP_SIZEMAX) + return (EOVERFLOW); + + bzero(&carg, sizeof (varpd_client_arg_t)); + carg.vca_command = VARPD_CLIENT_GETPROP; + carg.vca_errno = 0; + vcpap->vcpa_id = infop->vcprop_instance; + vcpap->vcpa_propid = infop->vcprop_propid; + + ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + /* + * If the buffer size is too large then something odd has certainly + * happened here, it means that varpd has gone rogue. In such a case we + * return a rather odd errror, though we don't believe that this should + * generally happen. + */ + if (vcpap->vcpa_bufsize > LIBVARPD_PROP_SIZEMAX) + return (E2BIG); + + bcopy(vcpap->vcpa_buf, buf, vcpap->vcpa_bufsize); + *len = vcpap->vcpa_bufsize; + return (0); +} + +int +libvarpd_c_prop_set(varpd_client_prop_handle_t *phdl, const void *buf, + uint32_t len) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_prop_arg_t *vcpap = &carg.vca_un.vca_prop; + varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl; + + if (len == NULL || buf == NULL || infop->vcprop_propid == UINT_MAX) + return (EINVAL); + if (len > LIBVARPD_PROP_SIZEMAX) + return (EOVERFLOW); + + carg.vca_command = VARPD_CLIENT_SETPROP; + carg.vca_errno = 0; + vcpap->vcpa_id = infop->vcprop_instance; + vcpap->vcpa_propid = infop->vcprop_propid; + vcpap->vcpa_bufsize = len; + bcopy(buf, vcpap->vcpa_buf, len); + + ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + return (0); +} + +int +libvarpd_c_instance_lookup(varpd_client_handle_t *chp, datalink_id_t linkid, + uint64_t *instp) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_lookup_arg_t *vclap = &carg.vca_un.vca_lookup; + varpd_client_t *client = (varpd_client_t *)chp; + + carg.vca_command = VARPD_CLIENT_LOOKUP; + carg.vca_errno = 0; + vclap->vcla_linkid = linkid; + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + if (instp != NULL) + *instp = vclap->vcla_id; + + return (0); +} + +int +libvarpd_c_instance_target_mode(varpd_client_handle_t *chp, uint64_t cid, + uint_t *dtype, uint_t *mtype) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_target_mode_arg_t *vctmap = &carg.vca_un.vca_mode; + varpd_client_t *client = (varpd_client_t *)chp; + + carg.vca_command = VARPD_CLIENT_TARGET_MODE; + carg.vca_errno = 0; + vctmap->vtma_id = cid; + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + if (ret == 0) { + if (mtype != NULL) + *mtype = vctmap->vtma_mode; + if (dtype != NULL) + *dtype = vctmap->vtma_dest; + } + + return (ret); +} + +int +libvarpd_c_instance_cache_flush(varpd_client_handle_t *chp, uint64_t cid) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache; + varpd_client_t *client = (varpd_client_t *)chp; + + carg.vca_command = VARPD_CLIENT_CACHE_FLUSH; + carg.vca_errno = 0; + + vctcap->vtca_id = cid; + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + return (0); +} + +int +libvarpd_c_instance_cache_delete(varpd_client_handle_t *chp, uint64_t cid, + const struct ether_addr *key) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache; + varpd_client_t *client = (varpd_client_t *)chp; + + if (key == NULL) + return (EINVAL); + + carg.vca_command = VARPD_CLIENT_CACHE_DELETE; + carg.vca_errno = 0; + vctcap->vtca_id = cid; + bcopy(key, vctcap->vtca_key, ETHERADDRL); + + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + return (0); +} + +int +libvarpd_c_instance_cache_get(varpd_client_handle_t *chp, uint64_t cid, + const struct ether_addr *key, varpd_client_cache_entry_t *entry) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache; + varpd_client_t *client = (varpd_client_t *)chp; + + if (key == NULL || entry == NULL) + return (EINVAL); + + carg.vca_command = VARPD_CLIENT_CACHE_GET; + carg.vca_errno = 0; + vctcap->vtca_id = cid; + bcopy(key, vctcap->vtca_key, ETHERADDRL); + bzero(&vctcap->vtca_entry, sizeof (varpd_client_cache_entry_t)); + + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + bcopy(&vctcap->vtca_entry, entry, sizeof (varpd_client_cache_entry_t)); + return (0); +} + +int +libvarpd_c_instance_cache_set(varpd_client_handle_t *chp, uint64_t cid, + const struct ether_addr *key, const varpd_client_cache_entry_t *entry) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache; + varpd_client_t *client = (varpd_client_t *)chp; + + if (key == NULL || entry == NULL) + return (EINVAL); + + carg.vca_command = VARPD_CLIENT_CACHE_SET; + carg.vca_errno = 0; + vctcap->vtca_id = cid; + bcopy(key, vctcap->vtca_key, ETHERADDRL); + bcopy(entry, &vctcap->vtca_entry, sizeof (varpd_client_cache_entry_t)); + + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + return (0); +} + +int +libvarpd_c_instance_cache_walk(varpd_client_handle_t *chp, uint64_t cid, + varpd_client_cache_f func, void *arg) +{ + int ret = 0; + size_t bufsize = sizeof (varpd_client_arg_t) + + 100 * sizeof (varpd_client_cache_entry_t); + varpd_client_t *client = (varpd_client_t *)chp; + varpd_client_arg_t *cargp; + varpd_client_target_walk_arg_t *vctwap; + + /* + * Because the number of entries involved in a walk may be large, we + * dynamically allocate a number of queries to make at a single time. + * This also means that the average door request doesn't inflate by the + * number of entries we want. For now, let's always grab 100 entries in + * a request. + */ + cargp = umem_zalloc(bufsize, UMEM_DEFAULT); + if (cargp == NULL) + return (errno); + vctwap = &cargp->vca_un.vca_walk; + for (;;) { + int i; + + cargp->vca_command = VARPD_CLIENT_CACHE_WALK; + cargp->vca_errno = 0; + vctwap->vtcw_id = cid; + vctwap->vtcw_count = 100; + + ret = libvarpd_c_door_call(client, cargp, bufsize); + if (ret != 0) + break; + + if (cargp->vca_errno != 0) { + ret = cargp->vca_errno; + break; + } + + if (vctwap->vtcw_count == 0) { + ret = 0; + break; + } + + for (i = 0; i < vctwap->vtcw_count; i++) { + varpd_client_cache_entry_t ent; + + ent.vcp_flags = vctwap->vtcw_ents[i].otce_flags; + bcopy(vctwap->vtcw_ents[i].otce_dest.otp_mac, + &ent.vcp_mac, ETHERADDRL); + ent.vcp_ip = vctwap->vtcw_ents[i].otce_dest.otp_ip; + ent.vcp_port = vctwap->vtcw_ents[i].otce_dest.otp_port; + ret = func(chp, cid, + (struct ether_addr *)vctwap->vtcw_ents[i].otce_mac, + &ent, arg); + if (ret != 0) { + ret = 0; + goto done; + } + } + } + +done: + umem_free(cargp, bufsize); + return (ret); +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_client.h b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.h new file mode 100644 index 0000000000..459711b385 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.h @@ -0,0 +1,92 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#ifndef _LIBVARPD_CLIENT_H +#define _LIBVARPD_CLIENT_H + +/* + * varpd interfaces + */ + +#include <sys/types.h> +#include <stdint.h> +#include <sys/mac.h> +#include <sys/overlay_target.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct __varpd_client_handle varpd_client_handle_t; +typedef struct __varpd_client_prop_handle varpd_client_prop_handle_t; + +typedef struct varpd_client_cache_entry { + struct ether_addr vcp_mac; + uint16_t vcp_flags; + struct in6_addr vcp_ip; + uint16_t vcp_port; +} varpd_client_cache_entry_t; + +/* + * We just use the values from the kernel for now. + */ +#define LIBVARPD_PROP_SIZEMAX OVERLAY_PROP_SIZEMAX +#define LIBVARPD_PROP_NAMELEN OVERLAY_PROP_NAMELEN + +extern int libvarpd_c_create(varpd_client_handle_t **, const char *); +extern void libvarpd_c_destroy(varpd_client_handle_t *); +extern int libvarpd_c_instance_create(varpd_client_handle_t *, datalink_id_t, + const char *, uint64_t *); +extern int libvarpd_c_instance_activate(varpd_client_handle_t *, uint64_t); +extern int libvarpd_c_instance_destroy(varpd_client_handle_t *, uint64_t); + +extern int libvarpd_c_prop_nprops(varpd_client_handle_t *, uint64_t, uint_t *); +extern int libvarpd_c_prop_handle_alloc(varpd_client_handle_t *, uint64_t, + varpd_client_prop_handle_t **); +extern void libvarpd_c_prop_handle_free(varpd_client_prop_handle_t *); +extern int libvarpd_c_prop_info_fill(varpd_client_prop_handle_t *, uint_t); +extern int libvarpd_c_prop_info_fill_by_name(varpd_client_prop_handle_t *, + const char *); +extern int libvarpd_c_prop_info(varpd_client_prop_handle_t *, const char **, + uint_t *, uint_t *, const void **, uint32_t *, + const mac_propval_range_t **); +extern int libvarpd_c_prop_get(varpd_client_prop_handle_t *, void *, + uint32_t *); +extern int libvarpd_c_prop_set(varpd_client_prop_handle_t *, const void *, + uint32_t); + +extern int libvarpd_c_instance_lookup(varpd_client_handle_t *, datalink_id_t, + uint64_t *); +extern int libvarpd_c_instance_target_mode(varpd_client_handle_t *, uint64_t, + uint_t *, uint_t *); +extern int libvarpd_c_instance_cache_flush(varpd_client_handle_t *, uint64_t); +extern int libvarpd_c_instance_cache_delete(varpd_client_handle_t *, uint64_t, + const struct ether_addr *); +extern int libvarpd_c_instance_cache_get(varpd_client_handle_t *, uint64_t, + const struct ether_addr *, varpd_client_cache_entry_t *); +extern int libvarpd_c_instance_cache_set(varpd_client_handle_t *, uint64_t, + const struct ether_addr *, const varpd_client_cache_entry_t *); + +typedef int (*varpd_client_cache_f)(varpd_client_handle_t *, uint64_t, + const struct ether_addr *, const varpd_client_cache_entry_t *, void *); +extern int libvarpd_c_instance_cache_walk(varpd_client_handle_t *, uint64_t, + varpd_client_cache_f, void *); + + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBVARPD_CLIENT_H */ diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_door.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_door.c new file mode 100644 index 0000000000..f684e031a8 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_door.c @@ -0,0 +1,469 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * varpd door server logic + */ + +#include <door.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stropts.h> +#include <stdlib.h> +#include <strings.h> +#include <priv.h> +#include <libvarpd_impl.h> + +typedef int (libvarpd_door_f)(varpd_impl_t *, varpd_client_arg_t *, ucred_t *); + +static boolean_t +libvarpd_door_privileged(ucred_t *credp) +{ + const priv_set_t *ps; + + ps = ucred_getprivset(credp, PRIV_EFFECTIVE); + if (ps == NULL) + return (B_FALSE); + + return (priv_ismember(ps, PRIV_SYS_NET_CONFIG)); +} + +/* ARGSUSED */ +static int +libvarpd_door_f_create(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + int ret; + varpd_instance_handle_t *ihdl; + varpd_client_create_arg_t *vccap = &vcap->vca_un.vca_create; + + vccap->vcca_plugin[LIBVARPD_PROP_NAMELEN-1] = '\0'; + ret = libvarpd_instance_create((varpd_handle_t *)vip, + vccap->vcca_linkid, vccap->vcca_plugin, &ihdl); + if (ret == 0) + vccap->vcca_id = libvarpd_instance_id(ihdl); + + return (ret); +} + +/* ARGSUSED */ +static int +libvarpd_door_f_activate(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_client_instance_arg_t *vciap = &vcap->vca_un.vca_instance; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vciap->vcia_id); + if (ihp == NULL) + return (ENOENT); + return (libvarpd_instance_activate(ihp)); +} + +/* ARGSUSED */ +static int +libvarpd_door_f_destroy(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_client_instance_arg_t *vciap = &vcap->vca_un.vca_instance; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vciap->vcia_id); + if (ihp == NULL) + return (ENOENT); + libvarpd_instance_destroy(ihp); + return (0); +} + +/* ARGSUSED */ +static int +libvarpd_door_f_nprops(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_client_nprops_arg_t *vcnap = &vcap->vca_un.vca_nprops; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcnap->vcna_id); + if (ihp == NULL) + return (ENOENT); + + return (libvarpd_prop_nprops(ihp, &vcnap->vcna_nprops)); +} + +/* ARGSUSED */ +static int +libvarpd_door_f_propinfo(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + int ret; + varpd_instance_handle_t *ihp; + varpd_prop_handle_t *phdl; + varpd_client_propinfo_arg_t *vcfap = &vcap->vca_un.vca_info; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcfap->vcfa_id); + if (ihp == NULL) + return (ENOENT); + ret = libvarpd_prop_handle_alloc((varpd_handle_t *)vip, ihp, &phdl); + if (ret != 0) + return (ret); + + if (vcfap->vcfa_propid != UINT_MAX) { + ret = libvarpd_prop_info_fill(phdl, vcfap->vcfa_propid); + if (ret != 0) { + libvarpd_prop_handle_free(phdl); + return (ret); + } + } else { + uint_t i, nprop; + const char *name; + + vcfap->vcfa_name[LIBVARPD_PROP_NAMELEN-1] = '\0'; + ret = libvarpd_prop_nprops(ihp, &nprop); + if (ret != 0) { + libvarpd_prop_handle_free(phdl); + return (ret); + } + for (i = 0; i < nprop; i++) { + ret = libvarpd_prop_info_fill(phdl, i); + if (ret != 0) { + libvarpd_prop_handle_free(phdl); + return (ret); + } + ret = libvarpd_prop_info(phdl, &name, NULL, NULL, NULL, + NULL, NULL); + if (ret != 0) { + libvarpd_prop_handle_free(phdl); + return (ret); + } + if (strcmp(vcfap->vcfa_name, name) == 0) + break; + } + + if (i == nprop) { + libvarpd_prop_handle_free(phdl); + return (ENOENT); + } + vcfap->vcfa_propid = i; + } + libvarpd_prop_door_convert(phdl, vcfap); + libvarpd_prop_handle_free(phdl); + return (0); +} + +/* ARGSUSED */ +static int +libvarpd_door_f_getprop(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + int ret; + uint32_t size; + varpd_instance_handle_t *ihp; + varpd_prop_handle_t *phdl; + varpd_client_prop_arg_t *vcpap = &vcap->vca_un.vca_prop; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcpap->vcpa_id); + if (ihp == NULL) + return (ENOENT); + ret = libvarpd_prop_handle_alloc((varpd_handle_t *)vip, ihp, &phdl); + if (ret != 0) + return (ret); + + ret = libvarpd_prop_info_fill(phdl, vcpap->vcpa_propid); + if (ret != 0) { + libvarpd_prop_handle_free(phdl); + return (ret); + } + + ret = libvarpd_prop_get(phdl, vcpap->vcpa_buf, &size); + if (ret == 0) + vcpap->vcpa_bufsize = size; + libvarpd_prop_handle_free(phdl); + return (0); +} + +/* ARGSUSED */ +static int +libvarpd_door_f_setprop(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + int ret; + varpd_instance_handle_t *ihp; + varpd_prop_handle_t *phdl; + varpd_client_prop_arg_t *vcpap = &vcap->vca_un.vca_prop; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcpap->vcpa_id); + if (ihp == NULL) + return (ENOENT); + ret = libvarpd_prop_handle_alloc((varpd_handle_t *)vip, ihp, &phdl); + if (ret != 0) + return (ret); + + ret = libvarpd_prop_info_fill(phdl, vcpap->vcpa_propid); + if (ret != 0) { + libvarpd_prop_handle_free(phdl); + return (ret); + } + + ret = libvarpd_prop_set(phdl, vcpap->vcpa_buf, vcpap->vcpa_bufsize); + libvarpd_prop_handle_free(phdl); + return (ret); +} + +/* ARGSUSED */ +static int +libvarpd_door_f_lookup(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_t *inst; + varpd_client_lookup_arg_t *vclap = &vcap->vca_un.vca_lookup; + + inst = libvarpd_instance_lookup_by_dlid(vip, vclap->vcla_linkid); + if (inst == NULL) + return (ENOENT); + + vclap->vcla_id = inst->vri_id; + return (0); +} + +/* ARGSUSED */ +static int +libvarpd_door_f_target(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_instance_t *inst; + varpd_client_target_mode_arg_t *vtmap = &vcap->vca_un.vca_mode; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtmap->vtma_id); + if (ihp == NULL) + return (ENOENT); + inst = (varpd_instance_t *)ihp; + vtmap->vtma_dest = inst->vri_dest; + vtmap->vtma_mode = inst->vri_mode; + return (0); +} + +static int +libvarpd_door_f_flush(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache; + + if (libvarpd_door_privileged(credp) == B_FALSE) + return (EPERM); + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id); + if (ihp == NULL) + return (ENOENT); + return (libvarpd_overlay_cache_flush((varpd_instance_t *)ihp)); +} + +static int +libvarpd_door_f_delete(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache; + + if (libvarpd_door_privileged(credp) == B_FALSE) + return (EPERM); + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id); + if (ihp == NULL) + return (ENOENT); + return (libvarpd_overlay_cache_delete((varpd_instance_t *)ihp, + vtcap->vtca_key)); +} + +/* ARGSUSED */ +static int +libvarpd_door_f_get(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id); + if (ihp == NULL) + return (ENOENT); + return (libvarpd_overlay_cache_get((varpd_instance_t *)ihp, + vtcap->vtca_key, &vtcap->vtca_entry)); +} + +static int +libvarpd_door_f_set(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache; + + if (libvarpd_door_privileged(credp) == B_FALSE) + return (EPERM); + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id); + if (ihp == NULL) + return (ENOENT); + + return (libvarpd_overlay_cache_set((varpd_instance_t *)ihp, + vtcap->vtca_key, &vtcap->vtca_entry)); +} + +/* ARGSUSED */ +static int +libvarpd_door_f_walk(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_client_target_walk_arg_t *vctwp = &vcap->vca_un.vca_walk; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vctwp->vtcw_id); + if (ihp == NULL) + return (ENOENT); + + return (libvarpd_overlay_cache_walk_fill((varpd_instance_t *)ihp, + &vctwp->vtcw_marker, &vctwp->vtcw_count, vctwp->vtcw_ents)); +} + +static libvarpd_door_f *libvarpd_door_table[] = { + libvarpd_door_f_create, + libvarpd_door_f_activate, + libvarpd_door_f_destroy, + libvarpd_door_f_nprops, + libvarpd_door_f_propinfo, + libvarpd_door_f_getprop, + libvarpd_door_f_setprop, + libvarpd_door_f_lookup, + libvarpd_door_f_target, + libvarpd_door_f_flush, + libvarpd_door_f_delete, + libvarpd_door_f_get, + libvarpd_door_f_set, + libvarpd_door_f_walk +}; + +/* ARGSUSED */ +static void +libvarpd_door_server(void *cookie, char *argp, size_t argsz, door_desc_t *dp, + uint_t ndesc) +{ + int ret; + varpd_client_eresp_t err; + ucred_t *credp = NULL; + varpd_impl_t *vip = cookie; + varpd_client_arg_t *vcap = (varpd_client_arg_t *)argp; + + err.vce_command = VARPD_CLIENT_INVALID; + if (argsz < sizeof (varpd_client_arg_t)) { + err.vce_errno = EINVAL; + goto errout; + } + + if ((ret = door_ucred(&credp)) != 0) { + err.vce_errno = ret; + goto errout; + } + + if (vcap->vca_command == VARPD_CLIENT_INVALID || + vcap->vca_command >= VARPD_CLIENT_MAX) { + err.vce_errno = EINVAL; + goto errout; + } + + vcap->vca_errno = 0; + ret = libvarpd_door_table[vcap->vca_command - 1](vip, vcap, credp); + if (ret != 0) + vcap->vca_errno = ret; + + ucred_free(credp); + (void) door_return(argp, argsz, NULL, 0); + return; + +errout: + ucred_free(credp); + (void) door_return((char *)&err, sizeof (err), NULL, 0); +} + +int +libvarpd_door_server_create(varpd_handle_t *vhp, const char *path) +{ + int fd, ret; + varpd_impl_t *vip = (varpd_impl_t *)vhp; + + mutex_enter(&vip->vdi_lock); + if (vip->vdi_doorfd >= 0) { + mutex_exit(&vip->vdi_lock); + return (EEXIST); + } + + vip->vdi_doorfd = door_create(libvarpd_door_server, vip, + DOOR_REFUSE_DESC | DOOR_NO_CANCEL); + if (vip->vdi_doorfd == -1) { + mutex_exit(&vip->vdi_lock); + return (errno); + } + + if ((fd = open(path, O_CREAT | O_RDWR, 0666)) == -1) { + ret = errno; + if (door_revoke(vip->vdi_doorfd) != 0) + libvarpd_panic("failed to revoke door: %d", + errno); + mutex_exit(&vip->vdi_lock); + return (errno); + } + + if (fchown(fd, UID_NETADM, GID_NETADM) != 0) { + ret = errno; + if (door_revoke(vip->vdi_doorfd) != 0) + libvarpd_panic("failed to revoke door: %d", + errno); + mutex_exit(&vip->vdi_lock); + return (ret); + } + + if (close(fd) != 0) + libvarpd_panic("failed to close door fd %d: %d", + fd, errno); + (void) fdetach(path); + if (fattach(vip->vdi_doorfd, path) != 0) { + ret = errno; + if (door_revoke(vip->vdi_doorfd) != 0) + libvarpd_panic("failed to revoke door: %d", + errno); + mutex_exit(&vip->vdi_lock); + return (ret); + } + + mutex_exit(&vip->vdi_lock); + return (0); +} + +void +libvarpd_door_server_destroy(varpd_handle_t *vhp) +{ + varpd_impl_t *vip = (varpd_impl_t *)vhp; + + mutex_enter(&vip->vdi_lock); + if (vip->vdi_doorfd != 0) { + if (door_revoke(vip->vdi_doorfd) != 0) + libvarpd_panic("failed to revoke door: %d", + errno); + vip->vdi_doorfd = -1; + } + mutex_exit(&vip->vdi_lock); +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h b/usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h new file mode 100644 index 0000000000..7ecd3a952f --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h @@ -0,0 +1,247 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#ifndef _LIBVARPD_IMPL_H +#define _LIBVARPD_IMPL_H + +/* + * varpd internal interfaces + */ + +#include <libvarpd.h> +#include <libvarpd_provider.h> +#include <sys/avl.h> +#include <thread.h> +#include <synch.h> +#include <limits.h> +#include <libidspace.h> +#include <umem.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define LIBVARPD_ID_MIN 1 +#define LIBVARPD_ID_MAX INT32_MAX + +typedef struct varpd_plugin { + avl_node_t vpp_node; + const char *vpp_name; + overlay_target_mode_t vpp_mode; + const varpd_plugin_ops_t *vpp_ops; + mutex_t vpp_lock; + uint_t vpp_active; +} varpd_plugin_t; + +typedef struct varpd_impl { + mutex_t vdi_lock; + rwlock_t vdi_pfdlock; + avl_tree_t vdi_plugins; /* vdi_lock */ + avl_tree_t vdi_instances; /* vdi_lock */ + avl_tree_t vdi_linstances; /* vdi_lock */ + id_space_t *vdi_idspace; /* RO */ + umem_cache_t *vdi_qcache; /* RO */ + bunyan_logger_t *vdi_bunyan; /* RO */ + int vdi_overlayfd; /* RO */ + int vdi_doorfd; /* vdi_lock */ + int vdi_persistfd; /* vdi_plock */ + cond_t vdi_lthr_cv; /* vdi_lock */ + boolean_t vdi_lthr_quiesce; /* vdi_lock */ + uint_t vdi_lthr_count; /* vdi_lock */ +} varpd_impl_t; + +typedef enum varpd_instance_flags { + VARPD_INSTANCE_F_ACTIVATED = 0x01 +} varpd_instance_flags_t; + +typedef struct varpd_instance { + avl_node_t vri_inode; + avl_node_t vri_lnode; + uint64_t vri_id; /* RO */ + uint64_t vri_vnetid; /* RO */ + datalink_id_t vri_linkid; /* RO */ + overlay_target_mode_t vri_mode; /* RO */ + overlay_plugin_dest_t vri_dest; /* RO */ + varpd_impl_t *vri_impl; /* RO */ + varpd_plugin_t *vri_plugin; /* RO */ + void *vri_private; /* RO */ + mutex_t vri_lock; + varpd_instance_flags_t vri_flags; /* vri_lock */ +} varpd_instance_t; + +typedef struct varpd_query { + overlay_targ_lookup_t vq_lookup; + overlay_targ_resp_t vq_response; + varpd_instance_t *vq_instance; +} varpd_query_t; + +typedef struct varpd_client_create_arg { + datalink_id_t vcca_linkid; + uint64_t vcca_id; + char vcca_plugin[LIBVARPD_PROP_NAMELEN]; +} varpd_client_create_arg_t; + +typedef struct varpd_client_instance_arg { + uint64_t vcia_id; +} varpd_client_instance_arg_t; + +typedef struct varpd_client_nprops_arg { + uint64_t vcna_id; + uint_t vcna_nprops; +} varpd_client_nprops_arg_t; + +typedef struct varpd_client_propinfo_arg { + uint64_t vcfa_id; + uint_t vcfa_propid; + uint_t vcfa_type; + uint_t vcfa_prot; + uint32_t vcfa_defsize; + uint32_t vcfa_psize; + char vcfa_name[LIBVARPD_PROP_NAMELEN]; + uint8_t vcfa_default[LIBVARPD_PROP_SIZEMAX]; + uint8_t vcfa_poss[LIBVARPD_PROP_SIZEMAX]; +} varpd_client_propinfo_arg_t; + +typedef struct varpd_client_prop_arg { + uint64_t vcpa_id; + uint_t vcpa_propid; + uint8_t vcpa_buf[LIBVARPD_PROP_SIZEMAX]; + size_t vcpa_bufsize; +} varpd_client_prop_arg_t; + +typedef struct varpd_client_lookup_arg { + datalink_id_t vcla_linkid; + uint32_t vcla_pad; + uint64_t vcla_id; +} varpd_client_lookup_arg_t; + +typedef struct varpd_client_target_mode_arg { + uint64_t vtma_id; + uint32_t vtma_dest; + uint32_t vtma_mode; +} varpd_client_target_mode_arg_t; + +typedef struct varpd_client_target_cache_arg { + uint64_t vtca_id; + uint8_t vtca_key[ETHERADDRL]; + uint8_t vtca_pad[2]; + varpd_client_cache_entry_t vtca_entry; +} varpd_client_target_cache_arg_t; + +typedef struct varpd_client_target_walk_arg { + uint64_t vtcw_id; + uint64_t vtcw_marker; + uint64_t vtcw_count; + overlay_targ_cache_entry_t vtcw_ents[]; +} varpd_client_target_walk_arg_t; + +typedef enum varpd_client_command { + VARPD_CLIENT_INVALID = 0x0, + VARPD_CLIENT_CREATE, + VARPD_CLIENT_ACTIVATE, + VARPD_CLIENT_DESTROY, + VARPD_CLIENT_NPROPS, + VARPD_CLIENT_PROPINFO, + VARPD_CLIENT_GETPROP, + VARPD_CLIENT_SETPROP, + VARPD_CLIENT_LOOKUP, + VARPD_CLIENT_TARGET_MODE, + VARPD_CLIENT_CACHE_FLUSH, + VARPD_CLIENT_CACHE_DELETE, + VARPD_CLIENT_CACHE_GET, + VARPD_CLIENT_CACHE_SET, + VARPD_CLIENT_CACHE_WALK, + VARPD_CLIENT_MAX +} varpd_client_command_t; + +typedef struct varpd_client_arg { + uint_t vca_command; + uint_t vca_errno; + union { + varpd_client_create_arg_t vca_create; + varpd_client_instance_arg_t vca_instance; + varpd_client_nprops_arg_t vca_nprops; + varpd_client_propinfo_arg_t vca_info; + varpd_client_prop_arg_t vca_prop; + varpd_client_lookup_arg_t vca_lookup; + varpd_client_target_mode_arg_t vca_mode; + varpd_client_target_cache_arg_t vca_cache; + varpd_client_target_walk_arg_t vca_walk; + } vca_un; +} varpd_client_arg_t; + +typedef struct varpd_client_eresp { + uint_t vce_command; + uint_t vce_errno; +} varpd_client_eresp_t; + +extern void libvarpd_plugin_init(void); +extern void libvarpd_plugin_prefork(void); +extern void libvarpd_plugin_postfork(void); +extern void libvarpd_plugin_fini(void); +extern int libvarpd_plugin_comparator(const void *, const void *); +extern varpd_plugin_t *libvarpd_plugin_lookup(varpd_impl_t *, const char *); + +extern varpd_instance_t *libvarpd_instance_lookup_by_dlid(varpd_impl_t *, + datalink_id_t); + +extern void libvarpd_prop_door_convert(const varpd_prop_handle_t *, + varpd_client_propinfo_arg_t *); + +extern const char *libvarpd_isaext(void); +typedef int (*libvarpd_dirwalk_f)(varpd_impl_t *, const char *, void *); +extern int libvarpd_dirwalk(varpd_impl_t *, const char *, const char *, + libvarpd_dirwalk_f, void *); + +extern int libvarpd_overlay_init(varpd_impl_t *); +extern void libvarpd_overlay_fini(varpd_impl_t *); +extern int libvarpd_overlay_info(varpd_impl_t *, datalink_id_t, + overlay_plugin_dest_t *, uint64_t *, uint64_t *); +extern int libvarpd_overlay_associate(varpd_instance_t *); +extern int libvarpd_overlay_disassociate(varpd_instance_t *); +extern int libvarpd_overlay_degrade(varpd_instance_t *, const char *); +extern int libvarpd_overlay_degrade_datalink(varpd_impl_t *, datalink_id_t, + const char *); +extern int libvarpd_overlay_restore(varpd_instance_t *); +extern int libvarpd_overlay_packet(varpd_impl_t *, + const overlay_targ_lookup_t *, void *, size_t *); +extern int libvarpd_overlay_inject(varpd_impl_t *, + const overlay_targ_lookup_t *, void *, size_t); +extern int libvarpd_overlay_instance_inject(varpd_instance_t *, void *, size_t); +extern int libvarpd_overlay_resend(varpd_impl_t *, + const overlay_targ_lookup_t *, void *, size_t); +typedef int (*libvarpd_overlay_iter_f)(varpd_impl_t *, datalink_id_t, void *); +extern int libvarpd_overlay_iter(varpd_impl_t *, libvarpd_overlay_iter_f, + void *); +extern int libvarpd_overlay_cache_flush(varpd_instance_t *); +extern int libvarpd_overlay_cache_delete(varpd_instance_t *, const uint8_t *); +extern int libvarpd_overlay_cache_delete(varpd_instance_t *, const uint8_t *); +extern int libvarpd_overlay_cache_get(varpd_instance_t *, const uint8_t *, + varpd_client_cache_entry_t *); +extern int libvarpd_overlay_cache_set(varpd_instance_t *, const uint8_t *, + const varpd_client_cache_entry_t *); +extern int libvarpd_overlay_cache_walk_fill(varpd_instance_t *, uint64_t *, + uint64_t *, overlay_targ_cache_entry_t *); + +extern void libvarpd_persist_init(varpd_impl_t *); +extern void libvarpd_persist_fini(varpd_impl_t *); +extern int libvarpd_persist_instance(varpd_impl_t *, varpd_instance_t *); +extern void libvarpd_torch_instance(varpd_impl_t *, varpd_instance_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBVARPD_IMPL_H */ diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c new file mode 100644 index 0000000000..b7506c56cf --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c @@ -0,0 +1,584 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * Interactions with /dev/overlay + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <assert.h> +#include <unistd.h> +#include <stdlib.h> +#include <stropts.h> +#include <strings.h> +#include <umem.h> + +#include <libvarpd_impl.h> +#include <sys/overlay_target.h> + +#define OVERLAY_PATH "/dev/overlay" + +int +libvarpd_overlay_init(varpd_impl_t *vip) +{ + vip->vdi_overlayfd = open(OVERLAY_PATH, O_RDWR | O_EXCL); + if (vip->vdi_overlayfd == -1) + return (errno); + return (0); +} + +void +libvarpd_overlay_fini(varpd_impl_t *vip) +{ + assert(vip->vdi_overlayfd > 0); + if (close(vip->vdi_overlayfd) != 0) + libvarpd_panic("failed to close /dev/overlay fd %d: %d", + vip->vdi_overlayfd, errno); +} + +int +libvarpd_overlay_info(varpd_impl_t *vip, datalink_id_t linkid, + overlay_plugin_dest_t *destp, uint64_t *flags, uint64_t *vnetid) +{ + overlay_targ_info_t oti; + + oti.oti_linkid = linkid; + if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_INFO, &oti) != 0) + return (errno); + + if (destp != NULL) + *destp = oti.oti_needs; + if (flags != NULL) + *flags = oti.oti_flags; + if (vnetid != NULL) + *vnetid = oti.oti_vnetid; + return (0); +} + +int +libvarpd_overlay_associate(varpd_instance_t *inst) +{ + overlay_targ_associate_t ota; + varpd_impl_t *vip = inst->vri_impl; + + bzero(&ota, sizeof (overlay_targ_associate_t)); + ota.ota_linkid = inst->vri_linkid; + ota.ota_mode = inst->vri_mode; + ota.ota_id = inst->vri_id; + ota.ota_provides = inst->vri_dest; + + if (ota.ota_mode == OVERLAY_TARGET_POINT) { + int ret; + ret = inst->vri_plugin->vpp_ops->vpo_default(inst->vri_private, + &ota.ota_point); + if (ret != VARPD_LOOKUP_OK) + return (ret); + } + + if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_ASSOCIATE, &ota) != 0) + return (errno); + + return (0); +} + +int +libvarpd_overlay_disassociate(varpd_instance_t *inst) +{ + overlay_targ_id_t otid; + varpd_impl_t *vip = inst->vri_impl; + + otid.otid_linkid = inst->vri_linkid; + if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_DISASSOCIATE, &otid) != 0) + return (errno); + return (0); +} + +int +libvarpd_overlay_degrade_datalink(varpd_impl_t *vip, datalink_id_t linkid, + const char *msg) +{ + overlay_targ_degrade_t otd; + + otd.otd_linkid = linkid; + (void) strlcpy(otd.otd_buf, msg, OVERLAY_STATUS_BUFLEN); + if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_DEGRADE, &otd) != 0) + return (errno); + return (0); + +} + +int +libvarpd_overlay_degrade(varpd_instance_t *inst, const char *msg) +{ + return (libvarpd_overlay_degrade_datalink(inst->vri_impl, + inst->vri_linkid, msg)); +} + +int +libvarpd_overlay_restore(varpd_instance_t *inst) +{ + overlay_targ_id_t otid; + varpd_impl_t *vip = inst->vri_impl; + + otid.otid_linkid = inst->vri_linkid; + if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_RESTORE, &otid) != 0) + return (errno); + return (0); +} + +int +libvarpd_overlay_packet(varpd_impl_t *vip, const overlay_targ_lookup_t *otl, + void *buf, size_t *buflen) +{ + int ret; + overlay_targ_pkt_t otp; + + otp.otp_linkid = UINT64_MAX; + otp.otp_reqid = otl->otl_reqid; + otp.otp_size = *buflen; + otp.otp_buf = buf; + + do { + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_PKT, &otp); + } while (ret != 0 && errno == EINTR); + if (ret != 0 && errno == EFAULT) + libvarpd_panic("OVERLAY_TARG_PKT ioctl efault"); + else if (ret != 0) + ret = errno; + + if (ret == 0) + *buflen = otp.otp_size; + + return (ret); +} + +static int +libvarpd_overlay_inject_common(varpd_impl_t *vip, varpd_instance_t *inst, + const overlay_targ_lookup_t *otl, void *buf, size_t buflen, int cmd) +{ + int ret; + overlay_targ_pkt_t otp; + + if (otl == NULL) { + otp.otp_linkid = inst->vri_linkid; + otp.otp_reqid = 0; + } else { + otp.otp_linkid = UINT64_MAX; + otp.otp_reqid = otl->otl_reqid; + } + otp.otp_size = buflen; + otp.otp_buf = buf; + + do { + ret = ioctl(vip->vdi_overlayfd, cmd, &otp); + } while (ret != 0 && errno == EINTR); + if (ret != 0 && errno == EFAULT) + libvarpd_panic("overlay_inject_common ioctl EFAULT"); + else if (ret != 0) + ret = errno; + + return (ret); +} + +int +libvarpd_overlay_inject(varpd_impl_t *vip, const overlay_targ_lookup_t *otl, + void *buf, size_t buflen) +{ + return (libvarpd_overlay_inject_common(vip, NULL, otl, buf, buflen, + OVERLAY_TARG_INJECT)); +} + +int +libvarpd_overlay_instance_inject(varpd_instance_t *inst, void *buf, + size_t buflen) +{ + return (libvarpd_overlay_inject_common(inst->vri_impl, inst, NULL, buf, + buflen, OVERLAY_TARG_INJECT)); +} + +int +libvarpd_overlay_resend(varpd_impl_t *vip, const overlay_targ_lookup_t *otl, + void *buf, size_t buflen) +{ + return (libvarpd_overlay_inject_common(vip, NULL, otl, buf, buflen, + OVERLAY_TARG_RESEND)); +} + +static void +libvarpd_overlay_lookup_reply(varpd_impl_t *vip, + const overlay_targ_lookup_t *otl, overlay_targ_resp_t *otr, int cmd) +{ + int ret; + + otr->otr_reqid = otl->otl_reqid; + do { + ret = ioctl(vip->vdi_overlayfd, cmd, otr); + } while (ret != 0 && errno == EINTR); + + /* + * The only errors that should cause us to end up here are due to + * programmer errors. Aruably the EINAVL case indicates that something + * is a bit off; however, at this time we don't opt to kill varpd. + */ + if (ret != 0 && errno != EINVAL) + libvarpd_panic("receieved bad errno from lookup_reply " + "(cmd %d): %d\n", cmd, errno); +} + +static void +libvarpd_overlay_lookup_handle(varpd_impl_t *vip) +{ + int ret; + varpd_query_t *vqp; + overlay_targ_lookup_t *otl; + overlay_targ_resp_t *otr; + varpd_instance_t *inst; + + vqp = umem_cache_alloc(vip->vdi_qcache, UMEM_DEFAULT); + otl = &vqp->vq_lookup; + otr = &vqp->vq_response; + /* + * abort doesn't really help here that much, maybe we can instead try + * and for a reap or something? + */ + if (vqp == NULL) + libvarpd_panic("failed to allocate memory for lookup " + "handle..., we should not panic()"); + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_LOOKUP, otl); + if (ret != 0 && errno != ETIME && errno != EINTR) + libvarpd_panic("received bad errno from OVERLAY_TARG_LOOKUP: " + "%d", errno); + + if (ret != 0) { + umem_cache_free(vip->vdi_qcache, vqp); + return; + } + + inst = (varpd_instance_t *)libvarpd_instance_lookup( + (varpd_handle_t *)vip, otl->otl_varpdid); + if (inst == NULL) { + libvarpd_overlay_lookup_reply(vip, otl, otr, + OVERLAY_TARG_DROP); + umem_cache_free(vip->vdi_qcache, vqp); + return; + } + vqp->vq_instance = inst; + + inst->vri_plugin->vpp_ops->vpo_lookup(inst->vri_private, + (varpd_query_handle_t *)vqp, otl, &otr->otr_answer); +} + +void +libvarpd_overlay_lookup_run(varpd_handle_t *vhp) +{ + varpd_impl_t *vip = (varpd_impl_t *)vhp; + + mutex_enter(&vip->vdi_lock); + if (vip->vdi_lthr_quiesce == B_TRUE) { + mutex_exit(&vip->vdi_lock); + return; + } + vip->vdi_lthr_count++; + + for (;;) { + mutex_exit(&vip->vdi_lock); + libvarpd_overlay_lookup_handle(vip); + mutex_enter(&vip->vdi_lock); + if (vip->vdi_lthr_quiesce == B_TRUE) + break; + } + assert(vip->vdi_lthr_count > 0); + vip->vdi_lthr_count--; + (void) cond_signal(&vip->vdi_lthr_cv); + mutex_exit(&vip->vdi_lock); +} + +void +libvarpd_overlay_lookup_quiesce(varpd_handle_t *vhp) +{ + varpd_impl_t *vip = (varpd_impl_t *)vhp; + + mutex_enter(&vip->vdi_lock); + if (vip->vdi_lthr_count == 0) { + mutex_exit(&vip->vdi_lock); + return; + } + vip->vdi_lthr_quiesce = B_TRUE; + while (vip->vdi_lthr_count > 0) + (void) cond_wait(&vip->vdi_lthr_cv, &vip->vdi_lock); + vip->vdi_lthr_quiesce = B_FALSE; + mutex_exit(&vip->vdi_lock); +} + +int +libvarpd_overlay_iter(varpd_impl_t *vip, libvarpd_overlay_iter_f func, + void *arg) +{ + uint32_t curents = 0, i; + size_t size; + overlay_targ_list_t *otl; + + for (;;) { + size = sizeof (overlay_targ_list_t) + + sizeof (uint32_t) * curents; + otl = umem_alloc(size, UMEM_DEFAULT); + if (otl == NULL) + return (ENOMEM); + + otl->otl_nents = curents; + if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_LIST, otl) != 0) { + if (errno == EFAULT) + libvarpd_panic("OVERLAY_TARG_LIST ioctl " + "efault"); + umem_free(otl, size); + if (errno == EINTR) + continue; + else + return (errno); + } + + if (otl->otl_nents == curents) + break; + + curents = otl->otl_nents; + umem_free(otl, size); + } + + for (i = 0; i < otl->otl_nents; i++) { + if (func(vip, otl->otl_ents[i], arg) != 0) + break; + } + umem_free(otl, size); + return (0); +} + +int +libvarpd_overlay_cache_flush(varpd_instance_t *inst) +{ + int ret; + overlay_targ_cache_t cache; + varpd_impl_t *vip = inst->vri_impl; + + bzero(&cache, sizeof (overlay_targ_cache_t)); + cache.otc_linkid = inst->vri_linkid; + + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_FLUSH, &cache); + if (ret != 0 && errno == EFAULT) + libvarpd_panic("OVERLAY_TARG_CACHE_FLUSH ioctl efault"); + else if (ret != 0) + ret = errno; + + return (ret); +} + +int +libvarpd_overlay_cache_delete(varpd_instance_t *inst, const uint8_t *key) +{ + int ret; + overlay_targ_cache_t cache; + varpd_impl_t *vip = inst->vri_impl; + + bzero(&cache, sizeof (overlay_targ_cache_t)); + cache.otc_linkid = inst->vri_linkid; + bcopy(key, cache.otc_entry.otce_mac, ETHERADDRL); + + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_REMOVE, &cache); + if (ret != 0 && errno == EFAULT) + libvarpd_panic("OVERLAY_TARG_CACHE_REMOVE ioctl efault"); + else if (ret != 0) + ret = errno; + + return (ret); + +} + +int +libvarpd_overlay_cache_get(varpd_instance_t *inst, const uint8_t *key, + varpd_client_cache_entry_t *entry) +{ + int ret; + overlay_targ_cache_t cache; + varpd_impl_t *vip = inst->vri_impl; + + bzero(&cache, sizeof (overlay_targ_cache_t)); + cache.otc_linkid = inst->vri_linkid; + bcopy(key, cache.otc_entry.otce_mac, ETHERADDRL); + + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_GET, &cache); + if (ret != 0 && errno == EFAULT) + libvarpd_panic("OVERLAY_TARG_CACHE_GET ioctl efault"); + else if (ret != 0) + return (errno); + + bcopy(cache.otc_entry.otce_dest.otp_mac, &entry->vcp_mac, ETHERADDRL); + entry->vcp_flags = cache.otc_entry.otce_flags; + entry->vcp_ip = cache.otc_entry.otce_dest.otp_ip; + entry->vcp_port = cache.otc_entry.otce_dest.otp_port; + + return (0); +} + +int +libvarpd_overlay_cache_set(varpd_instance_t *inst, const uint8_t *key, + const varpd_client_cache_entry_t *entry) +{ + int ret; + overlay_targ_cache_t cache; + varpd_impl_t *vip = inst->vri_impl; + + bzero(&cache, sizeof (overlay_targ_cache_t)); + cache.otc_linkid = inst->vri_linkid; + bcopy(key, cache.otc_entry.otce_mac, ETHERADDRL); + bcopy(&entry->vcp_mac, cache.otc_entry.otce_dest.otp_mac, ETHERADDRL); + cache.otc_entry.otce_flags = entry->vcp_flags; + cache.otc_entry.otce_dest.otp_ip = entry->vcp_ip; + cache.otc_entry.otce_dest.otp_port = entry->vcp_port; + + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_SET, &cache); + if (ret != 0 && errno == EFAULT) + libvarpd_panic("OVERLAY_TARG_CACHE_SET ioctl efault"); + else if (ret != 0) + return (errno); + + return (0); +} + +int +libvarpd_overlay_cache_walk_fill(varpd_instance_t *inst, uint64_t *markerp, + uint64_t *countp, overlay_targ_cache_entry_t *ents) +{ + int ret; + size_t asize; + overlay_targ_cache_iter_t *iter; + varpd_impl_t *vip = inst->vri_impl; + + if (*countp > 200) + return (E2BIG); + + asize = sizeof (overlay_targ_cache_iter_t) + + *countp * sizeof (overlay_targ_cache_entry_t); + iter = umem_alloc(asize, UMEM_DEFAULT); + if (iter == NULL) + return (ENOMEM); + + iter->otci_linkid = inst->vri_linkid; + iter->otci_marker = *markerp; + iter->otci_count = *countp; + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_ITER, iter); + if (ret != 0 && errno == EFAULT) + libvarpd_panic("OVERLAY_TARG_CACHE_ITER ioctl efault"); + else if (ret != 0) { + ret = errno; + goto out; + } + + *markerp = iter->otci_marker; + *countp = iter->otci_count; + bcopy(iter->otci_ents, ents, + *countp * sizeof (overlay_targ_cache_entry_t)); +out: + umem_free(iter, asize); + return (ret); +} + +void +libvarpd_plugin_query_reply(varpd_query_handle_t *vqh, int action) +{ + varpd_query_t *vqp = (varpd_query_t *)vqh; + + if (vqp == NULL) + libvarpd_panic("unkonwn plugin passed invalid " + "varpd_query_handle_t"); + + if (action == VARPD_LOOKUP_DROP) + libvarpd_overlay_lookup_reply(vqp->vq_instance->vri_impl, + &vqp->vq_lookup, &vqp->vq_response, OVERLAY_TARG_DROP); + else if (action == VARPD_LOOKUP_OK) + libvarpd_overlay_lookup_reply(vqp->vq_instance->vri_impl, + &vqp->vq_lookup, &vqp->vq_response, OVERLAY_TARG_RESPOND); + else + libvarpd_panic("plugin %s passed in an invalid action: %d", + vqp->vq_instance->vri_plugin->vpp_name, action); +} + +void +libvarpd_inject_varp(varpd_provider_handle_t *vph, const uint8_t *mac, + const overlay_target_point_t *otp) +{ + int ret; + overlay_targ_cache_t otc; + varpd_instance_t *inst = (varpd_instance_t *)vph; + varpd_impl_t *vip = inst->vri_impl; + + if (otp == NULL) { + (void) libvarpd_overlay_cache_delete(inst, mac); + return; + } + + otc.otc_linkid = inst->vri_linkid; + otc.otc_entry.otce_flags = 0; + bcopy(mac, otc.otc_entry.otce_mac, ETHERADDRL); + bcopy(otp, &otc.otc_entry.otce_dest, sizeof (overlay_target_point_t)); + + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_SET, &otc); + if (ret != 0) { + switch (errno) { + case EBADF: + case EFAULT: + case ENOTSUP: + libvarpd_panic("received bad errno from " + "OVERLAY_TARG_CACHE_SET: %d", errno); + default: + break; + } + } +} + +void +libvarpd_fma_degrade(varpd_provider_handle_t *vph, const char *msg) +{ + int ret; + varpd_instance_t *inst = (varpd_instance_t *)vph; + + ret = libvarpd_overlay_degrade(inst, msg); + switch (ret) { + case ENOENT: + case EFAULT: + libvarpd_panic("received bad errno from degrade ioctl: %d", + errno); + default: + break; + } +} + +void +libvarpd_fma_restore(varpd_provider_handle_t *vph) +{ + int ret; + varpd_instance_t *inst = (varpd_instance_t *)vph; + + ret = libvarpd_overlay_restore(inst); + switch (ret) { + case ENOENT: + case EFAULT: + libvarpd_panic("received bad errno from restore ioctl: %d", + errno); + default: + break; + } +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c new file mode 100644 index 0000000000..6728d79d6e --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c @@ -0,0 +1,53 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015, Joyent, Inc. + */ + +/* + * No, 'tis not so deep as a well, nor so wide as a church door; but 'tis + * enough,'twill serve. Ask for me tomorrow, and you shall find me a grave man. + * + * This file maintains various routines for handling when we die. + */ + +#include <stdio.h> +#include <stdarg.h> +#include <errno.h> +#include <thread.h> +#include <stdlib.h> + +/* + * Normally these would be static, but if they're static, that throws off lint + * because it thinks we never use them, which is kind of the point, because we + * only read them in the core... + */ +int varpd_panic_errno; +char varpd_panic_buf[1024]; +thread_t varpd_panic_thread; + +void +libvarpd_panic(const char *fmt, ...) +{ + va_list ap; + + /* Always save errno first! */ + varpd_panic_errno = errno; + varpd_panic_thread = thr_self(); + + if (fmt != NULL) { + va_start(ap, fmt); + (void) vsnprintf(varpd_panic_buf, sizeof (varpd_panic_buf), fmt, + ap); + } + abort(); +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c new file mode 100644 index 0000000000..27cc802a9c --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c @@ -0,0 +1,586 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. All rights reserved. + */ + +/* + * varpd persistence backend + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <strings.h> +#include <librename.h> +#include <md5.h> +#include <sys/sysmacros.h> +#include <dirent.h> +#include <sys/mman.h> +#include <umem.h> +#include <sys/debug.h> + +#include <libvarpd_impl.h> + +static uint8_t varpd_persist_magic[4] = { + 'v', + 'a', + 'r', + 'p', +}; + +#define VARPD_PERSIST_MAXWRITE 4096 +#define VARPD_PERSIST_VERSION_ONE 1 +#define VARPD_PERSIST_SUFFIX ".varpd" + +typedef struct varpd_persist_header { + uint8_t vph_magic[4]; + uint32_t vph_version; + uint8_t vph_md5[16]; +} varpd_persist_header_t; + +void +libvarpd_persist_init(varpd_impl_t *vip) +{ + vip->vdi_persistfd = -1; + if (rwlock_init(&vip->vdi_pfdlock, USYNC_THREAD, NULL) != 0) + libvarpd_panic("failed to create rw vdi_pfdlock"); +} + +void +libvarpd_persist_fini(varpd_impl_t *vip) +{ + /* + * Clean up for someone that left something behind. + */ + if (vip->vdi_persistfd != -1) { + if (close(vip->vdi_persistfd) != 0) + libvarpd_panic("failed to close persist fd %d: %d", + vip->vdi_persistfd, errno); + vip->vdi_persistfd = -1; + } + if (rwlock_destroy(&vip->vdi_pfdlock) != 0) + libvarpd_panic("failed to destroy rw vdi_pfdlock"); +} + +int +libvarpd_persist_enable(varpd_handle_t *vhp, const char *rootdir) +{ + int fd; + struct stat st; + varpd_impl_t *vip = (varpd_impl_t *)vhp; + + fd = open(rootdir, O_RDONLY); + if (fd < 0) + return (errno); + + if (fstat(fd, &st) != 0) { + int ret = errno; + if (close(fd) != 0) + libvarpd_panic("failed to close rootdir fd (%s) %d: %d", + rootdir, fd, errno); + return (ret); + } + + if (!S_ISDIR(st.st_mode)) { + if (close(fd) != 0) + libvarpd_panic("failed to close rootdir fd (%s) %d: %d", + rootdir, fd, errno); + return (EINVAL); + } + + + VERIFY0(rw_wrlock(&vip->vdi_pfdlock)); + if (vip->vdi_persistfd != -1) { + VERIFY0(rw_unlock(&vip->vdi_pfdlock)); + if (close(fd) != 0) + libvarpd_panic("failed to close rootdir fd (%s) %d: %d", + rootdir, fd, errno); + return (EEXIST); + } + vip->vdi_persistfd = fd; + VERIFY0(rw_unlock(&vip->vdi_pfdlock)); + + return (0); +} + +static int +libvarpd_persist_write(int fd, const void *buf, size_t buflen) +{ + ssize_t ret; + off_t off = 0; + + while (buflen > 0) { + ret = write(fd, (void *)((uintptr_t)buf + off), + MIN(buflen, VARPD_PERSIST_MAXWRITE)); + if (ret == -1 && errno == EINTR) + continue; + if (ret == -1) + return (errno); + + off += ret; + buflen -= ret; + } + + return (0); +} + +static int +libvarpd_persist_nvlist(int dirfd, uint64_t id, nvlist_t *nvl) +{ + int err, fd; + size_t size; + varpd_persist_header_t hdr; + librename_atomic_t *lrap; + char *buf = NULL, *name; + + if ((err = nvlist_pack(nvl, &buf, &size, NV_ENCODE_XDR, 0)) != 0) + return (err); + + if (asprintf(&name, "%llu%s", (unsigned long long)id, ".varpd") == -1) { + err = errno; + free(buf); + return (err); + } + + if ((err = librename_atomic_fdinit(dirfd, name, NULL, 0600, 0, + &lrap)) != 0) { + free(name); + free(buf); + return (err); + } + + fd = librename_atomic_fd(lrap); + + bzero(&hdr, sizeof (varpd_persist_header_t)); + bcopy(varpd_persist_magic, hdr.vph_magic, sizeof (varpd_persist_magic)); + hdr.vph_version = VARPD_PERSIST_VERSION_ONE; + md5_calc(hdr.vph_md5, buf, size); + + if ((err = libvarpd_persist_write(fd, &hdr, + sizeof (varpd_persist_header_t))) != 0) { + librename_atomic_fini(lrap); + free(name); + free(buf); + return (err); + } + + if ((err = libvarpd_persist_write(fd, buf, size)) != 0) { + librename_atomic_fini(lrap); + free(name); + free(buf); + return (err); + } + + do { + err = librename_atomic_commit(lrap); + } while (err == EINTR); + + librename_atomic_fini(lrap); + free(name); + free(buf); + return (err); +} + +int +libvarpd_persist_instance(varpd_impl_t *vip, varpd_instance_t *inst) +{ + int err = 0; + nvlist_t *nvl = NULL, *cvl = NULL; + + VERIFY0(rw_rdlock(&vip->vdi_pfdlock)); + /* Check if persistence exists */ + if (vip->vdi_persistfd == -1) + goto out; + + if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0) + goto out; + + if ((err = nvlist_alloc(&cvl, NV_UNIQUE_NAME, 0)) != 0) + goto out; + + if ((err = nvlist_add_uint64(nvl, "vri_id", inst->vri_id)) != 0) + goto out; + + if ((err = nvlist_add_uint32(nvl, "vri_linkid", inst->vri_linkid)) != 0) + goto out; + + if ((err = nvlist_add_uint32(nvl, "vri_dest", + (uint32_t)inst->vri_dest)) != 0) + goto out; + if ((err = nvlist_add_uint32(nvl, "vri_mode", + (uint32_t)inst->vri_mode)) != 0) + goto out; + + if ((err = nvlist_add_string(nvl, "vri_plugin", + inst->vri_plugin->vpp_name)) != 0) + goto out; + + err = inst->vri_plugin->vpp_ops->vpo_save(inst->vri_private, cvl); + if (err != 0) + goto out; + + if ((err = nvlist_add_nvlist(nvl, "vri_private", cvl)) != 0) + goto out; + + err = libvarpd_persist_nvlist(vip->vdi_persistfd, inst->vri_id, nvl); +out: + nvlist_free(nvl); + nvlist_free(cvl); + VERIFY0(rw_unlock(&vip->vdi_pfdlock)); + return (err); +} + +void +libvarpd_torch_instance(varpd_impl_t *vip, varpd_instance_t *inst) +{ + char buf[32]; + int ret; + + VERIFY0(rw_rdlock(&vip->vdi_pfdlock)); + if (vip->vdi_persistfd == -1) { + VERIFY0(rw_unlock(&vip->vdi_pfdlock)); + return; + } + + if (snprintf(buf, sizeof (buf), "%lld.varpd", inst->vri_id) >= 32) + libvarpd_panic("somehow exceeded static value for " + "libvarpd_torch_instance buffer"); + + do { + ret = unlinkat(vip->vdi_persistfd, buf, 0); + } while (ret == -1 && errno == EINTR); + if (ret != 0) { + switch (errno) { + case ENOENT: + break; + default: + libvarpd_panic("failed to unlinkat %d`%s: %s", + vip->vdi_persistfd, buf, strerror(errno)); + } + } + + VERIFY0(rw_unlock(&vip->vdi_pfdlock)); +} + +static int +libvarpd_persist_restore_instance(varpd_impl_t *vip, nvlist_t *nvl) +{ + int err; + nvlist_t *pvl; + uint64_t id, flags, vid; + uint32_t linkid, dest, mode; + char *pluginstr; + varpd_plugin_t *plugin; + overlay_plugin_dest_t adest; + varpd_instance_t *inst, lookup; + + if (nvlist_lookup_uint64(nvl, "vri_id", &id) != 0) + return (EINVAL); + + if (nvlist_lookup_uint32(nvl, "vri_linkid", &linkid) != 0) + return (EINVAL); + + if (nvlist_lookup_uint32(nvl, "vri_dest", &dest) != 0) + return (EINVAL); + + if (nvlist_lookup_uint32(nvl, "vri_mode", &mode) != 0) + return (EINVAL); + + if (nvlist_lookup_string(nvl, "vri_plugin", &pluginstr) != 0) + return (EINVAL); + + if (nvlist_lookup_nvlist(nvl, "vri_private", &pvl) != 0) + return (EINVAL); + + plugin = libvarpd_plugin_lookup(vip, pluginstr); + if (plugin == NULL) + return (EINVAL); + + if (plugin->vpp_mode != mode) + return (EINVAL); + + if (libvarpd_overlay_info(vip, linkid, &adest, &flags, &vid) != 0) + return (EINVAL); + + if (dest != adest) + return (EINVAL); + + inst = umem_alloc(sizeof (varpd_instance_t), UMEM_DEFAULT); + if (inst == NULL) + libvarpd_panic("failed to allocate instance for restore"); + + inst->vri_id = id_alloc_specific(vip->vdi_idspace, id); + if (inst->vri_id != id) { + umem_free(inst, sizeof (varpd_instance_t)); + return (EINVAL); + } + + inst->vri_linkid = linkid; + inst->vri_vnetid = vid; + inst->vri_mode = plugin->vpp_mode; + inst->vri_dest = dest; + inst->vri_plugin = plugin; + inst->vri_impl = vip; + inst->vri_flags = 0; + if (plugin->vpp_ops->vpo_restore(pvl, (varpd_provider_handle_t *)inst, + dest, &inst->vri_private) != 0) { + id_free(vip->vdi_idspace, id); + umem_free(inst, sizeof (varpd_instance_t)); + return (EINVAL); + } + + if (mutex_init(&inst->vri_lock, USYNC_THREAD | LOCK_ERRORCHECK, + NULL) != 0) + libvarpd_panic("failed to create vri_lock mutex"); + + mutex_enter(&vip->vdi_lock); + lookup.vri_id = inst->vri_id; + if (avl_find(&vip->vdi_instances, &lookup, NULL) != NULL) + libvarpd_panic("found duplicate instance with id %d", + lookup.vri_id); + avl_add(&vip->vdi_instances, inst); + lookup.vri_linkid = inst->vri_linkid; + if (avl_find(&vip->vdi_linstances, &lookup, NULL) != NULL) + libvarpd_panic("found duplicate linstance with id %d", + lookup.vri_linkid); + avl_add(&vip->vdi_linstances, inst); + mutex_exit(&vip->vdi_lock); + + if (plugin->vpp_ops->vpo_start(inst->vri_private) != 0) { + libvarpd_instance_destroy((varpd_instance_handle_t *)inst); + return (EINVAL); + } + + if (flags & OVERLAY_TARG_INFO_F_ACTIVE) + (void) libvarpd_overlay_disassociate(inst); + + if (libvarpd_overlay_associate(inst) != 0) { + libvarpd_instance_destroy((varpd_instance_handle_t *)inst); + return (EINVAL); + } + + if (flags & OVERLAY_TARG_INFO_F_DEGRADED) { + if ((err = libvarpd_overlay_restore(inst)) != 0) { + libvarpd_panic("failed to restore instance %p: %d\n", + inst, err); + } + } + + mutex_enter(&inst->vri_lock); + inst->vri_flags |= VARPD_INSTANCE_F_ACTIVATED; + mutex_exit(&inst->vri_lock); + + return (0); +} + +static int +libvarpd_persist_restore_one(varpd_impl_t *vip, int fd) +{ + int err; + size_t fsize; + struct stat st; + void *buf, *datap; + varpd_persist_header_t *hdr; + uint8_t md5[16]; + nvlist_t *nvl; + + if (fstat(fd, &st) != 0) + return (errno); + + if (st.st_size <= sizeof (varpd_persist_header_t)) + return (EINVAL); + fsize = st.st_size - sizeof (varpd_persist_header_t); + + buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (buf == MAP_FAILED) + return (errno); + + hdr = buf; + if (bcmp(varpd_persist_magic, hdr->vph_magic, + sizeof (varpd_persist_magic)) != 0) { + if (munmap(buf, st.st_size) != 0) + libvarpd_panic("failed to munmap %p: %d", buf, errno); + return (EINVAL); + } + + if (hdr->vph_version != VARPD_PERSIST_VERSION_ONE) { + if (munmap(buf, st.st_size) != 0) + libvarpd_panic("failed to munmap %p: %d", buf, errno); + return (EINVAL); + } + + datap = (void *)((uintptr_t)buf + sizeof (varpd_persist_header_t)); + md5_calc(md5, datap, fsize); + if (bcmp(md5, hdr->vph_md5, sizeof (uint8_t) * 16) != 0) { + if (munmap(buf, st.st_size) != 0) + libvarpd_panic("failed to munmap %p: %d", buf, errno); + return (EINVAL); + } + + err = nvlist_unpack(datap, fsize, &nvl, 0); + if (munmap(buf, st.st_size) != 0) + libvarpd_panic("failed to munmap %p: %d", buf, errno); + + if (err != 0) + return (EINVAL); + + err = libvarpd_persist_restore_instance(vip, nvl); + nvlist_free(nvl); + return (err); +} + +/* ARGSUSED */ +static int +libvarpd_check_degrade_cb(varpd_impl_t *vip, datalink_id_t linkid, void *arg) +{ + varpd_instance_t *inst; + + mutex_enter(&vip->vdi_lock); + for (inst = avl_first(&vip->vdi_instances); inst != NULL; + inst = AVL_NEXT(&vip->vdi_instances, inst)) { + if (inst->vri_linkid == linkid) { + mutex_exit(&vip->vdi_lock); + return (0); + } + } + + mutex_exit(&vip->vdi_lock); + + (void) libvarpd_overlay_degrade_datalink(vip, linkid, + "no varpd instance exists"); + return (0); +} + +static void +libvarpd_check_degrade(varpd_impl_t *vip) +{ + (void) libvarpd_overlay_iter(vip, libvarpd_check_degrade_cb, NULL); +} + +int +libvarpd_persist_restore(varpd_handle_t *vhp) +{ + int dirfd; + int ret = 0; + DIR *dirp = NULL; + struct dirent *dp; + varpd_impl_t *vip = (varpd_impl_t *)vhp; + + VERIFY0(rw_rdlock(&vip->vdi_pfdlock)); + if ((dirfd = dup(vip->vdi_persistfd)) < 0) { + ret = errno; + goto out; + } + + if ((dirp = fdopendir(dirfd)) == NULL) { + ret = errno; + if (close(dirfd) != 0) + libvarpd_panic("failed to close dirfd %d: %d", + dirfd, errno); + goto out; + } + + for (;;) { + int fd; + uint64_t id; + char *eptr; + struct stat st; + + errno = 0; + dp = readdir(dirp); + if (dp == NULL) { + ret = errno; + break; + } + + if (strcmp(dp->d_name, ".") == 0 || + strcmp(dp->d_name, "..") == 0) + continue; + + /* + * Leave files that we don't recognize alone. A valid file has + * the format `%llu.varpd`. + */ + errno = 0; + id = strtoull(dp->d_name, &eptr, 10); + if ((id == 0 && errno == EINVAL) || + (id == ULLONG_MAX && errno == ERANGE)) + continue; + + if (strcmp(eptr, VARPD_PERSIST_SUFFIX) != 0) + continue; + + fd = openat(vip->vdi_persistfd, dp->d_name, O_RDONLY); + if (fd < 0) { + ret = errno; + break; + } + + if (fstat(fd, &st) != 0) { + ret = errno; + break; + } + + if (!S_ISREG(st.st_mode)) { + if (close(fd) != 0) + libvarpd_panic("failed to close fd (%s) %d: " + "%d\n", dp->d_name, fd, errno); + continue; + } + + ret = libvarpd_persist_restore_one(vip, fd); + if (close(fd) != 0) + libvarpd_panic("failed to close fd (%s) %d: " + "%d\n", dp->d_name, fd, errno); + /* + * This is an invalid file. We'll unlink it to save us this + * trouble in the future. + */ + if (ret != 0) { + if (unlinkat(vip->vdi_persistfd, dp->d_name, 0) != 0) { + ret = errno; + break; + } + } + } + + libvarpd_check_degrade(vip); + +out: + if (dirp != NULL) + (void) closedir(dirp); + VERIFY0(rw_unlock(&vip->vdi_pfdlock)); + return (ret); +} + +int +libvarpd_persist_disable(varpd_handle_t *vhp) +{ + varpd_impl_t *vip = (varpd_impl_t *)vhp; + + VERIFY0(rw_wrlock(&vip->vdi_pfdlock)); + if (vip->vdi_persistfd == -1) { + mutex_exit(&vip->vdi_lock); + VERIFY0(rw_unlock(&vip->vdi_pfdlock)); + return (ENOENT); + } + if (close(vip->vdi_persistfd) != 0) + libvarpd_panic("failed to close persist fd %d: %d", + vip->vdi_persistfd, errno); + vip->vdi_persistfd = -1; + VERIFY0(rw_unlock(&vip->vdi_pfdlock)); + return (0); +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c new file mode 100644 index 0000000000..ac73286fdd --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c @@ -0,0 +1,269 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * varpd plugin management + */ + +#include <libvarpd_impl.h> +#include <errno.h> +#include <umem.h> +#include <assert.h> +#include <strings.h> +#include <dlfcn.h> +#include <link.h> +#include <stdio.h> +#include <bunyan.h> + +static varpd_impl_t *varpd_load_handle; +static const char *varpd_load_path; +static mutex_t varpd_load_lock; +static cond_t varpd_load_cv; + +int +libvarpd_plugin_comparator(const void *lp, const void *rp) +{ + int ret; + const varpd_plugin_t *lpp, *rpp; + + lpp = lp; + rpp = rp; + + ret = strcmp(lpp->vpp_name, rpp->vpp_name); + if (ret > 0) + return (1); + if (ret < 0) + return (-1); + return (0); +} + +varpd_plugin_register_t * +libvarpd_plugin_alloc(uint_t version, int *errp) +{ + int err; + varpd_plugin_register_t *vprp; + + if (errp == NULL) + errp = &err; + + if (version != VARPD_VERSION_ONE) { + (void) bunyan_warn(varpd_load_handle->vdi_bunyan, + "unsupported registration version", + BUNYAN_T_STRING, "module_path", varpd_load_path, + BUNYAN_T_INT32, "module_version", version, + BUNYAN_T_END); + *errp = EINVAL; + return (NULL); + } + + vprp = umem_alloc(sizeof (varpd_plugin_register_t), UMEM_DEFAULT); + if (vprp == NULL) { + (void) bunyan_warn(varpd_load_handle->vdi_bunyan, + "failed to allocate registration handle", + BUNYAN_T_STRING, "module_path", varpd_load_path, + BUNYAN_T_END); + *errp = ENOMEM; + return (NULL); + } + + vprp->vpr_version = VARPD_VERSION_ONE; + + return (vprp); +} + +void +libvarpd_plugin_free(varpd_plugin_register_t *vprp) +{ + umem_free(vprp, sizeof (varpd_plugin_register_t)); +} + +int +libvarpd_plugin_register(varpd_plugin_register_t *vprp) +{ + varpd_plugin_t *vpp; + varpd_plugin_t lookup; + + vpp = umem_alloc(sizeof (varpd_plugin_t), UMEM_DEFAULT); + if (vpp == NULL) { + (void) bunyan_warn(varpd_load_handle->vdi_bunyan, + "failed to allocate memory for the varpd_plugin_t", + BUNYAN_T_STRING, "module_path", varpd_load_path, + BUNYAN_T_END); + return (ENOMEM); + } + + /* Watch out for an evil plugin */ + if (vprp->vpr_version != VARPD_VERSION_ONE) { + (void) bunyan_warn(varpd_load_handle->vdi_bunyan, + "unsupported registration version", + BUNYAN_T_STRING, "module_path", varpd_load_path, + BUNYAN_T_INT32, "module_version", vprp->vpr_version, + BUNYAN_T_END); + return (EINVAL); + } + + mutex_enter(&varpd_load_lock); + if (varpd_load_handle == NULL) + libvarpd_panic("varpd_load_handle was unexpectedly null"); + + mutex_enter(&varpd_load_handle->vdi_lock); + lookup.vpp_name = vprp->vpr_name; + if (avl_find(&varpd_load_handle->vdi_plugins, &lookup, NULL) != NULL) { + (void) bunyan_warn(varpd_load_handle->vdi_bunyan, + "module already exists with requested name", + BUNYAN_T_STRING, "module_path", varpd_load_path, + BUNYAN_T_STRING, "name", vprp->vpr_name, + BUNYAN_T_END); + mutex_exit(&varpd_load_handle->vdi_lock); + mutex_exit(&varpd_load_lock); + umem_free(vpp, sizeof (varpd_plugin_t)); + return (EEXIST); + } + vpp->vpp_name = strdup(vprp->vpr_name); + if (vpp->vpp_name == NULL) { + (void) bunyan_warn(varpd_load_handle->vdi_bunyan, + "failed to allocate memory to duplicate name", + BUNYAN_T_STRING, "module_path", varpd_load_path, + BUNYAN_T_END); + mutex_exit(&varpd_load_handle->vdi_lock); + mutex_exit(&varpd_load_lock); + umem_free(vpp, sizeof (varpd_plugin_t)); + return (ENOMEM); + } + + vpp->vpp_mode = vprp->vpr_mode; + vpp->vpp_ops = vprp->vpr_ops; + if (mutex_init(&vpp->vpp_lock, USYNC_THREAD | LOCK_ERRORCHECK, + NULL) != 0) + libvarpd_panic("failed to create plugin's vpp_lock"); + vpp->vpp_active = 0; + avl_add(&varpd_load_handle->vdi_plugins, vpp); + mutex_exit(&varpd_load_handle->vdi_lock); + mutex_exit(&varpd_load_lock); + + return (0); +} + +varpd_plugin_t * +libvarpd_plugin_lookup(varpd_impl_t *vip, const char *name) +{ + varpd_plugin_t lookup, *ret; + + lookup.vpp_name = name; + mutex_enter(&vip->vdi_lock); + ret = avl_find(&vip->vdi_plugins, &lookup, NULL); + mutex_exit(&vip->vdi_lock); + + return (ret); +} + +/* ARGSUSED */ +static int +libvarpd_plugin_load_cb(varpd_impl_t *vip, const char *path, void *unused) +{ + void *dlp; + + varpd_load_path = path; + dlp = dlopen(path, RTLD_LOCAL | RTLD_NOW); + if (dlp == NULL) { + (void) bunyan_error(vip->vdi_bunyan, "dlopen failed", + BUNYAN_T_STRING, "module path", path, + BUNYAN_T_END); + } + path = NULL; + + return (0); +} + +int +libvarpd_plugin_load(varpd_handle_t *vph, const char *path) +{ + int ret = 0; + varpd_impl_t *vip = (varpd_impl_t *)vph; + + if (vip == NULL || path == NULL) + return (EINVAL); + mutex_enter(&varpd_load_lock); + while (varpd_load_handle != NULL) + (void) cond_wait(&varpd_load_cv, &varpd_load_lock); + varpd_load_handle = vip; + mutex_exit(&varpd_load_lock); + + ret = libvarpd_dirwalk(vip, path, ".so", libvarpd_plugin_load_cb, NULL); + + mutex_enter(&varpd_load_lock); + varpd_load_handle = NULL; + (void) cond_signal(&varpd_load_cv); + mutex_exit(&varpd_load_lock); + + return (ret); +} + +int +libvarpd_plugin_walk(varpd_handle_t *vph, libvarpd_plugin_walk_f func, + void *arg) +{ + varpd_impl_t *vip = (varpd_impl_t *)vph; + varpd_plugin_t *vpp; + + mutex_enter(&vip->vdi_lock); + for (vpp = avl_first(&vip->vdi_plugins); vpp != NULL; + vpp = AVL_NEXT(&vip->vdi_plugins, vpp)) { + if (func(vph, vpp->vpp_name, arg) != 0) { + mutex_exit(&vip->vdi_lock); + return (1); + } + } + mutex_exit(&vip->vdi_lock); + return (0); +} + +void +libvarpd_plugin_init(void) +{ + if (mutex_init(&varpd_load_lock, USYNC_THREAD | LOCK_RECURSIVE | + LOCK_ERRORCHECK, NULL) != 0) + libvarpd_panic("failed to create varpd_load_lock"); + + if (cond_init(&varpd_load_cv, USYNC_THREAD, NULL) != 0) + libvarpd_panic("failed to create varpd_load_cv"); + + varpd_load_handle = NULL; +} + +void +libvarpd_plugin_fini(void) +{ + assert(varpd_load_handle == NULL); + if (mutex_destroy(&varpd_load_lock) != 0) + libvarpd_panic("failed to destroy varpd_load_lock"); + if (cond_destroy(&varpd_load_cv) != 0) + libvarpd_panic("failed to destroy varpd_load_cv"); +} + +void +libvarpd_plugin_prefork(void) +{ + mutex_enter(&varpd_load_lock); + while (varpd_load_handle != NULL) + (void) cond_wait(&varpd_load_cv, &varpd_load_lock); +} + +void +libvarpd_plugin_postfork(void) +{ + (void) cond_signal(&varpd_load_cv); + mutex_exit(&varpd_load_lock); +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c new file mode 100644 index 0000000000..abe65a8c5d --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c @@ -0,0 +1,299 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * varpd property management + */ + +#include <libvarpd_impl.h> +#include <errno.h> +#include <strings.h> +#include <sys/mac.h> +#include <umem.h> + +typedef struct varpd_prop_info { + varpd_impl_t *vprop_vip; + varpd_instance_t *vprop_instance; + uint_t vprop_type; + uint_t vprop_prot; + uint32_t vprop_defsize; + uint32_t vprop_psize; + char vprop_name[LIBVARPD_PROP_NAMELEN]; + uint8_t vprop_default[LIBVARPD_PROP_SIZEMAX]; + uint8_t vprop_poss[LIBVARPD_PROP_SIZEMAX]; +} varpd_prop_info_t; + +/* Internal Properties */ +static int varpd_nintprops = 1; +static const char *varpd_intprops[] = { + "search" +}; + +static int +libvarpd_prop_get_search(varpd_prop_info_t *infop, void *buf, uint32_t *sizep) +{ + varpd_plugin_t *vpp = infop->vprop_instance->vri_plugin; + size_t nlen; + + nlen = strlen(vpp->vpp_name) + 1; + if (nlen > *sizep) + return (EOVERFLOW); + *sizep = nlen; + (void) strlcpy(buf, vpp->vpp_name, *sizep); + return (0); +} + +void +libvarpd_prop_set_name(varpd_prop_handle_t *phdl, const char *name) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + (void) strlcpy(infop->vprop_name, name, OVERLAY_PROP_NAMELEN); +} + +void +libvarpd_prop_set_prot(varpd_prop_handle_t *phdl, overlay_prop_prot_t perm) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + infop->vprop_prot = perm; +} + +void +libvarpd_prop_set_type(varpd_prop_handle_t *phdl, overlay_prop_type_t type) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + infop->vprop_type = type; +} + +int +libvarpd_prop_set_default(varpd_prop_handle_t *phdl, void *buf, ssize_t len) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + + if (len > LIBVARPD_PROP_SIZEMAX) + return (E2BIG); + + if (len < 0) + return (EOVERFLOW); + + bcopy(buf, infop->vprop_default, len); + infop->vprop_defsize = len; + return (0); +} + +void +libvarpd_prop_set_nodefault(varpd_prop_handle_t *phdl) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + + infop->vprop_default[0] = '\0'; + infop->vprop_defsize = 0; +} + +void +libvarpd_prop_set_range_uint32(varpd_prop_handle_t *phdl, uint32_t min, + uint32_t max) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + mac_propval_range_t *rangep = (mac_propval_range_t *)infop->vprop_poss; + + if (rangep->mpr_count != 0 && rangep->mpr_type != MAC_PROPVAL_UINT32) + return; + + if (infop->vprop_psize + sizeof (mac_propval_uint32_range_t) > + sizeof (infop->vprop_poss)) + return; + + infop->vprop_psize += sizeof (mac_propval_uint32_range_t); + rangep->mpr_count++; + rangep->mpr_type = MAC_PROPVAL_UINT32; + rangep->u.mpr_uint32[rangep->mpr_count-1].mpur_min = min; + rangep->u.mpr_uint32[rangep->mpr_count-1].mpur_max = max; +} + +void +libvarpd_prop_set_range_str(varpd_prop_handle_t *phdl, const char *str) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + size_t len = strlen(str) + 1; /* Account for a null terminator */ + mac_propval_range_t *rangep = (mac_propval_range_t *)infop->vprop_poss; + mac_propval_str_range_t *pstr = &rangep->u.mpr_str; + + if (rangep->mpr_count != 0 && rangep->mpr_type != MAC_PROPVAL_STR) + return; + + if (infop->vprop_psize + len > sizeof (infop->vprop_poss)) + return; + + rangep->mpr_count++; + rangep->mpr_type = MAC_PROPVAL_STR; + (void) strlcpy((char *)&pstr->mpur_data[pstr->mpur_nextbyte], str, + sizeof (infop->vprop_poss) - infop->vprop_psize); + pstr->mpur_nextbyte += len; + infop->vprop_psize += len; +} + +int +libvarpd_prop_handle_alloc(varpd_handle_t *vph, varpd_instance_handle_t *inst, + varpd_prop_handle_t **phdlp) +{ + varpd_prop_info_t *infop; + + infop = umem_alloc(sizeof (varpd_prop_info_t), UMEM_DEFAULT); + if (infop == NULL) + return (ENOMEM); + + bzero(infop, sizeof (varpd_prop_info_t)); + infop->vprop_vip = (varpd_impl_t *)vph; + infop->vprop_instance = (varpd_instance_t *)inst; + + *phdlp = (varpd_prop_handle_t *)infop; + return (0); +} + +void +libvarpd_prop_handle_free(varpd_prop_handle_t *phdl) +{ + umem_free(phdl, sizeof (varpd_prop_info_t)); +} + +int +libvarpd_prop_nprops(varpd_instance_handle_t *ihdl, uint_t *np) +{ + int ret; + varpd_instance_t *instp = (varpd_instance_t *)ihdl; + + ret = instp->vri_plugin->vpp_ops->vpo_nprops(instp->vri_private, np); + if (ret != 0) + return (ret); + *np += varpd_nintprops; + return (0); +} + +static int +libvarpd_prop_info_fill_int_cb(varpd_handle_t *handle, const char *name, + void *arg) +{ + varpd_prop_handle_t *vph = arg; + libvarpd_prop_set_range_str(vph, name); + return (0); +} + +static int +libvarpd_prop_info_fill_int(varpd_prop_handle_t *vph, uint_t propid) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)vph; + if (propid >= varpd_nintprops) + abort(); + libvarpd_prop_set_name(vph, varpd_intprops[0]); + libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_READ); + libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING); + libvarpd_prop_set_nodefault(vph); + libvarpd_plugin_walk((varpd_handle_t *)infop->vprop_instance->vri_impl, + libvarpd_prop_info_fill_int_cb, vph); + return (0); +} + +int +libvarpd_prop_info_fill(varpd_prop_handle_t *phdl, uint_t propid) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + varpd_instance_t *instp = infop->vprop_instance; + mac_propval_range_t *rangep = (mac_propval_range_t *)infop->vprop_poss; + + infop->vprop_psize = sizeof (mac_propval_range_t); + + bzero(rangep, sizeof (mac_propval_range_t)); + if (propid < varpd_nintprops) { + return (libvarpd_prop_info_fill_int(phdl, propid)); + } else { + varpd_plugin_t *vpp = instp->vri_plugin; + return (vpp->vpp_ops->vpo_propinfo(instp->vri_private, + propid - varpd_nintprops, phdl)); + } +} + +int +libvarpd_prop_info(varpd_prop_handle_t *phdl, const char **namep, + uint_t *typep, uint_t *protp, const void **defp, uint32_t *sizep, + const mac_propval_range_t **possp) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + if (namep != NULL) + *namep = infop->vprop_name; + if (typep != NULL) + *typep = infop->vprop_type; + if (protp != NULL) + *protp = infop->vprop_prot; + if (defp != NULL) + *defp = infop->vprop_default; + if (sizep != NULL) + *sizep = infop->vprop_psize; + if (possp != NULL) + *possp = (mac_propval_range_t *)infop->vprop_poss; + return (0); +} + +int +libvarpd_prop_get(varpd_prop_handle_t *phdl, void *buf, uint32_t *sizep) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + varpd_instance_t *instp = infop->vprop_instance; + + if (infop->vprop_name[0] == '\0') + return (EINVAL); + + if (strcmp(varpd_intprops[0], infop->vprop_name) == 0) { + /* search property */ + return (libvarpd_prop_get_search(infop, buf, sizep)); + } + + return (instp->vri_plugin->vpp_ops->vpo_getprop(instp->vri_private, + infop->vprop_name, buf, sizep)); +} + +int +libvarpd_prop_set(varpd_prop_handle_t *phdl, const void *buf, uint32_t size) +{ + int i; + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + varpd_instance_t *instp = infop->vprop_instance; + + if (infop->vprop_name[0] == '\0') + return (EINVAL); + + for (i = 0; i < varpd_nintprops; i++) { + if (strcmp(infop->vprop_name, varpd_intprops[i]) == 0) { + return (EPERM); + } + } + + return (instp->vri_plugin->vpp_ops->vpo_setprop(instp->vri_private, + infop->vprop_name, buf, size)); +} + +void +libvarpd_prop_door_convert(const varpd_prop_handle_t *phdl, + varpd_client_propinfo_arg_t *vcfap) +{ + const varpd_prop_info_t *infop = (const varpd_prop_info_t *)phdl; + + vcfap->vcfa_type = infop->vprop_type; + vcfap->vcfa_prot = infop->vprop_prot; + vcfap->vcfa_defsize = infop->vprop_defsize; + vcfap->vcfa_psize = infop->vprop_psize; + bcopy(infop->vprop_name, vcfap->vcfa_name, LIBVARPD_PROP_NAMELEN); + bcopy(infop->vprop_default, vcfap->vcfa_default, LIBVARPD_PROP_SIZEMAX); + bcopy(infop->vprop_poss, vcfap->vcfa_poss, LIBVARPD_PROP_SIZEMAX); +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h b/usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h new file mode 100644 index 0000000000..64fa99d308 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h @@ -0,0 +1,419 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#ifndef _LIBVARPD_PROVIDER_H +#define _LIBVARPD_PROVIDER_H + +/* + * varpd provider interface for lookup modules + * + * This header file defines all the structures and functions that a given lookup + * module needs to implement and perform its purpose. At this time, all of these + * interfaces are considered private to illumos and therefore are subject to + * change. At some point we will move to more broadly stabilize these interfaces + * and commit to them. Until such time, expect breakage for out of gate + * consumers. + * + * A plugin is a dynamic shared object that is placed inside of varpd's default + * module. + * + * The shared object must define an initializer, such as with #pragma init. This + * function will be run with the module is dlopened by libvarpd. In that init + * function, the function must allocate a varpd_plugin_register by calling + * libvarpd_plugin_alloc() and specifying VARPD_CURRENT_VERSION. If that + * succeeds, then it should proceed to fill out the registration and then call, + * libvarpd_plugin_register() with it. Regardless of whether it succeeds or + * fails, it should call libvarpd_plugin_free(). In the case of failure, there + * is not much that the module should do, other than log some message to the + * standard bunyan logger that exists. + * + * Once libvarpd_plugin_register() returns, the module should assume that any + * of the operations it defined in the operation vector may be called and + * therefore it is recommended that any other required initialization should be + * performed at that time. + * + * At this time, once a plugin is loaded, it will not be unloaded. Therefore, + * there is no corresponding requirement to unregister, though that may come in + * a future version. + * + * ----------------------------- + * Plugin Types and Destinations + * ----------------------------- + * + * There are two different kinds of plugins in this world, there are point to + * point plugins and there are dynamic plugins. The key difference is in how + * packets are routed through the system. In a point to point plugin, a single + * destination is used when the instance is started. In dynamic plugins, + * destinations are looked up as they are required and an instance of a plugin + * is required to provide that. + * + * These point to point plugins define a type of OVERLAY_TARGET_POINT and the + * dynamic plugins instead define a type of OVERLAY_TARGET_DYNAMIC. + * + * Encapsulation plugins have multiple types of destinations. They may require + * an Ethernet address (OVERLAY_PLUGIN_D_ETHERNET), IP address + * (OVERLAY_PLUGIN_D_IP), and a port (OVERLAY_PLUGIN_D_PORT). For example, + * consider vxlan, it requires an IP and a port; while a hypothetical nvgre, + * would only require an IP. + * + * A plugin is allowed to describe which of these fields that it supports and + * given which encapsulation plugin it is paired with, it can support a varying + * degree of properties. For example, consider the example of the direct plugin. + * It has a notion of a destination port and a destination IP. If it is paired + * with a plugin that only requires an IP, then it wouldn't need to show a + * property that's related to a destination port. + * + * ------------------ + * Plugin Definitions + * ------------------ + * + * A plugin is required to fill in both an operations vector and a series of + * additional metadata that it passed in to libvarpd_plugin_register(). The + * following lists all of the routines and their purposes. The full signatures + * are available in the body of the header file. + * + * varpd_plugin_create_f + * + * Create a new instance of a plugin. Each instance refers to a different + * overlay device and thus a different overlay identifier. Each instance + * has its own property space and is unique. This function gives the chance + * for the plugin to create and provide any private data that it will + * require. + * + * In addition, the plugin is given the type of destination that is + * required and it is its job to determine whether or not it supports it. + * + * varpd_plugin_destory_f + * + * This is the opposite of varpd_plugin_create_f. It is called to allow the + * plugin to reclaim any resources with the private argument that it passed + * out as part of the destroy function. + * + * varpd_plugin_start_f + * + * This routine is called to indicate that an instance should be started. + * This is a plugin's chance to verify that it has all of its required + * properties set and to take care of any action that needs to be handled + * to begin the plugin. After this point it will be legal to have the + * varpd_plugin_default_f, varpd_plugin_lookup_f, varpd_plugin_arp_f and + * varpd_plugin_dhcp_f endpoints called. + * + * varpd_plugin_stop_f + * + * This routine is called to indicate that an instance is stopping, it is + * the opposite of varpd_plugin_start_f. This is a chance to clean up + * resources that are a side effect of having started the instance. + * + * varpd_plugin_default_f + * + * This routine is defined by plugins of type OVERLAY_TARGET_POINT. It is + * used to answer the question of where should all traffic for this + * instance be destined. Plugins of type OVERLAY_TARGET_DYNAMIC should + * leave this entry set to NULL. + * + * On success, the default routine should return VARPD_LOOKUP_OK. On + * failure, it should return the macro VARPD_LOOKUP_DROP. + * + * varpd_plugin_lookup_f + * + * This routine must be defined by plugins of type OVERLAY_TARGET_DYNAMIC. + * It is used to lookup the destination for a given request. Each request + * comes in with its own MAC address this allows a plugin to direct it to + * any remote location. + * + * This is designed as an asynchronous API. Once a lookup is completed it + * should call libvarpd_plugin_query_reply() and pass as the second + * argument either VARPD_LOOKUP_OK to indicate that it went alright or it + * should reply VARPD_LOOKUP_DROP to indicate that the packet should be + * dropped. + * + * In addition, there are several utility routines that can take care of + * various kinds of traffic automatically. For example, if an ARP, NDP, or + * DHCP packet comes in, there are utilities such as + * libvarpd_plugin_proxy_arp(), libvarpd_plugin_proxy_ndp() and + * libvarpd_plugin_proxy_dhcp(), which allows the system to do the heavy + * lifting of validating the packet once it finds that it matches certain + * properties. + * + * varpd_plugin_arp_f + * + * This is an optional entry for plugins of type OVERLAY_TARGET_DYNAMIC. + * This is called after a plugin calls libvarpd_plugin_proxy_arp() and is + * used to ask the plugin to perform an ARP or NDP query. The type of query + * is passed in in the third argument, the only valid value for which will + * be VARPD_QTYPE_ETHERNET, to indicate we're doing an Ethernet lookup. + * + * The layer three IP address that is being looked up will be included in + * the struct sockaddr. The sockaddr(3SOCKET)'s sa_family will be set to + * indicate the type, eg. AF_INET or AF_INET6 and that will indicate the + * kind of sockaddr that will be used. For more information see + * sockaddr(3SOCKET). The implementation ensures that enough space for the + * link layer address will exist. + * + * This is an asynchronous lookup. Once the answer has been written, a + * plugin should call libvarpd_plugin_arp_reply and if it was successful, + * VARPD_LOOKUP_OK should be passed in and if it failed, VARPD_LOOKUP_DROP + * should be passed in instead. + * + * varpd_plugin_dhcp_f + * + * This is an optional entry for plugins of type OVERLAY_TARGET_DYNAMIC. + * This is called after a plugin calls the libvarpd_plugin_proxy_dhcp() and + * is used to ask the plugin to determine where is the DHCP server that + * this packet should actually be sent to. What is happening here is that + * rather than broadcast the initial DHCP request, we instead unicast it to + * a specified DHCP server that this operation vector indicates. + * + * The plugin is given a type, the same as the ARP plugin which indicates + * the kind of link layer address, the only valid type is + * VARPD_QTYPE_ETHERNET, other types should be rejected. Then, like the arp + * entry point, the dhcp entry point should determine the link layer + * address of the DHCP server and write that out in the appropriate memory + * and call libvarpd_plugin_dhcp_reply() when done. Similar to the arp + * entry point, it should use VARPD_LOOKUP_OK to indicate that it was + * filled in and VARPD_LOOKUP_DROP to indicate that it was not. + * + * varpd_plugin_nprops_f + * + * This is used by a plugin to indicate the number of properties that + * should exist for this instance. Recall from the section that Plugin + * types and Destinations, that the number of entries here may vary. As + * such, the plugin should return the number that is appropriate for the + * instance. + * + * This number will be used to obtain information about a property via the + * propinfo functions. However, the getprop and setprop interfaces will + * always use names to indicate the property it is getting and setting. + * This difference is structured this way to deal with property discovery + * and to make the getprop and setprop interfaces slightly easier for other + * parts of the broader varpd/dladm infrastructure. + * + * varpd_plugin_propinfo_f + * + * This interface is used to get information about a property, the property + * that information is being requested for is being passed in via the + * second argument. Here, callers should set properties such as the name, + * the protection, whether or not the property is required, set any default + * value, if it exist, and if relevant, set the valid range of values. + * + * varpd_plugin_getprop_f + * + * This is used to get the value of a property, if it is set. The passed in + * length indicates the length of the buffer that is used for updating + * properties. If it is not of sufficient size, the function should return + * an error and not update the buffer. Otherwise, it should update the size + * pointer with the valid size. + * + * varpd_plugin_setprop_f + * + * This is used to set the value of a property. An endpoint should validate + * that the property is valid before updating it. In addition, it should + * update its state as appropriate. + * + * varpd_plugin_save_f + * + * This is used to serialize the state of a given instance of a plugin such + * that if varpd crashes, it can be recovered. The plugin should write all + * state into the nvlist that it is passed in, it may use any keys and + * values that it wants. The only consumer of that nvlist will be the + * plugin itself when the restore endpoint is called. + * + * varpd_plugin_restore_f + * + * This is called by the server to restore an instance that used to exist, + * but was lost due to a crash. This is a combination of calling create and + * setting properties. The plugin should restore any private state that it + * can find recorded from the nvlist. The only items in the nvlist will be + * those that were written out during a previous call to + * varpd_plugin_save_f. + * + * + * Once all of these interfaces are implemented, the plugin should define the + * following members in the varpd_plugin_register_t. + * + * vpr_version + * + * This indicates the version of the plugin. Plugins should set this to the + * macro VARPD_CURRENT_VERSION. + * + * vpr_mode + * + * This indicates the mode of the plugin. The plugin's mode should be one + * of OVERLAY_TARGET_POINT and OVERLAY_TARGET_DYNAMIC. For more discussion + * of these types and the differences, see the section on Plugin Types and + * Destinations. + * + * vpr_name + * + * This is the name of the plugin. This is how users will refer to it in + * the context of running dladm(1M) commands. Note, this name must be + * unique across the different plugins, as it will cause others with the + * same name not to be registered. + * + * vpr_ops + * + * This is the operations vector as described above. Importantly, the + * member vpo_callbacks must be set to zero, this is being used for future + * expansion of the structure. + * + * + * -------------------------------------------------- + * Downcalls, Upcalls, and Synchronization Guarantees + * -------------------------------------------------- + * + * Every instance of a plugin is independent. Calls into a plugin may be made + * for different instances in parallel. Any necessary locking is left to the + * plugin module. Within an instance, various calls may come in parallel. + * + * The primary guarantees are that none of the varpd_plugin_save_f, + * varpd_plugin_lookup_f, varpd_default_f, varpd_plugin_arp_f, and + * varpd_plugin_dhcp_f will be called until after a call to varpd_plugin_start_f + * has been called. Similarly, they will not be called after a call to + * vardp_plugin_stop_f. + * + * The functions documented in this header may be called back into from any + * context, including from the operation vectors. + */ + +#include <bunyan.h> +#include <libvarpd.h> +#include <libnvpair.h> +#include <sys/socket.h> +#include <sys/overlay_target.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define VARPD_VERSION_ONE 1 +#define VARPD_CURRENT_VERSION VARPD_VERSION_ONE + +typedef struct __varpd_provier_handle varpd_provider_handle_t; +typedef struct __varpd_query_handle varpd_query_handle_t; +typedef struct __varpd_arp_handle varpd_arp_handle_t; +typedef struct __varpd_dhcp_handle varpd_dhcp_handle_t; + +typedef int (*varpd_plugin_create_f)(varpd_provider_handle_t *, void **, + overlay_plugin_dest_t); +typedef int (*varpd_plugin_start_f)(void *); +typedef void (*varpd_plugin_stop_f)(void *); +typedef void (*varpd_plugin_destroy_f)(void *); + +#define VARPD_LOOKUP_OK (0) +#define VARPD_LOOKUP_DROP (-1) +typedef int (*varpd_plugin_default_f)(void *, overlay_target_point_t *); +typedef void (*varpd_plugin_lookup_f)(void *, varpd_query_handle_t *, + const overlay_targ_lookup_t *, overlay_target_point_t *); + +#define VARPD_QTYPE_ETHERNET 0x0 +typedef void (*varpd_plugin_arp_f)(void *, varpd_arp_handle_t *, int, + const struct sockaddr *, uint8_t *); +typedef void (*varpd_plugin_dhcp_f)(void *, varpd_dhcp_handle_t *, int, + const overlay_targ_lookup_t *, uint8_t *); + +typedef int (*varpd_plugin_nprops_f)(void *, uint_t *); +typedef int (*varpd_plugin_propinfo_f)(void *, const uint_t, + varpd_prop_handle_t *); +typedef int (*varpd_plugin_getprop_f)(void *, const char *, void *, uint32_t *); +typedef int (*varpd_plugin_setprop_f)(void *, const char *, const void *, + const uint32_t); + +typedef int (*varpd_plugin_save_f)(void *, nvlist_t *); +typedef int (*varpd_plugin_restore_f)(nvlist_t *, varpd_provider_handle_t *, + overlay_plugin_dest_t, void **); + +typedef struct varpd_plugin_ops { + uint_t vpo_callbacks; + varpd_plugin_create_f vpo_create; + varpd_plugin_start_f vpo_start; + varpd_plugin_stop_f vpo_stop; + varpd_plugin_destroy_f vpo_destroy; + varpd_plugin_default_f vpo_default; + varpd_plugin_lookup_f vpo_lookup; + varpd_plugin_nprops_f vpo_nprops; + varpd_plugin_propinfo_f vpo_propinfo; + varpd_plugin_getprop_f vpo_getprop; + varpd_plugin_setprop_f vpo_setprop; + varpd_plugin_save_f vpo_save; + varpd_plugin_restore_f vpo_restore; + varpd_plugin_arp_f vpo_arp; + varpd_plugin_dhcp_f vpo_dhcp; +} varpd_plugin_ops_t; + +typedef struct varpd_plugin_register { + uint_t vpr_version; + uint_t vpr_mode; + const char *vpr_name; + const varpd_plugin_ops_t *vpr_ops; +} varpd_plugin_register_t; + +extern varpd_plugin_register_t *libvarpd_plugin_alloc(uint_t, int *); +extern void libvarpd_plugin_free(varpd_plugin_register_t *); +extern int libvarpd_plugin_register(varpd_plugin_register_t *); + +/* + * Blowing up and logging + */ +extern void libvarpd_panic(const char *, ...) __NORETURN; +extern const bunyan_logger_t *libvarpd_plugin_bunyan(varpd_provider_handle_t *); + +/* + * Misc. Information APIs + */ +extern uint64_t libvarpd_plugin_vnetid(varpd_provider_handle_t *); + +/* + * Lookup Replying query and proxying + */ +extern void libvarpd_plugin_query_reply(varpd_query_handle_t *, int); + +extern void libvarpd_plugin_proxy_arp(varpd_provider_handle_t *, + varpd_query_handle_t *, const overlay_targ_lookup_t *); +extern void libvarpd_plugin_proxy_ndp(varpd_provider_handle_t *, + varpd_query_handle_t *, const overlay_targ_lookup_t *); +extern void libvarpd_plugin_arp_reply(varpd_arp_handle_t *, int); + +extern void libvarpd_plugin_proxy_dhcp(varpd_provider_handle_t *, + varpd_query_handle_t *, const overlay_targ_lookup_t *); +extern void libvarpd_plugin_dhcp_reply(varpd_dhcp_handle_t *, int); + + +/* + * Property information callbacks + */ +extern void libvarpd_prop_set_name(varpd_prop_handle_t *, const char *); +extern void libvarpd_prop_set_prot(varpd_prop_handle_t *, overlay_prop_prot_t); +extern void libvarpd_prop_set_type(varpd_prop_handle_t *, overlay_prop_type_t); +extern int libvarpd_prop_set_default(varpd_prop_handle_t *, void *, ssize_t); +extern void libvarpd_prop_set_nodefault(varpd_prop_handle_t *); +extern void libvarpd_prop_set_range_uint32(varpd_prop_handle_t *, uint32_t, + uint32_t); +extern void libvarpd_prop_set_range_str(varpd_prop_handle_t *, const char *); + +/* + * Various injecting and invalidation routines + */ +extern void libvarpd_inject_varp(varpd_provider_handle_t *, const uint8_t *, + const overlay_target_point_t *); +extern void libvarpd_inject_arp(varpd_provider_handle_t *, const uint16_t, + const uint8_t *, const struct in_addr *, const uint8_t *); +extern void libvarpd_fma_degrade(varpd_provider_handle_t *, const char *); +extern void libvarpd_fma_restore(varpd_provider_handle_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBVARPD_PROVIDER_H */ diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_util.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_util.c new file mode 100644 index 0000000000..e21fed2126 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_util.c @@ -0,0 +1,97 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#include <libvarpd_impl.h> +#include <assert.h> +#include <stdio.h> +#include <sys/types.h> +#include <dirent.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +const char * +libvarpd_isaext(void) +{ +#if defined(__sparc) +#if defined(__sparcv9) + return ("64"); +#else /* __sparcv9 */ + return (""); +#endif /* __sparvc9 */ +#elif defined(__amd64) + return ("64"); +#elif defined(__i386) + return (""); +#else +#error "unkonwn ISA" +#endif +} + +int +libvarpd_dirwalk(varpd_impl_t *vip, const char *path, const char *suffix, + libvarpd_dirwalk_f func, void *arg) +{ + int ret; + size_t slen; + char *dirpath, *filepath; + DIR *dirp; + struct dirent *dp; + assert(vip != NULL && path != NULL); + + if (asprintf(&dirpath, "%s/%s", path, libvarpd_isaext()) == -1) + return (errno); + + if ((dirp = opendir(dirpath)) == NULL) { + ret = errno; + return (ret); + } + + slen = strlen(suffix); + for (;;) { + size_t len; + + errno = 0; + dp = readdir(dirp); + if (dp == NULL) { + ret = errno; + break; + } + + len = strlen(dp->d_name); + if (len <= slen) + continue; + + if (strcmp(suffix, dp->d_name + (len - slen)) != 0) + continue; + + if (asprintf(&filepath, "%s/%s", dirpath, dp->d_name) == -1) { + ret = errno; + break; + } + + if (func(vip, filepath, arg) != 0) { + free(filepath); + ret = 0; + break; + } + + free(filepath); + } + + (void) closedir(dirp); + free(dirpath); + return (ret); +} diff --git a/usr/src/lib/varpd/libvarpd/common/llib-lvarpd b/usr/src/lib/varpd/libvarpd/common/llib-lvarpd new file mode 100644 index 0000000000..85150d3463 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/llib-lvarpd @@ -0,0 +1,19 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + +#include <libvarpd.h> diff --git a/usr/src/lib/varpd/libvarpd/common/mapfile-plugin b/usr/src/lib/varpd/libvarpd/common/mapfile-plugin new file mode 100644 index 0000000000..8cef7f669f --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/mapfile-plugin @@ -0,0 +1,57 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_SCOPE { + global: + libvarpd_fma_degrade { FLAGS = EXTERN }; + libvarpd_inject_arp { FLAGS = EXTERN }; + libvarpd_inject_ndp { FLAGS = EXTERN }; + libvarpd_inject_varp { FLAGS = EXTERN }; + libvarpd_fma_restore { FLAGS = EXTERN }; + libvarpd_panic { FLAGS = EXTERN }; + libvarpd_plugin_alloc { FLAGS = EXTERN }; + libvarpd_plugin_arp_reply { FLAGS = EXTERN }; + libvarpd_plugin_dhcp_reply { FLAGS = EXTERN }; + libvarpd_plugin_free { FLAGS = EXTERN }; + libvarpd_plugin_proxy_arp { FLAGS = EXTERN }; + libvarpd_plugin_proxy_dhcp { FLAGS = EXTERN }; + libvarpd_plugin_proxy_ndp { FLAGS = EXTERN }; + libvarpd_plugin_query_reply { FLAGS = EXTERN }; + libvarpd_plugin_register { FLAGS = EXTERN }; + libvarpd_plugin_vnetid { FLAGS = EXTERN }; + libvarpd_prop_set_name { FLAGS = EXTERN }; + libvarpd_prop_set_prot { FLAGS = EXTERN }; + libvarpd_prop_set_type { FLAGS = EXTERN }; + libvarpd_prop_set_default { FLAGS = EXTERN }; + libvarpd_prop_set_nodefault { FLAGS = EXTERN }; + libvarpd_prop_set_range_uint32 { FLAGS = EXTERN }; + libvarpd_prop_set_rangestr { FLAGS = EXTERN }; +}; diff --git a/usr/src/lib/varpd/libvarpd/common/mapfile-vers b/usr/src/lib/varpd/libvarpd/common/mapfile-vers new file mode 100644 index 0000000000..7aa930cb54 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/mapfile-vers @@ -0,0 +1,113 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION SUNWprivate { + global: + libvarpd_c_create; + libvarpd_c_destroy; + libvarpd_c_instance_activate; + libvarpd_c_instance_create; + libvarpd_c_instance_destroy; + libvarpd_c_prop_nprops; + libvarpd_c_prop_handle_alloc; + libvarpd_c_prop_handle_free; + libvarpd_c_prop_info_fill; + libvarpd_c_prop_info_fill_by_name; + libvarpd_c_prop_info; + libvarpd_c_prop_get; + libvarpd_c_prop_set; + + libvarpd_c_instance_lookup; + libvarpd_c_instance_target_mode; + libvarpd_c_instance_cache_flush; + libvarpd_c_instance_cache_delete; + libvarpd_c_instance_cache_get; + libvarpd_c_instance_cache_set; + libvarpd_c_instance_cache_walk; + + libvarpd_create; + libvarpd_destroy; + + libvarpd_door_server_create; + libvarpd_door_server_destroy; + + libvarpd_fma_degrade; + libvarpd_fma_restore; + + libvarpd_inject_varp; + libvarpd_inject_arp; + + libvarpd_instance_activate; + libvarpd_instance_create; + libvarpd_instance_destroy; + libvarpd_instance_lookup; + libvarpd_instance_id; + + libvarpd_panic; + + libvarpd_persist_disable; + libvarpd_persist_enable; + libvarpd_persist_restore; + + libvarpd_plugin_alloc; + libvarpd_plugin_load; + libvarpd_plugin_free; + libvarpd_plugin_arp_reply; + libvarpd_plugin_dhcp_reply; + libvarpd_plugin_query_reply; + libvarpd_plugin_proxy_arp; + libvarpd_plugin_proxy_dhcp; + libvarpd_plugin_proxy_ndp; + libvarpd_plugin_register; + libvarpd_plugin_walk; + libvarpd_plugin_vnetid; + + libvarpd_prop_set_default; + libvarpd_prop_set_nodefault; + libvarpd_prop_set_name; + libvarpd_prop_set_prot; + libvarpd_prop_set_range_uint32; + libvarpd_prop_set_range_str; + libvarpd_prop_set_type; + + libvarpd_prop_handle_alloc; + libvarpd_prop_handle_free; + libvarpd_prop_nprops; + libvarpd_prop_info_fill; + libvarpd_prop_info; + libvarpd_prop_get; + libvarpd_prop_set; + + libvarpd_overlay_lookup_quiesce; + libvarpd_overlay_lookup_run; + local: + *; +}; diff --git a/usr/src/lib/varpd/libvarpd/i386/Makefile b/usr/src/lib/varpd/libvarpd/i386/Makefile new file mode 100644 index 0000000000..f2b4f63da5 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/i386/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/libvarpd/sparc/Makefile b/usr/src/lib/varpd/libvarpd/sparc/Makefile new file mode 100644 index 0000000000..f2b4f63da5 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/sparc/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/libvarpd/sparcv9/Makefile b/usr/src/lib/varpd/libvarpd/sparcv9/Makefile new file mode 100644 index 0000000000..d552642882 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/sparcv9/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/varpd/svp/Makefile b/usr/src/lib/varpd/svp/Makefile new file mode 100644 index 0000000000..275f07bf8b --- /dev/null +++ b/usr/src/lib/varpd/svp/Makefile @@ -0,0 +1,40 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../../Makefile.lib + +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET = all +clean := TARGET = clean +clobber := TARGET = clobber +install := TARGET = install +lint := TARGET = lint + +.KEEP_STATE: + +all clean clobber install lint: $(SUBDIRS) + +install_h: + +check: + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include ../../Makefile.targ diff --git a/usr/src/lib/varpd/svp/Makefile.com b/usr/src/lib/varpd/svp/Makefile.com new file mode 100644 index 0000000000..15b6540e74 --- /dev/null +++ b/usr/src/lib/varpd/svp/Makefile.com @@ -0,0 +1,54 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +LIBRARY = libvarpd_svp.a +VERS = .1 +OBJECTS = libvarpd_svp.o \ + libvarpd_svp_conn.o \ + libvarpd_svp_crc.o \ + libvarpd_svp_host.o \ + libvarpd_svp_loop.o \ + libvarpd_svp_remote.o \ + libvarpd_svp_shootdown.o \ + libvarpd_svp_timer.o + +include ../../../Makefile.lib +include ../../Makefile.plugin + +LIBS = $(DYNLIB) + +# +# Yes, this isn't a command, but libcmdutils does have the list(9F) +# functions and better to use that then compile list.o yet again +# ourselves... probably. +# +LDLIBS += -lc -lumem -lnvpair -lsocket -lnsl -lavl \ + -lcmdutils -lidspace -lbunyan +CPPFLAGS += -I../common + +LINTFLAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTFLAGS64 += -erroff=E_BAD_PTR_CAST_ALIGN +SRCDIR = ../common + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +include ../../../Makefile.targ diff --git a/usr/src/lib/varpd/svp/amd64/Makefile b/usr/src/lib/varpd/svp/amd64/Makefile new file mode 100644 index 0000000000..d552642882 --- /dev/null +++ b/usr/src/lib/varpd/svp/amd64/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp.c b/usr/src/lib/varpd/svp/common/libvarpd_svp.c new file mode 100644 index 0000000000..58828065a1 --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp.c @@ -0,0 +1,1138 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015, Joyent, Inc. + */ + +/* + * This plugin implements the SDC VXLAN Protocol (SVP). + * + * This plugin is designed to work with a broader distributed system that + * mainains a database of mappings and provides a means of looking up data and + * provides a stream of updates. While it is named after VXLAN, there isn't + * anything specific to VXLAN baked into the protocol at this time, other than + * that it requires both an IP address and a port; however, if there's a good + * reason to support others here, we can modify that. + * + * ----------- + * Terminology + * ----------- + * + * Throughout this module we refer to a few different kinds of addresses: + * + * VL3 + * + * A VL3 address, or virtual layer 3, refers to the layer three addreses + * that are used by entities on an overlay network. As far as we're + * concerned that means that this is the IP address of an interface on an + * overlay network. + * + * VL2 + * + * A VL2 address, or a virtual layer 2, referes to the link-layer addresses + * that are used by entities on an overlay network. As far as we're + * concerned that means that this is the MAC addresses of an interface on + * an overlay network. + * + * UL3 + * + * A UL3, or underlay layer 3, refers to the layer three (IP) address on + * the underlay network. + * + * The svp plugin provides lookups from VL3->VL2, eg. the equivalent of an ARP + * or NDP query, and then also provides VL2->UL3 lookups. + * + * ------------------- + * Protocol Operations + * ------------------- + * + * The svp protocol is defined in lib/varpd/svp/common/libvarpd_svp_prot.h. It + * defines the basic TCP protocol that we use to communicate to hosts. At this + * time, it is not quite 100% implemented in both this plug-in and our primary + * server, sdc-portolan (see https://github.com/joyent/sdc-portolan). + * + * At this time, we don't quite support everything that we need to. Including + * the SVP_R_BULK_REQ and SVP_R_SHOOTDOWN. + * + * --------------------------------- + * General Design and Considerations + * --------------------------------- + * + * Every instance of the svp plugin requires the hostname and port of a server + * to contact. Though, we have co-opted the port 1296 (the year of the oldest + * extant portolan) as our default port. + * + * Each of the different instance of the plugins has a corresponding remote + * backend. The remote backend represents the tuple of the [ host, port ]. + * Different instances that share the same host and port tuple will use the same + * backend. + * + * The backend is actually in charge of performing lookups, resolving and + * updating the set of remote hosts based on the DNS resolution we've been + * provided, and taking care of things like shootdowns. + * + * The whole plugin itself maintains an event loop and a number of threads to + * service that event loop. On top of that event loop, we have a simple timer + * backend that ticks at one second intervals and performs various callbacks, + * such as idle query timers, DNS resolution, connection backoff, etc. Each of + * the remote hosts that we obtain is wrapped up in an svp_conn_t, which manages + * the connection state, reconnecting, etc. + * + * All in all, the general way that this all looks like is: + * + * +----------------------------+ + * | Plugin Instance | + * | svp_t | + * | | + * | varpd_provider_handle_t * -+-> varpd handle + * | uint64_t ----+-> varpd ID + * | char * ----+-> remote host + * | uint16_t ----+-> remote port + * | svp_remote_t * ---+------+-> remote backend + * +---------------------+------+ + * | + * v + * +----------------------+ +----------------+ + * | Remote backend |------------------>| Remove Backend |---> ... + * | svp_remote_t | | svp_remote_t | + * | | +----------------+ + * | svp_remote_state_t --+-> state flags + * | svp_degrade_state_t -+-> degraded reason + * | struct addrinfo * --+-> resolved hosts + * | uint_t ---+-> active hosts + * | uint_t ---+-> DNS generation + * | uint_t ---+-> Reference count + * | uint_t ---+-> active conns + * | uint_t ---+-> degraded conns + * | list_t ---+---+-> connection list + * +------------------+---+ + * | + * +------------------------------+-----------------+ + * | | | + * v v v + * +-------------------+ +---------------- + * | SVP Connection | | SVP connection | ... + * | svp_conn_t | | svp_conn_t | + * | | +----------------+ + * | svp_event_t ----+-> event loop handle + * | svp_timer_t ----+-> backoff timer + * | svp_timer_t ----+-> query timer + * | int ----+-> socket fd + * | uint_t ----+-> generation + * | uint_t ----+-> current backoff + * | svp_conn_flags_t -+-> connection flags + * | svp_conn_state_t -+-> connection state + * | svp_conn_error_t -+-> connection error + * | int ---+-> last errrno + * | hrtime_t ---+-> activity timestamp + * | svp_conn_out_t ---+-> outgoing data state + * | svp_conn_in_t ---+-> incoming data state + * | list_t ---+--+-> active queries + * +----------------+--+ + * | + * +----------------------------------+-----------------+ + * | | | + * v v v + * +--------------------+ +-------------+ + * | SVP Query | | SVP Query | ... + * | svp_query_t | | svp_query_t | + * | | +-------------+ + * | svp_query_f ---+-> callback function + * | void * ---+-> callback arg + * | svp_query_state_t -+-> state flags + * | svp_req_t ---+-> svp prot. header + * | svp_query_data_t --+-> read data + * | svp_query_data_t --+-> write data + * | svp_status_t ---+-> request status + * +--------------------+ + * + * The svp_t is the instance that we assoicate with varpd. The instance itself + * maintains properties and then when it's started associates with an + * svp_remote_t, which is the remote backend. The remote backend itself, + * maintains the DNS state and spins up and downs connections based on the + * results from DNS. By default, we query DNS every 30 seconds. For more on the + * connection life cycle, see the next section. + * + * By default, each connection maintains its own back off timer and list of + * queries it's servicing. Only one request is generally outstanding at a time + * and requests are round robined across the various connections. + * + * The query itself represents the svp request that's going on and keep track of + * its state and is a place for data that's read and written to as part of the + * request. + * + * Connections maintain a query timer such that if we have not received data on + * a socket for a certain amount of time, we kill that socket and begin a + * reconnection cycle with backoff. + * + * ------------------------ + * Connection State Machine + * ------------------------ + * + * We have a connection pool that's built upon DNS records. DNS describes the + * membership of the set of remote peers that make up our pool and we maintain + * one connection to each of them. In addition, we maintain an exponential + * backoff for each peer and will attempt to reconect immediately before backing + * off. The following are the valid states that a connection can be in: + * + * SVP_CS_ERROR An OS error has occurred on this connection, + * such as failure to create a socket or associate + * the socket with an event port. We also + * transition all connections to this state before + * we destroy them. + * + * SVP_CS_INITIAL This is the initial state of a connection, all + * that should exist is an unbound socket. + * + * SVP_CS_CONNECTING A call to connect has been made and we are + * polling for it to complete. + * + * SVP_CS_BACKOFF A connect attempt has failed and we are + * currently backing off, waiting to try again. + * + * SVP_CS_ACTIVE We have successfully connected to the remote + * system. + * + * SVP_CS_WINDDOWN This connection is going to valhalla. In other + * words, a previously active connection is no + * longer valid in DNS, so we should curb our use + * of it, and reap it as soon as we have other + * active connections. + * + * The following diagram attempts to describe our state transition scheme, and + * when we transition from one state to the next. + * + * | + * * New remote IP from DNS resolution, + * | not currently active in the system. + * | + * v Socket Error, + * +----------------+ still in DNS + * +----------------<---| SVP_CS_INITIAL |<----------------------*-----+ + * | +----------------+ | + * | System | | + * | Connection . . . . . success * Successful | + * | failed . | connect() | + * | +----*---------+ | +-----------*--+ | + * | | | | | | | + * | V ^ v ^ V ^ + * | +----------------+ +-------------------+ +---------------+ + * +<-| SVP_CS_BACKOFF | | SVP_CS_CONNECTING | | SVP_CS_ACTIVE | + * | +----------------+ +-------------------+ +---------------+ + * | V ^ V V V + * | Backoff wait * | | | * Removed + * v interval +--------------+ +-----------------<-----+ | from DNS + * | finished | | + * | V | + * | | V + * | | +-----------------+ + * +----------------+----------<-----+-------<----| SVP_CS_WINDDOWN | + * | +-----------------+ + * * . . . Fatal system, not + * | socket error or + * V quiesced after + * +--------------+ removal from DNS + * | SVP_CS_ERROR | + * +--------------+ + * | + * * . . . Removed from DNS + * v + * +------------+ + * | Connection | + * | Destroyed | + * +------------+ + * + * -------------------------- + * Connection Event Injection + * -------------------------- + * + * For each connection that exists in the system, we have a timer in place that + * is in charge of performing timeout activity. It fires once every thirty + * seconds or so for a given connection and checks to ensure that we have had + * activity for the most recent query on the connection. If not, it terminates + * the connection. This is important as if we have sent all our data and are + * waiting for the remote end to reply, without enabling something like TCP + * keep-alive, we will not be notified that anything that has happened to the + * remote connection, for example a panic. In addition, this also protects + * against a server that is up, but a portolan that is not making forward + * progress. + * + * When a timeout occurs, we first try to disassociate any active events, which + * by definition must exist. Once that's done, we inject a port source user + * event. Now, there is a small gotcha. Let's assume for a moment that we have a + * pathological portolan. That means that it knows to inject activity right at + * the time out window. That means, that the event may be disassociated before + * we could get to it. If that's the case, we must _not_ inject the user event + * and instead, we'll let the pending event take care of it. We know that the + * pending event hasn't hit the main part of the loop yet, otherwise, it would + * have released the lock protecting our state and associated the event. + * + * ------------ + * Notes on DNS + * ------------ + * + * Unfortunately, doing host name resolution in a way that allows us to leverage + * the system's resolvers and the system's caching, require us to make blocking + * calls in libc via getaddrinfo(3SOCKET). If we can't reach a given server, + * that will tie up a thread for quite some time. To work around that fact, + * we're going to create a fixed number of threads and we'll use them to service + * our DNS requests. While this isn't ideal, until we have a sane means of + * integrating a DNS resolution into an event loop with say portfs, it's not + * going to be a fun day no matter what we do. + * + * ------ + * Timers + * ------ + * + * We maintain a single timer based on CLOCK_REALTIME. It's designed to fire + * every second. While we'd rather use CLOCK_HIGHRES just to alleviate ourselves + * from timer drift; however, as zones may not actually have CLOCK_HIGHRES + * access, we don't want them to end up in there. The timer itself is just a + * simple avl tree sorted by expiration time, which is stored as a tick in the + * future, a tick is just one second. + * + * ---------- + * Shootdowns + * ---------- + * + * As part of the protocol, we need to be able to handle shootdowns that inform + * us some of the information in the system is out of date. This information + * needs to be processed promptly; however, the information is hopefully going + * to be relatively infrequent relative to the normal flow of information. + * + * The shoot down information needs to be done on a per-backend basis. The + * general design is that we'll have a single query for this which can fire on a + * 5-10s period, we randmoize the latter part to give us a bit more load + * spreading. If we complete because there's no work to do, then we wait the + * normal period. If we complete, but there's still work to do, we'll go again + * after a second. + * + * A shootdown has a few different parts. We first receive a list of items to + * shootdown. After performing all of those, we need to acknowledge them. When + * that's been done successfully, we can move onto the next part. From a + * protocol perspective, we make a SVP_R_LOG_REQ, we get a reply, and then after + * processing them, send an SVP_R_LOG_RM. Only once that's been acked do we + * continue. + * + * However, one of the challenges that we have is that these invalidations are + * just that, an invalidation. For a virtual layer two request, that's fine, + * because the kernel supports that. However, for virtual layer three + * invalidations, we have a bit more work to do. These protocols, ARP and NDP, + * don't really support a notion of just an invalidation, instead you have to + * inject the new data in a gratuitous fashion. + * + * To that end, what we instead do is when we receive a VL3 invalidation, we + * turn that info a VL3 request. We hold the general request as outstanding + * until we receive all of the callbacks for the VL3 invalidations, at which + * point we go through and do the log removal request. + */ + +#include <umem.h> +#include <errno.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <libnvpair.h> +#include <strings.h> +#include <string.h> +#include <assert.h> +#include <unistd.h> + +#include <libvarpd_provider.h> +#include "libvarpd_svp.h" + +bunyan_logger_t *svp_bunyan; +static int svp_defport = 1296; +static int svp_defuport = 1339; +static umem_cache_t *svp_lookup_cache; + +typedef enum svp_lookup_type { + SVP_L_UNKNOWN = 0x0, + SVP_L_VL2 = 0x1, + SVP_L_VL3 = 0x2 +} svp_lookup_type_t; + +typedef struct svp_lookup { + int svl_type; + union { + struct svl_lookup_vl2 { + varpd_query_handle_t *svl_handle; + overlay_target_point_t *svl_point; + } svl_vl2; + struct svl_lookup_vl3 { + varpd_arp_handle_t *svl_vah; + uint8_t *svl_out; + } svl_vl3; + } svl_u; + svp_query_t svl_query; +} svp_lookup_t; + +static const char *varpd_svp_props[] = { + "svp/host", + "svp/port", + "svp/underlay_ip", + "svp/underlay_port" +}; + +static const uint8_t svp_bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + +int +svp_comparator(const void *l, const void *r) +{ + const svp_t *ls = l; + const svp_t *rs = r; + + if (ls->svp_vid > rs->svp_vid) + return (1); + if (ls->svp_vid < rs->svp_vid) + return (-1); + return (0); +} + +static void +svp_vl2_lookup_cb(svp_t *svp, svp_status_t status, const struct in6_addr *uip, + const uint16_t uport, void *arg) +{ + svp_lookup_t *svl = arg; + overlay_target_point_t *otp; + + assert(svp != NULL); + assert(arg != NULL); + + if (status != SVP_S_OK) { + libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle, + VARPD_LOOKUP_DROP); + umem_cache_free(svp_lookup_cache, svl); + return; + } + + otp = svl->svl_u.svl_vl2.svl_point; + bcopy(uip, &otp->otp_ip, sizeof (struct in6_addr)); + otp->otp_port = uport; + libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle, + VARPD_LOOKUP_OK); + umem_cache_free(svp_lookup_cache, svl); +} + +static void +svp_vl3_lookup_cb(svp_t *svp, svp_status_t status, const uint8_t *vl2mac, + const struct in6_addr *uip, const uint16_t uport, void *arg) +{ + overlay_target_point_t point; + svp_lookup_t *svl = arg; + + assert(svp != NULL); + assert(svl != NULL); + + if (status != SVP_S_OK) { + libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah, + VARPD_LOOKUP_DROP); + umem_cache_free(svp_lookup_cache, svl); + return; + } + + /* Inject the L2 mapping before the L3 */ + bcopy(uip, &point.otp_ip, sizeof (struct in6_addr)); + point.otp_port = uport; + libvarpd_inject_varp(svp->svp_hdl, vl2mac, &point); + + bcopy(vl2mac, svl->svl_u.svl_vl3.svl_out, ETHERADDRL); + libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah, + VARPD_LOOKUP_OK); + umem_cache_free(svp_lookup_cache, svl); +} + +static void +svp_vl2_invalidate_cb(svp_t *svp, const uint8_t *vl2mac) +{ + libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL); +} + +static void +svp_vl3_inject_cb(svp_t *svp, const uint16_t vlan, const struct in6_addr *vl3ip, + const uint8_t *vl2mac, const uint8_t *targmac) +{ + struct in_addr v4; + + /* + * At the moment we don't support any IPv6 related log entries, this + * will change soon as we develop a bit more of the IPv6 related + * infrastructure so we can properly test the injection. + */ + if (IN6_IS_ADDR_V4MAPPED(vl3ip) == 0) { + return; + } else { + IN6_V4MAPPED_TO_INADDR(vl3ip, &v4); + if (targmac == NULL) + targmac = svp_bcast; + libvarpd_inject_arp(svp->svp_hdl, vlan, vl2mac, &v4, targmac); + } +} + +/* ARGSUSED */ +static void +svp_shootdown_cb(svp_t *svp, const uint8_t *vl2mac, const struct in6_addr *uip, + const uint16_t uport) +{ + /* + * We should probably do a conditional invlaidation here. + */ + libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL); +} + +static svp_cb_t svp_defops = { + svp_vl2_lookup_cb, + svp_vl3_lookup_cb, + svp_vl2_invalidate_cb, + svp_vl3_inject_cb, + svp_shootdown_cb +}; + +static boolean_t +varpd_svp_valid_dest(overlay_plugin_dest_t dest) +{ + if (dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT)) + return (B_FALSE); + + return (B_TRUE); +} + +static int +varpd_svp_create(varpd_provider_handle_t *hdl, void **outp, + overlay_plugin_dest_t dest) +{ + int ret; + svp_t *svp; + + if (varpd_svp_valid_dest(dest) == B_FALSE) + return (ENOTSUP); + + svp = umem_zalloc(sizeof (svp_t), UMEM_DEFAULT); + if (svp == NULL) + return (ENOMEM); + + if ((ret = mutex_init(&svp->svp_lock, USYNC_THREAD | LOCK_ERRORCHECK, + NULL)) != 0) { + umem_free(svp, sizeof (svp_t)); + return (ret); + } + + svp->svp_port = svp_defport; + svp->svp_uport = svp_defuport; + svp->svp_cb = svp_defops; + svp->svp_hdl = hdl; + svp->svp_vid = libvarpd_plugin_vnetid(svp->svp_hdl); + *outp = svp; + return (0); +} + +static int +varpd_svp_start(void *arg) +{ + int ret; + svp_remote_t *srp; + svp_t *svp = arg; + + mutex_enter(&svp->svp_lock); + if (svp->svp_host == NULL || svp->svp_port == 0 || + svp->svp_huip == B_FALSE || svp->svp_uport == 0) { + mutex_exit(&svp->svp_lock); + return (EAGAIN); + } + mutex_exit(&svp->svp_lock); + + if ((ret = svp_remote_find(svp->svp_host, svp->svp_port, &svp->svp_uip, + &srp)) != 0) + return (ret); + + if ((ret = svp_remote_attach(srp, svp)) != 0) { + svp_remote_release(srp); + return (ret); + } + + return (0); +} + +static void +varpd_svp_stop(void *arg) +{ + svp_t *svp = arg; + + svp_remote_detach(svp); +} + +static void +varpd_svp_destroy(void *arg) +{ + svp_t *svp = arg; + + if (svp->svp_host != NULL) + umem_free(svp->svp_host, strlen(svp->svp_host) + 1); + + if (mutex_destroy(&svp->svp_lock) != 0) + libvarpd_panic("failed to destroy svp_t`svp_lock"); + + umem_free(svp, sizeof (svp_t)); +} + +static void +varpd_svp_lookup(void *arg, varpd_query_handle_t *vqh, + const overlay_targ_lookup_t *otl, overlay_target_point_t *otp) +{ + svp_lookup_t *slp; + svp_t *svp = arg; + + /* + * Check if this is something that we need to proxy, eg. arp or ndp. + */ + if (otl->otl_sap == ETHERTYPE_ARP) { + libvarpd_plugin_proxy_arp(svp->svp_hdl, vqh, otl); + return; + } + + if (otl->otl_dstaddr[0] == 0x33 && + otl->otl_dstaddr[1] == 0x33) { + if (otl->otl_sap == ETHERTYPE_IPV6) { + libvarpd_plugin_proxy_ndp(svp->svp_hdl, vqh, otl); + } else { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + } + return; + } + + /* + * Watch out for various multicast and broadcast addresses. We've + * already taken care of the IPv6 range above. Now we just need to + * handle broadcast and if the multicast bit is set, lowest bit of the + * first octet of the MAC, then we drop it now. + */ + if (bcmp(otl->otl_dstaddr, svp_bcast, ETHERADDRL) == 0 || + (otl->otl_dstaddr[0] & 0x01) == 0x01) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + return; + } + + /* + * If we have a failure to allocate memory for this, that's not good. + * However, telling the kernel to just drop this packet is much better + * than the alternative at this moment. At least we'll try again and we + * may have something more available to us in a little bit. + */ + slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT); + if (slp == NULL) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + return; + } + + slp->svl_type = SVP_L_VL2; + slp->svl_u.svl_vl2.svl_handle = vqh; + slp->svl_u.svl_vl2.svl_point = otp; + + svp_remote_vl2_lookup(svp, &slp->svl_query, otl->otl_dstaddr, slp); +} + +/* ARGSUSED */ +static int +varpd_svp_nprops(void *arg, uint_t *nprops) +{ + *nprops = sizeof (varpd_svp_props) / sizeof (char *); + return (0); +} + +/* ARGSUSED */ +static int +varpd_svp_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph) +{ + switch (propid) { + case 0: + /* svp/host */ + libvarpd_prop_set_name(vph, varpd_svp_props[0]); + libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW); + libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING); + libvarpd_prop_set_nodefault(vph); + break; + case 1: + /* svp/port */ + libvarpd_prop_set_name(vph, varpd_svp_props[1]); + libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW); + libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT); + (void) libvarpd_prop_set_default(vph, &svp_defport, + sizeof (svp_defport)); + libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX); + break; + case 2: + /* svp/underlay_ip */ + libvarpd_prop_set_name(vph, varpd_svp_props[2]); + libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW); + libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP); + libvarpd_prop_set_nodefault(vph); + break; + case 3: + /* svp/underlay_port */ + libvarpd_prop_set_name(vph, varpd_svp_props[3]); + libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW); + libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT); + (void) libvarpd_prop_set_default(vph, &svp_defuport, + sizeof (svp_defuport)); + libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX); + break; + default: + return (EINVAL); + } + return (0); +} + +static int +varpd_svp_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep) +{ + svp_t *svp = arg; + + /* svp/host */ + if (strcmp(pname, varpd_svp_props[0]) == 0) { + size_t len; + + mutex_enter(&svp->svp_lock); + if (svp->svp_host == NULL) { + *sizep = 0; + } else { + len = strlen(svp->svp_host) + 1; + if (*sizep < len) { + mutex_exit(&svp->svp_lock); + return (EOVERFLOW); + } + *sizep = len; + (void) strlcpy(buf, svp->svp_host, *sizep); + } + mutex_exit(&svp->svp_lock); + return (0); + } + + /* svp/port */ + if (strcmp(pname, varpd_svp_props[1]) == 0) { + uint64_t val; + + if (*sizep < sizeof (uint64_t)) + return (EOVERFLOW); + + mutex_enter(&svp->svp_lock); + if (svp->svp_port == 0) { + *sizep = 0; + } else { + val = svp->svp_port; + bcopy(&val, buf, sizeof (uint64_t)); + *sizep = sizeof (uint64_t); + } + + mutex_exit(&svp->svp_lock); + return (0); + } + + /* svp/underlay_ip */ + if (strcmp(pname, varpd_svp_props[2]) == 0) { + if (*sizep > sizeof (struct in6_addr)) + return (EOVERFLOW); + mutex_enter(&svp->svp_lock); + if (svp->svp_huip == B_FALSE) { + *sizep = 0; + } else { + bcopy(&svp->svp_uip, buf, sizeof (struct in6_addr)); + *sizep = sizeof (struct in6_addr); + } + return (0); + } + + /* svp/underlay_port */ + if (strcmp(pname, varpd_svp_props[3]) == 0) { + uint64_t val; + + if (*sizep < sizeof (uint64_t)) + return (EOVERFLOW); + + mutex_enter(&svp->svp_lock); + if (svp->svp_uport == 0) { + *sizep = 0; + } else { + val = svp->svp_uport; + bcopy(&val, buf, sizeof (uint64_t)); + *sizep = sizeof (uint64_t); + } + + mutex_exit(&svp->svp_lock); + return (0); + } + + return (EINVAL); +} + +static int +varpd_svp_setprop(void *arg, const char *pname, const void *buf, + const uint32_t size) +{ + svp_t *svp = arg; + + /* svp/host */ + if (strcmp(pname, varpd_svp_props[0]) == 0) { + char *dup; + dup = umem_alloc(size, UMEM_DEFAULT); + (void) strlcpy(dup, buf, size); + if (dup == NULL) + return (ENOMEM); + mutex_enter(&svp->svp_lock); + if (svp->svp_host != NULL) + umem_free(svp->svp_host, strlen(svp->svp_host) + 1); + svp->svp_host = dup; + mutex_exit(&svp->svp_lock); + return (0); + } + + /* svp/port */ + if (strcmp(pname, varpd_svp_props[1]) == 0) { + const uint64_t *valp = buf; + if (size < sizeof (uint64_t)) + return (EOVERFLOW); + + if (*valp == 0 || *valp > UINT16_MAX) + return (EINVAL); + + mutex_enter(&svp->svp_lock); + svp->svp_port = (uint16_t)*valp; + mutex_exit(&svp->svp_lock); + return (0); + } + + /* svp/underlay_ip */ + if (strcmp(pname, varpd_svp_props[2]) == 0) { + const struct in6_addr *ipv6 = buf; + + if (size < sizeof (struct in6_addr)) + return (EOVERFLOW); + + if (IN6_IS_ADDR_V4COMPAT(ipv6)) + return (EINVAL); + + if (IN6_IS_ADDR_MULTICAST(ipv6)) + return (EINVAL); + + if (IN6_IS_ADDR_6TO4(ipv6)) + return (EINVAL); + + if (IN6_IS_ADDR_V4MAPPED(ipv6)) { + ipaddr_t v4; + IN6_V4MAPPED_TO_IPADDR(ipv6, v4); + if (IN_MULTICAST(v4)) + return (EINVAL); + } + + mutex_enter(&svp->svp_lock); + bcopy(buf, &svp->svp_uip, sizeof (struct in6_addr)); + svp->svp_huip = B_TRUE; + mutex_exit(&svp->svp_lock); + return (0); + } + + /* svp/underlay_port */ + if (strcmp(pname, varpd_svp_props[3]) == 0) { + const uint64_t *valp = buf; + if (size < sizeof (uint64_t)) + return (EOVERFLOW); + + if (*valp == 0 || *valp > UINT16_MAX) + return (EINVAL); + + mutex_enter(&svp->svp_lock); + svp->svp_uport = (uint16_t)*valp; + mutex_exit(&svp->svp_lock); + + return (0); + } + + return (EINVAL); +} + +static int +varpd_svp_save(void *arg, nvlist_t *nvp) +{ + int ret; + svp_t *svp = arg; + + mutex_enter(&svp->svp_lock); + if (svp->svp_host != NULL) { + if ((ret = nvlist_add_string(nvp, varpd_svp_props[0], + svp->svp_host)) != 0) { + mutex_exit(&svp->svp_lock); + return (ret); + } + } + + if (svp->svp_port != 0) { + if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[1], + svp->svp_port)) != 0) { + mutex_exit(&svp->svp_lock); + return (ret); + } + } + + if (svp->svp_huip == B_TRUE) { + char buf[INET6_ADDRSTRLEN]; + + if (inet_ntop(AF_INET6, &svp->svp_uip, buf, sizeof (buf)) == + NULL) + libvarpd_panic("unexpected inet_ntop failure: %d", + errno); + + if ((ret = nvlist_add_string(nvp, varpd_svp_props[2], + buf)) != 0) { + mutex_exit(&svp->svp_lock); + return (ret); + } + } + + if (svp->svp_uport != 0) { + if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[3], + svp->svp_uport)) != 0) { + mutex_exit(&svp->svp_lock); + return (ret); + } + } + + mutex_exit(&svp->svp_lock); + return (0); +} + +static int +varpd_svp_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl, + overlay_plugin_dest_t dest, void **outp) +{ + int ret; + svp_t *svp; + char *ipstr, *hstr; + + if (varpd_svp_valid_dest(dest) == B_FALSE) + return (ENOTSUP); + + if ((ret = varpd_svp_create(hdl, (void **)&svp, dest)) != 0) + return (ret); + + if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[0], + &hstr)) != 0) { + if (ret != ENOENT) { + varpd_svp_destroy(svp); + return (ret); + } + svp->svp_host = NULL; + } else { + size_t blen = strlen(hstr) + 1; + svp->svp_host = umem_alloc(blen, UMEM_DEFAULT); + (void) strlcpy(svp->svp_host, hstr, blen); + } + + if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[1], + &svp->svp_port)) != 0) { + if (ret != ENOENT) { + varpd_svp_destroy(svp); + return (ret); + } + svp->svp_port = 0; + } + + if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[2], + &ipstr)) != 0) { + if (ret != ENOENT) { + varpd_svp_destroy(svp); + return (ret); + } + svp->svp_huip = B_FALSE; + } else { + ret = inet_pton(AF_INET6, ipstr, &svp->svp_uip); + if (ret == -1) { + assert(errno == EAFNOSUPPORT); + libvarpd_panic("unexpected inet_pton failure: %d", + errno); + } + + if (ret == 0) { + varpd_svp_destroy(svp); + return (EINVAL); + } + svp->svp_huip = B_TRUE; + } + + if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[3], + &svp->svp_uport)) != 0) { + if (ret != ENOENT) { + varpd_svp_destroy(svp); + return (ret); + } + svp->svp_uport = 0; + } + + svp->svp_hdl = hdl; + *outp = svp; + return (0); +} + +static void +varpd_svp_arp(void *arg, varpd_arp_handle_t *vah, int type, + const struct sockaddr *sock, uint8_t *out) +{ + svp_t *svp = arg; + svp_lookup_t *svl; + + if (type != VARPD_QTYPE_ETHERNET) { + libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP); + return; + } + + svl = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT); + if (svl == NULL) { + libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP); + return; + } + + svl->svl_type = SVP_L_VL3; + svl->svl_u.svl_vl3.svl_vah = vah; + svl->svl_u.svl_vl3.svl_out = out; + svp_remote_vl3_lookup(svp, &svl->svl_query, sock, svl); +} + +static const varpd_plugin_ops_t varpd_svp_ops = { + 0, + varpd_svp_create, + varpd_svp_start, + varpd_svp_stop, + varpd_svp_destroy, + NULL, + varpd_svp_lookup, + varpd_svp_nprops, + varpd_svp_propinfo, + varpd_svp_getprop, + varpd_svp_setprop, + varpd_svp_save, + varpd_svp_restore, + varpd_svp_arp, + NULL +}; + +static int +svp_bunyan_init(void) +{ + int ret; + + if ((ret = bunyan_init("svp", &svp_bunyan)) != 0) + return (ret); + ret = bunyan_stream_add(svp_bunyan, "stderr", BUNYAN_L_INFO, + bunyan_stream_fd, (void *)STDERR_FILENO); + if (ret != 0) + bunyan_fini(svp_bunyan); + return (ret); +} + +static void +svp_bunyan_fini(void) +{ + if (svp_bunyan != NULL) + bunyan_fini(svp_bunyan); +} + +#pragma init(varpd_svp_init) +static void +varpd_svp_init(void) +{ + int err; + varpd_plugin_register_t *vpr; + + if (svp_bunyan_init() != 0) + return; + + if ((err = svp_host_init()) != 0) { + (void) bunyan_error(svp_bunyan, "failed to init host subsystem", + BUNYAN_T_INT32, "error", err, + BUNYAN_T_END); + svp_bunyan_fini(); + return; + } + + svp_lookup_cache = umem_cache_create("svp_lookup", + sizeof (svp_lookup_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + if (svp_lookup_cache == NULL) { + (void) bunyan_error(svp_bunyan, + "failed to create svp_lookup cache", + BUNYAN_T_INT32, "error", errno, + BUNYAN_T_END); + svp_bunyan_fini(); + return; + } + + if ((err = svp_event_init()) != 0) { + (void) bunyan_error(svp_bunyan, + "failed to init event subsystem", + BUNYAN_T_INT32, "error", err, + BUNYAN_T_END); + svp_bunyan_fini(); + umem_cache_destroy(svp_lookup_cache); + return; + } + + if ((err = svp_timer_init()) != 0) { + (void) bunyan_error(svp_bunyan, + "failed to init timer subsystem", + BUNYAN_T_INT32, "error", err, + BUNYAN_T_END); + svp_event_fini(); + umem_cache_destroy(svp_lookup_cache); + svp_bunyan_fini(); + return; + } + + if ((err = svp_remote_init()) != 0) { + (void) bunyan_error(svp_bunyan, + "failed to init remote subsystem", + BUNYAN_T_INT32, "error", err, + BUNYAN_T_END); + svp_event_fini(); + umem_cache_destroy(svp_lookup_cache); + svp_bunyan_fini(); + return; + } + + vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err); + if (vpr == NULL) { + (void) bunyan_error(svp_bunyan, + "failed to alloc varpd plugin", + BUNYAN_T_INT32, "error", err, + BUNYAN_T_END); + svp_remote_fini(); + svp_event_fini(); + umem_cache_destroy(svp_lookup_cache); + svp_bunyan_fini(); + return; + } + + vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC; + vpr->vpr_name = "svp"; + vpr->vpr_ops = &varpd_svp_ops; + + if ((err = libvarpd_plugin_register(vpr)) != 0) { + (void) bunyan_error(svp_bunyan, + "failed to register varpd plugin", + BUNYAN_T_INT32, "error", err, + BUNYAN_T_END); + svp_remote_fini(); + svp_event_fini(); + umem_cache_destroy(svp_lookup_cache); + svp_bunyan_fini(); + + } + libvarpd_plugin_free(vpr); +} diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp.h b/usr/src/lib/varpd/svp/common/libvarpd_svp.h new file mode 100644 index 0000000000..8192b842ce --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp.h @@ -0,0 +1,357 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#ifndef _LIBVARPD_SVP_H +#define _LIBVARPD_SVP_H + +/* + * Implementation details of the SVP plugin and the SVP protocol. + */ + +#include <netinet/in.h> +#include <sys/ethernet.h> +#include <thread.h> +#include <synch.h> +#include <libvarpd_provider.h> +#include <sys/avl.h> +#include <port.h> +#include <sys/list.h> +#include <bunyan.h> + +#include <libvarpd_svp_prot.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct svp svp_t; +typedef struct svp_remote svp_remote_t; +typedef struct svp_conn svp_conn_t; +typedef struct svp_query svp_query_t; + +typedef void (*svp_event_f)(port_event_t *, void *); + +typedef struct svp_event { + svp_event_f se_func; + void *se_arg; + int se_events; +} svp_event_t; + +typedef void (*svp_timer_f)(void *); + +typedef struct svp_timer { + svp_timer_f st_func; /* Timer callback function */ + void *st_arg; /* Timer callback arg */ + boolean_t st_oneshot; /* Is timer a one shot? */ + uint32_t st_value; /* periodic or one-shot time */ + /* Fields below here are private to the svp_timer implementaiton */ + uint64_t st_expire; /* Next expiration */ + boolean_t st_delivering; /* Are we currently delivering this */ + avl_node_t st_link; +} svp_timer_t; + +/* + * Note, both the svp_log_ack_t and svp_lrm_req_t are not part of this structure + * as they are rather variable sized data and we don't want to constrain their + * size. Instead, the rdata and wdata members must be set appropriately. + */ +typedef union svp_query_data { + svp_vl2_req_t sqd_vl2r; + svp_vl2_ack_t sqd_vl2a; + svp_vl3_req_t sdq_vl3r; + svp_vl3_ack_t sdq_vl3a; + svp_log_req_t sdq_logr; + svp_lrm_ack_t sdq_lrma; +} svp_query_data_t; + +typedef void (*svp_query_f)(svp_query_t *, void *); + +typedef enum svp_query_state { + SVP_QUERY_INIT = 0x00, + SVP_QUERY_WRITING = 0x01, + SVP_QUERY_READING = 0x02, + SVP_QUERY_FINISHED = 0x03 +} svp_query_state_t; + +/* + * The query structure is usable for all forms of svp queries that end up + * getting passed across. Right now it's optimized for the fixed size data + * requests as opposed to requests whose responses will always be streaming in + * nature. Though, the streaming requests are the less common ones we have. We + * may need to make additional changes for those. + */ +struct svp_query { + list_node_t sq_lnode; /* List entry */ + svp_query_f sq_func; /* Callback function */ + svp_query_state_t sq_state; /* Query state */ + void *sq_arg; /* Callback function arg */ + svp_t *sq_svp; /* Pointer back to svp_t */ + svp_req_t sq_header; /* Header for the query */ + svp_query_data_t sq_rdun; /* Union for read data */ + svp_query_data_t sq_wdun; /* Union for write data */ + svp_status_t sq_status; /* Query response status */ + size_t sq_size; /* Query response size */ + void *sq_rdata; /* Read data pointer */ + size_t sq_rsize; /* Read data size */ + void *sq_wdata; /* Write data pointer */ + size_t sq_wsize; /* Write data size */ + hrtime_t sq_acttime; /* Last I/O activity time */ +}; + +typedef enum svp_conn_state { + SVP_CS_ERROR = 0x00, + SVP_CS_INITIAL = 0x01, + SVP_CS_CONNECTING = 0x02, + SVP_CS_BACKOFF = 0x03, + SVP_CS_ACTIVE = 0x04, + SVP_CS_WINDDOWN = 0x05 +} svp_conn_state_t; + +typedef enum svp_conn_error { + SVP_CE_NONE = 0x00, + SVP_CE_ASSOCIATE = 0x01, + SVP_CE_NOPOLLOUT = 0x02, + SVP_CE_SOCKET = 0x03 +} svp_conn_error_t; + +typedef enum svp_conn_flags { + SVP_CF_ADDED = 0x01, + SVP_CF_DEGRADED = 0x02, + SVP_CF_REAP = 0x04, + SVP_CF_TEARDOWN = 0x08, + SVP_CF_UFLAG = 0x0c, + SVP_CF_USER = 0x10 +} svp_conn_flags_t; + +typedef struct svp_conn_out { + svp_query_t *sco_query; + size_t sco_offset; +} svp_conn_out_t; + +typedef struct svp_conn_in { + svp_query_t *sci_query; + svp_req_t sci_req; + size_t sci_offset; +} svp_conn_in_t; + +struct svp_conn { + svp_remote_t *sc_remote; /* RO */ + struct in6_addr sc_addr; /* RO */ + list_node_t sc_rlist; /* svp_remote_t`sr_lock */ + mutex_t sc_lock; + svp_event_t sc_event; + svp_timer_t sc_btimer; + svp_timer_t sc_qtimer; + int sc_socket; + uint_t sc_gen; + uint_t sc_nbackoff; + svp_conn_flags_t sc_flags; + svp_conn_state_t sc_cstate; + svp_conn_error_t sc_error; + int sc_errno; + list_t sc_queries; + svp_conn_out_t sc_output; + svp_conn_in_t sc_input; +}; + +typedef enum svp_remote_state { + SVP_RS_LOOKUP_SCHEDULED = 0x01, /* On the DNS Queue */ + SVP_RS_LOOKUP_INPROGRESS = 0x02, /* Doing a DNS lookup */ + SVP_RS_LOOKUP_VALID = 0x04 /* addrinfo valid */ +} svp_remote_state_t; + +/* + * These series of bit-based flags should be ordered such that the most severe + * is first. We only can set one message that user land can see, so if more than + * one is set we want to make sure that one is there. + */ +typedef enum svp_degrade_state { + SVP_RD_DNS_FAIL = 0x01, /* DNS Resolution Failure */ + SVP_RD_REMOTE_FAIL = 0x02, /* cannot reach any remote peers */ + SVP_RD_ALL = 0x03 /* Only suitable for restore */ +} svp_degrade_state_t; + +typedef enum svp_shootdown_flags { + SVP_SD_RUNNING = 0x01, + SVP_SD_QUIESCE = 0x02, + SVP_SD_DORM = 0x04 +} svp_shootdown_flags_t; + +/* + * There is a single svp_sdlog_t per svp_remote_t. It maintains its own lock and + * condition variables. See the big theory statement for more information on how + * it's used. + */ +typedef struct svp_sdlog { + mutex_t sdl_lock; + cond_t sdl_cond; + uint_t sdl_ref; + svp_timer_t sdl_timer; + svp_shootdown_flags_t sdl_flags; + svp_query_t sdl_query; + void *sdl_logack; + void *sdl_logrm; + void *sdl_remote; +} svp_sdlog_t; + +struct svp_remote { + char *sr_hostname; /* RO */ + uint16_t sr_rport; /* RO */ + struct in6_addr sr_uip; /* RO */ + avl_node_t sr_gnode; /* svp_remote_lock */ + svp_remote_t *sr_nexthost; /* svp_host_lock */ + mutex_t sr_lock; + cond_t sr_cond; + svp_remote_state_t sr_state; + svp_degrade_state_t sr_degrade; + struct addrinfo *sr_addrinfo; + avl_tree_t sr_tree; + uint_t sr_count; /* active count */ + uint_t sr_gen; + uint_t sr_tconns; /* total conns + dconns */ + uint_t sr_ndconns; /* number of degraded conns */ + list_t sr_conns; /* all conns */ + svp_sdlog_t sr_shoot; +}; + +/* + * We have a bunch of different things that we get back from the API at the + * plug-in layer. These include: + * + * o OOB Shootdowns + * o VL3->VL2 Lookups + * o VL2->UL3 Lookups + * o VL2 Log invalidations + * o VL3 Log injections + */ +typedef void (*svp_vl2_lookup_f)(svp_t *, svp_status_t, const struct in6_addr *, + const uint16_t, void *); +typedef void (*svp_vl3_lookup_f)(svp_t *, svp_status_t, const uint8_t *, + const struct in6_addr *, const uint16_t, void *); +typedef void (*svp_vl2_invalidation_f)(svp_t *, const uint8_t *); +typedef void (*svp_vl3_inject_f)(svp_t *, const uint16_t, + const struct in6_addr *, const uint8_t *, const uint8_t *); +typedef void (*svp_shootdown_f)(svp_t *, const uint8_t *, + const struct in6_addr *, const uint16_t uport); + +typedef struct svp_cb { + svp_vl2_lookup_f scb_vl2_lookup; + svp_vl3_lookup_f scb_vl3_lookup; + svp_vl2_invalidation_f scb_vl2_invalidate; + svp_vl3_inject_f scb_vl3_inject; + svp_shootdown_f scb_shootdown; +} svp_cb_t; + +/* + * Core implementation structure. + */ +struct svp { + overlay_plugin_dest_t svp_dest; /* RO */ + varpd_provider_handle_t *svp_hdl; /* RO */ + svp_cb_t svp_cb; /* RO */ + uint64_t svp_vid; /* RO */ + avl_node_t svp_rlink; /* Owned by svp_remote */ + svp_remote_t *svp_remote; /* RO iff started */ + mutex_t svp_lock; + char *svp_host; /* svp_lock */ + uint16_t svp_port; /* svp_lock */ + uint16_t svp_uport; /* svp_lock */ + boolean_t svp_huip; /* svp_lock */ + struct in6_addr svp_uip; /* svp_lock */ +}; + +extern bunyan_logger_t *svp_bunyan; + +extern int svp_remote_find(char *, uint16_t, struct in6_addr *, + svp_remote_t **); +extern int svp_remote_attach(svp_remote_t *, svp_t *); +extern void svp_remote_detach(svp_t *); +extern void svp_remote_release(svp_remote_t *); +extern void svp_remote_vl3_lookup(svp_t *, svp_query_t *, + const struct sockaddr *, void *); +extern void svp_remote_vl2_lookup(svp_t *, svp_query_t *, const uint8_t *, + void *); + +/* + * Init functions + */ +extern int svp_remote_init(void); +extern void svp_remote_fini(void); +extern int svp_event_init(void); +extern int svp_event_timer_init(svp_event_t *); +extern void svp_event_fini(void); +extern int svp_host_init(void); +extern int svp_timer_init(void); + +/* + * Timers + */ +extern int svp_tickrate; +extern void svp_timer_add(svp_timer_t *); +extern void svp_timer_remove(svp_timer_t *); + +/* + * Event loop management + */ +extern int svp_event_associate(svp_event_t *, int); +extern int svp_event_dissociate(svp_event_t *, int); +extern int svp_event_inject(svp_event_t *); + +/* + * Connection manager + */ +extern int svp_conn_create(svp_remote_t *, const struct in6_addr *); +extern void svp_conn_destroy(svp_conn_t *); +extern void svp_conn_fallout(svp_conn_t *); +extern void svp_conn_queue(svp_conn_t *, svp_query_t *); + +/* + * FMA related + */ +extern void svp_remote_degrade(svp_remote_t *, svp_degrade_state_t); +extern void svp_remote_restore(svp_remote_t *, svp_degrade_state_t); + +/* + * Misc. + */ +extern int svp_comparator(const void *, const void *); +extern void svp_remote_reassign(svp_remote_t *, svp_conn_t *); +extern void svp_remote_resolved(svp_remote_t *, struct addrinfo *); +extern void svp_host_queue(svp_remote_t *); +extern void svp_query_release(svp_query_t *); +extern void svp_query_crc32(svp_req_t *, void *, size_t); + +/* + * Shootdown related + */ +extern void svp_remote_shootdown_vl3(svp_remote_t *, svp_log_vl3_t *, + svp_sdlog_t *); +extern void svp_remote_shootdown_vl2(svp_remote_t *, svp_log_vl2_t *); +extern void svp_remote_log_request(svp_remote_t *, svp_query_t *, void *, + size_t); +extern void svp_remote_lrm_request(svp_remote_t *, svp_query_t *, void *, + size_t); +extern void svp_shootdown_logr_cb(svp_remote_t *, svp_status_t, void *, size_t); +extern void svp_shootdown_lrm_cb(svp_remote_t *, svp_status_t); +extern void svp_shootdown_vl3_cb(svp_status_t, svp_log_vl3_t *, svp_sdlog_t *); +extern int svp_shootdown_init(svp_remote_t *); +extern void svp_shootdown_fini(svp_remote_t *); +extern void svp_shootdown_start(svp_remote_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBVARPD_SVP_H */ diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c new file mode 100644 index 0000000000..5d19f8a388 --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c @@ -0,0 +1,1031 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * Logic to manage an individual connection to a remote host. + * + * For more information, see the big theory statement in + * lib/varpd/svp/common/libvarpd_svp.c. + */ + +#include <assert.h> +#include <umem.h> +#include <errno.h> +#include <strings.h> +#include <unistd.h> +#include <stddef.h> +#include <sys/uio.h> +#include <sys/debug.h> + +#include <libvarpd_svp.h> + +int svp_conn_query_timeout = 30; +static int svp_conn_backoff_tbl[] = { 1, 2, 4, 8, 16, 32 }; +static int svp_conn_nbackoff = sizeof (svp_conn_backoff_tbl) / sizeof (int); + +typedef enum svp_conn_act { + SVP_RA_NONE = 0x00, + SVP_RA_DEGRADE = 0x01, + SVP_RA_RESTORE = 0x02, + SVP_RA_ERROR = 0x03, + SVP_RA_CLEANUP = 0x04 +} svp_conn_act_t; + +static void +svp_conn_inject(svp_conn_t *scp) +{ + int ret; + assert(MUTEX_HELD(&scp->sc_lock)); + + if (scp->sc_flags & SVP_CF_USER) + return; + scp->sc_flags |= SVP_CF_USER; + if ((ret = svp_event_inject(&scp->sc_event)) != 0) + libvarpd_panic("failed to inject event: %d\n", ret); +} + +static void +svp_conn_degrade(svp_conn_t *scp) +{ + svp_remote_t *srp = scp->sc_remote; + + assert(MUTEX_HELD(&srp->sr_lock)); + assert(MUTEX_HELD(&scp->sc_lock)); + + if (scp->sc_flags & SVP_CF_DEGRADED) + return; + + scp->sc_flags |= SVP_CF_DEGRADED; + srp->sr_ndconns++; + if (srp->sr_ndconns == srp->sr_tconns) + svp_remote_degrade(srp, SVP_RD_REMOTE_FAIL); +} + +static void +svp_conn_restore(svp_conn_t *scp) +{ + svp_remote_t *srp = scp->sc_remote; + + assert(MUTEX_HELD(&srp->sr_lock)); + assert(MUTEX_HELD(&scp->sc_lock)); + + if (!(scp->sc_flags & SVP_CF_DEGRADED)) + return; + + scp->sc_flags &= ~SVP_CF_DEGRADED; + if (srp->sr_ndconns == srp->sr_tconns) + svp_remote_restore(srp, SVP_RD_REMOTE_FAIL); + srp->sr_ndconns--; +} + +static void +svp_conn_add(svp_conn_t *scp) +{ + svp_remote_t *srp = scp->sc_remote; + + assert(MUTEX_HELD(&srp->sr_lock)); + assert(MUTEX_HELD(&scp->sc_lock)); + + if (scp->sc_flags & SVP_CF_ADDED) + return; + + list_insert_tail(&srp->sr_conns, scp); + scp->sc_flags |= SVP_CF_ADDED; + srp->sr_tconns++; +} + +static void +svp_conn_remove(svp_conn_t *scp) +{ + svp_remote_t *srp = scp->sc_remote; + + assert(MUTEX_HELD(&srp->sr_lock)); + assert(MUTEX_HELD(&scp->sc_lock)); + + if (!(scp->sc_flags & SVP_CF_ADDED)) + return; + + scp->sc_flags &= ~SVP_CF_ADDED; + if (scp->sc_flags & SVP_CF_DEGRADED) + srp->sr_ndconns--; + srp->sr_tconns--; + if (srp->sr_tconns == srp->sr_ndconns) + svp_remote_degrade(srp, SVP_RD_REMOTE_FAIL); +} + +static svp_query_t * +svp_conn_query_find(svp_conn_t *scp, uint32_t id) +{ + svp_query_t *sqp; + + assert(MUTEX_HELD(&scp->sc_lock)); + + for (sqp = list_head(&scp->sc_queries); sqp != NULL; + sqp = list_next(&scp->sc_queries, sqp)) { + if (sqp->sq_header.svp_id == id) + break; + } + + return (sqp); +} + +static svp_conn_act_t +svp_conn_backoff(svp_conn_t *scp) +{ + assert(MUTEX_HELD(&scp->sc_lock)); + + if (close(scp->sc_socket) != 0) + libvarpd_panic("failed to close socket %d: %d\n", + scp->sc_socket, errno); + scp->sc_socket = -1; + + scp->sc_cstate = SVP_CS_BACKOFF; + scp->sc_nbackoff++; + if (scp->sc_nbackoff >= svp_conn_nbackoff) { + scp->sc_btimer.st_value = + svp_conn_backoff_tbl[svp_conn_nbackoff - 1]; + } else { + scp->sc_btimer.st_value = + svp_conn_backoff_tbl[scp->sc_nbackoff - 1]; + } + svp_timer_add(&scp->sc_btimer); + + if (scp->sc_nbackoff > svp_conn_nbackoff) + return (SVP_RA_DEGRADE); + return (SVP_RA_NONE); +} + +static svp_conn_act_t +svp_conn_connect(svp_conn_t *scp) +{ + int ret; + struct sockaddr_in6 in6; + + assert(MUTEX_HELD(&scp->sc_lock)); + assert(scp->sc_cstate == SVP_CS_BACKOFF || + scp->sc_cstate == SVP_CS_INITIAL); + assert(scp->sc_socket == -1); + if (scp->sc_cstate == SVP_CS_INITIAL) + scp->sc_nbackoff = 0; + + scp->sc_socket = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); + if (scp->sc_socket == -1) { + scp->sc_error = SVP_CE_SOCKET; + scp->sc_errno = errno; + scp->sc_cstate = SVP_CS_ERROR; + return (SVP_RA_DEGRADE); + } + + bzero(&in6, sizeof (struct sockaddr_in6)); + in6.sin6_family = AF_INET6; + in6.sin6_port = htons(scp->sc_remote->sr_rport); + bcopy(&scp->sc_addr, &in6.sin6_addr, sizeof (struct in6_addr)); + ret = connect(scp->sc_socket, (struct sockaddr *)&in6, + sizeof (struct sockaddr_in6)); + if (ret != 0) { + boolean_t async = B_FALSE; + + switch (errno) { + case EACCES: + case EADDRINUSE: + case EAFNOSUPPORT: + case EALREADY: + case EBADF: + case EISCONN: + case ELOOP: + case ENOENT: + case ENOSR: + case EWOULDBLOCK: + libvarpd_panic("unanticipated connect errno %d", errno); + break; + case EINPROGRESS: + case EINTR: + async = B_TRUE; + default: + break; + } + + /* + * So, we will be connecting to this in the future, advance our + * state and make sure that we poll for the next round. + */ + if (async == B_TRUE) { + scp->sc_cstate = SVP_CS_CONNECTING; + scp->sc_event.se_events = POLLOUT | POLLHUP; + ret = svp_event_associate(&scp->sc_event, + scp->sc_socket); + if (ret == 0) + return (SVP_RA_NONE); + scp->sc_error = SVP_CE_ASSOCIATE; + scp->sc_errno = ret; + scp->sc_cstate = SVP_CS_ERROR; + return (SVP_RA_DEGRADE); + } else { + /* + * This call failed, which means that we obtained one of + * the following: + * + * EADDRNOTAVAIL + * ECONNREFUSED + * EIO + * ENETUNREACH + * EHOSTUNREACH + * ENXIO + * ETIMEDOUT + * + * Therefore we need to set ourselves into backoff and + * wait for that to clear up. + */ + return (svp_conn_backoff(scp)); + } + } + + /* + * We've connected. Successfully move ourselves to the bound + * state and start polling. + */ + scp->sc_cstate = SVP_CS_ACTIVE; + scp->sc_event.se_events = POLLIN | POLLRDNORM | POLLHUP; + ret = svp_event_associate(&scp->sc_event, scp->sc_socket); + if (ret == 0) + return (SVP_RA_RESTORE); + scp->sc_error = SVP_CE_ASSOCIATE; + scp->sc_cstate = SVP_CS_ERROR; + + return (SVP_RA_DEGRADE); +} + +/* + * This should be the first call we get after a connect. If we have successfully + * connected, we should see a writeable event. We may also see an error or a + * hang up. In either of these cases, we transition to error mode. If there is + * also a readable event, we ignore it at the moment and just let a + * reassociation pick it up so we can simplify the set of state transitions that + * we have. + */ +static svp_conn_act_t +svp_conn_poll_connect(port_event_t *pe, svp_conn_t *scp) +{ + int ret, err; + socklen_t sl = sizeof (err); + if (!(pe->portev_events & POLLOUT)) { + scp->sc_errno = 0; + scp->sc_error = SVP_CE_NOPOLLOUT; + scp->sc_cstate = SVP_CS_ERROR; + return (SVP_RA_DEGRADE); + } + + ret = getsockopt(scp->sc_socket, SOL_SOCKET, SO_ERROR, &err, &sl); + if (ret != 0) + libvarpd_panic("unanticipated getsockopt error"); + if (err != 0) { + return (svp_conn_backoff(scp)); + } + + scp->sc_cstate = SVP_CS_ACTIVE; + scp->sc_event.se_events = POLLIN | POLLRDNORM | POLLHUP; + ret = svp_event_associate(&scp->sc_event, scp->sc_socket); + if (ret == 0) + return (SVP_RA_RESTORE); + scp->sc_error = SVP_CE_ASSOCIATE; + scp->sc_errno = ret; + scp->sc_cstate = SVP_CS_ERROR; + return (SVP_RA_DEGRADE); +} + +static svp_conn_act_t +svp_conn_pollout(svp_conn_t *scp) +{ + svp_query_t *sqp; + svp_req_t *req; + size_t off; + struct iovec iov[2]; + int nvecs = 0; + ssize_t ret; + + assert(MUTEX_HELD(&scp->sc_lock)); + + /* + * We need to find a query and start writing it out. + */ + if (scp->sc_output.sco_query == NULL) { + for (sqp = list_head(&scp->sc_queries); sqp != NULL; + sqp = list_next(&scp->sc_queries, sqp)) { + if (sqp->sq_state != SVP_QUERY_INIT) + continue; + break; + } + + if (sqp == NULL) { + scp->sc_event.se_events &= ~POLLOUT; + return (SVP_RA_NONE); + } + + scp->sc_output.sco_query = sqp; + scp->sc_output.sco_offset = 0; + sqp->sq_state = SVP_QUERY_WRITING; + svp_query_crc32(&sqp->sq_header, sqp->sq_rdata, sqp->sq_rsize); + } + + sqp = scp->sc_output.sco_query; + req = &sqp->sq_header; + off = scp->sc_output.sco_offset; + if (off < sizeof (svp_req_t)) { + iov[nvecs].iov_base = (void *)((uintptr_t)req + off); + iov[nvecs].iov_len = sizeof (svp_req_t) - off; + nvecs++; + off = 0; + } else { + off -= sizeof (svp_req_t); + } + + iov[nvecs].iov_base = (void *)((uintptr_t)sqp->sq_rdata + off); + iov[nvecs].iov_len = sqp->sq_rsize - off; + nvecs++; + + do { + ret = writev(scp->sc_socket, iov, nvecs); + } while (ret == -1 && errno == EAGAIN); + if (ret == -1) { + switch (errno) { + case EAGAIN: + scp->sc_event.se_events |= POLLOUT; + return (SVP_RA_NONE); + case EIO: + case ENXIO: + case ECONNRESET: + return (SVP_RA_ERROR); + default: + libvarpd_panic("unexpected errno: %d", errno); + } + } + + sqp->sq_acttime = gethrtime(); + scp->sc_output.sco_offset += ret; + if (ret >= sizeof (svp_req_t) + sqp->sq_rsize) { + sqp->sq_state = SVP_QUERY_READING; + scp->sc_output.sco_query = NULL; + scp->sc_output.sco_offset = 0; + scp->sc_event.se_events |= POLLOUT; + } + return (SVP_RA_NONE); +} + +static boolean_t +svp_conn_pollin_validate(svp_conn_t *scp) +{ + svp_query_t *sqp; + uint32_t nsize; + uint16_t nvers, nop; + svp_req_t *resp = &scp->sc_input.sci_req; + + assert(MUTEX_HELD(&scp->sc_lock)); + + nvers = ntohs(resp->svp_ver); + nop = ntohs(resp->svp_op); + nsize = ntohl(resp->svp_size); + + if (nvers != SVP_CURRENT_VERSION) { + (void) bunyan_warn(svp_bunyan, "unsupported version", + BUNYAN_T_IP, "remote_ip", &scp->sc_addr, + BUNYAN_T_INT32, "remote_port", scp->sc_remote->sr_rport, + BUNYAN_T_INT32, "version", nvers, + BUNYAN_T_INT32, "operation", nop, + BUNYAN_T_INT32, "response_id", resp->svp_id, + BUNYAN_T_END); + return (B_FALSE); + } + + if (nop != SVP_R_VL2_ACK && nop != SVP_R_VL3_ACK && + nop != SVP_R_LOG_ACK && nop != SVP_R_LOG_RM_ACK) { + (void) bunyan_warn(svp_bunyan, "unsupported operation", + BUNYAN_T_IP, "remote_ip", &scp->sc_addr, + BUNYAN_T_INT32, "remote_port", scp->sc_remote->sr_rport, + BUNYAN_T_INT32, "version", nvers, + BUNYAN_T_INT32, "operation", nop, + BUNYAN_T_INT32, "response_id", resp->svp_id, + BUNYAN_T_END); + return (B_FALSE); + } + + sqp = svp_conn_query_find(scp, resp->svp_id); + if (sqp == NULL) { + (void) bunyan_warn(svp_bunyan, "unknown response id", + BUNYAN_T_IP, "remote_ip", &scp->sc_addr, + BUNYAN_T_INT32, "remote_port", scp->sc_remote->sr_rport, + BUNYAN_T_INT32, "version", nvers, + BUNYAN_T_INT32, "operation", nop, + BUNYAN_T_INT32, "response_id", resp->svp_id, + BUNYAN_T_END); + return (B_FALSE); + } + + if (sqp->sq_state != SVP_QUERY_READING) { + (void) bunyan_warn(svp_bunyan, + "got response for unexpecting query", + BUNYAN_T_IP, "remote_ip", &scp->sc_addr, + BUNYAN_T_INT32, "remote_port", scp->sc_remote->sr_rport, + BUNYAN_T_INT32, "version", nvers, + BUNYAN_T_INT32, "operation", nop, + BUNYAN_T_INT32, "response_id", resp->svp_id, + BUNYAN_T_INT32, "query_state", sqp->sq_state, + BUNYAN_T_END); + return (B_FALSE); + } + + if ((nop == SVP_R_VL2_ACK && nsize != sizeof (svp_vl2_ack_t)) || + (nop == SVP_R_VL3_ACK && nsize != sizeof (svp_vl3_ack_t)) || + (nop == SVP_R_LOG_RM_ACK && nsize != sizeof (svp_lrm_ack_t))) { + (void) bunyan_warn(svp_bunyan, "response size too large", + BUNYAN_T_IP, "remote_ip", &scp->sc_addr, + BUNYAN_T_INT32, "remote_port", scp->sc_remote->sr_rport, + BUNYAN_T_INT32, "version", nvers, + BUNYAN_T_INT32, "operation", nop, + BUNYAN_T_INT32, "response_id", resp->svp_id, + BUNYAN_T_INT32, "response_size", nsize, + BUNYAN_T_INT32, "expected_size", nop == SVP_R_VL2_ACK ? + sizeof (svp_vl2_ack_t) : sizeof (svp_vl3_ack_t), + BUNYAN_T_INT32, "query_state", sqp->sq_state, + BUNYAN_T_END); + return (B_FALSE); + } + + /* + * The valid size is anything <= to what the user requested, but at + * least svp_log_ack_t bytes large. + */ + if (nop == SVP_R_LOG_ACK) { + const char *msg = NULL; + if (nsize < sizeof (svp_log_ack_t)) + msg = "response size too small"; + else if (nsize > ((svp_log_req_t *)sqp->sq_rdata)->svlr_count) + msg = "response size too large"; + if (msg != NULL) { + (void) bunyan_warn(svp_bunyan, msg, + BUNYAN_T_IP, "remote_ip", &scp->sc_addr, + BUNYAN_T_INT32, "remote_port", + scp->sc_remote->sr_rport, + BUNYAN_T_INT32, "version", nvers, + BUNYAN_T_INT32, "operation", nop, + BUNYAN_T_INT32, "response_id", resp->svp_id, + BUNYAN_T_INT32, "response_size", nsize, + BUNYAN_T_INT32, "expected_size", + ((svp_log_req_t *)sqp->sq_rdata)->svlr_count, + BUNYAN_T_INT32, "query_state", sqp->sq_state, + BUNYAN_T_END); + return (B_FALSE); + } + } + + sqp->sq_size = nsize; + scp->sc_input.sci_query = sqp; + if (nop == SVP_R_VL2_ACK || nop == SVP_R_VL3_ACK || + nop == SVP_R_LOG_RM_ACK) { + sqp->sq_wdata = &sqp->sq_wdun; + sqp->sq_wsize = sizeof (svp_query_data_t); + } else { + VERIFY(nop == SVP_R_LOG_ACK); + assert(sqp->sq_wdata != NULL); + assert(sqp->sq_wsize != 0); + } + + return (B_TRUE); +} + +static svp_conn_act_t +svp_conn_pollin(svp_conn_t *scp) +{ + size_t off, total; + ssize_t ret; + svp_query_t *sqp; + uint32_t crc; + uint16_t nop; + + assert(MUTEX_HELD(&scp->sc_lock)); + + /* + * No query implies that we're reading in the header and that the offset + * is associted with it. + */ + off = scp->sc_input.sci_offset; + sqp = scp->sc_input.sci_query; + if (scp->sc_input.sci_query == NULL) { + svp_req_t *resp = &scp->sc_input.sci_req; + + assert(off < sizeof (svp_req_t)); + + do { + ret = read(scp->sc_socket, + (void *)((uintptr_t)resp + off), + sizeof (svp_req_t) - off); + } while (ret == -1 && errno == EINTR); + if (ret == -1) { + switch (errno) { + case EAGAIN: + scp->sc_event.se_events |= POLLIN | POLLRDNORM; + return (SVP_RA_NONE); + case EIO: + case ECONNRESET: + return (SVP_RA_ERROR); + break; + default: + libvarpd_panic("unexpeted read errno: %d", + errno); + } + } else if (ret == 0) { + /* Try to reconnect to the remote host */ + return (SVP_RA_ERROR); + } + + /* Didn't get all the data we need */ + if (off + ret < sizeof (svp_req_t)) { + scp->sc_input.sci_offset += ret; + scp->sc_event.se_events |= POLLIN | POLLRDNORM; + return (SVP_RA_NONE); + } + + if (svp_conn_pollin_validate(scp) != B_TRUE) + return (SVP_RA_ERROR); + } + + sqp = scp->sc_input.sci_query; + assert(sqp != NULL); + sqp->sq_acttime = gethrtime(); + total = ntohl(scp->sc_input.sci_req.svp_size); + do { + ret = read(scp->sc_socket, + (void *)((uintptr_t)sqp->sq_wdata + off), + total - off); + } while (ret == -1 && errno == EINTR); + + if (ret == -1) { + switch (errno) { + case EAGAIN: + scp->sc_event.se_events |= POLLIN | POLLRDNORM; + return (SVP_RA_NONE); + case EIO: + case ECONNRESET: + return (SVP_RA_ERROR); + break; + default: + libvarpd_panic("unexpeted read errno: %d", errno); + } + } else if (ret == 0) { + /* Try to reconnect to the remote host */ + return (SVP_RA_ERROR); + } + + if (ret + off < total) { + scp->sc_input.sci_offset += ret; + return (SVP_RA_NONE); + } + + nop = ntohs(scp->sc_input.sci_req.svp_op); + crc = scp->sc_input.sci_req.svp_crc32; + svp_query_crc32(&scp->sc_input.sci_req, sqp->sq_wdata, total); + if (crc != scp->sc_input.sci_req.svp_crc32) { + (void) bunyan_info(svp_bunyan, "crc32 mismatch", + BUNYAN_T_IP, "remote ip", &scp->sc_addr, + BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport, + BUNYAN_T_INT32, "version", + ntohs(scp->sc_input.sci_req.svp_ver), + BUNYAN_T_INT32, "operation", nop, + BUNYAN_T_INT32, "response id", + ntohl(scp->sc_input.sci_req.svp_id), + BUNYAN_T_INT32, "query state", sqp->sq_state, + BUNYAN_T_UINT32, "msg_crc", ntohl(crc), + BUNYAN_T_UINT32, "calc_crc", + ntohl(scp->sc_input.sci_req.svp_crc32), + BUNYAN_T_END); + return (SVP_RA_ERROR); + } + scp->sc_input.sci_query = NULL; + scp->sc_input.sci_offset = 0; + + if (nop == SVP_R_VL2_ACK) { + svp_vl2_ack_t *sl2a = sqp->sq_wdata; + sqp->sq_status = ntohl(sl2a->sl2a_status); + } else if (nop == SVP_R_VL3_ACK) { + svp_vl3_ack_t *sl3a = sqp->sq_wdata; + sqp->sq_status = ntohl(sl3a->sl3a_status); + } else if (nop == SVP_R_LOG_ACK) { + svp_log_ack_t *svla = sqp->sq_wdata; + sqp->sq_status = ntohl(svla->svla_status); + } else if (nop == SVP_R_LOG_RM_ACK) { + svp_lrm_ack_t *svra = sqp->sq_wdata; + sqp->sq_status = ntohl(svra->svra_status); + } else { + libvarpd_panic("unhandled nop: %d", nop); + } + + list_remove(&scp->sc_queries, sqp); + mutex_exit(&scp->sc_lock); + + /* + * We have to release all of our resources associated with this entry + * before we call the callback. After we call it, the memory will be + * lost to time. + */ + svp_query_release(sqp); + sqp->sq_func(sqp, sqp->sq_arg); + mutex_enter(&scp->sc_lock); + scp->sc_event.se_events |= POLLIN | POLLRDNORM; + + return (SVP_RA_NONE); +} + +static svp_conn_act_t +svp_conn_reset(svp_conn_t *scp) +{ + svp_remote_t *srp = scp->sc_remote; + + assert(MUTEX_HELD(&srp->sr_lock)); + assert(MUTEX_HELD(&scp->sc_lock)); + + assert(svp_event_dissociate(&scp->sc_event, scp->sc_socket) == + ENOENT); + if (close(scp->sc_socket) != 0) + libvarpd_panic("failed to close socket %d: %d", scp->sc_socket, + errno); + scp->sc_flags &= ~SVP_CF_TEARDOWN; + scp->sc_socket = -1; + scp->sc_cstate = SVP_CS_INITIAL; + scp->sc_input.sci_query = NULL; + scp->sc_output.sco_query = NULL; + + svp_remote_reassign(srp, scp); + + return (svp_conn_connect(scp)); +} + +/* + * This is our general state transition function. We're called here when we want + * to advance part of our state machine as well as to re-arm ourselves. We can + * also end up here from the standard event loop as a result of having a user + * event posted. + */ +static void +svp_conn_handler(port_event_t *pe, void *arg) +{ + svp_conn_t *scp = arg; + svp_remote_t *srp = scp->sc_remote; + svp_conn_act_t ret = SVP_RA_NONE; + svp_conn_state_t oldstate; + + mutex_enter(&scp->sc_lock); + + /* + * Check if one of our event interrupts is set. An event interrupt, such + * as having to be reaped or be torndown is notified by a + * PORT_SOURCE_USER event that tries to take care of this. However, + * because of the fact that the event loop can be ongoing despite this, + * we may get here before the PORT_SOURCE_USER has casued us to get + * here. In such a case, if the PORT_SOURCE_USER event is tagged, then + * we're going to opt to do nothing here and wait for it to come and + * tear us down. That will also indicate to us that we have nothing to + * worry about as far as general timing and the like goes. + */ + if ((scp->sc_flags & SVP_CF_UFLAG) != 0 && + (scp->sc_flags & SVP_CF_USER) != 0 && + pe != NULL && + pe->portev_source != PORT_SOURCE_USER) { + mutex_exit(&scp->sc_lock); + return; + } + + if (pe != NULL && pe->portev_source == PORT_SOURCE_USER) { + scp->sc_flags &= ~SVP_CF_USER; + if ((scp->sc_flags & SVP_CF_UFLAG) == 0) { + mutex_exit(&scp->sc_lock); + return; + } + } + + /* Check if this needs to be freed */ + if (scp->sc_flags & SVP_CF_REAP) { + mutex_exit(&scp->sc_lock); + svp_conn_destroy(scp); + return; + } + + /* Check if this needs to be reset */ + if (scp->sc_flags & SVP_CF_TEARDOWN) { + /* Make sure any other users of this are disassociated */ + ret = SVP_RA_ERROR; + goto out; + } + + switch (scp->sc_cstate) { + case SVP_CS_INITIAL: + case SVP_CS_BACKOFF: + assert(pe == NULL); + ret = svp_conn_connect(scp); + break; + case SVP_CS_CONNECTING: + assert(pe != NULL); + ret = svp_conn_poll_connect(pe, scp); + break; + case SVP_CS_ACTIVE: + case SVP_CS_WINDDOWN: + assert(pe != NULL); + oldstate = scp->sc_cstate; + if (pe->portev_events & POLLOUT) + ret = svp_conn_pollout(scp); + if (ret == SVP_RA_NONE && (pe->portev_events & POLLIN)) + ret = svp_conn_pollin(scp); + + if (oldstate == SVP_CS_WINDDOWN && + (list_is_empty(&scp->sc_queries) || ret != SVP_RA_NONE)) { + ret = SVP_RA_CLEANUP; + } + + if (ret == SVP_RA_NONE) { + int err; + if ((err = svp_event_associate(&scp->sc_event, + scp->sc_socket)) != 0) { + scp->sc_error = SVP_CE_ASSOCIATE; + scp->sc_errno = err; + scp->sc_cstate = SVP_CS_ERROR; + ret = SVP_RA_DEGRADE; + } + } + break; + default: + libvarpd_panic("svp_conn_handler encountered unexpected " + "state: %d", scp->sc_cstate); + } +out: + mutex_exit(&scp->sc_lock); + + if (ret == SVP_RA_NONE) + return; + + mutex_enter(&srp->sr_lock); + mutex_enter(&scp->sc_lock); + if (ret == SVP_RA_ERROR) + ret = svp_conn_reset(scp); + + if (ret == SVP_RA_DEGRADE) + svp_conn_degrade(scp); + if (ret == SVP_RA_RESTORE) + svp_conn_restore(scp); + + if (ret == SVP_RA_CLEANUP) { + svp_conn_remove(scp); + scp->sc_flags |= SVP_CF_REAP; + svp_conn_inject(scp); + } + mutex_exit(&scp->sc_lock); + mutex_exit(&srp->sr_lock); +} + +static void +svp_conn_backtimer(void *arg) +{ + svp_conn_t *scp = arg; + + svp_conn_handler(NULL, scp); +} + +/* + * This fires every svp_conn_query_timeout seconds. Its purpos is to determine + * if we haven't heard back on a request with in svp_conn_query_timeout seconds. + * If any of the svp_conn_query_t's that have been started (indicated by + * svp_query_t`sq_acttime != -1), and more than svp_conn_query_timeout seconds + * have passed, we basically tear this connection down and reassign outstanding + * queries. + */ +static void +svp_conn_querytimer(void *arg) +{ + int ret; + svp_query_t *sqp; + svp_conn_t *scp = arg; + hrtime_t now = gethrtime(); + + mutex_enter(&scp->sc_lock); + + /* + * If we're not in the active state, then we don't care about this as + * we're already either going to die or we have no connections to worry + * about. + */ + if (scp->sc_cstate != SVP_CS_ACTIVE) { + mutex_exit(&scp->sc_lock); + return; + } + + for (sqp = list_head(&scp->sc_queries); sqp != NULL; + sqp = list_next(&scp->sc_queries, sqp)) { + if (sqp->sq_acttime == -1) + continue; + if ((now - sqp->sq_acttime) / NANOSEC > svp_conn_query_timeout) + break; + } + + /* Nothing timed out, we're good here */ + if (sqp == NULL) { + mutex_exit(&scp->sc_lock); + return; + } + + (void) bunyan_warn(svp_bunyan, "query timed out on connection", + BUNYAN_T_IP, "remote_ip", &scp->sc_addr, + BUNYAN_T_INT32, "remote_port", scp->sc_remote->sr_rport, + BUNYAN_T_INT32, "operation", ntohs(sqp->sq_header.svp_op), + BUNYAN_T_END); + + /* + * Begin the tear down process for this connect. If we lose the + * disassociate, then we don't inject an event. See the big theory + * statement in libvarpd_svp.c for more information. + */ + scp->sc_flags |= SVP_CF_TEARDOWN; + + ret = svp_event_dissociate(&scp->sc_event, scp->sc_socket); + if (ret == 0) + svp_conn_inject(scp); + else + VERIFY(ret == ENOENT); + + mutex_exit(&scp->sc_lock); +} + +/* + * This connection has fallen out of DNS, figure out what we need to do with it. + */ +void +svp_conn_fallout(svp_conn_t *scp) +{ + svp_remote_t *srp = scp->sc_remote; + + assert(MUTEX_HELD(&srp->sr_lock)); + + mutex_enter(&scp->sc_lock); + switch (scp->sc_cstate) { + case SVP_CS_ERROR: + /* + * Connection is already inactive, so it's safe to tear down. + * Fire it off through the state machine to tear down via the + * backoff timer. + */ + svp_conn_remove(scp); + scp->sc_flags |= SVP_CF_REAP; + svp_conn_inject(scp); + break; + case SVP_CS_INITIAL: + case SVP_CS_BACKOFF: + case SVP_CS_CONNECTING: + /* + * Here, we have something actively going on, so we'll let it be + * clean up the next time we hit the event loop by the event + * loop itself. As it has no connections, there isn't much to + * really do, though we'll take this chance to go ahead and + * remove it from the remote. + */ + svp_conn_remove(scp); + scp->sc_flags |= SVP_CF_REAP; + svp_conn_inject(scp); + break; + case SVP_CS_ACTIVE: + case SVP_CS_WINDDOWN: + /* + * If there are no outstanding queries, then we should simply + * clean this up now,t he same way we would with the others. + * Othewrise, as we know the event loop is ongoing, we'll make + * sure that these entries get cleaned up once they're done. + */ + scp->sc_cstate = SVP_CS_WINDDOWN; + if (list_is_empty(&scp->sc_queries)) { + svp_conn_remove(scp); + scp->sc_flags |= SVP_CF_REAP; + svp_conn_inject(scp); + } + break; + default: + libvarpd_panic("svp_conn_fallout encountered" + "unkonwn state"); + } + mutex_exit(&scp->sc_lock); + mutex_exit(&srp->sr_lock); +} + +int +svp_conn_create(svp_remote_t *srp, const struct in6_addr *addr) +{ + int ret; + svp_conn_t *scp; + + assert(MUTEX_HELD(&srp->sr_lock)); + scp = umem_zalloc(sizeof (svp_conn_t), UMEM_DEFAULT); + if (scp == NULL) + return (ENOMEM); + + if ((ret = mutex_init(&scp->sc_lock, USYNC_THREAD | LOCK_ERRORCHECK, + NULL)) != 0) { + umem_free(scp, sizeof (svp_conn_t)); + return (ret); + } + + scp->sc_remote = srp; + scp->sc_event.se_func = svp_conn_handler; + scp->sc_event.se_arg = scp; + scp->sc_btimer.st_func = svp_conn_backtimer; + scp->sc_btimer.st_arg = scp; + scp->sc_btimer.st_oneshot = B_TRUE; + scp->sc_btimer.st_value = 1; + + scp->sc_qtimer.st_func = svp_conn_querytimer; + scp->sc_qtimer.st_arg = scp; + scp->sc_qtimer.st_oneshot = B_FALSE; + scp->sc_qtimer.st_value = svp_conn_query_timeout; + + scp->sc_socket = -1; + + list_create(&scp->sc_queries, sizeof (svp_query_t), + offsetof(svp_query_t, sq_lnode)); + scp->sc_gen = srp->sr_gen; + bcopy(addr, &scp->sc_addr, sizeof (struct in6_addr)); + scp->sc_cstate = SVP_CS_INITIAL; + mutex_enter(&scp->sc_lock); + svp_conn_add(scp); + mutex_exit(&scp->sc_lock); + + /* Now that we're locked and loaded, add our timers */ + svp_timer_add(&scp->sc_qtimer); + svp_timer_add(&scp->sc_btimer); + + return (0); +} + +/* + * At the time of calling, the entry has been removed from all lists. In + * addition, the entries state should be SVP_CS_ERROR, therefore, we know that + * the fd should not be associated with the event loop. We'll double check that + * just in case. We should also have already been removed from the remote's + * list. + */ +void +svp_conn_destroy(svp_conn_t *scp) +{ + int ret; + + mutex_enter(&scp->sc_lock); + if (scp->sc_cstate != SVP_CS_ERROR) + libvarpd_panic("asked to tear down an active connection"); + if (scp->sc_flags & SVP_CF_ADDED) + libvarpd_panic("asked to remove a connection still in " + "the remote list\n"); + if (!list_is_empty(&scp->sc_queries)) + libvarpd_panic("asked to remove a connection with non-empty " + "query list"); + + if ((ret = svp_event_dissociate(&scp->sc_event, scp->sc_socket)) != + ENOENT) { + libvarpd_panic("dissociate failed or was actually " + "associated: %d", ret); + } + mutex_exit(&scp->sc_lock); + + /* Verify our timers are killed */ + svp_timer_remove(&scp->sc_btimer); + svp_timer_remove(&scp->sc_qtimer); + + if (scp->sc_socket != -1 && close(scp->sc_socket) != 0) + libvarpd_panic("failed to close svp_conn_t`scp_socket fd " + "%d: %d", scp->sc_socket, errno); + + list_destroy(&scp->sc_queries); + umem_free(scp, sizeof (svp_conn_t)); +} + +void +svp_conn_queue(svp_conn_t *scp, svp_query_t *sqp) +{ + assert(MUTEX_HELD(&scp->sc_lock)); + assert(scp->sc_cstate == SVP_CS_ACTIVE); + + sqp->sq_acttime = -1; + list_insert_tail(&scp->sc_queries, sqp); + if (!(scp->sc_event.se_events & POLLOUT)) { + scp->sc_event.se_events |= POLLOUT; + /* + * If this becomes frequent, we should instead give up on this + * set of connections instead of aborting. + */ + if (svp_event_associate(&scp->sc_event, scp->sc_socket) != 0) + libvarpd_panic("svp_event_associate failed somehow"); + } +} diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c new file mode 100644 index 0000000000..ade47ff998 --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c @@ -0,0 +1,53 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015, Joyent, Inc. + */ + +/* + * Perform standard crc32 functions. + * + * For more information, see the big theory statement in + * lib/varpd/svp/common/libvarpd_svp.c. + * + * Really, this should just be a libcrc. + */ + +#include <sys/crc32.h> +#include <stdint.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <inttypes.h> +#include <libvarpd_svp.h> + +static uint32_t svp_crc32_tab[] = { CRC32_TABLE }; + +static uint32_t +svp_crc32(uint32_t old, const uint8_t *buf, size_t len) +{ + uint32_t out; + + CRC32(out, buf, len, old, svp_crc32_tab); + return (out); +} + +void +svp_query_crc32(svp_req_t *shp, void *buf, size_t data) +{ + uint32_t crc = -1U; + + shp->svp_crc32 = 0; + crc = svp_crc32(crc, (uint8_t *)shp, sizeof (svp_req_t)); + crc = svp_crc32(crc, buf, data); + crc = ~crc; + shp->svp_crc32 = htonl(crc); +} diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_host.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_host.c new file mode 100644 index 0000000000..e91cc30e9d --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_host.c @@ -0,0 +1,171 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * DNS Host-name related functions. + * + * For more information, see the big theory statement in + * lib/varpd/svp/common/libvarpd_svp.c. + */ + +#include <sys/socket.h> +#include <netdb.h> +#include <thread.h> +#include <synch.h> +#include <assert.h> +#include <errno.h> + +#include <libvarpd_svp.h> + +int svp_host_nthreads = 8; + +static mutex_t svp_host_lock = ERRORCHECKMUTEX; +static cond_t svp_host_cv = DEFAULTCV; +static svp_remote_t *svp_host_head; + +/* ARGSUSED */ +static void * +svp_host_loop(void *unused) +{ + for (;;) { + int err; + svp_remote_t *srp; + struct addrinfo *addrs; + + mutex_enter(&svp_host_lock); + while (svp_host_head == NULL) + (void) cond_wait(&svp_host_cv, &svp_host_lock); + srp = svp_host_head; + svp_host_head = srp->sr_nexthost; + if (svp_host_head != NULL) + (void) cond_signal(&svp_host_cv); + mutex_exit(&svp_host_lock); + + mutex_enter(&srp->sr_lock); + assert(srp->sr_state & SVP_RS_LOOKUP_SCHEDULED); + srp->sr_state &= ~SVP_RS_LOOKUP_SCHEDULED; + if (srp->sr_state & SVP_RS_LOOKUP_INPROGRESS) { + mutex_exit(&srp->sr_lock); + continue; + } + srp->sr_state |= SVP_RS_LOOKUP_INPROGRESS; + mutex_exit(&srp->sr_lock); + + for (;;) { + err = getaddrinfo(srp->sr_hostname, NULL, NULL, &addrs); + if (err == 0) + break; + if (err != 0) { + switch (err) { + case EAI_ADDRFAMILY: + case EAI_BADFLAGS: + case EAI_FAMILY: + case EAI_SERVICE: + case EAI_SOCKTYPE: + case EAI_OVERFLOW: + default: + libvarpd_panic("unexpected getaddrinfo " + "failure: %d", err); + break; + case EAI_AGAIN: + case EAI_MEMORY: + case EAI_SYSTEM: + continue; + case EAI_FAIL: + case EAI_NODATA: + case EAI_NONAME: + /* + * At this point in time we have + * something which isn't very good. This + * may have been a typo or something may + * have been destroyed. We should go + * ahead and degrade this overall + * instance, because we're not going to + * make much forward progress... It'd be + * great if we could actually issue more + * of an EREPORT to describe what + * happened, some day. + */ + mutex_enter(&srp->sr_lock); + svp_remote_degrade(srp, + SVP_RD_DNS_FAIL); + mutex_exit(&srp->sr_lock); + break; + } + } + break; + } + + if (err == 0) { + /* + * We've successfully resolved something, mark this + * degredation over for now. + */ + mutex_enter(&srp->sr_lock); + svp_remote_restore(srp, SVP_RD_DNS_FAIL); + mutex_exit(&srp->sr_lock); + svp_remote_resolved(srp, addrs); + } + + mutex_enter(&srp->sr_lock); + srp->sr_state &= ~SVP_RS_LOOKUP_INPROGRESS; + (void) cond_broadcast(&srp->sr_cond); + mutex_exit(&srp->sr_lock); + } + + /* LINTED: E_STMT_NOT_REACHED */ + return (NULL); +} + +void +svp_host_queue(svp_remote_t *srp) +{ + svp_remote_t *s; + mutex_enter(&svp_host_lock); + mutex_enter(&srp->sr_lock); + if (srp->sr_state & SVP_RS_LOOKUP_SCHEDULED) { + mutex_exit(&srp->sr_lock); + mutex_exit(&svp_host_lock); + return; + } + srp->sr_state |= SVP_RS_LOOKUP_SCHEDULED; + s = svp_host_head; + while (s != NULL && s->sr_nexthost != NULL) + s = s->sr_nexthost; + if (s == NULL) { + assert(s == svp_host_head); + svp_host_head = srp; + } else { + s->sr_nexthost = srp; + } + srp->sr_nexthost = NULL; + (void) cond_signal(&svp_host_cv); + mutex_exit(&srp->sr_lock); + mutex_exit(&svp_host_lock); +} + +int +svp_host_init(void) +{ + int i; + + for (i = 0; i < svp_host_nthreads; i++) { + if (thr_create(NULL, 0, svp_host_loop, NULL, + THR_DETACHED | THR_DAEMON, NULL) != 0) + return (errno); + } + + return (0); +} diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c new file mode 100644 index 0000000000..18a79b9dff --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c @@ -0,0 +1,210 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * Event loop mechanism for our backend. + * + * For more information, see the big theory statement in + * lib/varpd/svp/common/libvarpd_svp.c. + */ + +#include <unistd.h> +#include <thread.h> +#include <port.h> +#include <signal.h> +#include <time.h> +#include <errno.h> +#include <umem.h> + +#include <libvarpd_svp.h> + +typedef struct svp_event_loop { + int sel_port; /* RO */ + int sel_nthread; /* RO */ + thread_t *sel_threads; /* RO */ + boolean_t sel_stop; /* svp_elock */ + timer_t sel_hosttimer; +} svp_event_loop_t; + +static svp_event_loop_t svp_event; +static mutex_t svp_elock = ERRORCHECKMUTEX; + +/* ARGSUSED */ +static void * +svp_event_thr(void *arg) +{ + for (;;) { + int ret; + port_event_t pe; + svp_event_t *sep; + + mutex_enter(&svp_elock); + if (svp_event.sel_stop == B_TRUE) { + mutex_exit(&svp_elock); + break; + } + mutex_exit(&svp_elock); + + ret = port_get(svp_event.sel_port, &pe, NULL); + if (ret != 0) { + switch (errno) { + case EFAULT: + case EBADF: + case EINVAL: + libvarpd_panic("unexpected port_get errno: %d", + errno); + default: + break; + } + } + + if (pe.portev_user == NULL) + libvarpd_panic("received event (%p) without " + "protev_user set", &pe); + sep = (svp_event_t *)pe.portev_user; + sep->se_func(&pe, sep->se_arg); + } + + return (NULL); +} + +int +svp_event_associate(svp_event_t *sep, int fd) +{ + int ret; + + ret = port_associate(svp_event.sel_port, PORT_SOURCE_FD, fd, + sep->se_events, sep); + if (ret != 0) { + switch (errno) { + case EBADF: + case EBADFD: + case EINVAL: + case EAGAIN: + libvarpd_panic("unexpected port_associate error: %d", + errno); + default: + ret = errno; + break; + } + } + + return (ret); +} + +/* ARGSUSED */ +int +svp_event_dissociate(svp_event_t *sep, int fd) +{ + int ret; + + ret = port_dissociate(svp_event.sel_port, PORT_SOURCE_FD, fd); + if (ret != 0) { + if (errno != ENOENT) + libvarpd_panic("unexpected port_dissociate error: %d", + errno); + ret = errno; + } + return (ret); +} + +int +svp_event_inject(svp_event_t *user) +{ + return (port_send(svp_event.sel_port, 0, user)); +} + +int +svp_event_timer_init(svp_event_t *sep) +{ + port_notify_t pn; + struct sigevent evp; + struct itimerspec ts; + + pn.portnfy_port = svp_event.sel_port; + pn.portnfy_user = sep; + evp.sigev_notify = SIGEV_PORT; + evp.sigev_value.sival_ptr = &pn; + + if (timer_create(CLOCK_REALTIME, &evp, &svp_event.sel_hosttimer) != 0) + return (errno); + + ts.it_value.tv_sec = svp_tickrate; + ts.it_value.tv_nsec = 0; + ts.it_interval.tv_sec = svp_tickrate; + ts.it_interval.tv_nsec = 0; + + if (timer_settime(svp_event.sel_hosttimer, TIMER_RELTIME, &ts, + NULL) != 0) { + int ret = errno; + (void) timer_delete(svp_event.sel_hosttimer); + return (ret); + } + + return (0); +} + +int +svp_event_init(void) +{ + long i, ncpus; + + svp_event.sel_port = port_create(); + if (svp_event.sel_port == -1) + return (errno); + + ncpus = sysconf(_SC_NPROCESSORS_ONLN) * 2 + 1; + if (ncpus <= 0) + libvarpd_panic("sysconf for nprocs failed... %d/%d", + ncpus, errno); + + svp_event.sel_threads = umem_alloc(sizeof (thread_t) * ncpus, + UMEM_DEFAULT); + if (svp_event.sel_threads == NULL) { + int ret = errno; + (void) timer_delete(svp_event.sel_hosttimer); + (void) close(svp_event.sel_port); + svp_event.sel_port = -1; + return (ret); + } + + for (i = 0; i < ncpus; i++) { + int ret; + thread_t *thr = &svp_event.sel_threads[i]; + + ret = thr_create(NULL, 0, svp_event_thr, NULL, + THR_DETACHED | THR_DAEMON, thr); + if (ret != 0) { + ret = errno; + (void) timer_delete(svp_event.sel_hosttimer); + (void) close(svp_event.sel_port); + svp_event.sel_port = -1; + return (errno); + } + } + + return (0); +} + +void +svp_event_fini(void) +{ + mutex_enter(&svp_elock); + svp_event.sel_stop = B_TRUE; + mutex_exit(&svp_elock); + + (void) timer_delete(svp_event.sel_hosttimer); + (void) close(svp_event.sel_port); +} diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h b/usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h new file mode 100644 index 0000000000..16dbdbec05 --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h @@ -0,0 +1,236 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#ifndef _LIBVARPD_SVP_PROT_H +#define _LIBVARPD_SVP_PROT_H + +/* + * SVP protocol Definitions + */ + +#include <sys/types.h> +#include <inttypes.h> +#include <sys/ethernet.h> +#include <netinet/in.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * SDC VXLAN Protocol Definitions + */ + +#define SVP_VERSION_ONE 1 +#define SVP_CURRENT_VERSION SVP_VERSION_ONE + +typedef struct svp_req { + uint16_t svp_ver; + uint16_t svp_op; + uint32_t svp_size; + uint32_t svp_id; + uint32_t svp_crc32; +} svp_req_t; + +typedef enum svp_op { + SVP_R_UNKNOWN = 0x00, + SVP_R_PING = 0x01, + SVP_R_PONG = 0x02, + SVP_R_VL2_REQ = 0x03, + SVP_R_VL2_ACK = 0x04, + SVP_R_VL3_REQ = 0x05, + SVP_R_VL3_ACK = 0x06, + SVP_R_BULK_REQ = 0x07, + SVP_R_BULK_ACK = 0x08, + SVP_R_LOG_REQ = 0x09, + SVP_R_LOG_ACK = 0x0A, + SVP_R_LOG_RM = 0x0B, + SVP_R_LOG_RM_ACK = 0x0C, + SVP_R_SHOOTDOWN = 0x0D +} svp_op_t; + +typedef enum svp_status { + SVP_S_OK = 0x00, /* Everything OK */ + SVP_S_FATAL = 0x01, /* Fatal error, close connection */ + SVP_S_NOTFOUND = 0x02, /* Entry not found */ + SVP_S_BADL3TYPE = 0x03, /* Unknown svp_vl3_type_t */ + SVP_S_BADBULK = 0x04 /* Unknown svp_bulk_type_t */ +} svp_status_t; + +/* + * A client issues the SVP_R_VL2_REQ whenever it needs to perform a VLS->UL3 + * lookup. Requests have the following structure: + */ +typedef struct svp_vl2_req { + uint8_t sl2r_mac[ETHERADDRL]; + uint8_t sl2r_pad[2]; + uint32_t sl2r_vnetid; +} svp_vl2_req_t; + +/* + * This is the message a server uses to reply to the SVP_R_VL2_REQ. If the + * destination on the underlay is an IPv4 address, it should be encoded as an + * IPv4-mapped IPv6 address. + */ +typedef struct svp_vl2_ack { + uint16_t sl2a_status; + uint16_t sl2a_port; + uint8_t sl2a_addr[16]; +} svp_vl2_ack_t; + + +/* + * A client issues the SVP_R_VL3_REQ request whenever it needs to perform a + * VL3->VL2 lookup. Note, that this also implicitly performs a VL2->UL3 lookup + * as well. The sl3r_type member is used to indicate the kind of lookup type + * that we're performing, eg. is it a L3 or L2. + */ +typedef enum svp_vl3_type { + SVP_VL3_IP = 0x01, + SVP_VL3_IPV6 = 0x02 +} svp_vl3_type_t; + +typedef struct svp_vl3_req { + uint8_t sl3r_ip[16]; + uint32_t sl3r_type; + uint32_t sl3r_vnetid; +} svp_vl3_req_t; + +/* + * This response, corresponding to the SVP_R_VL3_ACK, includes an answer to both + * the VL3->VL2 and the VL2->UL3 requests. + */ +typedef struct svp_vl3_ack { + uint32_t sl3a_status; + uint8_t sl3a_mac[ETHERADDRL]; + uint16_t sl3a_uport; + uint8_t sl3a_uip[16]; +} svp_vl3_ack_t; + +/* + * SVP_R_BULK_REQ requests a bulk dump of data. Currently we have two kinds of + * data tables that we need to dump: VL3->VL2 mappings and VL2->UL3 mappings. + * The kind that we want is indicated using the svbr_type member. + */ +typedef enum svp_bulk_type { + SVP_BULK_VL2 = 0x01, + SVP_BULK_VL3 = 0x02 +} svp_bulk_type_t; + +typedef struct svp_bulk_req { + uint32_t svbr_type; +} svp_bulk_req_t; + +/* + * When replying to a bulk request (SVP_R_BULK_ACK), data is streamed back + * across. The format of the data is currently undefined and as we work on the + * system, we'll get a better understanding of what this should look like. A + * client may need to stream such a request to disk, or the format will need to + * be in a streamable format that allows the client to construct data. + */ +typedef struct svp_bulk_ack { + uint32_t svba_status; + uint32_t svba_type; + uint8_t svba_data[]; +} svp_bulk_ack_t; + +/* + * SVP_R_LOG_REQ requests a log entries from the specified log from the server. + * The total number of bytes that the user is ready to receive is in svlr_count. + * However, the server should not block for data if none is available and thus + * may return less than svlr_count bytes back. We identify the IP address of the + * underlay to use here explicitly. + */ +typedef struct svp_log_req { + uint32_t svlr_count; + uint8_t svlr_ip[16]; +} svp_log_req_t; + +/* + * The server replies to a log request by sending a series of log entries. + * These log entries may be a mixture of both vl2 and vl3 records. The reply is + * a stream of bytes after the status message whose length is determined baseed + * on the header itself. Each entry begins with a uint32_t that describes its + * type and then is followed by the remaining data payload. The next entry + * follows immediately which again begins with the uint32_t word that describes + * what it should be. + */ +typedef enum svp_log_type { + SVP_LOG_VL2 = 0x01, + SVP_LOG_VL3 = 0x02 +} svp_log_type_t; + +typedef struct svp_log_vl2 { + uint32_t svl2_type; /* Should be SVP_LOG_VL2 */ + uint8_t svl2_id[16]; /* 16-byte UUID */ + uint8_t svl2_mac[ETHERADDRL]; + uint8_t svl2_pad[2]; + uint32_t svl2_vnetid; +} svp_log_vl2_t; + +typedef struct svp_log_vl3 { + uint32_t svl3_type; /* Should be SVP_LOG_VL3 */ + uint8_t svl3_id[16]; /* 16-byte UUID */ + uint8_t svl3_ip[16]; + uint8_t svl3_pad[2]; + uint16_t svl3_vlan; + uint32_t svl3_vnetid; +} svp_log_vl3_t; + +typedef struct svp_log_ack { + uint32_t svla_status; + uint8_t svla_data[]; +} svp_log_ack_t; + +/* + * SVP_R_LOG_RM is used after the client successfully processes a series of the + * log stream. It replies to tell the server that it can remove those IDs from + * processing. The IDs used are the same IDs that were in the individual + * SVP_R_LOG_ACK entries. + */ +typedef struct svp_lrm_req { + uint32_t svrr_count; + uint8_t svrr_ids[]; +} svp_lrm_req_t; + +/* + * SVP_R_LOG_RM_ACK is used to indicate that a log entry has been successfully + * deleted and at this point it makes sense to go and ask for another + * SVP_R_LOG_REQ. + */ +typedef struct svp_lrm_ack { + uint32_t svra_status; +} svp_lrm_ack_t; + +/* + * A shootdown (SVP_R_SHOOTDOWN) is used by a CN to reply to another CN that it + * sent an invalid entry that could not be processed. This should be a + * relatively infrequent occurrence. Unlike the rest of the messages, there is + * no reply to it. It's a single request to try and help get us out there. When + * a node receives this, it will issue a conditional revocation ioctl, that + * removes the entry if and only if, it matches the IP. That way if we've + * already gotten an updated entry for this, we don't remove it again. + */ +typedef struct svp_shootdown { + uint8_t svsd_mac[ETHERADDRL]; + uint8_t svsd_pad[2]; + uint32_t svsd_vnetid; +} svp_shootdown_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBVARPD_SVP_PROT_H */ diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c new file mode 100644 index 0000000000..8d482e4a12 --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c @@ -0,0 +1,821 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * Remote backend management + * + * For more information, see the big theory statement in + * lib/varpd/svp/common/libvarpd_svp.c. + */ + +#include <umem.h> +#include <strings.h> +#include <string.h> +#include <stddef.h> +#include <thread.h> +#include <synch.h> +#include <assert.h> +#include <sys/socket.h> +#include <netdb.h> +#include <errno.h> +#include <libidspace.h> + +#include <libvarpd_provider.h> +#include <libvarpd_svp.h> + +typedef struct svp_shoot_vl3 { + svp_query_t ssv_query; + struct sockaddr_in6 ssv_sock; + svp_log_vl3_t *ssv_vl3; + svp_sdlog_t *ssv_log; +} svp_shoot_vl3_t; + +static mutex_t svp_remote_lock = ERRORCHECKMUTEX; +static avl_tree_t svp_remote_tree; +static svp_timer_t svp_dns_timer; +static id_space_t *svp_idspace; +static int svp_dns_timer_rate = 30; /* seconds */ + +static void +svp_remote_mkfmamsg(svp_remote_t *srp, svp_degrade_state_t state, char *buf, + size_t buflen) +{ + switch (state) { + case SVP_RD_DNS_FAIL: + (void) snprintf(buf, buflen, "failed to resolve or find " + "entries for hostname %s", srp->sr_hostname); + break; + case SVP_RD_REMOTE_FAIL: + (void) snprintf(buf, buflen, "cannot reach any remote peers"); + break; + default: + (void) snprintf(buf, buflen, "unkonwn error state: %d", state); + } +} + +static int +svp_remote_comparator(const void *l, const void *r) +{ + int ret; + const svp_remote_t *lr = l, *rr = r; + + ret = strcmp(lr->sr_hostname, rr->sr_hostname); + if (ret > 0) + return (1); + else if (ret < 0) + return (-1); + + if (lr->sr_rport > rr->sr_rport) + return (1); + else if (lr->sr_rport < rr->sr_rport) + return (-1); + + return (memcmp(&lr->sr_uip, &rr->sr_uip, sizeof (struct in6_addr))); +} + +void +svp_query_release(svp_query_t *sqp) +{ + id_free(svp_idspace, sqp->sq_header.svp_id); +} + +static void +svp_remote_destroy(svp_remote_t *srp) +{ + size_t len; + + /* + * Clean up any unrelated DNS information. At this point we know that + * we're not in the remote tree. That means, that svp_remote_dns_timer + * cannot queue us. However, if any of our DNS related state flags are + * set, we have to hang out. + */ + mutex_enter(&srp->sr_lock); + while (srp->sr_state & + (SVP_RS_LOOKUP_SCHEDULED | SVP_RS_LOOKUP_INPROGRESS)) { + (void) cond_wait(&srp->sr_cond, &srp->sr_lock); + } + mutex_exit(&srp->sr_lock); + svp_shootdown_fini(srp); + + if (cond_destroy(&srp->sr_cond) != 0) + libvarpd_panic("failed to destroy cond sr_cond"); + + if (mutex_destroy(&srp->sr_lock) != 0) + libvarpd_panic("failed to destroy mutex sr_lock"); + + if (srp->sr_addrinfo != NULL) + freeaddrinfo(srp->sr_addrinfo); + len = strlen(srp->sr_hostname) + 1; + umem_free(srp->sr_hostname, len); + umem_free(srp, sizeof (svp_remote_t)); +} + +static int +svp_remote_create(const char *host, uint16_t port, struct in6_addr *uip, + svp_remote_t **outp) +{ + size_t hlen; + svp_remote_t *remote; + + assert(MUTEX_HELD(&svp_remote_lock)); + + remote = umem_zalloc(sizeof (svp_remote_t), UMEM_DEFAULT); + if (remote == NULL) { + mutex_exit(&svp_remote_lock); + return (ENOMEM); + } + + if (svp_shootdown_init(remote) != 0) { + umem_free(remote, sizeof (svp_remote_t)); + mutex_exit(&svp_remote_lock); + return (ENOMEM); + } + + hlen = strlen(host) + 1; + remote->sr_hostname = umem_alloc(hlen, UMEM_DEFAULT); + if (remote->sr_hostname == NULL) { + svp_shootdown_fini(remote); + umem_free(remote, sizeof (svp_remote_t)); + mutex_exit(&svp_remote_lock); + return (ENOMEM); + } + remote->sr_rport = port; + if (mutex_init(&remote->sr_lock, + USYNC_THREAD | LOCK_ERRORCHECK, NULL) != 0) + libvarpd_panic("failed to create mutex sr_lock"); + if (cond_init(&remote->sr_cond, USYNC_PROCESS, NULL) != 0) + libvarpd_panic("failed to create cond sr_cond"); + list_create(&remote->sr_conns, sizeof (svp_conn_t), + offsetof(svp_conn_t, sc_rlist)); + avl_create(&remote->sr_tree, svp_comparator, sizeof (svp_t), + offsetof(svp_t, svp_rlink)); + (void) strlcpy(remote->sr_hostname, host, hlen); + remote->sr_count = 1; + remote->sr_uip = *uip; + + svp_shootdown_start(remote); + + *outp = remote; + return (0); +} + +int +svp_remote_find(char *host, uint16_t port, struct in6_addr *uip, + svp_remote_t **outp) +{ + int ret; + svp_remote_t lookup, *remote; + + lookup.sr_hostname = host; + lookup.sr_rport = port; + lookup.sr_uip = *uip; + mutex_enter(&svp_remote_lock); + remote = avl_find(&svp_remote_tree, &lookup, NULL); + if (remote != NULL) { + assert(remote->sr_count > 0); + remote->sr_count++; + *outp = remote; + mutex_exit(&svp_remote_lock); + return (0); + } + + if ((ret = svp_remote_create(host, port, uip, outp)) != 0) { + mutex_exit(&svp_remote_lock); + return (ret); + } + + avl_add(&svp_remote_tree, *outp); + mutex_exit(&svp_remote_lock); + + /* Make sure DNS is up to date */ + svp_host_queue(*outp); + + return (0); +} + +void +svp_remote_release(svp_remote_t *srp) +{ + mutex_enter(&svp_remote_lock); + mutex_enter(&srp->sr_lock); + srp->sr_count--; + if (srp->sr_count != 0) { + mutex_exit(&srp->sr_lock); + mutex_exit(&svp_remote_lock); + return; + } + mutex_exit(&srp->sr_lock); + + avl_remove(&svp_remote_tree, srp); + mutex_exit(&svp_remote_lock); + svp_remote_destroy(srp); +} + +int +svp_remote_attach(svp_remote_t *srp, svp_t *svp) +{ + svp_t check; + avl_index_t where; + + mutex_enter(&srp->sr_lock); + if (svp->svp_remote != NULL) + libvarpd_panic("failed to create mutex sr_lock"); + + /* + * We require everything except shootdowns + */ + if (svp->svp_cb.scb_vl2_lookup == NULL) + libvarpd_panic("missing callback scb_vl2_lookup"); + if (svp->svp_cb.scb_vl3_lookup == NULL) + libvarpd_panic("missing callback scb_vl3_lookup"); + if (svp->svp_cb.scb_vl2_invalidate == NULL) + libvarpd_panic("missing callback scb_vl2_invalidate"); + if (svp->svp_cb.scb_vl3_inject == NULL) + libvarpd_panic("missing callback scb_vl3_inject"); + + check.svp_vid = svp->svp_vid; + if (avl_find(&srp->sr_tree, &check, &where) != NULL) + libvarpd_panic("found duplicate entry with vid %ld", + svp->svp_vid); + avl_insert(&srp->sr_tree, svp, where); + svp->svp_remote = srp; + mutex_exit(&srp->sr_lock); + + return (0); +} + +void +svp_remote_detach(svp_t *svp) +{ + svp_t *lookup; + svp_remote_t *srp = svp->svp_remote; + + if (srp == NULL) + libvarpd_panic("trying to detach remote when none exists"); + + mutex_enter(&srp->sr_lock); + lookup = avl_find(&srp->sr_tree, svp, NULL); + if (lookup == NULL || lookup != svp) + libvarpd_panic("inconsitent remote avl tree..."); + avl_remove(&srp->sr_tree, svp); + svp->svp_remote = NULL; + mutex_exit(&srp->sr_lock); + svp_remote_release(srp); +} + +/* + * Walk the list of connections and find the first one that's available, the + * move it to the back of the list so it's less likely to be used again. + */ +static boolean_t +svp_remote_conn_queue(svp_remote_t *srp, svp_query_t *sqp) +{ + svp_conn_t *scp; + + assert(MUTEX_HELD(&srp->sr_lock)); + for (scp = list_head(&srp->sr_conns); scp != NULL; + scp = list_next(&srp->sr_conns, scp)) { + mutex_enter(&scp->sc_lock); + if (scp->sc_cstate != SVP_CS_ACTIVE) { + mutex_exit(&scp->sc_lock); + continue; + } + svp_conn_queue(scp, sqp); + mutex_exit(&scp->sc_lock); + list_remove(&srp->sr_conns, scp); + list_insert_tail(&srp->sr_conns, scp); + return (B_TRUE); + } + + return (B_FALSE); +} + +static void +svp_remote_vl2_lookup_cb(svp_query_t *sqp, void *arg) +{ + svp_t *svp = sqp->sq_svp; + svp_vl2_ack_t *vl2a = (svp_vl2_ack_t *)sqp->sq_wdata; + + if (sqp->sq_status == SVP_S_OK) + svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, + (struct in6_addr *)vl2a->sl2a_addr, ntohs(vl2a->sl2a_port), + arg); + else + svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, NULL, 0, arg); +} + +void +svp_remote_vl2_lookup(svp_t *svp, svp_query_t *sqp, const uint8_t *mac, + void *arg) +{ + svp_remote_t *srp; + svp_vl2_req_t *vl2r = &sqp->sq_rdun.sqd_vl2r; + + srp = svp->svp_remote; + sqp->sq_func = svp_remote_vl2_lookup_cb; + sqp->sq_arg = arg; + sqp->sq_svp = svp; + sqp->sq_state = SVP_QUERY_INIT; + sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION); + sqp->sq_header.svp_op = htons(SVP_R_VL2_REQ); + sqp->sq_header.svp_size = htonl(sizeof (svp_vl2_req_t)); + sqp->sq_header.svp_id = id_alloc(svp_idspace); + if (sqp->sq_header.svp_id == (id_t)-1) + libvarpd_panic("failed to allcoate from svp_idspace: %d", + errno); + sqp->sq_header.svp_crc32 = htonl(0); + sqp->sq_rdata = vl2r; + sqp->sq_rsize = sizeof (svp_vl2_req_t); + sqp->sq_wdata = NULL; + sqp->sq_wsize = 0; + + bcopy(mac, vl2r->sl2r_mac, ETHERADDRL); + vl2r->sl2r_vnetid = ntohl(svp->svp_vid); + + mutex_enter(&srp->sr_lock); + if (svp_remote_conn_queue(srp, sqp) == B_FALSE) + svp->svp_cb.scb_vl2_lookup(svp, SVP_S_FATAL, NULL, NULL, arg); + mutex_exit(&srp->sr_lock); +} + +static void +svp_remote_vl3_lookup_cb(svp_query_t *sqp, void *arg) +{ + svp_t *svp = sqp->sq_svp; + svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata; + + if (sqp->sq_status == SVP_S_OK) + svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, vl3a->sl3a_mac, + (struct in6_addr *)vl3a->sl3a_uip, ntohs(vl3a->sl3a_uport), + arg); + else + svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, NULL, NULL, 0, + arg); +} + +static void +svp_remote_vl3_common(svp_remote_t *srp, svp_query_t *sqp, + const struct sockaddr *addr, svp_query_f func, void *arg, uint32_t vid) +{ + svp_vl3_req_t *vl3r = &sqp->sq_rdun.sdq_vl3r; + + if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) + libvarpd_panic("unexpected sa_family for the vl3 lookup"); + + sqp->sq_func = func; + sqp->sq_arg = arg; + sqp->sq_state = SVP_QUERY_INIT; + sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION); + sqp->sq_header.svp_op = htons(SVP_R_VL3_REQ); + sqp->sq_header.svp_size = htonl(sizeof (svp_vl3_req_t)); + sqp->sq_header.svp_id = id_alloc(svp_idspace); + if (sqp->sq_header.svp_id == (id_t)-1) + libvarpd_panic("failed to allcoate from svp_idspace: %d", + errno); + sqp->sq_header.svp_crc32 = htonl(0); + sqp->sq_rdata = vl3r; + sqp->sq_rsize = sizeof (svp_vl3_req_t); + sqp->sq_wdata = NULL; + sqp->sq_wsize = 0; + + if (addr->sa_family == AF_INET6) { + struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)addr; + vl3r->sl3r_type = htonl(SVP_VL3_IPV6); + bcopy(&s6->sin6_addr, vl3r->sl3r_ip, + sizeof (struct in6_addr)); + } else { + struct sockaddr_in *s4 = (struct sockaddr_in *)addr; + struct in6_addr v6; + + vl3r->sl3r_type = htonl(SVP_VL3_IP); + IN6_INADDR_TO_V4MAPPED(&s4->sin_addr, &v6); + bcopy(&v6, vl3r->sl3r_ip, sizeof (struct in6_addr)); + } + vl3r->sl3r_vnetid = htonl(vid); + + mutex_enter(&srp->sr_lock); + if (svp_remote_conn_queue(srp, sqp) == B_FALSE) { + sqp->sq_status = SVP_S_FATAL; + sqp->sq_func(sqp, arg); + } + mutex_exit(&srp->sr_lock); +} + +/* + * This is a request to do a VL3 look-up that originated internally as opposed + * to coming from varpd. As such we need a slightly different query callback + * function upon completion and don't go through the normal path with the svp_t. + */ +void +svp_remote_vl3_logreq(svp_remote_t *srp, svp_query_t *sqp, uint32_t vid, + const struct sockaddr *addr, svp_query_f func, void *arg) +{ + svp_remote_vl3_common(srp, sqp, addr, func, arg, vid); +} + +void +svp_remote_vl3_lookup(svp_t *svp, svp_query_t *sqp, + const struct sockaddr *addr, void *arg) +{ + svp_remote_t *srp = svp->svp_remote; + + sqp->sq_svp = svp; + svp_remote_vl3_common(srp, sqp, addr, svp_remote_vl3_lookup_cb, + arg, svp->svp_vid); +} + +static void +svp_remote_log_request_cb(svp_query_t *sqp, void *arg) +{ + svp_remote_t *srp = sqp->sq_arg; + + assert(sqp->sq_wdata != NULL); + if (sqp->sq_status == SVP_S_OK) + svp_shootdown_logr_cb(srp, sqp->sq_status, sqp->sq_wdata, + sqp->sq_size); + else + svp_shootdown_logr_cb(srp, sqp->sq_status, NULL, 0); +} + +void +svp_remote_log_request(svp_remote_t *srp, svp_query_t *sqp, void *buf, + size_t buflen) +{ + svp_log_req_t *logr = &sqp->sq_rdun.sdq_logr; + boolean_t queued; + + sqp->sq_func = svp_remote_log_request_cb; + sqp->sq_state = SVP_QUERY_INIT; + sqp->sq_arg = srp; + sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION); + sqp->sq_header.svp_op = htons(SVP_R_LOG_REQ); + sqp->sq_header.svp_size = htonl(sizeof (svp_log_req_t)); + sqp->sq_header.svp_id = id_alloc(svp_idspace); + if (sqp->sq_header.svp_id == (id_t)-1) + libvarpd_panic("failed to allcoate from svp_idspace: %d", + errno); + sqp->sq_header.svp_crc32 = htonl(0); + sqp->sq_rdata = logr; + sqp->sq_rsize = sizeof (svp_log_req_t); + sqp->sq_wdata = buf; + sqp->sq_wsize = buflen; + + logr->svlr_count = htonl(buflen); + bcopy(&srp->sr_uip, logr->svlr_ip, sizeof (struct in6_addr)); + + /* + * If this fails, there isn't much that we can't do. Give the callback + * with a fatal status. + */ + mutex_enter(&srp->sr_lock); + queued = svp_remote_conn_queue(srp, sqp); + mutex_exit(&srp->sr_lock); + + if (queued == B_FALSE) + svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0); +} + +static void +svp_remote_lrm_request_cb(svp_query_t *sqp, void *arg) +{ + svp_remote_t *srp = arg; + + svp_shootdown_lrm_cb(srp, sqp->sq_status); +} + +void +svp_remote_lrm_request(svp_remote_t *srp, svp_query_t *sqp, void *buf, + size_t buflen) +{ + boolean_t queued; + svp_lrm_req_t *svrr = buf; + + sqp->sq_func = svp_remote_lrm_request_cb; + sqp->sq_state = SVP_QUERY_INIT; + sqp->sq_arg = srp; + sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION); + sqp->sq_header.svp_op = htons(SVP_R_LOG_RM); + sqp->sq_header.svp_size = htonl(buflen); + sqp->sq_header.svp_id = id_alloc(svp_idspace); + if (sqp->sq_header.svp_id == (id_t)-1) + libvarpd_panic("failed to allcoate from svp_idspace: %d", + errno); + sqp->sq_header.svp_crc32 = htonl(0); + sqp->sq_rdata = buf; + sqp->sq_rsize = buflen; + sqp->sq_wdata = NULL; + sqp->sq_wsize = 0; + + /* + * We need to fix up the count to be in proper network order. + */ + svrr->svrr_count = htonl(svrr->svrr_count); + + /* + * If this fails, there isn't much that we can't do. Give the callback + * with a fatal status. + */ + mutex_enter(&srp->sr_lock); + queued = svp_remote_conn_queue(srp, sqp); + mutex_exit(&srp->sr_lock); + + if (queued == B_FALSE) + svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0); +} + +/* ARGSUSED */ +void +svp_remote_dns_timer(void *unused) +{ + svp_remote_t *s; + mutex_enter(&svp_remote_lock); + for (s = avl_first(&svp_remote_tree); s != NULL; + s = AVL_NEXT(&svp_remote_tree, s)) { + svp_host_queue(s); + } + mutex_exit(&svp_remote_lock); +} + +void +svp_remote_resolved(svp_remote_t *srp, struct addrinfo *newaddrs) +{ + struct addrinfo *a; + svp_conn_t *scp; + int ngen; + + mutex_enter(&srp->sr_lock); + srp->sr_gen++; + ngen = srp->sr_gen; + mutex_exit(&srp->sr_lock); + + for (a = newaddrs; a != NULL; a = a->ai_next) { + struct in6_addr in6; + struct in6_addr *addrp; + + if (a->ai_family != AF_INET && a->ai_family != AF_INET6) + continue; + + if (a->ai_family == AF_INET) { + struct sockaddr_in *v4; + v4 = (struct sockaddr_in *)a->ai_addr; + addrp = &in6; + IN6_INADDR_TO_V4MAPPED(&v4->sin_addr, addrp); + } else { + struct sockaddr_in6 *v6; + v6 = (struct sockaddr_in6 *)a->ai_addr; + addrp = &v6->sin6_addr; + } + + mutex_enter(&srp->sr_lock); + for (scp = list_head(&srp->sr_conns); scp != NULL; + scp = list_next(&srp->sr_conns, scp)) { + mutex_enter(&scp->sc_lock); + if (bcmp(addrp, &scp->sc_addr, + sizeof (struct in6_addr)) == 0) { + scp->sc_gen = ngen; + mutex_exit(&scp->sc_lock); + break; + } + mutex_exit(&scp->sc_lock); + } + + /* + * We need to be careful in the assumptions that we make here, + * as there's a good chance that svp_conn_create will + * drop the svp_remote_t`sr_lock to kick off its effective event + * loop. + */ + if (scp == NULL) + (void) svp_conn_create(srp, addrp); + mutex_exit(&srp->sr_lock); + } + + /* + * Now it's time to clean things up. We do not actively clean up the + * current connections that we have, instead allowing them to stay + * around assuming that they're still useful. Instead, we go through and + * purge the degraded list for anything that's from an older generation. + */ + mutex_enter(&srp->sr_lock); + for (scp = list_head(&srp->sr_conns); scp != NULL; + scp = list_next(&srp->sr_conns, scp)) { + boolean_t fall = B_FALSE; + mutex_enter(&scp->sc_lock); + if (scp->sc_gen < srp->sr_gen) + fall = B_TRUE; + mutex_exit(&scp->sc_lock); + if (fall == B_TRUE) + svp_conn_fallout(scp); + } + mutex_exit(&srp->sr_lock); +} + +/* + * This connection is in the process of being reset, we need to reassign all of + * its queries to other places or mark them as fatal. Note that the first + * connection was the one in flight when this failed. We always mark it as + * failed to avoid trying to reset its state. + */ +void +svp_remote_reassign(svp_remote_t *srp, svp_conn_t *scp) +{ + boolean_t first = B_TRUE; + assert(MUTEX_HELD(&srp->sr_lock)); + assert(MUTEX_HELD(&srp->sr_lock)); + svp_query_t *sqp; + + /* + * As we try to reassigning all of its queries, remove it from the list. + */ + list_remove(&srp->sr_conns, scp); + + while ((sqp = list_remove_head(&scp->sc_queries)) != NULL) { + + if (first == B_TRUE) { + sqp->sq_status = SVP_S_FATAL; + sqp->sq_func(sqp, sqp->sq_arg); + continue; + } + + sqp->sq_acttime = -1; + + /* + * We may want to maintain a queue of these for some time rather + * than just failing them all. + */ + if (svp_remote_conn_queue(srp, sqp) == B_FALSE) { + sqp->sq_status = SVP_S_FATAL; + sqp->sq_func(sqp, sqp->sq_arg); + } + } + + /* + * Now that we're done, go ahead and re-insert. + */ + list_insert_tail(&srp->sr_conns, scp); +} + +void +svp_remote_degrade(svp_remote_t *srp, svp_degrade_state_t flag) +{ + int sf, nf; + char buf[256]; + + assert(MUTEX_HELD(&srp->sr_lock)); + + if (flag == SVP_RD_ALL || flag == 0) + libvarpd_panic("invalid flag passed to degrade"); + + if ((flag & srp->sr_degrade) != 0) { + return; + } + + sf = ffs(srp->sr_degrade); + nf = ffs(flag); + srp->sr_degrade |= flag; + if (sf == 0 || sf > nf) { + svp_t *svp; + svp_remote_mkfmamsg(srp, flag, buf, sizeof (buf)); + + for (svp = avl_first(&srp->sr_tree); svp != NULL; + svp = AVL_NEXT(&srp->sr_tree, svp)) { + libvarpd_fma_degrade(svp->svp_hdl, buf); + } + } +} + +void +svp_remote_restore(svp_remote_t *srp, svp_degrade_state_t flag) +{ + int sf, nf; + + assert(MUTEX_HELD(&srp->sr_lock)); + sf = ffs(srp->sr_degrade); + if ((srp->sr_degrade & flag) != flag) + return; + srp->sr_degrade &= ~flag; + nf = ffs(srp->sr_degrade); + + /* + * If we're now empty, restore the device. If we still are degraded, but + * we now have a higher base than we used to, change the message. + */ + if (srp->sr_degrade == 0) { + svp_t *svp; + for (svp = avl_first(&srp->sr_tree); svp != NULL; + svp = AVL_NEXT(&srp->sr_tree, svp)) { + libvarpd_fma_restore(svp->svp_hdl); + } + } else if (nf != sf) { + svp_t *svp; + char buf[256]; + + svp_remote_mkfmamsg(srp, 1U << (nf - 1), buf, sizeof (buf)); + for (svp = avl_first(&srp->sr_tree); svp != NULL; + svp = AVL_NEXT(&srp->sr_tree, svp)) { + libvarpd_fma_degrade(svp->svp_hdl, buf); + } + } +} + +void +svp_remote_shootdown_vl3_cb(svp_query_t *sqp, void *arg) +{ + svp_shoot_vl3_t *squery = arg; + svp_log_vl3_t *svl3 = squery->ssv_vl3; + svp_sdlog_t *sdl = squery->ssv_log; + + if (sqp->sq_status == SVP_S_OK) { + svp_t *svp, lookup; + + svp_remote_t *srp = sdl->sdl_remote; + svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata; + + lookup.svp_vid = ntohl(svl3->svl3_vnetid); + mutex_enter(&srp->sr_lock); + if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) { + svp->svp_cb.scb_vl3_inject(svp, ntohs(svl3->svl3_vlan), + (struct in6_addr *)svl3->svl3_ip, vl3a->sl3a_mac, + NULL); + } + mutex_exit(&srp->sr_lock); + + } + + svp_shootdown_vl3_cb(sqp->sq_status, svl3, sdl); + + umem_free(squery, sizeof (svp_shoot_vl3_t)); +} + +void +svp_remote_shootdown_vl3(svp_remote_t *srp, svp_log_vl3_t *svl3, + svp_sdlog_t *sdl) +{ + svp_shoot_vl3_t *squery; + + squery = umem_zalloc(sizeof (svp_shoot_vl3_t), UMEM_DEFAULT); + if (squery == NULL) { + svp_shootdown_vl3_cb(SVP_S_FATAL, svl3, sdl); + return; + } + + squery->ssv_vl3 = svl3; + squery->ssv_log = sdl; + squery->ssv_sock.sin6_family = AF_INET6; + bcopy(svl3->svl3_ip, &squery->ssv_sock.sin6_addr, + sizeof (svl3->svl3_ip)); + svp_remote_vl3_logreq(srp, &squery->ssv_query, ntohl(svl3->svl3_vnetid), + (struct sockaddr *)&squery->ssv_sock, svp_remote_shootdown_vl3_cb, + squery); +} + +void +svp_remote_shootdown_vl2(svp_remote_t *srp, svp_log_vl2_t *svl2) +{ + svp_t *svp, lookup; + + lookup.svp_vid = ntohl(svl2->svl2_vnetid); + mutex_enter(&srp->sr_lock); + if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) { + svp->svp_cb.scb_vl2_invalidate(svp, svl2->svl2_mac); + } + mutex_exit(&srp->sr_lock); +} + +int +svp_remote_init(void) +{ + svp_idspace = id_space_create("svp_req_ids", 1, INT32_MAX); + if (svp_idspace == NULL) + return (errno); + avl_create(&svp_remote_tree, svp_remote_comparator, + sizeof (svp_remote_t), offsetof(svp_remote_t, sr_gnode)); + svp_dns_timer.st_func = svp_remote_dns_timer; + svp_dns_timer.st_arg = NULL; + svp_dns_timer.st_oneshot = B_FALSE; + svp_dns_timer.st_value = svp_dns_timer_rate; + svp_timer_add(&svp_dns_timer); + return (0); +} + +void +svp_remote_fini(void) +{ + svp_timer_remove(&svp_dns_timer); + avl_destroy(&svp_remote_tree); + if (svp_idspace == NULL) + id_space_destroy(svp_idspace); +} diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_shootdown.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_shootdown.c new file mode 100644 index 0000000000..76afb2519f --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_shootdown.c @@ -0,0 +1,474 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * Shootdown processing logic. + * + * For more information, see the big theory statement in + * lib/varpd/svp/common/libvarpd_svp.c. + */ + +#include <umem.h> +#include <sys/uuid.h> +#include <assert.h> +#include <strings.h> +#include <errno.h> +#include <sys/debug.h> + +#include <libvarpd_provider.h> +#include <libvarpd_svp.h> + +/* + * When we've determined that there's nothing left for us to do, then we go + * ahead and wait svp_shootdown_base seconds + up to an additional + * svp_shootdown_base seconds before asking again. However, if there is actually + * some work going on, just use the svp_shootdown_cont time. + */ +static int svp_shootdown_base = 5; +static int svp_shootdown_cont = 1; + +/* + * These are sizes for our logack and logrm buffers. The sizing of the shootdown + * buffere would give us approximately 18 or so VL3 entries and 32 VL2 entries + * or some combination thereof. While it's a bit of overkill, we just use the + * same sized buffer for the list of uuids that we pass to remove log entries + * that we've acted upon. + */ +static int svp_shootdown_buf = 1024; + +static void +svp_shootdown_schedule(svp_sdlog_t *sdl, boolean_t cont) +{ + assert(MUTEX_HELD(&sdl->sdl_lock)); + + if (cont == B_TRUE) { + sdl->sdl_timer.st_value = svp_shootdown_cont; + } else { + sdl->sdl_timer.st_value = svp_shootdown_base + + arc4random_uniform(svp_shootdown_base + 1); + } + svp_timer_add(&sdl->sdl_timer); +} + +void +svp_shootdown_lrm_cb(svp_remote_t *srp, svp_status_t status) +{ + svp_sdlog_t *sdl = &srp->sr_shoot; + + mutex_enter(&sdl->sdl_lock); + sdl->sdl_flags &= ~SVP_SD_RUNNING; + svp_shootdown_schedule(sdl, B_TRUE); + mutex_exit(&sdl->sdl_lock); + + if (status != SVP_S_OK) { + (void) bunyan_warn(svp_bunyan, "SVP_R_LOG_RM failed", + BUNYAN_T_STRING, "remote_host", srp->sr_hostname, + BUNYAN_T_INT32, "remote_port", srp->sr_rport, + BUNYAN_T_INT32, "status", status, + BUNYAN_T_END); + } +} + +static void +svp_shootdown_ref(svp_sdlog_t *sdl) +{ + mutex_enter(&sdl->sdl_lock); + sdl->sdl_ref++; + mutex_exit(&sdl->sdl_lock); +} + +static void +svp_shootdown_rele(svp_sdlog_t *sdl) +{ + svp_lrm_req_t *svrr = sdl->sdl_logrm; + boolean_t next; + + mutex_enter(&sdl->sdl_lock); + VERIFY(sdl->sdl_ref > 0); + sdl->sdl_ref--; + if (sdl->sdl_ref > 0) { + mutex_exit(&sdl->sdl_lock); + return; + } + + /* + * At this point we know that we hold the last reference, therefore it's + * safe for us to go ahead and clean up and move on and attempt to + * deliver the reply. We always deliver the reply by going through the + * timer. This can be rather important as the final reference may be + * coming through a failed query and it's not always safe for us to + * callback into the remote routines from this context. + * + * We should only do this if we have a non-zero number of entries to + * take down. + */ + sdl->sdl_flags &= ~SVP_SD_RUNNING; + if (svrr->svrr_count > 0) { + sdl->sdl_flags |= SVP_SD_DORM; + next = B_TRUE; + } else { + next = B_FALSE; + } + svp_shootdown_schedule(sdl, next); + mutex_exit(&sdl->sdl_lock); +} + +/* + * This is a callback used to indicate that the VL3 lookup has completed and an + * entry, if any, has been injected. If the command succeeded, eg. we got that + * the status was OK or that it was not found, then we will add it to he list to + * shoot down. Otherwise, there's nothing else for us to really do here. + */ +void +svp_shootdown_vl3_cb(svp_status_t status, svp_log_vl3_t *vl3, svp_sdlog_t *sdl) +{ + svp_lrm_req_t *svrr = sdl->sdl_logrm; + + mutex_enter(&sdl->sdl_lock); + if (status == SVP_S_OK || status == SVP_S_NOTFOUND) { + bcopy(vl3->svl3_id, &svrr->svrr_ids[svrr->svrr_count * 16], + UUID_LEN); + svrr->svrr_count++; + } + mutex_exit(&sdl->sdl_lock); + + svp_shootdown_rele(sdl); +} + +static int +svp_shootdown_logr_shoot(void *data, svp_log_type_t type, void *arg) +{ + svp_sdlog_t *sdl = arg; + svp_remote_t *srp = sdl->sdl_remote; + svp_lrm_req_t *svrr = sdl->sdl_logrm; + + if (type != SVP_LOG_VL2 && type != SVP_LOG_VL3) + libvarpd_panic("encountered unknown type: %d\n", type); + + if (type == SVP_LOG_VL2) { + svp_log_vl2_t *svl2 = data; + svp_remote_shootdown_vl2(srp, svl2); + mutex_enter(&sdl->sdl_lock); + bcopy(svl2->svl2_id, &svrr->svrr_ids[svrr->svrr_count * 16], + UUID_LEN); + svrr->svrr_count++; + mutex_exit(&sdl->sdl_lock); + } else { + svp_log_vl3_t *svl3 = data; + + /* Take a hold for the duration of this request */ + svp_shootdown_ref(sdl); + svp_remote_shootdown_vl3(srp, svl3, sdl); + } + + return (0); +} + +static int +svp_shootdown_logr_count(void *data, svp_log_type_t type, void *arg) +{ + uint_t *u = arg; + *u = *u + 1; + return (0); +} + + +static int +svp_shootdown_logr_iter(svp_remote_t *srp, void *buf, size_t len, + int (*cb)(void *, svp_log_type_t, void *), void *arg) +{ + int ret; + off_t cboff = 0; + uint32_t *typep, type; + svp_log_vl2_t *svl2; + svp_log_vl3_t *svl3; + + /* Adjust for initial status word */ + assert(len >= sizeof (uint32_t)); + len -= sizeof (uint32_t); + cboff += sizeof (uint32_t); + + while (len > 0) { + size_t opsz; + + if (len < sizeof (uint32_t)) { + (void) bunyan_warn(svp_bunyan, + "failed to get initial shootdown tag", + BUNYAN_T_STRING, "remote_host", srp->sr_hostname, + BUNYAN_T_INT32, "remote_port", srp->sr_rport, + BUNYAN_T_INT32, "response_size", cboff + len, + BUNYAN_T_INT32, "response_offset", cboff, + BUNYAN_T_END); + return (-1); + } + + typep = buf + cboff; + type = ntohl(*typep); + if (type == SVP_LOG_VL2) { + opsz = sizeof (svp_log_vl2_t); + if (len < opsz) { + (void) bunyan_warn(svp_bunyan, + "not enough data for svp_log_vl2_t", + BUNYAN_T_STRING, "remote_host", + srp->sr_hostname, + BUNYAN_T_INT32, "remote_port", + srp->sr_rport, + BUNYAN_T_INT32, "response_size", + cboff + len, + BUNYAN_T_INT32, "response_offset", cboff, + BUNYAN_T_END); + return (-1); + } + svl2 = (void *)typep; + if ((ret = cb(svl2, type, arg)) != 0) + return (ret); + } else if (type == SVP_LOG_VL3) { + + opsz = sizeof (svp_log_vl3_t); + if (len < opsz) { + (void) bunyan_warn(svp_bunyan, + "not enough data for svp_log_vl3_t", + BUNYAN_T_STRING, "remote_host", + srp->sr_hostname, + BUNYAN_T_INT32, "remote_port", + srp->sr_rport, + BUNYAN_T_INT32, "response_size", + cboff + len, + BUNYAN_T_INT32, "response_offset", cboff, + BUNYAN_T_END); + return (-1); + } + svl3 = (void *)typep; + if ((ret = cb(svl3, type, arg)) != 0) + return (ret); + } else { + (void) bunyan_warn(svp_bunyan, + "unknown log structure type", + BUNYAN_T_STRING, "remote_host", + srp->sr_hostname, + BUNYAN_T_INT32, "remote_port", srp->sr_rport, + BUNYAN_T_INT32, "response_size", cboff + len, + BUNYAN_T_INT32, "response_offset", cboff, + BUNYAN_T_INT32, "structure_type", type, + BUNYAN_T_END); + return (-1); + } + len -= opsz; + cboff += opsz; + } + + return (0); +} + +void +svp_shootdown_logr_cb(svp_remote_t *srp, svp_status_t status, void *cbdata, + size_t cbsize) +{ + uint_t count; + svp_sdlog_t *sdl = &srp->sr_shoot; + + if (status != SVP_S_OK) { + (void) bunyan_warn(svp_bunyan, + "log request not OK", + BUNYAN_T_STRING, "remote_host", srp->sr_hostname, + BUNYAN_T_INT32, "remote_port", srp->sr_rport, + BUNYAN_T_INT32, "response_size", cbsize, + BUNYAN_T_INT32, "status", status, + BUNYAN_T_END); + mutex_enter(&sdl->sdl_lock); + sdl->sdl_flags &= ~SVP_SD_RUNNING; + svp_shootdown_schedule(sdl, B_FALSE); + mutex_exit(&sdl->sdl_lock); + return; + } + + /* + * First go ahead and count the number of entries. This effectively + * allows us to validate that all the data is valid, if this fails, then + * we fail the request. + */ + count = 0; + if ((svp_shootdown_logr_iter(srp, cbdata, cbsize, + svp_shootdown_logr_count, &count)) != 0) { + mutex_enter(&sdl->sdl_lock); + sdl->sdl_flags &= ~SVP_SD_RUNNING; + svp_shootdown_schedule(sdl, B_FALSE); + mutex_exit(&sdl->sdl_lock); + return; + } + + /* + * If we have no entries, then we're also done. + */ + if (count == 0) { + mutex_enter(&sdl->sdl_lock); + sdl->sdl_flags &= ~SVP_SD_RUNNING; + svp_shootdown_schedule(sdl, B_FALSE); + mutex_exit(&sdl->sdl_lock); + return; + } + + /* + * We have work to do. Because we may have asynchronous VL3 tasks, we're + * going to first grab a reference before we do the iteration. Then, for + * each asynchronous VL3 request we make, that'll also grab a hold. Once + * we're done with the iteration, we'll drop our hold. If that's the + * last one, it'll move on accordingly. + */ + svp_shootdown_ref(sdl); + bzero(sdl->sdl_logrm, svp_shootdown_buf); + + /* + * If this fails, we're going to determine what to do next based on the + * number of entries that were entered into the log removal. At this + * point success or failure don't really look different, all it changes + * is how many entries we have to remove. + */ + (void) svp_shootdown_logr_iter(srp, cbdata, cbsize, + svp_shootdown_logr_shoot, sdl); + + /* + * Now that we're done with our work, release the hold. If we don't have + * any vl3 tasks outstanding, this'll trigger the next phase of the log + * removals. + */ + svp_shootdown_rele(sdl); +} + +static void +svp_shootdown_timer(void *arg) +{ + svp_sdlog_t *sdl = arg; + svp_remote_t *srp = sdl->sdl_remote; + boolean_t init = B_TRUE; + + mutex_enter(&sdl->sdl_lock); + + /* + * If we've been asked to quiesce, we're done. + */ + if ((sdl->sdl_flags & SVP_SD_QUIESCE) != 0) { + mutex_exit(&sdl->sdl_lock); + return; + } + + /* + * We shouldn't be able to have ourselves currently be running and reach + * here. If that's the case, we should immediately panic. + */ + if ((sdl->sdl_flags & SVP_SD_RUNNING) != 0) { + libvarpd_panic("remote %p shootdown timer fired while still " + "running", srp); + } + + if ((sdl->sdl_flags & SVP_SD_DORM) != 0) { + sdl->sdl_flags &= ~SVP_SD_DORM; + init = B_FALSE; + } + + sdl->sdl_flags |= SVP_SD_RUNNING; + mutex_exit(&sdl->sdl_lock); + + if (init == B_FALSE) { + svp_lrm_req_t *svrr = sdl->sdl_logrm; + + bzero(&sdl->sdl_query, sizeof (svp_query_t)); + svp_remote_lrm_request(sdl->sdl_remote, &sdl->sdl_query, svrr, + sizeof (*svrr) + 16 * svrr->svrr_count); + } else { + bzero(&sdl->sdl_query, sizeof (svp_query_t)); + svp_remote_log_request(srp, &sdl->sdl_query, sdl->sdl_logack, + svp_shootdown_buf); + } +} + +void +svp_shootdown_fini(svp_remote_t *srp) +{ + svp_sdlog_t *sdl = &srp->sr_shoot; + + mutex_enter(&sdl->sdl_lock); + sdl->sdl_flags |= SVP_SD_QUIESCE; + mutex_exit(&sdl->sdl_lock); + + svp_timer_remove(&sdl->sdl_timer); + + mutex_enter(&sdl->sdl_lock); + + /* + * Normally svp_timer_remove would be enough. However, the query could + * have been put out again outside of the svp_timer interface. Therefore + * we still need to check for SVP_SD_RUNNING. + */ + while (sdl->sdl_flags & SVP_SD_RUNNING) + (void) cond_wait(&sdl->sdl_cond, &sdl->sdl_lock); + mutex_exit(&sdl->sdl_lock); + + umem_free(sdl->sdl_logack, svp_shootdown_buf); + umem_free(sdl->sdl_logrm, svp_shootdown_buf); + sdl->sdl_logack = NULL; + sdl->sdl_logrm = NULL; + (void) cond_destroy(&sdl->sdl_cond); + (void) mutex_destroy(&sdl->sdl_lock); +} + +void +svp_shootdown_start(svp_remote_t *srp) +{ + svp_sdlog_t *sdl = &srp->sr_shoot; + + mutex_enter(&sdl->sdl_lock); + svp_shootdown_schedule(sdl, B_FALSE); + mutex_exit(&sdl->sdl_lock); +} + +int +svp_shootdown_init(svp_remote_t *srp) +{ + int ret; + svp_sdlog_t *sdl = &srp->sr_shoot; + if ((ret = mutex_init(&sdl->sdl_lock, USYNC_THREAD | LOCK_ERRORCHECK, + NULL)) != 0) + return (ret); + + if ((ret = cond_init(&sdl->sdl_cond, USYNC_THREAD, NULL)) != 0) { + (void) mutex_destroy(&sdl->sdl_lock); + return (ret); + } + + if ((sdl->sdl_logack = umem_alloc(svp_shootdown_buf, UMEM_DEFAULT)) == + NULL) { + ret = errno; + (void) cond_destroy(&sdl->sdl_cond); + (void) mutex_destroy(&sdl->sdl_lock); + return (ret); + } + + if ((sdl->sdl_logrm = umem_alloc(svp_shootdown_buf, UMEM_DEFAULT)) == + NULL) { + ret = errno; + umem_free(sdl->sdl_logack, svp_shootdown_buf); + (void) cond_destroy(&sdl->sdl_cond); + (void) mutex_destroy(&sdl->sdl_lock); + return (ret); + } + + sdl->sdl_remote = srp; + sdl->sdl_timer.st_oneshot = B_TRUE; + sdl->sdl_timer.st_func = svp_shootdown_timer; + sdl->sdl_timer.st_arg = sdl; + + return (0); +} diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c new file mode 100644 index 0000000000..10b02748f3 --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c @@ -0,0 +1,150 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015, Joyent, Inc. + */ + +#include <stddef.h> +#include <libvarpd_svp.h> + +/* + * svp timer backend + * + * This implements all of the logic of maintaining a timer for the svp backend. + * We have a timer that fires at a one second tick. We maintain all of our + * events in avl tree, sorted by the tick that they need to be processed at. + * + * For more information, see the big theory statement in + * lib/varpd/svp/common/libvarpd_svp.c. + */ + +int svp_tickrate = 1; +static svp_event_t svp_timer_event; +static mutex_t svp_timer_lock = ERRORCHECKMUTEX; +static cond_t svp_timer_cv = DEFAULTCV; +static avl_tree_t svp_timer_tree; +static uint64_t svp_timer_nticks; + +static int +svp_timer_comparator(const void *l, const void *r) +{ + const svp_timer_t *lt, *rt; + + lt = l; + rt = r; + + if (lt->st_expire > rt->st_expire) + return (1); + else if (lt->st_expire < rt->st_expire) + return (-1); + + /* + * Multiple timers can have the same delivery time, so sort within that + * by the address of the timer itself. + */ + if ((uintptr_t)lt > (uintptr_t)rt) + return (1); + else if ((uintptr_t)lt < (uintptr_t)rt) + return (-1); + + return (0); +} + +/* ARGSUSED */ +static void +svp_timer_tick(port_event_t *pe, void *arg) +{ + mutex_enter(&svp_timer_lock); + svp_timer_nticks++; + + for (;;) { + svp_timer_t *t; + + t = avl_first(&svp_timer_tree); + if (t == NULL || t->st_expire > svp_timer_nticks) + break; + + avl_remove(&svp_timer_tree, t); + + /* + * We drop this while performing an operation so that way state + * can advance in the face of a long-running callback. + */ + t->st_delivering = B_TRUE; + mutex_exit(&svp_timer_lock); + t->st_func(t->st_arg); + mutex_enter(&svp_timer_lock); + t->st_delivering = B_FALSE; + (void) cond_broadcast(&svp_timer_cv); + if (t->st_oneshot == B_FALSE) { + t->st_expire += t->st_value; + avl_add(&svp_timer_tree, t); + } + } + mutex_exit(&svp_timer_lock); +} + +void +svp_timer_add(svp_timer_t *stp) +{ + if (stp->st_value == 0) + libvarpd_panic("tried to add svp timer with zero value"); + + mutex_enter(&svp_timer_lock); + stp->st_delivering = B_FALSE; + stp->st_expire = svp_timer_nticks + stp->st_value; + avl_add(&svp_timer_tree, stp); + mutex_exit(&svp_timer_lock); +} + +void +svp_timer_remove(svp_timer_t *stp) +{ + mutex_enter(&svp_timer_lock); + + /* + * If the event in question is not currently being delivered, then we + * can stop it before it next fires. If it is currently being delivered, + * we need to wait for that to finish. Because we hold the timer lock, + * we know that it cannot be rearmed. Therefore, we make sure the one + * shot is set to zero, and wait until it's no longer set to delivering. + */ + if (stp->st_delivering == B_FALSE) { + avl_remove(&svp_timer_tree, stp); + mutex_exit(&svp_timer_lock); + return; + } + + stp->st_oneshot = B_TRUE; + while (stp->st_delivering == B_TRUE) + (void) cond_wait(&svp_timer_cv, &svp_timer_lock); + + mutex_exit(&svp_timer_lock); +} + +int +svp_timer_init(void) +{ + int ret; + + svp_timer_event.se_func = svp_timer_tick; + svp_timer_event.se_arg = NULL; + + avl_create(&svp_timer_tree, svp_timer_comparator, sizeof (svp_timer_t), + offsetof(svp_timer_t, st_link)); + + if ((ret = svp_event_timer_init(&svp_timer_event)) != 0) { + avl_destroy(&svp_timer_tree); + } + + return (ret); +} diff --git a/usr/src/lib/varpd/svp/common/llib-lvarpd_svp b/usr/src/lib/varpd/svp/common/llib-lvarpd_svp new file mode 100644 index 0000000000..03c34f4fcb --- /dev/null +++ b/usr/src/lib/varpd/svp/common/llib-lvarpd_svp @@ -0,0 +1,18 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + diff --git a/usr/src/lib/varpd/svp/common/mapfile-vers b/usr/src/lib/varpd/svp/common/mapfile-vers new file mode 100644 index 0000000000..6b7c5a5067 --- /dev/null +++ b/usr/src/lib/varpd/svp/common/mapfile-vers @@ -0,0 +1,35 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION SUNWprivate { + local: + *; +}; diff --git a/usr/src/lib/varpd/svp/i386/Makefile b/usr/src/lib/varpd/svp/i386/Makefile new file mode 100644 index 0000000000..f2b4f63da5 --- /dev/null +++ b/usr/src/lib/varpd/svp/i386/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/svp/sparc/Makefile b/usr/src/lib/varpd/svp/sparc/Makefile new file mode 100644 index 0000000000..f2b4f63da5 --- /dev/null +++ b/usr/src/lib/varpd/svp/sparc/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/svp/sparcv9/Makefile b/usr/src/lib/varpd/svp/sparcv9/Makefile new file mode 100644 index 0000000000..d552642882 --- /dev/null +++ b/usr/src/lib/varpd/svp/sparcv9/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) |