diff options
Diffstat (limited to 'usr/src/lib')
109 files changed, 14139 insertions, 141 deletions
diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile index 372fc4327c..18bcd74901 100644 --- a/usr/src/lib/Makefile +++ b/usr/src/lib/Makefile @@ -22,7 +22,7 @@ # Copyright 2011 Nexenta Systems, Inc. All rights reserved. # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2012 by Delphix. All rights reserved. -# Copyright (c) 2012, Joyent, Inc. All rights reserved. +# Copyright (c) 2014, Joyent, Inc. All rights reserved. # Copyright (c) 2013 Gary Mills # Copyright 2014 Garrett D'Amore <garrett@damore.org> @@ -137,6 +137,7 @@ SUBDIRS += \ libpp \ libproc \ libproject \ + librename \ libsendfile \ nametoaddr \ ncad_addr \ @@ -162,7 +163,10 @@ SUBDIRS += \ libm1 \ libm \ libmvec \ - libvnd + libvnd \ + libidspace \ + varpd \ + libbunyan SUBDIRS += \ passwdutil \ @@ -371,6 +375,7 @@ HDRSUBDIRS= \ libast \ libbrand \ libbsm \ + libbunyan \ libc \ libcmd \ libcmdutils \ @@ -400,6 +405,7 @@ HDRSUBDIRS= \ libfru \ libfstyp \ libgen \ + libidspace \ libipadm \ libipd \ libipsecutil \ @@ -431,6 +437,7 @@ HDRSUBDIRS= \ libraidcfg \ librcm \ librdc \ + librename \ libscf \ libsip \ libsmbios \ @@ -568,7 +575,7 @@ libdevinfo: libnvpair libsec libdhcpagent: libsocket libdhcputil libuuid libdlpi libcontract libdhcputil: libnsl libgen libinetutil libdlpi libdladm: libdevinfo libinetutil libsocket libscf librcm libnvpair \ - libexacct libnsl libkstat libcurses + libexacct libnsl libkstat libcurses varpd libdll: libast libdlpi: libinetutil libdladm libds: libsysevent @@ -657,14 +664,18 @@ libreparse: libnvpair libhotplug: libnvpair cfgadm_plugins: libhotplug libilb: libsocket +libidspace: libumem libipmi: libm libprtdiag: libm libsqlite: libm libstmf: libm libvscan: libm +libbunyan: libnvpair $(INTEL_BUILD)libdiskmgt:libfdisk +varpd: libavl libidspace libumem libnsl libnvpair libmd5 librename \ + libbunyan # # The reason this rule checks for the existence of the diff --git a/usr/src/lib/libbunyan/Makefile 
b/usr/src/lib/libbunyan/Makefile new file mode 100644 index 0000000000..a59de91113 --- /dev/null +++ b/usr/src/lib/libbunyan/Makefile @@ -0,0 +1,42 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. +# + +include ../Makefile.lib + +HDRS = bunyan.h +HDRDIR = common +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET = all +clean := TARGET = clean +clobber := TARGET = clobber +install := TARGET = install +lint := TARGET = lint + +.KEEP_STATE: + +all clean clobber install lint: $(SUBDIRS) + +install_h: $(ROOTHDRS) + +check: $(CHECKHDRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include ../Makefile.targ diff --git a/usr/src/lib/libbunyan/Makefile.com b/usr/src/lib/libbunyan/Makefile.com new file mode 100644 index 0000000000..5214915c56 --- /dev/null +++ b/usr/src/lib/libbunyan/Makefile.com @@ -0,0 +1,36 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +LIBRARY = libbunyan.a +VERS = .1 +OBJECTS = bunyan.o +USDT_PROVIDERS = bunyan_provider.d + +include ../../Makefile.lib + +LIBS = $(DYNLIB) $(LINTLIB) +LDLIBS += -lc -lumem -lnvpair -lnsl +CPPFLAGS += -I../common -I. 
-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 + +SRCDIR = ../common + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +include ../../Makefile.targ +include ../../Makefile.usdt diff --git a/usr/src/lib/libbunyan/amd64/Makefile b/usr/src/lib/libbunyan/amd64/Makefile new file mode 100644 index 0000000000..15d904c616 --- /dev/null +++ b/usr/src/lib/libbunyan/amd64/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com +include ../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/libbunyan/common/bunyan.c b/usr/src/lib/libbunyan/common/bunyan.c new file mode 100644 index 0000000000..30398aa098 --- /dev/null +++ b/usr/src/lib/libbunyan/common/bunyan.c @@ -0,0 +1,913 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. 
+ */ + +#include <errno.h> +#include <unistd.h> +#include <pthread.h> +#include <stdarg.h> +#include <umem.h> +#include <netdb.h> +#include <string.h> +#include <strings.h> +#include <time.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <sys/sysmacros.h> +#include <thread.h> +#include <sys/debug.h> + +#include <bunyan.h> +#include <bunyan_provider_impl.h> + +struct bunyan_key; +struct bunyan_stream; +struct bunyan; + +typedef struct bunyan_stream { + struct bunyan_stream *bs_next; + char *bs_name; + bunyan_level_t bs_level; + bunyan_stream_f bs_func; + void *bs_arg; + uint_t bs_count; +} bunyan_stream_t; + +typedef struct bunyan_key { + struct bunyan_key *bk_next; + char *bk_name; + bunyan_type_t bk_type; + void *bk_data; + size_t bk_len; +} bunyan_key_t; + +typedef struct bunyan { + pthread_mutex_t bun_lock; + bunyan_key_t *bun_keys; + bunyan_stream_t *bun_streams; + char *bun_name; + char bun_host[MAXHOSTNAMELEN+1]; +} bunyan_t; + +#define ISO_TIMELEN 25 +static const int bunyan_version = 0; + +static void +bunyan_key_fini(bunyan_key_t *bkp) +{ + size_t nlen = strlen(bkp->bk_name) + 1; + umem_free(bkp->bk_data, bkp->bk_len); + umem_free(bkp->bk_name, nlen); + umem_free(bkp, sizeof (bunyan_key_t)); +} + +static void +bunyan_stream_fini(bunyan_stream_t *bsp) +{ + size_t nlen = strlen(bsp->bs_name) + 1; + umem_free(bsp->bs_name, nlen); + umem_free(bsp, sizeof (bunyan_stream_t)); +} + +int +bunyan_init(const char *name, bunyan_logger_t **bhp) +{ + int ret; + bunyan_t *b; + size_t nlen = strlen(name) + 1; + + b = umem_zalloc(sizeof (bunyan_t), UMEM_DEFAULT); + if (b == NULL) + return (ENOMEM); + + b->bun_name = umem_alloc(nlen, UMEM_DEFAULT); + if (b->bun_name == NULL) { + umem_free(b, sizeof (bunyan_t)); + return (ENOMEM); + } + bcopy(name, b->bun_name, nlen); + + if ((ret = pthread_mutex_init(&b->bun_lock, NULL)) != 0) { + umem_free(b->bun_name, nlen); + umem_free(b, sizeof (bunyan_t)); + return (ret); + 
} + + VERIFY(gethostname(b->bun_host, sizeof (b->bun_host)) == 0); + b->bun_host[MAXHOSTNAMELEN] = '\0'; + + *bhp = (bunyan_logger_t *)b; + return (0); +} + +void +bunyan_fini(bunyan_logger_t *bhp) +{ + bunyan_t *b = (bunyan_t *)bhp; + bunyan_key_t *bkp; + bunyan_stream_t *bsp; + + while ((bkp = b->bun_keys) != NULL) { + b->bun_keys = bkp->bk_next; + bunyan_key_fini(bkp); + } + + while ((bsp = b->bun_streams) != NULL) { + b->bun_streams = bsp->bs_next; + bunyan_stream_fini(bsp); + } + + if (b->bun_name != NULL) + umem_free(b->bun_name, strlen(b->bun_name) + 1); + + VERIFY(pthread_mutex_destroy(&b->bun_lock) == 0); + umem_free(b, sizeof (bunyan_t)); +} + +/* ARGSUSED */ +int +bunyan_stream_fd(nvlist_t *nvl, const char *js, void *arg) +{ + uintptr_t fd = (uintptr_t)arg; + size_t jslen = strlen(js); + off_t off = 0; + ssize_t ret = 0; + static int maxbuf = -1; + + if (maxbuf == -1) + maxbuf = getpagesize(); + + while (off != jslen) { + /* + * Write up to a page of data at a time. If for some reason an + * individual write fails, move on and try to still write a new + * line at least... + */ + ret = write(fd, js + off, MIN(jslen - off, maxbuf)); + if (ret < 0) + break; + off += ret; + } + + if (ret < 0) { + (void) write(fd, "\n", 1); + } else { + ret = write(fd, "\n", 1); + } + return (ret < 0 ? 
1: 0); +} + +int +bunyan_stream_add(bunyan_logger_t *bhp, const char *name, int level, + bunyan_stream_f func, void *arg) +{ + bunyan_stream_t *bs, *cur; + size_t nlen = strlen(name) + 1; + bunyan_t *b = (bunyan_t *)bhp; + + if (level != BUNYAN_L_TRACE && + level != BUNYAN_L_DEBUG && + level != BUNYAN_L_INFO && + level != BUNYAN_L_WARN && + level != BUNYAN_L_ERROR && + level != BUNYAN_L_FATAL) + return (EINVAL); + + bs = umem_alloc(sizeof (bunyan_stream_t), UMEM_DEFAULT); + if (bs == NULL) + return (ENOMEM); + + bs->bs_name = umem_alloc(nlen, UMEM_DEFAULT); + if (bs->bs_name == NULL) { + umem_free(bs, sizeof (bunyan_stream_t)); + return (ENOMEM); + } + bcopy(name, bs->bs_name, nlen); + bs->bs_level = level; + bs->bs_func = func; + bs->bs_arg = arg; + bs->bs_count = 0; + (void) pthread_mutex_lock(&b->bun_lock); + cur = b->bun_streams; + while (cur != NULL) { + if (strcmp(name, cur->bs_name) == 0) { + (void) pthread_mutex_unlock(&b->bun_lock); + umem_free(bs->bs_name, nlen); + umem_free(bs, sizeof (bunyan_stream_t)); + return (EEXIST); + } + cur = cur->bs_next; + } + bs->bs_next = b->bun_streams; + b->bun_streams = bs; + (void) pthread_mutex_unlock(&b->bun_lock); + + return (0); +} + +int +bunyan_stream_remove(bunyan_logger_t *bhp, const char *name) +{ + bunyan_stream_t *cur, *prev; + bunyan_t *b = (bunyan_t *)bhp; + + (void) pthread_mutex_lock(&b->bun_lock); + prev = NULL; + cur = b->bun_streams; + while (cur != NULL) { + if (strcmp(name, cur->bs_name) == 0) + break; + prev = cur; + cur = cur->bs_next; + } + if (cur == NULL) { + (void) pthread_mutex_unlock(&b->bun_lock); + return (ENOENT); + } + if (prev == NULL) + b->bun_streams = cur->bs_next; + else + prev->bs_next = cur->bs_next; + cur->bs_next = NULL; + (void) pthread_mutex_unlock(&b->bun_lock); + + bunyan_stream_fini(cur); + + return (0); +} + +static int +bunyan_key_add_one(bunyan_t *b, const char *name, bunyan_type_t type, + const void *arg) +{ + bunyan_key_t *bkp, *cur, *prev; + size_t nlen = strlen(name) 
+ 1; + size_t blen; + + bkp = umem_alloc(sizeof (bunyan_key_t), UMEM_DEFAULT); + if (bkp == NULL) + return (ENOMEM); + bkp->bk_name = umem_alloc(nlen, UMEM_DEFAULT); + if (bkp->bk_name == NULL) { + umem_free(bkp, sizeof (bunyan_key_t)); + return (ENOMEM); + } + bcopy(name, bkp->bk_name, nlen); + + switch (type) { + case BUNYAN_T_STRING: + blen = strlen(arg) + 1; + break; + case BUNYAN_T_POINTER: + blen = sizeof (uintptr_t); + break; + case BUNYAN_T_IP: + blen = sizeof (struct in_addr); + break; + case BUNYAN_T_IP6: + blen = sizeof (struct in6_addr); + break; + case BUNYAN_T_BOOLEAN: + blen = sizeof (boolean_t); + break; + case BUNYAN_T_INT32: + blen = sizeof (int32_t); + break; + case BUNYAN_T_INT64: + case BUNYAN_T_INT64STR: + blen = sizeof (int64_t); + break; + case BUNYAN_T_UINT32: + blen = sizeof (uint32_t); + break; + case BUNYAN_T_UINT64: + case BUNYAN_T_UINT64STR: + blen = sizeof (uint64_t); + break; + case BUNYAN_T_DOUBLE: + blen = sizeof (double); + break; + default: + umem_free(bkp->bk_name, nlen); + umem_free(bkp, sizeof (bunyan_key_t)); + return (EINVAL); + } + + bkp->bk_data = umem_alloc(blen, UMEM_DEFAULT); + if (bkp->bk_data == NULL) { + umem_free(bkp->bk_name, nlen); + umem_free(bkp, sizeof (bunyan_key_t)); + return (ENOMEM); + } + bcopy(arg, bkp->bk_data, blen); + bkp->bk_len = blen; + bkp->bk_type = type; + + (void) pthread_mutex_lock(&b->bun_lock); + prev = NULL; + cur = b->bun_keys; + while (cur != NULL) { + if (strcmp(name, cur->bk_name) == 0) + break; + prev = cur; + cur = cur->bk_next; + } + if (cur != NULL) { + if (prev == NULL) + b->bun_keys = cur->bk_next; + else + prev->bk_next = cur->bk_next; + bunyan_key_fini(cur); + } + bkp->bk_next = b->bun_keys; + b->bun_keys = bkp; + (void) pthread_mutex_unlock(&b->bun_lock); + + return (0); +} + +static int +bunyan_key_vadd(bunyan_t *b, va_list *ap) +{ + int type, ret; + void *data; + boolean_t bt; + int32_t i32; + int64_t i64; + uint32_t ui32; + uint64_t ui64; + double d; + uintptr_t ptr; + + 
while ((type = va_arg(*ap, int)) != BUNYAN_T_END) { + const char *name = va_arg(*ap, char *); + + switch (type) { + case BUNYAN_T_STRING: + data = va_arg(*ap, char *); + break; + case BUNYAN_T_POINTER: + ptr = (uintptr_t)va_arg(*ap, void *); + data = &ptr; + break; + case BUNYAN_T_IP: + case BUNYAN_T_IP6: + data = va_arg(*ap, void *); + break; + case BUNYAN_T_BOOLEAN: + bt = va_arg(*ap, boolean_t); + data = &bt; + break; + case BUNYAN_T_INT32: + i32 = va_arg(*ap, int32_t); + data = &i32; + break; + case BUNYAN_T_INT64: + case BUNYAN_T_INT64STR: + i64 = va_arg(*ap, int64_t); + data = &i64; + break; + case BUNYAN_T_UINT32: + ui32 = va_arg(*ap, uint32_t); + data = &ui32; + break; + case BUNYAN_T_UINT64: + case BUNYAN_T_UINT64STR: + ui64 = va_arg(*ap, uint64_t); + data = &ui64; + break; + case BUNYAN_T_DOUBLE: + d = va_arg(*ap, double); + data = &d; + break; + default: + return (EINVAL); + } + + if ((ret = bunyan_key_add_one(b, name, type, data)) != 0) + return (ret); + } + + return (0); +} + +int +bunyan_key_add(bunyan_logger_t *bhp, ...) 
+{ + int ret; + va_list ap; + bunyan_t *b = (bunyan_t *)bhp; + + va_start(ap, bhp); + ret = bunyan_key_vadd(b, &ap); + va_end(ap); + + return (ret); +} + +int +bunyan_key_remove(bunyan_logger_t *bhp, const char *name) +{ + bunyan_t *b = (bunyan_t *)bhp; + bunyan_key_t *cur, *prev; + + (void) pthread_mutex_lock(&b->bun_lock); + prev = NULL; + cur = b->bun_keys; + while (cur != NULL) { + if (strcmp(name, cur->bk_name) == 0) + break; + prev = cur; + cur = cur->bk_next; + } + + if (cur == NULL) { + (void) pthread_mutex_unlock(&b->bun_lock); + return (ENOENT); + } + + if (prev == NULL) + b->bun_keys = cur->bk_next; + else + prev->bk_next = cur->bk_next; + (void) pthread_mutex_unlock(&b->bun_lock); + + bunyan_key_fini(cur); + return (0); +} + +static bunyan_key_t * +bunyan_key_dup(const bunyan_key_t *bkp) +{ + bunyan_key_t *nkp; + size_t nlen = strlen(bkp->bk_name) + 1; + + nkp = umem_alloc(sizeof (bunyan_key_t), UMEM_DEFAULT); + if (nkp == NULL) + return (NULL); + nkp->bk_next = NULL; + nkp->bk_name = umem_alloc(nlen, UMEM_DEFAULT); + if (nkp->bk_name == NULL) { + umem_free(nkp, sizeof (bunyan_key_t)); + return (NULL); + } + bcopy(bkp->bk_name, nkp->bk_name, nlen); + nkp->bk_type = bkp->bk_type; + nkp->bk_data = umem_alloc(bkp->bk_len, UMEM_DEFAULT); + if (nkp->bk_data == NULL) { + umem_free(nkp->bk_name, nlen); + umem_free(nkp, sizeof (bunyan_key_t)); + return (NULL); + } + bcopy(bkp->bk_data, nkp->bk_data, bkp->bk_len); + nkp->bk_len = bkp->bk_len; + + return (nkp); +} + +static bunyan_stream_t * +bunyan_stream_dup(const bunyan_stream_t *bsp) +{ + bunyan_stream_t *nsp; + size_t nlen = strlen(bsp->bs_name) + 1; + + nsp = umem_alloc(sizeof (bunyan_stream_t), UMEM_DEFAULT); + if (nsp == NULL) + return (NULL); + + nsp->bs_next = NULL; + nsp->bs_name = umem_alloc(nlen, UMEM_DEFAULT); + if (nsp->bs_name == NULL) { + umem_free(nsp, sizeof (bunyan_stream_t)); + return (NULL); + } + bcopy(bsp->bs_name, nsp->bs_name, nlen); + nsp->bs_level = bsp->bs_level; + nsp->bs_func = 
bsp->bs_func; + nsp->bs_arg = bsp->bs_arg; + nsp->bs_count = 0; + + return (nsp); +} + +static bunyan_t * +bunyan_dup(const bunyan_t *b) +{ + bunyan_t *n; + const bunyan_key_t *bkp; + const bunyan_stream_t *bsp; + size_t nlen; + + n = umem_zalloc(sizeof (bunyan_t), UMEM_DEFAULT); + if (n == NULL) + return (NULL); + + if (pthread_mutex_init(&n->bun_lock, NULL) != 0) { + umem_free(n, sizeof (bunyan_t)); + return (NULL); + } + + for (bkp = b->bun_keys; bkp != NULL; bkp = bkp->bk_next) { + bunyan_key_t *nkp; + nkp = bunyan_key_dup(bkp); + if (nkp == NULL) { + bunyan_fini((bunyan_logger_t *)n); + return (NULL); + } + + nkp->bk_next = n->bun_keys; + n->bun_keys = nkp; + } + + for (bsp = b->bun_streams; bsp != NULL; bsp = bsp->bs_next) { + bunyan_stream_t *nsp; + nsp = bunyan_stream_dup(bsp); + if (nsp == NULL) { + bunyan_fini((bunyan_logger_t *)n); + return (NULL); + } + + nsp->bs_next = n->bun_streams; + n->bun_streams = nsp; + } + + nlen = strlen(b->bun_name) + 1; + n->bun_name = umem_alloc(nlen, UMEM_DEFAULT); + if (n->bun_name == NULL) { + bunyan_fini((bunyan_logger_t *)n); + return (NULL); + } + bcopy(b->bun_name, n->bun_name, nlen); + bcopy(b->bun_host, n->bun_host, MAXHOSTNAMELEN+1); + + return (n); +} + +int +bunyan_child(const bunyan_logger_t *bhp, bunyan_logger_t **outp, ...) 
+{ + bunyan_t *b = (bunyan_t *)bhp; + bunyan_t *n; + va_list ap; + int ret; + + n = bunyan_dup(b); + if (n == NULL) + return (ENOMEM); + + va_start(ap, outp); + ret = bunyan_key_vadd(n, &ap); /* new keys belong to the child */ + va_end(ap); + + if (ret != 0) + bunyan_fini((bunyan_logger_t *)n); + else + *outp = (bunyan_logger_t *)n; + + return (ret); +} + +static int +bunyan_iso_time(char *buf) +{ + struct timeval tv; + struct tm tm; + + if (gettimeofday(&tv, NULL) != 0) + return (errno); + + if (gmtime_r(&tv.tv_sec, &tm) == NULL) + return (errno); + + VERIFY(strftime(buf, ISO_TIMELEN, "%FT%T", &tm) == 19); + + (void) snprintf(&buf[19], 6, ".%03dZ", (int)(tv.tv_usec / 1000)); + + return (0); +} + +/* + * Note, these fields are all required, so even if a user attempts to use one of + * them in their own fields, we'll override them and therefore, have it be the + * last one. + */ +static int +bunyan_vlog_defaults(nvlist_t *nvl, bunyan_t *b, bunyan_level_t level, + const char *msg) +{ + int ret; + char tbuf[ISO_TIMELEN]; + + if ((ret = bunyan_iso_time(tbuf)) != 0) + return (ret); + + if ((ret = nvlist_add_int32(nvl, "v", bunyan_version)) != 0 || + (ret = nvlist_add_int32(nvl, "level", level)) != 0 || + (ret = nvlist_add_string(nvl, "name", b->bun_name)) != 0 || + (ret = nvlist_add_string(nvl, "hostname", b->bun_host)) != 0 || + (ret = nvlist_add_int32(nvl, "pid", getpid())) != 0 || + (ret = nvlist_add_uint32(nvl, "tid", thr_self())) != 0 || + (ret = nvlist_add_string(nvl, "time", tbuf)) != 0 || + (ret = nvlist_add_string(nvl, "msg", msg)) != 0) + return (ret); + + return (0); +} + +static int +bunyan_vlog_add(nvlist_t *nvl, const char *key, bunyan_type_t type, void *arg) +{ + int ret; + uintptr_t *up; + struct in_addr *v4; + struct in6_addr *v6; + + /* + * Our buffer needs to hold the string forms of pointers, IPv6 strings, + * etc. INET6_ADDRSTRLEN is large enough for all of these. 
+ */ + char buf[INET6_ADDRSTRLEN]; + + switch (type) { + case BUNYAN_T_STRING: + ret = nvlist_add_string(nvl, key, (char *)arg); + break; + case BUNYAN_T_POINTER: + up = arg; + (void) snprintf(buf, sizeof (buf), "0x%p", *up); + ret = nvlist_add_string(nvl, key, buf); + break; + case BUNYAN_T_IP: + v4 = arg; + VERIFY(inet_ntop(AF_INET, v4, buf, sizeof (buf)) != NULL); + ret = nvlist_add_string(nvl, key, buf); + break; + case BUNYAN_T_IP6: + v6 = arg; + VERIFY(inet_ntop(AF_INET6, v6, buf, sizeof (buf)) != NULL); + ret = nvlist_add_string(nvl, key, buf); + break; + case BUNYAN_T_BOOLEAN: + ret = nvlist_add_boolean_value(nvl, key, *(boolean_t *)arg); + break; + case BUNYAN_T_INT32: + ret = nvlist_add_int32(nvl, key, *(int32_t *)arg); + break; + case BUNYAN_T_INT64: + ret = nvlist_add_int64(nvl, key, *(int64_t *)arg); + break; + case BUNYAN_T_UINT32: + ret = nvlist_add_uint32(nvl, key, *(uint32_t *)arg); + break; + case BUNYAN_T_UINT64: + ret = nvlist_add_uint64(nvl, key, *(uint64_t *)arg); + break; + case BUNYAN_T_DOUBLE: + ret = nvlist_add_double(nvl, key, *(double *)arg); + break; + case BUNYAN_T_INT64STR: + (void) snprintf(buf, sizeof (buf), "%lld", *(int64_t *)arg); + ret = nvlist_add_string(nvl, key, buf); + break; + case BUNYAN_T_UINT64STR: + (void) snprintf(buf, sizeof (buf), "%llu", *(uint64_t *)arg); + ret = nvlist_add_string(nvl, key, buf); + break; + default: + ret = EINVAL; + break; + } + + return (ret); +} + +static int +bunyan_vlog(bunyan_logger_t *bhp, bunyan_level_t level, const char *msg, + va_list *ap) +{ + nvlist_t *nvl = NULL; + int ret, type; + bunyan_key_t *bkp; + bunyan_stream_t *bsp; + char *buf = NULL; + bunyan_t *b = (bunyan_t *)bhp; + + if (msg == NULL) + return (EINVAL); + + (void) pthread_mutex_lock(&b->bun_lock); + + if ((ret = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0) { + (void) pthread_mutex_unlock(&b->bun_lock); + return (ret); + } + + /* + * We add pre-defined keys, then go through and process the users keys, + * and finally go 
ahead and our defaults. If all that succeeds, then we + * can go ahead and call all the built-in logs. + */ + for (bkp = b->bun_keys; bkp != NULL; bkp = bkp->bk_next) { + if ((ret = bunyan_vlog_add(nvl, bkp->bk_name, bkp->bk_type, + bkp->bk_data)) != 0) + goto out; + } + + while ((type = va_arg(*ap, int)) != BUNYAN_T_END) { + void *data; + boolean_t bt; + int32_t i32; + int64_t i64; + uint32_t ui32; + uint64_t ui64; + double d; + uintptr_t ptr; + const char *key = va_arg(*ap, char *); + + switch (type) { + case BUNYAN_T_STRING: + data = va_arg(*ap, char *); + break; + case BUNYAN_T_POINTER: + ptr = (uintptr_t)va_arg(*ap, void *); + data = &ptr; + break; + case BUNYAN_T_IP: + case BUNYAN_T_IP6: + data = va_arg(*ap, void *); + break; + case BUNYAN_T_BOOLEAN: + bt = va_arg(*ap, boolean_t); + data = &bt; + break; + case BUNYAN_T_INT32: + i32 = va_arg(*ap, int32_t); + data = &i32; + break; + case BUNYAN_T_INT64: + case BUNYAN_T_INT64STR: + i64 = va_arg(*ap, int64_t); + data = &i64; + break; + case BUNYAN_T_UINT32: + ui32 = va_arg(*ap, uint32_t); + data = &ui32; + break; + case BUNYAN_T_UINT64: + case BUNYAN_T_UINT64STR: + ui64 = va_arg(*ap, uint64_t); + data = &ui64; + break; + case BUNYAN_T_DOUBLE: + d = va_arg(*ap, double); + data = &d; + break; + default: + ret = EINVAL; + goto out; + } + + if ((ret = bunyan_vlog_add(nvl, key, type, data)) != 0) + goto out; + + } + /* + * This must be the last thing we do before we log to ensure that all of + * our defaults always make it out. 
+ */ + if ((ret = bunyan_vlog_defaults(nvl, b, level, msg)) != 0) + goto out; + + if (nvlist_dump_json(nvl, &buf) < 0) { + ret = errno; + goto out; + } + + /* Fire DTrace probes */ + switch (level) { + case BUNYAN_L_TRACE: + BUNYAN_LOG_TRACE(buf); + break; + case BUNYAN_L_DEBUG: + BUNYAN_LOG_DEBUG(buf); + break; + case BUNYAN_L_INFO: + BUNYAN_LOG_INFO(buf); + break; + case BUNYAN_L_WARN: + BUNYAN_LOG_WARN(buf); + break; + case BUNYAN_L_ERROR: + BUNYAN_LOG_ERROR(buf); + break; + case BUNYAN_L_FATAL: + BUNYAN_LOG_FATAL(buf); + break; + } + + for (bsp = b->bun_streams; bsp != NULL; bsp = bsp->bs_next) { + if (bsp->bs_level <= level) + if (bsp->bs_func(nvl, buf, bsp->bs_arg) != 0) + bsp->bs_count++; + } + ret = 0; +out: + (void) pthread_mutex_unlock(&b->bun_lock); + if (buf != NULL) + nvlist_dump_json_free(nvl, buf); + if (nvl != NULL) + nvlist_free(nvl); + return (ret); +} + +int +bunyan_trace(bunyan_logger_t *bhp, const char *msg, ...) +{ + va_list va; + int ret; + + va_start(va, msg); + ret = bunyan_vlog(bhp, BUNYAN_L_TRACE, msg, &va); + va_end(va); + + return (ret); +} + +int +bunyan_debug(bunyan_logger_t *bhp, const char *msg, ...) +{ + va_list va; + int ret; + + va_start(va, msg); + ret = bunyan_vlog(bhp, BUNYAN_L_DEBUG, msg, &va); + va_end(va); + + return (ret); +} + +int +bunyan_info(bunyan_logger_t *bhp, const char *msg, ...) +{ + va_list va; + int ret; + + va_start(va, msg); + ret = bunyan_vlog(bhp, BUNYAN_L_INFO, msg, &va); + va_end(va); + + return (ret); +} + +int +bunyan_warn(bunyan_logger_t *bhp, const char *msg, ...) +{ + va_list va; + int ret; + + va_start(va, msg); + ret = bunyan_vlog(bhp, BUNYAN_L_WARN, msg, &va); + va_end(va); + + return (ret); +} + +int +bunyan_error(bunyan_logger_t *bhp, const char *msg, ...) +{ + va_list va; + int ret; + + va_start(va, msg); + ret = bunyan_vlog(bhp, BUNYAN_L_ERROR, msg, &va); + va_end(va); + + return (ret); +} + + +int +bunyan_fatal(bunyan_logger_t *bhp, const char *msg, ...) 
+{ + va_list va; + int ret; + + va_start(va, msg); + ret = bunyan_vlog(bhp, BUNYAN_L_FATAL, msg, &va); + va_end(va); + + return (ret); +} diff --git a/usr/src/lib/libbunyan/common/bunyan.h b/usr/src/lib/libbunyan/common/bunyan.h new file mode 100644 index 0000000000..9a01f6f6cd --- /dev/null +++ b/usr/src/lib/libbunyan/common/bunyan.h @@ -0,0 +1,88 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014, Joyent, Inc. + */ + +#ifndef _BUNYAN_H +#define _BUNYAN_H + +/* + * C version of the bunyan logging format. + */ + +#include <limits.h> +#include <libnvpair.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct bunyan_logger bunyan_logger_t; + +typedef enum bunyan_level { + BUNYAN_L_TRACE = 10, + BUNYAN_L_DEBUG = 20, + BUNYAN_L_INFO = 30, + BUNYAN_L_WARN = 40, + BUNYAN_L_ERROR = 50, + BUNYAN_L_FATAL = 60 +} bunyan_level_t; + +typedef enum bunyan_type { + BUNYAN_T_END = 0x0, + BUNYAN_T_STRING, + BUNYAN_T_POINTER, + BUNYAN_T_IP, + BUNYAN_T_IP6, + BUNYAN_T_BOOLEAN, + BUNYAN_T_INT32, + BUNYAN_T_INT64, + BUNYAN_T_UINT32, + BUNYAN_T_UINT64, + BUNYAN_T_DOUBLE, + BUNYAN_T_INT64STR, + BUNYAN_T_UINT64STR +} bunyan_type_t; + +/* + * A handle is MT-safe, but not fork-safe. + */ +extern int bunyan_init(const char *, bunyan_logger_t **); +extern int bunyan_child(const bunyan_logger_t *, bunyan_logger_t **, ...); +extern void bunyan_fini(bunyan_logger_t *); + +/* + * Bunyan stream callbacks are guaranteed to be serialized. 
+ */ +typedef int (*bunyan_stream_f)(nvlist_t *, const char *, void *); +extern int bunyan_stream_fd(nvlist_t *, const char *, void *); + +extern int bunyan_stream_add(bunyan_logger_t *, const char *, int, + bunyan_stream_f, void *); +extern int bunyan_stream_remove(bunyan_logger_t *, const char *); + +extern int bunyan_key_add(bunyan_logger_t *, ...); +extern int bunyan_key_remove(bunyan_logger_t *, const char *); + +extern int bunyan_trace(bunyan_logger_t *, const char *msg, ...); +extern int bunyan_debug(bunyan_logger_t *, const char *msg, ...); +extern int bunyan_info(bunyan_logger_t *, const char *msg, ...); +extern int bunyan_warn(bunyan_logger_t *, const char *msg, ...); +extern int bunyan_error(bunyan_logger_t *, const char *msg, ...); +extern int bunyan_fatal(bunyan_logger_t *, const char *msg, ...); + +#ifdef __cplusplus +} +#endif + +#endif /* _BUNYAN_H */ diff --git a/usr/src/lib/libbunyan/common/bunyan_provider.d b/usr/src/lib/libbunyan/common/bunyan_provider.d new file mode 100644 index 0000000000..d47ea75733 --- /dev/null +++ b/usr/src/lib/libbunyan/common/bunyan_provider.d @@ -0,0 +1,32 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014, Joyent, Inc. 
+ */ + +/* + * Bunyan DTrace provider + */ +provider bunyan { + probe log__trace(char *); + probe log__debug(char *); + probe log__info(char *); + probe log__warn(char *); + probe log__error(char *); + probe log__fatal(char *); +}; + +#pragma D attributes Stable/Stable/ISA provider bunyan provider +#pragma D attributes Private/Private/Unknown provider bunyan module +#pragma D attributes Private/Private/Unknown provider bunyan function +#pragma D attributes Stable/Stable/ISA provider bunyan name +#pragma D attributes Stable/Stable/ISA provider bunyan args diff --git a/usr/src/lib/libbunyan/common/llib-lbunyan b/usr/src/lib/libbunyan/common/llib-lbunyan new file mode 100644 index 0000000000..31f6a52aba --- /dev/null +++ b/usr/src/lib/libbunyan/common/llib-lbunyan @@ -0,0 +1,19 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. + */ + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + +#include <bunyan.h> diff --git a/usr/src/lib/libbunyan/common/mapfile-vers b/usr/src/lib/libbunyan/common/mapfile-vers new file mode 100644 index 0000000000..e72bbf76c8 --- /dev/null +++ b/usr/src/lib/libbunyan/common/mapfile-vers @@ -0,0 +1,50 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. 
All rights reserved. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION ILLUMOS_1.0 { + global: + bunyan_init; + bunyan_child; + bunyan_fini; + bunyan_stream_fd; + bunyan_stream_add; + bunyan_stream_remove; + bunyan_key_add; + bunyan_key_remove; + bunyan_trace; + bunyan_debug; + bunyan_info; + bunyan_warn; + bunyan_error; + bunyan_fatal; + local: + *; +}; diff --git a/usr/src/lib/libbunyan/i386/Makefile b/usr/src/lib/libbunyan/i386/Makefile new file mode 100644 index 0000000000..41e699e8f8 --- /dev/null +++ b/usr/src/lib/libbunyan/i386/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/libbunyan/sparc/Makefile b/usr/src/lib/libbunyan/sparc/Makefile new file mode 100644 index 0000000000..41e699e8f8 --- /dev/null +++ b/usr/src/lib/libbunyan/sparc/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. 
A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/libbunyan/sparcv9/Makefile b/usr/src/lib/libbunyan/sparcv9/Makefile new file mode 100644 index 0000000000..15d904c616 --- /dev/null +++ b/usr/src/lib/libbunyan/sparcv9/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com +include ../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/libcmdutils/common/custr.c b/usr/src/lib/libcmdutils/common/custr.c index 1ec72de9dd..f59f433ae9 100644 --- a/usr/src/lib/libcmdutils/common/custr.c +++ b/usr/src/lib/libcmdutils/common/custr.c @@ -20,13 +20,20 @@ #include <stdlib.h> #include <err.h> #include <string.h> +#include <stdio.h> +#include <stdarg.h> #include "libcmdutils.h" +typedef enum { + CUSTR_FIXEDBUF = 0x01 +} custr_flags_t; + struct custr { size_t cus_strlen; size_t cus_datalen; char *cus_data; + custr_flags_t cus_flags; }; #define STRING_CHUNK_SIZE 64 @@ -53,23 +60,15 @@ custr_cstr(custr_t *cus) return (cus->cus_data); } -int -custr_appendc(custr_t *cus, char newc) -{ - char news[2]; - - news[0] = newc; - news[1] = '\0'; - - return (custr_append(cus, news)); -} - -int -custr_append(custr_t *cus, const char *news) +static int +custr_append_vprintf(custr_t *cus, const char *fmt, va_list ap) { - size_t len = strlen(news); + size_t len = 
vsnprintf(NULL, 0, fmt, ap); size_t chunksz = STRING_CHUNK_SIZE; + if (len == -1) + return (len); + while (chunksz < len) { chunksz *= 2; } @@ -78,6 +77,11 @@ custr_append(custr_t *cus, const char *news) char *new_data; size_t new_datalen = cus->cus_datalen + chunksz; + if (cus->cus_flags & CUSTR_FIXEDBUF) { + errno = EOVERFLOW; + return (-1); + } + /* * Allocate replacement memory: */ @@ -104,13 +108,41 @@ custr_append(custr_t *cus, const char *news) /* * Append new string to existing string: */ - (void) memcpy(cus->cus_data + cus->cus_strlen, news, len + 1); + len = vsnprintf(cus->cus_data + cus->cus_strlen, + (uintptr_t)cus->cus_data - (uintptr_t)cus->cus_strlen, fmt, ap); + if (len == -1) + return (len); cus->cus_strlen += len; return (0); } int +custr_appendc(custr_t *cus, char newc) +{ + return (custr_append_printf(cus, "%c", newc)); +} + +int +custr_append_printf(custr_t *cus, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = custr_append_vprintf(cus, fmt, ap); + va_end(ap); + + return (ret); +} + +int +custr_append(custr_t *cus, const char *name) +{ + return (custr_append_printf(cus, "%s", name)); +} + +int custr_alloc(custr_t **cus) { custr_t *t; @@ -124,12 +156,35 @@ custr_alloc(custr_t **cus) return (0); } +int +custr_alloc_buf(custr_t **cus, void *buf, size_t buflen) +{ + int ret; + + if (buflen == 0 || buf == NULL) { + errno = EINVAL; + return (-1); + } + + if ((ret = custr_alloc(cus)) != 0) + return (ret); + + (*cus)->cus_data = buf; + (*cus)->cus_datalen = buflen; + (*cus)->cus_strlen = 0; + (*cus)->cus_flags = CUSTR_FIXEDBUF; + (*cus)->cus_data[0] = '\0'; + + return (0); +} + void custr_free(custr_t *cus) { if (cus == NULL) return; - free(cus->cus_data); + if ((cus->cus_flags & CUSTR_FIXEDBUF) == 0) + free(cus->cus_data); free(cus); } diff --git a/usr/src/lib/libcmdutils/common/mapfile-vers b/usr/src/lib/libcmdutils/common/mapfile-vers index 640959e4b5..3106695eb0 100644 --- 
a/usr/src/lib/libcmdutils/common/mapfile-vers +++ b/usr/src/lib/libcmdutils/common/mapfile-vers @@ -43,8 +43,10 @@ SYMBOL_VERSION SUNWprivate_1.1 { global: add_tnode; custr_alloc; + custr_alloc_buf; custr_append; custr_appendc; + custr_append_printf; custr_cstr; custr_free; custr_len; diff --git a/usr/src/lib/libcmdutils/libcmdutils.h b/usr/src/lib/libcmdutils/libcmdutils.h index a280751c27..835560b8e7 100644 --- a/usr/src/lib/libcmdutils/libcmdutils.h +++ b/usr/src/lib/libcmdutils/libcmdutils.h @@ -157,6 +157,12 @@ extern int custr_alloc(custr_t **); extern void custr_free(custr_t *); /* + * Allocate a "custr_t" dynamic string object that operates on a fixed external + * buffer. + */ +extern int custr_alloc_buf(custr_t **, void *, size_t); + +/* * Append a single character, or a NUL-terminated string of characters, to a * dynamic string. Returns 0 on success and -1 otherwise. The dynamic string * will be unmodified if the function returns -1. @@ -165,6 +171,13 @@ extern int custr_appendc(custr_t *, char); extern int custr_append(custr_t *, const char *); /* + * Append a format string and arguments as though the contents were being parsed + * through snprintf. Returns 0 on success and -1 otherwise. The dynamic string + * will be unmodified if the function returns -1. + */ +extern int custr_append_printf(custr_t *, const char *, ...); + +/* * Determine the length in bytes, not including the NUL terminator, of the * dynamic string. 
*/ diff --git a/usr/src/lib/libdladm/Makefile b/usr/src/lib/libdladm/Makefile index 92de14cc8a..a9270eb848 100644 --- a/usr/src/lib/libdladm/Makefile +++ b/usr/src/lib/libdladm/Makefile @@ -29,7 +29,7 @@ HDRS = libdladm.h libdladm_impl.h libdllink.h libdlaggr.h \ libdlwlan.h libdlwlan_impl.h libdlvnic.h libdlvlan.h \ libdlmgmt.h libdlflow.h libdlflow_impl.h libdlstat.h \ libdlether.h libdlsim.h libdlbridge.h libdliptun.h \ - libdlib.h + libdlib.h libdloverlay.h HDRDIR = common @@ -50,6 +50,14 @@ MSGFILES = common/libdladm.c common/linkprop.c common/secobj.c \ XGETFLAGS = -a -x libdladm.xcl +TYPECHECK_LIB = libdladm.so.1 +TYPELIST = overlay_ioc_create_t \ + overlay_ioc_activate_t \ + overlay_ioc_delete_t \ + overlay_ioc_nprops_t \ + overlay_ioc_propinfo_t \ + overlay_ioc_prop_t + all := TARGET = all clean := TARGET = clean clobber := TARGET = clobber @@ -62,7 +70,7 @@ all clean clobber install lint: $(SUBDIRS) install_h: $(ROOTHDRS) -check: $(CHECKHDRS) +check: $(CHECKHDRS) $(TYPECHECK) $(POFILE): pofile_MSGFILES diff --git a/usr/src/lib/libdladm/Makefile.com b/usr/src/lib/libdladm/Makefile.com index 5bb56d1440..bd98ad76fa 100644 --- a/usr/src/lib/libdladm/Makefile.com +++ b/usr/src/lib/libdladm/Makefile.com @@ -27,7 +27,8 @@ VERS = .1 OBJECTS = libdladm.o secobj.o linkprop.o libdllink.o libdlaggr.o \ libdlwlan.o libdlvnic.o libdlmgmt.o libdlvlan.o libdlib.o\ flowattr.o flowprop.o propfuncs.o libdlflow.o libdlstat.o \ - usage.o libdlether.o libdlsim.o libdlbridge.o libdliptun.o + usage.o libdlether.o libdlsim.o libdlbridge.o libdliptun.o \ + libdloverlay.o include ../../Makefile.lib @@ -36,7 +37,7 @@ include ../../Makefile.rootfs LIBS = $(DYNLIB) $(LINTLIB) LDLIBS += -ldevinfo -lc -linetutil -lsocket -lscf -lrcm -lnvpair \ - -lexacct -lnsl -lkstat -lcurses -lpool + -lexacct -lnsl -lkstat -lcurses -lpool -lvarpd SRCDIR = ../common $(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC) diff --git a/usr/src/lib/libdladm/common/libdladm.c b/usr/src/lib/libdladm/common/libdladm.c 
index cf113e7357..5b810e0d02 100644 --- a/usr/src/lib/libdladm/common/libdladm.c +++ b/usr/src/lib/libdladm/common/libdladm.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Joyent, Inc. All rights reserved. */ #include <unistd.h> @@ -418,6 +419,9 @@ dladm_status2str(dladm_status_t status, char *buf) case DLADM_STATUS_INVALID_MTU: s = "MTU check failed, MTU outside of device's supported range"; break; + case DLADM_STATUS_BAD_ENCAP: + s = "invalid encapsulation protocol"; + break; default: s = "<unknown error>"; break; @@ -654,6 +658,9 @@ dladm_class2str(datalink_class_t class, char *buf) case DATALINK_CLASS_PART: s = "part"; break; + case DATALINK_CLASS_OVERLAY: + s = "overlay"; + break; default: s = "unknown"; break; @@ -1132,15 +1139,15 @@ dladm_strs2range(char **prop_val, uint_t val_cnt, * Convert a mac_propval_range_t structure into an array of elements. */ dladm_status_t -dladm_range2list(mac_propval_range_t *rangep, void *elem, uint_t *nelem) +dladm_range2list(const mac_propval_range_t *rangep, void *elem, uint_t *nelem) { int i, j, k; dladm_status_t status = DLADM_STATUS_OK; switch (rangep->mpr_type) { case MAC_PROPVAL_UINT32: { - mac_propval_uint32_range_t *ur; - uint32_t *elem32 = elem; + const mac_propval_uint32_range_t *ur; + uint32_t *elem32 = elem; k = 0; ur = &rangep->mpr_range_uint32[0]; @@ -1168,13 +1175,13 @@ dladm_range2list(mac_propval_range_t *rangep, void *elem, uint_t *nelem) * of single elements or ranges. 
*/ int -dladm_range2strs(mac_propval_range_t *rangep, char **prop_val) +dladm_range2strs(const mac_propval_range_t *rangep, char **prop_val) { int i; switch (rangep->mpr_type) { case MAC_PROPVAL_UINT32: { - mac_propval_uint32_range_t *ur; + const mac_propval_uint32_range_t *ur; /* Write ranges and individual elements */ ur = &rangep->mpr_range_uint32[0]; @@ -1191,6 +1198,20 @@ dladm_range2strs(mac_propval_range_t *rangep, char **prop_val) } return (0); } + case MAC_PROPVAL_STR: { + const mac_propval_str_range_t *str; + size_t coff, len; + + coff = 0; + str = &rangep->u.mpr_str; + for (i = 0; i < rangep->mpr_count; i++) { + len = strlen(&str->mpur_data[coff]); + (void) strlcpy(prop_val[i], &str->mpur_data[coff], + DLADM_PROP_VAL_MAX); + coff += len + 1; + } + return (0); + } default: break; } diff --git a/usr/src/lib/libdladm/common/libdladm.h b/usr/src/lib/libdladm/common/libdladm.h index f3347a0ede..fcb0551f88 100644 --- a/usr/src/lib/libdladm/common/libdladm.h +++ b/usr/src/lib/libdladm/common/libdladm.h @@ -179,7 +179,8 @@ typedef enum { DLADM_STATUS_NO_IB_HW_RESOURCE, DLADM_STATUS_INVALID_PKEY_TBL_SIZE, DLADM_STATUS_PORT_NOPROTO, - DLADM_STATUS_INVALID_MTU + DLADM_STATUS_INVALID_MTU, + DLADM_STATUS_BAD_ENCAP } dladm_status_t; typedef enum { @@ -288,9 +289,9 @@ extern dladm_status_t dladm_zone_halt(dladm_handle_t, zoneid_t); extern dladm_status_t dladm_strs2range(char **, uint_t, mac_propval_type_t, mac_propval_range_t **); -extern dladm_status_t dladm_range2list(mac_propval_range_t *, void*, +extern dladm_status_t dladm_range2list(const mac_propval_range_t *, void *, uint_t *); -extern int dladm_range2strs(mac_propval_range_t *, char **); +extern int dladm_range2strs(const mac_propval_range_t *, char **); extern dladm_status_t dladm_list2range(void *, uint_t, mac_propval_type_t, mac_propval_range_t **); diff --git a/usr/src/lib/libdladm/common/libdloverlay.c b/usr/src/lib/libdladm/common/libdloverlay.c new file mode 100644 index 0000000000..0d3bc326da --- 
/dev/null +++ b/usr/src/lib/libdladm/common/libdloverlay.c @@ -0,0 +1,870 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2015 Joyent, Inc. All rights reserved. + */ + +#include <libdladm_impl.h> +#include <libdllink.h> +#include <libdloverlay.h> +#include <sys/dld.h> +#include <sys/overlay.h> +#include <strings.h> +#include <unistd.h> +#include <stdlib.h> +#include <errno.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <limits.h> +#include <libvarpd_client.h> + +#define VARPD_PROPERTY_NAME "varpd/id" + +static const char *dladm_overlay_doorpath = "/var/run/varpd/varpd.door"; + +typedef struct dladm_overlay_propinfo { + boolean_t dop_isvarpd; + union { + overlay_ioc_propinfo_t *dop_overlay; + varpd_client_prop_handle_t *dop_varpd; + } dop_un; +} dladm_overlay_propinfo_t; + +dladm_status_t +dladm_overlay_prop_info(dladm_overlay_propinfo_handle_t phdl, + const char **namep, uint_t *typep, uint_t *protp, const void **defp, + uint32_t *sizep, const mac_propval_range_t **possp) +{ + dladm_overlay_propinfo_t *infop = (dladm_overlay_propinfo_t *)phdl; + overlay_ioc_propinfo_t *oinfop = infop->dop_un.dop_overlay; + + if (infop->dop_isvarpd == B_FALSE) { + if (namep != NULL) + *namep = oinfop->oipi_name; + if (typep != NULL) + *typep = oinfop->oipi_type; + if (protp != NULL) + *protp = oinfop->oipi_prot; + if (defp != NULL) + *defp = oinfop->oipi_default; + if (sizep != NULL) + *sizep = oinfop->oipi_defsize; + if (possp != NULL) + *possp = (const mac_propval_range_t *)oinfop->oipi_poss; + + } else { + int ret; + ret = libvarpd_c_prop_info(infop->dop_un.dop_varpd, 
namep, + typep, protp, defp, sizep, possp); + if (ret != 0) + return (dladm_errno2status(ret)); + + } + + return (DLADM_STATUS_OK); +} + +static dladm_status_t +dladm_overlay_parse_prop(overlay_prop_type_t type, void *buf, uint32_t *sizep, + const char *val) +{ + int ret; + int64_t ival; + uint64_t uval; + char *eptr; + struct in6_addr ipv6; + struct in_addr ip; + + switch (type) { + case OVERLAY_PROP_T_INT: + errno = 0; + ival = strtol(val, &eptr, 10); + if ((ival == 0 && errno == EINVAL) || + ((ival == LONG_MAX || ival == LONG_MIN) && + errno == ERANGE)) + return (DLADM_STATUS_BADARG); + bcopy(&ival, buf, sizeof (int64_t)); + *sizep = sizeof (int64_t); + break; + case OVERLAY_PROP_T_UINT: + errno = 0; + uval = strtol(val, &eptr, 10); + if ((uval == 0 && errno == EINVAL) || + (uval == ULONG_MAX && errno == ERANGE)) + return (DLADM_STATUS_BADARG); + bcopy(&uval, buf, sizeof (uint64_t)); + *sizep = sizeof (uint64_t); + break; + case OVERLAY_PROP_T_STRING: + ret = strlcpy((char *)buf, val, OVERLAY_PROP_SIZEMAX); + if (ret >= OVERLAY_PROP_SIZEMAX) + return (DLADM_STATUS_BADARG); + *sizep = ret + 1; + break; + case OVERLAY_PROP_T_IP: + /* + * Always try to parse the IP as an IPv6 address. If that fails, + * try to interpret it as an IPv4 address and transform it into + * an IPv6 mapped IPv4 address. 
+ */ + if (inet_pton(AF_INET6, val, &ipv6) != 1) { + if (inet_pton(AF_INET, val, &ip) != 1) + return (DLADM_STATUS_BADARG); + + IN6_INADDR_TO_V4MAPPED(&ip, &ipv6); + } + bcopy(&ipv6, buf, sizeof (struct in6_addr)); + *sizep = sizeof (struct in6_addr); + break; + default: + abort(); + } + + return (DLADM_STATUS_OK); +} + +static dladm_status_t +dladm_overlay_varpd_setprop(dladm_handle_t handle, varpd_client_handle_t *chdl, + uint64_t inst, const char *name, char *const *valp, uint_t cnt) +{ + int ret; + uint32_t size; + uint8_t buf[LIBVARPD_PROP_SIZEMAX]; + varpd_client_prop_handle_t *phdl; + uint_t type; + dladm_status_t status; + + if ((ret = libvarpd_c_prop_handle_alloc(chdl, inst, &phdl)) != 0) + return (dladm_errno2status(ret)); + + if ((ret = libvarpd_c_prop_info_fill_by_name(phdl, name)) != 0) { + libvarpd_c_prop_handle_free(phdl); + return (dladm_errno2status(ret)); + } + + if ((ret = libvarpd_c_prop_info(phdl, NULL, &type, NULL, NULL, NULL, + NULL)) != 0) { + libvarpd_c_prop_handle_free(phdl); + return (dladm_errno2status(ret)); + } + + if ((status = dladm_overlay_parse_prop(type, buf, &size, valp[0])) != + DLADM_STATUS_OK) { + libvarpd_c_prop_handle_free(phdl); + return (dladm_errno2status(ret)); + } + + status = DLADM_STATUS_OK; + ret = libvarpd_c_prop_set(phdl, buf, size); + libvarpd_c_prop_handle_free(phdl); + + return (dladm_errno2status(ret)); +} + +dladm_status_t +dladm_overlay_setprop(dladm_handle_t handle, datalink_id_t linkid, + const char *name, char *const *valp, uint_t cnt) +{ + int ret; + dladm_status_t status; + overlay_ioc_propinfo_t info; + overlay_ioc_prop_t prop; + + if (linkid == DATALINK_INVALID_LINKID || + name == NULL || valp == NULL || cnt != 1) + return (DLADM_STATUS_BADARG); + + bzero(&info, sizeof (overlay_ioc_propinfo_t)); + info.oipi_linkid = linkid; + info.oipi_id = -1; + if (strlcpy(info.oipi_name, name, OVERLAY_PROP_NAMELEN) >= + OVERLAY_PROP_NAMELEN) + return (DLADM_STATUS_BADARG); + + status = DLADM_STATUS_OK; + ret = 
ioctl(dladm_dld_fd(handle), OVERLAY_IOC_PROPINFO, &info); + if (ret != 0) + status = dladm_errno2status(errno); + + if (status != DLADM_STATUS_OK) + return (status); + + prop.oip_linkid = linkid; + prop.oip_id = info.oipi_id; + prop.oip_name[0] = '\0'; + if ((ret = dladm_overlay_parse_prop(info.oipi_type, prop.oip_value, + &prop.oip_size, valp[0])) != DLADM_STATUS_OK) + return (ret); + + status = DLADM_STATUS_OK; + ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_SETPROP, &prop); + if (ret != 0) + status = dladm_errno2status(errno); + + return (ret); +} + +/* + * Tell the user about any unset required properties. + * XXX libraries shouldn't do this. Should be a dladm_arg_list_t returned to + * dladm(1M) + */ +static int +dladm_overlay_activate_cb(dladm_handle_t handle, datalink_id_t linkid, + dladm_overlay_propinfo_handle_t phdl, void *arg) +{ + dladm_status_t status; + uint8_t buf[DLADM_OVERLAY_PROP_SIZEMAX]; + uint_t prot; + size_t size = sizeof (buf); + const char *name; + + if ((status = dladm_overlay_prop_info(phdl, &name, NULL, &prot, NULL, + NULL, NULL)) != DLADM_STATUS_OK) + return (status); + + if ((prot & OVERLAY_PROP_PERM_REQ) == 0) + return (DLADM_WALK_CONTINUE); + + if (dladm_overlay_get_prop(handle, linkid, phdl, buf, &size) != + DLADM_STATUS_OK) + return (DLADM_WALK_CONTINUE); + + if (size == 0) + fprintf(stderr, "unset required propety: %s\n", name); + + return (DLADM_WALK_CONTINUE); +} + +/* + * We need to clean up the world here. The problem is that we may or may not + * actually have everything created. While in the normal case, we'd always have + * an overlay device, assigned datalink id, and a varpd instance, we might not + * have any of those, except for the datalink instance. Therefore, as long as + * the id refers to a valid overlay, we should try to clean up as much of the + * state as possible and most importantly, we need to make sure we delete the + * datalink id. If we fail to do that, then that name will become lost to time. 
+ */ +dladm_status_t +dladm_overlay_delete(dladm_handle_t handle, datalink_id_t linkid) +{ + datalink_class_t class; + overlay_ioc_delete_t oid; + varpd_client_handle_t *chdl; + int ret, rval = 0; + uint32_t flags; + uint64_t varpdid; + + if (dladm_datalink_id2info(handle, linkid, &flags, &class, NULL, + NULL, 0) != DLADM_STATUS_OK) + return (DLADM_STATUS_BADARG); + + if (class != DATALINK_CLASS_OVERLAY) + return (DLADM_STATUS_BADARG); + + oid.oid_linkid = linkid; + ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_DELETE, &oid); + if (ret != 0) + rval = dladm_errno2status(errno); + + if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0) { + return (dladm_errno2status(ret)); + } + + if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) { + if (ret == ENOENT) { + goto finish; + } + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); + } + + ret = libvarpd_c_instance_destroy(chdl, varpdid); +finish: + libvarpd_c_destroy(chdl); + (void) dladm_destroy_datalink_id(handle, linkid, flags); + + return (dladm_errno2status(ret)); +} + +dladm_status_t +dladm_overlay_get_prop(dladm_handle_t handle, datalink_id_t linkid, + dladm_overlay_propinfo_handle_t infohdl, void *buf, size_t *sizep) +{ + int ret; + overlay_ioc_prop_t oip; + dladm_overlay_propinfo_t *infop = (dladm_overlay_propinfo_t *)infohdl; + + /* XXX Better errno */ + if (*sizep < DLADM_OVERLAY_PROP_SIZEMAX) + return (dladm_errno2status(ERANGE)); + + if (infop->dop_isvarpd == B_FALSE) { + bzero(&oip, sizeof (overlay_ioc_prop_t)); + oip.oip_linkid = linkid; + oip.oip_id = infop->dop_un.dop_overlay->oipi_id; + ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_GETPROP, &oip); + if (ret != 0) + return (dladm_errno2status(errno)); + bcopy(oip.oip_value, buf, DLADM_OVERLAY_PROP_SIZEMAX); + *sizep = oip.oip_size; + } else { + uint32_t size = *sizep; + + ret = libvarpd_c_prop_get(infop->dop_un.dop_varpd, buf, &size); + if (ret != 0) + return (dladm_errno2status(errno)); + *sizep = size; + } + + 
return (DLADM_STATUS_OK); +} + +static dladm_status_t +dladm_overlay_walk_varpd_prop(dladm_handle_t handle, datalink_id_t linkid, + uint64_t varpdid, dladm_overlay_prop_f func, void *arg) +{ + int ret, i; + varpd_client_handle_t *chdl; + varpd_client_prop_handle_t *phdl; + uint_t nprops; + dladm_status_t status; + + if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0) + return (dladm_errno2status(ret)); + + if ((ret = libvarpd_c_prop_handle_alloc(chdl, varpdid, &phdl)) != 0) { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); + } + + if ((ret = libvarpd_c_prop_nprops(chdl, varpdid, &nprops)) != 0) { + libvarpd_c_prop_handle_free(phdl); + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); + } + +#if 0 + printf("got n props: %d\n", nprops); +#endif + + status = DLADM_STATUS_OK; + for (i = 0; i < nprops; i++) { + dladm_overlay_propinfo_t dop; + + bzero(&dop, sizeof (dop)); + dop.dop_isvarpd = B_TRUE; + dop.dop_un.dop_varpd = phdl; + + if ((ret = libvarpd_c_prop_info_fill(phdl, i)) != 0) { + status = dladm_errno2status(ret); + break; + } + + ret = func(handle, linkid, + (dladm_overlay_propinfo_handle_t)&dop, arg); + if (ret == DLADM_WALK_TERMINATE) + break; + } + + libvarpd_c_prop_handle_free(phdl); + libvarpd_c_destroy(chdl); + + return (status); +} + +dladm_status_t +dladm_overlay_walk_prop(dladm_handle_t handle, datalink_id_t linkid, + dladm_overlay_prop_f func, void *arg) +{ + int i, ret; + dladm_status_t status; + datalink_class_t class; + overlay_ioc_nprops_t oin; + overlay_ioc_propinfo_t oipi; + dladm_overlay_propinfo_t dop; + uint64_t varpdid = UINT64_MAX; + + if (dladm_datalink_id2info(handle, linkid, NULL, &class, NULL, + NULL, 0) != DLADM_STATUS_OK) + return (DLADM_STATUS_BADARG); + + if (class != DATALINK_CLASS_OVERLAY) + return (DLADM_STATUS_BADARG); + + bzero(&oin, sizeof (overlay_ioc_nprops_t)); + status = DLADM_STATUS_OK; + oin.oipn_linkid = linkid; + ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_NPROPS, &oin); + 
if (ret != 0) + return (dladm_errno2status(errno)); + + for (i = 0; i < oin.oipn_nprops; i++) { + bzero(&dop, sizeof (dladm_overlay_propinfo_t)); + bzero(&oipi, sizeof (overlay_ioc_propinfo_t)); + oipi.oipi_linkid = linkid; + oipi.oipi_id = i; + ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_PROPINFO, &oipi); + if (ret != 0) { + fprintf(stderr, "failed to get propinfo %d\n", i); + return (dladm_errno2status(errno)); + } + + dop.dop_isvarpd = B_FALSE; + dop.dop_un.dop_overlay = &oipi; + ret = func(handle, linkid, + (dladm_overlay_propinfo_handle_t)&dop, arg); + if (ret == DLADM_WALK_TERMINATE) + break; + + if (strcmp(oipi.oipi_name, VARPD_PROPERTY_NAME) == 0) { + uint8_t buf[DLADM_OVERLAY_PROP_SIZEMAX]; + size_t bufsize = sizeof (buf); + uint64_t *vp; + + if ((status = dladm_overlay_get_prop(handle, linkid, + (dladm_overlay_propinfo_handle_t)&dop, buf, + &bufsize)) != DLADM_STATUS_OK) + continue; + + vp = (uint64_t *)buf; + varpdid = *vp; + } + } + + /* Should this really be possible? */ + if (varpdid == UINT64_MAX) + return (DLADM_STATUS_OK); + + return (dladm_overlay_walk_varpd_prop(handle, linkid, varpdid, func, + arg)); +} + +dladm_status_t +dladm_overlay_create(dladm_handle_t handle, const char *name, + const char *encap, const char *search, uint64_t vid, + dladm_arg_list_t *props, uint32_t flags) +{ + int ret, i; + dladm_status_t status; + datalink_id_t linkid; + overlay_ioc_create_t oic; + overlay_ioc_activate_t oia; + size_t slen; + varpd_client_handle_t *vch; + uint64_t id; + + status = dladm_create_datalink_id(handle, name, DATALINK_CLASS_OVERLAY, + DL_ETHER, flags, &linkid); + if (status != DLADM_STATUS_OK) + return (status); + + bzero(&oic, sizeof (oic)); + oic.oic_linkid = linkid; + oic.oic_vnetid = vid; + (void) strlcpy(oic.oic_encap, encap, MAXLINKNAMELEN); + + status = DLADM_STATUS_OK; + ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_CREATE, &oic); + if (ret != 0) { + /* XXX We need to have private errors here */ + status = dladm_errno2status(errno); + 
} + + if (status != DLADM_STATUS_OK) { + (void) dladm_destroy_datalink_id(handle, linkid, flags); + return (status); + } + + slen = strlen(search); + for (i = 0; props != NULL && i < props->al_count; i++) { + dladm_arg_info_t *aip = &props->al_info[i]; + + /* + * If it's a property for the search plugin, eg. it has the + * prefix '<search>/', then we don't set the property on the + * overlay device and instead set it on the varpd instance. + */ + if (strncmp(aip->ai_name, search, slen) == 0 && + aip->ai_name[slen] == '/') + continue; + status = dladm_overlay_setprop(handle, linkid, aip->ai_name, + aip->ai_val, aip->ai_count); + if (status != DLADM_STATUS_OK) { + /* XXX */ + fprintf(stderr, "failed to set property %s\n", + aip->ai_name); + (void) dladm_overlay_delete(handle, linkid); + return (status); + } + } + + if ((ret = libvarpd_c_create(&vch, dladm_overlay_doorpath)) != 0) { + fprintf(stderr, "failed to create libvarpd handle: %d\n", ret); + (void) dladm_overlay_delete(handle, linkid); + return (dladm_errno2status(ret)); + } + + if ((ret = libvarpd_c_instance_create(vch, linkid, search, + &id)) != 0) { + fprintf(stderr, "failed to create varpd instance: %d\n", ret); + libvarpd_c_destroy(vch); + (void) dladm_overlay_delete(handle, linkid); + return (dladm_errno2status(ret)); + } + + for (i = 0; props != NULL && i < props->al_count; i++) { + dladm_arg_info_t *aip = &props->al_info[i]; + + /* + * Skip arguments we've processed already. + */ + if (strncmp(aip->ai_name, search, slen) != 0) + continue; + + if (aip->ai_name[slen] != '/') + continue; + + ret = dladm_overlay_varpd_setprop(handle, vch, id, aip->ai_name, + aip->ai_val, aip->ai_count); + if (ret != 0) { + fprintf(stderr, "failed to set varpd prop: %s\n", + aip->ai_name); + /* XXX Need to clean up instance, but... 
*/ + libvarpd_c_destroy(vch); + (void) dladm_overlay_delete(handle, linkid); + return (dladm_errno2status(ret)); + } + } + + if ((ret = libvarpd_c_instance_activate(vch, id)) != 0) { + fprintf(stderr, "failed to activate varpd instance: %d\n", ret); + dladm_overlay_walk_varpd_prop(handle, linkid, id, + dladm_overlay_activate_cb, NULL); + libvarpd_c_destroy(vch); + (void) dladm_overlay_delete(handle, linkid); + return (dladm_errno2status(ret)); + + } + + bzero(&oia, sizeof (oia)); + oia.oia_linkid = linkid; + status = DLADM_STATUS_OK; + ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_ACTIVATE, &oia); + if (ret != 0) { + /* XXX We need to have private errors here */ + ret = errno; + fprintf(stderr, "failed to activate %d\n", ret); + dladm_overlay_walk_prop(handle, linkid, + dladm_overlay_activate_cb, NULL); + status = dladm_errno2status(ret); + (void) libvarpd_c_instance_destroy(vch, id); + } + + libvarpd_c_destroy(vch); + if (status != DLADM_STATUS_OK) + (void) dladm_overlay_delete(handle, linkid); + + return (status); +} + + + +typedef struct overlay_walk_cb { + dladm_handle_t owc_handle; + datalink_id_t owc_linkid; + void *owc_arg; + dladm_overlay_cache_f owc_func; + uint_t owc_mode; + uint_t owc_dest; +} overlay_walk_cb_t; + +static int +dladm_overlay_walk_cache_cb(varpd_client_handle_t *chdl, uint64_t varpdid, + const struct ether_addr *key, const varpd_client_cache_entry_t *entry, + void *arg) +{ + overlay_walk_cb_t *owc = arg; + dladm_overlay_point_t point; + + bzero(&point, sizeof (dladm_overlay_point_t)); + point.dop_dest = owc->owc_dest; + point.dop_mac = entry->vcp_mac; + point.dop_flags = entry->vcp_flags; + point.dop_ip = entry->vcp_ip; + point.dop_port = entry->vcp_port; + + if (owc->owc_mode == OVERLAY_TARGET_POINT) + point.dop_flags |= DLADM_OVERLAY_F_DEFAULT; + + if (owc->owc_func(owc->owc_handle, owc->owc_linkid, key, &point, + owc->owc_arg) == DLADM_WALK_TERMINATE) + return (1); + return (0); +} + +dladm_status_t 
+dladm_overlay_walk_cache(dladm_handle_t handle, datalink_id_t linkid, + dladm_overlay_cache_f func, void *arg) +{ + int ret; + uint_t mode, dest; + uint64_t varpdid; + varpd_client_handle_t *chdl; + overlay_walk_cb_t cbarg; + + if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0) + return (dladm_errno2status(ret)); + + if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); + } + + if ((ret = libvarpd_c_instance_target_mode(chdl, varpdid, + &dest, &mode)) != 0) { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); + } + + cbarg.owc_handle = handle; + cbarg.owc_linkid = linkid; + cbarg.owc_arg = arg; + cbarg.owc_func = func; + cbarg.owc_dest = dest; + cbarg.owc_mode = mode; + ret = libvarpd_c_instance_cache_walk(chdl, varpdid, + dladm_overlay_walk_cache_cb, &cbarg); + libvarpd_c_destroy(chdl); + + return (dladm_errno2status(ret)); +} + +dladm_status_t +dladm_overlay_cache_flush(dladm_handle_t handle, datalink_id_t linkid) +{ + int ret; + uint64_t varpdid; + varpd_client_handle_t *chdl; + + if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0) + return (dladm_errno2status(ret)); + + if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); + } + + ret = libvarpd_c_instance_cache_flush(chdl, varpdid); + libvarpd_c_destroy(chdl); + + return (dladm_errno2status(ret)); +} + +dladm_status_t +dladm_overlay_cache_delete(dladm_handle_t handle, datalink_id_t linkid, + const struct ether_addr *key) +{ + int ret; + uint64_t varpdid; + varpd_client_handle_t *chdl; + + if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0) + return (dladm_errno2status(ret)); + + if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); + } + + ret = libvarpd_c_instance_cache_delete(chdl, varpdid, key); + 
libvarpd_c_destroy(chdl); + + return (dladm_errno2status(ret)); +} + +dladm_status_t +dladm_overlay_cache_set(dladm_handle_t handle, datalink_id_t linkid, + const struct ether_addr *key, char *val) +{ + int ret; + uint_t dest; + uint64_t varpdid; + char *ip, *port = NULL; + varpd_client_handle_t *chdl; + varpd_client_cache_entry_t vcp; + + + if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0) + return (dladm_errno2status(ret)); + + if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); + } + + if ((ret = libvarpd_c_instance_target_mode(chdl, varpdid, + &dest, NULL)) != 0) { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); + } + + /* + * Mode tells us what we should expect in val. It we have more than one + * thing listed, the canonical format of it right now is mac,ip:port. + */ + bzero(&vcp, sizeof (varpd_client_cache_entry_t)); + + if (strcasecmp(val, "drop") == 0) { + vcp.vcp_flags = OVERLAY_TARGET_CACHE_DROP; + goto send; + } + + if (dest & OVERLAY_PLUGIN_D_ETHERNET) { + if (ether_aton_r(val, &vcp.vcp_mac) == NULL) { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(EINVAL)); + } + } + + if (dest & OVERLAY_PLUGIN_D_IP) { + if (dest & OVERLAY_PLUGIN_D_ETHERNET) { + if ((ip = strchr(val, ',')) == NULL) { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); + } + ip++; + } else { + ip = val; + } + + if (dest & OVERLAY_PLUGIN_D_PORT) { + if ((port = strchr(val, ':')) == NULL) { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); + } + *port = '\0'; + port++; + } + + /* Try v6, then fall back to v4 */ + ret = inet_pton(AF_INET6, ip, &vcp.vcp_ip); + if (ret == -1) + abort(); + if (ret == 0) { + struct in_addr v4; + + ret = inet_pton(AF_INET, ip, &v4); + if (ret == -1) + abort(); + if (ret == 0) { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); + } + IN6_INADDR_TO_V4MAPPED(&v4, &vcp.vcp_ip); + } + } + + if (dest & 
OVERLAY_PLUGIN_D_PORT) { + char *eptr; + unsigned long l; + if (port == NULL && (dest & OVERLAY_PLUGIN_D_ETHERNET)) { + if ((port = strchr(val, ',')) == NULL) { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(EINVAL)); + } + } else if (port == NULL) + port = val; + + errno = 0; + l = strtoul(port, &eptr, 10); + if (errno != 0 || *eptr != '\0') { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(EINVAL)); + } + if (l == 0 || l > UINT16_MAX) { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(EINVAL)); + } + vcp.vcp_port = l; + } + +send: + ret = libvarpd_c_instance_cache_set(chdl, varpdid, key, &vcp); + + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); +} + +dladm_status_t +dladm_overlay_cache_get(dladm_handle_t handle, datalink_id_t linkid, + const struct ether_addr *key, dladm_overlay_point_t *point) +{ + int ret; + uint_t dest, mode; + uint64_t varpdid; + varpd_client_handle_t *chdl; + varpd_client_cache_entry_t entry; + + if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0) + return (dladm_errno2status(ret)); + + if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); + } + + if ((ret = libvarpd_c_instance_target_mode(chdl, varpdid, + &dest, &mode)) != 0) { + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); + } + + ret = libvarpd_c_instance_cache_get(chdl, varpdid, key, &entry); + if (ret == 0) { + point->dop_dest = dest; + point->dop_mac = entry.vcp_mac; + point->dop_flags = entry.vcp_flags; + point->dop_ip = entry.vcp_ip; + point->dop_port = entry.vcp_port; + if (mode == OVERLAY_TARGET_POINT) + point->dop_flags |= DLADM_OVERLAY_F_DEFAULT; + } + + libvarpd_c_destroy(chdl); + return (dladm_errno2status(ret)); +} + +dladm_status_t +dladm_overlay_status(dladm_handle_t handle, datalink_id_t linkid, + dladm_overlay_status_f func, void *arg) +{ + int ret; + dladm_status_t status; + overlay_ioc_status_t ois; + 
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
 */

#ifndef _LIBDLOVERLAY_H
#define	_LIBDLOVERLAY_H

/*
 * libdladm Overlay device routines
 */

#include <libdladm.h>
#include <libdladm_impl.h>
#include <sys/overlay.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Values for dop_flags in dladm_overlay_point_t.  dladm_overlay_cache_get()
 * copies the varpd cache-entry flags into dop_flags and additionally sets
 * DLADM_OVERLAY_F_DEFAULT when the target mode is OVERLAY_TARGET_POINT.
 * NOTE(review): DLADM_OVERLAY_F_DROP presumably mirrors
 * OVERLAY_TARGET_CACHE_DROP -- confirm against <sys/overlay.h>.
 */
#define	DLADM_OVERLAY_F_DROP		0x0001
#define	DLADM_OVERLAY_F_DEFAULT		0xf000

/*
 * A destination returned by dladm_overlay_cache_get().  dop_dest holds the
 * OVERLAY_PLUGIN_D_* bits reported for the instance; the remaining members
 * are copied from the varpd cache entry.
 */
typedef struct dladm_overlay_point {
	uint_t			dop_dest;
	struct ether_addr	dop_mac;
	uint16_t		dop_flags;
	struct in6_addr		dop_ip;
	uint16_t		dop_port;
} dladm_overlay_point_t;

/*
 * Status handed to a dladm_overlay_status_f callback: dos_degraded is
 * B_TRUE when the kernel reports OVERLAY_I_DEGRADED, and dos_fmamsg is a
 * (possibly truncated) copy of the kernel's status message.
 */
typedef struct dladm_overlay_status {
	boolean_t	dos_degraded;
	char		dos_fmamsg[256];
} dladm_overlay_status_t;

extern dladm_status_t dladm_overlay_create(dladm_handle_t, const char *,
    const char *, const char *, uint64_t, dladm_arg_list_t *,
    uint32_t);
extern dladm_status_t dladm_overlay_delete(dladm_handle_t, datalink_id_t);

/*
 * XXX I don't really like this API, but I also have a feeling that this will
 * change over time. I guess we could turf it given a lack of stability, but...
 *
 * dladm_overlay_status() invokes the callback exactly once, with the status
 * of the link, when the status ioctl succeeds; the callback is not invoked
 * on failure.
 */
typedef void (*dladm_overlay_status_f)(dladm_handle_t, datalink_id_t,
    dladm_overlay_status_t *, void *);
extern dladm_status_t dladm_overlay_status(dladm_handle_t, datalink_id_t,
    dladm_overlay_status_f, void *);

/*
 * Target cache manipulation.  Cache entries are keyed by MAC address.
 * dladm_overlay_cache_set() takes the destination as a string ("drop", or a
 * mac / ip / port combination) and may modify that string while parsing it,
 * which is why it is not const.
 */
extern dladm_status_t dladm_overlay_cache_flush(dladm_handle_t, datalink_id_t);
extern dladm_status_t dladm_overlay_cache_delete(dladm_handle_t, datalink_id_t,
    const struct ether_addr *);
extern dladm_status_t dladm_overlay_cache_set(dladm_handle_t, datalink_id_t,
    const struct ether_addr *, char *);
extern dladm_status_t dladm_overlay_cache_get(dladm_handle_t, datalink_id_t,
    const struct ether_addr *, dladm_overlay_point_t *);

/*
 * NOTE(review): presumably the maximum size of a property value buffer and
 * of a property name, respectively -- confirm against the implementation.
 */
#define	DLADM_OVERLAY_PROP_SIZEMAX	256
#define	DLADM_OVERLAY_PROP_NAMELEN	32

/* Opaque handle describing a single overlay property. */
typedef struct __dladm_overlay_propinfo *dladm_overlay_propinfo_handle_t;

extern dladm_status_t dladm_overlay_prop_info(dladm_overlay_propinfo_handle_t,
    const char **, uint_t *, uint_t *, const void **, uint32_t *,
    const mac_propval_range_t **);
extern dladm_status_t dladm_overlay_get_prop(dladm_handle_t, datalink_id_t,
    dladm_overlay_propinfo_handle_t, void *buf, size_t *bufsize);

/* Callback type for dladm_overlay_walk_prop(). */
typedef int (*dladm_overlay_prop_f)(dladm_handle_t, datalink_id_t,
    dladm_overlay_propinfo_handle_t, void *);
extern dladm_status_t dladm_overlay_walk_prop(dladm_handle_t, datalink_id_t,
    dladm_overlay_prop_f, void *arg);

/* Callback type for dladm_overlay_walk_cache(). */
typedef int (*dladm_overlay_cache_f)(dladm_handle_t, datalink_id_t,
    const struct ether_addr *, const dladm_overlay_point_t *, void *);
extern dladm_status_t dladm_overlay_walk_cache(dladm_handle_t, datalink_id_t,
    dladm_overlay_cache_f, void *);

/*
 * The following is the likely API for setting a property.
 * It is compiled out; none of these symbols exist yet.
 */
#if 0
extern dladm_status_t dladm_overlay_prop_lookup(dladm_handle_t, datalink_id_t,
    const char *, dladm_overlay_propinfo_handle_t *);
extern void dladm_overlay_prop_handle_free(dladm_handle_t, datalink_id_t,
    dladm_overlay_propinfo_handle_t *);
extern dladm_status_t dladm_overlay_set_prop(dladm_handle_t, datalink_id_t,
    dladm_propinfo_handle_t, void *buf, size_t *bufsize);
extern dladm_status_t dladm_overlay_str_to_buf(dladm_handle_t, datalink_id_t,
    dladm_overlay_propinfo_handle_t *, const char *, void *, size_t *);
extern dladm_status_t dladm_overlay_buf_to_str(dladm_handle_t, datalink_id_t,
    dladm_overlay_propinfo_handle_t *, const void *, const size_t, char *,
    size_t *);
#endif

#ifdef __cplusplus
}
#endif

#endif /* _LIBDLOVERLAY_H */
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2014 Joyent, Inc.  All rights reserved.
#

LIBRARY =	libidspace.a
VERS =		.1
OBJECTS =	id_space.o \
		libidspace.o

# id_space.c is shared with the kernel and lives in the common source tree.
COMDIR =	$(SRC)/common/idspace

include ../../Makefile.lib

SRCDIR =	../common

#
# The sources are split across two directories: the shared implementation in
# COMDIR and the userland wrappers in SRCDIR.  List both so that lint sees
# every object that goes into the library.
#
SRCS =		$(COMDIR)/id_space.c \
		$(SRCDIR)/libidspace.c
LIBS =		$(DYNLIB) $(LINTLIB)

LDLIBS +=	-lc -lumem

$(LINTLIB) :=	SRCS = $(SRCDIR)/$(LINTSRC)

.KEEP_STATE:

all: $(LIBS)

lint: lintcheck

include ../../Makefile.targ

# Build the objects whose sources live in the common tree.
objs/%.o pics/%.o: $(COMDIR)/%.c
	$(COMPILE.c) -o $@ $<
	$(POST_PROCESS_O)
+ * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014, Joyent, Inc. + */ + +/* + * Wrappers around the common id_space code, for userland. + */ +#include <sys/id_space.h> + +id_t +id_alloc_specific(id_space_t *idp, id_t id) +{ + return (id_alloc_specific_nosleep(idp, id)); +} diff --git a/usr/src/lib/libidspace/common/libidspace.h b/usr/src/lib/libidspace/common/libidspace.h new file mode 100644 index 0000000000..bb8690f19c --- /dev/null +++ b/usr/src/lib/libidspace/common/libidspace.h @@ -0,0 +1,42 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. 
+ */ + +#ifndef _LIBIDSPACE_H +#define _LIBIDSPACE_H + +/* + * libidspace public header + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> + +typedef struct id_space id_space_t; + +extern id_space_t *id_space_create(const char *, id_t, id_t); +extern void id_space_destroy(id_space_t *); +extern void id_space_extend(id_space_t *, id_t, id_t); +extern id_t id_alloc(id_space_t *); +extern id_t id_alloc_specific(id_space_t *, id_t); +extern void id_free(id_space_t *, id_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBIDSPACE_H */ diff --git a/usr/src/lib/libidspace/common/llib-lidspace b/usr/src/lib/libidspace/common/llib-lidspace new file mode 100644 index 0000000000..39c628da47 --- /dev/null +++ b/usr/src/lib/libidspace/common/llib-lidspace @@ -0,0 +1,19 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. + */ + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + +#include <libidspace.h> diff --git a/usr/src/lib/libidspace/common/mapfile-vers b/usr/src/lib/libidspace/common/mapfile-vers new file mode 100644 index 0000000000..61ae855ee0 --- /dev/null +++ b/usr/src/lib/libidspace/common/mapfile-vers @@ -0,0 +1,47 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. 
+# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION ILLUMOS_1.0 { # first release of libidspace + global: + id_alloc; + id_alloc_specific; + id_free; + id_space_create; + id_space_destroy; + id_space_extend; +}; + + +SYMBOL_VERSION ILLUMOSprivate { + local: + *; +}; + diff --git a/usr/src/lib/libidspace/i386/Makefile b/usr/src/lib/libidspace/i386/Makefile new file mode 100644 index 0000000000..41e699e8f8 --- /dev/null +++ b/usr/src/lib/libidspace/i386/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/libidspace/sparc/Makefile b/usr/src/lib/libidspace/sparc/Makefile new file mode 100644 index 0000000000..41e699e8f8 --- /dev/null +++ b/usr/src/lib/libidspace/sparc/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. 
A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/libidspace/sparcv9/Makefile b/usr/src/lib/libidspace/sparcv9/Makefile new file mode 100644 index 0000000000..15d904c616 --- /dev/null +++ b/usr/src/lib/libidspace/sparcv9/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com +include ../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/libnvpair/libnvpair.h b/usr/src/lib/libnvpair/libnvpair.h index b05669e506..197ec37f46 100644 --- a/usr/src/lib/libnvpair/libnvpair.h +++ b/usr/src/lib/libnvpair/libnvpair.h @@ -49,6 +49,8 @@ extern int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *, extern void nvlist_print(FILE *, nvlist_t *); extern int nvlist_print_json(FILE *, nvlist_t *); extern void dump_nvlist(nvlist_t *, int); +extern int nvlist_dump_json(nvlist_t *, char **); +extern void nvlist_dump_json_free(nvlist_t *, char *); /* * Private nvlist printing interface that allows the caller some control diff --git a/usr/src/lib/libnvpair/mapfile-vers b/usr/src/lib/libnvpair/mapfile-vers index 0403964e05..9b1f048f75 100644 --- a/usr/src/lib/libnvpair/mapfile-vers +++ b/usr/src/lib/libnvpair/mapfile-vers @@ -244,6 +244,8 @@ SYMBOL_VERSION SUNWprivate_1.1 { dump_nvlist; nvlist_add_hrtime; nvlist_lookup_hrtime; + nvlist_dump_json; + 
/*
 * A realloc-aware snprintf/asprintf like function.
 *
 * Appends the formatted output at offset *boff of the heap buffer *bufp,
 * whose current allocation size is *blen, growing the buffer in 1k steps as
 * required.  If *bufp is NULL (in which case *blen and *boff must both be
 * zero) an initial 1k buffer is allocated.
 *
 * On success returns 0 and advances *boff past the bytes written; the buffer
 * is always NUL-terminated at the new *boff.  On failure returns -1; the
 * buffer (if any) remains owned by the caller, *boff is unchanged, and the
 * contents before *boff are intact.
 */
/*PRINTFLIKE4*/
static int
nvlist_rasnprintf(char **bufp, size_t *blen, off_t *boff, char *input, ...)
{
	int ret;
	va_list ap;
	size_t size, asize, need;
	char *b;

	if (*bufp == NULL) {
		assert(*blen == 0);
		assert(*boff == 0);
		/* Pick a reasonable starting point, let's say 1k */
		*bufp = malloc(1024);
		if (*bufp == NULL)
			return (-1);
		*blen = 1024;
	}

	size = *blen - *boff;
	va_start(ap, input);
	/* E_SEC_PRINTF_VAR_FMT */
	ret = vsnprintf(*bufp + *boff, size, input, ap);
	va_end(ap);
	if (ret < 0)
		return (-1);

	if ((size_t)ret >= size) {
		/*
		 * The output was truncated.  The buffer has to hold what is
		 * already there (*boff bytes), the new output (ret bytes) and
		 * the terminating NUL, so size the new allocation against
		 * that total rather than against ret alone.
		 */
		need = (size_t)*boff + (size_t)ret + 1;
		asize = *blen;
		while (asize < need)
			asize += 1024;
		if ((b = realloc(*bufp, asize)) == NULL) {
			/* Discard the truncated partial output. */
			(*bufp)[*boff] = '\0';
			return (-1);
		}
		*bufp = b;
		*blen = asize;
		size = *blen - *boff;
		va_start(ap, input);
		/* E_SEC_PRINTF_VAR_FMT */
		ret = vsnprintf(*bufp + *boff, size, input, ap);
		va_end(ap);
		if (ret < 0)
			return (-1);
		assert((size_t)ret < size);
	}
	*boff += ret;

	return (0);
}
*/ static int -nvlist_print_json_string(FILE *fp, const char *input) +nvlist_print_json_string(const char *input, char **bufp, size_t *blen, + off_t *offp) { mbstate_t mbr; wchar_t c; @@ -51,29 +108,29 @@ nvlist_print_json_string(FILE *fp, const char *input) bzero(&mbr, sizeof (mbr)); - FPRINTF(fp, "\""); + FPRINTF(bufp, blen, offp, "\""); while ((sz = mbrtowc(&c, input, MB_CUR_MAX, &mbr)) > 0) { switch (c) { case '"': - FPRINTF(fp, "\\\""); + FPRINTF(bufp, blen, offp, "\\\""); break; case '\n': - FPRINTF(fp, "\\n"); + FPRINTF(bufp, blen, offp, "\\n"); break; case '\r': - FPRINTF(fp, "\\r"); + FPRINTF(bufp, blen, offp, "\\r"); break; case '\\': - FPRINTF(fp, "\\\\"); + FPRINTF(bufp, blen, offp, "\\\\"); break; case '\f': - FPRINTF(fp, "\\f"); + FPRINTF(bufp, blen, offp, "\\f"); break; case '\t': - FPRINTF(fp, "\\t"); + FPRINTF(bufp, blen, offp, "\\t"); break; case '\b': - FPRINTF(fp, "\\b"); + FPRINTF(bufp, blen, offp, "\\b"); break; default: if ((c >= 0x00 && c <= 0x1f) || @@ -83,13 +140,15 @@ nvlist_print_json_string(FILE *fp, const char *input) * characters in the Basic Multilingual Plane * as JSON-escaped multibyte characters. */ - FPRINTF(fp, "\\u%04x", (int)(0xffff & c)); + FPRINTF(bufp, blen, offp, "\\u%04x", + (int)(0xffff & c)); } else if (c >= 0x20 && c <= 0x7f) { /* * Render other 7-bit ASCII characters directly * and drop other, unrepresentable characters. */ - FPRINTF(fp, "%c", (int)(0xff & c)); + FPRINTF(bufp, blen, offp, "%c", + (int)(0xff & c)); } break; } @@ -104,98 +163,103 @@ nvlist_print_json_string(FILE *fp, const char *input) return (-1); } - FPRINTF(fp, "\""); + FPRINTF(bufp, blen, offp, "\""); return (0); } -/* - * Dump a JSON-formatted representation of an nvlist to the provided FILE *. - * This routine does not output any new-lines or additional whitespace other - * than that contained in strings, nor does it call fflush(3C). 
- */ -int -nvlist_print_json(FILE *fp, nvlist_t *nvl) +static int +nvlist_do_json(nvlist_t *nvl, char **bufp, size_t *blen, off_t *offp) { nvpair_t *curr; boolean_t first = B_TRUE; - FPRINTF(fp, "{"); + FPRINTF(bufp, blen, offp, "{"); for (curr = nvlist_next_nvpair(nvl, NULL); curr; curr = nvlist_next_nvpair(nvl, curr)) { data_type_t type = nvpair_type(curr); if (!first) - FPRINTF(fp, ","); + FPRINTF(bufp, blen, offp, ","); else first = B_FALSE; - if (nvlist_print_json_string(fp, nvpair_name(curr)) == -1) + if (nvlist_print_json_string(nvpair_name(curr), bufp, blen, + offp) == -1) return (-1); - FPRINTF(fp, ":"); + FPRINTF(bufp, blen, offp, ":"); switch (type) { case DATA_TYPE_STRING: { char *string = fnvpair_value_string(curr); - if (nvlist_print_json_string(fp, string) == -1) + if (nvlist_print_json_string(string, bufp, blen, + offp) == -1) return (-1); break; } case DATA_TYPE_BOOLEAN: { - FPRINTF(fp, "true"); + FPRINTF(bufp, blen, offp, "true"); break; } case DATA_TYPE_BOOLEAN_VALUE: { - FPRINTF(fp, "%s", fnvpair_value_boolean_value(curr) == - B_TRUE ? "true" : "false"); + FPRINTF(bufp, blen, offp, "%s", + fnvpair_value_boolean_value(curr) == B_TRUE ? 
+ "true" : "false"); break; } case DATA_TYPE_BYTE: { - FPRINTF(fp, "%hhu", fnvpair_value_byte(curr)); + FPRINTF(bufp, blen, offp, "%hhu", + fnvpair_value_byte(curr)); break; } case DATA_TYPE_INT8: { - FPRINTF(fp, "%hhd", fnvpair_value_int8(curr)); + FPRINTF(bufp, blen, offp, "%hhd", + fnvpair_value_int8(curr)); break; } case DATA_TYPE_UINT8: { - FPRINTF(fp, "%hhu", fnvpair_value_uint8_t(curr)); + FPRINTF(bufp, blen, offp, "%hhu", + fnvpair_value_uint8_t(curr)); break; } case DATA_TYPE_INT16: { - FPRINTF(fp, "%hd", fnvpair_value_int16(curr)); + FPRINTF(bufp, blen, offp, "%hd", + fnvpair_value_int16(curr)); break; } case DATA_TYPE_UINT16: { - FPRINTF(fp, "%hu", fnvpair_value_uint16(curr)); + FPRINTF(bufp, blen, offp, "%hu", + fnvpair_value_uint16(curr)); break; } case DATA_TYPE_INT32: { - FPRINTF(fp, "%d", fnvpair_value_int32(curr)); + FPRINTF(bufp, blen, offp, "%d", + fnvpair_value_int32(curr)); break; } case DATA_TYPE_UINT32: { - FPRINTF(fp, "%u", fnvpair_value_uint32(curr)); + FPRINTF(bufp, blen, offp, "%u", + fnvpair_value_uint32(curr)); break; } case DATA_TYPE_INT64: { - FPRINTF(fp, "%lld", + FPRINTF(bufp, blen, offp, "%lld", (long long)fnvpair_value_int64(curr)); break; } case DATA_TYPE_UINT64: { - FPRINTF(fp, "%llu", + FPRINTF(bufp, blen, offp, "%llu", (unsigned long long)fnvpair_value_uint64(curr)); break; } @@ -203,20 +267,21 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl) case DATA_TYPE_HRTIME: { hrtime_t val; VERIFY0(nvpair_value_hrtime(curr, &val)); - FPRINTF(fp, "%llu", (unsigned long long)val); + FPRINTF(bufp, blen, offp, "%llu", + (unsigned long long)val); break; } case DATA_TYPE_DOUBLE: { double val; VERIFY0(nvpair_value_double(curr, &val)); - FPRINTF(fp, "%f", val); + FPRINTF(bufp, blen, offp, "%f", val); break; } case DATA_TYPE_NVLIST: { - if (nvlist_print_json(fp, - fnvpair_value_nvlist(curr)) == -1) + if (nvlist_do_json(fnvpair_value_nvlist(curr), bufp, + blen, offp) == -1) return (-1); break; } @@ -225,14 +290,15 @@ nvlist_print_json(FILE *fp, 
nvlist_t *nvl) char **val; uint_t valsz, i; VERIFY0(nvpair_value_string_array(curr, &val, &valsz)); - FPRINTF(fp, "["); + FPRINTF(bufp, blen, offp, "["); for (i = 0; i < valsz; i++) { if (i > 0) - FPRINTF(fp, ","); - if (nvlist_print_json_string(fp, val[i]) == -1) + FPRINTF(bufp, blen, offp, ","); + if (nvlist_print_json_string(val[i], bufp, + blen, offp) == -1) return (-1); } - FPRINTF(fp, "]"); + FPRINTF(bufp, blen, offp, "]"); break; } @@ -240,14 +306,15 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl) nvlist_t **val; uint_t valsz, i; VERIFY0(nvpair_value_nvlist_array(curr, &val, &valsz)); - FPRINTF(fp, "["); + FPRINTF(bufp, blen, offp, "["); for (i = 0; i < valsz; i++) { if (i > 0) - FPRINTF(fp, ","); - if (nvlist_print_json(fp, val[i]) == -1) + FPRINTF(bufp, blen, offp, ","); + if (nvlist_do_json(val[i], bufp, blen, + offp) == -1) return (-1); } - FPRINTF(fp, "]"); + FPRINTF(bufp, blen, offp, "]"); break; } @@ -255,14 +322,14 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl) boolean_t *val; uint_t valsz, i; VERIFY0(nvpair_value_boolean_array(curr, &val, &valsz)); - FPRINTF(fp, "["); + FPRINTF(bufp, blen, offp, "["); for (i = 0; i < valsz; i++) { if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, val[i] == B_TRUE ? + FPRINTF(bufp, blen, offp, ","); + FPRINTF(bufp, blen, offp, val[i] == B_TRUE ? 
"true" : "false"); } - FPRINTF(fp, "]"); + FPRINTF(bufp, blen, offp, "]"); break; } @@ -270,13 +337,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl) uchar_t *val; uint_t valsz, i; VERIFY0(nvpair_value_byte_array(curr, &val, &valsz)); - FPRINTF(fp, "["); + FPRINTF(bufp, blen, offp, "["); for (i = 0; i < valsz; i++) { if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%hhu", val[i]); + FPRINTF(bufp, blen, offp, ","); + FPRINTF(bufp, blen, offp, "%hhu", val[i]); } - FPRINTF(fp, "]"); + FPRINTF(bufp, blen, offp, "]"); break; } @@ -284,13 +351,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl) uint8_t *val; uint_t valsz, i; VERIFY0(nvpair_value_uint8_array(curr, &val, &valsz)); - FPRINTF(fp, "["); + FPRINTF(bufp, blen, offp, "["); for (i = 0; i < valsz; i++) { if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%hhu", val[i]); + FPRINTF(bufp, blen, offp, ","); + FPRINTF(bufp, blen, offp, "%hhu", val[i]); } - FPRINTF(fp, "]"); + FPRINTF(bufp, blen, offp, "]"); break; } @@ -298,13 +365,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl) int8_t *val; uint_t valsz, i; VERIFY0(nvpair_value_int8_array(curr, &val, &valsz)); - FPRINTF(fp, "["); + FPRINTF(bufp, blen, offp, "["); for (i = 0; i < valsz; i++) { if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%hd", val[i]); + FPRINTF(bufp, blen, offp, ","); + FPRINTF(bufp, blen, offp, "%hd", val[i]); } - FPRINTF(fp, "]"); + FPRINTF(bufp, blen, offp, "]"); break; } @@ -312,13 +379,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl) uint16_t *val; uint_t valsz, i; VERIFY0(nvpair_value_uint16_array(curr, &val, &valsz)); - FPRINTF(fp, "["); + FPRINTF(bufp, blen, offp, "["); for (i = 0; i < valsz; i++) { if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%hu", val[i]); + FPRINTF(bufp, blen, offp, ","); + FPRINTF(bufp, blen, offp, "%hu", val[i]); } - FPRINTF(fp, "]"); + FPRINTF(bufp, blen, offp, "]"); break; } @@ -326,13 +393,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl) int16_t *val; uint_t valsz, i; VERIFY0(nvpair_value_int16_array(curr, &val, &valsz)); - 
FPRINTF(fp, "["); + FPRINTF(bufp, blen, offp, "["); for (i = 0; i < valsz; i++) { if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%hd", val[i]); + FPRINTF(bufp, blen, offp, ","); + FPRINTF(bufp, blen, offp, "%hd", val[i]); } - FPRINTF(fp, "]"); + FPRINTF(bufp, blen, offp, "]"); break; } @@ -340,13 +407,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl) uint32_t *val; uint_t valsz, i; VERIFY0(nvpair_value_uint32_array(curr, &val, &valsz)); - FPRINTF(fp, "["); + FPRINTF(bufp, blen, offp, "["); for (i = 0; i < valsz; i++) { if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%u", val[i]); + FPRINTF(bufp, blen, offp, ","); + FPRINTF(bufp, blen, offp, "%u", val[i]); } - FPRINTF(fp, "]"); + FPRINTF(bufp, blen, offp, "]"); break; } @@ -354,13 +421,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl) int32_t *val; uint_t valsz, i; VERIFY0(nvpair_value_int32_array(curr, &val, &valsz)); - FPRINTF(fp, "["); + FPRINTF(bufp, blen, offp, "["); for (i = 0; i < valsz; i++) { if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%d", val[i]); + FPRINTF(bufp, blen, offp, ","); + FPRINTF(bufp, blen, offp, "%d", val[i]); } - FPRINTF(fp, "]"); + FPRINTF(bufp, blen, offp, "]"); break; } @@ -368,14 +435,14 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl) uint64_t *val; uint_t valsz, i; VERIFY0(nvpair_value_uint64_array(curr, &val, &valsz)); - FPRINTF(fp, "["); + FPRINTF(bufp, blen, offp, "["); for (i = 0; i < valsz; i++) { if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%llu", + FPRINTF(bufp, blen, offp, ","); + FPRINTF(bufp, blen, offp, "%llu", (unsigned long long)val[i]); } - FPRINTF(fp, "]"); + FPRINTF(bufp, blen, offp, "]"); break; } @@ -383,13 +450,14 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl) int64_t *val; uint_t valsz, i; VERIFY0(nvpair_value_int64_array(curr, &val, &valsz)); - FPRINTF(fp, "["); + FPRINTF(bufp, blen, offp, "["); for (i = 0; i < valsz; i++) { if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%lld", (long long)val[i]); + FPRINTF(bufp, blen, offp, ","); + FPRINTF(bufp, blen, offp, "%lld", + 
(long long)val[i]); } - FPRINTF(fp, "]"); + FPRINTF(bufp, blen, offp, "]"); break; } @@ -398,6 +466,41 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl) } } - FPRINTF(fp, "}"); + FPRINTF(bufp, blen, offp, "}"); return (0); } + +int +nvlist_dump_json(nvlist_t *nvl, char **bufp) +{ + off_t off = 0; + size_t l = 0; + + *bufp = NULL; + return (nvlist_do_json(nvl, bufp, &l, &off)); +} + +/* ARGSUSED */ +void +nvlist_dump_json_free(nvlist_t *nvl, char *buf) +{ + free(buf); +} + +/* + * Dump a JSON-formatted representation of an nvlist to the provided FILE *. + * This routine does not output any new-lines or additional whitespace other + * than that contained in strings, nor does it call fflush(3C). + */ +int +nvlist_print_json(FILE *fp, nvlist_t *nvl) +{ + int ret; + char *buf; + + if ((ret = nvlist_dump_json(nvl, &buf)) < 0) + return (ret); + ret = fprintf(fp, "%s", buf); + nvlist_dump_json_free(nvl, buf); + return (ret); +} diff --git a/usr/src/lib/librename/Makefile b/usr/src/lib/librename/Makefile new file mode 100644 index 0000000000..222523d9a1 --- /dev/null +++ b/usr/src/lib/librename/Makefile @@ -0,0 +1,44 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. 
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2014 Joyent, Inc.  All rights reserved.
#

LIBRARY =	librename.a
VERS =		.1

#
# No trailing continuation after the last object: a dangling backslash would
# splice whatever line follows into OBJECTS.
#
OBJECTS =	librename.o

include ../../Makefile.lib

LIBS =		$(DYNLIB) $(LINTLIB)
LDLIBS +=	-lc
CPPFLAGS +=	-I../common

SRCDIR =	../common

.KEEP_STATE:

all: $(LIBS)

lint: lintcheck

include ../../Makefile.targ
+ */
+
+#include <librename.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <synch.h>
+
+/*
+ * Progress of a commit.  librename_atomic_commit() advances the state after
+ * each step it finishes; only LIBRENAME_ATOMIC_COMPLETED is ever tested.
+ */
+typedef enum librename_atomic_state {
+	LIBRENAME_ATOMIC_INITIAL = 0x0,
+	LIBRENAME_ATOMIC_FSYNC,
+	LIBRENAME_ATOMIC_RENAME,
+	LIBRENAME_ATOMIC_POSTSYNC,
+	LIBRENAME_ATOMIC_COMPLETED
+} librename_atomic_state_t;
+
+/*
+ * Handle for one pending atomic replacement.  Members marked RO are written
+ * once by librename_atomic_fdinit() and never modified afterwards.
+ */
+struct librename_atomic {
+	char *lra_fname;			/* RO */
+	char *lra_altname;			/* RO */
+	int lra_dirfd;				/* RO */
+	int lra_tmpfd;				/* RO */
+	mutex_t lra_lock;
+	librename_atomic_state_t lra_state;	/* lra_lock */
+};
+
+/*
+ * Create a handle for atomically replacing 'file' inside the directory that
+ * 'fd' refers to.  A temporary file is opened in that directory, named
+ * "<prefix><file>" when a prefix is given and ".<pid>.<file>" otherwise.
+ * Neither 'file' nor 'prefix' may contain a '/'.
+ *
+ * The caller's fd is duplicated, so it may be closed once this returns.
+ * LIBRENAME_ATOMIC_NOUNLINK adds O_EXCL to the open of the temporary file and
+ * LIBRENAME_ATOMIC_CLOEXEC adds O_CLOEXEC to it.
+ *
+ * Returns 0 and stores the handle in *outp on success.  On failure an errno
+ * value is returned and nothing is left allocated or open.
+ */
+int
+librename_atomic_fdinit(int fd, const char *file, const char *prefix,
+    int mode, int flags, librename_atomic_t **outp)
+{
+	int ret;
+	int oflags;
+	librename_atomic_t *lrap;
+	struct stat st;
+
+	if (fd < 0 || file == NULL || outp == NULL)
+		return (EINVAL);
+
+	if (flags & ~(LIBRENAME_ATOMIC_NOUNLINK | LIBRENAME_ATOMIC_CLOEXEC))
+		return (EINVAL);
+
+	if (strchr(file, '/') != NULL)
+		return (EINVAL);
+
+	if (prefix != NULL && strchr(prefix, '/') != NULL)
+		return (EINVAL);
+
+	*outp = NULL;
+	lrap = malloc(sizeof (librename_atomic_t));
+	if (lrap == NULL)
+		return (errno);
+
+	if (fstat(fd, &st) != 0) {
+		ret = errno;
+		free(lrap);
+		return (ret);
+	}
+
+	if (!S_ISDIR(st.st_mode)) {
+		/*
+		 * lra_dirfd has not been assigned yet, so there is no
+		 * descriptor of ours to close on this path.
+		 */
+		free(lrap);
+		return (ENOTDIR);
+	}
+
+	if ((lrap->lra_dirfd = dup(fd)) == -1) {
+		ret = errno;
+		free(lrap);
+		return (ret);
+	}
+
+	lrap->lra_fname = strdup(file);
+	if (lrap->lra_fname == NULL) {
+		ret = errno;
+		if (close(lrap->lra_dirfd) != 0)
+			abort();
+		free(lrap);
+		return (ret);
+	}
+
+	if (prefix == NULL) {
+		ret = asprintf(&lrap->lra_altname, ".%d.%s", (int)getpid(),
+		    file);
+	} else {
+		ret = asprintf(&lrap->lra_altname, "%s%s", prefix, file);
+	}
+	if (ret == -1) {
+		/* Save errno first; the cleanup calls below may change it. */
+		ret = errno;
+		free(lrap->lra_fname);
+		if (close(lrap->lra_dirfd) != 0)
+			abort();
+		free(lrap);
+		return (ret);
+	}
+
+	oflags = O_CREAT | O_TRUNC | O_RDWR | O_NOFOLLOW;
+	if (flags & LIBRENAME_ATOMIC_NOUNLINK)
+		oflags |= O_EXCL;
+
+	if (flags & LIBRENAME_ATOMIC_CLOEXEC)
+		oflags |= O_CLOEXEC;
+
+	lrap->lra_tmpfd = openat(lrap->lra_dirfd, lrap->lra_altname,
+	    oflags, mode);
+	if (lrap->lra_tmpfd < 0) {
+		ret = errno;
+		free(lrap->lra_altname);
+		free(lrap->lra_fname);
+		if (close(lrap->lra_dirfd) != 0)
+			abort();
+		free(lrap);
+		return (ret);
+	}
+
+	if (mutex_init(&lrap->lra_lock, USYNC_THREAD, NULL) != 0)
+		abort();
+
+	lrap->lra_state = LIBRENAME_ATOMIC_INITIAL;
+	*outp = lrap;
+	return (0);
+}
+
+/*
+ * Path-based wrapper around librename_atomic_fdinit(): opens 'dir' read-only,
+ * hands the descriptor over and closes its own copy again.  Returns 0 or an
+ * errno value.
+ */
+int
+librename_atomic_init(const char *dir, const char *file, const char *prefix,
+    int mode, int flags, librename_atomic_t **outp)
+{
+	int fd, ret;
+
+	if ((fd = open(dir, O_RDONLY)) < 0)
+		return (errno);
+
+	ret = librename_atomic_fdinit(fd, file, prefix, mode, flags, outp);
+	if (close(fd) != 0)
+		abort();
+
+	return (ret);
+}
+
+/*
+ * Return the descriptor of the temporary file.  The handle keeps ownership;
+ * librename_atomic_fini() closes it.
+ */
+int
+librename_atomic_fd(librename_atomic_t *lrap)
+{
+	return (lrap->lra_tmpfd);
+}
+
+/*
+ * To atomically commit a file, we need to go through and do the following:
+ *
+ *  o fsync the source
+ *  o run rename
+ *  o fsync the source again and the directory.
+ *
+ * Returns 0 on success or the errno of the first step that failed.  A handle
+ * that has already been committed successfully yields ENXIO.
+ */
+int
+librename_atomic_commit(librename_atomic_t *lrap)
+{
+	int ret = 0;
+
+	if (mutex_lock(&lrap->lra_lock) != 0)
+		abort();
+	if (lrap->lra_state == LIBRENAME_ATOMIC_COMPLETED) {
+		/* XXX What's a good errno to use here? */
+		ret = ENXIO;
+		goto out;
+	}
+
+	if (fsync(lrap->lra_tmpfd) != 0) {
+		ret = errno;
+		goto out;
+	}
+	lrap->lra_state = LIBRENAME_ATOMIC_FSYNC;
+
+	if (renameat(lrap->lra_dirfd, lrap->lra_altname, lrap->lra_dirfd,
+	    lrap->lra_fname) != 0) {
+		ret = errno;
+		goto out;
+	}
+	lrap->lra_state = LIBRENAME_ATOMIC_RENAME;
+
+	if (fsync(lrap->lra_tmpfd) != 0) {
+		ret = errno;
+		goto out;
+	}
+	lrap->lra_state = LIBRENAME_ATOMIC_POSTSYNC;
+
+	if (fsync(lrap->lra_dirfd) != 0) {
+		ret = errno;
+		goto out;
+	}
+	lrap->lra_state = LIBRENAME_ATOMIC_COMPLETED;
+
+out:
+	if (mutex_unlock(&lrap->lra_lock) != 0)
+		abort();
+	return (ret);
+}
+
+/*
+ * Release a handle: free both names, close both descriptors and destroy the
+ * lock.  Nothing here unlinks the temporary file, so a handle that was never
+ * committed leaves that file behind in the directory.
+ */
+void
+librename_atomic_fini(librename_atomic_t *lrap)
+{
+
+	free(lrap->lra_altname);
+	free(lrap->lra_fname);
+	if (close(lrap->lra_tmpfd) != 0)
+		abort();
+	if (close(lrap->lra_dirfd) != 0)
+		abort();
+	if (mutex_destroy(&lrap->lra_lock) != 0)
+		abort();
+	free(lrap);
+}
diff --git a/usr/src/lib/librename/common/librename.h b/usr/src/lib/librename/common/librename.h
new file mode 100644
index 0000000000..cb344f534c
--- /dev/null
+++ b/usr/src/lib/librename/common/librename.h
@@ -0,0 +1,43 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */ + +#ifndef _LIBRENAME_H +#define _LIBRENAME_H + +/* + * librename(3RENAME) public interfaces + */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct librename_atomic librename_atomic_t; + +#define LIBRENAME_ATOMIC_NOUNLINK 0x01 +#define LIBRENAME_ATOMIC_CLOEXEC 0x02 +extern int librename_atomic_init(const char *, const char *, const char *, + int, int, librename_atomic_t **); +extern int librename_atomic_fdinit(int, const char *, const char *, int, int, + librename_atomic_t **); +extern int librename_atomic_fd(librename_atomic_t *); +extern int librename_atomic_commit(librename_atomic_t *); +extern void librename_atomic_fini(librename_atomic_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBRENAME_H */ diff --git a/usr/src/lib/librename/common/llib-lrename b/usr/src/lib/librename/common/llib-lrename new file mode 100644 index 0000000000..6f1dd81a7b --- /dev/null +++ b/usr/src/lib/librename/common/llib-lrename @@ -0,0 +1,19 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. + */ + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + +#include <librename.h> diff --git a/usr/src/lib/librename/common/mapfile-vers b/usr/src/lib/librename/common/mapfile-vers new file mode 100644 index 0000000000..2b117f32f9 --- /dev/null +++ b/usr/src/lib/librename/common/mapfile-vers @@ -0,0 +1,41 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION ILLUMOS_1.0 { + global: + librename_atomic_commit; + librename_atomic_fd; + librename_atomic_fdinit; + librename_atomic_fini; + librename_atomic_init; + local: + *; +}; diff --git a/usr/src/lib/librename/i386/Makefile b/usr/src/lib/librename/i386/Makefile new file mode 100644 index 0000000000..41e699e8f8 --- /dev/null +++ b/usr/src/lib/librename/i386/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/librename/sparc/Makefile b/usr/src/lib/librename/sparc/Makefile new file mode 100644 index 0000000000..41e699e8f8 --- /dev/null +++ b/usr/src/lib/librename/sparc/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/librename/sparcv9/Makefile b/usr/src/lib/librename/sparcv9/Makefile new file mode 100644 index 0000000000..15d904c616 --- /dev/null +++ b/usr/src/lib/librename/sparcv9/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com +include ../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/libsocket/common/mapfile-vers b/usr/src/lib/libsocket/common/mapfile-vers index 2f7777f395..471774b462 100644 --- a/usr/src/lib/libsocket/common/mapfile-vers +++ b/usr/src/lib/libsocket/common/mapfile-vers @@ -39,6 +39,12 @@ $mapfile_version 2 +SYMBOL_VERSION ILLUMOS_0.2 { # reentrant ethers(3SOCKET) + global: + ether_aton_r; + ether_ntoa_r; +} ILLUMOS_0.1; + SYMBOL_VERSION ILLUMOS_0.1 { # Illumos additions global: accept4; diff --git a/usr/src/lib/libsocket/inet/ether_addr.c b/usr/src/lib/libsocket/inet/ether_addr.c index 37105bb302..523e7b472d 100644 --- a/usr/src/lib/libsocket/inet/ether_addr.c +++ b/usr/src/lib/libsocket/inet/ether_addr.c @@ -22,6 +22,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. + * Copyright (c) 2014, Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -37,8 +38,6 @@ * contributors. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * All routines necessary to deal the "ethers" database. The sources * contain mappings between 48 bit ethernet addresses and corresponding @@ -258,38 +257,42 @@ ea_buf(void) } /* - * Converts a 48 bit ethernet number to its string representation. + * Converts a 48 bit ethernet number to its string representation using a user + * defined buffer. + */ +char * +ether_ntoa_r(const struct ether_addr *e, char *buf) +{ + (void) sprintf(buf, "%x:%x:%x:%x:%x:%x", + e->ether_addr_octet[0], e->ether_addr_octet[1], + e->ether_addr_octet[2], e->ether_addr_octet[3], + e->ether_addr_octet[4], e->ether_addr_octet[5]); + return (buf); +} + +/* + * Converts a 48 bit ethernet number to its string representation using a + * per-thread buffer. */ char * ether_ntoa(const struct ether_addr *e) { eabuf_t *eabuf; - char *s; if ((eabuf = ea_buf()) == NULL) return (NULL); - s = eabuf->ea_string; - (void) sprintf(s, "%x:%x:%x:%x:%x:%x", - e->ether_addr_octet[0], e->ether_addr_octet[1], - e->ether_addr_octet[2], e->ether_addr_octet[3], - e->ether_addr_octet[4], e->ether_addr_octet[5]); - return (s); + return (ether_ntoa_r(e, eabuf->ea_string)); } /* - * Converts an ethernet address representation back into its 48 bits. + * Converts an ethernet address representation back into its 48 bits using a + * user defined buffer. 
*/ struct ether_addr * -ether_aton(const char *s) +ether_aton_r(const char *s, struct ether_addr *e) { - eabuf_t *eabuf; - struct ether_addr *e; int i; uint_t t[6]; - - if ((eabuf = ea_buf()) == NULL) - return (NULL); - e = &eabuf->ea_addr; i = sscanf(s, " %x:%x:%x:%x:%x:%x", &t[0], &t[1], &t[2], &t[3], &t[4], &t[5]); if (i != 6) @@ -298,3 +301,17 @@ ether_aton(const char *s) e->ether_addr_octet[i] = (uchar_t)t[i]; return (e); } + +/* + * Converts an ethernet address representation back into its 48 bits using a + * per-thread buffer. + */ +struct ether_addr * +ether_aton(const char *s) +{ + eabuf_t *eabuf; + + if ((eabuf = ea_buf()) == NULL) + return (NULL); + return (ether_aton_r(s, &eabuf->ea_addr)); +} diff --git a/usr/src/lib/varpd/Makefile b/usr/src/lib/varpd/Makefile new file mode 100644 index 0000000000..5fb179c1fe --- /dev/null +++ b/usr/src/lib/varpd/Makefile @@ -0,0 +1,33 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. 
+# + +SUBDIRS = libvarpd .WAIT direct files svp + +all := TARGET = all +clean := TARGET = clean +clobber := TARGET = clobber +check := TARGET = check +install := TARGET = install +install_h := TARGET = install_h +lint := TARGET = lint + +.KEEP_STATE: + +all clean clobber install install_h check lint: $(SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/lib/varpd/Makefile.plugin b/usr/src/lib/varpd/Makefile.plugin new file mode 100644 index 0000000000..67410742df --- /dev/null +++ b/usr/src/lib/varpd/Makefile.plugin @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +ROOTLIBDIR = $(ROOT)/usr/lib/varpd +ROOTLIBDIR64 = $(ROOT)/usr/lib/varpd/$(MACH64) + +MAPFILES += ../../libvarpd/common/mapfile-plugin diff --git a/usr/src/lib/varpd/direct/Makefile b/usr/src/lib/varpd/direct/Makefile new file mode 100644 index 0000000000..f026c620e6 --- /dev/null +++ b/usr/src/lib/varpd/direct/Makefile @@ -0,0 +1,40 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. 
+# + +include ../../Makefile.lib + +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET = all +clean := TARGET = clean +clobber := TARGET = clobber +install := TARGET = install +lint := TARGET = lint + +.KEEP_STATE: + +all clean clobber install lint: $(SUBDIRS) + +install_h: + +check: + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include ../../Makefile.targ diff --git a/usr/src/lib/varpd/direct/Makefile.com b/usr/src/lib/varpd/direct/Makefile.com new file mode 100644 index 0000000000..e48efcfcc0 --- /dev/null +++ b/usr/src/lib/varpd/direct/Makefile.com @@ -0,0 +1,35 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +LIBRARY = libvarpd_direct.a +VERS = .1 +OBJECTS = libvarpd_direct.o + +include ../../../Makefile.lib +include ../../Makefile.plugin + +LIBS = $(DYNLIB) +LDLIBS += -lc -lvarpd -lumem -lnvpair -lnsl +CPPFLAGS += -I../common + +SRCDIR = ../common + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +include ../../../Makefile.targ diff --git a/usr/src/lib/varpd/direct/amd64/Makefile b/usr/src/lib/varpd/direct/amd64/Makefile new file mode 100644 index 0000000000..5c586c1d40 --- /dev/null +++ b/usr/src/lib/varpd/direct/amd64/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. 
A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/varpd/direct/common/libvarpd_direct.c b/usr/src/lib/varpd/direct/common/libvarpd_direct.c new file mode 100644 index 0000000000..fd2ee0154a --- /dev/null +++ b/usr/src/lib/varpd/direct/common/libvarpd_direct.c @@ -0,0 +1,395 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ + +/* + * Point to point plug-in for varpd. 
+ */
+
+#include <libvarpd_provider.h>
+#include <umem.h>
+#include <errno.h>
+#include <thread.h>
+#include <synch.h>
+#include <strings.h>
+#include <assert.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <libnvpair.h>
+
+/*
+ * Per-instance state.  vad_hip and vad_hport record whether vad_ip and
+ * vad_port currently hold a value.
+ */
+typedef struct varpd_direct {
+	overlay_plugin_dest_t	vad_dest;	/* RO */
+	mutex_t			vad_lock;	/* Protects the rest */
+	boolean_t		vad_hip;
+	boolean_t		vad_hport;
+	struct in6_addr		vad_ip;
+	uint16_t		vad_port;
+} varpd_direct_t;
+
+/* Property names; index 0 is the destination IP, index 1 the port. */
+static const char *varpd_direct_props[] = {
+	"direct/dest_ip",
+	"direct/dest_port"
+};
+
+/*
+ * A destination is accepted when it uses only the IP and PORT bits and has at
+ * least one of them set.
+ *
+ * NOTE(review): this lets a PORT-only destination through, while
+ * varpd_direct_propinfo() asserts that the IP bit is always set -- confirm
+ * whether PORT-only should be rejected here.
+ */
+static boolean_t
+varpd_direct_valid_dest(overlay_plugin_dest_t dest)
+{
+	if (dest & ~(OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
+		return (B_FALSE);
+
+	if (!(dest & (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT)))
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Allocate a new instance with no IP or port set.  Returns 0 and stores the
+ * instance in *outp, ENOTSUP for an unsupported destination, ENOMEM when the
+ * allocation fails, or the error from mutex_init().
+ */
+/* ARGSUSED */
+static int
+varpd_direct_create(varpd_provider_handle_t *hdl, void **outp,
+    overlay_plugin_dest_t dest)
+{
+	int ret;
+	varpd_direct_t *vdp;
+
+	if (varpd_direct_valid_dest(dest) == B_FALSE)
+		return (ENOTSUP);
+
+	vdp = umem_alloc(sizeof (varpd_direct_t), UMEM_DEFAULT);
+	if (vdp == NULL)
+		return (ENOMEM);
+
+	if ((ret = mutex_init(&vdp->vad_lock, USYNC_THREAD, NULL)) != 0) {
+		umem_free(vdp, sizeof (varpd_direct_t));
+		return (ret);
+	}
+
+	vdp->vad_dest = dest;
+	vdp->vad_hip = B_FALSE;
+	vdp->vad_hport = B_FALSE;
+	*outp = vdp;
+	return (0);
+}
+
+/*
+ * The instance may start once every property its destination calls for has
+ * been set: the IP always, the port only when the destination includes a
+ * port.  Returns EAGAIN while something is still missing.
+ */
+static int
+varpd_direct_start(void *arg)
+{
+	varpd_direct_t *vdp = arg;
+
+	mutex_lock(&vdp->vad_lock);
+	if (vdp->vad_hip == B_FALSE ||
+	    ((vdp->vad_dest & OVERLAY_PLUGIN_D_PORT) &&
+	    vdp->vad_hport == B_FALSE)) {
+		mutex_unlock(&vdp->vad_lock);
+		return (EAGAIN);
+	}
+	mutex_unlock(&vdp->vad_lock);
+
+	return (0);
+}
+
+/* Nothing to undo: varpd_direct_start() only validates state. */
+/* ARGSUSED */
+static void
+varpd_direct_stop(void *arg)
+{
+}
+
+/* Tear down an instance made by varpd_direct_create() or _restore(). */
+static void
+varpd_direct_destroy(void *arg)
+{
+	varpd_direct_t *vdp = arg;
+
+	if (mutex_destroy(&vdp->vad_lock) != 0)
+		abort();
+	umem_free(vdp, sizeof (varpd_direct_t));
+}
+
+/* Copy the configured IP and port into *otp under the instance lock. */
+static int
+varpd_direct_default(void *arg, overlay_target_point_t *otp)
+{
+	varpd_direct_t *vdp = arg;
+
+	mutex_lock(&vdp->vad_lock);
+	bcopy(&vdp->vad_ip, &otp->otp_ip, sizeof (struct in6_addr));
+	otp->otp_port = vdp->vad_port;
+	mutex_unlock(&vdp->vad_lock);
+
+	return (VARPD_LOOKUP_OK);
+}
+
+/* Report one property for each destination component in vad_dest. */
+static int
+varpd_direct_nprops(void *arg, uint_t *nprops)
+{
+	const varpd_direct_t *vdp = arg;
+
+	*nprops = 0;
+	if (vdp->vad_dest & OVERLAY_PLUGIN_D_ETHERNET)
+		*nprops += 1;
+
+	if (vdp->vad_dest & OVERLAY_PLUGIN_D_IP)
+		*nprops += 1;
+
+	if (vdp->vad_dest & OVERLAY_PLUGIN_D_PORT)
+		*nprops += 1;
+
+	assert(*nprops == 1 || *nprops == 2);
+
+	return (0);
+}
+
+/*
+ * Describe property 'propid' through 'vph'.  Returns EINVAL for an id this
+ * instance does not have.
+ */
+static int
+varpd_direct_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
+{
+	varpd_direct_t *vdp = arg;
+
+	/*
+	 * Because we only support IP + port combos right now, prop 0 should
+	 * always be the IP. We don't support a port without an IP.
+	 */
+	assert(vdp->vad_dest & OVERLAY_PLUGIN_D_IP);
+	if (propid == 0) {
+		libvarpd_prop_set_name(vph, varpd_direct_props[0]);
+		libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+		libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP);
+		libvarpd_prop_set_nodefault(vph);
+		return (0);
+	}
+
+	if (propid == 1 && vdp->vad_dest & OVERLAY_PLUGIN_D_PORT) {
+		libvarpd_prop_set_name(vph, varpd_direct_props[1]);
+		libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+		libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
+		libvarpd_prop_set_nodefault(vph);
+		libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Copy the value of property 'pname' into 'buf'.  On entry *sizep is the size
+ * of buf; on return it is the number of bytes written, which is 0 when the
+ * property has not been set.  The IP is a struct in6_addr and the port is
+ * widened to a uint64_t.  Returns EOVERFLOW when buf is too small and EINVAL
+ * for an unknown name.
+ */
+static int
+varpd_direct_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
+{
+	varpd_direct_t *vdp = arg;
+
+	/* direct/dest_ip */
+	if (strcmp(pname, varpd_direct_props[0]) == 0) {
+		if (*sizep < sizeof (struct in6_addr))
+			return (EOVERFLOW);
+		mutex_lock(&vdp->vad_lock);
+		if (vdp->vad_hip == B_FALSE) {
+			*sizep = 0;
+		} else {
+			bcopy(&vdp->vad_ip, buf, sizeof (struct in6_addr));
+			*sizep = sizeof (struct in6_addr);
+		}
+		mutex_unlock(&vdp->vad_lock);
+		return (0);
+	}
+
+	/* direct/dest_port */
+	if (strcmp(pname, varpd_direct_props[1]) == 0) {
+		uint64_t val;
+
+		if (*sizep < sizeof (uint64_t))
+			return (EOVERFLOW);
+		mutex_lock(&vdp->vad_lock);
+		if (vdp->vad_hport == B_FALSE) {
+			*sizep = 0;
+		} else {
+			val = vdp->vad_port;
+			bcopy(&val, buf, sizeof (uint64_t));
+			*sizep = sizeof (uint64_t);
+		}
+		mutex_unlock(&vdp->vad_lock);
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Set property 'pname' from 'buf'.  The IP must be a struct in6_addr that is
+ * not IPv4-compatible; the port is a uint64_t in the range 1..UINT16_MAX.
+ * Returns EOVERFLOW when 'size' is too small for the type and EINVAL for a
+ * rejected value or an unknown name.
+ */
+static int
+varpd_direct_setprop(void *arg, const char *pname, const void *buf,
+    const uint32_t size)
+{
+	varpd_direct_t *vdp = arg;
+
+	/* direct/dest_ip */
+	if (strcmp(pname, varpd_direct_props[0]) == 0) {
+		const struct in6_addr *ipv6 = buf;
+
+		if (size < sizeof (struct in6_addr))
+			return (EOVERFLOW);
+		/*
+		 * XXX What else should be disallowed?
+		 */
+		if (IN6_IS_ADDR_V4COMPAT(ipv6))
+			return (EINVAL);
+
+		mutex_lock(&vdp->vad_lock);
+		bcopy(buf, &vdp->vad_ip, sizeof (struct in6_addr));
+		vdp->vad_hip = B_TRUE;
+		mutex_unlock(&vdp->vad_lock);
+		return (0);
+	}
+
+	/* direct/dest_port */
+	if (strcmp(pname, varpd_direct_props[1]) == 0) {
+		const uint64_t *valp = buf;
+		if (size < sizeof (uint64_t))
+			return (EOVERFLOW);
+
+		if (*valp == 0 || *valp > UINT16_MAX)
+			return (EINVAL);
+
+		mutex_lock(&vdp->vad_lock);
+		vdp->vad_port = (uint16_t)*valp;
+		vdp->vad_hport = B_TRUE;
+		mutex_unlock(&vdp->vad_lock);
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Store whichever properties are set into 'nvp': the port as a uint16 and
+ * the IP in its inet_ntop() text form.  Returns 0 or the nvlist error.
+ */
+static int
+varpd_direct_save(void *arg, nvlist_t *nvp)
+{
+	int ret;
+	varpd_direct_t *vdp = arg;
+
+	mutex_lock(&vdp->vad_lock);
+	if (vdp->vad_hport == B_TRUE) {
+		if ((ret = nvlist_add_uint16(nvp, varpd_direct_props[1],
+		    vdp->vad_port)) != 0) {
+			mutex_unlock(&vdp->vad_lock);
+			return (ret);
+		}
+	}
+
+	if (vdp->vad_hip == B_TRUE) {
+		char buf[INET6_ADDRSTRLEN];
+
+		if (inet_ntop(AF_INET6, &vdp->vad_ip, buf, sizeof (buf)) ==
+		    NULL)
+			abort();
+		if ((ret = nvlist_add_string(nvp, varpd_direct_props[0],
+		    buf)) != 0) {
+			mutex_unlock(&vdp->vad_lock);
+			return (ret);
+		}
+	}
+	mutex_unlock(&vdp->vad_lock);
+
+	return (0);
+}
+
+/*
+ * Rebuild an instance from an nvlist written by varpd_direct_save().  A
+ * missing property (ENOENT) just leaves it unset; any other lookup error, or
+ * an IP string that inet_pton() rejects, fails the restore.
+ */
+/* ARGSUSED */
+static int
+varpd_direct_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
+    overlay_plugin_dest_t dest, void **outp)
+{
+	int ret;
+	char *ipstr;
+	varpd_direct_t *vdp;
+
+	if (varpd_direct_valid_dest(dest) == B_FALSE)
+		return (ENOTSUP);
+
+	vdp = umem_alloc(sizeof (varpd_direct_t), UMEM_DEFAULT);
+	if (vdp == NULL)
+		return (ENOMEM);
+
+	if ((ret = mutex_init(&vdp->vad_lock, USYNC_THREAD, NULL)) != 0) {
+		umem_free(vdp, sizeof (varpd_direct_t));
+		return (ret);
+	}
+
+	/*
+	 * Nothing else assigns these members on this path, so set every one
+	 * that is read later.
+	 */
+	vdp->vad_dest = dest;
+
+	if ((ret = nvlist_lookup_uint16(nvp, varpd_direct_props[1],
+	    &vdp->vad_port)) != 0) {
+		if (ret != ENOENT) {
+			if (mutex_destroy(&vdp->vad_lock) != 0)
+				abort();
+			umem_free(vdp, sizeof (varpd_direct_t));
+			return (ret);
+		}
+		vdp->vad_hport = B_FALSE;
+	} else {
+		vdp->vad_hport = B_TRUE;
+	}
+
+	if ((ret = nvlist_lookup_string(nvp, varpd_direct_props[0],
+	    &ipstr)) != 0) {
+		if (ret != ENOENT) {
+			if (mutex_destroy(&vdp->vad_lock) != 0)
+				abort();
+			umem_free(vdp, sizeof (varpd_direct_t));
+			return (ret);
+		}
+		vdp->vad_hip = B_FALSE;
+	} else {
+		ret = inet_pton(AF_INET6, ipstr, &vdp->vad_ip);
+		/*
+		 * inet_pton is only defined to return -1 with errno set to
+		 * EAFNOSUPPORT, which really, shouldn't happen.
+		 */
+		if (ret == -1) {
+			assert(errno == EAFNOSUPPORT);
+			abort();
+		}
+		if (ret == 0) {
+			if (mutex_destroy(&vdp->vad_lock) != 0)
+				abort();
+			umem_free(vdp, sizeof (varpd_direct_t));
+			return (EINVAL);
+		}
+		vdp->vad_hip = B_TRUE;
+	}
+
+	*outp = vdp;
+	return (0);
+}
+
+static const varpd_plugin_ops_t varpd_direct_ops = {
+	0,
+	varpd_direct_create,
+	varpd_direct_start,
+	varpd_direct_stop,
+	varpd_direct_destroy,
+	varpd_direct_default,
+	NULL,
+	varpd_direct_nprops,
+	varpd_direct_propinfo,
+	varpd_direct_getprop,
+	varpd_direct_setprop,
+	varpd_direct_save,
+	varpd_direct_restore
+};
+
+/* Library initializer: register this plugin under the name "direct". */
+#pragma init(varpd_direct_init)
+static void
+varpd_direct_init(void)
+{
+	int err;
+	varpd_plugin_register_t *vpr;
+
+	vpr = libvarpd_plugin_alloc(VARPD_VERSION_ONE, &err);
+	/* XXX How should we communicate this failure? */
+	if (vpr == NULL)
+		return;
+
+	vpr->vpr_mode = OVERLAY_TARGET_POINT;
+	vpr->vpr_name = "direct";
+	vpr->vpr_ops = &varpd_direct_ops;
+	/* XXX We care about failure, but what do we do? */
+	(void) libvarpd_plugin_register(vpr);
+	libvarpd_plugin_free(vpr);
+}
diff --git a/usr/src/lib/varpd/direct/common/llib-lvarpd_direct b/usr/src/lib/varpd/direct/common/llib-lvarpd_direct
new file mode 100644
index 0000000000..31b3d36fbe
--- /dev/null
+++ b/usr/src/lib/varpd/direct/common/llib-lvarpd_direct
@@ -0,0 +1,18 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */ + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + diff --git a/usr/src/lib/varpd/direct/common/mapfile-vers b/usr/src/lib/varpd/direct/common/mapfile-vers new file mode 100644 index 0000000000..642ef72adc --- /dev/null +++ b/usr/src/lib/varpd/direct/common/mapfile-vers @@ -0,0 +1,35 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION SUNWprivate { + local: + *; +}; diff --git a/usr/src/lib/varpd/direct/i386/Makefile b/usr/src/lib/varpd/direct/i386/Makefile new file mode 100644 index 0000000000..41e699e8f8 --- /dev/null +++ b/usr/src/lib/varpd/direct/i386/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. 
+# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/direct/sparc/Makefile b/usr/src/lib/varpd/direct/sparc/Makefile new file mode 100644 index 0000000000..41e699e8f8 --- /dev/null +++ b/usr/src/lib/varpd/direct/sparc/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/direct/sparcv9/Makefile b/usr/src/lib/varpd/direct/sparcv9/Makefile new file mode 100644 index 0000000000..5c586c1d40 --- /dev/null +++ b/usr/src/lib/varpd/direct/sparcv9/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. 
+# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/varpd/files/Makefile b/usr/src/lib/varpd/files/Makefile new file mode 100644 index 0000000000..f026c620e6 --- /dev/null +++ b/usr/src/lib/varpd/files/Makefile @@ -0,0 +1,40 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../../Makefile.lib + +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET = all +clean := TARGET = clean +clobber := TARGET = clobber +install := TARGET = install +lint := TARGET = lint + +.KEEP_STATE: + +all clean clobber install lint: $(SUBDIRS) + +install_h: + +check: + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include ../../Makefile.targ diff --git a/usr/src/lib/varpd/files/Makefile.com b/usr/src/lib/varpd/files/Makefile.com new file mode 100644 index 0000000000..5433f79fd1 --- /dev/null +++ b/usr/src/lib/varpd/files/Makefile.com @@ -0,0 +1,36 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. 
+# + +LIBRARY = libvarpd_files.a +VERS = .1 +OBJECTS = libvarpd_files.o \ + libvarpd_files_json.o + +include ../../../Makefile.lib +include ../../Makefile.plugin + +LIBS = $(DYNLIB) +LDLIBS += -lc -lvarpd -lumem -lnvpair -lsocket -lnsl +CPPFLAGS += -I../common + +SRCDIR = ../common + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +include ../../../Makefile.targ diff --git a/usr/src/lib/varpd/files/amd64/Makefile b/usr/src/lib/varpd/files/amd64/Makefile new file mode 100644 index 0000000000..5c586c1d40 --- /dev/null +++ b/usr/src/lib/varpd/files/amd64/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/varpd/files/common/libvarpd_files.c b/usr/src/lib/varpd/files/common/libvarpd_files.c new file mode 100644 index 0000000000..0b6812bfc0 --- /dev/null +++ b/usr/src/lib/varpd/files/common/libvarpd_files.c @@ -0,0 +1,598 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2015, Joyent, Inc. All rights reserved. 
+ */ + +/* + * Files based plug in for varpd + * + * This is a dynamic varpd plug-in that has a static backing store. In this + * case, the idea here is that the full set of mappings is fixed at creation + * time and specified in a single file which is currently expected to be in a + * JSON format of the following form: + * + * { + * "aa:bb:cc:dd:ee:ff": { + * "arp": "10.23.69.1", + * "ndp": "2600:3c00::f03c:91ff:fe96:a264", + * "ip": "192.168.1.1", + * "port": 8080 + * } + * } + */ + +#include <libvarpd_provider.h> +#include <umem.h> +#include <errno.h> +#include <thread.h> +#include <synch.h> +#include <strings.h> +#include <assert.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <libnvpair.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/ethernet.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include <libvarpd_files_json.h> + +typedef struct varpd_files { + overlay_plugin_dest_t vaf_dest; /* RO */ + varpd_provider_handle_t *vaf_hdl; /* RO */ + char *vaf_path; /* WO */ + nvlist_t *vaf_nvl; /* WO */ + uint64_t vaf_nmisses; /* Atomic */ + uint64_t vaf_narp; /* Atomic */ +} varpd_files_t; + +static const char *varpd_files_props[] = { + "files/config" +}; + +static boolean_t +varpd_files_valid_dest(overlay_plugin_dest_t dest) +{ + if (dest & ~(OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT)) + return (B_FALSE); + + if (!(dest & (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))) + return (B_FALSE); + + return (B_TRUE); +} + +static int +varpd_files_create(varpd_provider_handle_t *hdl, void **outp, + overlay_plugin_dest_t dest) +{ + varpd_files_t *vaf; + + if (varpd_files_valid_dest(dest) == B_FALSE) + return (ENOTSUP); + + vaf = umem_alloc(sizeof (varpd_files_t), UMEM_DEFAULT); + if (vaf == NULL) + return (ENOMEM); + + bzero(vaf, sizeof (varpd_files_t)); + vaf->vaf_dest = dest; + vaf->vaf_path = NULL; + vaf->vaf_nvl = NULL; + vaf->vaf_hdl = hdl; + *outp = vaf; + return (0); +} 
+ +static int +varpd_files_normalize_nvlist(varpd_files_t *vaf, nvlist_t *nvl) +{ + int ret; + nvlist_t *out; + nvpair_t *pair; + + if ((ret = nvlist_alloc(&out, NV_UNIQUE_NAME, 0)) != 0) + return (ret); + + for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; + pair = nvlist_next_nvpair(nvl, pair)) { + char *name, fname[ETHERADDRSTRL]; + nvlist_t *data; + struct ether_addr ether, *e; + e = ðer; + + if (nvpair_type(pair) != DATA_TYPE_NVLIST) { + nvlist_free(out); + return (EINVAL); + } + + name = nvpair_name(pair); + if ((ret = nvpair_value_nvlist(pair, &data)) != 0) { + nvlist_free(out); + return (EINVAL); + } + + if (ether_aton_r(name, e) == NULL) { + nvlist_free(out); + return (EINVAL); + } + + if (ether_ntoa_r(e, fname) == NULL) { + nvlist_free(out); + return (ENOMEM); + } + + if ((ret = nvlist_add_nvlist(out, fname, data)) != 0) { + nvlist_free(out); + return (EINVAL); + } + } + + vaf->vaf_nvl = out; + return (0); +} + +static int +varpd_files_start(void *arg) +{ + int fd, ret; + void *maddr; + struct stat st; + nvlist_t *nvl; + varpd_files_t *vaf = arg; + + if (vaf->vaf_path == NULL) + return (EAGAIN); + + if ((fd = open(vaf->vaf_path, O_RDONLY)) < 0) + return (errno); + + if (fstat(fd, &st) != 0) { + ret = errno; + if (close(fd) != 0) + abort(); + return (ret); + } + + maddr = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd, 0); + if (maddr == NULL) { + ret = errno; + if (close(fd) != 0) + abort(); + return (ret); + } + + ret = nvlist_parse_json(maddr, st.st_size, &nvl, + NVJSON_FORCE_INTEGER); + if (ret == 0) { + ret = varpd_files_normalize_nvlist(vaf, nvl); + nvlist_free(nvl); + } + if (munmap(maddr, st.st_size) != 0) + abort(); + if (close(fd) != 0) + abort(); + + return (ret); +} + +static void +varpd_files_stop(void *arg) +{ + varpd_files_t *vaf = arg; + + nvlist_free(vaf->vaf_nvl); + vaf->vaf_nvl = NULL; +} + +static void +varpd_files_destroy(void *arg) +{ + varpd_files_t *vaf = arg; + + assert(vaf->vaf_nvl == NULL); + if 
(vaf->vaf_path != NULL) { + umem_free(vaf->vaf_path, strlen(vaf->vaf_path) + 1); + vaf->vaf_path = NULL; + } + umem_free(vaf, sizeof (varpd_files_t)); +} + +static void +varpd_files_lookup(void *arg, varpd_query_handle_t *qh, + const overlay_targ_lookup_t *otl, overlay_target_point_t *otp) +{ + char macstr[ETHERADDRSTRL], *ipstr; + nvlist_t *nvl; + varpd_files_t *vaf = arg; + int32_t port; + static const uint8_t bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + + /* We don't support a default */ + if (otl == NULL) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + if (otl->otl_sap == ETHERTYPE_ARP) { + libvarpd_plugin_proxy_arp(vaf->vaf_hdl, qh, otl); + return; + } + + if (otl->otl_sap == ETHERTYPE_IPV6 && + otl->otl_dstaddr[0] == 0x33 && + otl->otl_dstaddr[1] == 0x33) { + libvarpd_plugin_proxy_ndp(vaf->vaf_hdl, qh, otl); + return; + } + + if (otl->otl_sap == ETHERTYPE_IP && + bcmp(otl->otl_dstaddr, bcast, ETHERADDRL) == 0) { + char *mac; + struct ether_addr a, *addr; + + addr = &a; + if (ether_ntoa_r((struct ether_addr *)otl->otl_srcaddr, + macstr) == NULL) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + if (nvlist_lookup_nvlist(vaf->vaf_nvl, macstr, &nvl) != 0) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + if (nvlist_lookup_string(nvl, "dhcp-proxy", &mac) != 0) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + if (ether_aton_r(mac, addr) == NULL) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + libvarpd_plugin_proxy_dhcp(vaf->vaf_hdl, qh, otl); + return; + } + + if (ether_ntoa_r((struct ether_addr *)otl->otl_dstaddr, + macstr) == NULL) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + if (nvlist_lookup_nvlist(vaf->vaf_nvl, macstr, &nvl) != 0) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + if (nvlist_lookup_int32(nvl, "port", &port) != 0) { + libvarpd_plugin_query_reply(qh, 
VARPD_LOOKUP_DROP); + return; + } + + if (port <= 0 || port > UINT16_MAX) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + otp->otp_port = port; + + if (nvlist_lookup_string(nvl, "ip", &ipstr) != 0) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + + /* + * Try to parse it as a v6 address and then if it's not, try to + * transform it into a v4 address which we'll then wrap it into a v4 + * mapped address. + */ + if (inet_pton(AF_INET6, ipstr, &otp->otp_ip) != 1) { + uint32_t v4; + if (inet_pton(AF_INET, ipstr, &v4) != 1) { + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP); + return; + } + IN6_IPADDR_TO_V4MAPPED(v4, &otp->otp_ip); + } + + libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_OK); +} + +static int +varpd_files_nprops(void *arg, uint_t *nprops) +{ + *nprops = 1; + return (0); +} + +static int +varpd_files_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph) +{ + if (propid != 0) + return (EINVAL); + + libvarpd_prop_set_name(vph, varpd_files_props[0]); + libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW); + libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING); + libvarpd_prop_set_nodefault(vph); + return (0); +} + +static int +varpd_files_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep) +{ + varpd_files_t *vaf = arg; + + if (strcmp(pname, varpd_files_props[0]) != 0) + return (EINVAL); + + if (vaf->vaf_path != NULL) { + size_t len = strlen(vaf->vaf_path) + 1; + if (*sizep < len) + return (EOVERFLOW); + *sizep = len; + (void) strlcpy(buf, vaf->vaf_path, *sizep); + + } else { + *sizep = 0; + } + + return (0); +} + +static int +varpd_files_setprop(void *arg, const char *pname, const void *buf, + const uint32_t size) +{ + varpd_files_t *vaf = arg; + + if (strcmp(pname, varpd_files_props[0]) != 0) + return (EINVAL); + + if (vaf->vaf_path != NULL) + umem_free(vaf->vaf_path, strlen(vaf->vaf_path) + 1); + + vaf->vaf_path = umem_alloc(size, UMEM_DEFAULT); + if (vaf->vaf_path == NULL) + return 
(ENOMEM); + (void) strlcpy(vaf->vaf_path, buf, size); + return (0); +} + +static int +varpd_files_save(void *arg, nvlist_t *nvp) +{ + int ret; + varpd_files_t *vaf = arg; + + if (vaf->vaf_path == NULL) + return (0); + + if ((ret = nvlist_add_string(nvp, varpd_files_props[0], + vaf->vaf_path)) != 0) + return (ret); + + if ((ret = nvlist_add_uint64(nvp, "files/vaf_nmisses", + vaf->vaf_nmisses)) != 0) + return (ret); + + if ((ret = nvlist_add_uint64(nvp, "files/vaf_narp", + vaf->vaf_narp)) != 0) + return (ret); + return (0); +} + +static int +varpd_files_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl, + overlay_plugin_dest_t dest, void **outp) +{ + varpd_files_t *vaf; + char *str; + int ret; + uint64_t nmisses, narp; + + if (varpd_files_valid_dest(dest) == B_FALSE) + return (EINVAL); + + ret = nvlist_lookup_string(nvp, varpd_files_props[0], &str); + if (ret != 0 && ret != ENOENT) + return (ret); + else if (ret == ENOENT) + str = NULL; + + if (nvlist_lookup_uint64(nvp, "files/vaf_nmisses", &nmisses) != 0) + return (EINVAL); + if (nvlist_lookup_uint64(nvp, "files/vaf_narp", &narp) != 0) + return (EINVAL); + + vaf = umem_alloc(sizeof (varpd_files_t), UMEM_DEFAULT); + if (vaf == NULL) + return (ENOMEM); + + bzero(vaf, sizeof (varpd_files_t)); + vaf->vaf_dest = dest; + if (str != NULL) { + size_t len = strlen(str) + 1; + vaf->vaf_path = umem_alloc(len, UMEM_DEFAULT); + if (vaf->vaf_path == NULL) { + umem_free(vaf, sizeof (varpd_files_t)); + return (ENOMEM); + } + (void) strlcpy(vaf->vaf_path, str, len); + } + + vaf->vaf_hdl = hdl; + *outp = vaf; + return (0); +} + +static void +varpd_files_proxy_arp(void *arg, varpd_arp_handle_t *vah, int kind, + const struct sockaddr *sock, uint8_t *out) +{ + varpd_files_t *vaf = arg; + const struct sockaddr_in *ip; + const struct sockaddr_in6 *ip6; + nvpair_t *pair; + + if (kind != VARPD_QTYPE_ETHERNET) { + libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP); + return; + } + + if (sock->sa_family != AF_INET && sock->sa_family != 
AF_INET6) { + libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP); + return; + } + + ip = (const struct sockaddr_in *)sock; + ip6 = (const struct sockaddr_in6 *)sock; + for (pair = nvlist_next_nvpair(vaf->vaf_nvl, NULL); pair != NULL; + pair = nvlist_next_nvpair(vaf->vaf_nvl, pair)) { + char *mac, *ipstr; + nvlist_t *data; + struct in_addr ia; + struct in6_addr ia6; + struct ether_addr ether, *e; + e = ðer; + + if (nvpair_type(pair) != DATA_TYPE_NVLIST) + continue; + + mac = nvpair_name(pair); + if (nvpair_value_nvlist(pair, &data) != 0) + continue; + + + if (sock->sa_family == AF_INET) { + if (nvlist_lookup_string(data, "arp", &ipstr) != 0) + continue; + + if (inet_pton(AF_INET, ipstr, &ia) != 1) + continue; + + if (bcmp(&ia, &ip->sin_addr, + sizeof (struct in_addr)) != 0) + continue; + } else { + if (nvlist_lookup_string(data, "ndp", &ipstr) != 0) + continue; + + if (inet_pton(AF_INET6, ipstr, &ia6) != 1) + continue; + + if (bcmp(&ia6, &ip6->sin6_addr, + sizeof (struct in6_addr)) != 0) + continue; + } + + /* XXX Crappy errno */ + if (ether_aton_r(mac, e) == NULL) { + libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP); + return; + } + + bcopy(e, out, ETHERADDRL); + libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_OK); + return; + } + + libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP); +} + +static void +varpd_files_proxy_dhcp(void *arg, varpd_dhcp_handle_t *vdh, int type, + const overlay_targ_lookup_t *otl, uint8_t *out) +{ + varpd_files_t *vaf = arg; + nvlist_t *nvl; + char macstr[ETHERADDRSTRL], *mac; + struct ether_addr a, *addr; + + addr = &a; + if (type != VARPD_QTYPE_ETHERNET) { + libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP); + return; + } + + if (ether_ntoa_r((struct ether_addr *)otl->otl_srcaddr, + macstr) == NULL) { + libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP); + return; + } + + if (nvlist_lookup_nvlist(vaf->vaf_nvl, macstr, &nvl) != 0) { + libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP); + return; + } + + if (nvlist_lookup_string(nvl, 
"dhcp-proxy", &mac) != 0) { + libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP); + return; + } + + if (ether_aton_r(mac, addr) == NULL) { + libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP); + return; + } + + bcopy(addr, out, ETHERADDRL); + libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_OK); +} + +static const varpd_plugin_ops_t varpd_files_ops = { + 0, + varpd_files_create, + varpd_files_start, + varpd_files_stop, + varpd_files_destroy, + NULL, + varpd_files_lookup, + varpd_files_nprops, + varpd_files_propinfo, + varpd_files_getprop, + varpd_files_setprop, + varpd_files_save, + varpd_files_restore, + varpd_files_proxy_arp, + varpd_files_proxy_dhcp +}; + +#pragma init(varpd_files_init) +static void +varpd_files_init(void) +{ + int err; + varpd_plugin_register_t *vpr; + + vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err); + /* XXX How should we communicate this failure? */ + if (vpr == NULL) + return; + + vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC; + vpr->vpr_name = "files"; + vpr->vpr_ops = &varpd_files_ops; + /* XXX We care about failure, but what do we do? */ + (void) libvarpd_plugin_register(vpr); + libvarpd_plugin_free(vpr); +} diff --git a/usr/src/lib/varpd/files/common/libvarpd_files_json.c b/usr/src/lib/varpd/files/common/libvarpd_files_json.c new file mode 100644 index 0000000000..ed1f34b9fe --- /dev/null +++ b/usr/src/lib/varpd/files/common/libvarpd_files_json.c @@ -0,0 +1,744 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#include <strings.h> +#include <errno.h> +#include <libnvpair.h> + +#include <libvarpd_files_json.h> + +typedef enum json_type { + JSON_TYPE_NOTHING, + JSON_TYPE_STRING = 1, + JSON_TYPE_INTEGER, + JSON_TYPE_DOUBLE, + JSON_TYPE_BOOLEAN, + JSON_TYPE_NULL, + JSON_TYPE_OBJECT, + JSON_TYPE_ARRAY +} json_type_t; + +typedef enum parse_state { + PARSE_ERROR = -1, + PARSE_DONE = 0, + PARSE_REST, + PARSE_OBJECT, + PARSE_KEY_STRING, + PARSE_COLON, + PARSE_STRING, + PARSE_OBJECT_COMMA, + PARSE_ARRAY, + PARSE_BAREWORD, + PARSE_NUMBER, + PARSE_ARRAY_VALUE, + PARSE_ARRAY_COMMA, +} parse_state_t; + +#define JSON_MARKER ".__json_" +#define JSON_MARKER_ARRAY JSON_MARKER "array" + +typedef struct parse_frame { + parse_state_t pf_ps; + nvlist_t *pf_nvl; + + char *pf_key; + void *pf_value; + json_type_t pf_value_type; + int pf_array_index; + + struct parse_frame *pf_next; +} parse_frame_t; + +typedef struct state { + char *s_in; + off_t s_pos; + size_t s_len; + + parse_frame_t *s_top; + + nvlist_parse_json_flags_t s_flags; +} state_t; + +typedef void (*parse_handler_t)(state_t *); + +static void +movestate(state_t *s, parse_state_t ps) +{ +#ifdef DEBUG + fprintf(stderr, "move state %d -> %d\n", s->s_top->pf_ps, ps); +#endif + s->s_top->pf_ps = ps; +} + +static void +pushstate(state_t *s, parse_state_t ps, parse_state_t retps) +{ + parse_frame_t *n = calloc(1, sizeof (*n)); + +#ifdef DEBUG + fprintf(stderr, "push state %d -> %d (ret %d)\n", s->s_top->pf_ps, ps, + retps); +#endif + + /* + * Store the state we'll return to when popping this + * frame: + */ + s->s_top->pf_ps = retps; + + /* + * Store the initial state for the new frame, and + * put it on top of the stack: + */ + n->pf_ps = ps; + n->pf_value_type = JSON_TYPE_NOTHING; + + n->pf_next = s->s_top; + s->s_top = n; +} + +static void +posterror(state_t *s, char *error) +{ + /* + * XXX do something better here. 
+ */ + if (s->s_flags & NVJSON_ERRORS_TO_STDERR) + fprintf(stderr, "error (pos %lld): %s\n", s->s_pos, error); + movestate(s, PARSE_ERROR); +} + +static char +popchar(state_t *s) +{ + if (s->s_pos > s->s_len) { + return (0); + } + return (s->s_in[s->s_pos++]); +} + +static char +peekchar(state_t *s) +{ + if (s->s_pos > s->s_len) { + return (0); + } + return (s->s_in[s->s_pos]); +} + +static void +discard_whitespace(state_t *s) +{ + while (isspace(peekchar(s))) + popchar(s); +} + +static char *escape_pairs[] = { + "\"\"", "\\\\", "//", "b\b", "f\f", "n\n", "r\r", "t\t", NULL +}; + +static char +collect_string_escape(state_t *s) +{ + int i; + char c = popchar(s); + + if (c == '\0') { + fprintf(stderr, "ERROR: eof mid-escape\n"); + return ('\0'); + } else if (c == 'u') { + int res; + int ndigs = 0; + char digs[5]; + /* + * Deal with 4-digit unicode escape. + */ + while (ndigs < 4) { + if ((digs[ndigs++] = popchar(s)) == '\0') { + fprintf(stderr, "ERROR: eof mid-escape\n"); + return ('\0'); + } + } + digs[4] = '\0'; + res = atoi(digs); + if (res > 127) { + fprintf(stderr, "ERROR: unicode escape above 0x7f\n"); + return ('\0'); + } + return (res); + } + + for (i = 0; escape_pairs[i] != NULL; i++) { + char *ep = escape_pairs[i]; + if (ep[0] == c) + return (ep[1]); + } + + fprintf(stderr, "ERROR: unrecognised escape char %c\n", c); + return ('\0'); +} + +static char * +collect_string(state_t *s) +{ + /* XXX make this not static: */ + char buf[1000]; + char *pos = buf; + + for (;;) { + char c = popchar(s); + if (c == '\0') { + /* + * Unexpected EOF + */ + fprintf(stderr, "ERROR: unexpected EOF mid-string\n"); + return (NULL); + } else if (c == '\\') { + char esc; + /* + * Escape Character. + * + * XXX better error handling here. + */ + if ((esc = collect_string_escape(s)) == '\0') + return (NULL); + *pos++ = esc; + } else if (c == '"') { + /* + * Legal End of String. 
+ */ + break; + } else { + *pos++ = c; + } + } + *pos = '\0'; + return (strdup(buf)); +} + +static char * +collect_bareword(state_t *s) +{ + /* XXX make this not static: */ + char out[100]; + char *pos = out; + char c; + for (;;) { + c = peekchar(s); + if (islower(c)) { + *pos++ = popchar(s); + } else { + /* + * We're done... + */ + *pos = '\0'; + break; + } + } + return (strdup(out)); +} + +static void +hdlr_bareword(state_t *s) +{ + char *str = collect_bareword(s); + if (strcmp(str, "true") == 0) { + s->s_top->pf_value_type = JSON_TYPE_BOOLEAN; + s->s_top->pf_value = (void *) B_TRUE; + } else if (strcmp(str, "false") == 0) { + s->s_top->pf_value_type = JSON_TYPE_BOOLEAN; + s->s_top->pf_value = (void *) B_FALSE; + } else if (strcmp(str, "null") == 0) { + s->s_top->pf_value_type = JSON_TYPE_NULL; + } else { + free(str); + return (posterror(s, "expected 'true', 'false' or 'null'")); + } + free(str); + return (movestate(s, PARSE_DONE)); +} + +static int +collect_number(state_t *s, boolean_t *isint, int32_t *result, + double *fresult __UNUSED) +{ + /* XXX make not static */ + char out[100]; + char *pos = out; + boolean_t neg = B_FALSE; + char c; + + if (peekchar(s) == '-') { + neg = B_TRUE; + popchar(s); + } + /* + * Read the 'int' portion: + */ + if (!isdigit(c = peekchar(s))) { + fprintf(stderr, "expected a digit (0-9)\n"); + return (-1); + } + for (;;) { + if (!isdigit(peekchar(s))) + break; + *pos++ = popchar(s); + } + if (peekchar(s) == '.' || peekchar(s) == 'e' || peekchar(s) == 'E') { + fprintf(stderr, "do not yet support FRACs or EXPs\n"); + return (-1); + } + + *isint = B_TRUE; + *pos = '\0'; + *result = neg == B_TRUE ? 
-atoi(out) : atoi(out); + return (0); +} + +static void +hdlr_number(state_t *s) +{ + boolean_t isint; + int32_t result; + double fresult; + + if (collect_number(s, &isint, &result, &fresult) != 0) { + return (posterror(s, "malformed number")); + } + + if (isint == B_TRUE) { + s->s_top->pf_value = (void *)(uintptr_t)result; + s->s_top->pf_value_type = JSON_TYPE_INTEGER; + } else { + s->s_top->pf_value = malloc(sizeof (fresult)); + bcopy(&fresult, s->s_top->pf_value, sizeof (fresult)); + s->s_top->pf_value_type = JSON_TYPE_DOUBLE; + } + + return (movestate(s, PARSE_DONE)); +} + +static void +hdlr_rest(state_t *s) +{ + char c; + discard_whitespace(s); + c = popchar(s); + switch (c) { + case '{': + return (movestate(s, PARSE_OBJECT)); + case '[': + return (movestate(s, PARSE_ARRAY)); + default: + return (posterror(s, "eof before object or array")); + } +} + +static int +add_empty_child(state_t *s) +{ + /* + * Here, we create an empty nvlist to represent this object + * or array: + */ + nvlist_t *empty; + if (nvlist_alloc(&empty, NV_UNIQUE_NAME, 0) != 0) + return (-1); + if (s->s_top->pf_next != NULL) { + /* + * If we're a child of the frame above, we store ourselves in + * that frame's nvlist: + */ + nvlist_t *nvl = s->s_top->pf_next->pf_nvl; + char *key = s->s_top->pf_next->pf_key; + + if (nvlist_add_nvlist(nvl, key, empty) != 0) { + nvlist_free(empty); + return (-1); + } + nvlist_free(empty); + if (nvlist_lookup_nvlist(nvl, key, &empty) != 0) { + return (-1); + } + } + s->s_top->pf_nvl = empty; + return (0); +} + +static int +decorate_array(state_t *s) +{ + /* + * When we are done creating an array, we store a 'length' + * property on it, as well as an internal-use marker value. 
+ */ + if (nvlist_add_boolean(s->s_top->pf_nvl, JSON_MARKER_ARRAY) != 0 || + nvlist_add_uint32(s->s_top->pf_nvl, "length", + s->s_top->pf_array_index)) + return (-1); + return (0); +} + +static void +hdlr_array(state_t *s) +{ + char c; + s->s_top->pf_value_type = JSON_TYPE_ARRAY; + + if (add_empty_child(s) == -1) + return (posterror(s, "nvlist error")); + + discard_whitespace(s); + c = peekchar(s); + switch (c) { + case ']': + popchar(s); + decorate_array(s); + return (movestate(s, PARSE_DONE)); + default: + return (movestate(s, PARSE_ARRAY_VALUE)); + } +} + +static void +hdlr_array_comma(state_t *s) +{ + discard_whitespace(s); + + switch (popchar(s)) { + case ']': + decorate_array(s); + return (movestate(s, PARSE_DONE)); + case ',': + return (movestate(s, PARSE_ARRAY_VALUE)); + default: + return (posterror(s, "expected ',' or ']'")); + } +} + +static void +hdlr_array_value(state_t *s) +{ + char c; + discard_whitespace(s); + + /* + * Generate keyname from the next array index: + */ + if (s->s_top->pf_key != NULL) { + fprintf(stderr, "pf_key not null! 
was %s\n", s->s_top->pf_key); + abort(); + } + s->s_top->pf_key = malloc(11); /* 10 digits in uint32_t */ + if (s->s_top->pf_key == NULL) + return (posterror(s, "could not allocate memory")); + (void) snprintf(s->s_top->pf_key, 11, "%d", s->s_top->pf_array_index++); + + /* + * Select which type handler we need for the next value: + */ + switch (c = peekchar(s)) { + case '"': + popchar(s); + return (pushstate(s, PARSE_STRING, PARSE_ARRAY_COMMA)); + case '{': + popchar(s); + return (pushstate(s, PARSE_OBJECT, PARSE_ARRAY_COMMA)); + case '[': + popchar(s); + return (pushstate(s, PARSE_ARRAY, PARSE_ARRAY_COMMA)); + default: + if (islower(c)) + return (pushstate(s, PARSE_BAREWORD, + PARSE_ARRAY_COMMA)); + else if (c == '-' || isdigit(c)) + return (pushstate(s, PARSE_NUMBER, PARSE_ARRAY_COMMA)); + else + return (posterror(s, "unexpected character at start " + "of value")); + } +} + +static void +hdlr_object(state_t *s) +{ + char c; + s->s_top->pf_value_type = JSON_TYPE_OBJECT; + + if (add_empty_child(s) == -1) + return (posterror(s, "nvlist error")); + + discard_whitespace(s); + c = popchar(s); + switch (c) { + case '}': + return (movestate(s, PARSE_DONE)); + case '"': + return (movestate(s, PARSE_KEY_STRING)); + default: + return (posterror(s, "expected key or '}'")); + } +} + +static void +hdlr_key_string(state_t *s) +{ + char *str = collect_string(s); + if (str == NULL) + return (posterror(s, "could not collect key string")); + + /* + * Record the name of the next + */ + s->s_top->pf_key = str; + return (movestate(s, PARSE_COLON)); +} + +static void +hdlr_colon(state_t *s) +{ + char c; + discard_whitespace(s); + + if ((c = popchar(s)) != ':') + return (posterror(s, "expected ':'")); + + discard_whitespace(s); + + /* + * Select which type handler we need for the value after the colon: + */ + switch (c = peekchar(s)) { + case '"': + popchar(s); + return (pushstate(s, PARSE_STRING, PARSE_OBJECT_COMMA)); + case '{': + popchar(s); + return (pushstate(s, PARSE_OBJECT, 
PARSE_OBJECT_COMMA)); + case '[': + popchar(s); + return (pushstate(s, PARSE_ARRAY, PARSE_OBJECT_COMMA)); + default: + if (islower(c)) + return (pushstate(s, PARSE_BAREWORD, + PARSE_OBJECT_COMMA)); + else if (c == '-' || isdigit(c)) + return (pushstate(s, PARSE_NUMBER, PARSE_OBJECT_COMMA)); + else + return (posterror(s, "unexpected character at start " + "of value")); + } +} + +static void +hdlr_object_comma(state_t *s) +{ + char c; + discard_whitespace(s); + + switch (c = popchar(s)) { + case '}': + return (movestate(s, PARSE_DONE)); + case ',': + discard_whitespace(s); + if ((c = popchar(s)) != '"') + return (posterror(s, "expected '\"'")); + return (movestate(s, PARSE_KEY_STRING)); + default: + return (posterror(s, "expected ',' or '}'")); + } +} + +static void +hdlr_string(state_t *s) +{ + s->s_top->pf_value = collect_string(s); + if (s == NULL) + return (posterror(s, "could not collect string")); + s->s_top->pf_value_type = JSON_TYPE_STRING; + return (movestate(s, PARSE_DONE)); +} + +static int +store_value(state_t *s) +{ + nvlist_t *targ = s->s_top->pf_next->pf_nvl; + char *key = s->s_top->pf_next->pf_key; + json_type_t type = s->s_top->pf_value_type; + int ret = 0; + + switch (type) { + case JSON_TYPE_STRING: + ret = nvlist_add_string(targ, key, s->s_top->pf_value); + free(s->s_top->pf_value); + goto out; + case JSON_TYPE_BOOLEAN: + ret = nvlist_add_boolean_value(targ, key, + (boolean_t)s->s_top->pf_value); + goto out; + case JSON_TYPE_NULL: + ret = nvlist_add_boolean(targ, key); + goto out; + case JSON_TYPE_INTEGER: + ret = nvlist_add_int32(targ, key, + (int32_t)(uintptr_t)s->s_top->pf_value); + goto out; + case JSON_TYPE_ARRAY: + /* FALLTHRU */ + case JSON_TYPE_OBJECT: + /* + * Objects and arrays are already 'stored' in their target + * nvlist on creation. See: hdlr_object, hdlr_array. 
+ */ + goto out; + default: + fprintf(stderr, "ERROR: could not store unknown type %d\n", + type); + abort(); + } +out: + s->s_top->pf_value = NULL; + free(s->s_top->pf_next->pf_key); + s->s_top->pf_next->pf_key = NULL; + return (ret); +} + +static parse_frame_t * +parse_frame_free(parse_frame_t *pf, boolean_t free_nvl) +{ + parse_frame_t *next = pf->pf_next; + if (pf->pf_key != NULL) + free(pf->pf_key); + if (pf->pf_value != NULL) + abort(); + if (free_nvl && pf->pf_nvl != NULL) + nvlist_free(pf->pf_nvl); + free(pf); + return (next); +} + +static parse_handler_t hdlrs[] = { + NULL, /* PARSE_DONE */ + hdlr_rest, /* PARSE_REST */ + hdlr_object, /* PARSE_OBJECT */ + hdlr_key_string, /* PARSE_KEY_STRING */ + hdlr_colon, /* PARSE_COLON */ + hdlr_string, /* PARSE_STRING */ + hdlr_object_comma, /* PARSE_OBJECT_COMMA */ + hdlr_array, /* PARSE_ARRAY */ + hdlr_bareword, /* PARSE_BAREWORD */ + hdlr_number, /* PARSE_NUMBER */ + hdlr_array_value, /* PARSE_ARRAY_VALUE */ + hdlr_array_comma, /* PARSE_ARRAY_COMMA */ +}; +#define NUM_PARSE_HANDLERS (int)(sizeof (hdlrs) / sizeof (hdlrs[0])) + +int +nvlist_parse_json(char *buf, size_t buflen, nvlist_t **nvlp, + nvlist_parse_json_flags_t flag) +{ + int ret = 0; + state_t s; + + /* + * Check for valid flags: + */ + if ((flag & (NVJSON_FORCE_INTEGER | NVJSON_FORCE_DOUBLE)) == + (NVJSON_FORCE_INTEGER | NVJSON_FORCE_DOUBLE)) + return (EINVAL); + + /* + * Initialise parsing state structure: + */ + bzero(&s, sizeof (s)); + s.s_in = buf; + s.s_pos = 0; + s.s_len = buflen; + s.s_flags = flag; + + /* + * Allocate top-most stack frame: + */ + s.s_top = calloc(1, sizeof (*s.s_top)); + if (s.s_top == NULL) { + ret = errno; + goto out; + } + + s.s_top->pf_ps = PARSE_REST; + for (;;) { + if (s.s_top->pf_ps < 0) { + /* + * The parser reported an error. 
+ */ +#if 0 + fprintf(stderr, "parse error\n"); +#endif + ret = EFAULT; + goto out; + } else if (s.s_top->pf_ps == PARSE_DONE) { + if (s.s_top->pf_next == NULL) { + /* + * Last frame, so we're really + * done. + */ + *nvlp = s.s_top->pf_nvl; + goto out; + } else { + /* + * Otherwise, pop a frame and continue + * in previous state. + */ +#if 0 + parse_frame_t *t = s.s_top->pf_next; +#endif + + /* + * Copy out the value we created in the + * old frame: + */ + if ((ret = store_value(&s)) != 0) + goto out; +#if 0 + fprintf(stderr, "pop state %d -> %d\n", + s.s_top->pf_ps, t->pf_ps); +#endif + /* + * Free old frame: + */ + s.s_top = parse_frame_free(s.s_top, B_FALSE); + } + } + /* + * Dispatch to parser handler routine for this state: + */ + if (s.s_top->pf_ps >= NUM_PARSE_HANDLERS || + hdlrs[s.s_top->pf_ps] == NULL) { + fprintf(stderr, "no handler for state %d\n", + s.s_top->pf_ps); + abort(); + } + hdlrs[s.s_top->pf_ps](&s); + } + +out: + while (s.s_top != NULL) + s.s_top = parse_frame_free(s.s_top, ret == 0 ? B_FALSE : + B_TRUE); + return (ret); +} diff --git a/usr/src/lib/varpd/files/common/libvarpd_files_json.h b/usr/src/lib/varpd/files/common/libvarpd_files_json.h new file mode 100644 index 0000000000..2a96e55206 --- /dev/null +++ b/usr/src/lib/varpd/files/common/libvarpd_files_json.h @@ -0,0 +1,40 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. 
+ */ + +#ifndef _LIBVARPD_FILES_JSON_H +#define _LIBVARPD_FILES_JSON_H + +#include <libnvpair.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum nvlist_parse_json_flags { + NVJSON_FORCE_INTEGER = 0x01, + NVJSON_FORCE_DOUBLE = 0x02, + NVJSON_ERRORS_TO_STDERR = 0x04 +} nvlist_parse_json_flags_t; + +extern int nvlist_parse_json(char *, size_t, nvlist_t **, + nvlist_parse_json_flags_t); + +#define __UNUSED __attribute__((unused)) + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBVARPD_FILES_JSON_H */ diff --git a/usr/src/lib/varpd/files/common/llib-lvarpd_files b/usr/src/lib/varpd/files/common/llib-lvarpd_files new file mode 100644 index 0000000000..31b3d36fbe --- /dev/null +++ b/usr/src/lib/varpd/files/common/llib-lvarpd_files @@ -0,0 +1,18 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. + */ + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + diff --git a/usr/src/lib/varpd/files/common/mapfile-vers b/usr/src/lib/varpd/files/common/mapfile-vers new file mode 100644 index 0000000000..642ef72adc --- /dev/null +++ b/usr/src/lib/varpd/files/common/mapfile-vers @@ -0,0 +1,35 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. 
+# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION SUNWprivate { + local: + *; +}; diff --git a/usr/src/lib/varpd/files/i386/Makefile b/usr/src/lib/varpd/files/i386/Makefile new file mode 100644 index 0000000000..41e699e8f8 --- /dev/null +++ b/usr/src/lib/varpd/files/i386/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/files/sparc/Makefile b/usr/src/lib/varpd/files/sparc/Makefile new file mode 100644 index 0000000000..41e699e8f8 --- /dev/null +++ b/usr/src/lib/varpd/files/sparc/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. 
+# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/files/sparcv9/Makefile b/usr/src/lib/varpd/files/sparcv9/Makefile new file mode 100644 index 0000000000..5c586c1d40 --- /dev/null +++ b/usr/src/lib/varpd/files/sparcv9/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/varpd/libvarpd/Makefile b/usr/src/lib/varpd/libvarpd/Makefile new file mode 100644 index 0000000000..60c9cfa07f --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/Makefile @@ -0,0 +1,54 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. 
+# + +include ../../Makefile.lib + +HDRS = libvarpd.h libvarpd_client.h libvarpd_provider.h +HDRDIR = common +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +TYPECHECK_LIB = libvarpd.so.1 +TYPELIST = \ + varpd_client_instance_arg_t \ + varpd_client_nprops_arg_t \ + varpd_client_propinfo_arg_t \ + varpd_client_eresp_t \ + overlay_targ_cache_entry_t \ + overlay_targ_cache_t \ + overlay_targ_cache_iter_t + +all := TARGET = all +clean := TARGET = clean +clobber := TARGET = clobber +install := TARGET = install +lint := TARGET = lint + +.KEEP_STATE: + +all clean clobber lint: $(SUBDIRS) + +install: $(SUBDIRS) $(VARPD_MAPFILES) install_h + +install_h: $(ROOTHDRS) + +check: $(CHECKHDRS) $(TYPECHECK) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include ../../Makefile.targ diff --git a/usr/src/lib/varpd/libvarpd/Makefile.com b/usr/src/lib/varpd/libvarpd/Makefile.com new file mode 100644 index 0000000000..e0378f3229 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/Makefile.com @@ -0,0 +1,44 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. 
+# + +LIBRARY = libvarpd.a +VERS = .1 +OBJECTS = libvarpd.o \ + libvarpd_arp.o \ + libvarpd_client.o \ + libvarpd_door.o \ + libvarpd_overlay.o \ + libvarpd_panic.o \ + libvarpd_persist.o \ + libvarpd_prop.o \ + libvarpd_plugin.o \ + libvarpd_util.o + +include ../../../Makefile.lib + +LIBS = $(DYNLIB) +LDLIBS += -lc -lavl -lumem -lidspace -lnvpair -lmd5 -lrename \ + -lbunyan +CPPFLAGS += -I../common + +SRCDIR = ../common + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +include ../../../Makefile.targ diff --git a/usr/src/lib/varpd/libvarpd/amd64/Makefile b/usr/src/lib/varpd/libvarpd/amd64/Makefile new file mode 100644 index 0000000000..5c586c1d40 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/amd64/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd.c b/usr/src/lib/varpd/libvarpd/common/libvarpd.c new file mode 100644 index 0000000000..3c08a3316b --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd.c @@ -0,0 +1,355 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. + */ + +/* + * varpd library + */ + +#include <stdlib.h> +#include <errno.h> +#include <umem.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/avl.h> +#include <stddef.h> +#include <stdio.h> +#include <strings.h> + +#include <libvarpd_impl.h> + +static int +libvarpd_instance_comparator(const void *lp, const void *rp) +{ + const varpd_instance_t *lpp, *rpp; + lpp = lp; + rpp = rp; + + if (lpp->vri_id > rpp->vri_id) + return (1); + if (lpp->vri_id < rpp->vri_id) + return (-1); + return (0); +} + +static int +libvarpd_instance_lcomparator(const void *lp, const void *rp) +{ + const varpd_instance_t *lpp, *rpp; + lpp = lp; + rpp = rp; + + if (lpp->vri_linkid > rpp->vri_linkid) + return (1); + if (lpp->vri_linkid < rpp->vri_linkid) + return (-1); + return (0); +} + + +int +libvarpd_create(varpd_handle_t **vphp) +{ + int ret; + varpd_impl_t *vip; + char buf[32]; + + if (vphp == NULL) + return (EINVAL); + + *vphp = NULL; + vip = umem_alloc(sizeof (varpd_impl_t), UMEM_DEFAULT); + if (vip == NULL) + return (errno); + + bzero(vip, sizeof (varpd_impl_t)); + (void) snprintf(buf, sizeof (buf), "varpd_%p", vip); + vip->vdi_idspace = id_space_create(buf, LIBVARPD_ID_MIN, + LIBVARPD_ID_MAX); + if (vip->vdi_idspace == NULL) { + int ret = errno; + umem_free(vip, sizeof (varpd_impl_t)); + return (ret); + } + + vip->vdi_qcache = umem_cache_create("query", sizeof (varpd_query_t), 0, + NULL, NULL, NULL, NULL, NULL, 0); + if (vip->vdi_qcache == NULL) { + int ret = errno; + id_space_destroy(vip->vdi_idspace); + umem_free(vip, sizeof (varpd_impl_t)); + return (ret); + } + + if ((ret = libvarpd_overlay_init(vip)) != 0) { + umem_cache_destroy(vip->vdi_qcache); + id_space_destroy(vip->vdi_idspace); + umem_free(vip, sizeof (varpd_impl_t)); + return (ret); + } + + if ((ret = bunyan_init("varpd", &vip->vdi_bunyan)) != 0) { + libvarpd_overlay_fini(vip); + umem_cache_destroy(vip->vdi_qcache); + 
id_space_destroy(vip->vdi_idspace); + umem_free(vip, sizeof (varpd_impl_t)); + return (ret); + } + + libvarpd_persist_init(vip); + + avl_create(&vip->vdi_plugins, libvarpd_plugin_comparator, + sizeof (varpd_plugin_t), offsetof(varpd_plugin_t, vpp_node)); + + avl_create(&vip->vdi_instances, libvarpd_instance_comparator, + sizeof (varpd_instance_t), offsetof(varpd_instance_t, vri_inode)); + avl_create(&vip->vdi_linstances, libvarpd_instance_lcomparator, + sizeof (varpd_instance_t), offsetof(varpd_instance_t, vri_lnode)); + + if (mutex_init(&vip->vdi_lock, USYNC_THREAD, NULL) != 0) + libvarpd_panic("failed to create mutex: %d", errno); + + vip->vdi_doorfd = -1; + *vphp = (varpd_handle_t *)vip; + return (0); +} + +void +libvarpd_destroy(varpd_handle_t *vhp) +{ + varpd_impl_t *vip = (varpd_impl_t *)vhp; + + if (mutex_destroy(&vip->vdi_lock) != 0) + libvarpd_panic("failed to destroy mutex: %d", errno); + libvarpd_persist_fini(vip); + libvarpd_overlay_fini(vip); + umem_cache_destroy(vip->vdi_qcache); + id_space_destroy(vip->vdi_idspace); + umem_free(vip, sizeof (varpd_impl_t)); +} + +int +libvarpd_instance_create(varpd_handle_t *vhp, datalink_id_t linkid, + const char *pname, varpd_instance_handle_t **outp) +{ + int ret; + varpd_impl_t *vip = (varpd_impl_t *)vhp; + varpd_plugin_t *plugin; + varpd_instance_t *inst, lookup; + overlay_plugin_dest_t dest; + uint64_t vid; + + /* XXX Really want our own errnos */ + plugin = libvarpd_plugin_lookup(vip, pname); + if (plugin == NULL) + return (ENOENT); + + if ((ret = libvarpd_overlay_info(vip, linkid, &dest, NULL, &vid)) != 0) + return (ret); + + inst = umem_alloc(sizeof (varpd_instance_t), UMEM_DEFAULT); + if (inst == NULL) + return (ENOMEM); + + inst->vri_id = id_alloc(vip->vdi_idspace); + if (inst->vri_id == -1) + libvarpd_panic("failed to allocate id from vdi_idspace: %d", + errno); + inst->vri_linkid = linkid; + inst->vri_vnetid = vid; + inst->vri_mode = plugin->vpp_mode; + inst->vri_dest = dest; + inst->vri_plugin = plugin; 
+ inst->vri_impl = vip; + inst->vri_flags = 0; + if ((ret = plugin->vpp_ops->vpo_create((varpd_provider_handle_t *)inst, + &inst->vri_private, dest)) != 0) { + id_free(vip->vdi_idspace, inst->vri_id); + umem_free(inst, sizeof (varpd_instance_t)); + return (ret); + } + + if (mutex_init(&inst->vri_lock, USYNC_THREAD, NULL) != 0) + libvarpd_panic("failed to create mutex: %d", errno); + + mutex_lock(&vip->vdi_lock); + lookup.vri_id = inst->vri_id; + if (avl_find(&vip->vdi_instances, &lookup, NULL) != NULL) + libvarpd_panic("found duplicate instance with id %d", + lookup.vri_id); + avl_add(&vip->vdi_instances, inst); + lookup.vri_linkid = inst->vri_linkid; + if (avl_find(&vip->vdi_linstances, &lookup, NULL) != NULL) + libvarpd_panic("found duplicate linstance with id %d", + lookup.vri_linkid); + avl_add(&vip->vdi_linstances, inst); + mutex_unlock(&vip->vdi_lock); + *outp = (varpd_instance_handle_t *)inst; + return (0); +} + +uint64_t +libvarpd_instance_id(varpd_instance_handle_t *ihp) +{ + varpd_instance_t *inst = (varpd_instance_t *)ihp; + return (inst->vri_id); +} + +uint64_t +libvarpd_plugin_vnetid(varpd_provider_handle_t *vhp) +{ + varpd_instance_t *inst = (varpd_instance_t *)vhp; + return (inst->vri_vnetid); +} + +varpd_instance_handle_t * +libvarpd_instance_lookup(varpd_handle_t *vhp, uint64_t id) +{ + varpd_impl_t *vip = (varpd_impl_t *)vhp; + varpd_instance_t lookup, *retp; + + lookup.vri_id = id; + mutex_lock(&vip->vdi_lock); + retp = avl_find(&vip->vdi_instances, &lookup, NULL); + mutex_unlock(&vip->vdi_lock); + return ((varpd_instance_handle_t *)retp); +} + +/* + * If this function becomes external to varpd, we need to change it to return a + * varpd_instance_handle_t. 
+ */ +varpd_instance_t * +libvarpd_instance_lookup_by_dlid(varpd_impl_t *vip, datalink_id_t linkid) +{ + varpd_instance_t lookup, *retp; + + lookup.vri_linkid = linkid; + mutex_lock(&vip->vdi_lock); + retp = avl_find(&vip->vdi_linstances, &lookup, NULL); + mutex_unlock(&vip->vdi_lock); + return (retp); +} + +/* + * When an instance is being destroyed, that means we should deactivate it, as + * well as clean it up. That means here, the proper order is calling the plug-in + * stop. + */ +void +libvarpd_instance_destroy(varpd_instance_handle_t *ihp) +{ + varpd_instance_t *inst = (varpd_instance_t *)ihp; + varpd_impl_t *vip = inst->vri_impl; + + /* + * First things first, remove it from global visibility. + */ + mutex_lock(&vip->vdi_lock); + avl_remove(&vip->vdi_instances, inst); + avl_remove(&vip->vdi_linstances, inst); + mutex_unlock(&vip->vdi_lock); + + /* + * XXX We probably need a reference counting strategy here so we know + * it's safe to remove. + */ + mutex_lock(&inst->vri_lock); + + /* + * We need to clean up this instance, that means remove it from + * persistence and stopping it. Then finally we'll have to clean it up + * entirely. 
+ */ + if (inst->vri_flags & VARPD_INSTANCE_F_ACTIVATED) { + inst->vri_flags &= ~VARPD_INSTANCE_F_ACTIVATED; + libvarpd_torch_instance(vip, inst); + inst->vri_plugin->vpp_ops->vpo_stop(inst->vri_private); + inst->vri_plugin->vpp_ops->vpo_destroy(inst->vri_private); + inst->vri_private = NULL; + } + mutex_unlock(&inst->vri_lock); + + /* Do the full clean up of the instance */ + if (mutex_destroy(&inst->vri_lock) != 0) + libvarpd_panic("failed to destroy instance vri_lock"); + id_free(vip->vdi_idspace, inst->vri_id); + umem_free(inst, sizeof (varpd_instance_t)); +} + +int +libvarpd_instance_activate(varpd_instance_handle_t *ihp) +{ + int ret; + varpd_instance_t *inst = (varpd_instance_t *)ihp; + + mutex_lock(&inst->vri_lock); + + if (inst->vri_flags & VARPD_INSTANCE_F_ACTIVATED) { + ret = EEXIST; + goto out; + } + + if ((ret = inst->vri_plugin->vpp_ops->vpo_start(inst->vri_private)) != + 0) + goto out; + + if ((ret = libvarpd_persist_instance(inst->vri_impl, inst)) != 0) + goto out; + + /* XXX We should call stop if this fails */ + if ((ret = libvarpd_overlay_associate(inst)) != 0) + goto out; + + inst->vri_flags |= VARPD_INSTANCE_F_ACTIVATED; + +out: + mutex_unlock(&inst->vri_lock); + return (ret); +} + +const bunyan_logger_t * +libvarpd_plugin_bunyan(varpd_provider_handle_t *vhp) +{ + varpd_instance_t *inst = (varpd_instance_t *)vhp; + return (inst->vri_impl->vdi_bunyan); +} + +static void +libvarpd_prefork(void) +{ + libvarpd_plugin_prefork(); +} + +static void +libvarpd_postfork(void) +{ + libvarpd_plugin_postfork(); +} + +#pragma init(libvarpd_init) +static void +libvarpd_init(void) +{ + libvarpd_plugin_init(); + if (pthread_atfork(NULL, libvarpd_prefork, libvarpd_postfork) != 0) + libvarpd_panic("failed to create varpd atfork: %d", errno); +} + +#pragma fini(libvarpd_fini) +static void +libvarpd_fini(void) +{ + libvarpd_plugin_fini(); +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd.h b/usr/src/lib/varpd/libvarpd/common/libvarpd.h new file mode 100644 
index 0000000000..b2dc57dd4e --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd.h @@ -0,0 +1,78 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. + */ + +#ifndef _LIBVARPD_H +#define _LIBVARPD_H + +/* + * varpd interfaces + */ + +#include <sys/types.h> +#include <stdint.h> +#include <sys/mac.h> +#include <libvarpd_client.h> +#include <stdio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct __varpd_handle varpd_handle_t; +typedef struct __varpd_prop_handle varpd_prop_handle_t; +typedef struct __varpd_instance_handle varpd_instance_handle_t; + +extern int libvarpd_create(varpd_handle_t **); +extern void libvarpd_destroy(varpd_handle_t *); + +extern int libvarpd_persist_enable(varpd_handle_t *, const char *); +extern int libvarpd_persist_restore(varpd_handle_t *); +extern int libvarpd_persist_disable(varpd_handle_t *); + +extern int libvarpd_instance_create(varpd_handle_t *, datalink_id_t, + const char *, varpd_instance_handle_t **); +extern uint64_t libvarpd_instance_id(varpd_instance_handle_t *); +extern varpd_instance_handle_t *libvarpd_instance_lookup(varpd_handle_t *, + uint64_t); +extern void libvarpd_instance_destroy(varpd_instance_handle_t *); +extern int libvarpd_instance_activate(varpd_instance_handle_t *); + +extern int libvarpd_plugin_load(varpd_handle_t *, const char *); +typedef int (*libvarpd_plugin_walk_f)(varpd_handle_t *, const char *, void *); +extern int libvarpd_plugin_walk(varpd_handle_t *, libvarpd_plugin_walk_f, + void *); + +extern int libvarpd_prop_handle_alloc(varpd_handle_t *, + varpd_instance_handle_t *, 
varpd_prop_handle_t **); +extern void libvarpd_prop_handle_free(varpd_prop_handle_t *); +extern int libvarpd_prop_nprops(varpd_instance_handle_t *, uint_t *); +/* XXX Do we need the next two from a server perspective? */ +extern int libvarpd_prop_info_fill(varpd_prop_handle_t *, uint_t); +extern int libvarpd_prop_info(varpd_prop_handle_t *, const char **, uint_t *, + uint_t *, const void **, uint32_t *, const mac_propval_range_t **); +extern int libvarpd_prop_get(varpd_prop_handle_t *, void *, uint32_t *); +extern int libvarpd_prop_set(varpd_prop_handle_t *, const void *, uint32_t); + +extern int libvarpd_door_server_create(varpd_handle_t *, const char *); +extern void libvarpd_door_server_destroy(varpd_handle_t *); + +extern void libvarpd_overlay_lookup_run(varpd_handle_t *); +extern void libvarpd_overlay_lookup_quiesce(varpd_handle_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBVARPD_H */ diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c new file mode 100644 index 0000000000..04fbe0e05b --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c @@ -0,0 +1,645 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. 
+ */ + +/* + * Common routines for implementing proxy arp + */ + +#include <sys/types.h> +#include <net/if.h> +#include <netinet/if_ether.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/icmp6.h> +#include <netinet/udp.h> +#include <netinet/dhcp.h> +#include <libvarpd_impl.h> +#include <sys/vlan.h> +#include <strings.h> +#include <assert.h> + +#define IPV6_VERSION 6 + +typedef struct varpd_arp_query { + int vaq_type; + char vaq_buf[ETHERMAX + VLAN_TAGSZ]; + size_t vaq_bsize; + uint8_t vaq_lookup[ETHERADDRL]; + struct sockaddr_storage vaq_sock; + varpd_instance_t *vaq_inst; + struct ether_arp *vaq_ea; + varpd_query_handle_t *vaq_query; + const overlay_targ_lookup_t *vaq_otl; + ip6_t *vaq_ip6; + nd_neighbor_solicit_t *vaq_ns; +} varpd_arp_query_t; + +typedef struct varpd_dhcp_query { + char vdq_buf[ETHERMAX + VLAN_TAGSZ]; + size_t vdq_bsize; + uint8_t vdq_lookup[ETHERADDRL]; + const overlay_targ_lookup_t *vdq_otl; + varpd_instance_t *vdq_inst; + varpd_query_handle_t *vdq_query; + struct ether_header *vdq_ether; +} varpd_dhcp_query_t; + +static const uint8_t libvarpd_arp_bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff }; + +void +libvarpd_plugin_proxy_arp(varpd_provider_handle_t *hdl, + varpd_query_handle_t *vqh, const overlay_targ_lookup_t *otl) +{ + varpd_arp_query_t *vaq; + varpd_instance_t *inst = (varpd_instance_t *)hdl; + struct ether_arp *ea; + struct sockaddr_in *ip; + + vaq = umem_alloc(sizeof (varpd_arp_query_t), UMEM_DEFAULT); + if (vaq == NULL) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + return; + } + vaq->vaq_bsize = sizeof (vaq->vaq_buf); + + if (otl->otl_sap != ETHERTYPE_ARP) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + /* + * An ARP packet should not be very large because it's defined to only + * be allowed to have a single entry at a given time.
But our data must + * be at least as large as an ether_arp and our header must be at least + * as large as a standard ethernet header. + */ + if (otl->otl_hdrsize + otl->otl_pktsize > vaq->vaq_bsize || + otl->otl_pktsize < sizeof (struct ether_arp) || + otl->otl_hdrsize < sizeof (struct ether_header)) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + if (libvarpd_overlay_packet(inst->vri_impl, otl, vaq->vaq_buf, + &vaq->vaq_bsize) != 0) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + /* XXX Check how many bytes we actually have */ + + ea = (void *)((uintptr_t)vaq->vaq_buf + (uintptr_t)otl->otl_hdrsize); + + /* + * Make sure it matches something that we know about. + */ + if (ntohs(ea->ea_hdr.ar_hrd) != ARPHRD_ETHER || + ntohs(ea->ea_hdr.ar_pro) != ETHERTYPE_IP || + ea->ea_hdr.ar_hln != ETHERADDRL || + ea->ea_hdr.ar_pln != sizeof (ea->arp_spa) || + ntohs(ea->ea_hdr.ar_op) != ARPOP_REQUEST) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + /* + * Now that we've verified that our data is sane, see if we're doing a + * gratuitous arp and if so, drop it. Otherwise, we may end up + * triggering duplicate address detection. 
+ */ + if (bcmp(ea->arp_spa, ea->arp_tpa, sizeof (ea->arp_spa)) == 0) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + bzero(&vaq->vaq_sock, sizeof (struct sockaddr_storage)); + ip = (struct sockaddr_in *)&vaq->vaq_sock; + ip->sin_family = AF_INET; + bcopy(ea->arp_tpa, &ip->sin_addr, sizeof (ea->arp_tpa)); + + vaq->vaq_type = AF_INET; + vaq->vaq_inst = inst; + vaq->vaq_ea = ea; + vaq->vaq_query = vqh; + vaq->vaq_otl = otl; + + if (inst->vri_plugin->vpp_ops->vpo_arp == NULL) + libvarpd_panic("%s plugin asked to do arp, but has no method", + inst->vri_plugin->vpp_name); + + inst->vri_plugin->vpp_ops->vpo_arp(inst->vri_private, + (varpd_arp_handle_t *)vaq, VARPD_QTYPE_ETHERNET, + (struct sockaddr *)ip, vaq->vaq_lookup); +} + +static void +libvarpd_proxy_arp_fini(varpd_arp_query_t *vaq) +{ + struct ether_header *ether; + struct sockaddr_in *ip; + + ip = (struct sockaddr_in *)&vaq->vaq_sock; + /* + * Modify our packet in place for a reply. We need to swap around the + * sender and target addresses. + */ + vaq->vaq_ea->ea_hdr.ar_op = htons(ARPOP_REPLY); + bcopy(vaq->vaq_ea->arp_sha, vaq->vaq_ea->arp_tha, ETHERADDRL); + bcopy(vaq->vaq_lookup, vaq->vaq_ea->arp_sha, ETHERADDRL); + bcopy(vaq->vaq_ea->arp_spa, &ip->sin_addr, + sizeof (vaq->vaq_ea->arp_spa)); + bcopy(vaq->vaq_ea->arp_tpa, vaq->vaq_ea->arp_spa, + sizeof (vaq->vaq_ea->arp_spa)); + bcopy(&ip->sin_addr, vaq->vaq_ea->arp_tpa, + sizeof (vaq->vaq_ea->arp_spa)); + + /* + * Finally go ahead and fix up the mac header and reply to the sender + * explicitly. 
+ */ + ether = (struct ether_header *)vaq->vaq_buf; + bcopy(&ether->ether_shost, &ether->ether_dhost, ETHERADDRL); + bcopy(vaq->vaq_lookup, &ether->ether_shost, ETHERADDRL); + + (void) libvarpd_overlay_inject(vaq->vaq_inst->vri_impl, vaq->vaq_otl, + vaq->vaq_buf, vaq->vaq_bsize); + + libvarpd_plugin_query_reply(vaq->vaq_query, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); +} + +static uint16_t +libvarpd_icmpv6_checksum(const ip6_t *v6hdr, const uint16_t *buf, uint16_t mlen) +{ + int i; + uint16_t *v; + uint32_t sum = 0; + + assert(mlen % 2 == 0); + v = (uint16_t *)&v6hdr->ip6_src; + for (i = 0; i < sizeof (struct in6_addr); i += 2, v++) + sum += *v; + v = (uint16_t *)&v6hdr->ip6_dst; + for (i = 0; i < sizeof (struct in6_addr); i += 2, v++) + sum += *v; + sum += htons(mlen); +#ifdef _BIG_ENDIAN + sum += IPPROTO_ICMPV6; +#else + sum += IPPROTO_ICMPV6 << 8; +#endif /* _BIG_ENDIAN */ + + for (i = 0; i < mlen; i += 2, buf++) + sum += *buf; + + while ((sum >> 16) != 0) + sum = (sum & 0xffff) + (sum >> 16); + + return (sum & 0xffff); +} + +/* + * Proxying NDP is much more involved than proxying ARP. For starters, NDP + * neighbor solicitations are implemented in terms of IPv6 ICMP as opposed to + * its own Ethertype. Therefore, we're going to have to grab a packet if it's a + * multicast packet and then determine if we actually want to do anything with + * it.
+ */ +void +libvarpd_plugin_proxy_ndp(varpd_provider_handle_t *hdl, + varpd_query_handle_t *vqh, const overlay_targ_lookup_t *otl) +{ + size_t bsize, plen; + varpd_arp_query_t *vaq; + ip6_t *v6hdr; + nd_neighbor_solicit_t *ns; + nd_opt_hdr_t *opt; + struct sockaddr_in6 *s6; + + varpd_instance_t *inst = (varpd_instance_t *)hdl; + uint8_t *eth = NULL; + + vaq = umem_alloc(sizeof (varpd_arp_query_t), UMEM_DEFAULT); + if (vaq == NULL) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + return; + } + vaq->vaq_bsize = sizeof (vaq->vaq_buf); + + if (otl->otl_dstaddr[0] != 0x33 || + otl->otl_dstaddr[1] != 0x33) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + /* + * If we have more than a standard frame size for the ICMP neighbor + * solicitation, drop it. Similarly if there isn't enough data present + * for us, drop it. + */ + if (otl->otl_hdrsize + otl->otl_pktsize > vaq->vaq_bsize) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + if (otl->otl_pktsize < sizeof (ip6_t) + + sizeof (nd_neighbor_solicit_t)) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + if (libvarpd_overlay_packet(inst->vri_impl, otl, vaq->vaq_buf, + &vaq->vaq_bsize) != 0) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + bsize = vaq->vaq_bsize; + bsize -= otl->otl_hdrsize; + assert(bsize > sizeof (ip6_t)); + + v6hdr = (ip6_t *)(vaq->vaq_buf + otl->otl_hdrsize); + if (((v6hdr->ip6_vfc & 0xf0) >> 4) != IPV6_VERSION) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + if (v6hdr->ip6_nxt != IPPROTO_ICMPV6) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + /* + * In addition to
getting these requests on the multicast address for + * node solicitation, we may also end up getting them on a generic + * multicast address due to timeouts or other choices by various OSes. + * We should be liberal and accept both, even though the standard + * wants them sent to a solicitation address. + */ + if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&v6hdr->ip6_dst) && + !IN6_IS_ADDR_MC_LINKLOCAL(&v6hdr->ip6_dst)) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + bsize -= sizeof (ip6_t); + plen = ntohs(v6hdr->ip6_plen); + if (bsize < plen || plen < sizeof (nd_neighbor_solicit_t)) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + /* + * Now we know that this is an ICMPv6 request targeting the right + * IPv6 multicast prefix. Let's go through and verify that ICMPv6 + * indicates that we have the real thing and ensure that per RFC 4861 + * the target address is not a multicast address. Further, because this + * is a multicast on Ethernet, we must have a source link-layer address. + * + * XXX We should probably validate the checksum here...
+ */ + ns = (nd_neighbor_solicit_t *)(vaq->vaq_buf + otl->otl_hdrsize + + sizeof (ip6_t)); + if (ns->nd_ns_type != ND_NEIGHBOR_SOLICIT || ns->nd_ns_code != 0) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + if (IN6_IS_ADDR_MULTICAST(&ns->nd_ns_target) || + IN6_IS_ADDR_V4MAPPED(&ns->nd_ns_target) || + IN6_IS_ADDR_LOOPBACK(&ns->nd_ns_target)) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + plen -= sizeof (nd_neighbor_solicit_t); + opt = (nd_opt_hdr_t *)(ns+1); + while (plen >= sizeof (struct nd_opt_hdr)) { + /* An option that is empty or overruns the payload is bogus */ + if (opt->nd_opt_len == 0 || opt->nd_opt_len * 8 > plen) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + if (opt->nd_opt_type == ND_OPT_SOURCE_LINKADDR) { + eth = (uint8_t *)((uintptr_t)opt + + sizeof (nd_opt_hdr_t)); + } + plen -= opt->nd_opt_len * 8; + opt = (nd_opt_hdr_t *)((uintptr_t)opt + + opt->nd_opt_len * 8); + } + + if (eth == NULL) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } + + bzero(&vaq->vaq_sock, sizeof (struct sockaddr_storage)); + s6 = (struct sockaddr_in6 *)&vaq->vaq_sock; + s6->sin6_family = AF_INET6; + bcopy(&ns->nd_ns_target, &s6->sin6_addr, sizeof (s6->sin6_addr)); + + if (inst->vri_plugin->vpp_ops->vpo_arp == NULL) + libvarpd_panic("%s plugin asked to do arp, but has no method", + inst->vri_plugin->vpp_name); + + vaq->vaq_type = AF_INET6; + vaq->vaq_inst = inst; + vaq->vaq_ea = NULL; + vaq->vaq_query = vqh; + vaq->vaq_otl = otl; + vaq->vaq_ns = ns; + vaq->vaq_ip6 = v6hdr; + inst->vri_plugin->vpp_ops->vpo_arp(inst->vri_private, + (varpd_arp_handle_t *)vaq, VARPD_QTYPE_ETHERNET, + (struct sockaddr *)s6, vaq->vaq_lookup); +} + +static void +libvarpd_proxy_ndp_fini(varpd_arp_query_t *vaq) +{ + char resp[ETHERMAX + VLAN_TAGSZ];
+ struct ether_header *ether; + nd_neighbor_advert_t *na; + nd_opt_hdr_t *opt; + ip6_t *v6hdr; + size_t roff = 0; + + /* + * Now we need to assemble an RA as a response. Unlike with arp, we opt + * to use a new packet just to make things a bit simpler saner here. + */ + v6hdr = vaq->vaq_ip6; + bcopy(vaq->vaq_buf, resp, vaq->vaq_otl->otl_hdrsize); + ether = (struct ether_header *)resp; + bcopy(ðer->ether_shost, ðer->ether_dhost, ETHERADDRL); + bcopy(vaq->vaq_lookup, ðer->ether_shost, ETHERADDRL); + roff += vaq->vaq_otl->otl_hdrsize; + bcopy(v6hdr, resp + roff, sizeof (ip6_t)); + v6hdr = (ip6_t *)(resp + roff); + bcopy(&v6hdr->ip6_src, &v6hdr->ip6_dst, sizeof (struct in6_addr)); + bcopy(&vaq->vaq_ns->nd_ns_target, &v6hdr->ip6_src, + sizeof (struct in6_addr)); + roff += sizeof (ip6_t); + na = (nd_neighbor_advert_t *)(resp + roff); + na->nd_na_type = ND_NEIGHBOR_ADVERT; + na->nd_na_code = 0; + /* + * RFC 4443 defines that we should set the checksum to zero before we + * calculate the checksumat we should set the checksum to zero before we + * calculate it. + */ + na->nd_na_cksum = 0; + /* + * Nota bene, the header <netinet/icmp6.h> has already transformed this + * into the appropriate host order. Don't use htonl. + */ + na->nd_na_flags_reserved = ND_NA_FLAG_SOLICITED | ND_NA_FLAG_OVERRIDE; + bcopy(&vaq->vaq_ns->nd_ns_target, &na->nd_na_target, + sizeof (struct in6_addr)); + roff += sizeof (nd_neighbor_advert_t); + + opt = (nd_opt_hdr_t *)(resp + roff); + opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; + opt->nd_opt_len = 1; + roff += sizeof (nd_opt_hdr_t); + bcopy(vaq->vaq_lookup, resp + roff, ETHERADDRL); + roff += ETHERADDRL; + + /* + * Now that we've filled in the packet, go back and compute the checksum + * and fill in the IPv6 payload size. 
+ */ + v6hdr->ip6_plen = htons(roff - sizeof (ip6_t) - + vaq->vaq_otl->otl_hdrsize); + na->nd_na_cksum = ~libvarpd_icmpv6_checksum(v6hdr, (uint16_t *)na, + ntohs(v6hdr->ip6_plen)) & 0xffff; + + (void) libvarpd_overlay_inject(vaq->vaq_inst->vri_impl, vaq->vaq_otl, + resp, roff); + + libvarpd_plugin_query_reply(vaq->vaq_query, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); +} + +void +libvarpd_plugin_arp_reply(varpd_arp_handle_t *vah, int action) +{ + varpd_arp_query_t *vaq = (varpd_arp_query_t *)vah; + + if (vaq == NULL) + libvarpd_panic("unknown plugin passed invalid " + "varpd_arp_handle_t"); + + if (action == VARPD_LOOKUP_DROP) { + libvarpd_plugin_query_reply(vaq->vaq_query, VARPD_LOOKUP_DROP); + umem_free(vaq, sizeof (varpd_arp_query_t)); + return; + } else if (action != VARPD_LOOKUP_OK) + libvarpd_panic("%s plugin returned invalid action %d", + vaq->vaq_inst->vri_plugin->vpp_name, action); + + switch (vaq->vaq_type) { + case AF_INET: + libvarpd_proxy_arp_fini(vaq); + break; + case AF_INET6: + libvarpd_proxy_ndp_fini(vaq); + break; + default: + libvarpd_panic("encountered unknown vaq_type: %d", + vaq->vaq_type); + } +} + +void +libvarpd_plugin_proxy_dhcp(varpd_provider_handle_t *hdl, + varpd_query_handle_t *vqh, const overlay_targ_lookup_t *otl) +{ + varpd_dhcp_query_t *vdq; + struct ether_header *ether; + struct ip *ip; + struct udphdr *udp; + varpd_instance_t *inst = (varpd_instance_t *)hdl; + + vdq = umem_alloc(sizeof (varpd_dhcp_query_t), UMEM_DEFAULT); + if (vdq == NULL) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + return; + } + vdq->vdq_bsize = sizeof (vdq->vdq_buf); + + if (otl->otl_sap != ETHERTYPE_IP) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + if (bcmp(otl->otl_dstaddr, libvarpd_arp_bcast, ETHERADDRL) != 0) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + if 
(otl->otl_hdrsize + otl->otl_pktsize > vdq->vdq_bsize || + otl->otl_pktsize < sizeof (struct ip) + sizeof (struct udphdr) + + sizeof (struct dhcp) || + otl->otl_hdrsize < sizeof (struct ether_header)) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + if (libvarpd_overlay_packet(inst->vri_impl, otl, vdq->vdq_buf, + &vdq->vdq_bsize) != 0) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + if (vdq->vdq_bsize != otl->otl_hdrsize + otl->otl_pktsize) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + ether = (struct ether_header *)vdq->vdq_buf; + ip = (struct ip *)(vdq->vdq_buf + otl->otl_hdrsize); + + if (ip->ip_v != IPVERSION && ip->ip_p != IPPROTO_UDP) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + if (otl->otl_hdrsize + ip->ip_hl * 4 + sizeof (struct udphdr) > + vdq->vdq_bsize) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + udp = (struct udphdr *)(vdq->vdq_buf + otl->otl_hdrsize + + ip->ip_hl * 4); + + if (ntohs(udp->uh_sport) != IPPORT_BOOTPC || + ntohs(udp->uh_dport) != IPPORT_BOOTPS) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } + + vdq->vdq_ether = ether; + vdq->vdq_inst = inst; + vdq->vdq_query = vqh; + vdq->vdq_otl = otl; + + if (inst->vri_plugin->vpp_ops->vpo_dhcp == NULL) + libvarpd_panic("%s plugin asked to do dhcp, but has no method", + inst->vri_plugin->vpp_name); + + inst->vri_plugin->vpp_ops->vpo_dhcp(inst->vri_private, + (varpd_dhcp_handle_t *)vdq, VARPD_QTYPE_ETHERNET, otl, + vdq->vdq_lookup); +} + +void +libvarpd_plugin_dhcp_reply(varpd_dhcp_handle_t *vdh, int action) +{ + varpd_dhcp_query_t *vdq = (varpd_dhcp_query_t 
*)vdh; + + if (vdq == NULL) + libvarpd_panic("unknown plugin passed invalid " + "varpd_dhcp_handle_t"); + + if (action == VARPD_LOOKUP_DROP) { + libvarpd_plugin_query_reply(vdq->vdq_query, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); + return; + } else if (action != VARPD_LOOKUP_OK) + libvarpd_panic("%s plugin returned invalid action %d", + vdq->vdq_inst->vri_plugin->vpp_name, action); + + bcopy(vdq->vdq_lookup, &vdq->vdq_ether->ether_dhost, ETHERADDRL); + (void) libvarpd_overlay_resend(vdq->vdq_inst->vri_impl, vdq->vdq_otl, + vdq->vdq_buf, vdq->vdq_bsize); + + libvarpd_plugin_query_reply(vdq->vdq_query, VARPD_LOOKUP_DROP); + umem_free(vdq, sizeof (varpd_dhcp_query_t)); +} + +/* + * Inject a gratuitious ARP packet to the specified mac address. + */ +void +libvarpd_inject_arp(varpd_provider_handle_t *vph, const uint16_t vlan, + const uint8_t *srcmac, const struct in_addr *srcip, const uint8_t *dstmac) +{ + char buf[1500]; + size_t bsize = 0; + struct ether_arp *ea; + varpd_instance_t *inst = (varpd_instance_t *)vph; + + if (vlan != 0) { + struct ether_vlan_header *eh; + eh = (struct ether_vlan_header *)(buf + bsize); + bsize += sizeof (struct ether_vlan_header); + bcopy(dstmac, &eh->ether_dhost, ETHERADDRL); + bcopy(srcmac, &eh->ether_shost, ETHERADDRL); + eh->ether_tpid = htons(ETHERTYPE_VLAN); + eh->ether_tci = htons(VLAN_TCI(0, ETHER_CFI, vlan)); + eh->ether_type = htons(ETHERTYPE_ARP); + } else { + struct ether_header *eh; + eh = (struct ether_header *)(buf + bsize); + bsize += sizeof (struct ether_header); + bcopy(dstmac, &eh->ether_dhost, ETHERADDRL); + bcopy(srcmac, &eh->ether_shost, ETHERADDRL); + eh->ether_type = htons(ETHERTYPE_ARP); + } + + ea = (struct ether_arp *)(buf + bsize); + bsize += sizeof (struct ether_arp); + ea->ea_hdr.ar_hrd = htons(ARPHRD_ETHER); + ea->ea_hdr.ar_pro = htons(ETHERTYPE_IP); + ea->ea_hdr.ar_hln = ETHERADDRL; + ea->ea_hdr.ar_pln = sizeof (struct in_addr); + ea->ea_hdr.ar_op = htons(ARPOP_REQUEST); + 
bcopy(srcmac, ea->arp_sha, ETHERADDRL); + bcopy(srcip, ea->arp_spa, sizeof (struct in_addr)); + bcopy(libvarpd_arp_bcast, ea->arp_tha, ETHERADDRL); + bcopy(srcip, ea->arp_tpa, sizeof (struct in_addr)); + + (void) libvarpd_overlay_instance_inject(inst, buf, bsize); +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_client.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.c new file mode 100644 index 0000000000..0f616b9533 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.c @@ -0,0 +1,626 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. 
+ */ + +/* + * varpd client interfaces + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <umem.h> +#include <unistd.h> +#include <string.h> +#include <strings.h> +#include <door.h> + +#include <libvarpd_impl.h> + +typedef struct varpd_client { + int vcl_doorfd; +} varpd_client_t; + +typedef struct varpd_client_prop_info { + varpd_client_t *vcprop_client; + uint64_t vcprop_instance; + uint_t vcprop_propid; + uint_t vcprop_type; + uint_t vcprop_prot; + uint32_t vcprop_defsize; + uint32_t vcprop_psize; + char vcprop_name[LIBVARPD_PROP_NAMELEN]; + uint8_t vcprop_default[LIBVARPD_PROP_SIZEMAX]; + uint8_t vcprop_poss[LIBVARPD_PROP_SIZEMAX]; +} varpd_client_prop_info_t; + +static int +libvarpd_c_door_call(varpd_client_t *client, varpd_client_arg_t *argp, + size_t altsize) +{ + int ret; + door_arg_t darg; + + darg.data_ptr = (char *)argp; + darg.desc_ptr = NULL; + darg.desc_num = 0; + darg.rbuf = (char *)argp; + if (altsize != 0) { + darg.data_size = altsize; + darg.rsize = altsize; + } else { + darg.data_size = sizeof (varpd_client_arg_t); + darg.rsize = sizeof (varpd_client_arg_t); + } + + do { + ret = door_call(client->vcl_doorfd, &darg); + } while (ret != 0 && errno == EINTR); + if (ret != 0) { + switch (errno) { + case E2BIG: + case EFAULT: + case EINVAL: + case ENOTSUP: + case EOVERFLOW: + case ENFILE: + libvarpd_panic("unhandalable errno from door_call: %d", + errno); + } + ret = errno; + } + + return (ret); +} + +int +libvarpd_c_create(varpd_client_handle_t **chpp, const char *doorname) +{ + varpd_client_t *client; + + client = umem_alloc(sizeof (varpd_client_t), UMEM_DEFAULT); + if (client == NULL) + return (ENOMEM); + + client->vcl_doorfd = open(doorname, O_RDWR); + if (client->vcl_doorfd < 0) { + int ret = errno; + umem_free(client, sizeof (varpd_client_t)); + return (ret); + } + + *chpp = (varpd_client_handle_t *)client; + return (0); +} + +int +libvarpd_c_destroy(varpd_client_handle_t *chp) +{ + 
varpd_client_t *client = (varpd_client_t *)chp; + if (close(client->vcl_doorfd) != 0) + libvarpd_panic("failed to close door fd %d: %d", + client->vcl_doorfd, errno); + + umem_free(chp, sizeof (varpd_client_handle_t *)); + return (0); +} + +int +libvarpd_c_instance_create(varpd_client_handle_t *chp, datalink_id_t linkid, + const char *search, uint64_t *cidp) +{ + int ret; + varpd_client_t *client = (varpd_client_t *)chp; + varpd_client_arg_t carg; + varpd_client_create_arg_t *cap = &carg.vca_un.vca_create; + + if (strlen(search) >= LIBVARPD_PROP_NAMELEN) + return (EINVAL); + carg.vca_command = VARPD_CLIENT_CREATE; + carg.vca_errno = 0; + cap->vcca_linkid = linkid; + (void) strlcpy(cap->vcca_plugin, search, LIBVARPD_PROP_NAMELEN); + + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + *cidp = cap->vcca_id; + + return (0); +} + +int +libvarpd_c_instance_activate(varpd_client_handle_t *chp, uint64_t cid) +{ + int ret; + varpd_client_t *client = (varpd_client_t *)chp; + varpd_client_arg_t carg; + varpd_client_instance_arg_t *vciap = &carg.vca_un.vca_instance; + + carg.vca_command = VARPD_CLIENT_ACTIVATE; + carg.vca_errno = 0; + vciap->vcia_id = cid; + + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + return (0); +} + +int +libvarpd_c_instance_destroy(varpd_client_handle_t *chp, uint64_t cid) +{ + int ret; + varpd_client_t *client = (varpd_client_t *)chp; + varpd_client_arg_t carg; + varpd_client_instance_arg_t *vciap = &carg.vca_un.vca_instance; + + carg.vca_command = VARPD_CLIENT_DESTROY; + carg.vca_errno = 0; + vciap->vcia_id = cid; + + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + return (0); +} + +int +libvarpd_c_prop_nprops(varpd_client_handle_t *chp, uint64_t cid, uint_t *nprops) +{ + int ret; + 
varpd_client_t *client = (varpd_client_t *)chp; + varpd_client_arg_t carg; + varpd_client_nprops_arg_t *vcnap = &carg.vca_un.vca_nprops; + + carg.vca_command = VARPD_CLIENT_NPROPS; + carg.vca_errno = 0; + vcnap->vcna_id = cid; + vcnap->vcna_nprops = 0; + + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + *nprops = vcnap->vcna_nprops; + return (0); +} + +int +libvarpd_c_prop_handle_alloc(varpd_client_handle_t *chp, uint64_t cid, + varpd_client_prop_handle_t **phdlp) +{ + varpd_client_prop_info_t *infop; + + infop = umem_alloc(sizeof (varpd_client_prop_info_t), UMEM_DEFAULT); + if (infop == NULL) + return (ENOMEM); + + bzero(infop, sizeof (varpd_client_prop_info_t)); + infop->vcprop_client = (varpd_client_t *)chp; + infop->vcprop_instance = cid; + infop->vcprop_propid = UINT_MAX; + *phdlp = (varpd_client_prop_handle_t *)infop; + return (0); +} + +void +libvarpd_c_prop_handle_free(varpd_client_prop_handle_t *phdl) +{ + umem_free(phdl, sizeof (varpd_client_prop_info_t)); + phdl = NULL; +} + +static void +libvarpd_c_prop_info_from_door(varpd_client_prop_info_t *infop, + const varpd_client_propinfo_arg_t *vcfap) +{ + infop->vcprop_propid = vcfap->vcfa_propid; + infop->vcprop_type = vcfap->vcfa_type; + infop->vcprop_prot = vcfap->vcfa_prot; + infop->vcprop_defsize = vcfap->vcfa_defsize; + infop->vcprop_psize = vcfap->vcfa_psize; + bcopy(vcfap->vcfa_name, infop->vcprop_name, LIBVARPD_PROP_NAMELEN); + bcopy(vcfap->vcfa_default, infop->vcprop_default, + LIBVARPD_PROP_SIZEMAX); + bcopy(vcfap->vcfa_poss, infop->vcprop_poss, LIBVARPD_PROP_SIZEMAX); +} + +int +libvarpd_c_prop_info_fill_by_name(varpd_client_prop_handle_t *phdl, + const char *name) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_propinfo_arg_t *vcfap = &carg.vca_un.vca_info; + varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl; + + if (strlen(name) >= LIBVARPD_PROP_NAMELEN) + return (EINVAL); + 
bzero(&carg, sizeof (varpd_client_arg_t)); + carg.vca_command = VARPD_CLIENT_PROPINFO; + carg.vca_errno = 0; + vcfap->vcfa_id = infop->vcprop_instance; + vcfap->vcfa_propid = UINT_MAX; + (void) strlcpy(vcfap->vcfa_name, name, LIBVARPD_PROP_NAMELEN); + + ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + libvarpd_c_prop_info_from_door(infop, vcfap); + return (0); +} + +int +libvarpd_c_prop_info_fill(varpd_client_prop_handle_t *phdl, uint_t propid) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_propinfo_arg_t *vcfap = &carg.vca_un.vca_info; + varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl; + + bzero(&carg, sizeof (varpd_client_arg_t)); + carg.vca_command = VARPD_CLIENT_PROPINFO; + carg.vca_errno = 0; + vcfap->vcfa_id = infop->vcprop_instance; + vcfap->vcfa_propid = propid; + + ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + libvarpd_c_prop_info_from_door(infop, vcfap); + return (0); +} + +int +libvarpd_c_prop_info(varpd_client_prop_handle_t *phdl, const char **namep, + uint_t *typep, uint_t *protp, const void **defp, uint32_t *defsizep, + const mac_propval_range_t **possp) +{ + varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl; + if (infop->vcprop_propid == UINT_MAX) + return (EINVAL); + + if (namep != NULL) + *namep = infop->vcprop_name; + if (typep != NULL) + *typep = infop->vcprop_type; + if (protp != NULL) + *protp = infop->vcprop_prot; + if (defp != NULL) + *defp = infop->vcprop_default; + if (defsizep != NULL) + *defsizep = infop->vcprop_defsize; + if (possp != NULL) + *possp = (const mac_propval_range_t *)infop->vcprop_poss; + return (0); +} + +int +libvarpd_c_prop_get(varpd_client_prop_handle_t *phdl, void *buf, uint32_t *len) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_prop_arg_t *vcpap = 
&carg.vca_un.vca_prop; + varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl; + + if (len == NULL || buf == NULL || infop->vcprop_propid == UINT_MAX) + return (EINVAL); + if (*len < LIBVARPD_PROP_SIZEMAX) + return (EOVERFLOW); + + bzero(&carg, sizeof (varpd_client_arg_t)); + carg.vca_command = VARPD_CLIENT_GETPROP; + carg.vca_errno = 0; + vcpap->vcpa_id = infop->vcprop_instance; + vcpap->vcpa_propid = infop->vcprop_propid; + + ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + /* + * XXX We should really abort, as the server shouldn't send bad input, + * but a library really shouldn't kill the client. Therefore we have a + * shitty, shitty, error case. + */ + if (vcpap->vcpa_bufsize > LIBVARPD_PROP_SIZEMAX) + return (E2BIG); + + bcopy(vcpap->vcpa_buf, buf, vcpap->vcpa_bufsize); + *len = vcpap->vcpa_bufsize; + return (0); +} + +int +libvarpd_c_prop_set(varpd_client_prop_handle_t *phdl, const void *buf, + uint32_t len) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_prop_arg_t *vcpap = &carg.vca_un.vca_prop; + varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl; + + if (len == NULL || buf == NULL || infop->vcprop_propid == UINT_MAX) + return (EINVAL); + if (len > LIBVARPD_PROP_SIZEMAX) + return (EOVERFLOW); + + carg.vca_command = VARPD_CLIENT_SETPROP; + carg.vca_errno = 0; + vcpap->vcpa_id = infop->vcprop_instance; + vcpap->vcpa_propid = infop->vcprop_propid; + vcpap->vcpa_bufsize = len; + bcopy(buf, vcpap->vcpa_buf, len); + + ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + return (0); +} + +int +libvarpd_c_instance_lookup(varpd_client_handle_t *chp, datalink_id_t linkid, + uint64_t *instp) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_lookup_arg_t *vclap = &carg.vca_un.vca_lookup; + varpd_client_t *client = 
(varpd_client_t *)chp; + + carg.vca_command = VARPD_CLIENT_LOOKUP; + carg.vca_errno = 0; + vclap->vcla_linkid = linkid; + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + if (instp != NULL) + *instp = vclap->vcla_id; + + return (0); +} + +int +libvarpd_c_instance_target_mode(varpd_client_handle_t *chp, uint64_t cid, + uint_t *dtype, uint_t *mtype) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_target_mode_arg_t *vctmap = &carg.vca_un.vca_mode; + varpd_client_t *client = (varpd_client_t *)chp; + + carg.vca_command = VARPD_CLIENT_TARGET_MODE; + carg.vca_errno = 0; + vctmap->vtma_id = cid; + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + if (ret == 0) { + if (mtype != NULL) + *mtype = vctmap->vtma_mode; + if (dtype != NULL) + *dtype = vctmap->vtma_dest; + } + + return (ret); +} + +int +libvarpd_c_instance_cache_flush(varpd_client_handle_t *chp, uint64_t cid) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache; + varpd_client_t *client = (varpd_client_t *)chp; + + carg.vca_command = VARPD_CLIENT_CACHE_FLUSH; + carg.vca_errno = 0; + + vctcap->vtca_id = cid; + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + return (0); +} + +int +libvarpd_c_instance_cache_delete(varpd_client_handle_t *chp, uint64_t cid, + const struct ether_addr *key) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache; + varpd_client_t *client = (varpd_client_t *)chp; + + if (key == NULL) + return (EINVAL); + + carg.vca_command = VARPD_CLIENT_CACHE_DELETE; + carg.vca_errno = 0; + vctcap->vtca_id = cid; + bcopy(key, vctcap->vtca_key, ETHERADDRL); + + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return 
(ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + return (0); +} + +int +libvarpd_c_instance_cache_get(varpd_client_handle_t *chp, uint64_t cid, + const struct ether_addr *key, varpd_client_cache_entry_t *entry) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache; + varpd_client_t *client = (varpd_client_t *)chp; + + if (key == NULL || entry == NULL) + return (EINVAL); + + carg.vca_command = VARPD_CLIENT_CACHE_GET; + carg.vca_errno = 0; + vctcap->vtca_id = cid; + bcopy(key, vctcap->vtca_key, ETHERADDRL); + bzero(&vctcap->vtca_entry, sizeof (varpd_client_cache_entry_t)); + + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + bcopy(&vctcap->vtca_entry, entry, sizeof (varpd_client_cache_entry_t)); + return (0); +} + +int +libvarpd_c_instance_cache_set(varpd_client_handle_t *chp, uint64_t cid, + const struct ether_addr *key, const varpd_client_cache_entry_t *entry) +{ + int ret; + varpd_client_arg_t carg; + varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache; + varpd_client_t *client = (varpd_client_t *)chp; + + if (key == NULL || entry == NULL) + return (EINVAL); + + carg.vca_command = VARPD_CLIENT_CACHE_SET; + carg.vca_errno = 0; + vctcap->vtca_id = cid; + bcopy(key, vctcap->vtca_key, ETHERADDRL); + bcopy(entry, &vctcap->vtca_entry, sizeof (varpd_client_cache_entry_t)); + + ret = libvarpd_c_door_call(client, &carg, 0); + if (ret != 0) + return (ret); + + if (carg.vca_errno != 0) + return (carg.vca_errno); + + return (0); +} + +int +libvarpd_c_instance_cache_walk(varpd_client_handle_t *chp, uint64_t cid, + varpd_client_cache_f func, void *arg) +{ + int ret = 0; + size_t bufsize = sizeof (varpd_client_arg_t) + + 100 * sizeof (varpd_client_cache_entry_t); + varpd_client_t *client = (varpd_client_t *)chp; + varpd_client_arg_t *cargp; + varpd_client_target_walk_arg_t *vctwap; + + /* + * Because the 
number of entries involved in a walk may be large, we + * dynamically allocate a number of queries to make at a single time. + * This also means that the average door request doesn't inflate by the + * number of entries we want. For now, let's always grab 100 entries in + * a request. + */ + cargp = umem_zalloc(bufsize, UMEM_DEFAULT); + if (cargp == NULL) + return (errno); + vctwap = &cargp->vca_un.vca_walk; + for (;;) { + int i; + + cargp->vca_command = VARPD_CLIENT_CACHE_WALK; + cargp->vca_errno = 0; + vctwap->vtcw_id = cid; + vctwap->vtcw_count = 100; + + ret = libvarpd_c_door_call(client, cargp, bufsize); + if (ret != 0) + break; + + if (cargp->vca_errno != 0) { + ret = cargp->vca_errno; + break; + } + + if (vctwap->vtcw_count == 0) { + ret = 0; + break; + } + + for (i = 0; i < vctwap->vtcw_count; i++) { + varpd_client_cache_entry_t ent; + + ent.vcp_flags = vctwap->vtcw_ents[i].otce_flags; + bcopy(vctwap->vtcw_ents[i].otce_dest.otp_mac, + &ent.vcp_mac, ETHERADDRL); + ent.vcp_ip = vctwap->vtcw_ents[i].otce_dest.otp_ip; + ent.vcp_port = vctwap->vtcw_ents[i].otce_dest.otp_port; + ret = func(chp, cid, + (struct ether_addr *)vctwap->vtcw_ents[i].otce_mac, + &ent, arg); + if (ret != 0) { + ret = 0; + goto done; + } + } + } + +done: + umem_free(cargp, bufsize); + return (ret); +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_client.h b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.h new file mode 100644 index 0000000000..b794472bc1 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.h @@ -0,0 +1,94 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ */ + +/* + * Copyright (c) 2014 Joyent, Inc. + */ + +#ifndef _LIBVARPD_CLIENT_H +#define _LIBVARPD_CLIENT_H + +/* + * varpd interfaces + */ + +#include <sys/types.h> +#include <stdint.h> +#include <sys/mac.h> +/* XXX Should we have our own, but compatible types? */ +#include <sys/overlay_target.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct __varpd_client_handle varpd_client_handle_t; +typedef struct __varpd_client_prop_handle varpd_client_prop_handle_t; + +typedef struct varpd_client_cache_entry { + struct ether_addr vcp_mac; + uint16_t vcp_flags; + struct in6_addr vcp_ip; + uint16_t vcp_port; +} varpd_client_cache_entry_t; + +/* + * This should stay in sync with OVERLAY_PROP_* + */ +#define LIBVARPD_PROP_SIZEMAX 256 +#define LIBVARPD_PROP_NAMELEN 32 + +extern int libvarpd_c_create(varpd_client_handle_t **, const char *); +extern int libvarpd_c_destroy(varpd_client_handle_t *); +extern int libvarpd_c_instance_create(varpd_client_handle_t *, datalink_id_t, + const char *, uint64_t *); +extern int libvarpd_c_instance_activate(varpd_client_handle_t *, uint64_t); +extern int libvarpd_c_instance_destroy(varpd_client_handle_t *, uint64_t); + +extern int libvarpd_c_prop_nprops(varpd_client_handle_t *, uint64_t, uint_t *); +extern int libvarpd_c_prop_handle_alloc(varpd_client_handle_t *, uint64_t, + varpd_client_prop_handle_t **); +extern void libvarpd_c_prop_handle_free(varpd_client_prop_handle_t *); +extern int libvarpd_c_prop_info_fill(varpd_client_prop_handle_t *, uint_t); +extern int libvarpd_c_prop_info_fill_by_name(varpd_client_prop_handle_t *, + const char *); +extern int libvarpd_c_prop_info(varpd_client_prop_handle_t *, const char **, + uint_t *, uint_t *, const void **, uint32_t *, + const mac_propval_range_t **); +extern int libvarpd_c_prop_get(varpd_client_prop_handle_t *, void *, + uint32_t *); +extern int libvarpd_c_prop_set(varpd_client_prop_handle_t *, const void *, + uint32_t); + +/* XXX Strawman */ +extern int 
libvarpd_c_instance_lookup(varpd_client_handle_t *, datalink_id_t, + uint64_t *); +extern int libvarpd_c_instance_target_mode(varpd_client_handle_t *, uint64_t, + uint_t *, uint_t *); +extern int libvarpd_c_instance_cache_flush(varpd_client_handle_t *, uint64_t); +extern int libvarpd_c_instance_cache_delete(varpd_client_handle_t *, uint64_t, + const struct ether_addr *); +extern int libvarpd_c_instance_cache_get(varpd_client_handle_t *, uint64_t, + const struct ether_addr *, varpd_client_cache_entry_t *); +extern int libvarpd_c_instance_cache_set(varpd_client_handle_t *, uint64_t, + const struct ether_addr *, const varpd_client_cache_entry_t *); + +typedef int (*varpd_client_cache_f)(varpd_client_handle_t *, uint64_t, + const struct ether_addr *, const varpd_client_cache_entry_t *, void *); +extern int libvarpd_c_instance_cache_walk(varpd_client_handle_t *, uint64_t, + varpd_client_cache_f, void *); + + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBVARPD_CLIENT_H */ diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_door.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_door.c new file mode 100644 index 0000000000..f2f93562ac --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_door.c @@ -0,0 +1,457 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. 
+ */ + +/* + * varpd door server logic + */ + +#include <door.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stropts.h> +#include <stdlib.h> +#include <strings.h> +#include <priv.h> +#include <libvarpd_impl.h> + +typedef int (libvarpd_door_f)(varpd_impl_t *, varpd_client_arg_t *, ucred_t *); + +static boolean_t +libvarpd_door_privileged(ucred_t *credp) +{ + const priv_set_t *ps; + + ps = ucred_getprivset(credp, PRIV_EFFECTIVE); + if (ps == NULL) + return (B_FALSE); + + return (priv_ismember(ps, PRIV_SYS_NET_CONFIG)); +} + +static int +libvarpd_door_f_create(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + int ret; + varpd_instance_handle_t *ihdl; + varpd_client_create_arg_t *vccap = &vcap->vca_un.vca_create; + + vccap->vcca_plugin[LIBVARPD_PROP_NAMELEN-1] = '\0'; + ret = libvarpd_instance_create((varpd_handle_t *)vip, + vccap->vcca_linkid, vccap->vcca_plugin, &ihdl); + if (ret == 0) + vccap->vcca_id = libvarpd_instance_id(ihdl); + + return (ret); +} + +static int +libvarpd_door_f_activate(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_client_instance_arg_t *vciap = &vcap->vca_un.vca_instance; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vciap->vcia_id); + if (ihp == NULL) + return (ENOENT); + return (libvarpd_instance_activate(ihp)); +} + +static int +libvarpd_door_f_destroy(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_client_instance_arg_t *vciap = &vcap->vca_un.vca_instance; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vciap->vcia_id); + if (ihp == NULL) + return (ENOENT); + libvarpd_instance_destroy(ihp); + return (0); +} + +static int +libvarpd_door_f_nprops(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_client_nprops_arg_t *vcnap = &vcap->vca_un.vca_nprops; + + ihp = 
libvarpd_instance_lookup((varpd_handle_t *)vip, vcnap->vcna_id); + if (ihp == NULL) + return (ENOENT); + + return (libvarpd_prop_nprops(ihp, &vcnap->vcna_nprops)); +} + +static int +libvarpd_door_f_propinfo(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + int ret; + varpd_instance_handle_t *ihp; + varpd_prop_handle_t *phdl; + varpd_client_propinfo_arg_t *vcfap = &vcap->vca_un.vca_info; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcfap->vcfa_id); + if (ihp == NULL) + return (ENOENT); + ret = libvarpd_prop_handle_alloc((varpd_handle_t *)vip, ihp, &phdl); + if (ret != 0) + return (ret); + + if (vcfap->vcfa_propid != UINT_MAX) { + ret = libvarpd_prop_info_fill(phdl, vcfap->vcfa_propid); + if (ret != 0) { + libvarpd_prop_handle_free(phdl); + return (ret); + } + } else { + uint_t i, nprop; + const char *name; + + vcfap->vcfa_name[LIBVARPD_PROP_NAMELEN-1] = '\0'; + ret = libvarpd_prop_nprops(ihp, &nprop); + if (ret != 0) { + libvarpd_prop_handle_free(phdl); + return (ret); + } + for (i = 0; i < nprop; i++) { + ret = libvarpd_prop_info_fill(phdl, i); + if (ret != 0) { + libvarpd_prop_handle_free(phdl); + return (ret); + } + ret = libvarpd_prop_info(phdl, &name, NULL, NULL, NULL, + NULL, NULL); + if (ret != 0) { + libvarpd_prop_handle_free(phdl); + return (ret); + } + if (strcmp(vcfap->vcfa_name, name) == 0) + break; + } + + if (i == nprop) { + libvarpd_prop_handle_free(phdl); + return (ENOENT); + } + vcfap->vcfa_propid = i; + } + libvarpd_prop_door_convert(phdl, vcfap); + libvarpd_prop_handle_free(phdl); + return (0); +} + +static int +libvarpd_door_f_getprop(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + int ret; + uint32_t size; + varpd_instance_handle_t *ihp; + varpd_prop_handle_t *phdl; + varpd_client_prop_arg_t *vcpap = &vcap->vca_un.vca_prop; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcpap->vcpa_id); + if (ihp == NULL) + return (ENOENT); + ret = libvarpd_prop_handle_alloc((varpd_handle_t *)vip, 
ihp, &phdl); + if (ret != 0) + return (ret); + + ret = libvarpd_prop_info_fill(phdl, vcpap->vcpa_propid); + if (ret != 0) { + libvarpd_prop_handle_free(phdl); + return (ret); + } + + ret = libvarpd_prop_get(phdl, vcpap->vcpa_buf, &size); + if (ret == 0) + vcpap->vcpa_bufsize = size; + libvarpd_prop_handle_free(phdl); + return (0); +} + +static int +libvarpd_door_f_setprop(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + int ret; + varpd_instance_handle_t *ihp; + varpd_prop_handle_t *phdl; + varpd_client_prop_arg_t *vcpap = &vcap->vca_un.vca_prop; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcpap->vcpa_id); + if (ihp == NULL) + return (ENOENT); + ret = libvarpd_prop_handle_alloc((varpd_handle_t *)vip, ihp, &phdl); + if (ret != 0) + return (ret); + + ret = libvarpd_prop_info_fill(phdl, vcpap->vcpa_propid); + if (ret != 0) { + libvarpd_prop_handle_free(phdl); + return (ret); + } + + ret = libvarpd_prop_set(phdl, vcpap->vcpa_buf, vcpap->vcpa_bufsize); + libvarpd_prop_handle_free(phdl); + return (ret); +} + +static int +libvarpd_door_f_lookup(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_t *inst; + varpd_client_lookup_arg_t *vclap = &vcap->vca_un.vca_lookup; + + inst = libvarpd_instance_lookup_by_dlid(vip, vclap->vcla_linkid); + if (inst == NULL) + return (ENOENT); + + vclap->vcla_id = inst->vri_id; + return (0); +} + +static int +libvarpd_door_f_target(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_instance_t *inst; + varpd_client_target_mode_arg_t *vtmap = &vcap->vca_un.vca_mode; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtmap->vtma_id); + if (ihp == NULL) + return (ENOENT); + inst = (varpd_instance_t *)ihp; + vtmap->vtma_dest = inst->vri_dest; + vtmap->vtma_mode = inst->vri_mode; + return (0); +} + +static int +libvarpd_door_f_flush(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t 
*ihp; + varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache; + + if (libvarpd_door_privileged(credp) == B_FALSE) + return (EPERM); + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id); + if (ihp == NULL) + return (ENOENT); + return (libvarpd_overlay_cache_flush((varpd_instance_t *)ihp)); +} + +static int +libvarpd_door_f_delete(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache; + + if (libvarpd_door_privileged(credp) == B_FALSE) + return (EPERM); + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id); + if (ihp == NULL) + return (ENOENT); + return (libvarpd_overlay_cache_delete((varpd_instance_t *)ihp, + vtcap->vtca_key)); +} + +static int +libvarpd_door_f_get(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache; + + /* XXX Should this be privileged? 
*/ + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id); + if (ihp == NULL) + return (ENOENT); + return (libvarpd_overlay_cache_get((varpd_instance_t *)ihp, + vtcap->vtca_key, &vtcap->vtca_entry)); +} + +static int +libvarpd_door_f_set(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache; + + if (libvarpd_door_privileged(credp) == B_FALSE) + return (EPERM); + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id); + if (ihp == NULL) + return (ENOENT); + + return (libvarpd_overlay_cache_set((varpd_instance_t *)ihp, + vtcap->vtca_key, &vtcap->vtca_entry)); +} + +static int +libvarpd_door_f_walk(varpd_impl_t *vip, varpd_client_arg_t *vcap, + ucred_t *credp) +{ + varpd_instance_handle_t *ihp; + varpd_client_target_walk_arg_t *vctwp = &vcap->vca_un.vca_walk; + + ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vctwp->vtcw_id); + if (ihp == NULL) + return (ENOENT); + + return (libvarpd_overlay_cache_walk_fill((varpd_instance_t *)ihp, + &vctwp->vtcw_marker, &vctwp->vtcw_count, vctwp->vtcw_ents)); +} + +static libvarpd_door_f *libvarpd_door_table[] = { + libvarpd_door_f_create, + libvarpd_door_f_activate, + libvarpd_door_f_destroy, + libvarpd_door_f_nprops, + libvarpd_door_f_propinfo, + libvarpd_door_f_getprop, + libvarpd_door_f_setprop, + libvarpd_door_f_lookup, + libvarpd_door_f_target, + libvarpd_door_f_flush, + libvarpd_door_f_delete, + libvarpd_door_f_get, + libvarpd_door_f_set, + libvarpd_door_f_walk +}; + +static void +libvarpd_door_server(void *cookie, char *argp, size_t argsz, door_desc_t *dp, + uint_t ndesc) +{ + int ret; + varpd_client_eresp_t err; + ucred_t *credp = NULL; + varpd_impl_t *vip = cookie; + varpd_client_arg_t *vcap = (varpd_client_arg_t *)argp; + + err.vce_command = VARPD_CLIENT_INVALID; + /* XXX Get a check for max size */ + if (argsz < sizeof (varpd_client_arg_t)) { + err.vce_errno = EINVAL; + 
goto errout; + } + + if ((ret = door_ucred(&credp)) != 0) { + err.vce_errno = ret; + goto errout; + } + + if (vcap->vca_command <= 0 || vcap->vca_command >= VARPD_CLIENT_MAX) { + err.vce_errno = EINVAL; + goto errout; + } + + vcap->vca_errno = 0; + ret = libvarpd_door_table[vcap->vca_command - 1](vip, vcap, credp); + if (ret != 0) + vcap->vca_errno = ret; + + ucred_free(credp); + (void) door_return(argp, argsz, NULL, 0); + return; + +errout: + /* XXX Should we do something here? */ + (void) door_return((char *)&err, sizeof (err), NULL, 0); +} + +int +libvarpd_door_server_create(varpd_handle_t *vhp, const char *path) +{ + int fd, ret; + varpd_impl_t *vip = (varpd_impl_t *)vhp; + + mutex_lock(&vip->vdi_lock); + if (vip->vdi_doorfd >= 0) { + mutex_unlock(&vip->vdi_lock); + return (EEXIST); + } + + vip->vdi_doorfd = door_create(libvarpd_door_server, vip, + DOOR_REFUSE_DESC | DOOR_NO_CANCEL); + if (vip->vdi_doorfd == -1) { + mutex_unlock(&vip->vdi_lock); + return (errno); + } + + if ((fd = open(path, O_CREAT | O_RDWR, 0666)) == -1) { + ret = errno; + if (door_revoke(vip->vdi_doorfd) != 0) + libvarpd_panic("failed to revoke door: %d", + errno); + mutex_unlock(&vip->vdi_lock); + return (errno); + } + /* XXX Really? 
*/ + if (fchown(fd, UID_NETADM, GID_NETADM) != 0) { + ret = errno; + if (door_revoke(vip->vdi_doorfd) != 0) + libvarpd_panic("failed to revoke door: %d", + errno); + mutex_unlock(&vip->vdi_lock); + return (ret); + } + + if (close(fd) != 0) + libvarpd_panic("failed to close door fd %d: %d", + fd, errno); + (void) fdetach(path); + if (fattach(vip->vdi_doorfd, path) != 0) { + ret = errno; + if (door_revoke(vip->vdi_doorfd) != 0) + libvarpd_panic("failed to revoke door: %d", + errno); + mutex_unlock(&vip->vdi_lock); + return (ret); + } + + mutex_unlock(&vip->vdi_lock); + return (0); +} + +void +libvarpd_door_server_destroy(varpd_handle_t *vhp) +{ + varpd_impl_t *vip = (varpd_impl_t *)vhp; + + mutex_lock(&vip->vdi_lock); + if (vip->vdi_doorfd != 0) { + if (door_revoke(vip->vdi_doorfd) != 0) + libvarpd_panic("failed to revoke door: %d", + errno); + vip->vdi_doorfd = -1; + } + mutex_unlock(&vip->vdi_lock); +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h b/usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h new file mode 100644 index 0000000000..44bee7d92a --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h @@ -0,0 +1,247 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. 
/*
 * Per-handle state for a varpd instance of the library.  The trailing
 * comment on each member names the lock that is held when the member is
 * accessed.  Members marked RO are presumably written only during setup --
 * TODO confirm.
 */
typedef struct varpd_impl {
	mutex_t		vdi_lock;
	mutex_t		vdi_loglock;
	rwlock_t	vdi_pfdlock;
	avl_tree_t	vdi_plugins;	/* vdi_lock */
	avl_tree_t	vdi_instances;	/* vdi_lock */
	avl_tree_t	vdi_linstances;	/* vdi_lock */
	id_space_t	*vdi_idspace;	/* RO */
	umem_cache_t	*vdi_qcache;	/* RO */
	bunyan_logger_t	*vdi_bunyan;	/* RO */
	int		vdi_overlayfd;	/* RO */
	int		vdi_doorfd;	/* vdi_lock */
	int		vdi_persistfd;	/* vdi_pfdlock */
	cond_t		vdi_lthr_cv;	/* vdi_lock */
	boolean_t	vdi_lthr_quiesce;	/* vdi_lock */
	uint_t		vdi_lthr_count;	/* vdi_lock */
} varpd_impl_t;
/*
 * Commands carried in varpd_client_arg_t's vca_command.  The door server
 * rejects VARPD_CLIENT_INVALID and anything at or above VARPD_CLIENT_MAX, and
 * dispatches every other value through libvarpd_door_table[command - 1].
 * The order here must therefore match the order of that table exactly; add
 * new commands immediately before VARPD_CLIENT_MAX.
 */
typedef enum varpd_client_command {
	VARPD_CLIENT_INVALID = 0x0,
	VARPD_CLIENT_CREATE,
	VARPD_CLIENT_ACTIVATE,
	VARPD_CLIENT_DESTROY,
	VARPD_CLIENT_NPROPS,
	VARPD_CLIENT_PROPINFO,
	VARPD_CLIENT_GETPROP,
	VARPD_CLIENT_SETPROP,
	VARPD_CLIENT_LOOKUP,
	VARPD_CLIENT_TARGET_MODE,
	VARPD_CLIENT_CACHE_FLUSH,
	VARPD_CLIENT_CACHE_DELETE,
	VARPD_CLIENT_CACHE_GET,
	VARPD_CLIENT_CACHE_SET,
	VARPD_CLIENT_CACHE_WALK,
	VARPD_CLIENT_MAX
} varpd_client_command_t;
/*
 * Short error reply returned by the door server when a request is rejected
 * before it is dispatched (too small, credentials unavailable, or an unknown
 * command).  The server sets vce_command to VARPD_CLIENT_INVALID and
 * vce_errno to the reason.  The two members have the same types and order as
 * the leading vca_command and vca_errno members of varpd_client_arg_t.
 */
typedef struct varpd_client_eresp {
	uint_t vce_command;
	uint_t vce_errno;
} varpd_client_eresp_t;
libvarpd_overlay_inject(varpd_impl_t *, + const overlay_targ_lookup_t *, void *, size_t); +extern int libvarpd_overlay_instance_inject(varpd_instance_t *, void *, size_t); +extern int libvarpd_overlay_resend(varpd_impl_t *, + const overlay_targ_lookup_t *, void *, size_t); +typedef int (*libvarpd_overlay_iter_f)(varpd_impl_t *, datalink_id_t, void *); +extern int libvarpd_overlay_iter(varpd_impl_t *, libvarpd_overlay_iter_f, + void *); +extern int libvarpd_overlay_cache_flush(varpd_instance_t *); +extern int libvarpd_overlay_cache_delete(varpd_instance_t *, const uint8_t *); +extern int libvarpd_overlay_cache_delete(varpd_instance_t *, const uint8_t *); +extern int libvarpd_overlay_cache_get(varpd_instance_t *, const uint8_t *, + varpd_client_cache_entry_t *); +extern int libvarpd_overlay_cache_set(varpd_instance_t *, const uint8_t *, + const varpd_client_cache_entry_t *); +extern int libvarpd_overlay_cache_walk_fill(varpd_instance_t *, uint64_t *, + uint64_t *, overlay_targ_cache_entry_t *); + +extern void libvarpd_persist_init(varpd_impl_t *); +extern void libvarpd_persist_fini(varpd_impl_t *); +extern int libvarpd_persist_instance(varpd_impl_t *, varpd_instance_t *); +extern void libvarpd_torch_instance(varpd_impl_t *, varpd_instance_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBVARPD_IMPL_H */ diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c new file mode 100644 index 0000000000..f2cad47394 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c @@ -0,0 +1,574 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2015 Joyent, Inc. + */ + +/* + * Interactions with /dev/overlay + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <assert.h> +#include <unistd.h> +#include <stdlib.h> +#include <stropts.h> +#include <strings.h> +#include <umem.h> + +#include <libvarpd_impl.h> +#include <sys/overlay_target.h> + +#define OVERLAY_PATH "/dev/overlay" + +int +libvarpd_overlay_init(varpd_impl_t *vip) +{ + vip->vdi_overlayfd = open(OVERLAY_PATH, O_RDWR); + if (vip->vdi_overlayfd == -1) + return (errno); + return (0); +} + +void +libvarpd_overlay_fini(varpd_impl_t *vip) +{ + assert(vip->vdi_overlayfd > 0); + if (close(vip->vdi_overlayfd) != 0) + libvarpd_panic("failed to close /dev/overlay fd %d: %d", + vip->vdi_overlayfd, errno); +} + +int +libvarpd_overlay_info(varpd_impl_t *vip, datalink_id_t linkid, + overlay_plugin_dest_t *destp, uint64_t *flags, uint64_t *vnetid) +{ + overlay_targ_info_t oti; + + oti.oti_linkid = linkid; + if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_INFO, &oti) != 0) + return (errno); + + if (destp != NULL) + *destp = oti.oti_needs; + if (flags != NULL) + *flags = oti.oti_flags; + if (vnetid != NULL) + *vnetid = oti.oti_vnetid; + return (0); +} + +int +libvarpd_overlay_associate(varpd_instance_t *inst) +{ + overlay_targ_associate_t ota; + varpd_impl_t *vip = inst->vri_impl; + + bzero(&ota, sizeof (overlay_targ_associate_t)); + ota.ota_linkid = inst->vri_linkid; + ota.ota_mode = inst->vri_mode; + ota.ota_id = inst->vri_id; + ota.ota_provides = inst->vri_dest; + + if (ota.ota_mode == OVERLAY_TARGET_POINT) { + int ret; + ret = inst->vri_plugin->vpp_ops->vpo_default(inst->vri_private, + &ota.ota_point); + if (ret != VARPD_LOOKUP_OK) + return (ret); + } + + if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_ASSOCIATE, &ota) != 0) + return (errno); + + return (0); +} + +int 
+libvarpd_overlay_disassociate(varpd_instance_t *inst) +{ + overlay_targ_id_t otid; + varpd_impl_t *vip = inst->vri_impl; + + otid.otid_linkid = inst->vri_linkid; + if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_DISASSOCIATE, &otid) != 0) + return (errno); + return (0); +} + +int +libvarpd_overlay_degrade_datalink(varpd_impl_t *vip, datalink_id_t linkid, + const char *msg) +{ + overlay_targ_degrade_t otd; + + otd.otd_linkid = linkid; + (void) strlcpy(otd.otd_buf, msg, OVERLAY_STATUS_BUFLEN); + if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_DEGRADE, &otd) != 0) + return (errno); + return (0); + +} + +int +libvarpd_overlay_degrade(varpd_instance_t *inst, const char *msg) +{ + return (libvarpd_overlay_degrade_datalink(inst->vri_impl, + inst->vri_linkid, msg)); +} + +int +libvarpd_overlay_restore(varpd_instance_t *inst) +{ + overlay_targ_id_t otid; + varpd_impl_t *vip = inst->vri_impl; + + otid.otid_linkid = inst->vri_linkid; + if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_RESTORE, &otid) != 0) + return (errno); + return (0); +} + +int +libvarpd_overlay_packet(varpd_impl_t *vip, const overlay_targ_lookup_t *otl, + void *buf, size_t *buflen) +{ + int ret; + overlay_targ_pkt_t otp; + + otp.otp_linkid = UINT64_MAX; + otp.otp_reqid = otl->otl_reqid; + otp.otp_size = *buflen; + otp.otp_buf = buf; + + do { + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_PKT, &otp); + } while (ret != 0 && errno == EINTR); + if (ret != 0 && errno == EFAULT) + libvarpd_panic("OVERLAY_TARG_PKT ioctl efault"); + else if (ret != 0) + ret = errno; + + if (ret == 0) + *buflen = otp.otp_size; + + return (ret); +} + +static int +libvarpd_overlay_inject_common(varpd_impl_t *vip, varpd_instance_t *inst, + const overlay_targ_lookup_t *otl, void *buf, size_t buflen, int cmd) +{ + int ret; + overlay_targ_pkt_t otp; + + if (otl == NULL) { + otp.otp_linkid = inst->vri_linkid; + otp.otp_reqid = 0; + } else { + otp.otp_linkid = UINT64_MAX; + otp.otp_reqid = otl->otl_reqid; + } + otp.otp_size = buflen; + otp.otp_buf = buf; 
+ + do { + ret = ioctl(vip->vdi_overlayfd, cmd, &otp); + } while (ret != 0 && errno == EINTR); + if (ret != 0 && errno == EFAULT) + libvarpd_panic("overlay_inject_common ioctl efault"); + else if (ret != 0) + ret = errno; + + return (ret); +} + +int +libvarpd_overlay_inject(varpd_impl_t *vip, const overlay_targ_lookup_t *otl, + void *buf, size_t buflen) +{ + return (libvarpd_overlay_inject_common(vip, NULL, otl, buf, buflen, + OVERLAY_TARG_INJECT)); +} + +int +libvarpd_overlay_instance_inject(varpd_instance_t *inst, void *buf, + size_t buflen) +{ + return (libvarpd_overlay_inject_common(inst->vri_impl, inst, NULL, buf, + buflen, OVERLAY_TARG_INJECT)); +} + +int +libvarpd_overlay_resend(varpd_impl_t *vip, const overlay_targ_lookup_t *otl, + void *buf, size_t buflen) +{ + return (libvarpd_overlay_inject_common(vip, NULL, otl, buf, buflen, + OVERLAY_TARG_RESEND)); +} + +static void +libvarpd_overlay_lookup_reply(varpd_impl_t *vip, + const overlay_targ_lookup_t *otl, overlay_targ_resp_t *otr, int cmd) +{ + int ret; + + otr->otr_reqid = otl->otl_reqid; + do { + ret = ioctl(vip->vdi_overlayfd, cmd, otr); + } while (ret != 0 && errno == EINTR); + /* XXX abort feels wrong here */ + if (ret != 0 && errno != EINVAL) + libvarpd_panic("receieved bad errno from lookup_reply " + "(cmd %d): %d\n", cmd, errno); +} + +static void +libvarpd_overlay_lookup_handle(varpd_impl_t *vip) +{ + int ret; + varpd_query_t *vqp; + overlay_targ_lookup_t *otl; + overlay_targ_resp_t *otr; + varpd_instance_t *inst; + + vqp = umem_cache_alloc(vip->vdi_qcache, UMEM_DEFAULT); + otl = &vqp->vq_lookup; + otr = &vqp->vq_response; + /* + * XXX abort doesn't really help here, we should figure out what to do. + * Try and force a reap, then some? 
+ */ + if (vqp == NULL) + libvarpd_panic("failed to allocate memory for lookup " + "handle..., we should not panic()"); + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_LOOKUP, otl); + if (ret != 0 && errno != ETIME && errno != EINTR) + libvarpd_panic("received bad errno from OVERLAY_TARG_LOOKUP: " + "%d", errno); + + if (ret != 0) { + umem_cache_free(vip->vdi_qcache, vqp); + return; + } + + inst = (varpd_instance_t *)libvarpd_instance_lookup( + (varpd_handle_t *)vip, otl->otl_varpdid); + if (inst == NULL) { + libvarpd_overlay_lookup_reply(vip, otl, otr, + OVERLAY_TARG_DROP); + umem_cache_free(vip->vdi_qcache, vqp); + return; + } + vqp->vq_instance = inst; + + inst->vri_plugin->vpp_ops->vpo_lookup(inst->vri_private, + (varpd_query_handle_t *)vqp, otl, &otr->otr_answer); +} + +void +libvarpd_overlay_lookup_run(varpd_handle_t *vhp) +{ + varpd_impl_t *vip = (varpd_impl_t *)vhp; + + mutex_lock(&vip->vdi_lock); + if (vip->vdi_lthr_quiesce == B_TRUE) { + mutex_unlock(&vip->vdi_lock); + return; + } + vip->vdi_lthr_count++; + + for (;;) { + mutex_unlock(&vip->vdi_lock); + libvarpd_overlay_lookup_handle(vip); + mutex_lock(&vip->vdi_lock); + if (vip->vdi_lthr_quiesce == B_TRUE) + break; + } + assert(vip->vdi_lthr_count > 0); + vip->vdi_lthr_count--; + cond_signal(&vip->vdi_lthr_cv); + mutex_unlock(&vip->vdi_lock); +} + +void +libvarpd_overlay_lookup_quiesce(varpd_handle_t *vhp) +{ + varpd_impl_t *vip = (varpd_impl_t *)vhp; + + mutex_lock(&vip->vdi_lock); + if (vip->vdi_lthr_count == 0) { + mutex_unlock(&vip->vdi_lock); + return; + } + vip->vdi_lthr_quiesce = B_TRUE; + while (vip->vdi_lthr_count > 0) + (void) cond_wait(&vip->vdi_lthr_cv, &vip->vdi_lock); + vip->vdi_lthr_quiesce = B_FALSE; + mutex_unlock(&vip->vdi_lock); +} + +int +libvarpd_overlay_iter(varpd_impl_t *vip, libvarpd_overlay_iter_f func, + void *arg) +{ + uint32_t curents = 0, i; + size_t size; + overlay_targ_list_t *otl; + + for (;;) { + size = sizeof (overlay_targ_list_t) + + sizeof (uint32_t) * curents; + otl = 
umem_alloc(size, UMEM_DEFAULT); + if (otl == NULL) + return (ENOMEM); + + otl->otl_nents = curents; + if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_LIST, otl) != 0) { + if (errno == EFAULT) + libvarpd_panic("OVERLAY_TARG_LIST ioctl " + "efault"); + umem_free(otl, size); + if (errno == EINTR) + continue; + else + return (errno); + } + + if (otl->otl_nents == curents) + break; + + curents = otl->otl_nents; + umem_free(otl, size); + } + + for (i = 0; i < otl->otl_nents; i++) { + if (func(vip, otl->otl_ents[i], arg) != 0) + break; + } + umem_free(otl, size); + return (0); +} + +int +libvarpd_overlay_cache_flush(varpd_instance_t *inst) +{ + int ret; + overlay_targ_cache_t cache; + varpd_impl_t *vip = inst->vri_impl; + + bzero(&cache, sizeof (overlay_targ_cache_t)); + cache.otc_linkid = inst->vri_linkid; + + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_FLUSH, &cache); + if (ret != 0 && errno == EFAULT) + libvarpd_panic("OVERLAY_TARG_CACHE_FLUSH ioctl efault"); + else if (ret != 0) + ret = errno; + + return (ret); +} + +int +libvarpd_overlay_cache_delete(varpd_instance_t *inst, const uint8_t *key) +{ + int ret; + overlay_targ_cache_t cache; + varpd_impl_t *vip = inst->vri_impl; + + bzero(&cache, sizeof (overlay_targ_cache_t)); + cache.otc_linkid = inst->vri_linkid; + bcopy(key, cache.otc_entry.otce_mac, ETHERADDRL); + + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_REMOVE, &cache); + if (ret != 0 && errno == EFAULT) + libvarpd_panic("OVERLAY_TARG_CACHE_REMOVE ioctl efault"); + else if (ret != 0) + ret = errno; + + return (ret); + +} + +int +libvarpd_overlay_cache_get(varpd_instance_t *inst, const uint8_t *key, + varpd_client_cache_entry_t *entry) +{ + int ret; + overlay_targ_cache_t cache; + varpd_impl_t *vip = inst->vri_impl; + + bzero(&cache, sizeof (overlay_targ_cache_t)); + cache.otc_linkid = inst->vri_linkid; + bcopy(key, cache.otc_entry.otce_mac, ETHERADDRL); + + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_GET, &cache); + if (ret != 0 && errno == 
EFAULT) + libvarpd_panic("OVERLAY_TARG_CACHE_GET ioctl efault"); + else if (ret != 0) + return (errno); + + bcopy(cache.otc_entry.otce_dest.otp_mac, &entry->vcp_mac, ETHERADDRL); + entry->vcp_flags = cache.otc_entry.otce_flags; + entry->vcp_ip = cache.otc_entry.otce_dest.otp_ip; + entry->vcp_port = cache.otc_entry.otce_dest.otp_port; + + return (0); +} + +int +libvarpd_overlay_cache_set(varpd_instance_t *inst, const uint8_t *key, + const varpd_client_cache_entry_t *entry) +{ + int ret; + overlay_targ_cache_t cache; + varpd_impl_t *vip = inst->vri_impl; + + bzero(&cache, sizeof (overlay_targ_cache_t)); + cache.otc_linkid = inst->vri_linkid; + bcopy(key, cache.otc_entry.otce_mac, ETHERADDRL); + bcopy(&entry->vcp_mac, cache.otc_entry.otce_dest.otp_mac, ETHERADDRL); + cache.otc_entry.otce_flags = entry->vcp_flags; + cache.otc_entry.otce_dest.otp_ip = entry->vcp_ip; + cache.otc_entry.otce_dest.otp_port = entry->vcp_port; + + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_SET, &cache); + if (ret != 0 && errno == EFAULT) + libvarpd_panic("OVERLAY_TARG_CACHE_SET ioctl efault"); + else if (ret != 0) + return (errno); + + return (0); +} + +int +libvarpd_overlay_cache_walk_fill(varpd_instance_t *inst, uint64_t *markerp, + uint64_t *countp, overlay_targ_cache_entry_t *ents) +{ + int ret; + size_t asize; + overlay_targ_cache_iter_t *iter; + varpd_impl_t *vip = inst->vri_impl; + + if (*countp > 200) + return (E2BIG); + + asize = sizeof (overlay_targ_cache_iter_t) + + *countp * sizeof (overlay_targ_cache_entry_t); + iter = umem_alloc(asize, UMEM_DEFAULT); + if (iter == NULL) + return (ENOMEM); + + iter->otci_linkid = inst->vri_linkid; + iter->otci_marker = *markerp; + iter->otci_count = *countp; + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_ITER, iter); + if (ret != 0 && errno == EFAULT) + libvarpd_panic("OVERLAY_TARG_CACHE_ITER ioctl efault"); + else if (ret != 0) { + ret = errno; + goto out; + } + + *markerp = iter->otci_marker; + *countp = iter->otci_count; + 
bcopy(iter->otci_ents, ents, + *countp * sizeof (overlay_targ_cache_entry_t)); +out: + umem_free(iter, asize); + return (ret); +} + +void +libvarpd_plugin_query_reply(varpd_query_handle_t *vqh, int action) +{ + varpd_query_t *vqp = (varpd_query_t *)vqh; + + if (vqp == NULL) + libvarpd_panic("unkonwn plugin passed invalid " + "varpd_query_handle_t"); + + if (action == VARPD_LOOKUP_DROP) + libvarpd_overlay_lookup_reply(vqp->vq_instance->vri_impl, + &vqp->vq_lookup, &vqp->vq_response, OVERLAY_TARG_DROP); + else if (action == VARPD_LOOKUP_OK) + libvarpd_overlay_lookup_reply(vqp->vq_instance->vri_impl, + &vqp->vq_lookup, &vqp->vq_response, OVERLAY_TARG_RESPOND); + else + libvarpd_panic("plugin %s passed in an invalid action: %d", + vqp->vq_instance->vri_plugin->vpp_name, action); +} + +void +libvarpd_inject_varp(varpd_provider_handle_t *vph, const uint8_t *mac, + const overlay_target_point_t *otp) +{ + int ret; + overlay_targ_cache_t otc; + varpd_instance_t *inst = (varpd_instance_t *)vph; + varpd_impl_t *vip = inst->vri_impl; + + otc.otc_linkid = inst->vri_linkid; + otc.otc_entry.otce_flags = 0; + bcopy(mac, otc.otc_entry.otce_mac, ETHERADDRL); + bcopy(otp, &otc.otc_entry.otce_dest, sizeof (overlay_target_point_t)); + + ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_SET, &otc); + if (ret != 0) { + switch (errno) { + case EBADF: + case EFAULT: + case ENOTSUP: + libvarpd_panic("received bad errno from " + "OVERLAY_TARG_CACHE_SET: %d", errno); + default: + break; + } + } +} + +void +libvarpd_fma_degrade(varpd_provider_handle_t *vph, const char *msg) +{ + int ret; + varpd_instance_t *inst = (varpd_instance_t *)vph; + + ret = libvarpd_overlay_degrade(inst, msg); + switch (ret) { + case ENOENT: + case EFAULT: + libvarpd_panic("received bad errno from degrade ioctl: %d", + errno); + default: + break; + } +} + +void +libvarpd_fma_restore(varpd_provider_handle_t *vph) +{ + int ret; + varpd_instance_t *inst = (varpd_instance_t *)vph; + + ret = libvarpd_overlay_restore(inst); 
+ switch (ret) { + case ENOENT: + case EFAULT: + libvarpd_panic("received bad errno from restore ioctl: %d", + errno); + default: + break; + } +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c new file mode 100644 index 0000000000..9d02504139 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c @@ -0,0 +1,48 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014, Joyent, Inc. + */ + +/* + * No, 'tis not so deep as a well, nor so wide as a church door; but 'tis + * enough,'twill serve. Ask for me tomorrow, and you shall find me a grave man. + * + * This file maintains various routines for handling when we die. + */ + +#include <stdio.h> +#include <stdarg.h> +#include <errno.h> +#include <thread.h> +#include <stdlib.h> + +static int varpd_panic_errno; +static char varpd_panic_buf[1024]; +static thread_t varpd_panic_thread; + +void +libvarpd_panic(const char *fmt, ...) +{ + va_list ap; + + /* Always save errno first! 
*/ + varpd_panic_errno = errno; + varpd_panic_thread = thr_self(); + + if (fmt != NULL) { + va_start(ap, fmt); + (void) vsnprintf(varpd_panic_buf, sizeof (varpd_panic_buf), fmt, + ap); + } + abort(); +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c new file mode 100644 index 0000000000..255622b63b --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c @@ -0,0 +1,590 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. + */ + +/* + * varpd persistence backend + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <strings.h> +#include <librename.h> +#include <md5.h> +#include <sys/sysmacros.h> +#include <dirent.h> +#include <sys/mman.h> +#include <umem.h> + +#include <libvarpd_impl.h> + +static uint8_t varpd_persist_magic[4] = { + 'v', + 'a', + 'r', + 'p', +}; + +#define VARPD_PERSIST_MAXWRITE 4096 +#define VARPD_PERSIST_VERSION_ONE 1 +#define VARPD_PERSIST_SUFFIX ".varpd" + +/* + * XXX ctfdiff this structure + */ +typedef struct varpd_persist_header { + uint8_t vph_magic[4]; + uint32_t vph_version; + uint8_t vph_md5[16]; +} varpd_persist_header_t; + +void +libvarpd_persist_init(varpd_impl_t *vip) +{ + vip->vdi_persistfd = -1; + if (rwlock_init(&vip->vdi_pfdlock, USYNC_THREAD, NULL) != 0) + libvarpd_panic("failed to create rw vdi_pfdlock"); +} + +void +libvarpd_persist_fini(varpd_impl_t *vip) +{ + /* + * Clean up for someone that left something behind. 
+ */ + if (vip->vdi_persistfd != -1) { + if (close(vip->vdi_persistfd) != 0) + libvarpd_panic("failed to close persist fd %d: %d", + vip->vdi_persistfd, errno); + vip->vdi_persistfd = -1; + } + if (rwlock_destroy(&vip->vdi_pfdlock) != 0) + libvarpd_panic("failed to destroy rw vdi_pfdlock"); +} + +int +libvarpd_persist_enable(varpd_handle_t *vhp, const char *rootdir) +{ + int fd; + struct stat st; + varpd_impl_t *vip = (varpd_impl_t *)vhp; + + fd = open(rootdir, O_RDONLY); + if (fd < 0) + return (errno); + + if (fstat(fd, &st) != 0) { + int ret = errno; + if (close(fd) != 0) + libvarpd_panic("failed to close rootdir fd (%s) %d: %d", + rootdir, fd, errno); + return (ret); + } + + if (!S_ISDIR(st.st_mode)) { + if (close(fd) != 0) + libvarpd_panic("failed to close rootdir fd (%s) %d: %d", + rootdir, fd, errno); + return (EINVAL); + } + + + rw_wrlock(&vip->vdi_pfdlock); + if (vip->vdi_persistfd != -1) { + rw_unlock(&vip->vdi_pfdlock); + if (close(fd) != 0) + libvarpd_panic("failed to close rootdir fd (%s) %d: %d", + rootdir, fd, errno); + return (EEXIST); + } + vip->vdi_persistfd = fd; + rw_unlock(&vip->vdi_pfdlock); + + return (0); +} + +static int +libvarpd_persist_write(int fd, const void *buf, size_t buflen) +{ + size_t ret; + off_t off = 0; + + while (buflen > 0) { + ret = write(fd, buf + off, + MIN(buflen, VARPD_PERSIST_MAXWRITE)); + if (ret == -1 && errno == EINTR) + continue; + if (ret == -1) + return (errno); + + off += ret; + buflen -= ret; + } + + return (0); +} + +static int +libvarpd_persist_nvlist(int dirfd, uint64_t id, nvlist_t *nvl) +{ + int err, fd; + size_t size; + varpd_persist_header_t hdr; + librename_atomic_t *lrap; + char *buf = NULL, *name; + + if ((err = nvlist_pack(nvl, &buf, &size, NV_ENCODE_XDR, 0)) != 0) + return (err); + + if (asprintf(&name, "%lld%s", id, ".varpd") == -1) { + err = errno; + free(buf); + return (err); + } + + if ((err = librename_atomic_fdinit(dirfd, name, NULL, 0600, 0, + &lrap)) != 0) { + free(name); + free(buf); + 
return (err); + } + + fd = librename_atomic_fd(lrap); + + bzero(&hdr, sizeof (varpd_persist_header_t)); + bcopy(varpd_persist_magic, hdr.vph_magic, sizeof (varpd_persist_magic)); + hdr.vph_version = VARPD_PERSIST_VERSION_ONE; + md5_calc(hdr.vph_md5, buf, size); + + if ((err = libvarpd_persist_write(fd, &hdr, + sizeof (varpd_persist_header_t))) != 0) { + librename_atomic_fini(lrap); + free(name); + free(buf); + return (err); + } + + if ((err = libvarpd_persist_write(fd, buf, size)) != 0) { + librename_atomic_fini(lrap); + free(name); + free(buf); + return (err); + } + + do { + err = librename_atomic_commit(lrap); + } while (err == EINTR); + + librename_atomic_fini(lrap); + free(name); + free(buf); + return (err); +} + +int +libvarpd_persist_instance(varpd_impl_t *vip, varpd_instance_t *inst) +{ + int err = 0; + nvlist_t *nvl = NULL, *cvl = NULL; + + rw_rdlock(&vip->vdi_pfdlock); + /* Check if persistence exists */ + if (vip->vdi_persistfd == -1) + goto out; + + if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0) + goto out; + + if ((err = nvlist_alloc(&cvl, NV_UNIQUE_NAME, 0)) != 0) + goto out; + + if ((err = nvlist_add_uint64(nvl, "vri_id", inst->vri_id)) != 0) + goto out; + + if ((err = nvlist_add_uint32(nvl, "vri_linkid", inst->vri_linkid)) != 0) + goto out; + + if ((err = nvlist_add_uint32(nvl, "vri_dest", + (uint32_t)inst->vri_dest)) != 0) + goto out; + if ((err = nvlist_add_uint32(nvl, "vri_mode", + (uint32_t)inst->vri_mode)) != 0) + goto out; + + if ((err = nvlist_add_string(nvl, "vri_plugin", + inst->vri_plugin->vpp_name)) != 0) + goto out; + + err = inst->vri_plugin->vpp_ops->vpo_save(inst->vri_private, cvl); + if (err != 0) + goto out; + + if ((err = nvlist_add_nvlist(nvl, "vri_private", cvl)) != 0) + goto out; + + err = libvarpd_persist_nvlist(vip->vdi_persistfd, inst->vri_id, nvl); +out: + nvlist_free(nvl); + nvlist_free(cvl); + rw_unlock(&vip->vdi_pfdlock); + return (err); +} + +void +libvarpd_torch_instance(varpd_impl_t *vip, varpd_instance_t 
*inst) +{ + char buf[32]; + int ret; + + rw_rdlock(&vip->vdi_pfdlock); + if (vip->vdi_persistfd == -1) { + rw_unlock(&vip->vdi_pfdlock); + return; + } + + if (snprintf(buf, sizeof (buf), "%lld.varpd", inst->vri_id) >= 32) + libvarpd_panic("somehow exceeded static value for " + "libvarpd_torch_instance buffer"); + + do { + ret = unlinkat(vip->vdi_persistfd, buf, 0); + } while (ret == -1 && errno == EINTR); + if (ret != 0) { + switch (errno) { + case ENOENT: + break; + default: + libvarpd_panic("failed to unlinkat %d`%s: %s", + vip->vdi_persistfd, buf, strerror(errno)); + } + } + + rw_unlock(&vip->vdi_pfdlock); +} + +static int +libvarpd_persist_restore_instance(varpd_impl_t *vip, nvlist_t *nvl) +{ + nvlist_t *pvl; + uint64_t id, flags, vid; + uint32_t linkid, dest, mode; + char *pluginstr; + varpd_plugin_t *plugin; + overlay_plugin_dest_t adest; + varpd_instance_t *inst, lookup; + + if (nvlist_lookup_uint64(nvl, "vri_id", &id) != 0) + return (EINVAL); + + if (nvlist_lookup_uint32(nvl, "vri_linkid", &linkid) != 0) + return (EINVAL); + + if (nvlist_lookup_uint32(nvl, "vri_dest", &dest) != 0) + return (EINVAL); + + if (nvlist_lookup_uint32(nvl, "vri_mode", &mode) != 0) + return (EINVAL); + + if (nvlist_lookup_string(nvl, "vri_plugin", &pluginstr) != 0) + return (EINVAL); + + if (nvlist_lookup_nvlist(nvl, "vri_private", &pvl) != 0) + return (EINVAL); + + plugin = libvarpd_plugin_lookup(vip, pluginstr); + if (plugin == NULL) + return (EINVAL); + + if (plugin->vpp_mode != mode) + return (EINVAL); + + if (libvarpd_overlay_info(vip, linkid, &adest, &flags, &vid) != 0) + return (EINVAL); + + if (dest != adest) + return (EINVAL); + + /* XXX This failure shouldn't cause us to unlink... 
*/ + inst = umem_alloc(sizeof (varpd_instance_t), UMEM_DEFAULT); + if (inst == NULL) + return (ENOMEM); + + inst->vri_id = id_alloc_specific(vip->vdi_idspace, id); + if (inst->vri_id != id) { + umem_free(inst, sizeof (varpd_instance_t)); + return (EINVAL); + } + + inst->vri_linkid = linkid; + inst->vri_vnetid = vid; + inst->vri_mode = plugin->vpp_mode; + inst->vri_dest = dest; + inst->vri_plugin = plugin; + inst->vri_impl = vip; + inst->vri_flags = 0; + if (plugin->vpp_ops->vpo_restore(pvl, (varpd_provider_handle_t *)inst, + dest, &inst->vri_private) != 0) { + id_free(vip->vdi_idspace, id); + umem_free(inst, sizeof (varpd_instance_t)); + return (EINVAL); + } + + if (mutex_init(&inst->vri_lock, USYNC_THREAD, NULL) != 0) + libvarpd_panic("failed to create vri_lock mutex"); + + mutex_lock(&vip->vdi_lock); + lookup.vri_id = inst->vri_id; + if (avl_find(&vip->vdi_instances, &lookup, NULL) != NULL) + libvarpd_panic("found duplicate instance with id %d", + lookup.vri_id); + avl_add(&vip->vdi_instances, inst); + lookup.vri_linkid = inst->vri_linkid; + if (avl_find(&vip->vdi_linstances, &lookup, NULL) != NULL) + libvarpd_panic("found duplicate linstance with id %d", + lookup.vri_linkid); + avl_add(&vip->vdi_linstances, inst); + mutex_unlock(&vip->vdi_lock); + + if (plugin->vpp_ops->vpo_start(inst->vri_private) != 0) { + libvarpd_instance_destroy((varpd_instance_handle_t *)inst); + return (EINVAL); + } + + if (flags & OVERLAY_TARG_INFO_F_ACTIVE) + libvarpd_overlay_disassociate(inst); + + if (libvarpd_overlay_associate(inst) != 0) { + libvarpd_instance_destroy((varpd_instance_handle_t *)inst); + return (EINVAL); + } + + if (flags & OVERLAY_TARG_INFO_F_DEGRADED) + libvarpd_overlay_restore(inst); + + mutex_lock(&inst->vri_lock); + inst->vri_flags |= VARPD_INSTANCE_F_ACTIVATED; + mutex_unlock(&inst->vri_lock); + + return (0); +} + +static int +libvarpd_persist_restore_one(varpd_impl_t *vip, int fd) +{ + int err; + size_t fsize; + struct stat st; + void *buf, *datap; + 
varpd_persist_header_t *hdr; + uint8_t md5[16]; + nvlist_t *nvl; + + if (fstat(fd, &st) != 0) + return (errno); + + if (st.st_size <= sizeof (varpd_persist_header_t)) + return (EINVAL); + fsize = st.st_size - sizeof (varpd_persist_header_t); + + buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (buf == MAP_FAILED) + return (errno); + + hdr = buf; + if (bcmp(varpd_persist_magic, hdr->vph_magic, + sizeof (varpd_persist_magic)) != 0) { + if (munmap(buf, st.st_size) != 0) + libvarpd_panic("failed to munmap %p: %d", buf, errno); + return (EINVAL); + } + + if (hdr->vph_version != VARPD_PERSIST_VERSION_ONE) { + if (munmap(buf, st.st_size) != 0) + libvarpd_panic("failed to munmap %p: %d", buf, errno); + return (EINVAL); + } + + datap = (void *)((uintptr_t)buf + sizeof (varpd_persist_header_t)); + md5_calc(md5, datap, fsize); + if (bcmp(md5, hdr->vph_md5, sizeof (uint8_t) * 16) != 0) { + if (munmap(buf, st.st_size) != 0) + libvarpd_panic("failed to munmap %p: %d", buf, errno); + return (EINVAL); + } + + err = nvlist_unpack(datap, fsize, &nvl, 0); + if (munmap(buf, st.st_size) != 0) + libvarpd_panic("failed to munmap %p: %d", buf, errno); + + if (err != 0) + return (EINVAL); + + err = libvarpd_persist_restore_instance(vip, nvl); + nvlist_free(nvl); + return (err); +} + +/* + * XXX ew, O(n^2) + */ +static int +libvarpd_check_degrade_cb(varpd_impl_t *vip, datalink_id_t linkid, void *arg) +{ + varpd_instance_t *inst; + + mutex_lock(&vip->vdi_lock); + for (inst = avl_first(&vip->vdi_instances); inst != NULL; + inst = AVL_NEXT(&vip->vdi_instances, inst)) { + if (inst->vri_linkid == linkid) { + mutex_unlock(&vip->vdi_lock); + return (0); + } + } + + mutex_unlock(&vip->vdi_lock); + + (void) libvarpd_overlay_degrade_datalink(vip, linkid, + "no varpd instance exists"); + return (0); +} + +static void +libvarpd_check_degrade(varpd_impl_t *vip) +{ + libvarpd_overlay_iter(vip, libvarpd_check_degrade_cb, NULL); +} + +/* + * XXX We need to go through and mark any kernel 
devices that we don't know
+ * about as degraded.
+ *
+ * Restore every persisted instance found in the persistence directory. Each
+ * regular file named `<id>.varpd` is handed to libvarpd_persist_restore_one();
+ * a file that fails to restore is unlinked. Returns 0 on success, otherwise
+ * the errno value of the failure that stopped the walk.
+ */
+int
+libvarpd_persist_restore(varpd_handle_t *vhp)
+{
+	int dirfd;
+	int ret = 0;
+	DIR *dirp = NULL;
+	struct dirent *dp;
+	varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+	rw_rdlock(&vip->vdi_pfdlock);
+	/* Walk a duplicate so that closedir() leaves vdi_persistfd open. */
+	if ((dirfd = dup(vip->vdi_persistfd)) < 0) {
+		ret = errno;
+		goto out;
+	}
+
+	if ((dirp = fdopendir(dirfd)) == NULL) {
+		ret = errno;
+		if (close(dirfd) != 0)
+			libvarpd_panic("failed to close dirfd %d: %d",
+			    dirfd, errno);
+		goto out;
+	}
+
+	for (;;) {
+		int fd;
+		uint64_t id;
+		char *eptr;
+		struct stat st;
+
+		/* readdir() returns NULL for both EOF and error; use errno. */
+		errno = 0;
+		dp = readdir(dirp);
+		if (dp == NULL) {
+			ret = errno;
+			break;
+		}
+
+		if (strcmp(dp->d_name, ".") == 0 ||
+		    strcmp(dp->d_name, "..") == 0)
+			continue;
+
+		/*
+		 * Leave files that we don't recognize alone. A valid file has
+		 * the format `%llu.varpd`.
+		 */
+		errno = 0;
+		id = strtoull(dp->d_name, &eptr, 10);
+		if ((id == 0 && errno == EINVAL) ||
+		    (id == ULLONG_MAX && errno == ERANGE))
+			continue;
+
+		if (strcmp(eptr, VARPD_PERSIST_SUFFIX) != 0)
+			continue;
+
+		fd = openat(vip->vdi_persistfd, dp->d_name, O_RDONLY);
+		if (fd < 0) {
+			ret = errno;
+			break;
+		}
+
+		if (fstat(fd, &st) != 0) {
+			ret = errno;
+			/* Don't leak the descriptor on this error path. */
+			if (close(fd) != 0)
+				libvarpd_panic("failed to close fd (%s) %d: "
+				    "%d\n", dp->d_name, fd, errno);
+			break;
+		}
+
+		if (!S_ISREG(st.st_mode)) {
+			if (close(fd) != 0)
+				libvarpd_panic("failed to close fd (%s) %d: "
+				    "%d\n", dp->d_name, fd, errno);
+			continue;
+		}
+
+		ret = libvarpd_persist_restore_one(vip, fd);
+		if (close(fd) != 0)
+			libvarpd_panic("failed to close fd (%s) %d: "
+			    "%d\n", dp->d_name, fd, errno);
+		/*
+		 * This is an invalid file. We'll unlink it to save us this
+		 * trouble in the future. XXX We shouldn't unlink on all
+		 * failures presumably...
+		 */
+		if (ret != 0) {
+			if (unlinkat(vip->vdi_persistfd, dp->d_name, 0) != 0) {
+				ret = errno;
+				break;
+			}
+		}
+	}
+
+	libvarpd_check_degrade(vip);
+
+out:
+	if (dirp != NULL)
+		closedir(dirp);
+	rw_unlock(&vip->vdi_pfdlock);
+	return (ret);
+}
+
+/*
+ * Turn persistence off by closing the persistence directory descriptor.
+ * Returns ENOENT if persistence was not enabled and 0 otherwise. The only
+ * lock taken here is vdi_pfdlock, so it is the only lock released on each
+ * return path.
+ */
+int
+libvarpd_persist_disable(varpd_handle_t *vhp)
+{
+	varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+	rw_wrlock(&vip->vdi_pfdlock);
+	if (vip->vdi_persistfd == -1) {
+		rw_unlock(&vip->vdi_pfdlock);
+		return (ENOENT);
+	}
+	if (close(vip->vdi_persistfd) != 0)
+		libvarpd_panic("failed to close persist fd %d: %d",
+		    vip->vdi_persistfd, errno);
+	vip->vdi_persistfd = -1;
+	rw_unlock(&vip->vdi_pfdlock);
+	return (0);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c
new file mode 100644
index 0000000000..df53ee5d1d
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c
@@ -0,0 +1,233 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/*
+ * varpd plugin management
+ */
+
+#include <libvarpd_impl.h>
+#include <errno.h>
+#include <umem.h>
+#include <assert.h>
+#include <strings.h>
+#include <dlfcn.h>
+#include <link.h>
+#include <stdio.h>
+
+static varpd_impl_t *varpd_load_handle;
+static mutex_t varpd_load_lock;
+static cond_t varpd_load_cv;
+
+int
+libvarpd_plugin_comparator(const void *lp, const void *rp)
+{
+	int ret;
+	const varpd_plugin_t *lpp, *rpp;
+
+	lpp = lp;
+	rpp = rp;
+
+	ret = strcmp(lpp->vpp_name, rpp->vpp_name);
+	if (ret > 0)
+		return (1);
+	if (ret < 0)
+		return (-1);
+	return (0);
+}
+
+varpd_plugin_register_t *
+libvarpd_plugin_alloc(uint_t version, int *errp)
+{
+	int err;
+	varpd_plugin_register_t *vprp;
+
+	if (errp == NULL)
+		errp = &err;
+
+	if (version != VARPD_VERSION_ONE) {
+		*errp = EINVAL;
+		return (NULL);
+	}
+
+	vprp = umem_alloc(sizeof (varpd_plugin_register_t), UMEM_DEFAULT);
+	if (vprp == NULL) {
+		*errp = ENOMEM;
+		return (NULL);
+	}
+
+	vprp->vpr_version = VARPD_VERSION_ONE;
+
+	return (vprp);
+}
+
+void
+libvarpd_plugin_free(varpd_plugin_register_t *vprp)
+{
+	umem_free(vprp, sizeof (varpd_plugin_register_t));
+}
+
+/*
+ * Register a plugin with the handle that is currently loading plugins
+ * (varpd_load_handle). Returns EINVAL for an unsupported registration
+ * version or a missing name, EEXIST if a plugin with the same name is
+ * already registered, ENOMEM on allocation failure and 0 on success.
+ */
+int
+libvarpd_plugin_register(varpd_plugin_register_t *vprp)
+{
+	varpd_plugin_t *vpp;
+	varpd_plugin_t lookup;
+
+	/*
+	 * Watch out for an evil plugin. Validate before allocating so that a
+	 * rejected registration does not leak the plugin structure.
+	 */
+	if (vprp->vpr_version != VARPD_VERSION_ONE || vprp->vpr_name == NULL)
+		return (EINVAL);
+
+	vpp = umem_alloc(sizeof (varpd_plugin_t), UMEM_DEFAULT);
+	if (vpp == NULL)
+		return (ENOMEM);
+
+	mutex_lock(&varpd_load_lock);
+	if (varpd_load_handle == NULL)
+		libvarpd_panic("varpd_load_handle was unexpectedly null");
+
+	mutex_lock(&varpd_load_handle->vdi_lock);
+	lookup.vpp_name = vprp->vpr_name;
+	if (avl_find(&varpd_load_handle->vdi_plugins, &lookup, NULL) != NULL) {
+		mutex_unlock(&varpd_load_handle->vdi_lock);
+		mutex_unlock(&varpd_load_lock);
+		umem_free(vpp, sizeof (varpd_plugin_t));
+		return (EEXIST);
+	}
+	vpp->vpp_name = strdup(vprp->vpr_name);
+	if (vpp->vpp_name == NULL) {
+		mutex_unlock(&varpd_load_handle->vdi_lock);
+		mutex_unlock(&varpd_load_lock);
+		umem_free(vpp, sizeof (varpd_plugin_t));
+		/* A failed strdup() is an allocation failure, not a clash. */
+		return (ENOMEM);
+	}
+
+	vpp->vpp_mode = vprp->vpr_mode;
+	vpp->vpp_ops = vprp->vpr_ops;
+	if (mutex_init(&vpp->vpp_lock, USYNC_THREAD, NULL) != 0)
+		libvarpd_panic("failed to create plugin's vpp_lock");
+	vpp->vpp_active = 0;
+	avl_add(&varpd_load_handle->vdi_plugins, vpp);
+	mutex_unlock(&varpd_load_handle->vdi_lock);
+	mutex_unlock(&varpd_load_lock);
+
+	return (0);
+}
+
+varpd_plugin_t *
+libvarpd_plugin_lookup(varpd_impl_t *vip, const char *name)
+{
+	varpd_plugin_t lookup, *ret;
+
+	lookup.vpp_name = name;
+	mutex_lock(&vip->vdi_lock);
+	ret = avl_find(&vip->vdi_plugins, &lookup, NULL);
+	mutex_unlock(&vip->vdi_lock);
+
+	return (ret);
+}
+
+static int
+libvarpd_plugin_load_cb(varpd_impl_t *vip, const char *path, void *unused)
+{
+	void *dlp;
+
+	dlp = dlopen(path, RTLD_LOCAL | RTLD_NOW);
+	if (dlp == NULL) {
+		/* XXX This should be a real error */
+		fprintf(stderr, "dlopen failed: %s\n", dlerror());
+	}
+
+	return (0);
+}
+
+int
+libvarpd_plugin_load(varpd_handle_t *vph, const char *path)
+{
+	int ret = 0;
+	varpd_impl_t *vip = (varpd_impl_t *)vph;
+
+	if (vip == NULL || path == NULL)
+		return (EINVAL);
+	mutex_lock(&varpd_load_lock);
+	while (varpd_load_handle != NULL)
+		cond_wait(&varpd_load_cv, &varpd_load_lock);
+	varpd_load_handle = vip;
+	mutex_unlock(&varpd_load_lock);
+
+	ret = libvarpd_dirwalk(vip, path, ".so", libvarpd_plugin_load_cb, NULL);
+
+	mutex_lock(&varpd_load_lock);
+	varpd_load_handle = NULL;
+	cond_signal(&varpd_load_cv);
+	mutex_unlock(&varpd_load_lock);
+
+	return (ret);
+}
+
+int
+libvarpd_plugin_walk(varpd_handle_t *vph, libvarpd_plugin_walk_f func,
+    void *arg)
+{
+	varpd_impl_t *vip = (varpd_impl_t *)vph;
+	varpd_plugin_t *vpp;
+
+	mutex_lock(&vip->vdi_lock);
+	for (vpp = avl_first(&vip->vdi_plugins); vpp != NULL;
+	    vpp = AVL_NEXT(&vip->vdi_plugins, vpp)) {
+		if (func(vph, vpp->vpp_name, arg) != 0) {
+			
mutex_unlock(&vip->vdi_lock); + return (1); + } + } + mutex_unlock(&vip->vdi_lock); + return (0); +} + +void +libvarpd_plugin_init(void) +{ + if (mutex_init(&varpd_load_lock, USYNC_THREAD | LOCK_RECURSIVE | + LOCK_ERRORCHECK, NULL) != 0) + libvarpd_panic("failed to create varpd_load_lock"); + + if (cond_init(&varpd_load_cv, USYNC_THREAD, NULL) != 0) + libvarpd_panic("failed to create varpd_load_cv"); + + varpd_load_handle = NULL; +} + +void +libvarpd_plugin_fini(void) +{ + assert(varpd_load_handle == NULL); + if (mutex_destroy(&varpd_load_lock) != 0) + libvarpd_panic("failed to destroy varpd_load_lock"); + if (cond_destroy(&varpd_load_cv) != 0) + libvarpd_panic("failed to destroy varpd_load_cv"); +} + +void +libvarpd_plugin_prefork(void) +{ + mutex_lock(&varpd_load_lock); + while (varpd_load_handle != NULL) + cond_wait(&varpd_load_cv, &varpd_load_lock); +} + +void +libvarpd_plugin_postfork(void) +{ + cond_signal(&varpd_load_cv); + mutex_unlock(&varpd_load_lock); +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c new file mode 100644 index 0000000000..32ab9e81ab --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c @@ -0,0 +1,238 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. 
+ */
+
+/*
+ * varpd property management
+ */
+
+#include <libvarpd_impl.h>
+#include <errno.h>
+#include <strings.h>
+#include <sys/mac.h>
+#include <umem.h>
+
+typedef struct varpd_prop_info {
+	varpd_impl_t		*vprop_vip;
+	varpd_instance_t	*vprop_instance;
+	uint_t			vprop_type;
+	uint_t			vprop_prot;
+	uint32_t		vprop_defsize;
+	uint32_t		vprop_psize;
+	char			vprop_name[LIBVARPD_PROP_NAMELEN];
+	uint8_t			vprop_default[LIBVARPD_PROP_SIZEMAX];
+	uint8_t			vprop_poss[LIBVARPD_PROP_SIZEMAX];
+} varpd_prop_info_t;
+
+/*
+ * Record the property's name in the handle. The copy is bounded by the size
+ * of the destination buffer itself, so it stays correct no matter how the
+ * overlay and libvarpd name-length constants relate to one another.
+ */
+void
+libvarpd_prop_set_name(varpd_prop_handle_t *phdl, const char *name)
+{
+	varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+	(void) strlcpy(infop->vprop_name, name, sizeof (infop->vprop_name));
+}
+
+void
+libvarpd_prop_set_prot(varpd_prop_handle_t *phdl, overlay_prop_prot_t perm)
+{
+	varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+	infop->vprop_prot = perm;
+}
+
+void
+libvarpd_prop_set_type(varpd_prop_handle_t *phdl, overlay_prop_type_t type)
+{
+	varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+	infop->vprop_type = type;
+}
+
+/*
+ * Copy len bytes from buf into the handle as the property's default value.
+ * Returns EOVERFLOW for a negative length, E2BIG when the value does not fit
+ * in vprop_default, and 0 on success.
+ */
+int
+libvarpd_prop_set_default(varpd_prop_handle_t *phdl, void *buf, ssize_t len)
+{
+	varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+
+	/*
+	 * Reject negative lengths first so they can never be misreported as
+	 * E2BIG if the size limit is an unsigned expression.
+	 */
+	if (len < 0)
+		return (EOVERFLOW);
+
+	if (len > LIBVARPD_PROP_SIZEMAX)
+		return (E2BIG);
+
+	bcopy(buf, infop->vprop_default, len);
+	infop->vprop_defsize = len;
+	return (0);
+}
+
+void
+libvarpd_prop_set_nodefault(varpd_prop_handle_t *phdl)
+{
+	varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+
+	infop->vprop_default[0] = '\0';
+	infop->vprop_defsize = 0;
+}
+
+void
+libvarpd_prop_set_range_uint32(varpd_prop_handle_t *phdl, uint32_t min,
+    uint32_t max)
+{
+	varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+	mac_propval_range_t *rangep = (mac_propval_range_t *)infop->vprop_poss;
+
+	/* XXX We should probably set some kind of error here...
*/ + if (rangep->mpr_count != 0 && rangep->mpr_type != MAC_PROPVAL_UINT32) + return; + + /* XXX We should probably set some kind of error here... */ + if (infop->vprop_psize + sizeof (mac_propval_uint32_range_t) > + sizeof (infop->vprop_poss)) + return; + + infop->vprop_psize += sizeof (mac_propval_uint32_range_t); + rangep->mpr_count++; + rangep->mpr_type = MAC_PROPVAL_UINT32; + rangep->u.mpr_uint32[rangep->mpr_count-1].mpur_min = min; + rangep->u.mpr_uint32[rangep->mpr_count-1].mpur_max = max; +} + +void +libvarpd_prop_set_range_str(varpd_prop_handle_t *phdl, const char *str) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + size_t len = strlen(str) + 1; /* Account for a null terminator */ + mac_propval_range_t *rangep = (mac_propval_range_t *)infop->vprop_poss; + mac_propval_str_range_t *pstr = &rangep->u.mpr_str; + + /* XXX Errors */ + if (rangep->mpr_count != 0 && rangep->mpr_type != MAC_PROPVAL_STR) + return; + + /* XXX Errors */ + if (infop->vprop_psize + len > sizeof (infop->vprop_poss)) + return; + + rangep->mpr_count++; + rangep->mpr_type = MAC_PROPVAL_STR; + strlcpy((char *)&pstr->mpur_data[pstr->mpur_nextbyte], str, + sizeof (infop->vprop_poss) - infop->vprop_psize); + pstr->mpur_nextbyte += len; + infop->vprop_psize += len; +} + +int +libvarpd_prop_handle_alloc(varpd_handle_t *vph, varpd_instance_handle_t *inst, + varpd_prop_handle_t **phdlp) +{ + varpd_prop_info_t *infop; + + infop = umem_alloc(sizeof (varpd_prop_info_t), UMEM_DEFAULT); + if (infop == NULL) + return (ENOMEM); + + bzero(infop, sizeof (varpd_prop_info_t)); + infop->vprop_vip = (varpd_impl_t *)vph; + infop->vprop_instance = (varpd_instance_t *)inst; + + *phdlp = (varpd_prop_handle_t *)infop; + return (0); +} + +void +libvarpd_prop_handle_free(varpd_prop_handle_t *phdl) +{ + umem_free(phdl, sizeof (varpd_prop_info_t)); +} + +int +libvarpd_prop_nprops(varpd_instance_handle_t *ihdl, uint_t *np) +{ + varpd_instance_t *instp = (varpd_instance_t *)ihdl; + + return 
(instp->vri_plugin->vpp_ops->vpo_nprops(instp->vri_private, np)); +} + +int +libvarpd_prop_info_fill(varpd_prop_handle_t *phdl, uint_t propid) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + varpd_instance_t *instp = infop->vprop_instance; + mac_propval_range_t *rangep = (mac_propval_range_t *)infop->vprop_poss; + + infop->vprop_psize = sizeof (mac_propval_range_t); + bzero(rangep, sizeof (mac_propval_range_t)); + return (instp->vri_plugin->vpp_ops->vpo_propinfo(instp->vri_private, + propid, phdl)); +} + +int +libvarpd_prop_info(varpd_prop_handle_t *phdl, const char **namep, + uint_t *typep, uint_t *protp, const void **defp, uint32_t *sizep, + const mac_propval_range_t **possp) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + if (namep != NULL) + *namep = infop->vprop_name; + if (typep != NULL) + *typep = infop->vprop_type; + if (protp != NULL) + *protp = infop->vprop_prot; + if (defp != NULL) + *defp = infop->vprop_default; + if (sizep != NULL) + *sizep = infop->vprop_psize; + if (possp != NULL) + *possp = (mac_propval_range_t *)infop->vprop_poss; + return (0); +} + +int +libvarpd_prop_get(varpd_prop_handle_t *phdl, void *buf, uint32_t *sizep) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + varpd_instance_t *instp = infop->vprop_instance; + + /* XXX We should maybe keep a boolean to keep track of this? */ + if (infop->vprop_name[0] == '\0') + return (EINVAL); + + return (instp->vri_plugin->vpp_ops->vpo_getprop(instp->vri_private, + infop->vprop_name, buf, sizep)); +} + +int +libvarpd_prop_set(varpd_prop_handle_t *phdl, const void *buf, uint32_t size) +{ + varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl; + varpd_instance_t *instp = infop->vprop_instance; + + /* XXX We should maybe keep a boolean to keep track of this? 
*/ + if (infop->vprop_name[0] == '\0') + return (EINVAL); + + return (instp->vri_plugin->vpp_ops->vpo_setprop(instp->vri_private, + infop->vprop_name, buf, size)); +} + +void +libvarpd_prop_door_convert(const varpd_prop_handle_t *phdl, + varpd_client_propinfo_arg_t *vcfap) +{ + const varpd_prop_info_t *infop = (const varpd_prop_info_t *)phdl; + + vcfap->vcfa_type = infop->vprop_type; + vcfap->vcfa_prot = infop->vprop_prot; + vcfap->vcfa_defsize = infop->vprop_defsize; + vcfap->vcfa_psize = infop->vprop_psize; + bcopy(infop->vprop_name, vcfap->vcfa_name, LIBVARPD_PROP_NAMELEN); + bcopy(infop->vprop_default, vcfap->vcfa_default, LIBVARPD_PROP_SIZEMAX); + bcopy(infop->vprop_poss, vcfap->vcfa_poss, LIBVARPD_PROP_SIZEMAX); +} diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h b/usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h new file mode 100644 index 0000000000..232d92e82e --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h @@ -0,0 +1,226 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. + */ + +#ifndef _LIBVARPD_PROVIDER_H +#define _LIBVARPD_PROVIDER_H + +/* + * varpd provider interface + * + * This header file defines all the structures and functions that a given plugin + * should register. 
+ */ + +#include <bunyan.h> +#include <libvarpd.h> +#include <libnvpair.h> +#include <sys/socket.h> +#include <sys/overlay_target.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define VARPD_VERSION_ONE 1 +#define VARPD_CURRENT_VERSION VARPD_VERSION_ONE + +typedef struct __varpd_provier_handle varpd_provider_handle_t; +typedef struct __varpd_query_handle varpd_query_handle_t; +typedef struct __varpd_arp_handle varpd_arp_handle_t; +typedef struct __varpd_dhcp_handle varpd_dhcp_handle_t; + +/* + * Create a new instance of a plugin. + */ +typedef int (*varpd_plugin_create_f)(varpd_provider_handle_t *, void **, + overlay_plugin_dest_t); + +/* + * Upon the return of this, the lookup function will be called. + */ +typedef int (*varpd_plugin_start_f)(void *); + +/* + * Upon the entry of this function, the lookup function will not be called. + */ +typedef void (*varpd_plugin_stop_f)(void *); + +/* + * Destroy an instance of a plugin. + */ +typedef void (*varpd_plugin_destroy_f)(void *); + +/* + * The varpd_plugin_default_f and varpd_plugin_lookup_f both look up + * destinations and should have them written into the overlay_target_point_t. + * The varpd_plugin_default_f should only be implemented for plugins which are + * of type OVERLAY_TARGET_POINT, where as only the lookup function should be + * implemented by plugins that are of type OVERLAY_TARGET_DYNAMIC. + * + * In both cases, the answer should be filled into the overlay_target_point_t. + * In the case of the varpd_plugin_default_f, one of the VARPD_LOOKUP_* values + * should be returned by the function. + * + * In the case of the varpd_plugin_lookup_f, no value is returned. Instead, this + * is allowed to be an asynchronous operation and therefore any thread may call + * back the status by using the function varpd_plugin_reply. Again, specifying + * the appropriate VARPD_LOOKUP_* flags. + * + * The flag, VARPD_LOOKUP_OK indicates that the overlay_target_point_t has been + * filled in completely. 
The flag, VARPD_LOOKUP_DROP indicates that the packet + * in question should be dropped. + */ +#define VARPD_LOOKUP_OK (0) +#define VARPD_LOOKUP_DROP (-1) +typedef int (*varpd_plugin_default_f)(void *, overlay_target_point_t *); +typedef void (*varpd_plugin_lookup_f)(void *, varpd_query_handle_t *, + const overlay_targ_lookup_t *, overlay_target_point_t *); + +/* + * Do a proxy ARP/NDP lookup. + */ +#define VARPD_QTYPE_ETHERNET 0x0 +typedef void (*varpd_plugin_arp_f)(void *, varpd_arp_handle_t *, int, + const struct sockaddr *, uint8_t *); + +typedef void (*varpd_plugin_dhcp_f)(void *, varpd_dhcp_handle_t *, int, + const overlay_targ_lookup_t *, uint8_t *); + +/* + * The following four functions all revolve around properties that exist for + * varpd. A plugin should strive to have a uniform set of properties that exist, + * however a given plugin may not always support every property. For example, in + * a vxlan world, the target IP address and port are both required; however, + * there are other encapsulation protocols which only require an IP address, or + * maybe require something else. + */ + +/* + * Obtain a total number of properties. + */ +typedef int (*varpd_plugin_nprops_f)(void *, uint_t *); + +/* + * Obtain information about a property. + */ +typedef int (*varpd_plugin_propinfo_f)(void *, const uint_t, + varpd_prop_handle_t *); + +/* + * Get the value for a single property. + */ +typedef int (*varpd_plugin_getprop_f)(void *, const char *, void *, uint32_t *); + +/* + * Set the value for a single property. + */ +typedef int (*varpd_plugin_setprop_f)(void *, const char *, const void *, + const uint32_t); + +/* + * Save a plugin's private data into an nvlist. + */ +typedef int (*varpd_plugin_save_f)(void *, nvlist_t *); + +/* + * Restore a plugin's private data to an nvlist. 
+ */ +typedef int (*varpd_plugin_restore_f)(nvlist_t *, varpd_provider_handle_t *, + overlay_plugin_dest_t, void **); + +typedef struct varpd_plugin_ops { + uint_t vpo_callbacks; + varpd_plugin_create_f vpo_create; + varpd_plugin_start_f vpo_start; + varpd_plugin_stop_f vpo_stop; + varpd_plugin_destroy_f vpo_destroy; + varpd_plugin_default_f vpo_default; + varpd_plugin_lookup_f vpo_lookup; + varpd_plugin_nprops_f vpo_nprops; + varpd_plugin_propinfo_f vpo_propinfo; + varpd_plugin_getprop_f vpo_getprop; + varpd_plugin_setprop_f vpo_setprop; + varpd_plugin_save_f vpo_save; + varpd_plugin_restore_f vpo_restore; + varpd_plugin_arp_f vpo_arp; + varpd_plugin_dhcp_f vpo_dhcp; +} varpd_plugin_ops_t; + +typedef struct varpd_plugin_register { + uint_t vpr_version; + uint_t vpr_mode; + const char *vpr_name; + const varpd_plugin_ops_t *vpr_ops; +} varpd_plugin_register_t; + +extern varpd_plugin_register_t *libvarpd_plugin_alloc(uint_t, int *); +extern void libvarpd_plugin_free(varpd_plugin_register_t *); +extern int libvarpd_plugin_register(varpd_plugin_register_t *); + +/* + * Blowing up and logging + */ +extern void libvarpd_panic(const char *, ...) __NORETURN; +extern const bunyan_logger_t *libvarpd_plugin_bunyan(varpd_provider_handle_t *); + +/* + * Misc. 
Information APIs + */ +extern uint64_t libvarpd_plugin_vnetid(varpd_provider_handle_t *); + +/* + * Lookup Replying query and proxying + */ +extern void libvarpd_plugin_query_reply(varpd_query_handle_t *, int); + +extern void libvarpd_plugin_proxy_arp(varpd_provider_handle_t *, + varpd_query_handle_t *, const overlay_targ_lookup_t *); +extern void libvarpd_plugin_proxy_ndp(varpd_provider_handle_t *, + varpd_query_handle_t *, const overlay_targ_lookup_t *); +extern void libvarpd_plugin_arp_reply(varpd_arp_handle_t *, int); + +extern void libvarpd_plugin_proxy_dhcp(varpd_provider_handle_t *, + varpd_query_handle_t *, const overlay_targ_lookup_t *); +extern void libvarpd_plugin_dhcp_reply(varpd_dhcp_handle_t *, int); + + +/* + * Property information callbacks + */ +extern void libvarpd_prop_set_name(varpd_prop_handle_t *, const char *); +extern void libvarpd_prop_set_prot(varpd_prop_handle_t *, overlay_prop_prot_t); +extern void libvarpd_prop_set_type(varpd_prop_handle_t *, overlay_prop_type_t); +extern int libvarpd_prop_set_default(varpd_prop_handle_t *, void *, ssize_t); +extern void libvarpd_prop_set_nodefault(varpd_prop_handle_t *); +extern void libvarpd_prop_set_range_uint32(varpd_prop_handle_t *, uint32_t, + uint32_t); +extern void libvarpd_prop_set_range_str(varpd_prop_handle_t *, const char *); + +/* + * Various injecting and invalidation routines + */ +extern void libvarpd_inject_varp(varpd_provider_handle_t *, const uint8_t *, + const overlay_target_point_t *); +extern void libvarpd_inject_arp(varpd_provider_handle_t *, const uint16_t, + const uint8_t *, const struct in_addr *, const uint8_t *); +extern void libvarpd_fma_degrade(varpd_provider_handle_t *, const char *); +extern void libvarpd_fma_restore(varpd_provider_handle_t *); +/* TODO NDP */ + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBVARPD_PROVIDER_H */ diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_util.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_util.c new file mode 100644 index 
0000000000..f5d8f3c796 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_util.c @@ -0,0 +1,98 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. + */ + +#include <libvarpd_impl.h> +#include <assert.h> +#include <stdio.h> +#include <sys/types.h> +#include <dirent.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +const char * +libvarpd_isaext(void) +{ +#if defined(__sparc) +#if defined(__sparcv9) + return ("64"); +#else /* __sparcv9 */ + return (""); +#endif /* __sparcv9 */ +#elif defined(__amd64) + return ("64"); +#elif defined(__i386) + return (""); +#else +#error "unkonwn ISA" +#endif +} + +int +libvarpd_dirwalk(varpd_impl_t *vip, const char *path, const char *suffix, + libvarpd_dirwalk_f func, void *arg) +{ + int ret; + size_t slen; + char *dirpath, *filepath; + DIR *dirp; + struct dirent *dp; + assert(vip != NULL && path != NULL); + + if (asprintf(&dirpath, "%s/%s", path, libvarpd_isaext()) == -1) + return (errno); + + if ((dirp = opendir(dirpath)) == NULL) { + ret = errno; + free(dirpath); + return (ret); + } + + slen = strlen(suffix); + for (;;) { + size_t len; + + errno = 0; + dp = readdir(dirp); + if (dp == NULL) { + ret = errno; + break; + } + + len = strlen(dp->d_name); + if (len <= slen) + continue; + + if (strcmp(suffix, dp->d_name + (len - slen)) != 0) + continue; + + if (asprintf(&filepath, "%s/%s", dirpath, dp->d_name) == -1) { + ret = errno; + break; + } + + if (func(vip, filepath, arg) != 0) { + free(filepath); + ret = 0; + break; + } + + free(filepath); + } + + closedir(dirp); + free(dirpath); + return (ret);
+} diff --git a/usr/src/lib/varpd/libvarpd/common/llib-lvarpd b/usr/src/lib/varpd/libvarpd/common/llib-lvarpd new file mode 100644 index 0000000000..24d819d290 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/llib-lvarpd @@ -0,0 +1,19 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. + */ + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + +#include <libvarpd.h> diff --git a/usr/src/lib/varpd/libvarpd/common/mapfile-plugin b/usr/src/lib/varpd/libvarpd/common/mapfile-plugin new file mode 100644 index 0000000000..7d208168e8 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/mapfile-plugin @@ -0,0 +1,47 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. 
+# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_SCOPE { + global: + libvarpd_plugin_alloc { FLAGS = EXTERN }; + libvarpd_plugin_free { FLAGS = EXTERN }; + libvarpd_plugin_proxy_arp { FLAGS = EXTERN }; + libvarpd_plugin_proxy_dhcp { FLAGS = EXTERN }; + libvarpd_plugin_proxy_ndp { FLAGS = EXTERN }; + libvarpd_plugin_register { FLAGS = EXTERN }; + libvarpd_prop_set_name { FLAGS = EXTERN }; + libvarpd_prop_set_prot { FLAGS = EXTERN }; + libvarpd_prop_set_type { FLAGS = EXTERN }; + libvarpd_prop_set_default { FLAGS = EXTERN }; + libvarpd_prop_set_nodefault { FLAGS = EXTERN }; + libvarpd_prop_set_range_uint32 { FLAGS = EXTERN }; + libvarpd_prop_set_range_str { FLAGS = EXTERN }; +}; diff --git a/usr/src/lib/varpd/libvarpd/common/mapfile-vers b/usr/src/lib/varpd/libvarpd/common/mapfile-vers new file mode 100644 index 0000000000..62fbb5e879 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/common/mapfile-vers @@ -0,0 +1,113 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance.
+# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION SUNWprivate { + global: + libvarpd_c_create; + libvarpd_c_destroy; + libvarpd_c_instance_activate; + libvarpd_c_instance_create; + libvarpd_c_instance_destroy; + libvarpd_c_prop_nprops; + libvarpd_c_prop_handle_alloc; + libvarpd_c_prop_handle_free; + libvarpd_c_prop_info_fill; + libvarpd_c_prop_info_fill_by_name; + libvarpd_c_prop_info; + libvarpd_c_prop_get; + libvarpd_c_prop_set; + + libvarpd_c_instance_lookup; + libvarpd_c_instance_target_mode; + libvarpd_c_instance_cache_flush; + libvarpd_c_instance_cache_delete; + libvarpd_c_instance_cache_get; + libvarpd_c_instance_cache_set; + libvarpd_c_instance_cache_walk; + + libvarpd_create; + libvarpd_destroy; + + libvarpd_door_server_create; + libvarpd_door_server_destroy; + + libvarpd_fma_degrade; + libvarpd_fma_restore; + + libvarpd_inject_varp; + libvarpd_inject_arp; + + libvarpd_instance_activate; + libvarpd_instance_create; + libvarpd_instance_destroy; + libvarpd_instance_lookup; + libvarpd_instance_id; + + libvarpd_panic; + + libvarpd_persist_disable; + libvarpd_persist_enable; + libvarpd_persist_restore; + + libvarpd_plugin_alloc; + libvarpd_plugin_load; + libvarpd_plugin_free; + libvarpd_plugin_arp_reply; + libvarpd_plugin_dhcp_reply; + libvarpd_plugin_query_reply; + libvarpd_plugin_proxy_arp; + libvarpd_plugin_proxy_dhcp; + libvarpd_plugin_proxy_ndp; + libvarpd_plugin_register; + libvarpd_plugin_walk; + libvarpd_plugin_vnetid; + + libvarpd_prop_set_default; + libvarpd_prop_set_nodefault; + libvarpd_prop_set_name; + libvarpd_prop_set_prot; + libvarpd_prop_set_range_uint32; + libvarpd_prop_set_range_str; + libvarpd_prop_set_type; + + libvarpd_prop_handle_alloc; + libvarpd_prop_handle_free; + libvarpd_prop_nprops; + libvarpd_prop_info_fill; + libvarpd_prop_info; + libvarpd_prop_get; + libvarpd_prop_set; + + libvarpd_overlay_lookup_quiesce; + libvarpd_overlay_lookup_run; + local: + *; +}; diff --git a/usr/src/lib/varpd/libvarpd/i386/Makefile 
b/usr/src/lib/varpd/libvarpd/i386/Makefile new file mode 100644 index 0000000000..41e699e8f8 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/i386/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/libvarpd/sparc/Makefile b/usr/src/lib/varpd/libvarpd/sparc/Makefile new file mode 100644 index 0000000000..41e699e8f8 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/sparc/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/libvarpd/sparcv9/Makefile b/usr/src/lib/varpd/libvarpd/sparcv9/Makefile new file mode 100644 index 0000000000..5c586c1d40 --- /dev/null +++ b/usr/src/lib/varpd/libvarpd/sparcv9/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/varpd/svp/Makefile b/usr/src/lib/varpd/svp/Makefile new file mode 100644 index 0000000000..f026c620e6 --- /dev/null +++ b/usr/src/lib/varpd/svp/Makefile @@ -0,0 +1,40 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +include ../../Makefile.lib + +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET = all +clean := TARGET = clean +clobber := TARGET = clobber +install := TARGET = install +lint := TARGET = lint + +.KEEP_STATE: + +all clean clobber install lint: $(SUBDIRS) + +install_h: + +check: + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include ../../Makefile.targ diff --git a/usr/src/lib/varpd/svp/Makefile.com b/usr/src/lib/varpd/svp/Makefile.com new file mode 100644 index 0000000000..814b70354e --- /dev/null +++ b/usr/src/lib/varpd/svp/Makefile.com @@ -0,0 +1,48 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. 
A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +LIBRARY = libvarpd_svp.a +VERS = .1 +OBJECTS = libvarpd_svp.o \ + libvarpd_svp_conn.o \ + libvarpd_svp_crc.o \ + libvarpd_svp_host.o \ + libvarpd_svp_loop.o \ + libvarpd_svp_remote.o \ + libvarpd_svp_timer.o + +include ../../../Makefile.lib +include ../../Makefile.plugin + +LIBS = $(DYNLIB) + +# +# Yes, this isn't a command, but libcmdutils does have the list(9F) +# functions and better to use that than compile list.o yet again +# ourselves... probably. +# +LDLIBS += -lc -lvarpd -lumem -lnvpair -lsocket -lnsl -lavl \ + -lcmdutils -lidspace -lbunyan +CPPFLAGS += -I../common + +SRCDIR = ../common + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +include ../../../Makefile.targ diff --git a/usr/src/lib/varpd/svp/amd64/Makefile b/usr/src/lib/varpd/svp/amd64/Makefile new file mode 100644 index 0000000000..b64b830ddd --- /dev/null +++ b/usr/src/lib/varpd/svp/amd64/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc.
+# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp.c b/usr/src/lib/varpd/svp/common/libvarpd_svp.c new file mode 100644 index 0000000000..23f9586ba6 --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp.c @@ -0,0 +1,755 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014, Joyent, Inc. + */ + +/* + * This plugin implements the SDC VXLAN Protocol (SVP). + * + * XXX Expand on everything. + */ + +#include <umem.h> +#include <errno.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <libnvpair.h> +#include <strings.h> +#include <string.h> +#include <assert.h> +#include <unistd.h> + +#include <libvarpd_provider.h> +#include "libvarpd_svp.h" + +bunyan_logger_t *svp_bunyan; +static int svp_defport = 1296; +static int svp_defuport = 1339; +static umem_cache_t *svp_lookup_cache; + +typedef enum svp_lookup_type { + SVP_L_UNKNOWN = 0x0, + SVP_L_VL2 = 0x1, + SVP_L_VL3 = 0x2 +} svp_lookup_type_t; + +typedef struct svp_lookup { + int svl_type; + union { + struct svl_lookup_vl2 { + varpd_query_handle_t *svl_handle; + overlay_target_point_t *svl_point; + } svl_vl2; + struct svl_lookup_vl3 { + varpd_arp_handle_t *svl_vah; + uint8_t *svl_out; + } svl_vl3; + } svl_u; + svp_query_t svl_query; +} svp_lookup_t; + +static const char *varpd_svp_props[] = { + "svp/host", + "svp/port", + "svp/underlay_ip", + "svp/underlay_port" +}; + +int +svp_comparator(const void *l, const void *r) +{ 
+ const svp_t *ls = l; + const svp_t *rs = r; + + if (ls->svp_vid > rs->svp_vid) + return (1); + if (ls->svp_vid < rs->svp_vid) + return (-1); + return (0); +} + +static void +svp_vl2_lookup_cb(svp_t *svp, svp_status_t status, const struct in6_addr *uip, + const uint16_t uport, void *arg) +{ + svp_lookup_t *svl = arg; + overlay_target_point_t *otp; + + assert(svp != NULL); + assert(arg != NULL); + + if (status != SVP_S_OK) { + libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle, + VARPD_LOOKUP_DROP); + umem_cache_free(svp_lookup_cache, svl); + return; + } + + otp = svl->svl_u.svl_vl2.svl_point; + bcopy(uip, &otp->otp_ip, sizeof (struct in6_addr)); + otp->otp_port = uport; + libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle, + VARPD_LOOKUP_OK); + umem_cache_free(svp_lookup_cache, svl); +} + +static void +svp_vl3_lookup_cb(svp_t *svp, svp_status_t status, const uint8_t *vl2mac, + const struct in6_addr *uip, const uint16_t uport, void *arg) +{ + overlay_target_point_t point; + svp_lookup_t *svl = arg; + + assert(svp != NULL); + assert(svl != NULL); + + if (status != SVP_S_OK) { + libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah, + VARPD_LOOKUP_DROP); + umem_cache_free(svp_lookup_cache, svl); + return; + } + + /* Inject the L2 mapping before the L3 */ + bcopy(uip, &point.otp_ip, sizeof (struct in6_addr)); + point.otp_port = uport; + libvarpd_inject_varp(svp->svp_hdl, vl2mac, &point); + + bcopy(vl2mac, svl->svl_u.svl_vl3.svl_out, ETHERADDRL); + libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah, + VARPD_LOOKUP_OK); + umem_cache_free(svp_lookup_cache, svl); +} + +static void +svp_vl2_invalidate_cb(svp_t *svp, const uint8_t *vl2mac) +{ + libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL); +} + +static void +svp_vl3_inject_cb(svp_t *svp, const uint16_t vlan, const struct in6_addr *vl3ip, + const uint8_t *vl2mac, const uint8_t *targmac) +{ + struct in_addr v4; + + if (IN6_IS_ADDR_V4MAPPED(vl3ip) == 0) + libvarpd_panic("implement libvarpd_inject_ndp"); + 
IN6_V4MAPPED_TO_INADDR(vl3ip, &v4); + libvarpd_inject_arp(svp->svp_hdl, vlan, vl2mac, &v4, targmac); +} + +static void +svp_shootdown_cb(svp_t *svp, const uint8_t *vl2mac, const struct in6_addr *uip, + const uint16_t uport) +{ + /* + * XXX We should probably do a conditional invalidation here. + */ + libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL); +} + +static svp_cb_t svp_defops = { + svp_vl2_lookup_cb, + svp_vl3_lookup_cb, + svp_vl2_invalidate_cb, + svp_vl3_inject_cb, + svp_shootdown_cb +}; + +static boolean_t +varpd_svp_valid_dest(overlay_plugin_dest_t dest) +{ + if (dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT)) + return (B_FALSE); + + return (B_TRUE); +} + +static int +varpd_svp_create(varpd_provider_handle_t *hdl, void **outp, + overlay_plugin_dest_t dest) +{ + int ret; + svp_t *svp; + + if (varpd_svp_valid_dest(dest) == B_FALSE) + return (ENOTSUP); + + svp = umem_zalloc(sizeof (svp_t), UMEM_DEFAULT); + if (svp == NULL) + return (ENOMEM); + + if ((ret = mutex_init(&svp->svp_lock, USYNC_THREAD, NULL)) != 0) { + umem_free(svp, sizeof (svp_t)); + return (ret); + } + + svp->svp_port = svp_defport; + svp->svp_uport = svp_defuport; + svp->svp_cb = svp_defops; + svp->svp_hdl = hdl; + svp->svp_vid = libvarpd_plugin_vnetid(svp->svp_hdl); + *outp = svp; + return (0); +} + +static int +varpd_svp_start(void *arg) +{ + int ret; + svp_remote_t *srp; + svp_t *svp = arg; + + mutex_lock(&svp->svp_lock); + if (svp->svp_host == NULL || svp->svp_port == 0 || + svp->svp_huip == B_FALSE || svp->svp_uport == 0) { + mutex_unlock(&svp->svp_lock); + return (EAGAIN); + } + mutex_unlock(&svp->svp_lock); + + if ((ret = svp_remote_find(svp->svp_host, svp->svp_port, &srp)) != 0) + return (ret); + + if ((ret = svp_remote_attach(srp, svp)) != 0) { + svp_remote_release(srp); + return (ret); + } + + return (0); +} + +static void +varpd_svp_stop(void *arg) +{ + svp_t *svp = arg; + + svp_remote_detach(svp); +} + +static void +varpd_svp_destroy(void *arg) +{ + svp_t *svp = arg; + + if
(svp->svp_host != NULL) + umem_free(svp->svp_host, strlen(svp->svp_host) + 1); + + if (mutex_destroy(&svp->svp_lock) != 0) + libvarpd_panic("failed to destroy svp_t`svp_lock"); + + umem_free(svp, sizeof (svp_t)); +} + +static void +varpd_svp_lookup(void *arg, varpd_query_handle_t *vqh, + const overlay_targ_lookup_t *otl, overlay_target_point_t *otp) +{ + svp_lookup_t *slp; + svp_t *svp = arg; + + /* + * Check if this is something that we need to proxy, eg. arp or ndp. + */ + if (otl->otl_sap == ETHERTYPE_ARP) { + libvarpd_plugin_proxy_arp(svp->svp_hdl, vqh, otl); + return; + } + + if (otl->otl_sap == ETHERTYPE_IPV6 && + otl->otl_dstaddr[0] == 0x33 && + otl->otl_dstaddr[1] == 0x33) { + libvarpd_plugin_proxy_ndp(svp->svp_hdl, vqh, otl); + return; + } + + /* XXX CACHES */ + + /* + * If we have a failure to allocate memory for this, that's not good. + * However, telling the kernel to just drop this packet is much better + * than the alternative at this moment. At least we'll try again and we + * may have something more available to us in a little bit. + * + * TODO We need to have observability around this case.
+ */ + slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT); + if (slp == NULL) { + libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP); + return; + } + + slp->svl_type = SVP_L_VL2; + slp->svl_u.svl_vl2.svl_handle = vqh; + slp->svl_u.svl_vl2.svl_point = otp; + + svp_remote_vl2_lookup(svp, &slp->svl_query, otl->otl_dstaddr, slp); +} + +static int +varpd_svp_nprops(void *arg, uint_t *nprops) +{ + *nprops = sizeof (varpd_svp_props) / sizeof (char *); + return (0); +} + +static int +varpd_svp_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph) +{ + switch (propid) { + case 0: + /* svp/host */ + libvarpd_prop_set_name(vph, varpd_svp_props[0]); + libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW); + libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING); + libvarpd_prop_set_nodefault(vph); + break; + case 1: + /* svp/port */ + libvarpd_prop_set_name(vph, varpd_svp_props[1]); + libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW); + libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT); + libvarpd_prop_set_default(vph, &svp_defport, + sizeof (svp_defport)); + libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX); + break; + case 2: + /* svp/underlay_ip */ + libvarpd_prop_set_name(vph, varpd_svp_props[2]); + libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW); + libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP); + libvarpd_prop_set_nodefault(vph); + break; + case 3: + /* svp/underlay_port */ + libvarpd_prop_set_name(vph, varpd_svp_props[3]); + libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW); + libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT); + libvarpd_prop_set_default(vph, &svp_defuport, + sizeof (svp_defuport)); + libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX); + break; + default: + return (EINVAL); + } + return (0); +} + +static int +varpd_svp_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep) +{ + svp_t *svp = arg; + + /* svp/host */ + if (strcmp(pname, varpd_svp_props[0]) == 0) { + size_t len; + + mutex_lock(&svp->svp_lock); + if (svp->svp_host == 
NULL) { + *sizep = 0; + } else { + len = strlen(svp->svp_host) + 1; + if (*sizep < len) { + mutex_unlock(&svp->svp_lock); + return (EOVERFLOW); + } + *sizep = len; + (void) strlcpy(buf, svp->svp_host, *sizep); + } + mutex_unlock(&svp->svp_lock); + return (0); + } + + /* svp/port */ + if (strcmp(pname, varpd_svp_props[1]) == 0) { + uint64_t val; + + if (*sizep < sizeof (uint64_t)) + return (EOVERFLOW); + + mutex_lock(&svp->svp_lock); + if (svp->svp_port == 0) { + *sizep = 0; + } else { + val = svp->svp_port; + bcopy(&val, buf, sizeof (uint64_t)); + *sizep = sizeof (uint64_t); + } + + mutex_unlock(&svp->svp_lock); + return (0); + } + + /* svp/underlay_ip */ + if (strcmp(pname, varpd_svp_props[2]) == 0) { + if (*sizep < sizeof (struct in6_addr)) + return (EOVERFLOW); + mutex_lock(&svp->svp_lock); + if (svp->svp_huip == B_FALSE) { + *sizep = 0; + } else { + bcopy(&svp->svp_uip, buf, sizeof (struct in6_addr)); + *sizep = sizeof (struct in6_addr); + } + mutex_unlock(&svp->svp_lock); + return (0); + } + + /* svp/underlay_port */ + if (strcmp(pname, varpd_svp_props[3]) == 0) { + uint64_t val; + + if (*sizep < sizeof (uint64_t)) + return (EOVERFLOW); + + mutex_lock(&svp->svp_lock); + if (svp->svp_uport == 0) { + *sizep = 0; + } else { + val = svp->svp_uport; + bcopy(&val, buf, sizeof (uint64_t)); + *sizep = sizeof (uint64_t); + } + + mutex_unlock(&svp->svp_lock); + return (0); + } + + return (EINVAL); +} + +static int +varpd_svp_setprop(void *arg, const char *pname, const void *buf, + const uint32_t size) +{ + svp_t *svp = arg; + + /* svp/host */ + if (strcmp(pname, varpd_svp_props[0]) == 0) { + char *dup; + /* XXX Validate hostname characters, maybe grab a C locale */ + dup = umem_alloc(size, UMEM_DEFAULT); + if (dup == NULL) + return (ENOMEM); + (void) strlcpy(dup, buf, size); + mutex_lock(&svp->svp_lock); + if (svp->svp_host != NULL) + umem_free(svp->svp_host, strlen(svp->svp_host) + 1); + svp->svp_host = dup; + mutex_unlock(&svp->svp_lock); + return (0); + } + + /* svp/port */ + if (strcmp(pname,
varpd_svp_props[1]) == 0) { + const uint64_t *valp = buf; + if (size < sizeof (uint64_t)) + return (EOVERFLOW); + + if (*valp == 0 || *valp > UINT16_MAX) + return (EINVAL); + + mutex_lock(&svp->svp_lock); + svp->svp_port = (uint16_t)*valp; + mutex_unlock(&svp->svp_lock); + return (0); + } + + /* svp/underlay_ip */ + if (strcmp(pname, varpd_svp_props[2]) == 0) { + const struct in6_addr *ipv6 = buf; + + if (size < sizeof (struct in6_addr)) + return (EOVERFLOW); + + /* + * XXX Is anything else disallowed? + */ + if (IN6_IS_ADDR_V4COMPAT(ipv6)) + return (EINVAL); + mutex_lock(&svp->svp_lock); + bcopy(buf, &svp->svp_uip, sizeof (struct in6_addr)); + svp->svp_huip = B_TRUE; + mutex_unlock(&svp->svp_lock); + return (0); + } + + /* svp/underlay_port */ + if (strcmp(pname, varpd_svp_props[3]) == 0) { + const uint64_t *valp = buf; + if (size < sizeof (uint64_t)) + return (EOVERFLOW); + + if (*valp == 0 || *valp > UINT16_MAX) + return (EINVAL); + + mutex_lock(&svp->svp_lock); + svp->svp_uport = (uint16_t)*valp; + mutex_unlock(&svp->svp_lock); + + return (0); + } + + return (EINVAL); +} + +static int +varpd_svp_save(void *arg, nvlist_t *nvp) +{ + int ret; + svp_t *svp = arg; + + mutex_lock(&svp->svp_lock); + if (svp->svp_host != NULL) { + if ((ret = nvlist_add_string(nvp, varpd_svp_props[0], + svp->svp_host)) != 0) { + mutex_unlock(&svp->svp_lock); + return (ret); + } + } + + if (svp->svp_port != 0) { + if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[1], + svp->svp_port)) != 0) { + mutex_unlock(&svp->svp_lock); + return (ret); + } + } + + if (svp->svp_huip == B_TRUE) { + char buf[INET6_ADDRSTRLEN]; + + if (inet_ntop(AF_INET6, &svp->svp_uip, buf, sizeof (buf)) == + NULL) + libvarpd_panic("unexpected inet_ntop failure: %d", + errno); + + if ((ret = nvlist_add_string(nvp, varpd_svp_props[2], + buf)) != 0) { + mutex_unlock(&svp->svp_lock); + return (ret); + } + } + + if (svp->svp_uport != 0) { + if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[3], + svp->svp_uport)) != 0) { + 
mutex_unlock(&svp->svp_lock); + return (ret); + } + } + + mutex_unlock(&svp->svp_lock); + return (0); +} + +static int +varpd_svp_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl, + overlay_plugin_dest_t dest, void **outp) +{ + int ret; + svp_t *svp; + char *ipstr, *hstr; + + if (varpd_svp_valid_dest(dest) == B_FALSE) + return (ENOTSUP); + + if ((ret = varpd_svp_create(hdl, (void **)&svp, dest)) != 0) + return (ret); + + /* XXX Validate hostname */ + if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[0], + &hstr)) != 0) { + if (ret != ENOENT) { + varpd_svp_destroy(svp); + return (ret); + } + svp->svp_host = NULL; + } else { + size_t blen = strlen(hstr) + 1; + svp->svp_host = umem_alloc(blen, UMEM_DEFAULT); + if (svp->svp_host == NULL) { + varpd_svp_destroy(svp); + return (ENOMEM); + } + (void) strlcpy(svp->svp_host, hstr, blen); + } + + if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[1], + &svp->svp_port)) != 0) { + if (ret != ENOENT) { + varpd_svp_destroy(svp); + return (ret); + } + svp->svp_port = 0; + } + + if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[2], + &ipstr)) != 0) { + if (ret != ENOENT) { + varpd_svp_destroy(svp); + return (ret); + } + svp->svp_huip = B_FALSE; + } else { + ret = inet_pton(AF_INET6, ipstr, &svp->svp_uip); + if (ret == -1) { + assert(errno == EAFNOSUPPORT); + libvarpd_panic("unexpected inet_pton failure: %d", + errno); + } + + if (ret == 0) { + varpd_svp_destroy(svp); + return (EINVAL); + } + svp->svp_huip = B_TRUE; + } + + if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[3], + &svp->svp_uport)) != 0) { + if (ret != ENOENT) { + varpd_svp_destroy(svp); + return (ret); + } + svp->svp_uport = 0; + } + + svp->svp_hdl = hdl; + *outp = svp; + return (0); +} + +static void +varpd_svp_arp(void *arg, varpd_arp_handle_t *vah, int type, + const struct sockaddr *sock, uint8_t *out) +{ + svp_t *svp = arg; + svp_lookup_t *svl; + + if (type != VARPD_QTYPE_ETHERNET) { + libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP); + return; + } + + /* XXX CACHES */ + + svl = umem_cache_alloc(svp_lookup_cache,
UMEM_DEFAULT); + if (svl == NULL) { + libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP); + return; + } + + svl->svl_type = SVP_L_VL3; + svl->svl_u.svl_vl3.svl_vah = vah; + svl->svl_u.svl_vl3.svl_out = out; + svp_remote_vl3_lookup(svp, &svl->svl_query, sock, svl); +} + +static const varpd_plugin_ops_t varpd_svp_ops = { + 0, + varpd_svp_create, + varpd_svp_start, + varpd_svp_stop, + varpd_svp_destroy, + NULL, + varpd_svp_lookup, + varpd_svp_nprops, + varpd_svp_propinfo, + varpd_svp_getprop, + varpd_svp_setprop, + varpd_svp_save, + varpd_svp_restore, + varpd_svp_arp, + NULL +}; + +static int +svp_bunyan_init(void) +{ + int ret; + + if ((ret = bunyan_init("svp", &svp_bunyan)) != 0) + return (ret); + ret = bunyan_stream_add(svp_bunyan, "stderr", BUNYAN_L_INFO, + bunyan_stream_fd, (void *)STDERR_FILENO); + if (ret != 0) + bunyan_fini(svp_bunyan); + return (ret); +} + +static void +svp_bunyan_fini(void) +{ + if (svp_bunyan != NULL) + bunyan_fini(svp_bunyan); +} + +#pragma init(varpd_svp_init) +static void +varpd_svp_init(void) +{ + int err; + varpd_plugin_register_t *vpr; + + /* XXX Revisit and make sure we have proper clean up */ + if (svp_bunyan_init() != 0) + return; + + if ((err = svp_host_init()) != 0) { + svp_bunyan_fini(); + return; + } + + /* XXX Communicate failure */ + svp_lookup_cache = umem_cache_create("svp_lookup", + sizeof (svp_lookup_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + if (svp_lookup_cache == NULL) { + svp_bunyan_fini(); + return; + } + + if ((err = svp_event_init()) != 0) { + svp_bunyan_fini(); + umem_cache_destroy(svp_lookup_cache); + return; + } + + if ((err = svp_timer_init()) != 0) { + svp_event_fini(); + umem_cache_destroy(svp_lookup_cache); + svp_bunyan_fini(); + return; + } + + if ((err = svp_remote_init()) != 0) { + svp_event_fini(); + umem_cache_destroy(svp_lookup_cache); + svp_bunyan_fini(); + return; + } + + /* XXX Revisit failure semantics here */ + vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err); + if (vpr == NULL) { +
svp_remote_fini(); + svp_event_fini(); + umem_cache_destroy(svp_lookup_cache); + return; + } + + vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC; + vpr->vpr_name = "svp"; + vpr->vpr_ops = &varpd_svp_ops; + + (void) libvarpd_plugin_register(vpr); + libvarpd_plugin_free(vpr); +} diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp.h b/usr/src/lib/varpd/svp/common/libvarpd_svp.h new file mode 100644 index 0000000000..90acf325aa --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp.h @@ -0,0 +1,377 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. + */ + +#ifndef _LIBVARPD_SVP_H +#define _LIBVARPD_SVP_H + +/* + * Implementation details of the SVP plugin and the SVP protocol. 
+ */ + +#include <netinet/in.h> +#include <sys/ethernet.h> +#include <thread.h> +#include <synch.h> +#include <libvarpd_provider.h> +#include <sys/avl.h> +#include <port.h> +#include <sys/list.h> +#include <bunyan.h> + +#include <libvarpd_svp_prot.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct svp svp_t; +typedef struct svp_remote svp_remote_t; +typedef struct svp_conn svp_conn_t; +typedef struct svp_query svp_query_t; + +typedef void (*svp_event_f)(port_event_t *, void *); + +typedef struct svp_event { + svp_event_f se_func; + void *se_arg; + int se_events; +} svp_event_t; + +typedef void (*svp_timer_f)(void *); + +typedef struct svp_timer { + svp_timer_f st_func; + void *st_arg; + boolean_t st_oneshot; + uint32_t st_value; + /* Fields below here are private to the svp_timer implementaiton */ + uint64_t st_expire; + boolean_t st_delivering; + avl_node_t st_link; +} svp_timer_t; + +typedef union svp_query_data { + svp_vl2_req_t sqd_vl2r; + svp_vl2_ack_t sqd_vl2a; + svp_vl3_req_t sdq_vl3r; + svp_vl3_ack_t sdq_vl3a; +} svp_query_data_t; + +typedef void (*svp_query_f)(svp_query_t *, void *); + +typedef enum svp_query_state { + SVP_QUERY_INIT = 0x00, + SVP_QUERY_WRITING = 0x01, + SVP_QUERY_READING = 0x02, + SVP_QUERY_FINISHED = 0x03 +} svp_query_state_t; + +/* + * The query structure is usable for all forms of svp queries that end up + * getting passed across. Right now it's optimized for the fixed size data + * requests as opposed to requests whose responses will always be streaming in + * nature. Though, the streaming requests are the less common ones we have. 
+ * + * XXX Fix that and make this streaming friendly + */ +struct svp_query { + list_node_t sq_lnode; /* linkage on svp_conn_t`sc_queries */ + svp_query_f sq_func; /* completion callback, called with sq_arg */ + svp_query_state_t sq_state; + void *sq_arg; + svp_t *sq_svp; + svp_req_t sq_header; + svp_query_data_t sq_rdun; + svp_query_data_t sq_wdun; + svp_status_t sq_status; + void *sq_rdata; /* request payload written to the socket after sq_header */ + size_t sq_rsize; + void *sq_wdata; /* buffer the response payload is read into */ + size_t sq_wsize; + hrtime_t sq_acttime; /* -1 when queued; see svp_conn_querytimer() */ +}; + +/* + * XXX Centralize this somewhere more generally, big theory statement, where are + * you? + * + * We have a connection pool that's built upon DNS records. DNS describes the + * membership of the set of remote peers that make up our pool and we maintain + * one connection to each of them. In addition, we maintain an exponential + * backoff for each peer and will attempt to reconnect immediately before backing + * off. The following are the valid states that a connection can be in: + * + * SVP_CS_INITIAL This is the initial state of a connection, all + * that should exist is an unbound socket. + * + * SVP_CS_CONNECTING A call to connect has been made and we are + * polling for it to complete. + * + * SVP_CS_BACKOFF A connect attempt has failed and we are + * currently backing off, waiting to try again. + * + * SVP_CS_ACTIVE We have successfully connected to the remote + * system. + * + * SVP_CS_WINDDOWN This connection is going to valhalla. In other + * words, a previously active connection is no + * longer valid in DNS, so we should curb our use + * of it, and reap it as soon as we have other + * active connections. + * + * SVP_CS_REAPING This connection object will be freed and reaped. + * It will no longer be used. + * + * The following diagram attempts to describe our state transition scheme, and + * when we transition from one state to the next. + * + * | + * * New remote IP from DNS resolution, + * | not currently active in the system. 
+ * | + * v Socket Error, + * +----------------+ still in DNS + * | SVP_CS_INITIAL |<----------------------*-----+ + * +----------------+ | + * | | + * Connection failed .. Always * Successful | + * backoff limit . | connect() | + * not exceeded +----*---------+ | +-----------*--+ | + * | | | | | | + * V ^ v ^ V ^ + * +----------------+ +-------------------+ +---------------+ + * +-<-| SVP_CS_BACKOFF | | SVP_CS_CONNECTING | | SVP_CS_ACTIVE | + * | +----------------+ +-------------------+ +---------------+ + * | V ^ | ^ V + * | Backoff wait * | v | * Removed + * | interval +--------------+ | Added to * | from DNS + * | finished | DNS | | + * | | | | + * | | ^ V + * | | +-----------------+ + * +---->---------------+-----<-------+ +-<-| SVP_CS_WINDDOWN | + * v Conn * +-----------------+ + * | Error | V + * Removed from * v | + * DNS | +----------------+ * Connection + * +----------->| SVP_CS_REAPING |<--------+ Quiesced + * +----------------+ + * + * NOTE(review): the enum below has no SVP_CS_REAPING value even though the + * text and diagram above describe one, and it defines SVP_CS_ERROR, which the + * text above does not describe. The connection code uses SVP_CS_ERROR together + * with the SVP_CF_REAP flag when tearing a connection down. + */ +typedef enum svp_conn_state { + SVP_CS_ERROR = 0x00, + SVP_CS_INITIAL = 0x01, + SVP_CS_CONNECTING = 0x02, + SVP_CS_BACKOFF = 0x03, + SVP_CS_ACTIVE = 0x04, + SVP_CS_WINDDOWN = 0x05 +} svp_conn_state_t; + +typedef enum svp_conn_error { + SVP_CE_NONE = 0x00, + SVP_CE_ASSOCIATE = 0x01, /* svp_event_associate() failed */ + SVP_CE_NOPOLLOUT = 0x02, /* connect completion event lacked POLLOUT */ + SVP_CE_SOCKET = 0x03 /* socket() failed */ +} svp_conn_error_t; + +typedef enum svp_conn_flags { + SVP_CF_ADDED = 0x01, /* counted in the remote's sr_tconns */ + SVP_CF_DEGRADED = 0x02, /* counted in the remote's sr_ndconns */ + SVP_CF_REAP = 0x04, /* destroy on next pass through the handler */ + SVP_CF_TEARDOWN = 0x08, /* reset on next pass through the handler */ + SVP_CF_UFLAG = 0x0c, /* mask: SVP_CF_REAP | SVP_CF_TEARDOWN */ + SVP_CF_USER = 0x10 /* a user event has been injected and is pending */ +} svp_conn_flags_t; + +typedef struct svp_conn_out { + svp_query_t *sco_query; /* query currently being written, or NULL */ + size_t sco_offset; /* bytes of header + payload written so far */ +} svp_conn_out_t; + +typedef struct svp_conn_in { + svp_query_t *sci_query; /* query whose response is being read, or NULL */ + svp_req_t sci_req; /* response header as read from the socket */ + size_t sci_offset; +} svp_conn_in_t; + +struct svp_conn { + svp_remote_t *sc_remote; /* RO */ + struct in6_addr sc_addr; /* RO */ + list_node_t sc_rlist; /* svp_remote_t`sr_lock */ + mutex_t sc_lock; + svp_event_t sc_event; + svp_timer_t sc_btimer; + svp_timer_t sc_qtimer; + int sc_socket; + uint_t 
sc_gen; + uint_t sc_nbackoff; + svp_conn_flags_t sc_flags; + svp_conn_state_t sc_cstate; + svp_conn_error_t sc_error; + int sc_errno; + hrtime_t sc_lastact; + list_t sc_queries; + svp_conn_out_t sc_output; + svp_conn_in_t sc_input; +}; + +typedef enum svp_remote_state { + SVP_RS_LOOKUP_SCHEDULED = 0x01, /* On the DNS Queue */ + SVP_RS_LOOKUP_INPROGRESS = 0x02, /* Doing a DNS lookup */ + SVP_RS_LOOKUP_VALID = 0x04 /* addrinfo valid */ +} svp_remote_state_t; + +/* + * This series of bit-based flags should be ordered such that the most severe + * is first. We can only set one message that user land can see, so if more than + * one is set we want to make sure that one is there. + */ +typedef enum svp_degrade_state { + SVP_RD_DNS_FAIL = 0x01, /* DNS Resolution Failure */ + SVP_RD_REMOTE_FAIL = 0x02, /* cannot reach any remote peers */ + SVP_RD_ALL = 0x03 /* Only suitable for restore */ +} svp_degrade_state_t; + +struct svp_remote { + char *sr_hostname; /* RO */ + uint16_t sr_rport; /* RO */ + avl_node_t sr_gnode; /* svp_remote_lock */ + svp_remote_t *sr_nexthost; /* svp_host_lock */ + mutex_t sr_lock; + svp_remote_state_t sr_state; + svp_degrade_state_t sr_degrade; + struct addrinfo *sr_addrinfo; + avl_tree_t sr_tree; + uint_t sr_count; /* active count */ + uint_t sr_gen; + uint_t sr_tconns; /* total conns + dconns */ + uint_t sr_ndconns; /* number of degraded conns */ + list_t sr_conns; /* all conns */ +}; + +/* + * We have a bunch of different things that we get back from the API at the + * plug-in layer. 
These include: + * + * o OOB Shootdowns + * o VL3->VL2 Lookups + * o VL2->UL3 Lookups + * o VL2 Log invalidations + * o VL3 Log injections + */ +typedef void (*svp_vl2_lookup_f)(svp_t *, svp_status_t, const struct in6_addr *, + const uint16_t, void *); +typedef void (*svp_vl3_lookup_f)(svp_t *, svp_status_t, const uint8_t *, + const struct in6_addr *, const uint16_t, void *); +typedef void (*svp_vl2_invalidation_f)(svp_t *, const uint8_t *); +typedef void (*svp_vl3_inject_f)(svp_t *, const uint16_t, + const struct in6_addr *, const uint8_t *, const uint8_t *); +typedef void (*svp_shootdown_f)(svp_t *, const uint8_t *, + const struct in6_addr *, const uint16_t uport); + +typedef struct svp_cb { + svp_vl2_lookup_f scb_vl2_lookup; + svp_vl3_lookup_f scb_vl3_lookup; + svp_vl2_invalidation_f scb_vl2_invalidate; + svp_vl3_inject_f scb_vl3_inject; + svp_shootdown_f scb_shootdown; +} svp_cb_t; + +/* + * Core implementation structure. + */ +struct svp { + overlay_plugin_dest_t svp_dest; /* RO */ + varpd_provider_handle_t *svp_hdl; /* RO */ + svp_cb_t svp_cb; /* RO */ + uint64_t svp_vid; /* RO? 
*/ + avl_node_t svp_rlink; /* Owned by svp_remote */ + svp_remote_t *svp_remote; /* ROish XXX */ + mutex_t svp_lock; + char *svp_host; + uint16_t svp_port; + uint16_t svp_uport; + boolean_t svp_huip; + struct in6_addr svp_uip; +}; + +extern bunyan_logger_t *svp_bunyan; + +/* + * XXX Strawman backend APIs + */ +extern int svp_remote_find(char *, uint16_t, svp_remote_t **); +extern int svp_remote_attach(svp_remote_t *, svp_t *); +extern void svp_remote_detach(svp_t *); +extern void svp_remote_release(svp_remote_t *); +extern void svp_remote_vl3_lookup(svp_t *, svp_query_t *, + const struct sockaddr *, void *); +extern void svp_remote_vl2_lookup(svp_t *, svp_query_t *, const uint8_t *, + void *); + +/* + * Init functions + */ +extern int svp_remote_init(void); +extern void svp_remote_fini(void); +extern int svp_event_init(void); +extern int svp_event_timer_init(svp_event_t *); +extern void svp_event_fini(void); +extern int svp_host_init(void); +extern int svp_timer_init(void); + +/* + * Timers + */ +extern int svp_tickrate; +extern void svp_timer_add(svp_timer_t *); +extern void svp_timer_remove(svp_timer_t *); + +/* + * Event loop management + */ +extern int svp_event_associate(svp_event_t *, int); +extern int svp_event_dissociate(svp_event_t *, int); +extern int svp_event_inject(void *); + +/* + * Connection manager + */ +extern int svp_conn_create(svp_remote_t *, const struct in6_addr *); +extern void svp_conn_destroy(svp_conn_t *); +extern void svp_conn_fallout(svp_conn_t *); +extern void svp_conn_queue(svp_conn_t *, svp_query_t *); + +/* + * FMA related + */ +extern void svp_remote_degrade(svp_remote_t *, svp_degrade_state_t); +extern void svp_remote_restore(svp_remote_t *, svp_degrade_state_t); + +/* + * Misc. 
+ */ +extern int svp_comparator(const void *, const void *); +extern void svp_remote_reassign(svp_remote_t *, svp_conn_t *); +extern void svp_remote_resolved(svp_remote_t *, struct addrinfo *); +extern void svp_host_queue(svp_remote_t *); +extern void svp_query_release(svp_query_t *); +extern void svp_query_crc32(svp_req_t *, void *, size_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBVARPD_SVP_H */ diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c new file mode 100644 index 0000000000..b9d3925b64 --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c @@ -0,0 +1,945 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. + */ + +/* + * Logic to manage an individual connection to a remote host. + * + * Individual connections always are associated with an svp_remote_t from their + * creation to their destruction. 
+ */ + +#include <assert.h> +#include <umem.h> +#include <errno.h> +#include <strings.h> +#include <unistd.h> +#include <stddef.h> +#include <sys/uio.h> + +#include <libvarpd_svp.h> + +static int svp_conn_query_timeout = 30; +static int svp_conn_backoff_tbl[] = { 1, 2, 4, 8, 16, 32 }; +static int svp_conn_nbackoff = sizeof (svp_conn_backoff_tbl) / sizeof (int); + +typedef enum svp_conn_act { + SVP_RA_NONE = 0x00, + SVP_RA_DEGRADE = 0x01, + SVP_RA_RESTORE = 0x02, + SVP_RA_ERROR = 0x03 +} svp_conn_act_t; + +static void +svp_conn_inject(svp_conn_t *scp) +{ + int ret; + assert(MUTEX_HELD(&scp->sc_lock)); + + if (scp->sc_flags & SVP_CF_USER) + return; + scp->sc_flags |= SVP_CF_USER; + if ((ret = svp_event_inject(scp)) != 0) + libvarpd_panic("failed to inject event: %d\n", ret); +} + +static void +svp_conn_degrade(svp_conn_t *scp) +{ + svp_remote_t *srp = scp->sc_remote; + + assert(MUTEX_HELD(&srp->sr_lock)); + assert(MUTEX_HELD(&scp->sc_lock)); + + if (scp->sc_flags & SVP_CF_DEGRADED) + return; + + scp->sc_flags |= SVP_CF_DEGRADED; + srp->sr_ndconns++; + if (srp->sr_ndconns == srp->sr_tconns) + svp_remote_degrade(srp, SVP_RD_REMOTE_FAIL); +} + +static void +svp_conn_restore(svp_conn_t *scp) +{ + svp_remote_t *srp = scp->sc_remote; + + assert(MUTEX_HELD(&srp->sr_lock)); + assert(MUTEX_HELD(&scp->sc_lock)); + + if (!(scp->sc_flags & SVP_CF_DEGRADED)) + return; + + scp->sc_flags &= ~SVP_CF_DEGRADED; + if (srp->sr_ndconns == srp->sr_tconns) + svp_remote_restore(srp, SVP_RD_REMOTE_FAIL); + srp->sr_ndconns--; +} + +static void +svp_conn_add(svp_conn_t *scp) +{ + svp_remote_t *srp = scp->sc_remote; + + assert(MUTEX_HELD(&srp->sr_lock)); + assert(MUTEX_HELD(&scp->sc_lock)); + + if (scp->sc_flags & SVP_CF_ADDED) + return; + + list_insert_tail(&srp->sr_conns, scp); + scp->sc_flags |= SVP_CF_ADDED; + srp->sr_tconns++; +} + +static void +svp_conn_remove(svp_conn_t *scp) +{ + svp_remote_t *srp = scp->sc_remote; + + assert(MUTEX_HELD(&srp->sr_lock)); + 
assert(MUTEX_HELD(&scp->sc_lock)); + + if (!(scp->sc_flags & SVP_CF_ADDED)) + return; + + scp->sc_flags &= ~SVP_CF_ADDED; + if (scp->sc_flags & SVP_CF_DEGRADED) + srp->sr_ndconns--; + srp->sr_tconns--; + if (srp->sr_tconns == srp->sr_ndconns) + svp_remote_degrade(srp, SVP_RD_REMOTE_FAIL); +} + +static svp_query_t * +svp_conn_query_find(svp_conn_t *scp, uint32_t id) +{ + svp_query_t *sqp; + + assert(MUTEX_HELD(&scp->sc_lock)); + + for (sqp = list_head(&scp->sc_queries); sqp != NULL; + sqp = list_next(&scp->sc_queries, sqp)) { + if (sqp->sq_header.svp_id == id) + break; + } + + return (sqp); +} + +static svp_conn_act_t +svp_conn_backoff(svp_conn_t *scp) +{ + assert(MUTEX_HELD(&scp->sc_lock)); + + if (close(scp->sc_socket) != 0) + libvarpd_panic("failed to close socket %d: %d\n", + scp->sc_socket, errno); + scp->sc_socket = -1; + + scp->sc_cstate = SVP_CS_BACKOFF; + scp->sc_nbackoff++; + if (scp->sc_nbackoff >= svp_conn_nbackoff) { + scp->sc_btimer.st_value = + svp_conn_backoff_tbl[svp_conn_nbackoff - 1]; + } else { + scp->sc_btimer.st_value = + svp_conn_backoff_tbl[scp->sc_nbackoff - 1]; + } + svp_timer_add(&scp->sc_btimer); + + if (scp->sc_nbackoff > svp_conn_nbackoff) + return (SVP_RA_DEGRADE); + return (SVP_RA_NONE); +} + +static svp_conn_act_t +svp_conn_connect(svp_conn_t *scp) +{ + int ret; + struct sockaddr_in6 in6; + + assert(MUTEX_HELD(&scp->sc_lock)); + assert(scp->sc_cstate == SVP_CS_BACKOFF || + scp->sc_cstate == SVP_CS_INITIAL); + assert(scp->sc_socket == -1); + if (scp->sc_cstate == SVP_CS_INITIAL) + scp->sc_nbackoff = 0; + + scp->sc_socket = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); + if (scp->sc_socket == -1) { + scp->sc_error = SVP_CE_SOCKET; + scp->sc_errno = errno; + scp->sc_cstate = SVP_CS_ERROR; + return (SVP_RA_DEGRADE); + } + + bzero(&in6, sizeof (struct sockaddr_in6)); + in6.sin6_family = AF_INET6; + in6.sin6_port = htons(scp->sc_remote->sr_rport); + bcopy(&scp->sc_addr, &in6.sin6_addr, sizeof (struct in6_addr)); + ret = 
connect(scp->sc_socket, (struct sockaddr *)&in6, + sizeof (struct sockaddr_in6)); + if (ret != 0) { + boolean_t async = B_FALSE; + + switch (errno) { + case EACCES: + case EADDRINUSE: + case EAFNOSUPPORT: + case EALREADY: + case EBADF: + case EISCONN: + case ELOOP: + case ENOENT: + case ENOSR: + case EWOULDBLOCK: + libvarpd_panic("unanticipated connect errno %d", errno); + case EINPROGRESS: + case EINTR: + async = B_TRUE; + default: + break; + } + + /* + * So, we will be connecting to this in the future, advance our + * state and make sure that we poll for the next round. + */ + if (async == B_TRUE) { + scp->sc_cstate = SVP_CS_CONNECTING; + scp->sc_event.se_events = POLLOUT | POLLHUP; + ret = svp_event_associate(&scp->sc_event, + scp->sc_socket); + if (ret == 0) + return (SVP_RA_NONE); + scp->sc_error = SVP_CE_ASSOCIATE; + scp->sc_errno = ret; + scp->sc_cstate = SVP_CS_ERROR; + return (SVP_RA_DEGRADE); + } else { + /* + * This call failed, which means that we obtained one of + * the following: + * + * EADDRNOTAVAIL + * ECONNREFUSED + * EIO + * ENETUNREACH + * EHOSTUNREACH + * ENXIO + * ETIMEDOUT + * + * Therefore we need to set ourselves into backoff and + * wait for that to clear up. + */ + return (svp_conn_backoff(scp)); + } + } + + /* + * We've connected. Successfully move ourselves to the bound + * state and start polling. + */ + scp->sc_cstate = SVP_CS_ACTIVE; + scp->sc_event.se_events = POLLIN | POLLRDNORM | POLLHUP; + ret = svp_event_associate(&scp->sc_event, scp->sc_socket); + if (ret == 0) + return (SVP_RA_RESTORE); + scp->sc_error = SVP_CE_ASSOCIATE; + scp->sc_cstate = SVP_CS_ERROR; + + return (SVP_RA_DEGRADE); +} + +/* + * This should be the first call we get after a connect. If we have successfully + * connected, we should see a writeable event. We may also see an error or a + * hang up. In either of these cases, we transition to error mode. 
If there is + * also a readable event, we ignore it at the moment and just let a + * reassociation pick it up so we can simplify the set of state transitions that + * we have. + */ +static svp_conn_act_t +svp_conn_poll_connect(port_event_t *pe, svp_conn_t *scp) +{ + int ret, err; + socklen_t sl = sizeof (err); + if (!(pe->portev_events & POLLOUT)) { + scp->sc_errno = 0; + scp->sc_error = SVP_CE_NOPOLLOUT; + scp->sc_cstate = SVP_CS_ERROR; + return (SVP_RA_DEGRADE); + } + + ret = getsockopt(scp->sc_socket, SOL_SOCKET, SO_ERROR, &err, &sl); + /* XXX Really none of these? */ + if (ret != 0) + libvarpd_panic("unanticipated getsockopt error"); + if (err != 0) { + return (svp_conn_backoff(scp)); + } + + scp->sc_cstate = SVP_CS_ACTIVE; + scp->sc_event.se_events = POLLIN | POLLRDNORM | POLLHUP; + ret = svp_event_associate(&scp->sc_event, scp->sc_socket); + if (ret == 0) + return (SVP_RA_RESTORE); + scp->sc_error = SVP_CE_ASSOCIATE; + scp->sc_errno = ret; + scp->sc_cstate = SVP_CS_ERROR; + return (SVP_RA_DEGRADE); +} + +static svp_conn_act_t +svp_conn_pollout(svp_conn_t *scp) +{ + svp_query_t *sqp; + svp_req_t *req; + size_t off; + struct iovec iov[2]; + int nvecs = 0; + ssize_t ret; + + assert(MUTEX_HELD(&scp->sc_lock)); + + /* + * We need to find a query and start writing it out. 
+ */ + if (scp->sc_output.sco_query == NULL) { + for (sqp = list_head(&scp->sc_queries); sqp != NULL; + sqp = list_next(&scp->sc_queries, sqp)) { + if (sqp->sq_state != SVP_QUERY_INIT) + continue; + break; + } + + if (sqp == NULL) { + scp->sc_event.se_events &= ~POLLOUT; + return (SVP_RA_NONE); + } + + scp->sc_output.sco_query = sqp; + scp->sc_output.sco_offset = 0; + sqp->sq_state = SVP_QUERY_WRITING; + svp_query_crc32(&sqp->sq_header, sqp->sq_rdata, sqp->sq_rsize); + } + + sqp = scp->sc_output.sco_query; + req = &sqp->sq_header; + off = scp->sc_output.sco_offset; + if (off < sizeof (svp_req_t)) { + iov[nvecs].iov_base = (void *)((uintptr_t)req + off); + iov[nvecs].iov_len = sizeof (svp_req_t) - off; + nvecs++; + off = 0; + } else { + off -= sizeof (svp_req_t); + } + + iov[nvecs].iov_base = (void *)((uintptr_t)sqp->sq_rdata + off); + iov[nvecs].iov_len = sqp->sq_rsize - off; + nvecs++; + + do { + ret = writev(scp->sc_socket, iov, nvecs); + } while (ret == -1 && errno == EAGAIN); + if (ret == -1) { + switch (errno) { + case EAGAIN: + scp->sc_event.se_events |= POLLOUT; + return (SVP_RA_NONE); + case EIO: + case ENXIO: + case ECONNRESET: + return (SVP_RA_ERROR); + default: + libvarpd_panic("unexpected errno: %d", errno); + } + } + + scp->sc_output.sco_offset += ret; + if (ret >= sizeof (svp_req_t) + sqp->sq_rsize) { + sqp->sq_state = SVP_QUERY_READING; + scp->sc_output.sco_query = NULL; + scp->sc_output.sco_offset = 0; + scp->sc_event.se_events |= POLLOUT; + } + return (SVP_RA_NONE); +} + +static boolean_t +svp_conn_pollin_validate(svp_conn_t *scp) +{ + svp_query_t *sqp; + uint32_t nsize; + uint16_t nvers, nop; + svp_req_t *resp = &scp->sc_input.sci_req; + + assert(MUTEX_HELD(&scp->sc_lock)); + + nvers = ntohs(resp->svp_ver); + nop = ntohs(resp->svp_op); + nsize = ntohl(resp->svp_size); + + /* XXX Best practice around spaces in key names */ + if (nvers != SVP_CURRENT_VERSION) { + bunyan_warn(svp_bunyan, "unsupported version", + BUNYAN_T_IP, "remote ip", 
&scp->sc_addr, + BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport, + BUNYAN_T_INT32, "version", nvers, + BUNYAN_T_INT32, "operation", nop, + BUNYAN_T_INT32, "response id", resp->svp_id, + BUNYAN_T_END); + return (B_FALSE); + } + + if (nop != SVP_R_VL2_ACK && nop != SVP_R_VL3_ACK) { + bunyan_warn(svp_bunyan, "unsupported operation", + BUNYAN_T_IP, "remote ip", &scp->sc_addr, + BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport, + BUNYAN_T_INT32, "version", nvers, + BUNYAN_T_INT32, "operation", nop, + BUNYAN_T_INT32, "response id", resp->svp_id, + BUNYAN_T_END); + return (B_FALSE); + } + + sqp = svp_conn_query_find(scp, resp->svp_id); + if (sqp == NULL) { + bunyan_warn(svp_bunyan, "unknown response id", + BUNYAN_T_IP, "remote ip", &scp->sc_addr, + BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport, + BUNYAN_T_INT32, "version", nvers, + BUNYAN_T_INT32, "operation", nop, + BUNYAN_T_INT32, "response id", resp->svp_id, + BUNYAN_T_END); + return (B_FALSE); + } + + if (sqp->sq_state != SVP_QUERY_READING) { + bunyan_warn(svp_bunyan, "got response for unexpecting query", + BUNYAN_T_IP, "remote ip", &scp->sc_addr, + BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport, + BUNYAN_T_INT32, "version", nvers, + BUNYAN_T_INT32, "operation", nop, + BUNYAN_T_INT32, "response id", resp->svp_id, + BUNYAN_T_INT32, "query state", sqp->sq_state, + BUNYAN_T_END); + return (B_FALSE); + } + + if ((nop == SVP_R_VL2_ACK && nsize != sizeof (svp_vl2_ack_t)) || + (nop == SVP_R_VL3_ACK && nsize != sizeof (svp_vl3_ack_t))) { + bunyan_warn(svp_bunyan, "response size too large", + BUNYAN_T_IP, "remote ip", &scp->sc_addr, + BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport, + BUNYAN_T_INT32, "version", nvers, + BUNYAN_T_INT32, "operation", nop, + BUNYAN_T_INT32, "response id", resp->svp_id, + BUNYAN_T_INT32, "response size", nsize, + BUNYAN_T_INT32, "expected size", nop == SVP_R_VL2_ACK ? 
+ sizeof (svp_vl2_ack_t) : sizeof (svp_vl3_ack_t), + BUNYAN_T_INT32, "query state", sqp->sq_state, + BUNYAN_T_END); + return (B_FALSE); + } + + scp->sc_input.sci_query = sqp; + sqp->sq_wdata = &sqp->sq_wdun; + sqp->sq_wsize = sizeof (svp_query_data_t); + + return (B_TRUE); +} + +static svp_conn_act_t +svp_conn_pollin(svp_conn_t *scp) +{ + size_t off, total; + ssize_t ret; + svp_query_t *sqp; + uint32_t crc; + uint16_t nop; + + assert(MUTEX_HELD(&scp->sc_lock)); + + /* + * No query implies that we're reading in the header and that the offset + * is associted with it. + */ + off = scp->sc_input.sci_offset; + sqp = scp->sc_input.sci_query; + if (scp->sc_input.sci_query == NULL) { + svp_req_t *resp = &scp->sc_input.sci_req; + + assert(off < sizeof (svp_req_t)); + + do { + ret = read(scp->sc_socket, + (void *)((uintptr_t)resp + off), + sizeof (svp_req_t) - off); + } while (ret == -1 && errno == EINTR); + if (ret == -1) { + switch (errno) { + case EAGAIN: + scp->sc_event.se_events |= POLLIN | POLLRDNORM; + return (SVP_RA_NONE); + case EIO: + case ECONNRESET: + return (SVP_RA_ERROR); + break; + default: + libvarpd_panic("unexpeted read errno: %d", + errno); + } + } else if (ret == 0) { + /* Try to reconnect to the remote host */ + return (SVP_RA_ERROR); + } + + /* Didn't get all the data we need */ + if (off + ret < sizeof (svp_req_t)) { + scp->sc_input.sci_offset += ret; + scp->sc_event.se_events |= POLLIN | POLLRDNORM; + return (SVP_RA_NONE); + } + + if (svp_conn_pollin_validate(scp) != B_TRUE) + return (SVP_RA_ERROR); + } + + sqp = scp->sc_input.sci_query; + assert(sqp != NULL); + total = ntohl(scp->sc_input.sci_req.svp_size); + do { + ret = read(scp->sc_socket, sqp->sq_wdata + off, total - off); + } while (ret == -1 && errno == EINTR); + + if (ret == -1) { + switch (errno) { + case EAGAIN: + scp->sc_event.se_events |= POLLIN | POLLRDNORM; + return (SVP_RA_NONE); + case EIO: + case ECONNRESET: + return (SVP_RA_ERROR); + break; + default: + libvarpd_panic("unexpeted 
read errno: %d", errno); + } + } else if (ret == 0) { + /* Try to reconnect to the remote host */ + return (SVP_RA_ERROR); + } + + if (ret + off < total) { + scp->sc_input.sci_offset += ret; + return (SVP_RA_NONE); + } + + nop = ntohs(scp->sc_input.sci_req.svp_op); + crc = scp->sc_input.sci_req.svp_crc32; + svp_query_crc32(&scp->sc_input.sci_req, sqp->sq_wdata, total); + if (crc != scp->sc_input.sci_req.svp_crc32) { + bunyan_info(svp_bunyan, "crc32 mismatch", + BUNYAN_T_IP, "remote ip", &scp->sc_addr, + BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport, + BUNYAN_T_INT32, "version", + ntohs(scp->sc_input.sci_req.svp_ver), + BUNYAN_T_INT32, "operation", nop, + BUNYAN_T_INT32, "response id", + ntohl(scp->sc_input.sci_req.svp_id), + BUNYAN_T_INT32, "query state", sqp->sq_state, + BUNYAN_T_UINT32, "msg_crc", ntohl(crc), + BUNYAN_T_UINT32, "calc_crc", + ntohl(scp->sc_input.sci_req.svp_crc32), + BUNYAN_T_END); + return (SVP_RA_ERROR); + } + scp->sc_input.sci_query = NULL; + scp->sc_input.sci_offset = 0; + + if (nop == SVP_R_VL2_ACK) { + svp_vl2_ack_t *sl2a = sqp->sq_wdata; + sqp->sq_status = ntohs(sl2a->sl2a_status); + } else if (nop == SVP_R_VL3_ACK) { + svp_vl3_ack_t *sl3a = sqp->sq_wdata; + sqp->sq_status = ntohs(sl3a->sl3a_status); + } else { + libvarpd_panic("unhandled nop: %d", nop); + } + + /* + * XXX What assumptions can now be violated? + */ + list_remove(&scp->sc_queries, sqp); + mutex_unlock(&scp->sc_lock); + + /* + * We have to release all of our resources associated with this entry + * before we call the callback. After we call it, the memory will be + * lost to time. 
+ */ + svp_query_release(sqp); + sqp->sq_func(sqp, sqp->sq_arg); + mutex_lock(&scp->sc_lock); + scp->sc_event.se_events |= POLLIN | POLLRDNORM; + + return (SVP_RA_NONE); +} + +static svp_conn_act_t +svp_conn_reset(svp_conn_t *scp) +{ + svp_remote_t *srp = scp->sc_remote; + + assert(MUTEX_HELD(&srp->sr_lock)); + assert(MUTEX_HELD(&scp->sc_lock)); + + assert(svp_event_dissociate(&scp->sc_event, scp->sc_socket) == + ENOENT); + if (close(scp->sc_socket) != 0) + libvarpd_panic("failed to close socket %d: %d", scp->sc_socket, + errno); + scp->sc_socket = -1; + scp->sc_cstate = SVP_CS_INITIAL; + scp->sc_input.sci_query = NULL; + scp->sc_output.sco_query = NULL; + + svp_remote_reassign(srp, scp); + + return (svp_conn_connect(scp)); +} + +/* + * This is our general state transition function. We're called here when we want + * to advance part of our state machine as well as to re-arm ourselves. We can + * also end up here from the standard event loop as a result of having a user + * event posted. + */ +static void +svp_conn_handler(port_event_t *pe, void *arg) +{ + svp_conn_t *scp = arg; + svp_remote_t *srp = scp->sc_remote; + svp_conn_act_t ret = SVP_RA_NONE; + + mutex_lock(&scp->sc_lock); + + /* + * Check if one of our event interrupts is set. An event interrupt, such + * as having to be reaped or be torndown is notified by a + * PORT_SOURCE_USER event that tries to take care of this. However, + * because of the fact that the event loop can be ongoing despite this, + * we may get here before the PORT_SOURCE_USER has casued us to get + * here. In such a case, if the PORT_SOURCE_USER event is tagged, then + * we're going to opt to do nothing here and wait for it to come and + * tear us down. That will also indicate to us that we have nothing to + * worry about as far as general timing and the like goes. 
+ */ + + if ((scp->sc_flags & SVP_CF_UFLAG) != 0 && + (scp->sc_flags & SVP_CF_USER) != 0 && + pe != NULL && + pe->portev_source != PORT_SOURCE_USER) { + mutex_unlock(&scp->sc_lock); + return; + } + + if (pe != NULL && pe->portev_source == PORT_SOURCE_USER) { + scp->sc_flags &= ~SVP_CF_USER; + if ((scp->sc_flags & SVP_CF_UFLAG) == 0) { + mutex_unlock(&scp->sc_lock); + return; + } + } + + /* Check if this needs to be freed */ + if (scp->sc_flags & SVP_CF_REAP) { + mutex_unlock(&scp->sc_lock); + svp_conn_destroy(scp); + return; + } + + /* Check if this needs to be reset */ + if (scp->sc_flags & SVP_CF_TEARDOWN) { + ret = SVP_RA_ERROR; + goto out; + } + + switch (scp->sc_cstate) { + case SVP_CS_INITIAL: + case SVP_CS_BACKOFF: + assert(pe == NULL); + ret = svp_conn_connect(scp); + break; + case SVP_CS_CONNECTING: + assert(pe != NULL); + ret = svp_conn_poll_connect(pe, scp); + break; + case SVP_CS_ACTIVE: + assert(pe != NULL); + if (pe->portev_events & POLLOUT) + ret = svp_conn_pollout(scp); + if (ret == SVP_RA_NONE && (pe->portev_events & POLLIN)) + ret = svp_conn_pollin(scp); + if (ret == SVP_RA_NONE) { + int err; + if ((err = svp_event_associate(&scp->sc_event, + scp->sc_socket)) != 0) { + scp->sc_error = SVP_CE_ASSOCIATE; + scp->sc_errno = ret; + scp->sc_cstate = SVP_CS_ERROR; + ret = SVP_RA_DEGRADE; + } + } + break; + default: + libvarpd_panic("svp_conn_handler encountered unexpected " + "state: %d", scp->sc_cstate); + } +out: + mutex_unlock(&scp->sc_lock); + + if (ret == SVP_RA_NONE) + return; + + mutex_lock(&srp->sr_lock); + mutex_lock(&scp->sc_lock); + if (ret == SVP_RA_ERROR) + ret = svp_conn_reset(scp); + + if (ret == SVP_RA_DEGRADE) + svp_conn_degrade(scp); + else if (ret == SVP_RA_RESTORE) + svp_conn_restore(scp); + mutex_unlock(&scp->sc_lock); + mutex_unlock(&srp->sr_lock); +} + +static void +svp_conn_backtimer(void *arg) +{ + svp_conn_t *scp = arg; + + svp_conn_handler(NULL, scp); +} + +/* + * This fires every svp_conn_query_timeout seconds. 
Its purpos is to determine + * if we haven't heard back on a request with in svp_conn_query_timeout seconds. + * If any of the svp_conn_query_t's that have been started (indicated by + * svp_query_t`sq_acttime != -1), and more than svp_conn_query_timeout seconds + * have passed, we basically tear this connection down and reassign outstanding + * queries. + */ +static void +svp_conn_querytimer(void *arg) +{ + svp_query_t *sqp; + svp_conn_t *scp = arg; + hrtime_t now = gethrtime(); + + mutex_lock(&scp->sc_lock); + + /* + * If we're not in the active state, then we don't care about this as + * we're already either going to die or we have no connections to worry + * about. + */ + if (scp->sc_cstate != SVP_CS_ACTIVE) { + mutex_unlock(&scp->sc_lock); + return; + } + + for (sqp = list_head(&scp->sc_queries); sqp != NULL; + sqp = list_next(&scp->sc_queries, sqp)) { + if (sqp->sq_acttime == -1) + continue; + if ((sqp->sq_acttime - now) / NANOSEC > svp_conn_query_timeout) + break; + } + + /* Nothing timed out, we're good here */ + if (sqp == NULL) { + mutex_unlock(&scp->sc_lock); + return; + } + + scp->sc_flags |= SVP_CF_TEARDOWN; + svp_conn_inject(scp); + + mutex_unlock(&scp->sc_lock); +} + +/* + * This connection has fallen out of DNS, figure out what we need to do with it. + */ +void +svp_conn_fallout(svp_conn_t *scp) +{ + svp_remote_t *srp = scp->sc_remote; + + assert(MUTEX_HELD(&srp->sr_lock)); + + mutex_lock(&scp->sc_lock); + switch (scp->sc_cstate) { + case SVP_CS_ERROR: + /* + * Connection is already inactive, so it's safe to tear down. + * Fire it off through the state machine to tear down via the + * backoff timer. + */ + svp_conn_remove(scp); + scp->sc_flags |= SVP_CF_REAP; + svp_conn_inject(scp); + break; + case SVP_CS_INITIAL: + case SVP_CS_BACKOFF: + case SVP_CS_CONNECTING: + /* + * Here, we have something actively going on, so we'll let it be + * clean up the next time we hit the event loop by the event + * loop itself. 
As it has no connections, there isn't much to + * really do, though we'll take this chance to go ahead and + * remove it from the remote. + */ + svp_conn_remove(scp); + scp->sc_flags |= SVP_CF_REAP; + svp_conn_inject(scp); + break; + case SVP_CS_ACTIVE: + scp->sc_cstate = SVP_CS_WINDDOWN; + /* + * XXX We need to look at what's currently outstanding. If + * nothing is going on at the moment, we should try to + * port disassociate, and if succsesful, eg. not ENOENT, clean + * up right here and now. + */ + break; + case SVP_CS_WINDDOWN: + /* + * Nothing specific to do here, we'e finishing up with this, + * just haven't finished yet. + */ + break; + default: + libvarpd_panic("svp_conn_fallout encountered" + "unkonwn state"); + } + mutex_unlock(&scp->sc_lock); + mutex_unlock(&srp->sr_lock); +} + +int +svp_conn_create(svp_remote_t *srp, const struct in6_addr *addr) +{ + svp_conn_t *scp; + + assert(MUTEX_HELD(&srp->sr_lock)); + scp = umem_zalloc(sizeof (svp_conn_t), UMEM_DEFAULT); + if (scp == NULL) + return (ENOMEM); + + scp->sc_remote = srp; + scp->sc_event.se_func = svp_conn_handler; + scp->sc_event.se_arg = scp; + scp->sc_btimer.st_func = svp_conn_backtimer; + scp->sc_btimer.st_arg = scp; + scp->sc_btimer.st_oneshot = B_TRUE; + scp->sc_btimer.st_value = 1; + + scp->sc_qtimer.st_func = svp_conn_querytimer; + scp->sc_qtimer.st_arg = scp; + scp->sc_qtimer.st_oneshot = B_FALSE; + scp->sc_qtimer.st_value = svp_conn_query_timeout; + + scp->sc_socket = -1; + + list_create(&scp->sc_queries, sizeof (svp_query_t), + offsetof(svp_query_t, sq_lnode)); + scp->sc_gen = srp->sr_gen; + bcopy(addr, &scp->sc_addr, sizeof (struct in6_addr)); + scp->sc_cstate = SVP_CS_INITIAL; + mutex_lock(&scp->sc_lock); + svp_conn_add(scp); + mutex_unlock(&scp->sc_lock); + + /* Now that we're locked and loaded, add our timers */ + svp_timer_add(&scp->sc_qtimer); + svp_timer_add(&scp->sc_btimer); + + return (0); +} + +/* + * At the time of calling, the entry has been removed from all lists. 
In + * addition, the entry's state should be SVP_CS_ERROR, therefore, we know that + * the fd should not be associated with the event loop. We'll double check that + * just in case. We should also have already been removed from the remote's + * list. + * + * Any violation of these expectations is treated as fatal and panics. + */ +void +svp_conn_destroy(svp_conn_t *scp) +{ + int ret; + + mutex_lock(&scp->sc_lock); + if (scp->sc_cstate != SVP_CS_ERROR) + libvarpd_panic("asked to tear down an active connection"); + if (scp->sc_flags & SVP_CF_ADDED) + libvarpd_panic("asked to remove a connection still in " + "the remote list\n"); + if (!list_is_empty(&scp->sc_queries)) + libvarpd_panic("asked to remove a connection with non-empty " + "query list"); + + if ((ret = svp_event_dissociate(&scp->sc_event, scp->sc_socket)) != + ENOENT) { + libvarpd_panic("dissociate failed or was actually " + "associated: %d", ret); + } + mutex_unlock(&scp->sc_lock); + + /* Verify our timers are killed */ + svp_timer_remove(&scp->sc_btimer); + svp_timer_remove(&scp->sc_qtimer); + + if (scp->sc_socket != -1 && close(scp->sc_socket) != 0) + libvarpd_panic("failed to close svp_conn_t`scp_socket fd " + "%d: %d", scp->sc_socket, errno); + + list_destroy(&scp->sc_queries); + umem_free(scp, sizeof (svp_conn_t)); +} +/* + * Queue a query on an active connection. The query is marked as not yet + * started (sq_acttime of -1) and appended to sc_queries; if the connection was + * not already waiting for POLLOUT it is re-associated so the query gets + * written. The caller must hold sc_lock. + */ +void +svp_conn_queue(svp_conn_t *scp, svp_query_t *sqp) +{ + assert(MUTEX_HELD(&scp->sc_lock)); + assert(scp->sc_cstate == SVP_CS_ACTIVE); + + sqp->sq_acttime = -1; + list_insert_tail(&scp->sc_queries, sqp); + if (!(scp->sc_event.se_events & POLLOUT)) { + scp->sc_event.se_events |= POLLOUT; + /* + * XXX If this fails, we should give up this set of conns or + * something... For now, abort. 
+ */ + if (svp_event_associate(&scp->sc_event, scp->sc_socket) != 0) + libvarpd_panic("svp_event_associate failed somehow"); + } +} diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c new file mode 100644 index 0000000000..43d064d64d --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c @@ -0,0 +1,50 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014, Joyent, Inc. + */ + +/* + * Perform standard crc32 functions. + * + * XXX This should probably be a library + */ + +#include <sys/crc32.h> +#include <stdint.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <inttypes.h> +#include <libvarpd_svp.h> + +static uint32_t svp_crc32_tab[] = { CRC32_TABLE }; +/* Continue the running CRC 'old' over len bytes of buf and return the result. */ +static uint32_t +svp_crc32(uint32_t old, const uint8_t *buf, size_t len) +{ + uint32_t out; + + CRC32(out, buf, len, old, svp_crc32_tab); + return (out); +} +/* + * Compute the checksum of a message and store it in the header. The header's + * svp_crc32 field is zeroed first, the CRC is then run over the header + * followed by 'data' bytes of buf starting from an all-ones seed, and the + * complemented result is stored in svp_crc32 in network byte order. + */ +void +svp_query_crc32(svp_req_t *shp, void *buf, size_t data) +{ + uint32_t crc = -1U; + + shp->svp_crc32 = 0; + crc = svp_crc32(crc, (uint8_t *)shp, sizeof (svp_req_t)); + crc = svp_crc32(crc, buf, data); + crc = ~crc; + shp->svp_crc32 = htonl(crc); +} diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_host.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_host.c new file mode 100644 index 0000000000..2c80de097e --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_host.c @@ -0,0 +1,173 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. 
+ * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. + */ + +/* + * DNS Host-name related functions. + * + * Every backend is stored in DNS. To find out membership, we query DNS and use + * that to update our world. We update our DNS records on both a timer + * granularity and immediately after creation. We'll also XXX go through and do + * this after all of our valid entries have disappeared. + * + * Unfortunately, doing host name resolution in a way that allows us to leverage + * the system resolvers and the system's caching requires us to use blocking + * calls in libc. If we can't reach a given server, that will tie up a thread + * for quite some time. To work around that fact, we're going to create a fixed + * number of threads and we'll use them to service this kind of work. While not + * great, we don't have many better options. 
+ */ + +#include <sys/socket.h> +#include <netdb.h> +#include <thread.h> +#include <synch.h> +#include <assert.h> +#include <errno.h> + +#include <libvarpd_svp.h> + +int svp_host_nthreads = 8; + +static mutex_t svp_host_lock = DEFAULTMUTEX; +static cond_t svp_host_cv = DEFAULTCV; +static svp_remote_t *svp_host_head; + +static void * +svp_host_loop(void *unused) +{ + for (;;) { + int err; + svp_remote_t *srp; + struct addrinfo *addrs; + + mutex_lock(&svp_host_lock); + while (svp_host_head == NULL) + cond_wait(&svp_host_cv, &svp_host_lock); + srp = svp_host_head; + svp_host_head = srp->sr_nexthost; + if (svp_host_head != NULL) + cond_signal(&svp_host_cv); + mutex_unlock(&svp_host_lock); + + mutex_lock(&srp->sr_lock); + assert(srp->sr_state & SVP_RS_LOOKUP_SCHEDULED); + srp->sr_state &= ~SVP_RS_LOOKUP_SCHEDULED; + if (srp->sr_state & SVP_RS_LOOKUP_INPROGRESS) { + mutex_unlock(&srp->sr_lock); + continue; + } + srp->sr_state |= SVP_RS_LOOKUP_INPROGRESS; + mutex_unlock(&srp->sr_lock); + + for (;;) { + err = getaddrinfo(srp->sr_hostname, NULL, NULL, &addrs); + if (err == 0) + break; + if (err != 0) { + switch (err) { + case EAI_ADDRFAMILY: + case EAI_BADFLAGS: + case EAI_FAMILY: + case EAI_SERVICE: + case EAI_SOCKTYPE: + case EAI_OVERFLOW: + default: + libvarpd_panic("unexpected getaddrinfo " + "failure: %d", err); + case EAI_AGAIN: + case EAI_MEMORY: + case EAI_SYSTEM: + continue; + case EAI_FAIL: + case EAI_NODATA: + case EAI_NONAME: + /* + * XXX At this point in time we have + * something which isn't very good. This + * may have been a typo or something may + * have been destroyed. We should go + * ahead and degrade this overall + * instance, because we're not going to + * make much forward progress... It'd be + * great if we could actually issue more + * of an EREPORT to describe what + * happened... 
+ */ + mutex_lock(&srp->sr_lock); + svp_remote_degrade(srp, SVP_RD_DNS_FAIL); + mutex_unlock(&srp->sr_lock); + break; + } + } + break; + } + + if (err == 0) { + /* + * We've successfully resolved something, mark this + * degredation over for now. + */ + mutex_lock(&srp->sr_lock); + svp_remote_restore(srp, SVP_RD_DNS_FAIL); + mutex_unlock(&srp->sr_lock); + svp_remote_resolved(srp, addrs); + } + + mutex_lock(&srp->sr_lock); + srp->sr_state &= ~SVP_RS_LOOKUP_INPROGRESS; + mutex_unlock(&srp->sr_lock); + } +} + +void +svp_host_queue(svp_remote_t *srp) +{ + svp_remote_t *s; + mutex_lock(&svp_host_lock); + mutex_lock(&srp->sr_lock); + if (srp->sr_state & SVP_RS_LOOKUP_SCHEDULED) { + mutex_unlock(&srp->sr_lock); + mutex_unlock(&svp_host_lock); + return; + } + srp->sr_state |= SVP_RS_LOOKUP_SCHEDULED; + s = svp_host_head; + while (s != NULL && s->sr_nexthost != NULL) + s = s->sr_nexthost; + if (s == NULL) { + assert(s == svp_host_head); + svp_host_head = srp; + } else { + s->sr_nexthost = srp; + } + srp->sr_nexthost = NULL; + cond_signal(&svp_host_cv); + mutex_unlock(&srp->sr_lock); + mutex_unlock(&svp_host_lock); +} + +int +svp_host_init(void) +{ + int i; + + for (i = 0; i < svp_host_nthreads; i++) { + if (thr_create(NULL, 0, svp_host_loop, NULL, + THR_DETACHED | THR_DAEMON, NULL) != 0) + return (errno); + } + + return (0); +} diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c new file mode 100644 index 0000000000..a3579320ff --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c @@ -0,0 +1,206 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. + */ + +/* + * Event loop mechanism for our backend. + */ + +#include <unistd.h> +#include <thread.h> +#include <port.h> +#include <signal.h> +#include <time.h> +#include <errno.h> +#include <umem.h> + +#include <libvarpd_svp.h> + +typedef struct svp_event_loop { + int sel_port; /* RO */ + int sel_nthread; /* RO */ + thread_t *sel_threads; /* RO */ + boolean_t sel_stop; /* svp_elock */ + timer_t sel_hosttimer; +} svp_event_loop_t; + +static svp_event_loop_t svp_event; +static mutex_t svp_elock = DEFAULTMUTEX; + +static void * +svp_event_thr(void *arg) +{ + for (;;) { + int ret; + port_event_t pe; + svp_event_t *sep; + + mutex_lock(&svp_elock); + if (svp_event.sel_stop == B_TRUE) { + mutex_unlock(&svp_elock); + break; + } + mutex_unlock(&svp_elock); + + ret = port_get(svp_event.sel_port, &pe, NULL); + if (ret != 0) { + switch (errno) { + case EFAULT: + case EBADF: + case EINVAL: + libvarpd_panic("unexpected port_get errno: %d", + errno); + default: + break; + } + } + + /* TODO Process the event */ + if (pe.portev_user == NULL) + libvarpd_panic("received event (%p) without " + "protev_user set", &pe); + sep = (svp_event_t *)pe.portev_user; + sep->se_func(&pe, sep->se_arg); + } + + return (NULL); +} + +int +svp_event_associate(svp_event_t *sep, int fd) +{ + int ret; + + ret = port_associate(svp_event.sel_port, PORT_SOURCE_FD, fd, + sep->se_events, sep); + if (ret != 0) { + switch (errno) { + case EBADF: + case EBADFD: + case EINVAL: + case EAGAIN: + libvarpd_panic("unexpected port_associate error: %d", + errno); + default: + ret = errno; + break; + } + } + + return (ret); +} + +int +svp_event_dissociate(svp_event_t *sep, int fd) +{ + int ret; + + ret = port_dissociate(svp_event.sel_port, PORT_SOURCE_FD, fd); + if (ret != 0) { + if (errno != ENOENT) + libvarpd_panic("unexpected port_dissociate error: %d", + errno); + ret = 
errno; + } + return (ret); +} + +int +svp_event_inject(void *user) +{ + return (port_send(svp_event.sel_port, 0, user)); +} + +int +svp_event_timer_init(svp_event_t *sep) +{ + port_notify_t pn; + struct sigevent evp; + struct itimerspec ts; + + pn.portnfy_port = svp_event.sel_port; + pn.portnfy_user = sep; + evp.sigev_notify = SIGEV_PORT; + evp.sigev_value.sival_ptr = &pn; + + if (timer_create(CLOCK_REALTIME, &evp, &svp_event.sel_hosttimer) != 0) + return (errno); + + ts.it_value.tv_sec = svp_tickrate; + ts.it_value.tv_nsec = 0; + ts.it_interval.tv_sec = svp_tickrate; + ts.it_interval.tv_nsec = 0; + + if (timer_settime(svp_event.sel_hosttimer, TIMER_RELTIME, &ts, + NULL) != 0) { + int ret = errno; + (void) timer_delete(svp_event.sel_hosttimer); + return (ret); + } + + return (0); +} + +int +svp_event_init(void) +{ + long i, ncpus; + + svp_event.sel_port = port_create(); + if (svp_event.sel_port == -1) + return (errno); + + ncpus = sysconf(_SC_NPROCESSORS_ONLN) * 2 + 1; + if (ncpus <= 0) + libvarpd_panic("sysconf for nprocs failed... 
%d/%d", + ncpus, errno); + + svp_event.sel_threads = umem_alloc(sizeof (thread_t) * ncpus, + UMEM_DEFAULT); + if (svp_event.sel_threads == NULL) { + int ret = errno; + (void) timer_delete(svp_event.sel_hosttimer); + (void) close(svp_event.sel_port); + svp_event.sel_port = -1; + return (ret); + } + + for (i = 0; i < ncpus; i++) { + int ret; + thread_t *thr = &svp_event.sel_threads[i]; + + ret = thr_create(NULL, 0, svp_event_thr, NULL, + THR_DETACHED | THR_DAEMON, thr); + if (ret != 0) { + ret = errno; + (void) timer_delete(svp_event.sel_hosttimer); + (void) close(svp_event.sel_port); + svp_event.sel_port = -1; + return (errno); + } + } + + return (0); +} + +void +svp_event_fini(void) +{ + mutex_lock(&svp_elock); + svp_event.sel_stop = B_TRUE; + mutex_unlock(&svp_elock); + + (void) timer_delete(svp_event.sel_hosttimer); + (void) close(svp_event.sel_port); +} diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h b/usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h new file mode 100644 index 0000000000..e95f3e4c61 --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h @@ -0,0 +1,172 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. 
+ */ + +#ifndef _LIBVARPD_SVP_PROT_H +#define _LIBVARPD_SVP_PROT_H + +/* + * SVP protocol Definitions + */ + +#include <sys/types.h> +#include <inttypes.h> +#include <sys/ethernet.h> +#include <netinet/in.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * SDC VXLAN Protocol Definitions + */ + +#define SVP_VERSION_ONE 1 +#define SVP_CURRENT_VERSION SVP_VERSION_ONE + +typedef struct svp_req { + uint16_t svp_ver; + uint16_t svp_op; + uint32_t svp_size; + uint32_t svp_id; + uint32_t svp_crc32; +} svp_req_t; + +typedef enum svp_op { + SVP_R_UNKNOWN = 0x00, + SVP_R_PING = 0x01, + SVP_R_PONG = 0x02, + SVP_R_VL2_REQ = 0x03, + SVP_R_VL2_ACK = 0x04, + SVP_R_VL3_REQ = 0x05, + SVP_R_VL3_ACK = 0x06, + SVP_R_BULK_REQ = 0x07, + SVP_R_BULK_ACK = 0x08, + SVP_R_LOG_REQ = 0x09, + SVP_R_LOG_ACK = 0x0A, + SVP_R_LOG_RM = 0x0B, + SVP_R_LOG_RACK = 0x0C, + SVP_R_SHOOTDOWN = 0x0D +} svp_op_t; + +typedef enum svp_status { + SVP_S_OK = 0x00, /* Everything OK */ + SVP_S_FATAL = 0x01, /* Fatal error, close connection */ + SVP_S_NOTFOUND = 0x02, /* Entry not found */ + SVP_S_BADL3TYPE = 0x03, /* Unknown svp_vl3_type_t */ + SVP_S_BADBULK = 0x04, /* Unknown svp_bulk_type_t */ + SVP_S_BADLOG = 0x05, /* Unknown svp_log_type_t */ + SVP_S_LOGAGAIN = 0x06 /* Nothing in the log yet */ +} svp_status_t; + +typedef struct svp_vl2_req { + uint8_t sl2r_mac[ETHERADDRL]; + uint8_t sl2r_pad[2]; + uint32_t sl2r_vnetid; +} svp_vl2_req_t; + +typedef struct svp_vl2_ack { + uint16_t sl2a_status; + uint16_t sl2a_port; + uint8_t sl2a_addr[16]; +} svp_vl2_ack_t; + +typedef enum svp_vl3_type { + SVP_VL3_IP = 0x01, + SVP_VL3_IPV6 = 0x02 +} svp_vl3_type_t; + +typedef struct svp_vl3_req { + uint8_t sl3r_ip[16]; + uint32_t sl3r_type; + uint32_t sl3r_vnetid; +} svp_vl3_req_t; + +typedef struct svp_vl3_ack { + uint32_t sl3a_status; + uint8_t sl3a_mac[ETHERADDRL]; + uint16_t sl3a_uport; + uint8_t sl3a_uip[16]; +} svp_vl3_ack_t; + +typedef enum svp_bulk_type { + SVP_BULK_VL2 = 0x01, + SVP_BULK_VL3 = 0x02 +} 
svp_bulk_type_t; + +typedef struct svp_bulk_req { + uint32_t svbr_type; +} svp_bulk_req_t; + +typedef struct svp_bulk_ack { + uint32_t svba_status; + uint32_t svba_type; + uint8_t svba_data[]; +} svp_bulk_ack_t; + +typedef enum svp_log_type { + SVP_LOG_VL2 = 0x01, + SVP_LOG_VL3 = 0x02 +} svp_log_type_t; + +typedef struct svp_log_req { + uint32_t svlr_type; + uint32_t svlr_count; +} svp_log_req_t; + +typedef struct svp_log_vl2 { + uint8_t svl2_id[16]; /* 16-byte UUID */ + uint8_t svl2_mac[ETHERADDRL]; + uint8_t svl2_pad[2]; + uint32_t svl2_vnetid; +} svp_log_vl2_t; + +typedef struct svp_log_vl3 { + uint8_t svl3_id[16]; /* 16-byte UUID */ + uint8_t slv3_ip[16]; + uint8_t svl3_mac[ETHERADDRL]; + uint16_t svl3_vlan; + uint8_t svl3_tmac[ETHERADDRL]; + uint8_t svl3_tpad[2]; + uint32_t svl3_vnetid; +} svp_log_vl3_t; + +typedef struct svp_log_ack { + uint32_t svla_status; + uint32_t svla_type; + uint8_t svla_data[]; +} svp_log_ack_t; + +typedef struct svp_lrm_req { + uint32_t svrr_type; + uint32_t svrr_pad; + uint8_t svrr_ids[]; +} svp_lrm_req_t; + +typedef struct svp_lrm_ack { + uint32_t svra_status; +} svp_lrm_ack_t; + +typedef struct svp_shootdown { + uint8_t svsd_mac[ETHERADDRL]; + uint8_t svsd_pad[2]; + uint32_t svsd_vnetid; +} svp_shootdown_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBVARPD_SVP_PROT_H */ diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c new file mode 100644 index 0000000000..6da565246c --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c @@ -0,0 +1,596 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. + */ + +/* + * This file encapsulates all of the logic for dealing with a given remote host + * that is being used to service requests. Multiple different overlay devices + * all share the same single device here. + */ + +#include <umem.h> +#include <strings.h> +#include <string.h> +#include <stddef.h> +#include <thread.h> +#include <synch.h> +#include <assert.h> +#include <sys/socket.h> +#include <netdb.h> +#include <errno.h> +#include <libidspace.h> + +#include <libvarpd_provider.h> +#include <libvarpd_svp.h> + +static mutex_t svp_remote_lock = DEFAULTMUTEX; +static avl_tree_t svp_remote_tree; +static svp_timer_t svp_dns_timer; +static id_space_t *svp_idspace; +static int svp_dns_timer_rate = 30; /* seconds */ + +static void +svp_remote_mkfmamsg(svp_remote_t *srp, svp_degrade_state_t state, char *buf, + size_t buflen) +{ + switch (state) { + case SVP_RD_DNS_FAIL: + (void) snprintf(buf, buflen, "failed to resolve or find " + "entries for hostname %s", srp->sr_hostname); + break; + case SVP_RD_REMOTE_FAIL: + (void) snprintf(buf, buflen, "cannot reach any remote peers"); + break; + default: + (void) snprintf(buf, buflen, "unkonwn error state: %d", state); + } +} + +static int +svp_remote_comparator(const void *l, const void *r) +{ + int ret; + const svp_remote_t *lr = l, *rr = r; + + ret = strcmp(lr->sr_hostname, rr->sr_hostname); + if (ret > 0) + return (1); + else if (ret < 0) + return (-1); + + if (lr->sr_rport > rr->sr_rport) + return (1); + else if (lr->sr_rport < rr->sr_rport) + return (-1); + else + return (0); +} + +void +svp_query_release(svp_query_t *sqp) +{ + id_free(svp_idspace, sqp->sq_header.svp_id); +} + +static void +svp_remote_destroy(svp_remote_t *srp) +{ + size_t len; + + /* + * XXX Clean up DNS related information, eg. make sure we're not in the + * queue. Likely need a flag cv... 
+ */ + + if (mutex_destroy(&srp->sr_lock) != 0) + libvarpd_panic("failed to destroy mutex sr_lock"); + + if (srp->sr_addrinfo != NULL) + freeaddrinfo(srp->sr_addrinfo); + len = strlen(srp->sr_hostname) + 1; + umem_free(srp->sr_hostname, len); + umem_free(srp, sizeof (svp_remote_t)); +} + +/* + * Allocate and initialize a new svp_remote_t for host/port and return it in + * *outp with a reference count of one. The caller must hold svp_remote_lock + * and still holds it when we return, on failure as well as success: releasing + * it here would make svp_remote_find() unlock it a second time. Returns 0 or + * ENOMEM. + */ +static int +svp_remote_create(const char *host, uint16_t port, svp_remote_t **outp) +{ + size_t hlen; + svp_remote_t *remote; + + assert(MUTEX_HELD(&svp_remote_lock)); + + remote = umem_zalloc(sizeof (svp_remote_t), UMEM_DEFAULT); + if (remote == NULL) + return (ENOMEM); + hlen = strlen(host) + 1; + remote->sr_hostname = umem_alloc(hlen, UMEM_DEFAULT); + if (remote->sr_hostname == NULL) { + umem_free(remote, sizeof (svp_remote_t)); + return (ENOMEM); + } + remote->sr_rport = port; + if (mutex_init(&remote->sr_lock, USYNC_THREAD, NULL) != 0) + libvarpd_panic("failed to create mutex sr_lock"); + list_create(&remote->sr_conns, sizeof (svp_conn_t), + offsetof(svp_conn_t, sc_rlist)); + avl_create(&remote->sr_tree, svp_comparator, sizeof (svp_t), + offsetof(svp_t, svp_rlink)); + (void) strlcpy(remote->sr_hostname, host, hlen); + remote->sr_count = 1; + + *outp = remote; + return (0); +} + +int +svp_remote_find(char *host, uint16_t port, svp_remote_t **outp) +{ + int ret; + svp_remote_t lookup, *remote; + + lookup.sr_hostname = host; + lookup.sr_rport = port; + mutex_lock(&svp_remote_lock); + remote = avl_find(&svp_remote_tree, &lookup, NULL); + if (remote != NULL) { + assert(remote->sr_count > 0); + remote->sr_count++; + *outp = remote; + mutex_unlock(&svp_remote_lock); + return (0); + } + + if ((ret = svp_remote_create(host, port, outp)) != 0) { + mutex_unlock(&svp_remote_lock); + return (ret); + } + + avl_add(&svp_remote_tree, *outp); + mutex_unlock(&svp_remote_lock); + + /* Make sure DNS is up to date */ + svp_host_queue(*outp); + + return (0); +} + +void +svp_remote_release(svp_remote_t *srp) +{ + 
mutex_lock(&svp_remote_lock); + mutex_lock(&srp->sr_lock); + srp->sr_count--; + if (srp->sr_count != 0) { + mutex_unlock(&srp->sr_lock); + mutex_unlock(&svp_remote_lock); + return; + } + mutex_unlock(&srp->sr_lock); + + avl_remove(&svp_remote_tree, srp); + mutex_unlock(&svp_remote_lock); + svp_remote_destroy(srp); +} + +/* + * Attach svp to the remote srp by inserting it into srp's tree of overlay + * instances and recording srp in svp->svp_remote. Panics if svp is already + * attached, is missing a required callback, or if an entry with the same vid + * is already present. Always returns 0. + */ +int +svp_remote_attach(svp_remote_t *srp, svp_t *svp) +{ + svp_t check; + avl_index_t where; + + mutex_lock(&srp->sr_lock); + if (svp->svp_remote != NULL) + libvarpd_panic("svp is already attached to a remote"); + + /* + * We require everything except shootdowns + */ + if (svp->svp_cb.scb_vl2_lookup == NULL) + libvarpd_panic("missing callback scb_vl2_lookup"); + if (svp->svp_cb.scb_vl3_lookup == NULL) + libvarpd_panic("missing callback scb_vl3_lookup"); + if (svp->svp_cb.scb_vl2_invalidate == NULL) + libvarpd_panic("missing callback scb_vl2_invalidate"); + if (svp->svp_cb.scb_vl3_inject == NULL) + libvarpd_panic("missing callback scb_vl3_inject"); + + check.svp_vid = svp->svp_vid; + if (avl_find(&srp->sr_tree, &check, &where) != NULL) + libvarpd_panic("found duplicate entry with vid %ld", + svp->svp_vid); + avl_insert(&srp->sr_tree, svp, where); + svp->svp_remote = srp; + mutex_unlock(&srp->sr_lock); + + return (0); +} + +void +svp_remote_detach(svp_t *svp) +{ + svp_t *lookup; + svp_remote_t *srp = svp->svp_remote; + + if (srp == NULL) + libvarpd_panic("trying to detach remote when none exists"); + + mutex_lock(&srp->sr_lock); + lookup = avl_find(&srp->sr_tree, svp, NULL); + if (lookup == NULL || lookup != svp) + libvarpd_panic("inconsitent remote avl tree..."); + avl_remove(&srp->sr_tree, svp); + svp->svp_remote = NULL; + mutex_unlock(&srp->sr_lock); + svp_remote_release(srp); +} + +/* + * Walk the list of connections and find the first one that's available, then + * move it to the back of the list so it's less likely to be used again. 
+ */ +static boolean_t +svp_remote_conn_queue(svp_remote_t *srp, svp_query_t *sqp) +{ + svp_conn_t *scp; + + assert(MUTEX_HELD(&srp->sr_lock)); + for (scp = list_head(&srp->sr_conns); scp != NULL; + scp = list_next(&srp->sr_conns, scp)) { + mutex_lock(&scp->sc_lock); + if (scp->sc_cstate != SVP_CS_ACTIVE) { + mutex_unlock(&scp->sc_lock); + continue; + } + svp_conn_queue(scp, sqp); + mutex_unlock(&scp->sc_lock); + list_remove(&srp->sr_conns, scp); + list_insert_tail(&srp->sr_conns, scp); + return (B_TRUE); + } + + return (B_FALSE); +} + +static void +svp_remote_vl2_lookup_cb(svp_query_t *sqp, void *arg) +{ + svp_t *svp = sqp->sq_svp; + svp_vl2_ack_t *vl2a = (svp_vl2_ack_t *)sqp->sq_wdata; + + if (sqp->sq_status == SVP_S_OK) + svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, + (struct in6_addr *)vl2a->sl2a_addr, ntohs(vl2a->sl2a_port), + arg); + else + svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, NULL, 0, arg); +} + +void +svp_remote_vl2_lookup(svp_t *svp, svp_query_t *sqp, const uint8_t *mac, + void *arg) +{ + svp_remote_t *srp; + svp_vl2_req_t *vl2r = &sqp->sq_rdun.sqd_vl2r; + + srp = svp->svp_remote; + sqp->sq_func = svp_remote_vl2_lookup_cb; + sqp->sq_arg = arg; + sqp->sq_svp = svp; + sqp->sq_state = SVP_QUERY_INIT; + sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION); + sqp->sq_header.svp_op = htons(SVP_R_VL2_REQ); + sqp->sq_header.svp_size = htonl(sizeof (svp_vl2_req_t)); + /* + * XXX ID, crc32 need real values + */ + sqp->sq_header.svp_id = id_alloc(svp_idspace); + if (sqp->sq_header.svp_id == -1) + libvarpd_panic("failed to allcoate from svp_idspace: %d", + errno); + sqp->sq_header.svp_crc32 = htonl(0); + sqp->sq_rdata = vl2r; + sqp->sq_rsize = sizeof (svp_vl2_req_t); + sqp->sq_wdata = NULL; + sqp->sq_wsize = 0; + + bcopy(mac, vl2r->sl2r_mac, ETHERADDRL); + vl2r->sl2r_vnetid = ntohl(svp->svp_vid); + + mutex_lock(&srp->sr_lock); + if (svp_remote_conn_queue(srp, sqp) == B_FALSE) + svp->svp_cb.scb_vl2_lookup(svp, SVP_S_FATAL, NULL, NULL, arg); + 
mutex_unlock(&srp->sr_lock); +} + +static void +svp_remote_vl3_lookup_cb(svp_query_t *sqp, void *arg) +{ + svp_t *svp = sqp->sq_svp; + svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata; + + if (sqp->sq_status == SVP_S_OK) + svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, vl3a->sl3a_mac, + (struct in6_addr *)vl3a->sl3a_uip, ntohs(vl3a->sl3a_uport), + arg); + else + svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, NULL, NULL, 0, + arg); +} + +/* + * Build a VL3 (IP to MAC) lookup request for addr in sqp and queue it on one + * of the remote's active connections. addr must be AF_INET or AF_INET6; IPv4 + * addresses are sent as v4-mapped IPv6. If no connection can take the query, + * the scb_vl3_lookup callback is invoked right away with SVP_S_FATAL. + */ +void +svp_remote_vl3_lookup(svp_t *svp, svp_query_t *sqp, + const struct sockaddr *addr, void *arg) +{ + svp_remote_t *srp; + svp_vl3_req_t *vl3r = &sqp->sq_rdun.sdq_vl3r; + + if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) + libvarpd_panic("unexpected sa_family for the vl3 lookup"); + + srp = svp->svp_remote; + sqp->sq_func = svp_remote_vl3_lookup_cb; + sqp->sq_arg = arg; + sqp->sq_svp = svp; + sqp->sq_state = SVP_QUERY_INIT; + sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION); + sqp->sq_header.svp_op = htons(SVP_R_VL3_REQ); + /* svp_size is a 32-bit field, so it takes htonl() as in the VL2 path */ + sqp->sq_header.svp_size = htonl(sizeof (svp_vl3_req_t)); + /* + * XXX ID, crc32 need real values + */ + sqp->sq_header.svp_id = id_alloc(svp_idspace); + if (sqp->sq_header.svp_id == -1) + libvarpd_panic("failed to allcoate from svp_idspace: %d", + errno); + sqp->sq_header.svp_crc32 = htonl(0); + sqp->sq_rdata = vl3r; + sqp->sq_rsize = sizeof (svp_vl3_req_t); + sqp->sq_wdata = NULL; + sqp->sq_wsize = 0; + + /* sl3r_type is a 32-bit wire field; send it in network byte order */ + if (addr->sa_family == AF_INET6) { + struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)addr; + vl3r->sl3r_type = htonl(SVP_VL3_IPV6); + bcopy(&s6->sin6_addr, vl3r->sl3r_ip, + sizeof (struct in6_addr)); + } else { + struct sockaddr_in *s4 = (struct sockaddr_in *)addr; + struct in6_addr v6; + + vl3r->sl3r_type = htonl(SVP_VL3_IP); + IN6_INADDR_TO_V4MAPPED(&s4->sin_addr, &v6); + bcopy(&v6, vl3r->sl3r_ip, sizeof (struct in6_addr)); + } + vl3r->sl3r_vnetid = ntohl(svp->svp_vid); + + mutex_lock(&srp->sr_lock); + if (svp_remote_conn_queue(srp, sqp) == B_FALSE) + svp->svp_cb.scb_vl3_lookup(svp, SVP_S_FATAL, 
NULL, NULL, NULL, + arg); + mutex_unlock(&srp->sr_lock); +} + +void +svp_remote_dns_timer(void *unused) +{ + svp_remote_t *s; + mutex_lock(&svp_remote_lock); + for (s = avl_first(&svp_remote_tree); s != NULL; + s = AVL_NEXT(&svp_remote_tree, s)) { + svp_host_queue(s); + } + mutex_unlock(&svp_remote_lock); +} + +void +svp_remote_resolved(svp_remote_t *srp, struct addrinfo *newaddrs) +{ + struct addrinfo *a; + svp_conn_t *scp; + int ngen; + + mutex_lock(&srp->sr_lock); + srp->sr_gen++; + ngen = srp->sr_gen; + mutex_unlock(&srp->sr_lock); + + for (a = newaddrs; a != NULL; a = a->ai_next) { + struct in6_addr in6; + struct in6_addr *addrp; + + if (a->ai_family != AF_INET && a->ai_family != AF_INET6) + continue; + + if (a->ai_family == AF_INET) { + struct sockaddr_in *v4; + v4 = (struct sockaddr_in *)a->ai_addr; + addrp = &in6; + IN6_INADDR_TO_V4MAPPED(&v4->sin_addr, addrp); + } else { + struct sockaddr_in6 *v6; + v6 = (struct sockaddr_in6 *)a->ai_addr; + addrp = &v6->sin6_addr; + } + + mutex_lock(&srp->sr_lock); + for (scp = list_head(&srp->sr_conns); scp != NULL; + scp = list_next(&srp->sr_conns, scp)) { + mutex_lock(&scp->sc_lock); + if (bcmp(addrp, &scp->sc_addr, + sizeof (struct in6_addr)) == 0) { + scp->sc_gen = ngen; + mutex_unlock(&scp->sc_lock); + break; + } + mutex_unlock(&scp->sc_lock); + } + + /* + * We need to be careful in the assumptions that we make here, + * as there's a good chance that svp_conn_create will + * drop the svp_remote_t`sr_lock to kick off its effective event + * loop. + */ + if (scp == NULL) + svp_conn_create(srp, addrp); + mutex_unlock(&srp->sr_lock); + } + + /* + * Now it's time to clean things up. We do not actively clean up the + * current connections that we have, instead allowing them to stay + * around assuming that they're still useful. Instead, we go through and + * purge the degraded list for anything that's from an older generation. 
+ */ + mutex_lock(&srp->sr_lock); + for (scp = list_head(&srp->sr_conns); scp != NULL; + scp = list_next(&srp->sr_conns, scp)) { + boolean_t fall = B_FALSE; + mutex_lock(&scp->sc_lock); + if (scp->sc_gen < srp->sr_gen) + fall = B_TRUE; + mutex_unlock(&scp->sc_lock); + if (fall == B_TRUE) + svp_conn_fallout(scp); + } + mutex_unlock(&srp->sr_lock); +} + +/* + * This connection is in the process of being reset, we need to reassign all of + * its queries to other places or mark them as fatal. + */ +void +svp_remote_reassign(svp_remote_t *srp, svp_conn_t *scp) +{ + assert(MUTEX_HELD(&srp->sr_lock)); + svp_query_t *sqp; + + /* + * As we try to reassing all of its queries, remove it from the list. + */ + list_remove(&srp->sr_conns, scp); + + while ((sqp = list_remove_head(&scp->sc_queries)) != NULL) { + sqp->sq_wdata = NULL; + sqp->sq_wsize = 0; + sqp->sq_acttime = -1; + + /* + * XXX We probably want to maintain a queue of these for some + * time. + */ + if (svp_remote_conn_queue(srp, sqp) == B_FALSE) { + sqp->sq_status = SVP_S_FATAL; + sqp->sq_func(sqp, sqp->sq_arg); + } + } + + /* + * Now that we're done, go ahead and re-insert. 
+ */ + list_insert_tail(&srp->sr_conns, scp); +} + +void +svp_remote_degrade(svp_remote_t *srp, svp_degrade_state_t flag) +{ + int sf, nf; + char buf[256]; + + assert(MUTEX_HELD(&srp->sr_lock)); + + if (flag == SVP_RD_ALL || flag == 0) + libvarpd_panic("invalid flag passed to degrade"); + + if ((flag & srp->sr_degrade) != 0) { + return; + } + + sf = ffs(srp->sr_degrade); + nf = ffs(flag); + srp->sr_degrade |= flag; + if (sf == 0 || sf > nf) { + svp_t *svp; + svp_remote_mkfmamsg(srp, flag, buf, sizeof (buf)); + + for (svp = avl_first(&srp->sr_tree); svp != NULL; + svp = AVL_NEXT(&srp->sr_tree, svp)) { + libvarpd_fma_degrade(svp->svp_hdl, buf); + } + } +} + +void +svp_remote_restore(svp_remote_t *srp, svp_degrade_state_t flag) +{ + int sf, nf; + + assert(MUTEX_HELD(&srp->sr_lock)); + sf = ffs(srp->sr_degrade); + if ((srp->sr_degrade & flag) != flag) + return; + srp->sr_degrade &= ~flag; + nf = ffs(srp->sr_degrade); + + /* + * If we're now empty, restore the device. If we still are degraded, but + * we now have a higher base than we used to, change the message. 
+ */ + if (srp->sr_degrade == 0) { + svp_t *svp; + for (svp = avl_first(&srp->sr_tree); svp != NULL; + svp = AVL_NEXT(&srp->sr_tree, svp)) { + libvarpd_fma_restore(svp->svp_hdl); + } + } else if (nf != sf) { + svp_t *svp; + char buf[256]; + + svp_remote_mkfmamsg(srp, 1U << (nf - 1), buf, sizeof (buf)); + for (svp = avl_first(&srp->sr_tree); svp != NULL; + svp = AVL_NEXT(&srp->sr_tree, svp)) { + libvarpd_fma_degrade(svp->svp_hdl, buf); + } + } +} + +int +svp_remote_init(void) +{ + svp_idspace = id_space_create("svp_req_ids", 1, INT32_MAX); + if (svp_idspace == NULL) + return (errno); + avl_create(&svp_remote_tree, svp_remote_comparator, + sizeof (svp_remote_t), offsetof(svp_remote_t, sr_gnode)); + svp_dns_timer.st_func = svp_remote_dns_timer; + svp_dns_timer.st_arg = NULL; + svp_dns_timer.st_oneshot = B_FALSE; + svp_dns_timer.st_value = svp_dns_timer_rate; + svp_timer_add(&svp_dns_timer); + return (0); +} + +/* + * Undo svp_remote_init(): stop the periodic DNS refresh timer, destroy the + * global tree of remotes and release the request id space if one was created. + */ +void +svp_remote_fini(void) +{ + svp_timer_remove(&svp_dns_timer); + avl_destroy(&svp_remote_tree); + /* Only destroy an id space that actually exists */ + if (svp_idspace != NULL) + id_space_destroy(svp_idspace); +} diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c new file mode 100644 index 0000000000..f1fb2908c7 --- /dev/null +++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c @@ -0,0 +1,144 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014, Joyent, Inc. + */ + +#include <stddef.h> +#include <libvarpd_svp.h> + +/* + * This implements all of the logic of maintaining a timer for the svp backend. + * We have a timer that fires at a one second tick. 
We maintain all of our + * events in avl tree, sorted by the tick that they need to be processed at. + */ + +int svp_tickrate = 1; +static svp_event_t svp_timer_event; +static mutex_t svp_timer_lock = DEFAULTMUTEX; +static cond_t svp_timer_cv = DEFAULTCV; +static avl_tree_t svp_timer_tree; +static uint64_t svp_timer_nticks; + +static int +svp_timer_comparator(const void *l, const void *r) +{ + const svp_timer_t *lt, *rt; + + lt = l; + rt = r; + + if (lt->st_expire > rt->st_expire) + return (1); + else if (lt->st_expire < rt->st_expire) + return (-1); + + /* + * Multiple timers can have the same delivery time, so sort within that + * by the address of the timer itself. + */ + if ((uintptr_t)lt > (uintptr_t)rt) + return (1); + else if ((uintptr_t)lt < (uintptr_t)rt) + return (-1); + + return (0); +} + +static void +svp_timer_tick(port_event_t *pe, void *arg) +{ + mutex_lock(&svp_timer_lock); + svp_timer_nticks++; + + for (;;) { + svp_timer_t *t; + + t = avl_first(&svp_timer_tree); + if (t == NULL || t->st_expire > svp_timer_nticks) + break; + + avl_remove(&svp_timer_tree, t); + + /* + * We drop this while performing an operation so that way state + * can advance in the face of a long-running callback. 
+ */ + t->st_delivering = B_TRUE; + mutex_unlock(&svp_timer_lock); + t->st_func(t->st_arg); + mutex_lock(&svp_timer_lock); + t->st_delivering = B_FALSE; + cond_broadcast(&svp_timer_cv); + /* Timers that are not one shot are put back in the tree, st_value ticks later. */ + if (t->st_oneshot == B_FALSE) { + t->st_expire += t->st_value; + avl_add(&svp_timer_tree, t); + } + } + mutex_unlock(&svp_timer_lock); +} + +/* + * Add a timer to the tree so that it expires st_value ticks after the current + * tick count. A st_value of zero is rejected via libvarpd_panic(). + */ +void +svp_timer_add(svp_timer_t *stp) +{ + if (stp->st_value == 0) + libvarpd_panic("tried to add svp timer with zero value"); + + mutex_lock(&svp_timer_lock); + stp->st_delivering = B_FALSE; + stp->st_expire = svp_timer_nticks + stp->st_value; + avl_add(&svp_timer_tree, stp); + mutex_unlock(&svp_timer_lock); +} + +/* + * Remove a timer, waiting for an in-progress delivery of it to finish first. + */ +void +svp_timer_remove(svp_timer_t *stp) +{ + mutex_lock(&svp_timer_lock); + + /* + * If the event in question is not currently being delivered, then we + * can stop it before it next fires. If it is currently being delivered, + * we need to wait for that to finish. Because we hold the timer lock, + * we know that it cannot be rearmed. Therefore, we mark it as a one + * shot so that it is not re-added once delivery ends, and wait until + * it's no longer set to delivering. 
+ */ + /* + * NOTE(review): this assumes stp is still in the tree. A one shot + * timer that has already fired was removed by svp_timer_tick() and + * not re-added -- confirm callers never remove such a timer. + */ + if (stp->st_delivering == B_FALSE) { + avl_remove(&svp_timer_tree, stp); + mutex_unlock(&svp_timer_lock); + return; + } + + /* svp_timer_tick() only re-adds timers whose st_oneshot is B_FALSE. */ + stp->st_oneshot = B_TRUE; + while (stp->st_delivering == B_TRUE) + cond_wait(&svp_timer_cv, &svp_timer_lock); + + mutex_unlock(&svp_timer_lock); +} + +/* + * Initialize the timer code: point the timer event at svp_timer_tick(), create + * the timer tree, and hand the event to svp_event_timer_init(). If that fails + * the tree is destroyed again. Returns the result of svp_event_timer_init(), + * where zero means success. + */ +int +svp_timer_init(void) +{ + int ret; + + svp_timer_event.se_func = svp_timer_tick; + svp_timer_event.se_arg = NULL; + + avl_create(&svp_timer_tree, svp_timer_comparator, sizeof (svp_timer_t), + offsetof(svp_timer_t, st_link)); + + if ((ret = svp_event_timer_init(&svp_timer_event)) != 0) { + avl_destroy(&svp_timer_tree); + } + + return (ret); +} diff --git a/usr/src/lib/varpd/svp/common/llib-lvarpd_svp b/usr/src/lib/varpd/svp/common/llib-lvarpd_svp new file mode 100644 index 0000000000..31b3d36fbe --- /dev/null +++ b/usr/src/lib/varpd/svp/common/llib-lvarpd_svp @@ -0,0 +1,18 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. + */ + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + diff --git a/usr/src/lib/varpd/svp/common/mapfile-vers b/usr/src/lib/varpd/svp/common/mapfile-vers new file mode 100644 index 0000000000..642ef72adc --- /dev/null +++ b/usr/src/lib/varpd/svp/common/mapfile-vers @@ -0,0 +1,35 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. 
A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION SUNWprivate { + local: + *; +}; diff --git a/usr/src/lib/varpd/svp/i386/Makefile b/usr/src/lib/varpd/svp/i386/Makefile new file mode 100644 index 0000000000..cf2f2487af --- /dev/null +++ b/usr/src/lib/varpd/svp/i386/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/svp/sparc/Makefile b/usr/src/lib/varpd/svp/sparc/Makefile new file mode 100644 index 0000000000..cf2f2487af --- /dev/null +++ b/usr/src/lib/varpd/svp/sparc/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. 
+# + +# +# Copyright (c) 2014 Joyent, Inc. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/varpd/svp/sparcv9/Makefile b/usr/src/lib/varpd/svp/sparcv9/Makefile new file mode 100644 index 0000000000..b64b830ddd --- /dev/null +++ b/usr/src/lib/varpd/svp/sparcv9/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. +# + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) |