summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--GCC-README.md129
-rw-r--r--README291
-rw-r--r--README.illumos49
-rw-r--r--manifest38
-rw-r--r--usr/src/Targetdirs4
-rw-r--r--usr/src/cmd/Makefile1
-rw-r--r--usr/src/cmd/cmd-inet/etc/services2
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/Makefile3
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h1
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c3
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_rport.c4
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_vxlan.c68
-rw-r--r--usr/src/cmd/devfsadm/misc_link.c3
-rw-r--r--usr/src/cmd/dladm/Makefile2
-rw-r--r--usr/src/cmd/dladm/dladm.c733
-rw-r--r--usr/src/cmd/mdb/common/modules/mpt_sas/mpt_sas.c2
-rw-r--r--usr/src/cmd/varpd/Makefile61
-rw-r--r--usr/src/cmd/varpd/svc-varpd35
-rw-r--r--usr/src/cmd/varpd/varpd.c440
-rw-r--r--usr/src/cmd/varpd/varpd.xml52
-rw-r--r--usr/src/common/idspace/id_space.c (renamed from usr/src/uts/common/os/id_space.c)35
-rw-r--r--usr/src/docs/admin252
-rw-r--r--usr/src/lib/Makefile17
-rw-r--r--usr/src/lib/libbunyan/Makefile42
-rw-r--r--usr/src/lib/libbunyan/Makefile.com36
-rw-r--r--usr/src/lib/libbunyan/amd64/Makefile19
-rw-r--r--usr/src/lib/libbunyan/common/bunyan.c913
-rw-r--r--usr/src/lib/libbunyan/common/bunyan.h88
-rw-r--r--usr/src/lib/libbunyan/common/bunyan_provider.d32
-rw-r--r--usr/src/lib/libbunyan/common/llib-lbunyan19
-rw-r--r--usr/src/lib/libbunyan/common/mapfile-vers50
-rw-r--r--usr/src/lib/libbunyan/i386/Makefile18
-rw-r--r--usr/src/lib/libbunyan/sparc/Makefile18
-rw-r--r--usr/src/lib/libbunyan/sparcv9/Makefile19
-rw-r--r--usr/src/lib/libcmdutils/common/custr.c87
-rw-r--r--usr/src/lib/libcmdutils/common/mapfile-vers2
-rw-r--r--usr/src/lib/libcmdutils/libcmdutils.h13
-rw-r--r--usr/src/lib/libdladm/Makefile12
-rw-r--r--usr/src/lib/libdladm/Makefile.com5
-rw-r--r--usr/src/lib/libdladm/common/libdladm.c31
-rw-r--r--usr/src/lib/libdladm/common/libdladm.h7
-rw-r--r--usr/src/lib/libdladm/common/libdloverlay.c870
-rw-r--r--usr/src/lib/libdladm/common/libdloverlay.h111
-rw-r--r--usr/src/lib/libdladm/common/mapfile-vers15
-rw-r--r--usr/src/lib/libidspace/Makefile45
-rw-r--r--usr/src/lib/libidspace/Makefile.com42
-rw-r--r--usr/src/lib/libidspace/amd64/Makefile19
-rw-r--r--usr/src/lib/libidspace/common/libidspace.c25
-rw-r--r--usr/src/lib/libidspace/common/libidspace.h42
-rw-r--r--usr/src/lib/libidspace/common/llib-lidspace19
-rw-r--r--usr/src/lib/libidspace/common/mapfile-vers47
-rw-r--r--usr/src/lib/libidspace/i386/Makefile18
-rw-r--r--usr/src/lib/libidspace/sparc/Makefile18
-rw-r--r--usr/src/lib/libidspace/sparcv9/Makefile19
-rw-r--r--usr/src/lib/libnvpair/libnvpair.h2
-rw-r--r--usr/src/lib/libnvpair/mapfile-vers2
-rw-r--r--usr/src/lib/libnvpair/nvpair_json.c285
-rw-r--r--usr/src/lib/librename/Makefile44
-rw-r--r--usr/src/lib/librename/Makefile.com34
-rw-r--r--usr/src/lib/librename/amd64/Makefile19
-rw-r--r--usr/src/lib/librename/common/librename.c232
-rw-r--r--usr/src/lib/librename/common/librename.h43
-rw-r--r--usr/src/lib/librename/common/llib-lrename19
-rw-r--r--usr/src/lib/librename/common/mapfile-vers41
-rw-r--r--usr/src/lib/librename/i386/Makefile18
-rw-r--r--usr/src/lib/librename/sparc/Makefile18
-rw-r--r--usr/src/lib/librename/sparcv9/Makefile19
-rw-r--r--usr/src/lib/libsocket/common/mapfile-vers6
-rw-r--r--usr/src/lib/libsocket/inet/ether_addr.c53
-rw-r--r--usr/src/lib/varpd/Makefile33
-rw-r--r--usr/src/lib/varpd/Makefile.plugin19
-rw-r--r--usr/src/lib/varpd/direct/Makefile40
-rw-r--r--usr/src/lib/varpd/direct/Makefile.com35
-rw-r--r--usr/src/lib/varpd/direct/amd64/Makefile19
-rw-r--r--usr/src/lib/varpd/direct/common/libvarpd_direct.c395
-rw-r--r--usr/src/lib/varpd/direct/common/llib-lvarpd_direct18
-rw-r--r--usr/src/lib/varpd/direct/common/mapfile-vers35
-rw-r--r--usr/src/lib/varpd/direct/i386/Makefile18
-rw-r--r--usr/src/lib/varpd/direct/sparc/Makefile18
-rw-r--r--usr/src/lib/varpd/direct/sparcv9/Makefile19
-rw-r--r--usr/src/lib/varpd/files/Makefile40
-rw-r--r--usr/src/lib/varpd/files/Makefile.com36
-rw-r--r--usr/src/lib/varpd/files/amd64/Makefile19
-rw-r--r--usr/src/lib/varpd/files/common/libvarpd_files.c598
-rw-r--r--usr/src/lib/varpd/files/common/libvarpd_files_json.c744
-rw-r--r--usr/src/lib/varpd/files/common/libvarpd_files_json.h40
-rw-r--r--usr/src/lib/varpd/files/common/llib-lvarpd_files18
-rw-r--r--usr/src/lib/varpd/files/common/mapfile-vers35
-rw-r--r--usr/src/lib/varpd/files/i386/Makefile18
-rw-r--r--usr/src/lib/varpd/files/sparc/Makefile18
-rw-r--r--usr/src/lib/varpd/files/sparcv9/Makefile19
-rw-r--r--usr/src/lib/varpd/libvarpd/Makefile54
-rw-r--r--usr/src/lib/varpd/libvarpd/Makefile.com44
-rw-r--r--usr/src/lib/varpd/libvarpd/amd64/Makefile19
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd.c355
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd.h78
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c645
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_client.c626
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_client.h94
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_door.c457
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h247
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c574
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c48
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c590
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c233
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c238
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h226
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_util.c98
-rw-r--r--usr/src/lib/varpd/libvarpd/common/llib-lvarpd19
-rw-r--r--usr/src/lib/varpd/libvarpd/common/mapfile-plugin47
-rw-r--r--usr/src/lib/varpd/libvarpd/common/mapfile-vers113
-rw-r--r--usr/src/lib/varpd/libvarpd/i386/Makefile18
-rw-r--r--usr/src/lib/varpd/libvarpd/sparc/Makefile18
-rw-r--r--usr/src/lib/varpd/libvarpd/sparcv9/Makefile19
-rw-r--r--usr/src/lib/varpd/svp/Makefile40
-rw-r--r--usr/src/lib/varpd/svp/Makefile.com48
-rw-r--r--usr/src/lib/varpd/svp/amd64/Makefile19
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp.c755
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp.h377
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c945
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c50
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_host.c173
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c206
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h172
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c596
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c144
-rw-r--r--usr/src/lib/varpd/svp/common/llib-lvarpd_svp18
-rw-r--r--usr/src/lib/varpd/svp/common/mapfile-vers35
-rw-r--r--usr/src/lib/varpd/svp/i386/Makefile18
-rw-r--r--usr/src/lib/varpd/svp/sparc/Makefile18
-rw-r--r--usr/src/lib/varpd/svp/sparcv9/Makefile19
-rw-r--r--usr/src/man/man1m/ping.1m17
-rw-r--r--usr/src/man/man3socket/Makefile2
-rw-r--r--usr/src/man/man3socket/ethers.3socket27
-rw-r--r--usr/src/test/util-tests/runfiles/default.run1
-rw-r--r--usr/src/test/util-tests/tests/bunyan/Makefile69
-rw-r--r--usr/src/test/util-tests/tests/bunyan/btest.c312
-rw-r--r--usr/src/test/util-tests/tests/bunyan/bunyan.ksh26
-rw-r--r--usr/src/uts/Makefile.targ3
-rw-r--r--usr/src/uts/Makefile.uts6
-rw-r--r--usr/src/uts/common/Makefile.files12
-rw-r--r--usr/src/uts/common/Makefile.rules28
-rw-r--r--usr/src/uts/common/fs/dev/sdev_netops.c6
-rw-r--r--usr/src/uts/common/fs/sockfs/sockcommon.c7
-rw-r--r--usr/src/uts/common/fs/sockfs/sockcommon_sops.c32
-rw-r--r--usr/src/uts/common/fs/sockfs/sockcommon_subr.c50
-rw-r--r--usr/src/uts/common/io/dld/dld_drv.c3
-rw-r--r--usr/src/uts/common/io/ksocket/ksocket.c12
-rw-r--r--usr/src/uts/common/io/ksocket/ksocket_impl.h5
-rw-r--r--usr/src/uts/common/io/overlay/overlay.c1432
-rw-r--r--usr/src/uts/common/io/overlay/overlay.conf16
-rw-r--r--usr/src/uts/common/io/overlay/overlay.mapfile46
-rw-r--r--usr/src/uts/common/io/overlay/overlay_fm.c80
-rw-r--r--usr/src/uts/common/io/overlay/overlay_mux.c345
-rw-r--r--usr/src/uts/common/io/overlay/overlay_plugin.c261
-rw-r--r--usr/src/uts/common/io/overlay/overlay_prop.c123
-rw-r--r--usr/src/uts/common/io/overlay/overlay_target.c1590
-rw-r--r--usr/src/uts/common/io/overlay/plugins/overlay_geneve.c171
-rw-r--r--usr/src/uts/common/io/overlay/plugins/overlay_nvgre.c200
-rw-r--r--usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c337
-rw-r--r--usr/src/uts/common/io/pseudonex.c22
-rw-r--r--usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas.c2
-rw-r--r--usr/src/uts/common/netinet/in.h1
-rw-r--r--usr/src/uts/common/refhash/refhash.c (renamed from usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas_hash.c)4
-rw-r--r--usr/src/uts/common/sys/Makefile4
-rw-r--r--usr/src/uts/common/sys/dld_ioc.h3
-rw-r--r--usr/src/uts/common/sys/dls_mgmt.h6
-rw-r--r--usr/src/uts/common/sys/ethernet.h3
-rw-r--r--usr/src/uts/common/sys/geneve.h64
-rw-r--r--usr/src/uts/common/sys/id_space.h5
-rw-r--r--usr/src/uts/common/sys/ksocket.h4
-rw-r--r--usr/src/uts/common/sys/mac.h9
-rw-r--r--usr/src/uts/common/sys/nvgre.h56
-rw-r--r--usr/src/uts/common/sys/overlay.h96
-rw-r--r--usr/src/uts/common/sys/overlay_common.h65
-rw-r--r--usr/src/uts/common/sys/overlay_impl.h207
-rw-r--r--usr/src/uts/common/sys/overlay_plugin.h150
-rw-r--r--usr/src/uts/common/sys/overlay_target.h292
-rw-r--r--usr/src/uts/common/sys/refhash.h (renamed from usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_hash.h)6
-rw-r--r--usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h2
-rw-r--r--usr/src/uts/common/sys/socketvar.h12
-rw-r--r--usr/src/uts/common/sys/stt.h67
-rw-r--r--usr/src/uts/common/sys/vxlan.h48
-rw-r--r--usr/src/uts/common/syscall/sendfile.c19
-rw-r--r--usr/src/uts/intel/Makefile.intel8
-rw-r--r--usr/src/uts/intel/geneve/Makefile52
-rw-r--r--usr/src/uts/intel/nvgre/Makefile49
-rw-r--r--usr/src/uts/intel/overlay/Makefile56
-rw-r--r--usr/src/uts/intel/vxlan/Makefile49
189 files changed, 22711 insertions, 357 deletions
diff --git a/GCC-README.md b/GCC-README.md
deleted file mode 100644
index 012284aef7..0000000000
--- a/GCC-README.md
+++ /dev/null
@@ -1,129 +0,0 @@
-
-# Illumos GCC 4
-
-## Intro
-
-This is a work in progress that may or may not work (or work well) at any
-given time. If you encounter problems you think are likely my fault, send me
-email describing them in detail.
-
-This has booted to login and done fairly basic work on 32bit and 64bit x86 in
-qemu, VirtualBox (32bit), and a random whitebox Athlon 64. And on a Sun-Fire
-V250 on sun4u. It has been tried nowhere else.
-
-## What you need
-
-### Patched GCC 4.4.4
-
-This is the GCC available at http://github.com/richlowe/gcc on the il-4_4_4
-branch. It is patched in numerous ways, a stock GCC will most certainly not
-work.
-
-I've made binary tarballs available, they assume a `--prefix` of
-`/opt/gcc/4.4.4` (unpack them from `/opt`)
-
-i386: http://richlowe.openindiana.org/~richlowe/il-gcc-444-i386.tar.bz2
-sparc: http://richlowe.openindiana.org/~richlowe/il-gcc-444-sparc.tar.bz2
-
-### Further patch GCC
-
-Unfortunately, we need to bake appropriate runpaths for libstdc++ and libgcc_s
-into the GCC spec, at least at present, so you need to further patch GCC to
-reflect the location in which you're going to install it.
-
-Choose a prefix in which to install GCC (the one you'll give to `configure
---prefix`), I'm using /opt/gcc/4.4.4.
-
-Now, look at revision a1583073 (`git show a1583073`). In every place that
-changeset added a /usr/sfw/lib path, adding a matching path based on your
-prefix _before_ the SFW path (you need your libcc_s to be found first).
-
-I'm hoping to find a way to avoid doing this, but at present haven't come up
-with one that works in every case.
-
-### Build GCC
-
-I've been using a script to make this easier on myself
-
-```bash
-#!/bin/ksh -e
-VER=$1
-
-if [[ -z $VER ]]; then
- print -u2 "Usage: build.sh <Version>"
- exit 2
-fi
-
-export PATH="/opt/dejagnu/bin:/opt/SUNWspro/bin:"
-PATH="$PATH:/usr/gnu/bin:/usr/sfw/bin"
-PATH="$PATH:/usr/bin:/usr/ccs/bin"
-
-export CC=/usr/sfw/bin/gcc
-export CFLAGS="-g -O2"
-
-AS_OPTIONS=""
-if [[ $(mach) == "sparc" ]]; then
- CFLAGS="$CFLAGS -fkeep-inline-functions"
- AS_OPTIONS="--without-gnu-as --with-as=/usr/ccs/bin/as"
-else
- AS_OPTIONS="--with-gnu-as --with-as=/usr/sfw/bin/as"
-fi
-
-export STAGE1_CFLAGS=$CFLAGS
-export CFLAGS_FOR_TARGET=$CFLAGS
-
-GMSGFMT=/usr/gnu/bin/msgfmt \
-../../configure --prefix=/opt/gcc/$VER $AS_OPTIONS \
- --with-ld=/usr/bin/ld \
- --without-gnu-ld \
- --enable-languages="c,c++,objc" \
- --enable-shared \
- --with-mpfr-include=/usr/include/mpfr \
- --with-gmp-include=/usr/include/gmp
-
-gmake -j8 CFLAGS="$CFLAGS" STAGE1_CFLAGS="$CFLAGS" \
- CFLAGS_FOR_TARGET="$CFLAGS" bootstrap
-```
-
-Then:
-
-```bash
-mkdir -p builds/il-444 && cd builds/il-444
-../../build.sh 4.4.4
-```
-
-If you wish to run the tests, you'll need to install expect, then build and
-install dejagnu and run 'gmake check-gcc'. It's most useful to compare the
-test results of an unpatched build and a patch build at the same GCC revision
-(the gcc-4.4.4 tag, v. the il-4_4_4 branch for instance).
-
-## Build Illumos
-
-As part of this, I've (at least temporarily) adjusted the build infrastruture
-to support building with either version of GCC as either shadow or primary
-compiler. This adds a little complication to building with GCC4
-
-In addition to your normal settings in your env file you should add
-
-```bash
-source ./illumos.sh # Source your normal environment file
-export GCC_ROOT=/opt/gcc/4.4.4; # Where to find GCC4.x
-export CW_GCC_DIR=${GCC_ROOT}/bin; # A temporary hack to allow bootstrap of cw(1)
-export __GNUC=""; # Use GCC as the primary compiler
-export __GNUC4=""; # Use GCC4 specific flags
-```
-
-This should be sufficient to have GCC 4 used as the primary throughout the
-build (check nightly.log, and/or run `mcs -p foo.o` to check individual
-objects.
-
-## Illumos Live is really helpful
-
-A good and convenient way to test stuff is to use Joyent's illumos-live
-(http://github.com/joyent/illumos-live). My fork contains some additional
-changes to make it mildly more convenient if you don't have netbooting
-infrastructure.
-
-Just make sure that the projects/illumos tree is a clone of this branch, edit
-the generated illumos.sh env file as in "Build Illumos" above, and build it
-following their directions.
diff --git a/README b/README
index 9fd99ad6c8..66e2887b78 100644
--- a/README
+++ b/README
@@ -1,49 +1,268 @@
-illumos gate README - Sep 16, 2012.
+ ___ _ ____ _
+ / _ \__ _____ _ __| | __ _ _ _ / ___| __ _| |_ ___
+| | | \ \ / / _ \ '__| |/ _` | | | | | | _ / _` | __/ _ \
+| |_| |\ V / __/ | | | (_| | |_| | | |_| | (_| | || __/
+ \___/ \_/ \___|_| |_|\__,_|\__, | \____|\__,_|\__\___|
+ |___/
-This is the illumos gate. This is the illumos source tree. It contains
-the following subdirectories:
+This branch of illumos-joyent is the overlay gate. It's purpose is to
+serve as a development branch for a new dladm device called an overlay,
+whose purpose is to support encapsulation protocols like VXLAN and
+NVGRE; however, also allow for a user to supplement the protocol with
+their own means of doing discovery, rather than the pre-defined ones.
- usr/src - The actual source code
+## Warning
- exception_lists - These are lists of exceptional cases
- used to limit noise during builds.
- Ideally this directory would consist of
- only empty files.
+This is a work in progress, things will be changing quickly, and
+panicking is certainly not out of the question. You probably don't want
+to be using this.
-Integration Rules:
+## Current Status
- All changes must have been reviewed, and approved by an advocate
- (below). A code review may be performed by someone other than the
- advocate, but the final integration should still be approved by the
- advocate.
+Basic VXLAN tunnels that interoperate with not only ourselves, but also
+VXLAN, work. Configuration is done through dladm and information is
+persisted in varpd.
- The advocate will want to see your webrev and hg outgoing -v. The
- advocate will also ask about your testing, and may ask to see your
- build logs.
+## High-level Design Overview
- All changes must adhere to typical ON style and quality rules.
- For example, pass full cstyle, applicable lint rules, etc.
+WARNING: This is subject to change the further down the implementation
+path we get. Major changse should cause this document to be updated, but
+the author is only human and subject to time and forgetfulness.
- All new code must be licensed under CDDL, and modifications to existing
- code may not alter the original license terms. Integrations of code
- from upstream sources that use another liberal open source license are
- permissible, subject to approval of the advocates or developer council.
- Exceptions to this policy shall require the approval of the developer
- council.
- Commits should have comments of the following form:
+There have been many different attempts and solutions trying to tackle
+the space of network virtualization through the use of overlay networks.
+These networks act in similar ways to VLANs, but with two large
+differences. They have significantly larger ID spaces, and they fully
+encapsulate a layer two frame in a layer three frame with some
+additional metadata. The most common and widely used of these today is
+VXLAN and NVGRE.
- 1234 This is a sample bug report synopsis
- 4567 If you have a second bug synopsis…
- Reviewed by: Frodo Baggins <frodo.baggins@underhill.net>
- Reviewed by: Legolas <elf-coder@mirkwood.org>
- Approved by: Gandalf The Grey <rti-advocate@white-council.com>
+While the wire formats of all of these have stabilized, the means of
+looking up another host have not. Some RFCs describe simple point to
+point tunnels or suggest the use of a single mutlicast group for each
+virtual network. While these are useful, most users will find that they
+want their own schemes that allows for alternate control and more
+dynamic mappings. For example, if a centralized database exists that
+describes the mapping between physical hosts and mac address on a given
+virtual network, this may be used to send a direct unicast message.
- Each commit must have at least one bug id that is listed in the
- illumos-gate project at www.illumos.org.
+To facilitate this, we are building something that breaks the two pieces
+apart:
+ o encapsulation/decapsulation
+ o determining the destination of a frame
-RTI Advocates: advocates@lists.illumos.org
-Developer Council: developer-council@lists.illumos.org
-IRC channel: #illumos on irc.freenode.net
-Mailing list: developer@lists.illumos.org
+While the kernel will be in charge of the first part. This will be a new
+GLDv3 device, that looks similar to an etherstub (in so far as it
+creates a virtual switch), but will send out encapsulated data. We'll
+call that specifically a dladm overlay. An overlay device has properties
+that describe the encapsulation protocol, the overlay id, and the lookup
+scheme. It is not a true datalink itself, meaning that it cannot have an
+IP device plumbed on top of it; however, it supports things like vnics
+and the like being created over it.
+
+The second part will be handled by a userland daemon that we call
+'varpd' the virtual arp daemon. It's named this way because not only
+does it do ARP-like things, but the interface between it and the kernel
+is similar. Importantly, both pieces of this will be highly pluggable.
+The kernel will support arbitrary encapsulation and decapsulation
+modules; while, varpd will support arbitrary lookup modules that allow
+for as little or as much complexity as desired.
+
+The following image roughly describes what this looks like going out.
+
+```
+Outgoing Data Path
+
+ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+ . Kernel * . . Userland * .
+ . ******** . .*********** .
+ . . . +---------+ .
+ . +------------+ +---------+ . . | Virutal | +--------+ .
+ . | TCP/IP/VND | | Overlay |=======*=| ARP |---| Lookup | .
+ . +------------+ +->| Cache | . . * | Daemon | | Plugin | .
+ . | | | Lookup | . . * +---------+ | Module | .
+ . +------+ | +---------+ . . * +--------+ .
+ . | VNIC | | | . . * Async | .
+ . +------+ | | . . * Upcall | .
+ . | | | . . . . . . . . . . . . .|. . . .
+ . +------------+ | +--------+ . |
+ . | Overlay |->-+ | Encap | . |
+ . | Device |-<----| Plugin | . +---------+
+ . +------------+ | Engine | . | TCP/UDP |
+ . | +--------+ . | Katar |
+ . +-------------+ . | Request |
+ . | GZ IP Stack | . +---------+
+ . +-------------+ .
+ . | .
+ . +-----------------+ .
+ . | GZ VNIC, 9K MTU | .
+ . | On Encap VLAN | .
+ . | On External TAG | .
+ . +-----------------+ .
+ . | .
+ . . . . .|. . . . . . . . . . . . . .
+ |
+ +---------------------+
+ | Top of Rack Switch |
+ +---------------------+
+```
+
+The incoming data path is similar to the
+outgoing data path. The exact mechanisms by which it works are still a
+bit up in the air. However, what we'd ideally like to do is have the
+kernel use ksockets to listen on the appropriate interfaces for the
+configured backend, eg. for VXLAN, it'd be on a UDP port, and then use
+the decapsulation engine to get out the raw packet. Next we would send
+that back through the software classifier which will inject the frame
+into the appropriate VNICs on the overlay device. At that point, the
+packet will enter the normal processing for TCP/IP and vnd. The devil is
+in the details there, and those details need to still be determined.
+
+Importantly here, you'll note that varpd and the kernel communicate
+through an asynchronous upcall mechanism. This will look very much like
+that we do for ARP today. We will not be doing synchronous door upcalls.
+We just simply cannot block the kernel for that amount of time. Instead,
+we'll do something where varpd has threads that are basically looking
+for work which gets serviced by a taskq in the kernel.
+
+### Data walk through
+
+Let's follow what happens when a given instance sends a unicast packet
+out for which it already has an ARP record for, but has never
+communicated out over the overlay device to that recipient before. At
+this point, the IP layer or VND sends a full layer two packet down to
+DLS. That goes through and its VLAN tag is strictly enforced on the
+packet as necessary. For this, we'll assume that we have a VXLAN device.
+
+At that point, the overlay device will then see if it has that mac
+address in its target cache. If it does, it will encapsulate and send
+out the message block. In the more interesting case where it does not
+have that mapping, it has to contact varpd. As part of this, the current
+thread of control will basically queue this message block chain in a
+list of outstanding requests like we do for ARP, and then signal varpd.
+varpd will then look at some basic header data, e.g., overlay id, VLAN id,
+mac address, ethertype, etc.
+
+Based on the network id it will map that to its configuration
+information and use that to map to configuration information for a
+specific plug-in. While Joyent will have its own plug-in that integrates
+into the proposed SDC design, other plug-ins may exist such as: a static files
+mapping, sending all traffic to a single unicast address, or sending it
+to a multicast address. The goal is to ensure that what we
+build can be reused by the broader illumos ecosystem and allow ourselves
+greater flexibility in the future.
+
+In this case, the Joyent varpd plug-in would contact a katar caching
+server using a DNS-like protocol. The purpose of the katar caching
+server is to be the interface between the compute nodes and
+electric-moray and provide a read cache for moray. Upon receiving a
+response, it would then go through and ioctl/reply to the asynchronous
+upcall. The kernel would use that, send it through the encapsulation
+engine which would append a message block that contains the VXLAN
+header. That in turn, would then be sent through the kernel to the
+appropriate interface/socket. In this case, that would be a UDP
+connection that the kernel actively controls on top of a data
+link in the global zone. The UDP packet would be directed towards the IP
+address of the CN that contains the instance that the MAC address
+corresponded to.
+
+It would then go through the global zone's UDP/IP stack and then out
+that VNIC and physical interface. In particular, we need to ensure a few
+properties about that interface. The first, is that it's on a particular
+VLAN. The second, is that it has a 9K MTU. At that point, it would leave
+the VNIC and go out on the physical network destined for another CN on
+the VXLAN port.
+
+That CN would receive a packet on that port and the kernel's classifier
+would send it all the way to overlay device immediately. The overlay
+device would decapsulate it based on the port and ID that it was
+received on. From there, we would send it back through the
+classifier again to direct it to the appropriate soft rings, replicating
+broadcast and multicast as necessary, and then it will go through the
+normal networking stack, IP or vnd, as appropriate.
+
+If for some reason the CN received a unicast packet to which it didn't
+have a valid destination, it will fire an request to varpd to
+send an invalidation request back to the unicast address of the other CN
+that the packet came from which will also be running varpd.
+
+### dladm overlay devices and varpd
+
+I'd like to go through and spend a bit more time on the organization of
+these dladm overlay devices, varpd, and the associated encapsulation and
+lookup plug-ins.
+
+We specifically want the overlay devices in the kernel and the
+encapsulation plug-ins to be fairly dumb and not have to do very much.
+So while the kernel will have to set up the devices that it listens over
+and wire up those sockets (eg. UDP port for VXLAN, an IP type for NVGRE,
+etc.) the kernel devices and the kernel plug-ins will not know about
+what they should be directly. That will need to be something that is
+configured by userland in conjunction with varpd.
+
+The encapsulation plug-ins themselves should be very dumb and
+essentially support only two functions: an encapsulation operation and a
+decapsulation operation. They will be simple miscellaneous modules that
+depend on the broader overlay module and register with it, much like mac
+has plug-in modules for Ethernet, Infiniband, Wi-Fi, etc. This will also
+make it easier to go through and add newer encapsulation modules.
+
+In the userland side, there are a few different abstractions that we
+have with varpd. The first is a notion of a search plug-in. The search
+plug-in is responsible for determining how we find the destination host
+for a packet. If you follow the VXLAN spec, there are two obvious
+plug-ins that it suggests, one that sends everything to a unicast
+address and one that sends everything to a multicast address. It is in
+this logic that we would write the SDC plug-in that talks to the katar
+instances.
+
+However, each of these plug-ins may have properties themselves. We'd
+like to be able to leverage the same plug-in that deals with a single
+unicast tunnel or a single multicast address, but just tweak some
+configuration parameters, e.g., what that address is.
+
+Another thing that we'd like to be able to do is to optionally define an
+out of band invalidation protocol. Ideally this would be plug and play
+with the other search protocols. Realistically, for the case of a single
+unicast or multicast tunnel, there's no reason to go ahead and use the
+invalidation protocols at all.
+
+What this suggests to me is an idea of a profile for an overlay device.
+A profile would involve the set of a specific encapsulation protocol, a
+search plug-in, and optionally an invalidation plug-in, as well as some
+metadata. For example, the metadata would include things like the
+overlay id that should be used and what series of ports need to be
+listened on. I think the way that this all plays out and what the user
+interface looks like is still up in the air; however, leaving the bulk
+of the responsibility to userland is important.
+
+As the needs of the plug-in will need to change more frequently than the
+kernel modules, we'll need to establish a module path such that we can
+reload all of the plug-ins and deliver something out of band via /opt.
+It will be important that the kernel basically needs to be able to
+survive the varpd communication mechanism going down.
+
+## Current planned deliverables
+
+Note this is entirely subject to change:
+
+ o New dladm overlay object
+ o varpd userland daemon
+ o VXLAN overlay plugin and basic point to point, multicast tunnels
+ o Improvements to the ksocket API
+ o A Joyent-specific varpd plugin for constructing dynamic mappings
+ o Some form of zone that is used to create virtual routers
+
+As a general note, while the gate has had prototypes of VXLAN, NVGRE,
+Geneve, and STT for design help, it is not likely that all will survive,
+particularly STT.
+
+## Contact
+
+For questions and more information, contact:
+
+Robert Mustacchi
+rm@joyent.com
+rmustacc on irc.freenode.net
diff --git a/README.illumos b/README.illumos
new file mode 100644
index 0000000000..9fd99ad6c8
--- /dev/null
+++ b/README.illumos
@@ -0,0 +1,49 @@
+illumos gate README - Sep 16, 2012.
+
+This is the illumos gate. This is the illumos source tree. It contains
+the following subdirectories:
+
+ usr/src - The actual source code
+
+ exception_lists - These are lists of exceptional cases
+ used to limit noise during builds.
+ Ideally this directory would consist of
+ only empty files.
+
+Integration Rules:
+
+ All changes must have been reviewed, and approved by an advocate
+ (below). A code review may be performed by someone other than the
+ advocate, but the final integration should still be approved by the
+ advocate.
+
+ The advocate will want to see your webrev and hg outgoing -v. The
+ advocate will also ask about your testing, and may ask to see your
+ build logs.
+
+ All changes must adhere to typical ON style and quality rules.
+ For example, pass full cstyle, applicable lint rules, etc.
+
+ All new code must be licensed under CDDL, and modifications to existing
+ code may not alter the original license terms. Integrations of code
+ from upstream sources that use another liberal open source license are
+ permissible, subject to approval of the advocates or developer council.
+ Exceptions to this policy shall require the approval of the developer
+ council.
+
+ Commits should have comments of the following form:
+
+ 1234 This is a sample bug report synopsis
+ 4567 If you have a second bug synopsis…
+ Reviewed by: Frodo Baggins <frodo.baggins@underhill.net>
+ Reviewed by: Legolas <elf-coder@mirkwood.org>
+ Approved by: Gandalf The Grey <rti-advocate@white-council.com>
+
+ Each commit must have at least one bug id that is listed in the
+ illumos-gate project at www.illumos.org.
+
+RTI Advocates: advocates@lists.illumos.org
+Developer Council: developer-council@lists.illumos.org
+
+IRC channel: #illumos on irc.freenode.net
+Mailing list: developer@lists.illumos.org
diff --git a/manifest b/manifest
index 348a67da78..3b0f194d99 100644
--- a/manifest
+++ b/manifest
@@ -653,6 +653,7 @@ f kernel/drv/amd64/oce 0755 root sys
f kernel/drv/amd64/ohci 0755 root sys
f kernel/drv/amd64/openeepr 0755 root sys
f kernel/drv/amd64/options 0755 root sys
+f kernel/drv/amd64/overlay 0755 root sys
f kernel/drv/amd64/pci_pci 0755 root sys
f kernel/drv/amd64/pcieb 0755 root sys
f kernel/drv/amd64/physmem 0755 root sys
@@ -796,6 +797,7 @@ f kernel/drv/nxge.conf 0644 root sys
f kernel/drv/ohci.conf 0644 root sys
f kernel/drv/openeepr.conf 0644 root sys
f kernel/drv/options.conf 0644 root sys
+f kernel/drv/overlay.conf 0644 root sys
f kernel/drv/pcieb.conf 0644 root sys
f kernel/drv/physmem.conf 0644 root sys
f kernel/drv/pmcs.conf 0644 root sys
@@ -1048,6 +1050,11 @@ f kernel/misc/scsi_vhci/amd64/scsi_vhci_f_sym_hds 0755 root sys
f kernel/misc/scsi_vhci/amd64/scsi_vhci_f_tape 0755 root sys
f kernel/misc/scsi_vhci/amd64/scsi_vhci_f_tpgs 0755 root sys
f kernel/misc/scsi_vhci/amd64/scsi_vhci_f_tpgs_tape 0755 root sys
+d kernel/overlay 0755 root sys
+d kernel/overlay/amd64 0755 root sys
+f kernel/overlay/amd64/geneve 0755 root sys
+f kernel/overlay/amd64/nvgre 0755 root sys
+f kernel/overlay/amd64/vxlan 0755 root sys
d kernel/sched 0755 root sys
d kernel/sched/amd64 0755 root sys
f kernel/sched/amd64/SDC 0755 root sys
@@ -1558,6 +1565,7 @@ f lib/svc/manifest/network/smb/server.xml 0444 root sys
f lib/svc/manifest/network/smtp-sendmail.xml 0444 root sys
f lib/svc/manifest/network/ssh.xml 0444 root sys
f lib/svc/manifest/network/time.xml 0444 root sys
+f lib/svc/manifest/network/varpd.xml 0444 root sys
f lib/svc/manifest/network/vrrp.xml 0444 root sys
d lib/svc/manifest/platform 0755 root sys
d lib/svc/manifest/platform/i86pc 0755 root sys
@@ -1711,6 +1719,7 @@ f lib/svc/method/svc-sv 0744 root sys
f lib/svc/method/svc-syseventd 0555 root sys
f lib/svc/method/svc-tnd 0555 root sys
f lib/svc/method/svc-utmpd 0555 root bin
+f lib/svc/method/svc-varpd 0555 root sys
f lib/svc/method/svc-vrrp 0555 root sys
f lib/svc/method/svc-wusb 0555 root sys
f lib/svc/method/svc-zones 0555 root sys
@@ -2592,6 +2601,7 @@ f usr/include/bsm/audit_uevents.h 0644 root bin
f usr/include/bsm/devalloc.h 0644 root bin
f usr/include/bsm/devices.h 0644 root bin
f usr/include/bsm/libbsm.h 0644 root bin
+f usr/include/bunyan.h 0644 root bin
f usr/include/complex.h 0644 root bin
f usr/include/config_admin.h 0644 root bin
f usr/include/cpio.h 0644 root bin
@@ -4701,6 +4711,8 @@ f usr/lib/amd64/libbsdmalloc.so.1 0755 root bin
s usr/lib/amd64/libbsdmalloc.so=libbsdmalloc.so.1
s usr/lib/amd64/libbsm.so.1=../../../lib/amd64/libbsm.so.1
s usr/lib/amd64/libbsm.so=../../../lib/amd64/libbsm.so.1
+f usr/lib/amd64/libbunyan.so.1 0755 root bin
+s usr/lib/amd64/libbunyan.so=libbunyan.so.1
s usr/lib/amd64/libc.so.1=../../../lib/amd64/libc.so.1
s usr/lib/amd64/libc.so=../../../lib/amd64/libc.so.1
s usr/lib/amd64/libc_db.so.1=../../../lib/amd64/libc_db.so.1
@@ -4786,6 +4798,8 @@ f usr/lib/amd64/libhotplug.so.1 0755 root bin
s usr/lib/amd64/libhotplug.so=libhotplug.so.1
f usr/lib/amd64/libidmap.so.1 0755 root bin
s usr/lib/amd64/libidmap.so=libidmap.so.1
+f usr/lib/amd64/libidspace.so.1 0755 root bin
+s usr/lib/amd64/libidspace.so=libidspace.so.1
f usr/lib/amd64/libike.so.1 0755 root bin
s usr/lib/amd64/libike.so=libike.so.1
f usr/lib/amd64/libilb.so.1 0755 root bin
@@ -4885,6 +4899,7 @@ f usr/lib/amd64/libraidcfg.so.1 0755 root bin
s usr/lib/amd64/libraidcfg.so=libraidcfg.so.1
s usr/lib/amd64/librcm.so.1=../../../lib/amd64/librcm.so.1
s usr/lib/amd64/librcm.so=../../../lib/amd64/librcm.so.1
+f usr/lib/amd64/librename.so.1 0755 root bin
f usr/lib/amd64/libreparse.so.1 0755 root bin
s usr/lib/amd64/libreparse.so=libreparse.so.1
s usr/lib/amd64/libresolv.so.2=../../../lib/amd64/libresolv.so.2
@@ -4980,6 +4995,7 @@ s usr/lib/amd64/libuuid.so.1=../../../lib/amd64/libuuid.so.1
s usr/lib/amd64/libuuid.so=../../../lib/amd64/libuuid.so.1
s usr/lib/amd64/libuutil.so.1=../../../lib/amd64/libuutil.so.1
s usr/lib/amd64/libuutil.so=../../../lib/amd64/libuutil.so.1
+f usr/lib/amd64/libvarpd.so.1 0755 root bin
f usr/lib/amd64/libvnd.so.1 0755 root bin
s usr/lib/amd64/libvnd.so=libvnd.so.1
f usr/lib/amd64/libvolmgt.so.1 0755 root bin
@@ -6094,6 +6110,8 @@ f usr/lib/libbsdmalloc.so.1 0755 root bin
s usr/lib/libbsdmalloc.so=libbsdmalloc.so.1
s usr/lib/libbsm.so.1=../../lib/libbsm.so.1
s usr/lib/libbsm.so=../../lib/libbsm.so.1
+f usr/lib/libbunyan.so.1 0755 root bin
+s usr/lib/libbunyan.so=libbunyan.so.1
d usr/lib/libc 0755 root bin
s usr/lib/libc.so.1=../../lib/libc.so.1
s usr/lib/libc.so=../../lib/libc.so.1
@@ -6195,6 +6213,8 @@ f usr/lib/libhotplug.so.1 0755 root bin
s usr/lib/libhotplug.so=libhotplug.so.1
f usr/lib/libidmap.so.1 0755 root bin
s usr/lib/libidmap.so=libidmap.so.1
+f usr/lib/libidspace.so.1 0755 root bin
+s usr/lib/libidspace.so=libidspace.so.1
f usr/lib/libike.so.1 0755 root bin
s usr/lib/libike.so=libike.so.1
f usr/lib/libilb.so.1 0755 root bin
@@ -6331,6 +6351,7 @@ s usr/lib/librcm.so.1=../../lib/librcm.so.1
s usr/lib/librcm.so=../../lib/librcm.so.1
f usr/lib/librdc.so.1 0755 root bin
s usr/lib/librdc.so=librdc.so.1
+f usr/lib/librename.so.1 0755 root bin
f usr/lib/libreparse.so.1 0755 root bin
s usr/lib/libreparse.so=libreparse.so.1
s usr/lib/libresolv.so.1=../../lib/libresolv.so.1
@@ -6437,6 +6458,7 @@ s usr/lib/libuutil.so.1=../../lib/libuutil.so.1
s usr/lib/libuutil.so=../../lib/libuutil.so.1
f usr/lib/libvnd.so.1 0755 root bin
s usr/lib/libvnd.so=libvnd.so.1
+f usr/lib/libvarpd.so.1 0755 root bin
f usr/lib/libvolmgt.so.1 0755 root bin
s usr/lib/libvolmgt.so=libvolmgt.so.1
f usr/lib/libvrrpadm.so.1 0755 root bin
@@ -10064,6 +10086,21 @@ f usr/lib/values-Xs.o 0644 root bin
f usr/lib/values-Xt.o 0644 root bin
f usr/lib/values-xpg4.o 0644 root bin
f usr/lib/values-xpg6.o 0644 root bin
+d usr/lib/varpd 0755 root bin
+s usr/lib/varpd/64=amd64
+d usr/lib/varpd/amd64 0755 root bin
+f usr/lib/varpd/amd64/libvarpd_direct.so.1 0555 root bin
+s usr/lib/varpd/amd64/libvarpd_direct.so=libvarpd_direct.so.1
+f usr/lib/varpd/amd64/libvarpd_files.so.1 0555 root bin
+s usr/lib/varpd/amd64/libvarpd_files.so=libvarpd_files.so.1
+f usr/lib/varpd/amd64/libvarpd_svp.so.1 0555 root bin
+s usr/lib/varpd/amd64/libvarpd_svp.so=libvarpd_svp.so.1
+f usr/lib/varpd/libvarpd_direct.so.1 0555 root bin
+s usr/lib/varpd/libvarpd_direct.so=libvarpd_direct.so.1
+f usr/lib/varpd/libvarpd_files.so.1 0555 root bin
+s usr/lib/varpd/libvarpd_files.so=libvarpd_files.so.1
+f usr/lib/varpd/libvarpd_svp.so.1 0555 root bin
+s usr/lib/varpd/libvarpd_svp.so=libvarpd_svp.so.1
f usr/lib/vfontedpr 0555 root bin
f usr/lib/vgrindefs 0444 root bin
d usr/lib/vscan 0755 root bin
@@ -10514,6 +10551,7 @@ h usr/sbin/update_drv=usr/lib/isaexec
h usr/sbin/useradd=usr/sbin/roleadd
h usr/sbin/userdel=usr/sbin/roledel
h usr/sbin/usermod=usr/sbin/rolemod
+f usr/sbin/varpd 0555 root bin
f usr/sbin/vndadm 0555 root bin
f usr/sbin/volcopy 0555 root bin
f usr/sbin/vrrpadm 0555 root bin
diff --git a/usr/src/Targetdirs b/usr/src/Targetdirs
index 79fdd83e6f..3b9ddcda35 100644
--- a/usr/src/Targetdirs
+++ b/usr/src/Targetdirs
@@ -285,6 +285,7 @@ DIRS= \
/usr/lib/mdb/kvm \
/usr/lib/mdb/proc \
/usr/lib/nfs \
+ /usr/lib/varpd \
/usr/net \
/usr/net/servers \
/usr/lib/pool \
@@ -555,6 +556,7 @@ DIRS64= \
/usr/lib/security/$(MACH64) \
/usr/lib/smbsrv/$(MACH64) \
/usr/lib/abi/$(MACH64) \
+ /usr/lib/varpd/$(MACH64) \
/usr/sbin/$(MACH64) \
/usr/ucb/$(MACH64) \
/usr/ucblib/$(MACH64) \
@@ -618,6 +620,7 @@ SYM.DIRS64= \
/usr/lib/lwp/64 \
/usr/lib/secure/64 \
/usr/lib/security/64 \
+ /usr/lib/varpd/64 \
/usr/xpg4/lib/64 \
/var/ld/64 \
/usr/ucblib/64
@@ -727,6 +730,7 @@ $(BUILD64) $(ROOT)/usr/lib/lwp/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/link_audit/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/secure/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/security/64:= LINKDEST=$(MACH64)
+$(BUILD64) $(ROOT)/usr/lib/varpd/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/xpg4/lib/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/var/ld/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/ucblib/64:= LINKDEST=$(MACH64)
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index f78feac556..2303cfd8c4 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -432,6 +432,7 @@ COMMON_SUBDIRS= \
utmp_update \
utmpd \
valtools \
+ varpd \
vgrind \
vi \
vndadm \
diff --git a/usr/src/cmd/cmd-inet/etc/services b/usr/src/cmd/cmd-inet/etc/services
index 37514ac0a7..5dad7af0ad 100644
--- a/usr/src/cmd/cmd-inet/etc/services
+++ b/usr/src/cmd/cmd-inet/etc/services
@@ -215,6 +215,8 @@ krb5_prop 754/tcp # Kerberos V5 KDC propogation
swat 901/tcp # Samba Web Adm.Tool
ufsd 1008/tcp ufsd # UFS-aware server
ufsd 1008/udp ufsd
+portolan 1296/tcp # Portolan
+svp-underlay 1339/tcp # SDC VXLAN underlay invalidation
cvc 1495/tcp # Network Console
ingreslock 1524/tcp
www-ldap-gw 1760/tcp # HTTP to LDAP gateway
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/snoop/Makefile
index e285aa09d3..65f605f9bb 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/Makefile
@@ -39,7 +39,8 @@ OBJS= nfs4_xdr.o snoop.o snoop_aarp.o snoop_adsp.o snoop_aecho.o \
snoop_pppoe.o snoop_rip.o snoop_rip6.o snoop_rpc.o snoop_rpcprint.o \
snoop_rpcsec.o snoop_rport.o snoop_rquota.o snoop_rstat.o snoop_rtmp.o \
snoop_sctp.o snoop_slp.o snoop_smb.o snoop_socks.o snoop_solarnet.o \
- snoop_tcp.o snoop_tftp.o snoop_trill.o snoop_udp.o snoop_zip.o
+ snoop_tcp.o snoop_tftp.o snoop_trill.o snoop_udp.o snoop_vxlan.o \
+ snoop_zip.o
SRCS= $(OBJS:.o=.c)
HDRS= snoop.h snoop_mip.h at.h snoop_ospf.h snoop_ospf6.h
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h
index 40cefa2c59..46fc46428d 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h
@@ -260,6 +260,7 @@ extern int interpret_socks_reply(int, char *, int);
extern int interpret_trill(int, struct ether_header **, char *, int *);
extern int interpret_isis(int, char *, int, boolean_t);
extern int interpret_bpdu(int, char *, int);
+extern int interpret_vxlan(int, char *, int);
extern void init_ldap(void);
extern boolean_t arp_for_ether(char *, struct ether_addr *);
extern char *ether_ouiname(uint32_t);
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c
index 5c6bde0cd6..881cf68754 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c
@@ -54,8 +54,9 @@
static headerlen_fn_t ether_header_len, fddi_header_len, tr_header_len,
ib_header_len, ipnet_header_len, ipv4_header_len, ipv6_header_len;
-static interpreter_fn_t interpret_ether, interpret_fddi, interpret_tr,
+static interpreter_fn_t interpret_fddi, interpret_tr,
interpret_ib, interpret_ipnet, interpret_iptun;
+interpreter_fn_t interpret_ether;
static void addr_copy_swap(struct ether_addr *, struct ether_addr *);
static int tr_machdr_len(char *, int *, int *);
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_rport.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_rport.c
index b84ee3c1b4..29c5cb7064 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_rport.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_rport.c
@@ -76,6 +76,7 @@ static const struct porttable pt_udp[] = {
{ 560, "RMONITOR" },
{ 561, "MONITOR" },
{ IPPORT_SOCKS, "SOCKS" },
+ { IPPORT_VXLAN, "VXLAN" },
{ 0, NULL }
};
@@ -428,6 +429,9 @@ interpret_reserved(int flags, int proto, in_port_t src, in_port_t dst,
(void) interpret_socks_reply(flags, data,
dlen);
return (1);
+ case IPPORT_VXLAN:
+ (void) interpret_vxlan(flags, data, dlen);
+ return (1);
}
}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_vxlan.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_vxlan.c
new file mode 100644
index 0000000000..041fedf8b7
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_vxlan.c
@@ -0,0 +1,68 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Decode VXLAN encapsulated packets.
+ */
+
+#include <sys/vxlan.h>
+#include "snoop.h"
+
+int
+interpret_vxlan(int flags, char *data, int fraglen)
+{
+ vxlan_hdr_t *vxlan = (vxlan_hdr_t *)data;
+ uint32_t id, vxf;
+
+ if (fraglen < sizeof (vxlan_hdr_t)) {
+ if (flags & F_SUM)
+ (void) snprintf(get_sum_line(), MAXLINE,
+ "VXLAN RUNT");
+ if (flags & F_DTAIL)
+ show_header("VXLAN RUNT: ", "Short packet", fraglen);
+
+ return (fraglen);
+ }
+
+ id = ntohl(vxlan->vxlan_id) >> VXLAN_ID_SHIFT;
+ vxf = ntohl(vxlan->vxlan_flags);
+
+ if (flags & F_SUM) {
+ (void) snprintf(get_sum_line(), MAXLINE,
+ "VXLAN VNI=%d", id);
+ }
+
+ if (flags & F_DTAIL) {
+ show_header("VXLAN: ", "VXLAN Header", sizeof (vxlan_hdr_t));
+ show_space();
+ (void) snprintf(get_line(0, 0), get_line_remain(),
+ "Flags = 0x%08x", vxf);
+ (void) snprintf(get_line(0, 0), get_line_remain(), " %s",
+ getflag(vxf >> 24, VXLAN_F_VDI >> 24, "vni present",
+ "vni missing"));
+ (void) snprintf(get_line(0, 0), get_line_remain(),
+ "VXLAN network id (VNI) = %d", id);
+ show_space();
+ }
+
+ if (flags & (F_DTAIL | F_ALLSUM)) {
+ fraglen -= sizeof (vxlan_hdr_t);
+ data += sizeof (vxlan_hdr_t);
+
+ return (interpret_ether(flags, data, fraglen, fraglen));
+ }
+
+ return (0);
+}
diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c
index f37a1227b6..0d7a430d3e 100644
--- a/usr/src/cmd/devfsadm/misc_link.c
+++ b/usr/src/cmd/devfsadm/misc_link.c
@@ -206,6 +206,9 @@ static devfsadm_create_t misc_cbt[] = {
{ "pseudo", "ddi_pseudo", "tpm",
TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
},
+ { "pseudo", "ddi_pseudo", "overlay",
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
+ }
};
DEVFSADM_CREATE_INIT_V0(misc_cbt);
diff --git a/usr/src/cmd/dladm/Makefile b/usr/src/cmd/dladm/Makefile
index 143086e26a..a5538b60c5 100644
--- a/usr/src/cmd/dladm/Makefile
+++ b/usr/src/cmd/dladm/Makefile
@@ -35,7 +35,7 @@ include ../Makefile.cmd
XGETFLAGS += -a -x $(PROG).xcl
LDLIBS += -L$(ROOT)/lib -lsocket
LDLIBS += -ldladm -ldlpi -lkstat -lsecdb -lbsm -linetutil -ldevinfo
-LDLIBS += $(ZLAZYLOAD) -lrstp $(ZNOLAZYLOAD)
+LDLIBS += $(ZLAZYLOAD) -lrstp $(ZNOLAZYLOAD) -lnsl -lumem -lcmdutils
CERRWARN += -_gcc=-Wno-switch
CERRWARN += -_gcc=-Wno-unused-label
diff --git a/usr/src/cmd/dladm/dladm.c b/usr/src/cmd/dladm/dladm.c
index fb0bfdd009..668e6db2e9 100644
--- a/usr/src/cmd/dladm/dladm.c
+++ b/usr/src/cmd/dladm/dladm.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
*/
#include <stdio.h>
@@ -60,6 +60,7 @@
#include <libdliptun.h>
#include <libdlsim.h>
#include <libdlbridge.h>
+#include <libdloverlay.h>
#include <libinetutil.h>
#include <libvrrpadm.h>
#include <bsm/adt.h>
@@ -75,6 +76,7 @@
#include <stddef.h>
#include <stp_in.h>
#include <ofmt.h>
+#include <libcmdutils.h>
#define MAXPORT 256
#define MAXVNIC 256
@@ -194,6 +196,7 @@ static ofmt_cb_t print_lacp_cb, print_phys_one_mac_cb;
static ofmt_cb_t print_xaggr_cb, print_aggr_stats_cb;
static ofmt_cb_t print_phys_one_hwgrp_cb, print_wlan_attr_cb;
static ofmt_cb_t print_wifi_status_cb, print_link_attr_cb;
+static ofmt_cb_t print_overlay_cb, print_overlay_fma_cb, print_overlay_targ_cb;
static void dladm_ofmt_check(ofmt_status_t, boolean_t, ofmt_handle_t);
typedef void cmdfunc_t(int, char **, const char *);
@@ -221,6 +224,8 @@ static cmdfunc_t do_create_bridge, do_modify_bridge, do_delete_bridge;
static cmdfunc_t do_add_bridge, do_remove_bridge, do_show_bridge;
static cmdfunc_t do_create_iptun, do_modify_iptun, do_delete_iptun;
static cmdfunc_t do_show_iptun, do_up_iptun, do_down_iptun;
+static cmdfunc_t do_create_overlay, do_delete_overlay, do_modify_overlay;
+static cmdfunc_t do_show_overlay;
static void do_up_vnic_common(int, char **, const char *, boolean_t);
@@ -353,7 +358,7 @@ static cmd_t cmds[] = {
" delete-vnic [-t] [-z zonename] <vnic-link>" },
{ "show-vnic", do_show_vnic,
" show-vnic [-pP] [-l <link>] [-z zonename] "
- "[-s [-i <interval>]] [<link>]\n" },
+ "[-s [-i <interval>]] [<link>]\n" },
{ "up-vnic", do_up_vnic, NULL },
{ "create-part", do_create_part,
" create-part [-t] [-f] -l <link> [-P <pkey>]\n"
@@ -404,6 +409,16 @@ static cmd_t cmds[] = {
" <bridge>\n"
" show-bridge -t [-p] [-o <field>,...] [-s [-i <interval>]]"
" <bridge>\n" },
+ { "create-overlay", do_create_overlay,
+ " create-overlay -e <encap> -v <vnetid>\n"
+ "\t\t [ -p <prop>=<value>[,...]] <overlay-name>" },
+ { "delete-overlay", do_delete_overlay,
+ " delete-overlay <overlay-name>" },
+ { "modify-overlay", do_modify_overlay,
+ " modify-overlay [-d mac] [-f] [-s mac=ip:port] "
+ "<overlay-name>" },
+ { "show-overlay", do_show_overlay,
+ " show-overlay [-t] <overlay> \n" },
{ "show-usage", do_show_usage,
" show-usage [-a] [-d | -F <format>] "
"[-s <DD/MM/YYYY,HH:MM:SS>]\n"
@@ -1432,6 +1447,57 @@ static ofmt_field_t bridge_trill_fields[] = {
offsetof(bridge_trill_fields_buf_t, bridget_nexthop), print_default_cb },
{ NULL, 0, 0, NULL}};
+/*
+ * Structures for dladm show-overlay
+ */
+typedef enum {
+ OVERLAY_LINK,
+ OVERLAY_PROPERTY,
+ OVERLAY_PERM,
+ OVERLAY_REQ,
+ OVERLAY_VALUE,
+ OVERLAY_DEFAULT,
+ OVERLAY_POSSIBLE
+} overlay_field_index_t;
+
+static const ofmt_field_t overlay_fields[] = {
+/* name, field width, index */
+{ "LINK", 13, OVERLAY_LINK, print_overlay_cb },
+{ "PROPERTY", 20, OVERLAY_PROPERTY, print_overlay_cb },
+{ "PERM", 5, OVERLAY_PERM, print_overlay_cb },
+{ "REQ", 4, OVERLAY_REQ, print_overlay_cb },
+{ "VALUE", 12, OVERLAY_VALUE, print_overlay_cb },
+{ "DEFAULT", 12, OVERLAY_DEFAULT, print_overlay_cb },
+{ "POSSIBLE", 12, OVERLAY_POSSIBLE, print_overlay_cb },
+{ NULL, 0, 0, NULL }
+};
+
+typedef enum {
+ OVERLAY_FMA_LINK,
+ OVERLAY_FMA_STATUS,
+ OVERLAY_FMA_DETAILS
+} overlay_fma_field_index_t;
+
+static const ofmt_field_t overlay_fma_fields[] = {
+{ "LINK", 14, OVERLAY_FMA_LINK, print_overlay_fma_cb },
+{ "STATUS", 8, OVERLAY_FMA_STATUS, print_overlay_fma_cb },
+{ "DETAILS", 58, OVERLAY_FMA_DETAILS, print_overlay_fma_cb },
+{ NULL, 0, 0, NULL }
+};
+
+typedef enum {
+ OVERLAY_TARG_LINK,
+ OVERLAY_TARG_TARGET,
+ OVERLAY_TARG_DEST
+} overlay_targ_field_index_t;
+
+static const ofmt_field_t overlay_targ_fields[] = {
+{ "LINK", 14, OVERLAY_TARG_LINK, print_overlay_targ_cb },
+{ "TARGET", 8, OVERLAY_TARG_TARGET, print_overlay_targ_cb },
+{ "DESTINATION", 58, OVERLAY_TARG_DEST, print_overlay_targ_cb },
+{ NULL, 0, 0, NULL }
+};
+
static char *progname;
static sig_atomic_t signalled;
@@ -5045,7 +5111,7 @@ print_vnic(show_vnic_state_t *state, datalink_id_t linkid)
NULL, devname, sizeof (devname)) != DLADM_STATUS_OK)
(void) sprintf(devname, "?");
-
+
zonename[0] = '\0';
if (!is_etherstub) {
valptr[0] = zonename;
@@ -5054,7 +5120,7 @@ print_vnic(show_vnic_state_t *state, datalink_id_t linkid)
}
if (state->vs_zonename != NULL &&
- strcmp(state->vs_zonename, zonename) != 0)
+ strcmp(state->vs_zonename, zonename) != 0)
return (DLADM_STATUS_OK);
state->vs_found = B_TRUE;
@@ -5138,7 +5204,7 @@ print_vnic(show_vnic_state_t *state, datalink_id_t linkid)
sizeof (vbuf.vnic_zone), "%s", zonename);
else
(void) strlcpy(vbuf.vnic_zone, "--",
- sizeof (vbuf.vnic_zone));
+ sizeof (vbuf.vnic_zone));
}
ofmt_print(state->vs_ofmt, &vbuf);
@@ -9761,3 +9827,660 @@ do_up_part(int argc, char *argv[], const char *use)
(void) dladm_part_up(handle, partid, 0);
}
+
+static void
+do_create_overlay(int argc, char *argv[], const char *use)
+{
+ int opt;
+ char *encap = NULL, *endp, *search = NULL;
+ char name[MAXLINKNAMELEN];
+ dladm_status_t status;
+ uint32_t flags = DLADM_OPT_ACTIVE | DLADM_OPT_TRANSIENT;
+ uint64_t vid;
+ boolean_t havevid = B_FALSE;
+ char propstr[DLADM_STRSIZE];
+ dladm_arg_list_t *proplist = NULL;
+
+ bzero(propstr, sizeof (propstr));
+ while ((opt = getopt(argc, argv, ":e:v:p:s:")) != -1) {
+ switch (opt) {
+ case 'e':
+ encap = optarg;
+ break;
+ case 's':
+ search = optarg;
+ break;
+ case 'p':
+ (void) strlcat(propstr, optarg, DLADM_STRSIZE);
+ if (strlcat(propstr, ",", DLADM_STRSIZE) >=
+ DLADM_STRSIZE)
+ die("property list too long '%s'", propstr);
+ break;
+ case 'v':
+ vid = strtoul(optarg, &endp, 10);
+ if (*endp != '\0' || (vid == 0 && errno == EINVAL))
+ die("couldn't parse virtual networkd id: %s",
+ optarg);
+ if (vid == ULONG_MAX && errno == ERANGE)
+ die("virtual networkd id too large: %s",
+ optarg);
+ havevid = B_TRUE;
+ break;
+ default:
+ die_opterr(optopt, opt, use);
+ }
+ }
+
+ if (havevid == B_FALSE)
+ die("missing required virtual network id");
+
+ if (encap == NULL)
+ die("missing required encapsulation plugin");
+
+ if (encap == NULL)
+ die("missing required search plugin");
+
+ if (optind != (argc - 1))
+ usage();
+
+ if (strlcpy(name, argv[optind], MAXLINKNAMELEN) >= MAXLINKNAMELEN)
+ die("link name too long '%s'", argv[optind]);
+
+ if (!dladm_valid_linkname(name))
+ die("invalid link name '%s'", argv[optind]);
+
+ if (strlen(encap) + 1 > MAXLINKNAMELEN)
+ die("encapsulation plugin name too long '%s'", encap);
+
+ if (strlen(search) + 1 > MAXLINKNAMELEN)
+ die("search plugin name too long '%s'", encap);
+
+ if (dladm_parse_link_props(propstr, &proplist, B_FALSE)
+ != DLADM_STATUS_OK)
+ die("invalid overlay property");
+
+ status = dladm_overlay_create(handle, name, encap, search, vid,
+ proplist, flags);
+ dladm_free_props(proplist);
+ if (status != DLADM_STATUS_OK) {
+ die_dlerr(status, "overlay creation failed");
+ }
+}
+
+static void
+do_delete_overlay(int argc, char *argv[], const char *use)
+{
+ datalink_id_t linkid = DATALINK_ALL_LINKID;
+ dladm_status_t status;
+
+ if (argc != 2) {
+ usage();
+ }
+
+ status = dladm_name2info(handle, argv[1], &linkid, NULL, NULL, NULL);
+ if (status != DLADM_STATUS_OK)
+ die_dlerr(status, "failed to delete %s", argv[1]);
+
+ status = dladm_overlay_delete(handle, linkid);
+ if (status != DLADM_STATUS_OK)
+ die_dlerr(status, "failed to delete %s", argv[1]);
+}
+
+typedef struct showoverlay_state {
+ ofmt_handle_t sho_ofmt;
+ const char *sho_linkname;
+ dladm_overlay_propinfo_handle_t sho_info;
+ uint8_t sho_value[DLADM_OVERLAY_PROP_SIZEMAX];
+ uint32_t sho_size;
+} showoverlay_state_t;
+
+typedef struct showoverlay_fma_state {
+ ofmt_handle_t shof_ofmt;
+ const char *shof_linkname;
+ dladm_overlay_status_t *shof_status;
+} showoverlay_fma_state_t;
+
+typedef struct showoverlay_targ_state {
+ ofmt_handle_t shot_ofmt;
+ const char *shot_linkname;
+ const struct ether_addr *shot_key;
+ const dladm_overlay_point_t *shot_point;
+} showoverlay_targ_state_t;
+
+/*
+ * TODO This uses the OVERLAY_PROP_ macros and the like from the kernel. Is it
+ * all right to expose them?
+ */
+static void
+print_overlay_value(char *outbuf, uint_t bufsize, uint_t type, const void *pbuf,
+ const size_t psize)
+{
+ const struct in6_addr *ipv6;
+ struct in_addr ip;
+
+ switch (type) {
+ case OVERLAY_PROP_T_INT:
+ if (psize != 1 && psize != 2 && psize != 4 && psize != 8) {
+ snprintf(outbuf, bufsize, "?");
+ break;
+ }
+ if (psize == 1)
+ snprintf(outbuf, bufsize, "%d", *(int8_t *)pbuf);
+ if (psize == 2)
+ snprintf(outbuf, bufsize, "%d", *(int16_t *)pbuf);
+ if (psize == 4)
+ snprintf(outbuf, bufsize, "%d", *(int32_t *)pbuf);
+ if (psize == 8)
+ snprintf(outbuf, bufsize, "%d", *(int64_t *)pbuf);
+ break;
+ case OVERLAY_PROP_T_UINT:
+ if (psize != 1 && psize != 2 && psize != 4 && psize != 8) {
+ snprintf(outbuf, bufsize, "?");
+ break;
+ }
+ if (psize == 1)
+ snprintf(outbuf, bufsize, "%d", *(uint8_t *)pbuf);
+ if (psize == 2)
+ snprintf(outbuf, bufsize, "%d", *(uint16_t *)pbuf);
+ if (psize == 4)
+ snprintf(outbuf, bufsize, "%d", *(uint32_t *)pbuf);
+ if (psize == 8)
+ snprintf(outbuf, bufsize, "%d", *(uint64_t *)pbuf);
+ break;
+ case OVERLAY_PROP_T_IP:
+ if (psize != sizeof (struct in6_addr)) {
+ warn("malformed overlay IP property: %d bytes\n",
+ psize);
+ (void) snprintf(outbuf, bufsize, "--");
+ break;
+ }
+
+ ipv6 = pbuf;
+ if (IN6_IS_ADDR_V4MAPPED(ipv6)) {
+ IN6_V4MAPPED_TO_INADDR(ipv6, &ip);
+ if (inet_ntop(AF_INET, &ip, outbuf, bufsize) == NULL) {
+ warn("malformed overlay IP property\n");
+ (void) snprintf(outbuf, bufsize, "--");
+ break;
+ }
+ } else {
+ if (inet_ntop(AF_INET6, ipv6, outbuf, bufsize) ==
+ NULL) {
+ warn("malformed overlay IP property\n");
+ (void) snprintf(outbuf, bufsize, "--");
+ break;
+ }
+ }
+
+ break;
+ case OVERLAY_PROP_T_STRING:
+ (void) snprintf(outbuf, bufsize, "%s", pbuf);
+ break;
+ default:
+ abort();
+ }
+
+ return;
+
+}
+
+static boolean_t
+print_overlay_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize)
+{
+ dladm_status_t status;
+ showoverlay_state_t *sp = ofarg->ofmt_cbarg;
+ dladm_overlay_propinfo_handle_t infop = sp->sho_info;
+ const char *pname;
+ uint_t type, prot;
+ const void *def;
+ uint32_t defsize;
+ const mac_propval_range_t *rangep;
+
+ if ((status = dladm_overlay_prop_info(infop, &pname, &type, &prot, &def,
+ &defsize, &rangep)) != DLADM_STATUS_OK) {
+ warn_dlerr(status, "failed to get get property info");
+ return (B_TRUE);
+ }
+
+ switch (ofarg->ofmt_id) {
+ case OVERLAY_LINK:
+ snprintf(buf, bufsize, "%s", sp->sho_linkname);
+ break;
+ case OVERLAY_PROPERTY:
+ snprintf(buf, bufsize, "%s", pname);
+ break;
+ case OVERLAY_PERM:
+ if ((prot & OVERLAY_PROP_PERM_RW) == OVERLAY_PROP_PERM_RW) {
+ snprintf(buf, bufsize, "%s", "rw");
+ } else if ((prot & OVERLAY_PROP_PERM_RW) ==
+ OVERLAY_PROP_PERM_READ) {
+ snprintf(buf, bufsize, "%s", "r-");
+ } else {
+ snprintf(buf, bufsize, "%s", "--");
+ }
+ break;
+ case OVERLAY_REQ:
+ snprintf(buf, bufsize, "%s",
+ prot & OVERLAY_PROP_PERM_REQ ? "y" : "-");
+ break;
+ case OVERLAY_VALUE:
+ if (sp->sho_size == 0) {
+ snprintf(buf, bufsize, "%s", "--");
+ } else {
+ print_overlay_value(buf, bufsize, type, sp->sho_value,
+ sp->sho_size);
+ }
+ break;
+ case OVERLAY_DEFAULT:
+ if (defsize == 0) {
+ snprintf(buf, bufsize, "%s", "--");
+ } else {
+ print_overlay_value(buf, bufsize, type, def, defsize);
+ }
+ break;
+ case OVERLAY_POSSIBLE: {
+ int i;
+ char **vals, *ptr, *lim;
+ if (rangep->mpr_count == 0) {
+ snprintf(buf, bufsize, "%s", "--");
+ break;
+ }
+
+ vals = malloc((sizeof (char *) + DLADM_PROP_VAL_MAX) *
+ rangep->mpr_count);
+ if (vals == NULL)
+ die("insufficient memory");
+ for (i = 0; i < rangep->mpr_count; i++) {
+ vals[i] = (char *)vals + sizeof (char *) *
+ rangep->mpr_count + i * DLADM_MAX_PROP_VALCNT;
+ }
+
+ if (dladm_range2strs(rangep, vals) != 0) {
+ free(vals);
+ snprintf(buf, bufsize, "%s", "?");
+ break;
+ }
+
+ ptr = buf;
+ lim = buf + bufsize;
+ for (i = 0; i < rangep->mpr_count; i++) {
+ ptr += snprintf(ptr, lim - ptr, "%s,", vals[i]);
+ if (ptr >= lim)
+ break;
+ }
+ if (rangep->mpr_count > 0)
+ buf[strlen(buf) - 1] = '\0';
+ free(vals);
+ break;
+ }
+ default:
+ abort();
+ }
+ return (B_TRUE);
+}
+
+static int
+dladm_overlay_show_one(dladm_handle_t handle, datalink_id_t linkid,
+ dladm_overlay_propinfo_handle_t phdl, void *arg)
+{
+ showoverlay_state_t *sp = arg;
+ sp->sho_info = phdl;
+
+ sp->sho_size = sizeof (sp->sho_value);
+ if (dladm_overlay_get_prop(handle, linkid, phdl, &sp->sho_value,
+ &sp->sho_size) != DLADM_STATUS_OK)
+ return (DLADM_WALK_CONTINUE);
+
+ ofmt_print(sp->sho_ofmt, sp);
+ return (DLADM_WALK_CONTINUE);
+}
+
+static int
+show_one_overlay(dladm_handle_t hdl, datalink_id_t linkid, void *arg)
+{
+ char buf[MAXLINKNAMELEN];
+ showoverlay_state_t state;
+ datalink_class_t class;
+
+ if (dladm_datalink_id2info(hdl, linkid, NULL, &class, NULL, buf,
+ MAXLINKNAMELEN) != DLADM_STATUS_OK ||
+ class != DATALINK_CLASS_OVERLAY)
+ return (DLADM_WALK_CONTINUE);
+
+ state.sho_linkname = buf;
+ state.sho_ofmt = arg;
+
+ (void) dladm_overlay_walk_prop(handle, linkid, dladm_overlay_show_one,
+ &state);
+
+ return (DLADM_WALK_CONTINUE);
+}
+
+static boolean_t
+print_overlay_targ_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize)
+{
+ char keybuf[ETHERADDRSTRL];
+ const showoverlay_targ_state_t *shot = ofarg->ofmt_cbarg;
+ const dladm_overlay_point_t *point = shot->shot_point;
+ char macbuf[ETHERADDRSTRL];
+ char ipbuf[INET6_ADDRSTRLEN];
+ custr_t *cus;
+
+ switch (ofarg->ofmt_id) {
+ case OVERLAY_TARG_LINK:
+ snprintf(buf, bufsize, shot->shot_linkname);
+ break;
+ case OVERLAY_TARG_TARGET:
+ if ((point->dop_flags & DLADM_OVERLAY_F_DEFAULT) != 0) {
+ (void) snprintf(buf, bufsize, "*:*:*:*:*:*");
+ } else {
+ if (ether_ntoa_r(shot->shot_key, keybuf) == NULL) {
+ warn("encountered malformed mac address key\n");
+ return (B_FALSE);
+ }
+ snprintf(buf, bufsize, "%s", keybuf);
+ }
+ break;
+ case OVERLAY_TARG_DEST:
+ if (custr_alloc_buf(&cus, buf, bufsize) != 0) {
+ die("ran out of memory for printing the overlay "
+ "target destination");
+ }
+
+ if (point->dop_dest & OVERLAY_PLUGIN_D_ETHERNET) {
+ if (ether_ntoa_r(&point->dop_mac, macbuf) == NULL) {
+ warn("encountered malformed mac address target "
+ "for key %s\n", keybuf);
+ return (B_FALSE);
+ }
+ (void) custr_append(cus, macbuf);
+ }
+
+ if (point->dop_dest & OVERLAY_PLUGIN_D_IP) {
+ if (IN6_IS_ADDR_V4MAPPED(&point->dop_ip)) {
+ struct in_addr v4;
+ IN6_V4MAPPED_TO_INADDR(&point->dop_ip, &v4);
+ if (inet_ntop(AF_INET, &v4, ipbuf,
+ sizeof (ipbuf)) == NULL)
+ abort();
+ } else if (inet_ntop(AF_INET6, &point->dop_ip, ipbuf,
+ sizeof (ipbuf)) == NULL) {
+ /*
+ * The only failrues we should get are
+ * EAFNOSUPPORT and ENOSPC because of buffer
+ * exhaustion. In either of these cases, that
+ * means something has gone horribly wrong.
+ */
+ abort();
+ }
+ if (point->dop_dest & OVERLAY_PLUGIN_D_ETHERNET)
+ (void) custr_appendc(cus, ',');
+ (void) custr_append(cus, ipbuf);
+ }
+
+ if (point->dop_dest & OVERLAY_PLUGIN_D_PORT) {
+ if (point->dop_dest & OVERLAY_PLUGIN_D_IP)
+ (void) custr_appendc(cus, ':');
+ else if (point->dop_dest & OVERLAY_PLUGIN_D_ETHERNET)
+ (void) custr_appendc(cus, ',');
+ (void) custr_append_printf(cus, "%u", point->dop_port);
+ }
+
+ custr_free(cus);
+
+ break;
+ }
+ return (B_TRUE);
+}
+
+static int
+show_one_overlay_table_entry(dladm_handle_t handle, datalink_id_t linkid,
+ const struct ether_addr *key, const dladm_overlay_point_t *point, void *arg)
+{
+ showoverlay_targ_state_t *shot = arg;
+
+ shot->shot_key = key;
+ shot->shot_point = point;
+ ofmt_print(shot->shot_ofmt, shot);
+
+ return (DLADM_WALK_CONTINUE);
+}
+
+static int
+show_one_overlay_table(dladm_handle_t handle, datalink_id_t linkid, void *arg)
+{
+ char linkbuf[MAXLINKNAMELEN];
+ int ret;
+ showoverlay_targ_state_t shot;
+ datalink_class_t class;
+
+ if (dladm_datalink_id2info(handle, linkid, NULL, &class, NULL, linkbuf,
+ MAXLINKNAMELEN) != DLADM_STATUS_OK ||
+ class != DATALINK_CLASS_OVERLAY)
+ return (DLADM_WALK_CONTINUE);
+
+ shot.shot_ofmt = arg;
+ shot.shot_linkname = linkbuf;
+
+ ret = dladm_overlay_walk_cache(handle, linkid,
+ show_one_overlay_table_entry, &shot);
+
+ return (DLADM_WALK_CONTINUE);
+}
+
+static boolean_t
+print_overlay_fma_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize)
+{
+ showoverlay_fma_state_t *shof = ofarg->ofmt_cbarg;
+ dladm_overlay_status_t *st = shof->shof_status;
+
+ switch (ofarg->ofmt_id) {
+ case OVERLAY_FMA_LINK:
+ snprintf(buf, bufsize, "%s", shof->shof_linkname);
+ break;
+ case OVERLAY_FMA_STATUS:
+ snprintf(buf, bufsize, st->dos_degraded == B_TRUE ?
+ "DEGRADED": "ONLINE");
+ break;
+ case OVERLAY_FMA_DETAILS:
+ snprintf(buf, bufsize, "%s", st->dos_degraded == B_TRUE ?
+ st->dos_fmamsg : "-");
+ break;
+ default:
+ abort();
+ }
+ return (B_TRUE);
+}
+
+static void
+show_one_overlay_fma_cb(dladm_handle_t handle, datalink_id_t linkid,
+ dladm_overlay_status_t *stat, void *arg)
+{
+ showoverlay_fma_state_t *shof = arg;
+
+ ofmt_print(shof->shof_ofmt, arg);
+}
+
+
+static int
+show_one_overlay_fma(dladm_handle_t handle, datalink_id_t linkid, void *arg)
+{
+ dladm_status_t status;
+ char linkbuf[MAXLINKNAMELEN];
+ datalink_class_t class;
+ showoverlay_fma_state_t shof;
+
+ if (dladm_datalink_id2info(handle, linkid, NULL, &class, NULL, linkbuf,
+ MAXLINKNAMELEN) != DLADM_STATUS_OK ||
+ class != DATALINK_CLASS_OVERLAY) {
+ die("datalink %s is not an overlay device\n", linkbuf);
+ }
+
+ shof.shof_ofmt = arg;
+ shof.shof_linkname = linkbuf;
+
+ status = dladm_overlay_status(handle, linkid,
+ show_one_overlay_fma_cb, &shof);
+ if (status != DLADM_STATUS_OK)
+ die_dlerr(status, "failed to obtain device status for %s",
+ linkbuf);
+
+ return (DLADM_WALK_CONTINUE);
+}
+
+/* TODO Do we want to be able to select on properties here? */
+static void
+do_show_overlay(int argc, char *argv[], const char *use)
+{
+ int i, opt;
+ datalink_id_t linkid = DATALINK_ALL_LINKID;
+ dladm_status_t status;
+ int (*funcp)(dladm_handle_t, datalink_id_t, void *);
+ char *fields_str = NULL;
+ const ofmt_field_t *fieldsp;
+ ofmt_status_t oferr;
+ boolean_t parse;
+ ofmt_handle_t ofmt;
+ uint_t ofmtflags;
+ int err;
+
+
+ funcp = show_one_overlay;
+ fieldsp = overlay_fields;
+ parse = B_FALSE;
+ ofmtflags = OFMT_WRAP;
+ while ((opt = getopt(argc, argv, ":o:pft")) != -1) {
+ switch (opt) {
+ case 'f':
+ funcp = show_one_overlay_fma;
+ fieldsp = overlay_fma_fields;
+ break;
+ case 'o':
+ fields_str = optarg;
+ break;
+ case 'p':
+ parse = B_TRUE;
+ ofmtflags = OFMT_PARSABLE;
+ break;
+ case 't':
+ funcp = show_one_overlay_table;
+ fieldsp = overlay_targ_fields;
+ break;
+ default:
+ die_opterr(optopt, opt, use);
+ }
+ }
+
+ if (fields_str != NULL && strcasecmp(fields_str, "all") == 0)
+ fields_str = NULL;
+
+ oferr = ofmt_open(fields_str, fieldsp, ofmtflags, 0, &ofmt);
+ dladm_ofmt_check(oferr, parse, ofmt);
+
+ err = 0;
+ if (argc > optind) {
+ for (i = optind; i < argc; i++) {
+ status = dladm_name2info(handle, argv[i], &linkid,
+ NULL, NULL, NULL);
+ if (status != DLADM_STATUS_OK) {
+ warn_dlerr(status, "failed to find %s",
+ argv[i]);
+ err = 1;
+ continue;
+ }
+ funcp(handle, linkid, ofmt);
+ }
+ } else {
+ (void) dladm_walk_datalink_id(funcp, handle, ofmt,
+ DATALINK_CLASS_OVERLAY, DATALINK_ANY_MEDIATYPE,
+ DLADM_OPT_ACTIVE);
+ }
+ ofmt_close(ofmt);
+
+ exit(err);
+}
+
+static void
+do_modify_overlay(int argc, char *argv[], const char *use)
+{
+ int opt, ocnt = 0;
+ boolean_t flush, set, delete;
+ struct ether_addr e;
+ char *dest;
+ datalink_id_t linkid = DATALINK_ALL_LINKID;
+ dladm_status_t status;
+
+ flush = set = delete = B_FALSE;
+ while ((opt = getopt(argc, argv, ":fd:s:")) != -1) {
+ switch (opt) {
+ case 'd':
+ if (delete == B_TRUE)
+ die_optdup('d');
+ delete = B_TRUE;
+ ocnt++;
+ if (ether_aton_r(optarg, &e) == NULL)
+ die("invalid mac address: %s\n", optarg);
+ break;
+ case 'f':
+ if (flush == B_TRUE)
+ die_optdup('f');
+ flush = B_TRUE;
+ ocnt++;
+ break;
+ case 's':
+ if (set == B_TRUE)
+ die_optdup('s');
+ set = B_TRUE;
+ ocnt++;
+ dest = strchr(optarg, '=');
+ *dest = '\0';
+ dest++;
+ if (dest == NULL)
+ die("malformed value, expected mac=dest, "
+ "got: %s\n", optarg);
+ if (ether_aton_r(optarg, &e) == NULL)
+ die("invalid mac address: %s\n", optarg);
+ break;
+ default:
+ die_opterr(optopt, opt, use);
+ }
+ }
+
+ if (ocnt == 0)
+ die("need to specify one of -d, -f, or -s");
+ if (ocnt > 1)
+ die("only one of -d, -f, or -s may be used");
+
+ if (argv[optind] == NULL)
+ die("missing required overlay device\n");
+ if (argc > optind + 1)
+ die("only one overlay device may be specified\n");
+
+ status = dladm_name2info(handle, argv[optind], &linkid, NULL, NULL,
+ NULL);
+ if (status != DLADM_STATUS_OK) {
+ die_dlerr(status, "failed to find overlay %s", argv[optind]);
+ }
+
+ if (flush == B_TRUE) {
+ status = dladm_overlay_cache_flush(handle, linkid);
+ if (status != DLADM_STATUS_OK)
+ die_dlerr(status, "failed to flush target cache for "
+ "overlay %s", argv[optind]);
+ }
+
+ if (delete == B_TRUE) {
+ status = dladm_overlay_cache_delete(handle, linkid, &e);
+ if (status != DLADM_STATUS_OK)
+ die_dlerr(status, "failed to flush target %s from "
+ "overlay target cache %s", optarg, argv[optind]);
+ }
+
+ if (set == B_TRUE) {
+ status = dladm_overlay_cache_set(handle, linkid, &e, dest);
+ if (status != DLADM_STATUS_OK)
+ die_dlerr(status, "failed to set target %s for overlay "
+ "target cache %s", optarg, argv[optind]);
+ }
+
+}
diff --git a/usr/src/cmd/mdb/common/modules/mpt_sas/mpt_sas.c b/usr/src/cmd/mdb/common/modules/mpt_sas/mpt_sas.c
index c39583b1f5..f669c530f4 100644
--- a/usr/src/cmd/mdb/common/modules/mpt_sas/mpt_sas.c
+++ b/usr/src/cmd/mdb/common/modules/mpt_sas/mpt_sas.c
@@ -34,6 +34,7 @@
#include <sys/sunmdi.h>
#include <sys/list.h>
#include <sys/scsi/scsi.h>
+#include <sys/refhash.h>
#pragma pack(1)
#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_type.h>
@@ -47,7 +48,6 @@
#pragma pack()
#include <sys/scsi/adapters/mpt_sas/mptsas_var.h>
-#include <sys/scsi/adapters/mpt_sas/mptsas_hash.h>
struct {
int value;
diff --git a/usr/src/cmd/varpd/Makefile b/usr/src/cmd/varpd/Makefile
new file mode 100644
index 0000000000..512680d246
--- /dev/null
+++ b/usr/src/cmd/varpd/Makefile
@@ -0,0 +1,61 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+PROG= varpd
+OBJS = varpd.o
+SRCS = $(OBJS:%.o=../%.c)
+SVCMETHOD = svc-varpd
+MANIFEST = varpd.xml
+
+
+include ../Makefile.cmd
+include ../Makefile.ctf
+
+ROOTMANIFESTDIR= $(ROOTSVCNETWORK)
+
+CLEANFILES += $(OBJS)
+CPPFLAGS += -D_REENTRANT
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lvarpd -lumem
+$(NOT_RELEASE_BUILD)CPPFLAGS += -DDEBUG
+
+# XXX Development only
+CFLAGS += -_gcc=-Wno-unused-function
+
+.KEEP_STATE:
+
+$(PROG): $(OBJS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
+clean:
+ -$(RM) $(CLEANFILES)
+
+lint: lint_PROG
+
+%.o: ../%.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
+check: $(CHKMANIFEST)
+
+clobber: clean
+ $(RM) $(PROG)
+
+install: $(PROG) $(ROOTUSRSBINPROG) $(ROOTMANIFEST) $(ROOTSVCMETHOD)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/varpd/svc-varpd b/usr/src/cmd/varpd/svc-varpd
new file mode 100644
index 0000000000..0ca9920cef
--- /dev/null
+++ b/usr/src/cmd/varpd/svc-varpd
@@ -0,0 +1,35 @@
+#!/usr/bin/sh
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+. /lib/svc/share/smf_include.sh
+
+#
+# XXX For the time being, we're going to manually make sure that the
+# overlay driver is loaded. We probably shouldn't do that in the long
+# run, but it helps for getting stuff started.
+#
+add_drv overlay 2>/dev/null
+
+#
+# TODO This should be a service property.
+#
+/usr/sbin/varpd -i /usr/lib/varpd
+if [ $? = 0 ]; then
+ exit $SMF_EXIT_OK
+else
+ exit $SMF_EXIT_ERR_FATAL
+fi
diff --git a/usr/src/cmd/varpd/varpd.c b/usr/src/cmd/varpd/varpd.c
new file mode 100644
index 0000000000..88eb31d24c
--- /dev/null
+++ b/usr/src/cmd/varpd/varpd.c
@@ -0,0 +1,440 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * virtual arp daemon -- varpd
+ *
+ * The virtual arp daemon is the user land counterpart to overlay(9XXX). It's
+ * purpose is to provide a means for looking up mappings between layer two hosts
+ * and a corresponding encapsulation plugin.
+ */
+
+#include <libvarpd.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <libgen.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <paths.h>
+#include <limits.h>
+#include <sys/corectl.h>
+#include <signal.h>
+#include <strings.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <thread.h>
+
+#define VARPD_EXIT_REQUESTED 0
+#define VARPD_EXIT_FATAL 1
+#define VARPD_EXIT_USAGE 2
+
+#define VARPD_RUNDIR "/var/run/varpd"
+#define VARPD_DEFAULT_DOOR "/var/run/varpd/varpd.door"
+
+static varpd_handle_t *varpd_handle;
+static const char *varpd_pname;
+static volatile boolean_t varpd_exit = B_FALSE;
+
+/*
+ * Debug builds are automatically wired up for umem debugging.
+ */
+#ifdef DEBUG
+const char *
+_umem_debug_init()
+{
+ return ("default,verbose");
+}
+
+const char *
+_umem_logging_init(void)
+{
+ return ("fail,contents");
+}
+#endif /* DEBUG */
+
+static void
+varpd_vwarn(FILE *out, const char *fmt, va_list ap)
+{
+ int error = errno;
+
+ (void) fprintf(out, "%s: ", varpd_pname);
+ (void) vfprintf(out, fmt, ap);
+
+ if (fmt[strlen(fmt) - 1] != '\n')
+ (void) fprintf(out, ": %s\n", strerror(error));
+}
+
+static void
+varpd_warn(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ varpd_vwarn(stderr, fmt, ap);
+ va_end(ap);
+}
+
+static void
+varpd_fatal(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ varpd_vwarn(stderr, fmt, ap);
+ va_end(ap);
+
+ exit(VARPD_EXIT_FATAL);
+}
+
+static void
+varpd_info(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ varpd_vwarn(stdout, fmt, ap);
+ va_end(ap);
+ (void) fflush(stdout);
+}
+
+static void
+varpd_dfatal(int dfd, const char *fmt, ...)
+{
+ int status = VARPD_EXIT_FATAL;
+ va_list ap;
+
+ va_start(ap, fmt);
+ varpd_vwarn(stdout, fmt, ap);
+ va_end(ap);
+
+ /* Take a single shot at this */
+ (void) write(dfd, &status, sizeof (status));
+ exit(status);
+}
+
+static int
+varpd_plugin_walk_cb(varpd_handle_t *vph, const char *name, void *unused)
+{
+ printf("loaded %s!\n", name);
+ return (0);
+}
+
+static void
+varpd_dir_setup(void)
+{
+ int fd;
+
+ if (mkdir(VARPD_RUNDIR, 0700) != 0) {
+ if (errno != EEXIST)
+ varpd_fatal("failed to create %s", VARPD_RUNDIR);
+ }
+
+ fd = open(VARPD_RUNDIR, O_RDONLY);
+ if (fd < 0)
+ varpd_fatal("failed to open %s", VARPD_RUNDIR);
+}
+
+/*
+ * Because varpd is generally run under SMF, we opt to keep its stdout and
+ * stderr to be whatever our parent set them up to be.
+ */
+static void
+varpd_fd_setup(void)
+{
+ int dupfd;
+
+ closefrom(STDERR_FILENO + 1);
+ dupfd = open(_PATH_DEVNULL, O_RDONLY);
+ if (dupfd < 0)
+ varpd_fatal("failed to open %s", _PATH_DEVNULL);
+ if (dup2(dupfd, STDIN_FILENO) == -1)
+ varpd_fatal("failed to dup out stdin");
+}
+
+/*
+ * We borrow fmd's daemonization style. Basically, the parent waits for the
+ * child to successfully set up a door and recover all of the old configurations
+ * before we say that we're good to go.
+ */
+static int
+varpd_daemonize(void)
+{
+ char path[PATH_MAX];
+ struct rlimit rlim;
+ sigset_t set, oset;
+ int estatus, pfds[2];
+ pid_t child;
+
+ /*
+ * Set a per-process core path to be inside of /var/run/varpd. Make sure
+ * that we aren't limited in our dump size.
+ */
+ (void) snprintf(path, sizeof (path),
+ "/var/run/varpd/core.%s.%%p", varpd_pname);
+ (void) core_set_process_path(path, strlen(path) + 1, getpid());
+
+ rlim.rlim_cur = RLIM_INFINITY;
+ rlim.rlim_max = RLIM_INFINITY;
+ (void) setrlimit(RLIMIT_CORE, &rlim);
+
+ /*
+ * Claim as many file descriptors as the system will let us.
+ */
+ if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
+ rlim.rlim_cur = rlim.rlim_max;
+ (void) setrlimit(RLIMIT_NOFILE, &rlim);
+ }
+
+ /*
+ * chdir /var/run/varpd
+ */
+ if (chdir(VARPD_RUNDIR) != 0)
+ varpd_fatal("failed to chdir to %s", VARPD_RUNDIR);
+
+ /*
+ * XXX Drop privileges here some day. For the moment, we'll drop a few
+ * that we know we shouldn't need.
+ */
+
+
+ /*
+ * At this point block all signals going in so we don't have the parent
+ * mistakingly exit when the child is running, but never block SIGABRT.
+ */
+ if (sigfillset(&set) != 0)
+ abort();
+ if (sigdelset(&set, SIGABRT) != 0)
+ abort();
+ if (sigprocmask(SIG_BLOCK, &set, &oset) != 0)
+ abort();
+
+ /*
+ * Do the fork+setsid dance.
+ */
+ if (pipe(pfds) != 0)
+ varpd_fatal("failed to create pipe for daemonizing");
+
+ if ((child = fork()) == -1)
+ varpd_fatal("failed to fork for daemonizing");
+
+ if (child != 0) {
+ /* We'll be exiting shortly, so allow for silent failure */
+ (void) close(pfds[1]);
+ if (read(pfds[0], &estatus, sizeof (estatus)) ==
+ sizeof (estatus))
+ _exit(estatus);
+
+ if (waitpid(child, &estatus, 0) == child && WIFEXITED(estatus))
+ _exit(WEXITSTATUS(estatus));
+
+ _exit(VARPD_EXIT_FATAL);
+ }
+
+ if (close(pfds[0]) != 0)
+ abort();
+ if (setsid() == -1)
+ abort();
+ if (sigprocmask(SIG_SETMASK, &oset, NULL) != 0)
+ abort();
+ (void) umask(0022);
+
+ return (pfds[1]);
+}
+
+static int
+varpd_setup_lookup_threads(void)
+{
+ int ret;
+ long i, ncpus = sysconf(_SC_NPROCESSORS_ONLN) * 2 + 1;
+
+ if (ncpus <= 0)
+ abort();
+ for (i = 0; i < ncpus; i++) {
+ thread_t thr;
+
+ ret = thr_create(NULL, 0,
+ (void *(*)(void *))libvarpd_overlay_lookup_run,
+ varpd_handle, THR_DETACHED | THR_DAEMON, &thr);
+ if (ret != 0)
+ return (ret);
+ }
+
+ return (0);
+}
+
+static void
+varpd_cleanup(void)
+{
+ varpd_exit = B_TRUE;
+}
+
+/*
+ * There are a bunch of things we need to do to be a proper daemon here.
+ *
+ * o Ensure that /var/run/varpd exists or create it
+ * o make stdin /dev/null (stdout?)
+ * o Ensure any other fds that we somehow inherited are closed, eg.
+ * closefrom()
+ * o Properly daemonize
+ * o Mask all signals except sigabrt before creating our first door -- all
+ * other doors will inherit from that.
+ * o Have the main thread sigsuspend looking for most things that are
+ * actionable...
+ */
+int
+main(int argc, char *argv[])
+{
+ int err, c, dfd, i;
+ const char *doorpath = VARPD_DEFAULT_DOOR;
+ sigset_t set;
+ struct sigaction act;
+ int nincpath = 0, nextincpath = 0;
+ char **incpath = NULL;
+
+ varpd_pname = basename(argv[0]);
+
+ /*
+ * We want to clean up our file descriptors before we do anything else
+ * as we can't assume that libvarpd won't open file descriptors, etc.
+ */
+ varpd_fd_setup();
+
+ if ((err = libvarpd_create(&varpd_handle)) != 0) {
+ varpd_fatal("failed to open a libvarpd handle");
+ return (1);
+ }
+
+ while ((c = getopt(argc, argv, ":i:d:")) != -1) {
+ switch (c) {
+ case 'i':
+ if (nextincpath == nincpath) {
+ if (nincpath == 0)
+ nincpath = 16;
+ else
+ nincpath *= 2;
+ incpath = realloc(incpath, sizeof (char *) *
+ nincpath);
+ if (incpath == NULL) {
+ (void) fprintf(stderr, "failed to "
+ "allocate memory for the %dth "
+ "-I option: %s\n", nextincpath + 1,
+ strerror(errno));
+ }
+
+ }
+ incpath[nextincpath] = optarg;
+ nextincpath++;
+ break;
+ case 'd':
+ doorpath = optarg;
+ break;
+ default:
+ (void) fprintf(stderr, "unknown option: %c\n", c);
+ return (1);
+ }
+ }
+
+ varpd_dir_setup();
+
+ libvarpd_plugin_walk(varpd_handle, varpd_plugin_walk_cb, NULL);
+
+ dfd = varpd_daemonize();
+
+ /*
+ * Now that we're in the child, go ahead and load all of our plug-ins.
+ * We do this, in part, because these plug-ins may need threads of their
+ * own and fork won't preserve those and we'd rather the plug-ins don't
+ * have to learn about fork-handlers.
+ */
+ for (i = 0; i < nextincpath; i++) {
+ err = libvarpd_plugin_load(varpd_handle, incpath[i]);
+ if (err != 0) {
+ (void) fprintf(stderr, "failed to load from %s: %s\n",
+ optarg, strerror(err));
+ return (1);
+ }
+ }
+
+ if ((err = libvarpd_persist_enable(varpd_handle, VARPD_RUNDIR)) != 0)
+ varpd_dfatal(dfd, "failed to enable varpd persistence: %s\n",
+ strerror(err));
+
+ if ((err = libvarpd_persist_restore(varpd_handle)) != 0)
+ varpd_dfatal(dfd, "failed to enable varpd persistence: %s\n",
+ strerror(err));
+
+ /*
+ * The ur-door thread will inherit from this signal mask. So set it to
+ * what we want before doing anything else. In addition, so will our
+ * threads that handle varpd lookups.
+ */
+ if (sigfillset(&set) != 0)
+ varpd_dfatal(dfd, "failed to fill a signal set...");
+
+ if (sigdelset(&set, SIGABRT) != 0)
+ varpd_dfatal(dfd, "failed to unmask SIGABRT");
+
+ if (sigprocmask(SIG_BLOCK, &set, NULL) != 0)
+ varpd_dfatal(dfd, "failed to set our door signal mask");
+
+ if ((err = varpd_setup_lookup_threads()) != 0)
+ varpd_dfatal(dfd, "failed to create lookup threads: %s\n",
+ strerror(err));
+
+ if ((err = libvarpd_door_server_create(varpd_handle, doorpath)) != 0)
+ varpd_dfatal(dfd, "failed to create door server at %s: %s\n",
+ doorpath, strerror(err));
+
+ /*
+ * At this point, finish up signal intialization and finally go ahead,
+ * notify the parent that we're okay, and enter the sigsuspend loop.
+ */
+ bzero(&act, sizeof (struct sigaction));
+ act.sa_handler = varpd_cleanup;
+ if (sigfillset(&act.sa_mask) != 0)
+ varpd_dfatal(dfd, "failed to fill sigaction mask");
+ act.sa_flags = 0;
+ if (sigaction(SIGHUP, &act, NULL) != 0)
+ varpd_dfatal(dfd, "failed to register HUP handler");
+ if (sigaction(SIGQUIT, &act, NULL) != 0)
+ varpd_dfatal(dfd, "failed to register QUIT handler");
+ if (sigaction(SIGINT, &act, NULL) != 0)
+ varpd_dfatal(dfd, "failed to register INT handler");
+ if (sigaction(SIGTERM, &act, NULL) != 0)
+ varpd_dfatal(dfd, "failed to register TERM handler");
+
+ err = 0;
+ (void) write(dfd, &err, sizeof (err));
+ (void) close(dfd);
+
+ for (;;) {
+ if (sigsuspend(&set) == -1)
+ if (errno == EFAULT)
+ abort();
+ if (varpd_exit == B_TRUE)
+ break;
+ }
+
+ libvarpd_door_server_destroy(varpd_handle);
+ libvarpd_destroy(varpd_handle);
+
+ return (VARPD_EXIT_REQUESTED);
+}
diff --git a/usr/src/cmd/varpd/varpd.xml b/usr/src/cmd/varpd/varpd.xml
new file mode 100644
index 0000000000..e11525cd74
--- /dev/null
+++ b/usr/src/cmd/varpd/varpd.xml
@@ -0,0 +1,52 @@
+<?xml version="1.0"?>
+<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1">
+<!--
+This file and its contents are supplied under the terms of the
+Common Development and Distribution License ("CDDL"), version 1.0.
+You may only use this file in accordance with the terms of version
+1.0 of the CDDL.
+
+A full copy of the text of the CDDL should have accompanied this
+source. A copy of the CDDL is also available via the Internet at
+http://www.illumos.org/license/CDDL.
+
+Copyright (c) 2014, Joyent, Inc. All rights resreved.
+-->
+
+<service_bundle type="manifest" name="illumos:varpd" >
+
+ <service name="network/varpd" type="service" version="1" >
+
+ <create_default_instance enabled="true" />
+
+ <single_instance/>
+
+ <dependency name="varpd-network-physical"
+ grouping="require_all"
+ restart_on="none"
+ type="service">
+ <service_fmri value="svc:/network/physical:default" />
+ </dependency>
+
+ <exec_method
+ type="method"
+ name="start"
+ exec="/lib/svc/method/svc-varpd"
+ timeout_seconds="60" />
+
+ <exec_method
+ type="method"
+ name="stop"
+ exec=":kill"
+ timeout_seconds="10" />
+
+ <stability value='Unstable' />
+
+ <template>
+ <common_name>
+ <loctext xml:lang="C">virutal ARP daemon
+ </loctext>
+ </common_name>
+ </template>
+ </service>
+</service_bundle>
diff --git a/usr/src/uts/common/os/id_space.c b/usr/src/common/idspace/id_space.c
index 2dad0cb940..543dff5d40 100644
--- a/usr/src/uts/common/os/id_space.c
+++ b/usr/src/common/idspace/id_space.c
@@ -53,6 +53,10 @@
* reservation, in which an ID is allocated but placed in a internal
* dictionary for later use, should be added when a consuming subsystem
* arrives.)
+ *
+ * This code is also shared with userland. In userland, we don't have the same
+ * ability to have sleeping variants, so we effectively turn the normal
+ * versions without _nosleep into _nosleep.
*/
#define ID_TO_ADDR(id) ((void *)(uintptr_t)(id + 1))
@@ -60,16 +64,22 @@
/*
* Create an arena to represent the range [low, high).
- * Caller must be in a context in which VM_SLEEP is legal.
+ * Caller must be in a context in which VM_SLEEP is legal,
+ * for the kernel. Always VM_NOSLEEP in userland.
*/
id_space_t *
id_space_create(const char *name, id_t low, id_t high)
{
ASSERT(low >= 0);
ASSERT(low < high);
+#ifdef _KERNEL
+ int flag = VM_SLEEP;
+#else
+ int flag = VM_NOSLEEP;
+#endif
return (vmem_create(name, ID_TO_ADDR(low), high - low, 1,
- NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER));
+ NULL, NULL, NULL, 0, flag | VMC_IDENTIFIER));
}
/*
@@ -85,7 +95,12 @@ id_space_destroy(id_space_t *isp)
void
id_space_extend(id_space_t *isp, id_t low, id_t high)
{
- (void) vmem_add(isp, ID_TO_ADDR(low), high - low, VM_SLEEP);
+#ifdef _KERNEL
+ int flag = VM_SLEEP;
+#else
+ int flag = VM_NOSLEEP;
+#endif
+ (void) vmem_add(isp, ID_TO_ADDR(low), high - low, flag);
}
/*
@@ -95,7 +110,12 @@ id_space_extend(id_space_t *isp, id_t low, id_t high)
id_t
id_alloc(id_space_t *isp)
{
- return (ADDR_TO_ID(vmem_alloc(isp, 1, VM_SLEEP | VM_NEXTFIT)));
+#ifdef _KERNEL
+ int flag = VM_SLEEP;
+#else
+ int flag = VM_NOSLEEP;
+#endif
+ return (ADDR_TO_ID(vmem_alloc(isp, 1, flag | VM_NEXTFIT)));
}
/*
@@ -116,7 +136,12 @@ id_alloc_nosleep(id_space_t *isp)
id_t
id_allocff(id_space_t *isp)
{
- return (ADDR_TO_ID(vmem_alloc(isp, 1, VM_SLEEP | VM_FIRSTFIT)));
+#ifdef _KERNEL
+ int flag = VM_SLEEP;
+#else
+ int flag = VM_NOSLEEP;
+#endif
+ return (ADDR_TO_ID(vmem_alloc(isp, 1, flag | VM_FIRSTFIT)));
}
/*
diff --git a/usr/src/docs/admin b/usr/src/docs/admin
new file mode 100644
index 0000000000..42fb2ba045
--- /dev/null
+++ b/usr/src/docs/admin
@@ -0,0 +1,252 @@
+# Overlay Networks Administration Guide
+
+To better understand the implementation of overlay networks from a
+non-data path point of view, it's useful to put together a straw man
+proposal for how these work from an administration point of view.
+
+This goes through a bunch of options and leaves some open questions. Any
+thoughts on it should be directed to Robert Mustacchi <rm@joyent.com> or
+rmustacc on irc.freenode.net.
+
+## Overlay Devices Background
+
+It's worth going into a bit of background as to what overlay devices
+look like. An overlay device is a combination of an etherstub and an IP
+tunnel device with potentially multiple destinations. An overlay device
+has two different components: an encapsulation module and a search
+module.
+
+The encapsulation module defines how packets get transmitted on the wire
+and aspects of their transport. Consider the two rather popular
+encapsulation methods: VXLAN and NVGRE. The VXLAN specification defines
+the VXLAN header that gets put on an Ethernet frame and then defines
+that it be sent over UDP. NVGRE on the other hand defines that it should
+use a GRE header with a specific GRE protocol field, which is itself is
+a separate IP type.
+
+In addition, today, because they define the mechanism for encapsulation
+and decapsulation, they also define how they should receive packets. In
+other words, what IP address and in the case of VXLAN, port, they
+listen on.
+
+
+The search plugin, on the other hand, defines how we route encapsulated
+packets to a destination. The most simplest form of this is a point to
+point tunnel where the destination plugin sends everything to a single
+unicast IP address. Only slightly more complicated would be something
+that sent it to a single multicast address. However, these plugins also
+allow for custom means of looking up destinations and dynamic
+destinations. In other words, sending an Ethernet frame to a different
+destination based on its mac address or other criteria. This may be
+implemented by having a list of mappings in files, or there may be some
+much more advanced mechanism that integrates with a broader
+orchestration suite or other database. We need to think about both of
+these different classes of search plugins - static and dynamic as we go
+through this.
+
+On top off these devices, a series of VNICs can be created, like an
+etherstub. All their traffic will be subject to a local virtual switch
+and then sent over the encapsulated network.
+
+## Basic model
+
+We want to introduce a new object into dladm that we call an overlay. An
+overlay is its own class, like bridges, IP tunnels, and etherstubs.
+There would generally be four basic commands, much like other parts of
+dladm:
+
+ o dladm create-overlay -- Creates an overlay device
+ o dladm show-overlay -- Shows information about overlay devices
+ o dladm modify-overlay -- Modifies aspects of overlay devices
+ o dladm delete-overlay -- Remove an overlay device
+
+
+### Example: Creating a point to point VXLAN overlay network
+
+Let's consider the act of creating and manipulating a VXLAN overlay
+network. There are a handful of properties that we care about:
+
+ o The virtual network id, the unique VXLAN identifier
+ o The UDP IP address and port that we're listening on
+ o The UDP IP address and port that we're going to send to
+
+In the VXLAN case, we have a default port that we'd like to use that's
+been assigned by IANA, but obviously, allow it to be overridden. In
+addition, we've listed two implicit properties in this example that we
+want to first class:
+
+ o The encapsulation plugin, VXLAN
+ o The search plugin, a direct point to point tunnel we'll call 'direct'
+
+Here's one idea of how this could look:
+
+```
+dladm create-overlay -e vxlan -s direct -v 169 -p vxlan/listen_ip=a.b.c.d -p direct/dest_ip=e.f.g.h overlay0
+```
+
+If we take this apart, -e specifies the encapsulation module that we
+should use for this device. While -s is specifying that we should use
+the 'direct' search plugin which is a point to point tunnel. While we
+have default values for the vxlan/listen_port and we can use the same
+default for the search plugin, we don't have the same for an IP address.
+Because of this, we have to specify it on the command line.
+
+In this world, -e, -s, and -v are sugar for properties. Respectively the
+properties 'encap', 'search', and 'vnetid' That means this could look
+like:
+
+```
+dladm create-overlay -p encap=vxlan -p search=direct -p vnetid=169 -p vxlan/listen_ip=a.b.c.d -p direct/dest_ip=e.f.g.h overlay0
+```
+
+In addition, much like with other dladm objects, a user would also be
+able to specify other properties on the creation command like, such as
+the vxlan listen port (vxlan/listen_port) or something like the MTU.
+
+I believe that having a summarized version with some options that are
+reflected with abbreviations like in the first form makes sense, but we
+don't want to elevate too much into short letters, otherwise every
+plugin, while overlap exists, will cause us to go through and run out of
+letters.
+
+## Propety namespaces
+
+In the above, you'll notice that we've namespaced various properties
+such as the vxlan/listen_ip. There is nothing special about these
+properties. This is just a way of grouping things in a way that might be
+a bit more useful for folks. This makes it easy to see which properties
+are related to the fact that we're doing vxlan encapsulation or some
+other plugin-based aspect. The unprefixed properties should all be
+generic.
+
+
+## Default and required properties
+
+Each plugin is going to have to define their own set of required
+properties and whether they have default values. For example, vxlan
+defines two properties:
+
+ o vxlan/listen_ip
+ o vxlan/listen_port
+
+The listen_port has a default value of 4789. While this is what most
+folks will want to use, some will want to override it at creation time.
+However, just like we can modify the defaults with ipadm, we'll want to
+do the same here and have some mode that allows us to modify them. This
+will allow an administrator to override a default property for their
+site.
+
+In addition to those default, some plugins have properties that require
+an argument, but have no default. There's no good default for an IP
+address to listen on. INADDR_ANY or loop back (and their v6 equivalents)
+are almost certainly wrong choices. As such, the plugins will need to
+define their required properties. This should probably be displayed in
+some way along with the defaults. Perhaps this is done as a new flag on
+modify-overlay and show-overlay to see the default/required sets?
+
+```
+dladm show-overlay -d
+dladm modify-overlay -d
+```
+
+
+## Working through other examples
+
+To help evaluate other behavior and choices, it's worth working through
+some other examples and seeing what kinds of questions this causes us to
+have.
+
+### NVGRE and Geneve
+
+NVGRE and Geneve are other encapsulation modules. NVGRE is already
+widely deployed generally in MSFT related software. Today, it fits the
+model fairly well. NVGRE just has a single property:
+
+ o nvgre/listen_ip
+
+Geneve on the other hand is yet another draft that is supposed to bring
+the titans of NVGRE and VXLAN together. It's another UDP like protocol.
+It has properties like VXLAN:
+
+ o geneve/listen_ip
+ o geneve/listen_port
+
+
+Now, you might be saying, but wait rm, we're always redefining
+listen_ip! Can't we just make that a first class property. Oh and maybe
+listen_port, but wait that NVGRE isn't using it. Hmm.
+
+So, I have thought about this, but I think for the time, I'd rather make
+them specific to the plugin. There may be something new that comes
+around and actually doesn't use IP at all and in fact speaks on an
+ethertype. At which point we'll want to use dls/dlpi/vnd to manage that
+communication and there will not be any IP. Of course, this doesn't
+exist today, but I think it doesn't make sense to elevate it at first.
+If it turns out that this is all terrible, then we can go figure that
+out.
+
+### Multicast
+
+Another common thing that we're going to build out of the gate is a
+multicast module. Unlike the direct model, with the multicast model, the
+user subscribes to a multicast address to receive packets and sends
+packets out to a multicast group. In this case, the normal listen_ip
+would still exist, it would just be a multicast IP. However instead of a
+multicast/direct_ip, we would have:
+
+ o multicast/dest_ip
+
+### Search plugins and different encap types
+
+One of the things that we'd like to do is have an encapsulation
+algorithm encode what it requires to be directed, eg. VXLAN requires and
+IP address and a port. NVGRE just requires an IP. We'll have t work out
+an interface, but the search plugin will be able to get some information
+from the encapsulation plugin which will let it know if it should
+require an IP address, an IP address and a port, or some other set of
+configuration. It will also provide a way to share defaults.
+Realistically, the search plugin, should probably just inherit the
+default from the encap plugin, if appropriate. Otherwise an
+administrator should define a global default for all encapsulation
+plugins for the search module. While the former is useful, the latter
+isn't going to be very realistic.
+
+### Files plugin and dynamic destinations
+
+The files plugin is interesting, because it moves away from the world of
+static maps to dynamic destinations. In this case, the destination of a
+packet with a mac address m, is defined by some file on the system,
+similar to /etc/hosts (but an entirely different format).
+
+The interesting thing about the files model is that it requires some
+additional features, which we need to implement anyways. For example
+we'll need to:
+
+ o Be able to flush the destination map
+ o Obtain some way of getting the destination map from the kernel
+ o For the files case, tell it to refresh the mappings from the file
+ o Update the mapping file to something else
+
+While the last of these would be done through a normal modify-overlay of
+a property, the question is how would we handle all of these other
+actions. They basically have us performing actions on the device that,
+in some cases, are specific to the plugin-type.
+
+How should this be done? Should these be kind of transitive properties
+that exist? Like a 'files/refresh' property that you set to true? I'm
+not sure I really like the idea of transitive properties as much as I
+perhaps want to have some alternative option space to modify-overlay
+perhaps that allows you to inject one of these options.
+
+Perhaps all of these three things instead should be part of some
+command that isn't dladm? I dunno, I'm not really partial to that.
+
+## Concluding Thoughts
+
+All in all, I think we have a reasonable framework to start with, but
+some of these details kind of matter and as we go further down the path
+of implementation, the tradeoffs and options that we want to make will
+make more sense. It may very well be that there will not be many
+properties except for the Joyent or other user specific search
+mechanisms and therefore spending too much time on this isn't
+worthwhile.
diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile
index 372fc4327c..18bcd74901 100644
--- a/usr/src/lib/Makefile
+++ b/usr/src/lib/Makefile
@@ -22,7 +22,7 @@
# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2012 by Delphix. All rights reserved.
-# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
# Copyright (c) 2013 Gary Mills
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
@@ -137,6 +137,7 @@ SUBDIRS += \
libpp \
libproc \
libproject \
+ librename \
libsendfile \
nametoaddr \
ncad_addr \
@@ -162,7 +163,10 @@ SUBDIRS += \
libm1 \
libm \
libmvec \
- libvnd
+ libvnd \
+ libidspace \
+ varpd \
+ libbunyan
SUBDIRS += \
passwdutil \
@@ -371,6 +375,7 @@ HDRSUBDIRS= \
libast \
libbrand \
libbsm \
+ libbunyan \
libc \
libcmd \
libcmdutils \
@@ -400,6 +405,7 @@ HDRSUBDIRS= \
libfru \
libfstyp \
libgen \
+ libidspace \
libipadm \
libipd \
libipsecutil \
@@ -431,6 +437,7 @@ HDRSUBDIRS= \
libraidcfg \
librcm \
librdc \
+ librename \
libscf \
libsip \
libsmbios \
@@ -568,7 +575,7 @@ libdevinfo: libnvpair libsec
libdhcpagent: libsocket libdhcputil libuuid libdlpi libcontract
libdhcputil: libnsl libgen libinetutil libdlpi
libdladm: libdevinfo libinetutil libsocket libscf librcm libnvpair \
- libexacct libnsl libkstat libcurses
+ libexacct libnsl libkstat libcurses varpd
libdll: libast
libdlpi: libinetutil libdladm
libds: libsysevent
@@ -657,14 +664,18 @@ libreparse: libnvpair
libhotplug: libnvpair
cfgadm_plugins: libhotplug
libilb: libsocket
+libidspace: libumem
libipmi: libm
libprtdiag: libm
libsqlite: libm
libstmf: libm
libvscan: libm
+libbunyan: libnvpair
$(INTEL_BUILD)libdiskmgt:libfdisk
+varpd: libavl libidspace libumem libnsl libnvpair libmd5 librename \
+ libbunyan
#
# The reason this rule checks for the existence of the
diff --git a/usr/src/lib/libbunyan/Makefile b/usr/src/lib/libbunyan/Makefile
new file mode 100644
index 0000000000..a59de91113
--- /dev/null
+++ b/usr/src/lib/libbunyan/Makefile
@@ -0,0 +1,42 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc.
+#
+
+include ../Makefile.lib
+
+HDRS = bunyan.h
+HDRDIR = common
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+install_h: $(ROOTHDRS)
+
+check: $(CHECKHDRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/lib/libbunyan/Makefile.com b/usr/src/lib/libbunyan/Makefile.com
new file mode 100644
index 0000000000..5214915c56
--- /dev/null
+++ b/usr/src/lib/libbunyan/Makefile.com
@@ -0,0 +1,36 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+LIBRARY = libbunyan.a
+VERS = .1
+OBJECTS = bunyan.o
+USDT_PROVIDERS = bunyan_provider.d
+
+include ../../Makefile.lib
+
+LIBS = $(DYNLIB) $(LINTLIB)
+LDLIBS += -lc -lumem -lnvpair -lnsl
+CPPFLAGS += -I../common -I. -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
+
+SRCDIR = ../common
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../Makefile.targ
+include ../../Makefile.usdt
diff --git a/usr/src/lib/libbunyan/amd64/Makefile b/usr/src/lib/libbunyan/amd64/Makefile
new file mode 100644
index 0000000000..15d904c616
--- /dev/null
+++ b/usr/src/lib/libbunyan/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libbunyan/common/bunyan.c b/usr/src/lib/libbunyan/common/bunyan.c
new file mode 100644
index 0000000000..30398aa098
--- /dev/null
+++ b/usr/src/lib/libbunyan/common/bunyan.c
@@ -0,0 +1,913 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#include <errno.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <umem.h>
+#include <netdb.h>
+#include <string.h>
+#include <strings.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/sysmacros.h>
+#include <thread.h>
+#include <sys/debug.h>
+
+#include <bunyan.h>
+#include <bunyan_provider_impl.h>
+
+struct bunyan_key;
+struct bunyan_stream;
+struct bunyan;
+
+typedef struct bunyan_stream {
+ struct bunyan_stream *bs_next;
+ char *bs_name;
+ bunyan_level_t bs_level;
+ bunyan_stream_f bs_func;
+ void *bs_arg;
+ uint_t bs_count;
+} bunyan_stream_t;
+
+typedef struct bunyan_key {
+ struct bunyan_key *bk_next;
+ char *bk_name;
+ bunyan_type_t bk_type;
+ void *bk_data;
+ size_t bk_len;
+} bunyan_key_t;
+
+typedef struct bunyan {
+ pthread_mutex_t bun_lock;
+ bunyan_key_t *bun_keys;
+ bunyan_stream_t *bun_streams;
+ char *bun_name;
+ char bun_host[MAXHOSTNAMELEN+1];
+} bunyan_t;
+
+#define ISO_TIMELEN 25
+static const int bunyan_version = 0;
+
+static void
+bunyan_key_fini(bunyan_key_t *bkp)
+{
+ size_t nlen = strlen(bkp->bk_name) + 1;
+ umem_free(bkp->bk_data, bkp->bk_len);
+ umem_free(bkp->bk_name, nlen);
+ umem_free(bkp, sizeof (bunyan_key_t));
+}
+
+static void
+bunyan_stream_fini(bunyan_stream_t *bsp)
+{
+ size_t nlen = strlen(bsp->bs_name) + 1;
+ umem_free(bsp->bs_name, nlen);
+ umem_free(bsp, sizeof (bunyan_stream_t));
+}
+
+int
+bunyan_init(const char *name, bunyan_logger_t **bhp)
+{
+ int ret;
+ bunyan_t *b;
+ size_t nlen = strlen(name) + 1;
+
+ b = umem_zalloc(sizeof (bunyan_t), UMEM_DEFAULT);
+ if (b == NULL)
+ return (ENOMEM);
+
+ b->bun_name = umem_alloc(nlen, UMEM_DEFAULT);
+ if (b->bun_name == NULL) {
+ umem_free(b, sizeof (bunyan_t));
+ return (ENOMEM);
+ }
+ bcopy(name, b->bun_name, nlen);
+
+ if ((ret = pthread_mutex_init(&b->bun_lock, NULL)) != 0) {
+ umem_free(b->bun_name, nlen);
+ umem_free(b, sizeof (bunyan_t));
+ return (ret);
+ }
+
+ VERIFY(gethostname(b->bun_host, sizeof (b->bun_host)) == 0);
+ b->bun_host[MAXHOSTNAMELEN] = '\0';
+
+ *bhp = (bunyan_logger_t *)b;
+ return (0);
+}
+
+void
+bunyan_fini(bunyan_logger_t *bhp)
+{
+ bunyan_t *b = (bunyan_t *)bhp;
+ bunyan_key_t *bkp;
+ bunyan_stream_t *bsp;
+
+ while ((bkp = b->bun_keys) != NULL) {
+ b->bun_keys = bkp->bk_next;
+ bunyan_key_fini(bkp);
+ }
+
+ while ((bsp = b->bun_streams) != NULL) {
+ b->bun_streams = bsp->bs_next;
+ bunyan_stream_fini(bsp);
+ }
+
+ if (b->bun_name != NULL)
+ umem_free(b->bun_name, strlen(b->bun_name) + 1);
+
+ VERIFY(pthread_mutex_destroy(&b->bun_lock) == 0);
+ umem_free(b, sizeof (bunyan_t));
+}
+
+/* ARGSUSED */
+int
+bunyan_stream_fd(nvlist_t *nvl, const char *js, void *arg)
+{
+ uintptr_t fd = (uintptr_t)arg;
+ size_t jslen = strlen(js);
+ off_t off = 0;
+ ssize_t ret = 0;
+ static int maxbuf = -1;
+
+ if (maxbuf == -1)
+ maxbuf = getpagesize();
+
+ while (off != jslen) {
+ /*
+ * Write up to a page of data at a time. If for some reason an
+ * individual write fails, move on and try to still write a new
+ * line at least...
+ */
+ ret = write(fd, js + off, MIN(jslen - off, maxbuf));
+ if (ret < 0)
+ break;
+ off += ret;
+ }
+
+ if (ret < 0) {
+ (void) write(fd, "\n", 1);
+ } else {
+ ret = write(fd, "\n", 1);
+ }
+ return (ret < 0 ? 1: 0);
+}
+
+int
+bunyan_stream_add(bunyan_logger_t *bhp, const char *name, int level,
+ bunyan_stream_f func, void *arg)
+{
+ bunyan_stream_t *bs, *cur;
+ size_t nlen = strlen(name) + 1;
+ bunyan_t *b = (bunyan_t *)bhp;
+
+ if (level != BUNYAN_L_TRACE &&
+ level != BUNYAN_L_DEBUG &&
+ level != BUNYAN_L_INFO &&
+ level != BUNYAN_L_WARN &&
+ level != BUNYAN_L_ERROR &&
+ level != BUNYAN_L_FATAL)
+ return (EINVAL);
+
+ bs = umem_alloc(sizeof (bunyan_stream_t), UMEM_DEFAULT);
+ if (bs == NULL)
+ return (ENOMEM);
+
+ bs->bs_name = umem_alloc(nlen, UMEM_DEFAULT);
+ if (bs->bs_name == NULL) {
+ umem_free(bs, sizeof (bunyan_stream_t));
+ return (ENOMEM);
+ }
+ bcopy(name, bs->bs_name, nlen);
+ bs->bs_level = level;
+ bs->bs_func = func;
+ bs->bs_arg = arg;
+ bs->bs_count = 0;
+ (void) pthread_mutex_lock(&b->bun_lock);
+ cur = b->bun_streams;
+ while (cur != NULL) {
+ if (strcmp(name, cur->bs_name) == 0) {
+ (void) pthread_mutex_unlock(&b->bun_lock);
+ umem_free(bs->bs_name, nlen);
+ umem_free(bs, sizeof (bunyan_stream_t));
+ return (EEXIST);
+ }
+ cur = cur->bs_next;
+ }
+ bs->bs_next = b->bun_streams;
+ b->bun_streams = bs;
+ (void) pthread_mutex_unlock(&b->bun_lock);
+
+ return (0);
+}
+
+int
+bunyan_stream_remove(bunyan_logger_t *bhp, const char *name)
+{
+ bunyan_stream_t *cur, *prev;
+ bunyan_t *b = (bunyan_t *)bhp;
+
+ (void) pthread_mutex_lock(&b->bun_lock);
+ prev = NULL;
+ cur = b->bun_streams;
+ while (cur != NULL) {
+ if (strcmp(name, cur->bs_name) == 0)
+ break;
+ prev = cur;
+ cur = cur->bs_next;
+ }
+ if (cur == NULL) {
+ (void) pthread_mutex_unlock(&b->bun_lock);
+ return (ENOENT);
+ }
+ if (prev == NULL)
+ b->bun_streams = cur->bs_next;
+ else
+ prev->bs_next = cur->bs_next;
+ cur->bs_next = NULL;
+ (void) pthread_mutex_unlock(&b->bun_lock);
+
+ bunyan_stream_fini(cur);
+
+ return (0);
+}
+
+static int
+bunyan_key_add_one(bunyan_t *b, const char *name, bunyan_level_t type,
+ const void *arg)
+{
+ bunyan_key_t *bkp, *cur, *prev;
+ size_t nlen = strlen(name) + 1;
+ size_t blen;
+
+ bkp = umem_alloc(sizeof (bunyan_key_t), UMEM_DEFAULT);
+ if (bkp == NULL)
+ return (ENOMEM);
+ bkp->bk_name = umem_alloc(nlen, UMEM_DEFAULT);
+ if (bkp->bk_name == NULL) {
+ umem_free(bkp, sizeof (bunyan_key_t));
+ return (ENOMEM);
+ }
+ bcopy(name, bkp->bk_name, nlen);
+
+ switch (type) {
+ case BUNYAN_T_STRING:
+ blen = strlen(arg) + 1;
+ break;
+ case BUNYAN_T_POINTER:
+ blen = sizeof (uintptr_t);
+ break;
+ case BUNYAN_T_IP:
+ blen = sizeof (struct in_addr);
+ break;
+ case BUNYAN_T_IP6:
+ blen = sizeof (struct in6_addr);
+ break;
+ case BUNYAN_T_BOOLEAN:
+ blen = sizeof (boolean_t);
+ break;
+ case BUNYAN_T_INT32:
+ blen = sizeof (int32_t);
+ break;
+ case BUNYAN_T_INT64:
+ case BUNYAN_T_INT64STR:
+ blen = sizeof (int64_t);
+ break;
+ case BUNYAN_T_UINT32:
+ blen = sizeof (uint32_t);
+ break;
+ case BUNYAN_T_UINT64:
+ case BUNYAN_T_UINT64STR:
+ blen = sizeof (uint64_t);
+ break;
+ case BUNYAN_T_DOUBLE:
+ blen = sizeof (double);
+ break;
+ default:
+ umem_free(bkp->bk_name, nlen);
+ umem_free(bkp, sizeof (bunyan_key_t));
+ return (EINVAL);
+ }
+
+ bkp->bk_data = umem_alloc(blen, UMEM_DEFAULT);
+ if (bkp->bk_data == NULL) {
+ umem_free(bkp->bk_name, nlen);
+ umem_free(bkp, sizeof (bunyan_key_t));
+ return (ENOMEM);
+ }
+ bcopy(arg, bkp->bk_data, blen);
+ bkp->bk_len = blen;
+ bkp->bk_type = type;
+
+ (void) pthread_mutex_lock(&b->bun_lock);
+ prev = NULL;
+ cur = b->bun_keys;
+ while (cur != NULL) {
+ if (strcmp(name, cur->bk_name) == 0)
+ break;
+ prev = cur;
+ cur = cur->bk_next;
+ }
+ if (cur != NULL) {
+ if (prev == NULL)
+ b->bun_keys = cur->bk_next;
+ else
+ prev->bk_next = cur->bk_next;
+ bunyan_key_fini(cur);
+ }
+ bkp->bk_next = b->bun_keys;
+ b->bun_keys = bkp;
+ (void) pthread_mutex_unlock(&b->bun_lock);
+
+ return (0);
+}
+
+static int
+bunyan_key_vadd(bunyan_t *b, va_list *ap)
+{
+ int type, ret;
+ void *data;
+ boolean_t bt;
+ int32_t i32;
+ int64_t i64;
+ uint32_t ui32;
+ uint64_t ui64;
+ double d;
+ uintptr_t ptr;
+
+ while ((type = va_arg(*ap, int)) != BUNYAN_T_END) {
+ const char *name = va_arg(*ap, char *);
+
+ switch (type) {
+ case BUNYAN_T_STRING:
+ data = va_arg(*ap, char *);
+ break;
+ case BUNYAN_T_POINTER:
+ ptr = (uintptr_t)va_arg(*ap, void *);
+ data = &ptr;
+ break;
+ case BUNYAN_T_IP:
+ case BUNYAN_T_IP6:
+ data = va_arg(*ap, void *);
+ break;
+ case BUNYAN_T_BOOLEAN:
+ bt = va_arg(*ap, boolean_t);
+ data = &bt;
+ break;
+ case BUNYAN_T_INT32:
+ i32 = va_arg(*ap, int32_t);
+ data = &i32;
+ break;
+ case BUNYAN_T_INT64:
+ case BUNYAN_T_INT64STR:
+ i64 = va_arg(*ap, int64_t);
+ data = &i64;
+ break;
+ case BUNYAN_T_UINT32:
+ ui32 = va_arg(*ap, uint32_t);
+ data = &ui32;
+ break;
+ case BUNYAN_T_UINT64:
+ case BUNYAN_T_UINT64STR:
+ ui64 = va_arg(*ap, uint64_t);
+ data = &ui64;
+ break;
+ case BUNYAN_T_DOUBLE:
+ d = va_arg(*ap, double);
+ data = &d;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ if ((ret = bunyan_key_add_one(b, name, type, data)) != 0)
+ return (ret);
+ }
+
+ return (0);
+}
+
+int
+bunyan_key_add(bunyan_logger_t *bhp, ...)
+{
+ int ret;
+ va_list ap;
+ bunyan_t *b = (bunyan_t *)bhp;
+
+ va_start(ap, bhp);
+ ret = bunyan_key_vadd(b, &ap);
+ va_end(ap);
+
+ return (ret);
+}
+
+int
+bunyan_key_remove(bunyan_logger_t *bhp, const char *name)
+{
+ bunyan_t *b = (bunyan_t *)bhp;
+ bunyan_key_t *cur, *prev;
+
+ (void) pthread_mutex_lock(&b->bun_lock);
+ prev = NULL;
+ cur = b->bun_keys;
+ while (cur != NULL) {
+ if (strcmp(name, cur->bk_name) == 0)
+ break;
+ prev = cur;
+ cur = cur->bk_next;
+ }
+
+ if (cur == NULL) {
+ (void) pthread_mutex_unlock(&b->bun_lock);
+ return (ENOENT);
+ }
+
+ if (prev == NULL)
+ b->bun_keys = cur->bk_next;
+ else
+ prev->bk_next = cur->bk_next;
+ (void) pthread_mutex_unlock(&b->bun_lock);
+
+ bunyan_key_fini(cur);
+ return (0);
+}
+
+static bunyan_key_t *
+bunyan_key_dup(const bunyan_key_t *bkp)
+{
+ bunyan_key_t *nkp;
+ size_t nlen = strlen(bkp->bk_name) + 1;
+
+ nkp = umem_alloc(sizeof (bunyan_key_t), UMEM_DEFAULT);
+ if (nkp == NULL)
+ return (NULL);
+ nkp->bk_next = NULL;
+ nkp->bk_name = umem_alloc(nlen, UMEM_DEFAULT);
+ if (nkp->bk_name == NULL) {
+ umem_free(nkp, sizeof (bunyan_key_t));
+ return (NULL);
+ }
+ bcopy(bkp->bk_name, nkp->bk_name, nlen);
+ nkp->bk_type = bkp->bk_type;
+ nkp->bk_data = umem_alloc(bkp->bk_len, UMEM_DEFAULT);
+ if (nkp->bk_data == NULL) {
+ umem_free(nkp->bk_name, nlen);
+ umem_free(nkp, sizeof (bunyan_key_t));
+ return (NULL);
+ }
+ bcopy(bkp->bk_data, nkp->bk_data, bkp->bk_len);
+ nkp->bk_len = bkp->bk_len;
+
+ return (nkp);
+}
+
+static bunyan_stream_t *
+bunyan_stream_dup(const bunyan_stream_t *bsp)
+{
+ bunyan_stream_t *nsp;
+ size_t nlen = strlen(bsp->bs_name) + 1;
+
+ nsp = umem_alloc(sizeof (bunyan_stream_t), UMEM_DEFAULT);
+ if (nsp == NULL)
+ return (NULL);
+
+ nsp->bs_next = NULL;
+ nsp->bs_name = umem_alloc(nlen, UMEM_DEFAULT);
+ if (nsp->bs_name == NULL) {
+ umem_free(nsp, sizeof (bunyan_stream_t));
+ return (NULL);
+ }
+ bcopy(bsp->bs_name, nsp->bs_name, nlen);
+ nsp->bs_level = bsp->bs_level;
+ nsp->bs_func = bsp->bs_func;
+ nsp->bs_arg = bsp->bs_arg;
+ nsp->bs_count = 0;
+
+ return (nsp);
+}
+
+static bunyan_t *
+bunyan_dup(const bunyan_t *b)
+{
+ bunyan_t *n;
+ const bunyan_key_t *bkp;
+ const bunyan_stream_t *bsp;
+ size_t nlen;
+
+ n = umem_zalloc(sizeof (bunyan_t), UMEM_DEFAULT);
+ if (n == NULL)
+ return (NULL);
+
+ if (pthread_mutex_init(&n->bun_lock, NULL) != 0) {
+ umem_free(n, sizeof (bunyan_t));
+ return (NULL);
+ }
+
+ for (bkp = b->bun_keys; bkp != NULL; bkp = bkp->bk_next) {
+ bunyan_key_t *nkp;
+ nkp = bunyan_key_dup(bkp);
+ if (nkp == NULL) {
+ bunyan_fini((bunyan_logger_t *)n);
+ return (NULL);
+ }
+
+ nkp->bk_next = n->bun_keys;
+ n->bun_keys = nkp;
+ }
+
+ for (bsp = b->bun_streams; bsp != NULL; bsp = bsp->bs_next) {
+ bunyan_stream_t *nsp;
+ nsp = bunyan_stream_dup(bsp);
+ if (bsp == NULL) {
+ bunyan_fini((bunyan_logger_t *)n);
+ return (NULL);
+ }
+
+ nsp->bs_next = n->bun_streams;
+ n->bun_streams = nsp;
+ }
+
+ nlen = strlen(b->bun_name) + 1;
+ n->bun_name = umem_alloc(nlen, UMEM_DEFAULT);
+ if (n->bun_name == NULL) {
+ bunyan_fini((bunyan_logger_t *)n);
+ return (NULL);
+ }
+ bcopy(b->bun_name, n->bun_name, nlen);
+ bcopy(b->bun_host, n->bun_host, MAXHOSTNAMELEN+1);
+
+ return (n);
+}
+
+int
+bunyan_child(const bunyan_logger_t *bhp, bunyan_logger_t **outp, ...)
+{
+ bunyan_t *b = (bunyan_t *)bhp;
+ bunyan_t *n;
+ va_list ap;
+ int ret;
+
+ n = bunyan_dup(b);
+ if (n == NULL)
+ return (ENOMEM);
+
+ va_start(ap, outp);
+ ret = bunyan_key_vadd(b, &ap);
+ va_end(ap);
+
+ if (ret != 0)
+ bunyan_fini((bunyan_logger_t *)n);
+ else
+ *outp = (bunyan_logger_t *)n;
+
+ return (ret);
+}
+
+static int
+bunyan_iso_time(char *buf)
+{
+ struct timeval tv;
+ struct tm tm;
+
+ if (gettimeofday(&tv, NULL) != 0)
+ return (errno);
+
+ if (gmtime_r(&tv.tv_sec, &tm) == NULL)
+ return (errno);
+
+ VERIFY(strftime(buf, ISO_TIMELEN, "%FT%T", &tm) == 19);
+
+ (void) snprintf(&buf[19], 6, ".%03dZ", (int)(tv.tv_usec / 1000));
+
+ return (0);
+}
+
+/*
+ * Note, these fields are all required, so even if a user attempts to use one of
+ * them in their own fields, we'll override them and therefore, have it be the
+ * last one.
+ */
+static int
+bunyan_vlog_defaults(nvlist_t *nvl, bunyan_t *b, bunyan_level_t level,
+ const char *msg)
+{
+ int ret;
+ char tbuf[ISO_TIMELEN];
+
+ if ((ret = bunyan_iso_time(tbuf)) != 0)
+ return (ret);
+
+ if ((ret = nvlist_add_int32(nvl, "v", bunyan_version)) != 0 ||
+ (ret = nvlist_add_int32(nvl, "level", level) != 0) ||
+ (ret = nvlist_add_string(nvl, "name", b->bun_name) != 0) ||
+ (ret = nvlist_add_string(nvl, "hostname", b->bun_host) != 0) ||
+ (ret = nvlist_add_int32(nvl, "pid", getpid()) != 0) ||
+ (ret = nvlist_add_uint32(nvl, "tid", thr_self()) != 0) ||
+ (ret = nvlist_add_string(nvl, "time", tbuf) != 0) ||
+ (ret = nvlist_add_string(nvl, "msg", msg) != 0))
+ return (ret);
+
+ return (0);
+}
+
+static int
+bunyan_vlog_add(nvlist_t *nvl, const char *key, bunyan_type_t type, void *arg)
+{
+ int ret;
+ uintptr_t *up;
+ struct in_addr *v4;
+ struct in6_addr *v6;
+
+ /*
+ * Our buffer needs to hold the string forms of pointers, IPv6 strings,
+ * etc. INET6_ADDRSTRLEN is large enough for all of these.
+ */
+ char buf[INET6_ADDRSTRLEN];
+
+ switch (type) {
+ case BUNYAN_T_STRING:
+ ret = nvlist_add_string(nvl, key, (char *)arg);
+ break;
+ case BUNYAN_T_POINTER:
+ up = arg;
+ (void) snprintf(buf, sizeof (buf), "0x%p", *up);
+ ret = nvlist_add_string(nvl, key, buf);
+ break;
+ case BUNYAN_T_IP:
+ v4 = arg;
+ VERIFY(inet_ntop(AF_INET, v4, buf, sizeof (buf)) != NULL);
+ ret = nvlist_add_string(nvl, key, buf);
+ break;
+ case BUNYAN_T_IP6:
+ v6 = arg;
+ VERIFY(inet_ntop(AF_INET6, v6, buf, sizeof (buf)) != NULL);
+ ret = nvlist_add_string(nvl, key, buf);
+ break;
+ case BUNYAN_T_BOOLEAN:
+ ret = nvlist_add_boolean_value(nvl, key, *(boolean_t *)arg);
+ break;
+ case BUNYAN_T_INT32:
+ ret = nvlist_add_int32(nvl, key, *(int32_t *)arg);
+ break;
+ case BUNYAN_T_INT64:
+ ret = nvlist_add_int64(nvl, key, *(int64_t *)arg);
+ break;
+ case BUNYAN_T_UINT32:
+ ret = nvlist_add_uint32(nvl, key, *(uint32_t *)arg);
+ break;
+ case BUNYAN_T_UINT64:
+ ret = nvlist_add_uint64(nvl, key, *(uint32_t *)arg);
+ break;
+ case BUNYAN_T_DOUBLE:
+ ret = nvlist_add_double(nvl, key, *(double *)arg);
+ break;
+ case BUNYAN_T_INT64STR:
+ (void) snprintf(buf, sizeof (buf), "%lld", *(int64_t *)arg);
+ ret = nvlist_add_string(nvl, key, buf);
+ break;
+ case BUNYAN_T_UINT64STR:
+ (void) snprintf(buf, sizeof (buf), "%llu", *(uint64_t *)arg);
+ ret = nvlist_add_string(nvl, key, buf);
+ break;
+ default:
+ ret = EINVAL;
+ break;
+ }
+
+ return (ret);
+}
+
+static int
+bunyan_vlog(bunyan_logger_t *bhp, bunyan_level_t level, const char *msg,
+ va_list *ap)
+{
+ nvlist_t *nvl = NULL;
+ int ret, type;
+ bunyan_key_t *bkp;
+ bunyan_stream_t *bsp;
+ char *buf = NULL;
+ bunyan_t *b = (bunyan_t *)bhp;
+
+ if (msg == NULL)
+ return (EINVAL);
+
+ (void) pthread_mutex_lock(&b->bun_lock);
+
+ if ((ret = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0) {
+ (void) pthread_mutex_unlock(&b->bun_lock);
+ return (ret);
+ }
+
+ /*
+ * We add pre-defined keys, then go through and process the users keys,
+ * and finally go ahead and our defaults. If all that succeeds, then we
+ * can go ahead and call all the built-in logs.
+ */
+ for (bkp = b->bun_keys; bkp != NULL; bkp = bkp->bk_next) {
+ if ((ret = bunyan_vlog_add(nvl, bkp->bk_name, bkp->bk_type,
+ bkp->bk_data)) != 0)
+ goto out;
+ }
+
+ while ((type = va_arg(*ap, int)) != BUNYAN_T_END) {
+ void *data;
+ boolean_t bt;
+ int32_t i32;
+ int64_t i64;
+ uint32_t ui32;
+ uint64_t ui64;
+ double d;
+ uintptr_t ptr;
+ const char *key = va_arg(*ap, char *);
+
+ switch (type) {
+ case BUNYAN_T_STRING:
+ data = va_arg(*ap, char *);
+ break;
+ case BUNYAN_T_POINTER:
+ ptr = (uintptr_t)va_arg(*ap, void *);
+ data = &ptr;
+ break;
+ case BUNYAN_T_IP:
+ case BUNYAN_T_IP6:
+ data = va_arg(*ap, void *);
+ break;
+ case BUNYAN_T_BOOLEAN:
+ bt = va_arg(*ap, boolean_t);
+ data = &bt;
+ break;
+ case BUNYAN_T_INT32:
+ i32 = va_arg(*ap, int32_t);
+ data = &i32;
+ break;
+ case BUNYAN_T_INT64:
+ case BUNYAN_T_INT64STR:
+ i64 = va_arg(*ap, int64_t);
+ data = &i64;
+ break;
+ case BUNYAN_T_UINT32:
+ ui32 = va_arg(*ap, uint32_t);
+ data = &ui32;
+ break;
+ case BUNYAN_T_UINT64:
+ case BUNYAN_T_UINT64STR:
+ ui64 = va_arg(*ap, uint64_t);
+ data = &ui64;
+ break;
+ case BUNYAN_T_DOUBLE:
+ d = va_arg(*ap, double);
+ data = &d;
+ break;
+ default:
+ ret = EINVAL;
+ goto out;
+ }
+
+ if ((ret = bunyan_vlog_add(nvl, key, type, data)) != 0)
+ goto out;
+
+ }
+ /*
+ * This must be the last thing we do before we log to ensure that all of
+ * our defaults always make it out.
+ */
+ if ((ret = bunyan_vlog_defaults(nvl, b, level, msg)) != 0)
+ goto out;
+
+ if (nvlist_dump_json(nvl, &buf) < 0) {
+ ret = errno;
+ goto out;
+ }
+
+ /* Fire DTrace probes */
+ switch (level) {
+ case BUNYAN_L_TRACE:
+ BUNYAN_LOG_TRACE(buf);
+ break;
+ case BUNYAN_L_DEBUG:
+ BUNYAN_LOG_DEBUG(buf);
+ break;
+ case BUNYAN_L_INFO:
+ BUNYAN_LOG_INFO(buf);
+ break;
+ case BUNYAN_L_WARN:
+ BUNYAN_LOG_WARN(buf);
+ break;
+ case BUNYAN_L_ERROR:
+ BUNYAN_LOG_ERROR(buf);
+ break;
+ case BUNYAN_L_FATAL:
+ BUNYAN_LOG_FATAL(buf);
+ break;
+ }
+
+ for (bsp = b->bun_streams; bsp != NULL; bsp = bsp->bs_next) {
+ if (bsp->bs_level <= level)
+ if (bsp->bs_func(nvl, buf, bsp->bs_arg) != 0)
+ bsp->bs_count++;
+ }
+ ret = 0;
+out:
+ (void) pthread_mutex_unlock(&b->bun_lock);
+ if (buf != NULL)
+ nvlist_dump_json_free(nvl, buf);
+ if (nvl != NULL)
+ nvlist_free(nvl);
+ return (ret);
+}
+
+int
+bunyan_trace(bunyan_logger_t *bhp, const char *msg, ...)
+{
+ va_list va;
+ int ret;
+
+ va_start(va, msg);
+ ret = bunyan_vlog(bhp, BUNYAN_L_TRACE, msg, &va);
+ va_end(va);
+
+ return (ret);
+}
+
+int
+bunyan_debug(bunyan_logger_t *bhp, const char *msg, ...)
+{
+ va_list va;
+ int ret;
+
+ va_start(va, msg);
+ ret = bunyan_vlog(bhp, BUNYAN_L_DEBUG, msg, &va);
+ va_end(va);
+
+ return (ret);
+}
+
+int
+bunyan_info(bunyan_logger_t *bhp, const char *msg, ...)
+{
+ va_list va;
+ int ret;
+
+ va_start(va, msg);
+ ret = bunyan_vlog(bhp, BUNYAN_L_INFO, msg, &va);
+ va_end(va);
+
+ return (ret);
+}
+
+int
+bunyan_warn(bunyan_logger_t *bhp, const char *msg, ...)
+{
+ va_list va;
+ int ret;
+
+ va_start(va, msg);
+ ret = bunyan_vlog(bhp, BUNYAN_L_WARN, msg, &va);
+ va_end(va);
+
+ return (ret);
+}
+
+int
+bunyan_error(bunyan_logger_t *bhp, const char *msg, ...)
+{
+ va_list va;
+ int ret;
+
+ va_start(va, msg);
+ ret = bunyan_vlog(bhp, BUNYAN_L_ERROR, msg, &va);
+ va_end(va);
+
+ return (ret);
+}
+
+
+int
+bunyan_fatal(bunyan_logger_t *bhp, const char *msg, ...)
+{
+ va_list va;
+ int ret;
+
+ va_start(va, msg);
+ ret = bunyan_vlog(bhp, BUNYAN_L_FATAL, msg, &va);
+ va_end(va);
+
+ return (ret);
+}
diff --git a/usr/src/lib/libbunyan/common/bunyan.h b/usr/src/lib/libbunyan/common/bunyan.h
new file mode 100644
index 0000000000..9a01f6f6cd
--- /dev/null
+++ b/usr/src/lib/libbunyan/common/bunyan.h
@@ -0,0 +1,88 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+#ifndef _BUNYAN_H
+#define _BUNYAN_H
+
+/*
+ * C version of the bunyan logging format.
+ */
+
+#include <limits.h>
+#include <libnvpair.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct bunyan_logger bunyan_logger_t;
+
+typedef enum bunyan_level {
+ BUNYAN_L_TRACE = 10,
+ BUNYAN_L_DEBUG = 20,
+ BUNYAN_L_INFO = 30,
+ BUNYAN_L_WARN = 40,
+ BUNYAN_L_ERROR = 50,
+ BUNYAN_L_FATAL = 60
+} bunyan_level_t;
+
+typedef enum bunyan_type {
+ BUNYAN_T_END = 0x0,
+ BUNYAN_T_STRING,
+ BUNYAN_T_POINTER,
+ BUNYAN_T_IP,
+ BUNYAN_T_IP6,
+ BUNYAN_T_BOOLEAN,
+ BUNYAN_T_INT32,
+ BUNYAN_T_INT64,
+ BUNYAN_T_UINT32,
+ BUNYAN_T_UINT64,
+ BUNYAN_T_DOUBLE,
+ BUNYAN_T_INT64STR,
+ BUNYAN_T_UINT64STR
+} bunyan_type_t;
+
+/*
+ * A handle is MT-safe, but not fork-safe.
+ */
+extern int bunyan_init(const char *, bunyan_logger_t **);
+extern int bunyan_child(const bunyan_logger_t *, bunyan_logger_t **, ...);
+extern void bunyan_fini(bunyan_logger_t *);
+
+/*
+ * Bunyan stream callbacks are guaranteed to be serialized.
+ */
+typedef int (*bunyan_stream_f)(nvlist_t *, const char *, void *);
+extern int bunyan_stream_fd(nvlist_t *, const char *, void *);
+
+extern int bunyan_stream_add(bunyan_logger_t *, const char *, int,
+ bunyan_stream_f, void *);
+extern int bunyan_stream_remove(bunyan_logger_t *, const char *);
+
+extern int bunyan_key_add(bunyan_logger_t *, ...);
+extern int bunyan_key_remove(bunyan_logger_t *, const char *);
+
+extern int bunyan_trace(bunyan_logger_t *, const char *msg, ...);
+extern int bunyan_debug(bunyan_logger_t *, const char *msg, ...);
+extern int bunyan_info(bunyan_logger_t *, const char *msg, ...);
+extern int bunyan_warn(bunyan_logger_t *, const char *msg, ...);
+extern int bunyan_error(bunyan_logger_t *, const char *msg, ...);
+extern int bunyan_fatal(bunyan_logger_t *, const char *msg, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BUNYAN_H */
diff --git a/usr/src/lib/libbunyan/common/bunyan_provider.d b/usr/src/lib/libbunyan/common/bunyan_provider.d
new file mode 100644
index 0000000000..d47ea75733
--- /dev/null
+++ b/usr/src/lib/libbunyan/common/bunyan_provider.d
@@ -0,0 +1,32 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+/*
+ * Bunyan DTrace provider
+ */
+provider bunyan {
+ probe log__trace(char *);
+ probe log__debug(char *);
+ probe log__info(char *);
+ probe log__warn(char *);
+ probe log__error(char *);
+ probe log__fatal(char *);
+};
+
+#pragma D attributes Stable/Stable/ISA provider bunyan provider
+#pragma D attributes Private/Private/Unknown provider bunyan module
+#pragma D attributes Private/Private/Unknown provider bunyan function
+#pragma D attributes Stable/Stable/ISA provider bunyan name
+#pragma D attributes Stable/Stable/ISA provider bunyan args
diff --git a/usr/src/lib/libbunyan/common/llib-lbunyan b/usr/src/lib/libbunyan/common/llib-lbunyan
new file mode 100644
index 0000000000..31f6a52aba
--- /dev/null
+++ b/usr/src/lib/libbunyan/common/llib-lbunyan
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+#include <bunyan.h>
diff --git a/usr/src/lib/libbunyan/common/mapfile-vers b/usr/src/lib/libbunyan/common/mapfile-vers
new file mode 100644
index 0000000000..e72bbf76c8
--- /dev/null
+++ b/usr/src/lib/libbunyan/common/mapfile-vers
@@ -0,0 +1,50 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION ILLUMOS_1.0 {
+ global:
+ bunyan_init;
+ bunyan_child;
+ bunyan_fini;
+ bunyan_stream_fd;
+ bunyan_stream_add;
+ bunyan_stream_remove;
+ bunyan_key_add;
+ bunyan_key_remove;
+ bunyan_trace;
+ bunyan_debug;
+ bunyan_info;
+ bunyan_warn;
+ bunyan_error;
+ bunyan_fatal;
+ local:
+ *;
+};
diff --git a/usr/src/lib/libbunyan/i386/Makefile b/usr/src/lib/libbunyan/i386/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/libbunyan/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libbunyan/sparc/Makefile b/usr/src/lib/libbunyan/sparc/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/libbunyan/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libbunyan/sparcv9/Makefile b/usr/src/lib/libbunyan/sparcv9/Makefile
new file mode 100644
index 0000000000..15d904c616
--- /dev/null
+++ b/usr/src/lib/libbunyan/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libcmdutils/common/custr.c b/usr/src/lib/libcmdutils/common/custr.c
index 1ec72de9dd..f59f433ae9 100644
--- a/usr/src/lib/libcmdutils/common/custr.c
+++ b/usr/src/lib/libcmdutils/common/custr.c
@@ -20,13 +20,20 @@
#include <stdlib.h>
#include <err.h>
#include <string.h>
+#include <stdio.h>
+#include <stdarg.h>
#include "libcmdutils.h"
+typedef enum {
+ CUSTR_FIXEDBUF = 0x01
+} custr_flags_t;
+
struct custr {
size_t cus_strlen;
size_t cus_datalen;
char *cus_data;
+ custr_flags_t cus_flags;
};
#define STRING_CHUNK_SIZE 64
@@ -53,23 +60,15 @@ custr_cstr(custr_t *cus)
return (cus->cus_data);
}
-int
-custr_appendc(custr_t *cus, char newc)
-{
- char news[2];
-
- news[0] = newc;
- news[1] = '\0';
-
- return (custr_append(cus, news));
-}
-
-int
-custr_append(custr_t *cus, const char *news)
+static int
+custr_append_vprintf(custr_t *cus, const char *fmt, va_list ap)
{
- size_t len = strlen(news);
+ size_t len = vsnprintf(NULL, 0, fmt, ap);
size_t chunksz = STRING_CHUNK_SIZE;
+ if (len == -1)
+ return (len);
+
while (chunksz < len) {
chunksz *= 2;
}
@@ -78,6 +77,11 @@ custr_append(custr_t *cus, const char *news)
char *new_data;
size_t new_datalen = cus->cus_datalen + chunksz;
+ if (cus->cus_flags & CUSTR_FIXEDBUF) {
+ errno = EOVERFLOW;
+ return (-1);
+ }
+
/*
* Allocate replacement memory:
*/
@@ -104,13 +108,41 @@ custr_append(custr_t *cus, const char *news)
/*
* Append new string to existing string:
*/
- (void) memcpy(cus->cus_data + cus->cus_strlen, news, len + 1);
+ len = vsnprintf(cus->cus_data + cus->cus_strlen,
+ (uintptr_t)cus->cus_data - (uintptr_t)cus->cus_strlen, fmt, ap);
+ if (len == -1)
+ return (len);
cus->cus_strlen += len;
return (0);
}
int
+custr_appendc(custr_t *cus, char newc)
+{
+ return (custr_append_printf(cus, "%c", newc));
+}
+
+int
+custr_append_printf(custr_t *cus, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+
+ va_start(ap, fmt);
+ ret = custr_append_vprintf(cus, fmt, ap);
+ va_end(ap);
+
+ return (ret);
+}
+
+int
+custr_append(custr_t *cus, const char *name)
+{
+ return (custr_append_printf(cus, "%s", name));
+}
+
+int
custr_alloc(custr_t **cus)
{
custr_t *t;
@@ -124,12 +156,35 @@ custr_alloc(custr_t **cus)
return (0);
}
+int
+custr_alloc_buf(custr_t **cus, void *buf, size_t buflen)
+{
+ int ret;
+
+ if (buflen == 0 || buf == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if ((ret = custr_alloc(cus)) != 0)
+ return (ret);
+
+ (*cus)->cus_data = buf;
+ (*cus)->cus_datalen = buflen;
+ (*cus)->cus_strlen = 0;
+ (*cus)->cus_flags = CUSTR_FIXEDBUF;
+ (*cus)->cus_data[0] = '\0';
+
+ return (0);
+}
+
void
custr_free(custr_t *cus)
{
if (cus == NULL)
return;
- free(cus->cus_data);
+ if ((cus->cus_flags & CUSTR_FIXEDBUF) == 0)
+ free(cus->cus_data);
free(cus);
}
diff --git a/usr/src/lib/libcmdutils/common/mapfile-vers b/usr/src/lib/libcmdutils/common/mapfile-vers
index 640959e4b5..3106695eb0 100644
--- a/usr/src/lib/libcmdutils/common/mapfile-vers
+++ b/usr/src/lib/libcmdutils/common/mapfile-vers
@@ -43,8 +43,10 @@ SYMBOL_VERSION SUNWprivate_1.1 {
global:
add_tnode;
custr_alloc;
+ custr_alloc_buf;
custr_append;
custr_appendc;
+ custr_append_printf;
custr_cstr;
custr_free;
custr_len;
diff --git a/usr/src/lib/libcmdutils/libcmdutils.h b/usr/src/lib/libcmdutils/libcmdutils.h
index a280751c27..835560b8e7 100644
--- a/usr/src/lib/libcmdutils/libcmdutils.h
+++ b/usr/src/lib/libcmdutils/libcmdutils.h
@@ -157,6 +157,12 @@ extern int custr_alloc(custr_t **);
extern void custr_free(custr_t *);
/*
+ * Allocate a "custr_t" dynamic string object that operates on a fixed external
+ * buffer.
+ */
+extern int custr_alloc_buf(custr_t **, void *, size_t);
+
+/*
* Append a single character, or a NUL-terminated string of characters, to a
* dynamic string. Returns 0 on success and -1 otherwise. The dynamic string
* will be unmodified if the function returns -1.
@@ -165,6 +171,13 @@ extern int custr_appendc(custr_t *, char);
extern int custr_append(custr_t *, const char *);
/*
+ * Append a format string and arguments as though the contents were being parsed
+ * through snprintf. Returns 0 on success and -1 otherwise. The dynamic string
+ * will be unmodified if the function returns -1.
+ */
+extern int custr_append_printf(custr_t *, const char *, ...);
+
+/*
* Determine the length in bytes, not including the NUL terminator, of the
* dynamic string.
*/
diff --git a/usr/src/lib/libdladm/Makefile b/usr/src/lib/libdladm/Makefile
index 92de14cc8a..a9270eb848 100644
--- a/usr/src/lib/libdladm/Makefile
+++ b/usr/src/lib/libdladm/Makefile
@@ -29,7 +29,7 @@ HDRS = libdladm.h libdladm_impl.h libdllink.h libdlaggr.h \
libdlwlan.h libdlwlan_impl.h libdlvnic.h libdlvlan.h \
libdlmgmt.h libdlflow.h libdlflow_impl.h libdlstat.h \
libdlether.h libdlsim.h libdlbridge.h libdliptun.h \
- libdlib.h
+ libdlib.h libdloverlay.h
HDRDIR = common
@@ -50,6 +50,14 @@ MSGFILES = common/libdladm.c common/linkprop.c common/secobj.c \
XGETFLAGS = -a -x libdladm.xcl
+TYPECHECK_LIB = libdladm.so.1
+TYPELIST = overlay_ioc_create_t \
+ overlay_ioc_activate_t \
+ overlay_ioc_delete_t \
+ overlay_ioc_nprops_t \
+ overlay_ioc_propinfo_t \
+ overlay_ioc_prop_t
+
all := TARGET = all
clean := TARGET = clean
clobber := TARGET = clobber
@@ -62,7 +70,7 @@ all clean clobber install lint: $(SUBDIRS)
install_h: $(ROOTHDRS)
-check: $(CHECKHDRS)
+check: $(CHECKHDRS) $(TYPECHECK)
$(POFILE): pofile_MSGFILES
diff --git a/usr/src/lib/libdladm/Makefile.com b/usr/src/lib/libdladm/Makefile.com
index 5bb56d1440..bd98ad76fa 100644
--- a/usr/src/lib/libdladm/Makefile.com
+++ b/usr/src/lib/libdladm/Makefile.com
@@ -27,7 +27,8 @@ VERS = .1
OBJECTS = libdladm.o secobj.o linkprop.o libdllink.o libdlaggr.o \
libdlwlan.o libdlvnic.o libdlmgmt.o libdlvlan.o libdlib.o\
flowattr.o flowprop.o propfuncs.o libdlflow.o libdlstat.o \
- usage.o libdlether.o libdlsim.o libdlbridge.o libdliptun.o
+ usage.o libdlether.o libdlsim.o libdlbridge.o libdliptun.o \
+ libdloverlay.o
include ../../Makefile.lib
@@ -36,7 +37,7 @@ include ../../Makefile.rootfs
LIBS = $(DYNLIB) $(LINTLIB)
LDLIBS += -ldevinfo -lc -linetutil -lsocket -lscf -lrcm -lnvpair \
- -lexacct -lnsl -lkstat -lcurses -lpool
+ -lexacct -lnsl -lkstat -lcurses -lpool -lvarpd
SRCDIR = ../common
$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC)
diff --git a/usr/src/lib/libdladm/common/libdladm.c b/usr/src/lib/libdladm/common/libdladm.c
index cf113e7357..5b810e0d02 100644
--- a/usr/src/lib/libdladm/common/libdladm.c
+++ b/usr/src/lib/libdladm/common/libdladm.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <unistd.h>
@@ -418,6 +419,9 @@ dladm_status2str(dladm_status_t status, char *buf)
case DLADM_STATUS_INVALID_MTU:
s = "MTU check failed, MTU outside of device's supported range";
break;
+ case DLADM_STATUS_BAD_ENCAP:
+ s = "invalid encapsulation protocol";
+ break;
default:
s = "<unknown error>";
break;
@@ -654,6 +658,9 @@ dladm_class2str(datalink_class_t class, char *buf)
case DATALINK_CLASS_PART:
s = "part";
break;
+ case DATALINK_CLASS_OVERLAY:
+ s = "overlay";
+ break;
default:
s = "unknown";
break;
@@ -1132,15 +1139,15 @@ dladm_strs2range(char **prop_val, uint_t val_cnt,
* Convert a mac_propval_range_t structure into an array of elements.
*/
dladm_status_t
-dladm_range2list(mac_propval_range_t *rangep, void *elem, uint_t *nelem)
+dladm_range2list(const mac_propval_range_t *rangep, void *elem, uint_t *nelem)
{
int i, j, k;
dladm_status_t status = DLADM_STATUS_OK;
switch (rangep->mpr_type) {
case MAC_PROPVAL_UINT32: {
- mac_propval_uint32_range_t *ur;
- uint32_t *elem32 = elem;
+ const mac_propval_uint32_range_t *ur;
+ uint32_t *elem32 = elem;
k = 0;
ur = &rangep->mpr_range_uint32[0];
@@ -1168,13 +1175,13 @@ dladm_range2list(mac_propval_range_t *rangep, void *elem, uint_t *nelem)
* of single elements or ranges.
*/
int
-dladm_range2strs(mac_propval_range_t *rangep, char **prop_val)
+dladm_range2strs(const mac_propval_range_t *rangep, char **prop_val)
{
int i;
switch (rangep->mpr_type) {
case MAC_PROPVAL_UINT32: {
- mac_propval_uint32_range_t *ur;
+ const mac_propval_uint32_range_t *ur;
/* Write ranges and individual elements */
ur = &rangep->mpr_range_uint32[0];
@@ -1191,6 +1198,20 @@ dladm_range2strs(mac_propval_range_t *rangep, char **prop_val)
}
return (0);
}
+ case MAC_PROPVAL_STR: {
+ const mac_propval_str_range_t *str;
+ size_t coff, len;
+
+ coff = 0;
+ str = &rangep->u.mpr_str;
+ for (i = 0; i < rangep->mpr_count; i++) {
+ len = strlen(&str->mpur_data[coff]);
+ (void) strlcpy(prop_val[i], &str->mpur_data[coff],
+ DLADM_PROP_VAL_MAX);
+ coff += len + 1;
+ }
+ return (0);
+ }
default:
break;
}
diff --git a/usr/src/lib/libdladm/common/libdladm.h b/usr/src/lib/libdladm/common/libdladm.h
index f3347a0ede..fcb0551f88 100644
--- a/usr/src/lib/libdladm/common/libdladm.h
+++ b/usr/src/lib/libdladm/common/libdladm.h
@@ -179,7 +179,8 @@ typedef enum {
DLADM_STATUS_NO_IB_HW_RESOURCE,
DLADM_STATUS_INVALID_PKEY_TBL_SIZE,
DLADM_STATUS_PORT_NOPROTO,
- DLADM_STATUS_INVALID_MTU
+ DLADM_STATUS_INVALID_MTU,
+ DLADM_STATUS_BAD_ENCAP
} dladm_status_t;
typedef enum {
@@ -288,9 +289,9 @@ extern dladm_status_t dladm_zone_halt(dladm_handle_t, zoneid_t);
extern dladm_status_t dladm_strs2range(char **, uint_t, mac_propval_type_t,
mac_propval_range_t **);
-extern dladm_status_t dladm_range2list(mac_propval_range_t *, void*,
+extern dladm_status_t dladm_range2list(const mac_propval_range_t *, void *,
uint_t *);
-extern int dladm_range2strs(mac_propval_range_t *, char **);
+extern int dladm_range2strs(const mac_propval_range_t *, char **);
extern dladm_status_t dladm_list2range(void *, uint_t, mac_propval_type_t,
mac_propval_range_t **);
diff --git a/usr/src/lib/libdladm/common/libdloverlay.c b/usr/src/lib/libdladm/common/libdloverlay.c
new file mode 100644
index 0000000000..0d3bc326da
--- /dev/null
+++ b/usr/src/lib/libdladm/common/libdloverlay.c
@@ -0,0 +1,870 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ */
+
+#include <libdladm_impl.h>
+#include <libdllink.h>
+#include <libdloverlay.h>
+#include <sys/dld.h>
+#include <sys/overlay.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <limits.h>
+#include <libvarpd_client.h>
+
+#define VARPD_PROPERTY_NAME "varpd/id"
+
+static const char *dladm_overlay_doorpath = "/var/run/varpd/varpd.door";
+
+typedef struct dladm_overlay_propinfo {
+ boolean_t dop_isvarpd;
+ union {
+ overlay_ioc_propinfo_t *dop_overlay;
+ varpd_client_prop_handle_t *dop_varpd;
+ } dop_un;
+} dladm_overlay_propinfo_t;
+
+dladm_status_t
+dladm_overlay_prop_info(dladm_overlay_propinfo_handle_t phdl,
+ const char **namep, uint_t *typep, uint_t *protp, const void **defp,
+ uint32_t *sizep, const mac_propval_range_t **possp)
+{
+ dladm_overlay_propinfo_t *infop = (dladm_overlay_propinfo_t *)phdl;
+ overlay_ioc_propinfo_t *oinfop = infop->dop_un.dop_overlay;
+
+ if (infop->dop_isvarpd == B_FALSE) {
+ if (namep != NULL)
+ *namep = oinfop->oipi_name;
+ if (typep != NULL)
+ *typep = oinfop->oipi_type;
+ if (protp != NULL)
+ *protp = oinfop->oipi_prot;
+ if (defp != NULL)
+ *defp = oinfop->oipi_default;
+ if (sizep != NULL)
+ *sizep = oinfop->oipi_defsize;
+ if (possp != NULL)
+ *possp = (const mac_propval_range_t *)oinfop->oipi_poss;
+
+ } else {
+ int ret;
+ ret = libvarpd_c_prop_info(infop->dop_un.dop_varpd, namep,
+ typep, protp, defp, sizep, possp);
+ if (ret != 0)
+ return (dladm_errno2status(ret));
+
+ }
+
+ return (DLADM_STATUS_OK);
+}
+
+static dladm_status_t
+dladm_overlay_parse_prop(overlay_prop_type_t type, void *buf, uint32_t *sizep,
+ const char *val)
+{
+ int ret;
+ int64_t ival;
+ uint64_t uval;
+ char *eptr;
+ struct in6_addr ipv6;
+ struct in_addr ip;
+
+ switch (type) {
+ case OVERLAY_PROP_T_INT:
+ errno = 0;
+ ival = strtol(val, &eptr, 10);
+ if ((ival == 0 && errno == EINVAL) ||
+ ((ival == LONG_MAX || ival == LONG_MIN) &&
+ errno == ERANGE))
+ return (DLADM_STATUS_BADARG);
+ bcopy(&ival, buf, sizeof (int64_t));
+ *sizep = sizeof (int64_t);
+ break;
+ case OVERLAY_PROP_T_UINT:
+ errno = 0;
+ uval = strtol(val, &eptr, 10);
+ if ((uval == 0 && errno == EINVAL) ||
+ (uval == ULONG_MAX && errno == ERANGE))
+ return (DLADM_STATUS_BADARG);
+ bcopy(&uval, buf, sizeof (uint64_t));
+ *sizep = sizeof (uint64_t);
+ break;
+ case OVERLAY_PROP_T_STRING:
+ ret = strlcpy((char *)buf, val, OVERLAY_PROP_SIZEMAX);
+ if (ret >= OVERLAY_PROP_SIZEMAX)
+ return (DLADM_STATUS_BADARG);
+ *sizep = ret + 1;
+ break;
+ case OVERLAY_PROP_T_IP:
+ /*
+ * Always try to parse the IP as an IPv6 address. If that fails,
+ * try to interpret it as an IPv4 address and transform it into
+ * an IPv6 mapped IPv4 address.
+ */
+ if (inet_pton(AF_INET6, val, &ipv6) != 1) {
+ if (inet_pton(AF_INET, val, &ip) != 1)
+ return (DLADM_STATUS_BADARG);
+
+ IN6_INADDR_TO_V4MAPPED(&ip, &ipv6);
+ }
+ bcopy(&ipv6, buf, sizeof (struct in6_addr));
+ *sizep = sizeof (struct in6_addr);
+ break;
+ default:
+ abort();
+ }
+
+ return (DLADM_STATUS_OK);
+}
+
+static dladm_status_t
+dladm_overlay_varpd_setprop(dladm_handle_t handle, varpd_client_handle_t *chdl,
+ uint64_t inst, const char *name, char *const *valp, uint_t cnt)
+{
+ int ret;
+ uint32_t size;
+ uint8_t buf[LIBVARPD_PROP_SIZEMAX];
+ varpd_client_prop_handle_t *phdl;
+ uint_t type;
+ dladm_status_t status;
+
+ if ((ret = libvarpd_c_prop_handle_alloc(chdl, inst, &phdl)) != 0)
+ return (dladm_errno2status(ret));
+
+ if ((ret = libvarpd_c_prop_info_fill_by_name(phdl, name)) != 0) {
+ libvarpd_c_prop_handle_free(phdl);
+ return (dladm_errno2status(ret));
+ }
+
+ if ((ret = libvarpd_c_prop_info(phdl, NULL, &type, NULL, NULL, NULL,
+ NULL)) != 0) {
+ libvarpd_c_prop_handle_free(phdl);
+ return (dladm_errno2status(ret));
+ }
+
+ if ((status = dladm_overlay_parse_prop(type, buf, &size, valp[0])) !=
+ DLADM_STATUS_OK) {
+ libvarpd_c_prop_handle_free(phdl);
+ return (dladm_errno2status(ret));
+ }
+
+ status = DLADM_STATUS_OK;
+ ret = libvarpd_c_prop_set(phdl, buf, size);
+ libvarpd_c_prop_handle_free(phdl);
+
+ return (dladm_errno2status(ret));
+}
+
+dladm_status_t
+dladm_overlay_setprop(dladm_handle_t handle, datalink_id_t linkid,
+ const char *name, char *const *valp, uint_t cnt)
+{
+ int ret;
+ dladm_status_t status;
+ overlay_ioc_propinfo_t info;
+ overlay_ioc_prop_t prop;
+
+ if (linkid == DATALINK_INVALID_LINKID ||
+ name == NULL || valp == NULL || cnt != 1)
+ return (DLADM_STATUS_BADARG);
+
+ bzero(&info, sizeof (overlay_ioc_propinfo_t));
+ info.oipi_linkid = linkid;
+ info.oipi_id = -1;
+ if (strlcpy(info.oipi_name, name, OVERLAY_PROP_NAMELEN) >=
+ OVERLAY_PROP_NAMELEN)
+ return (DLADM_STATUS_BADARG);
+
+ status = DLADM_STATUS_OK;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_PROPINFO, &info);
+ if (ret != 0)
+ status = dladm_errno2status(errno);
+
+ if (status != DLADM_STATUS_OK)
+ return (status);
+
+ prop.oip_linkid = linkid;
+ prop.oip_id = info.oipi_id;
+ prop.oip_name[0] = '\0';
+ if ((ret = dladm_overlay_parse_prop(info.oipi_type, prop.oip_value,
+ &prop.oip_size, valp[0])) != DLADM_STATUS_OK)
+ return (ret);
+
+ status = DLADM_STATUS_OK;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_SETPROP, &prop);
+ if (ret != 0)
+ status = dladm_errno2status(errno);
+
+ return (ret);
+}
+
+/*
+ * Tell the user about any unset required properties.
+ * XXX libraries shouldn't do this. Should be a dladm_arg_list_t returned to
+ * dladm(1M)
+ */
+static int
+dladm_overlay_activate_cb(dladm_handle_t handle, datalink_id_t linkid,
+ dladm_overlay_propinfo_handle_t phdl, void *arg)
+{
+ dladm_status_t status;
+ uint8_t buf[DLADM_OVERLAY_PROP_SIZEMAX];
+ uint_t prot;
+ size_t size = sizeof (buf);
+ const char *name;
+
+ if ((status = dladm_overlay_prop_info(phdl, &name, NULL, &prot, NULL,
+ NULL, NULL)) != DLADM_STATUS_OK)
+ return (status);
+
+ if ((prot & OVERLAY_PROP_PERM_REQ) == 0)
+ return (DLADM_WALK_CONTINUE);
+
+ if (dladm_overlay_get_prop(handle, linkid, phdl, buf, &size) !=
+ DLADM_STATUS_OK)
+ return (DLADM_WALK_CONTINUE);
+
+ if (size == 0)
+ fprintf(stderr, "unset required propety: %s\n", name);
+
+ return (DLADM_WALK_CONTINUE);
+}
+
+/*
+ * We need to clean up the world here. The problem is that we may or may not
+ * actually have everything created. While in the normal case, we'd always have
+ * an overlay device, assigned datalink id, and a varpd instance, we might not
+ * have any of those, except for the datalink instance. Therefore, as long as
+ * the id refers to a valid overlay, we should try to clean up as much of the
+ * state as possible and most importantly, we need to make sure we delete the
+ * datalink id. If we fail to do that, then that name will become lost to time.
+ */
+dladm_status_t
+dladm_overlay_delete(dladm_handle_t handle, datalink_id_t linkid)
+{
+ datalink_class_t class;
+ overlay_ioc_delete_t oid;
+ varpd_client_handle_t *chdl;
+ int ret, rval = 0;
+ uint32_t flags;
+ uint64_t varpdid;
+
+ if (dladm_datalink_id2info(handle, linkid, &flags, &class, NULL,
+ NULL, 0) != DLADM_STATUS_OK)
+ return (DLADM_STATUS_BADARG);
+
+ if (class != DATALINK_CLASS_OVERLAY)
+ return (DLADM_STATUS_BADARG);
+
+ oid.oid_linkid = linkid;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_DELETE, &oid);
+ if (ret != 0)
+ rval = dladm_errno2status(errno);
+
+ if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0) {
+ return (dladm_errno2status(ret));
+ }
+
+ if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) {
+ if (ret == ENOENT) {
+ goto finish;
+ }
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ ret = libvarpd_c_instance_destroy(chdl, varpdid);
+finish:
+ libvarpd_c_destroy(chdl);
+ (void) dladm_destroy_datalink_id(handle, linkid, flags);
+
+ return (dladm_errno2status(ret));
+}
+
+dladm_status_t
+dladm_overlay_get_prop(dladm_handle_t handle, datalink_id_t linkid,
+ dladm_overlay_propinfo_handle_t infohdl, void *buf, size_t *sizep)
+{
+ int ret;
+ overlay_ioc_prop_t oip;
+ dladm_overlay_propinfo_t *infop = (dladm_overlay_propinfo_t *)infohdl;
+
+ /* XXX Better errno */
+ if (*sizep < DLADM_OVERLAY_PROP_SIZEMAX)
+ return (dladm_errno2status(ERANGE));
+
+ if (infop->dop_isvarpd == B_FALSE) {
+ bzero(&oip, sizeof (overlay_ioc_prop_t));
+ oip.oip_linkid = linkid;
+ oip.oip_id = infop->dop_un.dop_overlay->oipi_id;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_GETPROP, &oip);
+ if (ret != 0)
+ return (dladm_errno2status(errno));
+ bcopy(oip.oip_value, buf, DLADM_OVERLAY_PROP_SIZEMAX);
+ *sizep = oip.oip_size;
+ } else {
+ uint32_t size = *sizep;
+
+ ret = libvarpd_c_prop_get(infop->dop_un.dop_varpd, buf, &size);
+ if (ret != 0)
+ return (dladm_errno2status(errno));
+ *sizep = size;
+ }
+
+ return (DLADM_STATUS_OK);
+}
+
+static dladm_status_t
+dladm_overlay_walk_varpd_prop(dladm_handle_t handle, datalink_id_t linkid,
+ uint64_t varpdid, dladm_overlay_prop_f func, void *arg)
+{
+ int ret, i;
+ varpd_client_handle_t *chdl;
+ varpd_client_prop_handle_t *phdl;
+ uint_t nprops;
+ dladm_status_t status;
+
+ if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0)
+ return (dladm_errno2status(ret));
+
+ if ((ret = libvarpd_c_prop_handle_alloc(chdl, varpdid, &phdl)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ if ((ret = libvarpd_c_prop_nprops(chdl, varpdid, &nprops)) != 0) {
+ libvarpd_c_prop_handle_free(phdl);
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+#if 0
+ printf("got n props: %d\n", nprops);
+#endif
+
+ status = DLADM_STATUS_OK;
+ for (i = 0; i < nprops; i++) {
+ dladm_overlay_propinfo_t dop;
+
+ bzero(&dop, sizeof (dop));
+ dop.dop_isvarpd = B_TRUE;
+ dop.dop_un.dop_varpd = phdl;
+
+ if ((ret = libvarpd_c_prop_info_fill(phdl, i)) != 0) {
+ status = dladm_errno2status(ret);
+ break;
+ }
+
+ ret = func(handle, linkid,
+ (dladm_overlay_propinfo_handle_t)&dop, arg);
+ if (ret == DLADM_WALK_TERMINATE)
+ break;
+ }
+
+ libvarpd_c_prop_handle_free(phdl);
+ libvarpd_c_destroy(chdl);
+
+ return (status);
+}
+
+dladm_status_t
+dladm_overlay_walk_prop(dladm_handle_t handle, datalink_id_t linkid,
+ dladm_overlay_prop_f func, void *arg)
+{
+ int i, ret;
+ dladm_status_t status;
+ datalink_class_t class;
+ overlay_ioc_nprops_t oin;
+ overlay_ioc_propinfo_t oipi;
+ dladm_overlay_propinfo_t dop;
+ uint64_t varpdid = UINT64_MAX;
+
+ if (dladm_datalink_id2info(handle, linkid, NULL, &class, NULL,
+ NULL, 0) != DLADM_STATUS_OK)
+ return (DLADM_STATUS_BADARG);
+
+ if (class != DATALINK_CLASS_OVERLAY)
+ return (DLADM_STATUS_BADARG);
+
+ bzero(&oin, sizeof (overlay_ioc_nprops_t));
+ status = DLADM_STATUS_OK;
+ oin.oipn_linkid = linkid;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_NPROPS, &oin);
+ if (ret != 0)
+ return (dladm_errno2status(errno));
+
+ for (i = 0; i < oin.oipn_nprops; i++) {
+ bzero(&dop, sizeof (dladm_overlay_propinfo_t));
+ bzero(&oipi, sizeof (overlay_ioc_propinfo_t));
+ oipi.oipi_linkid = linkid;
+ oipi.oipi_id = i;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_PROPINFO, &oipi);
+ if (ret != 0) {
+ fprintf(stderr, "failed to get propinfo %d\n", i);
+ return (dladm_errno2status(errno));
+ }
+
+ dop.dop_isvarpd = B_FALSE;
+ dop.dop_un.dop_overlay = &oipi;
+ ret = func(handle, linkid,
+ (dladm_overlay_propinfo_handle_t)&dop, arg);
+ if (ret == DLADM_WALK_TERMINATE)
+ break;
+
+ if (strcmp(oipi.oipi_name, VARPD_PROPERTY_NAME) == 0) {
+ uint8_t buf[DLADM_OVERLAY_PROP_SIZEMAX];
+ size_t bufsize = sizeof (buf);
+ uint64_t *vp;
+
+ if ((status = dladm_overlay_get_prop(handle, linkid,
+ (dladm_overlay_propinfo_handle_t)&dop, buf,
+ &bufsize)) != DLADM_STATUS_OK)
+ continue;
+
+ vp = (uint64_t *)buf;
+ varpdid = *vp;
+ }
+ }
+
+ /* Should this really be possible? */
+ if (varpdid == UINT64_MAX)
+ return (DLADM_STATUS_OK);
+
+ return (dladm_overlay_walk_varpd_prop(handle, linkid, varpdid, func,
+ arg));
+}
+
+dladm_status_t
+dladm_overlay_create(dladm_handle_t handle, const char *name,
+ const char *encap, const char *search, uint64_t vid,
+ dladm_arg_list_t *props, uint32_t flags)
+{
+ int ret, i;
+ dladm_status_t status;
+ datalink_id_t linkid;
+ overlay_ioc_create_t oic;
+ overlay_ioc_activate_t oia;
+ size_t slen;
+ varpd_client_handle_t *vch;
+ uint64_t id;
+
+ status = dladm_create_datalink_id(handle, name, DATALINK_CLASS_OVERLAY,
+ DL_ETHER, flags, &linkid);
+ if (status != DLADM_STATUS_OK)
+ return (status);
+
+ bzero(&oic, sizeof (oic));
+ oic.oic_linkid = linkid;
+ oic.oic_vnetid = vid;
+ (void) strlcpy(oic.oic_encap, encap, MAXLINKNAMELEN);
+
+ status = DLADM_STATUS_OK;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_CREATE, &oic);
+ if (ret != 0) {
+ /* XXX We need to have private errors here */
+ status = dladm_errno2status(errno);
+ }
+
+ if (status != DLADM_STATUS_OK) {
+ (void) dladm_destroy_datalink_id(handle, linkid, flags);
+ return (status);
+ }
+
+ slen = strlen(search);
+ for (i = 0; props != NULL && i < props->al_count; i++) {
+ dladm_arg_info_t *aip = &props->al_info[i];
+
+ /*
+ * If it's a property for the search plugin, eg. it has the
+ * prefix '<search>/', then we don't set the property on the
+ * overlay device and instead set it on the varpd instance.
+ */
+ if (strncmp(aip->ai_name, search, slen) == 0 &&
+ aip->ai_name[slen] == '/')
+ continue;
+ status = dladm_overlay_setprop(handle, linkid, aip->ai_name,
+ aip->ai_val, aip->ai_count);
+ if (status != DLADM_STATUS_OK) {
+ /* XXX */
+ fprintf(stderr, "failed to set property %s\n",
+ aip->ai_name);
+ (void) dladm_overlay_delete(handle, linkid);
+ return (status);
+ }
+ }
+
+ if ((ret = libvarpd_c_create(&vch, dladm_overlay_doorpath)) != 0) {
+ fprintf(stderr, "failed to create libvarpd handle: %d\n", ret);
+ (void) dladm_overlay_delete(handle, linkid);
+ return (dladm_errno2status(ret));
+ }
+
+ if ((ret = libvarpd_c_instance_create(vch, linkid, search,
+ &id)) != 0) {
+ fprintf(stderr, "failed to create varpd instance: %d\n", ret);
+ libvarpd_c_destroy(vch);
+ (void) dladm_overlay_delete(handle, linkid);
+ return (dladm_errno2status(ret));
+ }
+
+ for (i = 0; props != NULL && i < props->al_count; i++) {
+ dladm_arg_info_t *aip = &props->al_info[i];
+
+ /*
+ * Skip arguments we've processed already.
+ */
+ if (strncmp(aip->ai_name, search, slen) != 0)
+ continue;
+
+ if (aip->ai_name[slen] != '/')
+ continue;
+
+ ret = dladm_overlay_varpd_setprop(handle, vch, id, aip->ai_name,
+ aip->ai_val, aip->ai_count);
+ if (ret != 0) {
+ fprintf(stderr, "failed to set varpd prop: %s\n",
+ aip->ai_name);
+ /* XXX Need to clean up instance, but... */
+ libvarpd_c_destroy(vch);
+ (void) dladm_overlay_delete(handle, linkid);
+ return (dladm_errno2status(ret));
+ }
+ }
+
+ if ((ret = libvarpd_c_instance_activate(vch, id)) != 0) {
+ fprintf(stderr, "failed to activate varpd instance: %d\n", ret);
+ dladm_overlay_walk_varpd_prop(handle, linkid, id,
+ dladm_overlay_activate_cb, NULL);
+ libvarpd_c_destroy(vch);
+ (void) dladm_overlay_delete(handle, linkid);
+ return (dladm_errno2status(ret));
+
+ }
+
+ bzero(&oia, sizeof (oia));
+ oia.oia_linkid = linkid;
+ status = DLADM_STATUS_OK;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_ACTIVATE, &oia);
+ if (ret != 0) {
+ /* XXX We need to have private errors here */
+ ret = errno;
+ fprintf(stderr, "failed to activate %d\n", ret);
+ dladm_overlay_walk_prop(handle, linkid,
+ dladm_overlay_activate_cb, NULL);
+ status = dladm_errno2status(ret);
+ (void) libvarpd_c_instance_destroy(vch, id);
+ }
+
+ libvarpd_c_destroy(vch);
+ if (status != DLADM_STATUS_OK)
+ (void) dladm_overlay_delete(handle, linkid);
+
+ return (status);
+}
+
+
+
+typedef struct overlay_walk_cb {
+ dladm_handle_t owc_handle;
+ datalink_id_t owc_linkid;
+ void *owc_arg;
+ dladm_overlay_cache_f owc_func;
+ uint_t owc_mode;
+ uint_t owc_dest;
+} overlay_walk_cb_t;
+
+static int
+dladm_overlay_walk_cache_cb(varpd_client_handle_t *chdl, uint64_t varpdid,
+ const struct ether_addr *key, const varpd_client_cache_entry_t *entry,
+ void *arg)
+{
+ overlay_walk_cb_t *owc = arg;
+ dladm_overlay_point_t point;
+
+ bzero(&point, sizeof (dladm_overlay_point_t));
+ point.dop_dest = owc->owc_dest;
+ point.dop_mac = entry->vcp_mac;
+ point.dop_flags = entry->vcp_flags;
+ point.dop_ip = entry->vcp_ip;
+ point.dop_port = entry->vcp_port;
+
+ if (owc->owc_mode == OVERLAY_TARGET_POINT)
+ point.dop_flags |= DLADM_OVERLAY_F_DEFAULT;
+
+ if (owc->owc_func(owc->owc_handle, owc->owc_linkid, key, &point,
+ owc->owc_arg) == DLADM_WALK_TERMINATE)
+ return (1);
+ return (0);
+}
+
+dladm_status_t
+dladm_overlay_walk_cache(dladm_handle_t handle, datalink_id_t linkid,
+ dladm_overlay_cache_f func, void *arg)
+{
+ int ret;
+ uint_t mode, dest;
+ uint64_t varpdid;
+ varpd_client_handle_t *chdl;
+ overlay_walk_cb_t cbarg;
+
+ if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0)
+ return (dladm_errno2status(ret));
+
+ if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ if ((ret = libvarpd_c_instance_target_mode(chdl, varpdid,
+ &dest, &mode)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ cbarg.owc_handle = handle;
+ cbarg.owc_linkid = linkid;
+ cbarg.owc_arg = arg;
+ cbarg.owc_func = func;
+ cbarg.owc_dest = dest;
+ cbarg.owc_mode = mode;
+ ret = libvarpd_c_instance_cache_walk(chdl, varpdid,
+ dladm_overlay_walk_cache_cb, &cbarg);
+ libvarpd_c_destroy(chdl);
+
+ return (dladm_errno2status(ret));
+}
+
+dladm_status_t
+dladm_overlay_cache_flush(dladm_handle_t handle, datalink_id_t linkid)
+{
+ int ret;
+ uint64_t varpdid;
+ varpd_client_handle_t *chdl;
+
+ if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0)
+ return (dladm_errno2status(ret));
+
+ if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ ret = libvarpd_c_instance_cache_flush(chdl, varpdid);
+ libvarpd_c_destroy(chdl);
+
+ return (dladm_errno2status(ret));
+}
+
+dladm_status_t
+dladm_overlay_cache_delete(dladm_handle_t handle, datalink_id_t linkid,
+ const struct ether_addr *key)
+{
+ int ret;
+ uint64_t varpdid;
+ varpd_client_handle_t *chdl;
+
+ if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0)
+ return (dladm_errno2status(ret));
+
+ if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ ret = libvarpd_c_instance_cache_delete(chdl, varpdid, key);
+ libvarpd_c_destroy(chdl);
+
+ return (dladm_errno2status(ret));
+}
+
+dladm_status_t
+dladm_overlay_cache_set(dladm_handle_t handle, datalink_id_t linkid,
+ const struct ether_addr *key, char *val)
+{
+ int ret;
+ uint_t dest;
+ uint64_t varpdid;
+ char *ip, *port = NULL;
+ varpd_client_handle_t *chdl;
+ varpd_client_cache_entry_t vcp;
+
+
+ if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0)
+ return (dladm_errno2status(ret));
+
+ if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ if ((ret = libvarpd_c_instance_target_mode(chdl, varpdid,
+ &dest, NULL)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ /*
+ * Mode tells us what we should expect in val. It we have more than one
+ * thing listed, the canonical format of it right now is mac,ip:port.
+ */
+ bzero(&vcp, sizeof (varpd_client_cache_entry_t));
+
+ if (strcasecmp(val, "drop") == 0) {
+ vcp.vcp_flags = OVERLAY_TARGET_CACHE_DROP;
+ goto send;
+ }
+
+ if (dest & OVERLAY_PLUGIN_D_ETHERNET) {
+ if (ether_aton_r(val, &vcp.vcp_mac) == NULL) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(EINVAL));
+ }
+ }
+
+ if (dest & OVERLAY_PLUGIN_D_IP) {
+ if (dest & OVERLAY_PLUGIN_D_ETHERNET) {
+ if ((ip = strchr(val, ',')) == NULL) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+ ip++;
+ } else {
+ ip = val;
+ }
+
+ if (dest & OVERLAY_PLUGIN_D_PORT) {
+ if ((port = strchr(val, ':')) == NULL) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+ *port = '\0';
+ port++;
+ }
+
+ /* Try v6, then fall back to v4 */
+ ret = inet_pton(AF_INET6, ip, &vcp.vcp_ip);
+ if (ret == -1)
+ abort();
+ if (ret == 0) {
+ struct in_addr v4;
+
+ ret = inet_pton(AF_INET, ip, &v4);
+ if (ret == -1)
+ abort();
+ if (ret == 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+ IN6_INADDR_TO_V4MAPPED(&v4, &vcp.vcp_ip);
+ }
+ }
+
+ if (dest & OVERLAY_PLUGIN_D_PORT) {
+ char *eptr;
+ unsigned long l;
+ if (port == NULL && (dest & OVERLAY_PLUGIN_D_ETHERNET)) {
+ if ((port = strchr(val, ',')) == NULL) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(EINVAL));
+ }
+ } else if (port == NULL)
+ port = val;
+
+ errno = 0;
+ l = strtoul(port, &eptr, 10);
+ if (errno != 0 || *eptr != '\0') {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(EINVAL));
+ }
+ if (l == 0 || l > UINT16_MAX) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(EINVAL));
+ }
+ vcp.vcp_port = l;
+ }
+
+send:
+ ret = libvarpd_c_instance_cache_set(chdl, varpdid, key, &vcp);
+
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+}
+
+dladm_status_t
+dladm_overlay_cache_get(dladm_handle_t handle, datalink_id_t linkid,
+ const struct ether_addr *key, dladm_overlay_point_t *point)
+{
+ int ret;
+ uint_t dest, mode;
+ uint64_t varpdid;
+ varpd_client_handle_t *chdl;
+ varpd_client_cache_entry_t entry;
+
+ if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0)
+ return (dladm_errno2status(ret));
+
+ if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ if ((ret = libvarpd_c_instance_target_mode(chdl, varpdid,
+ &dest, &mode)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ ret = libvarpd_c_instance_cache_get(chdl, varpdid, key, &entry);
+ if (ret == 0) {
+ point->dop_dest = dest;
+ point->dop_mac = entry.vcp_mac;
+ point->dop_flags = entry.vcp_flags;
+ point->dop_ip = entry.vcp_ip;
+ point->dop_port = entry.vcp_port;
+ if (mode == OVERLAY_TARGET_POINT)
+ point->dop_flags |= DLADM_OVERLAY_F_DEFAULT;
+ }
+
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+}
+
+dladm_status_t
+dladm_overlay_status(dladm_handle_t handle, datalink_id_t linkid,
+ dladm_overlay_status_f func, void *arg)
+{
+ int ret;
+ dladm_status_t status;
+ overlay_ioc_status_t ois;
+ dladm_overlay_status_t dos;
+
+ ois.ois_linkid = linkid;
+ status = DLADM_STATUS_OK;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_STATUS, &ois);
+ if (ret != 0)
+ status = dladm_errno2status(errno);
+ if (status != DLADM_STATUS_OK)
+ return (status);
+
+ dos.dos_degraded = ois.ois_status == OVERLAY_I_DEGRADED ? B_TRUE :
+ B_FALSE;
+ (void) strlcpy(dos.dos_fmamsg, ois.ois_message,
+ sizeof (dos.dos_fmamsg));
+ func(handle, linkid, &dos, arg);
+ return (DLADM_STATUS_OK);
+}
diff --git a/usr/src/lib/libdladm/common/libdloverlay.h b/usr/src/lib/libdladm/common/libdloverlay.h
new file mode 100644
index 0000000000..f80bf1afe0
--- /dev/null
+++ b/usr/src/lib/libdladm/common/libdloverlay.h
@@ -0,0 +1,111 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LIBDLOVERLAY_H
+#define _LIBDLOVERLAY_H
+
+/*
+ * libdladm Overlay device routines
+ */
+
+#include <libdladm.h>
+#include <libdladm_impl.h>
+#include <sys/overlay.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DLADM_OVERLAY_F_DROP 0x0001
+#define DLADM_OVERLAY_F_DEFAULT 0xf000
+
+typedef struct dladm_overlay_point {
+ uint_t dop_dest;
+ struct ether_addr dop_mac;
+ uint16_t dop_flags;
+ struct in6_addr dop_ip;
+ uint16_t dop_port;
+} dladm_overlay_point_t;
+
+typedef struct dladm_overlay_status {
+ boolean_t dos_degraded;
+ char dos_fmamsg[256];
+} dladm_overlay_status_t;
+
+extern dladm_status_t dladm_overlay_create(dladm_handle_t, const char *,
+ const char *, const char *, uint64_t, dladm_arg_list_t *,
+ uint32_t);
+extern dladm_status_t dladm_overlay_delete(dladm_handle_t, datalink_id_t);
+
+/*
+ * XXX I don't really like this API, but I also have a feeling that this will
+ * change over time. I guess we could turf it given a lack of stability, but...
+ */
+typedef void (*dladm_overlay_status_f)(dladm_handle_t, datalink_id_t,
+ dladm_overlay_status_t *, void *);
+extern dladm_status_t dladm_overlay_status(dladm_handle_t, datalink_id_t,
+ dladm_overlay_status_f, void *);
+
+extern dladm_status_t dladm_overlay_cache_flush(dladm_handle_t, datalink_id_t);
+extern dladm_status_t dladm_overlay_cache_delete(dladm_handle_t, datalink_id_t,
+ const struct ether_addr *);
+extern dladm_status_t dladm_overlay_cache_set(dladm_handle_t, datalink_id_t,
+ const struct ether_addr *, char *);
+extern dladm_status_t dladm_overlay_cache_get(dladm_handle_t, datalink_id_t,
+ const struct ether_addr *, dladm_overlay_point_t *);
+
+#define DLADM_OVERLAY_PROP_SIZEMAX 256
+#define DLADM_OVERLAY_PROP_NAMELEN 32
+
+typedef struct __dladm_overlay_propinfo *dladm_overlay_propinfo_handle_t;
+
+extern dladm_status_t dladm_overlay_prop_info(dladm_overlay_propinfo_handle_t,
+ const char **, uint_t *, uint_t *, const void **, uint32_t *,
+ const mac_propval_range_t **);
+extern dladm_status_t dladm_overlay_get_prop(dladm_handle_t, datalink_id_t,
+ dladm_overlay_propinfo_handle_t, void *buf, size_t *bufsize);
+
+typedef int (*dladm_overlay_prop_f)(dladm_handle_t, datalink_id_t,
+ dladm_overlay_propinfo_handle_t, void *);
+extern dladm_status_t dladm_overlay_walk_prop(dladm_handle_t, datalink_id_t,
+ dladm_overlay_prop_f, void *arg);
+
+typedef int (*dladm_overlay_cache_f)(dladm_handle_t, datalink_id_t,
+ const struct ether_addr *, const dladm_overlay_point_t *, void *);
+extern dladm_status_t dladm_overlay_walk_cache(dladm_handle_t, datalink_id_t,
+ dladm_overlay_cache_f, void *);
+
+/*
+ * The following is the likely API for setting a property.
+ */
+#if 0
+extern dladm_status_t dladm_overlay_prop_lookup(dladm_handle_t, datalink_id_t,
+ const char *, dladm_overlay_propinfo_handle_t *);
+extern void dladm_overlay_prop_handle_free(dladm_handle_t, datalink_id_t,
+ dladm_overlay_propinfo_handle_t *);
+extern dladm_status_t dladm_overlay_set_prop(dladm_handle_t, datalink_id_t,
+ dladm_propinfo_handle_t, void *buf, size_t *bufsize);
+extern dladm_status_t dladm_overlay_str_to_buf(dladm_handle_t, datalink_id_t,
+ dladm_overlay_propinfo_handle_t *, const char *, void *, size_t *);
+extern dladm_status_t dladm_overlay_buf_to_str(dladm_handle_t, datalink_id_t,
+ dladm_overlay_propinfo_handle_t *, const void *, const size_t, char *,
+ size_t *);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBDLOVERLAY_H */
diff --git a/usr/src/lib/libdladm/common/mapfile-vers b/usr/src/lib/libdladm/common/mapfile-vers
index 3eaeea656e..fc667192a4 100644
--- a/usr/src/lib/libdladm/common/mapfile-vers
+++ b/usr/src/lib/libdladm/common/mapfile-vers
@@ -20,7 +20,7 @@
#
#
# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2011, Joyent Inc. All rights reserved.
+# Copyright (c) 2014, Joyent Inc. All rights reserved.
#
#
@@ -272,6 +272,19 @@ SYMBOL_VERSION SUNWprivate_1.1 {
dladm_strs2range;
dladm_range2list;
dladm_list2range;
+
+ dladm_overlay_create;
+ dladm_overlay_delete;
+ dladm_overlay_status;
+ dladm_overlay_prop_info;
+ dladm_overlay_get_prop;
+ dladm_overlay_walk_prop;
+
+ dladm_overlay_cache_set;
+ dladm_overlay_cache_get;
+ dladm_overlay_cache_delete;
+ dladm_overlay_cache_flush;
+ dladm_overlay_walk_cache;
local:
*;
};
diff --git a/usr/src/lib/libidspace/Makefile b/usr/src/lib/libidspace/Makefile
new file mode 100644
index 0000000000..44640eeddc
--- /dev/null
+++ b/usr/src/lib/libidspace/Makefile
@@ -0,0 +1,45 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.lib
+
+HDRS = libidspace.h
+HDRDIR = common
+
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+install: install_h $(SUBDIRS)
+
+install_h: $(ROOTHDRS)
+
+check: $(CHECKHDRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/lib/libidspace/Makefile.com b/usr/src/lib/libidspace/Makefile.com
new file mode 100644
index 0000000000..8cc60ffc4c
--- /dev/null
+++ b/usr/src/lib/libidspace/Makefile.com
@@ -0,0 +1,42 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+LIBRARY = libidspace.a
+VERS = .1
+OBJECTS = id_space.o \
+ libidspace.o
+COMDIR = $(SRC)/common/idspace
+
+include ../../Makefile.lib
+
+SRCDIR = ../common
+SRCS = ../../../common/idspace/id_space.c
+LIBS = $(DYNLIB) $(LINTLIB)
+
+LDLIBS += -lc -lumem
+
+$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC)
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../Makefile.targ
+
+objs/%.o pics/%.o: $(COMDIR)/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
diff --git a/usr/src/lib/libidspace/amd64/Makefile b/usr/src/lib/libidspace/amd64/Makefile
new file mode 100644
index 0000000000..15d904c616
--- /dev/null
+++ b/usr/src/lib/libidspace/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libidspace/common/libidspace.c b/usr/src/lib/libidspace/common/libidspace.c
new file mode 100644
index 0000000000..7a9f8acd67
--- /dev/null
+++ b/usr/src/lib/libidspace/common/libidspace.c
@@ -0,0 +1,25 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+/*
+ * Wrappers around the common id_space code, for userland.
+ */
+#include <sys/id_space.h>
+
+id_t
+id_alloc_specific(id_space_t *idp, id_t id)
+{
+ return (id_alloc_specific_nosleep(idp, id));
+}
diff --git a/usr/src/lib/libidspace/common/libidspace.h b/usr/src/lib/libidspace/common/libidspace.h
new file mode 100644
index 0000000000..bb8690f19c
--- /dev/null
+++ b/usr/src/lib/libidspace/common/libidspace.h
@@ -0,0 +1,42 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LIBIDSPACE_H
+#define _LIBIDSPACE_H
+
+/*
+ * libidspace public header
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+
+typedef struct id_space id_space_t;
+
+extern id_space_t *id_space_create(const char *, id_t, id_t);
+extern void id_space_destroy(id_space_t *);
+extern void id_space_extend(id_space_t *, id_t, id_t);
+extern id_t id_alloc(id_space_t *);
+extern id_t id_alloc_specific(id_space_t *, id_t);
+extern void id_free(id_space_t *, id_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBIDSPACE_H */
diff --git a/usr/src/lib/libidspace/common/llib-lidspace b/usr/src/lib/libidspace/common/llib-lidspace
new file mode 100644
index 0000000000..39c628da47
--- /dev/null
+++ b/usr/src/lib/libidspace/common/llib-lidspace
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+#include <libidspace.h>
diff --git a/usr/src/lib/libidspace/common/mapfile-vers b/usr/src/lib/libidspace/common/mapfile-vers
new file mode 100644
index 0000000000..61ae855ee0
--- /dev/null
+++ b/usr/src/lib/libidspace/common/mapfile-vers
@@ -0,0 +1,47 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION ILLUMOS_1.0 { # first release of libidspace
+ global:
+ id_alloc;
+ id_alloc_specific;
+ id_free;
+ id_space_create;
+ id_space_destroy;
+ id_space_extend;
+};
+
+
+SYMBOL_VERSION ILLUMOSprivate {
+ local:
+ *;
+};
+
diff --git a/usr/src/lib/libidspace/i386/Makefile b/usr/src/lib/libidspace/i386/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/libidspace/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libidspace/sparc/Makefile b/usr/src/lib/libidspace/sparc/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/libidspace/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libidspace/sparcv9/Makefile b/usr/src/lib/libidspace/sparcv9/Makefile
new file mode 100644
index 0000000000..15d904c616
--- /dev/null
+++ b/usr/src/lib/libidspace/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libnvpair/libnvpair.h b/usr/src/lib/libnvpair/libnvpair.h
index b05669e506..197ec37f46 100644
--- a/usr/src/lib/libnvpair/libnvpair.h
+++ b/usr/src/lib/libnvpair/libnvpair.h
@@ -49,6 +49,8 @@ extern int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *,
extern void nvlist_print(FILE *, nvlist_t *);
extern int nvlist_print_json(FILE *, nvlist_t *);
extern void dump_nvlist(nvlist_t *, int);
+extern int nvlist_dump_json(nvlist_t *, char **);
+extern void nvlist_dump_json_free(nvlist_t *, char *);
/*
* Private nvlist printing interface that allows the caller some control
diff --git a/usr/src/lib/libnvpair/mapfile-vers b/usr/src/lib/libnvpair/mapfile-vers
index 0403964e05..9b1f048f75 100644
--- a/usr/src/lib/libnvpair/mapfile-vers
+++ b/usr/src/lib/libnvpair/mapfile-vers
@@ -244,6 +244,8 @@ SYMBOL_VERSION SUNWprivate_1.1 {
dump_nvlist;
nvlist_add_hrtime;
nvlist_lookup_hrtime;
+ nvlist_dump_json;
+ nvlist_dump_json_free;
nvlist_print;
nvlist_print_json;
nvlist_prt;
diff --git a/usr/src/lib/libnvpair/nvpair_json.c b/usr/src/lib/libnvpair/nvpair_json.c
index 5a317f5f94..e59510726f 100644
--- a/usr/src/lib/libnvpair/nvpair_json.c
+++ b/usr/src/lib/libnvpair/nvpair_json.c
@@ -17,16 +17,72 @@
#include <strings.h>
#include <wchar.h>
#include <sys/debug.h>
+#include <stdarg.h>
+#include <assert.h>
#include "libnvpair.h"
-#define FPRINTF(fp, ...) \
+#define FPRINTF(bufp, blen, offp, ...) \
do { \
- if (fprintf(fp, __VA_ARGS__) < 0) \
+ if (nvlist_rasnprintf(bufp, blen, offp, \
+ __VA_ARGS__) < 0) \
return (-1); \
} while (0)
/*
+ * A realloc-aware snprintf/asprintf like function.
+ */
+/*PRINTFLIKE4*/
+static int
+nvlist_rasnprintf(char **bufp, size_t *blen, off_t *boff, char *input, ...)
+{
+ int ret;
+ va_list ap;
+ size_t size, asize;
+ char *b;
+
+ if (*bufp == NULL) {
+ assert(*blen == 0);
+ assert(*boff == 0);
+ /* Pick a reasonable starting point, let's say 1k */
+ *blen = 1024;
+ *bufp = malloc(*blen);
+ if (*bufp == NULL)
+ return (-1);
+ }
+
+ size = *blen - *boff;
+ va_start(ap, input);
+ /* E_SEC_PRINTF_VAR_FMT */
+ ret = vsnprintf(*bufp + *boff, size, input, ap);
+ va_end(ap);
+ if (ret < 0) {
+ va_end(ap);
+ return (-1);
+ }
+ if (ret >= size) {
+ asize = *blen;
+ while (ret >= asize)
+ asize += 1024;
+ if ((b = realloc(*bufp, asize)) == NULL)
+ return (-1);
+ *bufp = b;
+ *blen = asize;
+ size = *blen - *boff;
+ va_start(ap, input);
+ /* E_SEC_PRINTF_VAR_FMT */
+ ret = vsnprintf(*bufp + *boff, size, input, ap);
+ va_end(ap);
+ if (ret < 0)
+ return (-1);
+ assert(ret < size);
+ }
+ *boff += ret;
+
+ return (0);
+}
+
+/*
* When formatting a string for JSON output we must escape certain characters,
* as described in RFC4627. This applies to both member names and
* DATA_TYPE_STRING values.
@@ -43,7 +99,8 @@
* representable Unicode characters included in their escaped numeric form.
*/
static int
-nvlist_print_json_string(FILE *fp, const char *input)
+nvlist_print_json_string(const char *input, char **bufp, size_t *blen,
+ off_t *offp)
{
mbstate_t mbr;
wchar_t c;
@@ -51,29 +108,29 @@ nvlist_print_json_string(FILE *fp, const char *input)
bzero(&mbr, sizeof (mbr));
- FPRINTF(fp, "\"");
+ FPRINTF(bufp, blen, offp, "\"");
while ((sz = mbrtowc(&c, input, MB_CUR_MAX, &mbr)) > 0) {
switch (c) {
case '"':
- FPRINTF(fp, "\\\"");
+ FPRINTF(bufp, blen, offp, "\\\"");
break;
case '\n':
- FPRINTF(fp, "\\n");
+ FPRINTF(bufp, blen, offp, "\\n");
break;
case '\r':
- FPRINTF(fp, "\\r");
+ FPRINTF(bufp, blen, offp, "\\r");
break;
case '\\':
- FPRINTF(fp, "\\\\");
+ FPRINTF(bufp, blen, offp, "\\\\");
break;
case '\f':
- FPRINTF(fp, "\\f");
+ FPRINTF(bufp, blen, offp, "\\f");
break;
case '\t':
- FPRINTF(fp, "\\t");
+ FPRINTF(bufp, blen, offp, "\\t");
break;
case '\b':
- FPRINTF(fp, "\\b");
+ FPRINTF(bufp, blen, offp, "\\b");
break;
default:
if ((c >= 0x00 && c <= 0x1f) ||
@@ -83,13 +140,15 @@ nvlist_print_json_string(FILE *fp, const char *input)
* characters in the Basic Multilingual Plane
* as JSON-escaped multibyte characters.
*/
- FPRINTF(fp, "\\u%04x", (int)(0xffff & c));
+ FPRINTF(bufp, blen, offp, "\\u%04x",
+ (int)(0xffff & c));
} else if (c >= 0x20 && c <= 0x7f) {
/*
* Render other 7-bit ASCII characters directly
* and drop other, unrepresentable characters.
*/
- FPRINTF(fp, "%c", (int)(0xff & c));
+ FPRINTF(bufp, blen, offp, "%c",
+ (int)(0xff & c));
}
break;
}
@@ -104,98 +163,103 @@ nvlist_print_json_string(FILE *fp, const char *input)
return (-1);
}
- FPRINTF(fp, "\"");
+ FPRINTF(bufp, blen, offp, "\"");
return (0);
}
-/*
- * Dump a JSON-formatted representation of an nvlist to the provided FILE *.
- * This routine does not output any new-lines or additional whitespace other
- * than that contained in strings, nor does it call fflush(3C).
- */
-int
-nvlist_print_json(FILE *fp, nvlist_t *nvl)
+static int
+nvlist_do_json(nvlist_t *nvl, char **bufp, size_t *blen, off_t *offp)
{
nvpair_t *curr;
boolean_t first = B_TRUE;
- FPRINTF(fp, "{");
+ FPRINTF(bufp, blen, offp, "{");
for (curr = nvlist_next_nvpair(nvl, NULL); curr;
curr = nvlist_next_nvpair(nvl, curr)) {
data_type_t type = nvpair_type(curr);
if (!first)
- FPRINTF(fp, ",");
+ FPRINTF(bufp, blen, offp, ",");
else
first = B_FALSE;
- if (nvlist_print_json_string(fp, nvpair_name(curr)) == -1)
+ if (nvlist_print_json_string(nvpair_name(curr), bufp, blen,
+ offp) == -1)
return (-1);
- FPRINTF(fp, ":");
+ FPRINTF(bufp, blen, offp, ":");
switch (type) {
case DATA_TYPE_STRING: {
char *string = fnvpair_value_string(curr);
- if (nvlist_print_json_string(fp, string) == -1)
+ if (nvlist_print_json_string(string, bufp, blen,
+ offp) == -1)
return (-1);
break;
}
case DATA_TYPE_BOOLEAN: {
- FPRINTF(fp, "true");
+ FPRINTF(bufp, blen, offp, "true");
break;
}
case DATA_TYPE_BOOLEAN_VALUE: {
- FPRINTF(fp, "%s", fnvpair_value_boolean_value(curr) ==
- B_TRUE ? "true" : "false");
+ FPRINTF(bufp, blen, offp, "%s",
+ fnvpair_value_boolean_value(curr) == B_TRUE ?
+ "true" : "false");
break;
}
case DATA_TYPE_BYTE: {
- FPRINTF(fp, "%hhu", fnvpair_value_byte(curr));
+ FPRINTF(bufp, blen, offp, "%hhu",
+ fnvpair_value_byte(curr));
break;
}
case DATA_TYPE_INT8: {
- FPRINTF(fp, "%hhd", fnvpair_value_int8(curr));
+ FPRINTF(bufp, blen, offp, "%hhd",
+ fnvpair_value_int8(curr));
break;
}
case DATA_TYPE_UINT8: {
- FPRINTF(fp, "%hhu", fnvpair_value_uint8_t(curr));
+ FPRINTF(bufp, blen, offp, "%hhu",
+ fnvpair_value_uint8_t(curr));
break;
}
case DATA_TYPE_INT16: {
- FPRINTF(fp, "%hd", fnvpair_value_int16(curr));
+ FPRINTF(bufp, blen, offp, "%hd",
+ fnvpair_value_int16(curr));
break;
}
case DATA_TYPE_UINT16: {
- FPRINTF(fp, "%hu", fnvpair_value_uint16(curr));
+ FPRINTF(bufp, blen, offp, "%hu",
+ fnvpair_value_uint16(curr));
break;
}
case DATA_TYPE_INT32: {
- FPRINTF(fp, "%d", fnvpair_value_int32(curr));
+ FPRINTF(bufp, blen, offp, "%d",
+ fnvpair_value_int32(curr));
break;
}
case DATA_TYPE_UINT32: {
- FPRINTF(fp, "%u", fnvpair_value_uint32(curr));
+ FPRINTF(bufp, blen, offp, "%u",
+ fnvpair_value_uint32(curr));
break;
}
case DATA_TYPE_INT64: {
- FPRINTF(fp, "%lld",
+ FPRINTF(bufp, blen, offp, "%lld",
(long long)fnvpair_value_int64(curr));
break;
}
case DATA_TYPE_UINT64: {
- FPRINTF(fp, "%llu",
+ FPRINTF(bufp, blen, offp, "%llu",
(unsigned long long)fnvpair_value_uint64(curr));
break;
}
@@ -203,20 +267,21 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
case DATA_TYPE_HRTIME: {
hrtime_t val;
VERIFY0(nvpair_value_hrtime(curr, &val));
- FPRINTF(fp, "%llu", (unsigned long long)val);
+ FPRINTF(bufp, blen, offp, "%llu",
+ (unsigned long long)val);
break;
}
case DATA_TYPE_DOUBLE: {
double val;
VERIFY0(nvpair_value_double(curr, &val));
- FPRINTF(fp, "%f", val);
+ FPRINTF(bufp, blen, offp, "%f", val);
break;
}
case DATA_TYPE_NVLIST: {
- if (nvlist_print_json(fp,
- fnvpair_value_nvlist(curr)) == -1)
+ if (nvlist_do_json(fnvpair_value_nvlist(curr), bufp,
+ blen, offp) == -1)
return (-1);
break;
}
@@ -225,14 +290,15 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
char **val;
uint_t valsz, i;
VERIFY0(nvpair_value_string_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- if (nvlist_print_json_string(fp, val[i]) == -1)
+ FPRINTF(bufp, blen, offp, ",");
+ if (nvlist_print_json_string(val[i], bufp,
+ blen, offp) == -1)
return (-1);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -240,14 +306,15 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
nvlist_t **val;
uint_t valsz, i;
VERIFY0(nvpair_value_nvlist_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- if (nvlist_print_json(fp, val[i]) == -1)
+ FPRINTF(bufp, blen, offp, ",");
+ if (nvlist_do_json(val[i], bufp, blen,
+ offp) == -1)
return (-1);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -255,14 +322,14 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
boolean_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_boolean_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, val[i] == B_TRUE ?
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, val[i] == B_TRUE ?
"true" : "false");
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -270,13 +337,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
uchar_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_byte_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%hhu", val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%hhu", val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -284,13 +351,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
uint8_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_uint8_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%hhu", val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%hhu", val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -298,13 +365,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
int8_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_int8_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%hd", val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%hd", val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -312,13 +379,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
uint16_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_uint16_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%hu", val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%hu", val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -326,13 +393,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
int16_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_int16_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%hd", val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%hd", val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -340,13 +407,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
uint32_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_uint32_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%u", val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%u", val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -354,13 +421,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
int32_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_int32_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%d", val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%d", val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -368,14 +435,14 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
uint64_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_uint64_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%llu",
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%llu",
(unsigned long long)val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -383,13 +450,14 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
int64_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_int64_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%lld", (long long)val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%lld",
+ (long long)val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -398,6 +466,41 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
}
}
- FPRINTF(fp, "}");
+ FPRINTF(bufp, blen, offp, "}");
return (0);
}
+
+int
+nvlist_dump_json(nvlist_t *nvl, char **bufp)
+{
+ off_t off = 0;
+ size_t l = 0;
+
+ *bufp = NULL;
+ return (nvlist_do_json(nvl, bufp, &l, &off));
+}
+
+/* ARGSUSED */
+void
+nvlist_dump_json_free(nvlist_t *nvl, char *buf)
+{
+ free(buf);
+}
+
+/*
+ * Dump a JSON-formatted representation of an nvlist to the provided FILE *.
+ * This routine does not output any new-lines or additional whitespace other
+ * than that contained in strings, nor does it call fflush(3C).
+ */
+int
+nvlist_print_json(FILE *fp, nvlist_t *nvl)
+{
+ int ret;
+ char *buf;
+
+ if ((ret = nvlist_dump_json(nvl, &buf)) < 0)
+ return (ret);
+ ret = fprintf(fp, "%s", buf);
+ nvlist_dump_json_free(nvl, buf);
+ return (ret);
+}
diff --git a/usr/src/lib/librename/Makefile b/usr/src/lib/librename/Makefile
new file mode 100644
index 0000000000..222523d9a1
--- /dev/null
+++ b/usr/src/lib/librename/Makefile
@@ -0,0 +1,44 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.lib
+
+HDRS = librename.h
+HDRDIR = common
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber lint: $(SUBDIRS)
+
+install: $(SUBDIRS) $(VARPD_MAPFILES) install_h
+
+install_h: $(ROOTHDRS)
+
+check: $(CHECKHDRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/lib/librename/Makefile.com b/usr/src/lib/librename/Makefile.com
new file mode 100644
index 0000000000..f0a22f25ac
--- /dev/null
+++ b/usr/src/lib/librename/Makefile.com
@@ -0,0 +1,34 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+LIBRARY = librename.a
+VERS = .1
+OBJECTS = librename.o \
+
+include ../../Makefile.lib
+
+LIBS = $(DYNLIB) $(LINTLIB)
+LDLIBS += -lc
+CPPFLAGS += -I../common
+
+SRCDIR = ../common
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/librename/amd64/Makefile b/usr/src/lib/librename/amd64/Makefile
new file mode 100644
index 0000000000..15d904c616
--- /dev/null
+++ b/usr/src/lib/librename/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/librename/common/librename.c b/usr/src/lib/librename/common/librename.c
new file mode 100644
index 0000000000..bb7f5a7a2d
--- /dev/null
+++ b/usr/src/lib/librename/common/librename.c
@@ -0,0 +1,232 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Implementation of librename(3RENAME) interfaces.
+ */
+
+#include <librename.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <synch.h>
+
+typedef enum librename_atomic_state {
+ LIBRENAME_ATOMIC_INITIAL = 0x0,
+ LIBRENAME_ATOMIC_FSYNC,
+ LIBRENAME_ATOMIC_RENAME,
+ LIBRENAME_ATOMIC_POSTSYNC,
+ LIBRENAME_ATOMIC_COMPLETED
+} librename_atomic_state_t;
+
+struct librename_atomic {
+ char *lra_fname; /* RO */
+ char *lra_altname; /* RO */
+ int lra_dirfd; /* RO */
+ int lra_tmpfd; /* RO */
+ mutex_t lra_lock;
+ librename_atomic_state_t lra_state; /* lra_lock */
+};
+
+int
+librename_atomic_fdinit(int fd, const char *file, const char *prefix,
+ int mode, int flags, librename_atomic_t **outp)
+{
+ int ret;
+ int oflags;
+ librename_atomic_t *lrap;
+ struct stat st;
+
+ if (fd < 0 || file == NULL || outp == NULL)
+ return (EINVAL);
+
+ if (flags & ~(LIBRENAME_ATOMIC_NOUNLINK | LIBRENAME_ATOMIC_CLOEXEC))
+ return (EINVAL);
+
+ if (strchr(file, '/') != NULL)
+ return (EINVAL);
+
+ if (prefix != NULL && strchr(prefix, '/') != NULL)
+ return (EINVAL);
+
+ *outp = NULL;
+ lrap = malloc(sizeof (librename_atomic_t));
+ if (lrap == NULL)
+ return (errno);
+
+ if (fstat(fd, &st) != 0) {
+ ret = errno;
+ free(lrap);
+ return (ret);
+ }
+
+ if (!S_ISDIR(st.st_mode)) {
+ if (close(lrap->lra_dirfd) != 0)
+ abort();
+ free(lrap);
+ return (ENOTDIR);
+ }
+
+ if ((lrap->lra_dirfd = dup(fd)) == -1) {
+ ret = errno;
+ free(lrap);
+ return (ret);
+ }
+
+
+ lrap->lra_fname = strdup(file);
+ if (lrap->lra_fname == NULL) {
+ ret = errno;
+ if (close(lrap->lra_dirfd) != 0)
+ abort();
+ free(lrap);
+ return (ret);
+ }
+
+ if (prefix == NULL) {
+ ret = asprintf(&lrap->lra_altname, ".%d.%s", (int)getpid(),
+ file);
+ } else {
+ ret = asprintf(&lrap->lra_altname, "%s%s", prefix, file);
+ }
+ if (ret == -1) {
+ ret = errno;
+ free(lrap->lra_fname);
+ if (close(lrap->lra_dirfd) != 0)
+ abort();
+ free(lrap);
+ return (errno);
+ }
+
+ oflags = O_CREAT | O_TRUNC | O_RDWR | O_NOFOLLOW;
+ if (flags & LIBRENAME_ATOMIC_NOUNLINK)
+ oflags |= O_EXCL;
+
+ if (flags & LIBRENAME_ATOMIC_CLOEXEC)
+ oflags |= O_CLOEXEC;
+
+ lrap->lra_tmpfd = openat(lrap->lra_dirfd, lrap->lra_altname,
+ oflags, mode);
+ if (lrap->lra_tmpfd < 0) {
+ ret = errno;
+ free(lrap->lra_altname);
+ free(lrap->lra_fname);
+ if (close(lrap->lra_dirfd) != 0)
+ abort();
+ free(lrap);
+ return (ret);
+ }
+
+ if (mutex_init(&lrap->lra_lock, USYNC_THREAD, NULL) != 0)
+ abort();
+
+ lrap->lra_state = LIBRENAME_ATOMIC_INITIAL;
+ *outp = lrap;
+ return (0);
+}
+
+int
+librename_atomic_init(const char *dir, const char *file, const char *prefix,
+ int mode, int flags, librename_atomic_t **outp)
+{
+ int fd, ret;
+
+ if ((fd = open(dir, O_RDONLY)) < 0)
+ return (errno);
+
+ ret = librename_atomic_fdinit(fd, file, prefix, mode, flags, outp);
+ if (close(fd) != 0)
+ abort();
+
+ return (ret);
+}
+
+int
+librename_atomic_fd(librename_atomic_t *lrap)
+{
+ return (lrap->lra_tmpfd);
+}
+
+/*
+ * To atomically commit a file, we need to go through and do the following:
+ *
+ * o fsync the source
+ * o run rename
+ * o fsync the source again and the directory.
+ */
+int
+librename_atomic_commit(librename_atomic_t *lrap)
+{
+ int ret = 0;
+
+ if (mutex_lock(&lrap->lra_lock) != 0)
+ abort();
+ if (lrap->lra_state == LIBRENAME_ATOMIC_COMPLETED) {
+ /* XXX What's a good errno to use here? */
+ ret = ENXIO;
+ goto out;
+ }
+
+ if (fsync(lrap->lra_tmpfd) != 0) {
+ ret = errno;
+ goto out;
+ }
+ lrap->lra_state = LIBRENAME_ATOMIC_FSYNC;
+
+ if (renameat(lrap->lra_dirfd, lrap->lra_altname, lrap->lra_dirfd,
+ lrap->lra_fname) != 0) {
+ ret = errno;
+ goto out;
+ }
+ lrap->lra_state = LIBRENAME_ATOMIC_RENAME;
+
+ if (fsync(lrap->lra_tmpfd) != 0) {
+ ret = errno;
+ goto out;
+ }
+ lrap->lra_state = LIBRENAME_ATOMIC_POSTSYNC;
+
+ if (fsync(lrap->lra_dirfd) != 0) {
+ ret = errno;
+ goto out;
+ }
+ lrap->lra_state = LIBRENAME_ATOMIC_COMPLETED;
+
+out:
+ if (mutex_unlock(&lrap->lra_lock) != 0)
+ abort();
+ return (ret);
+}
+
+void
+librename_atomic_fini(librename_atomic_t *lrap)
+{
+
+ free(lrap->lra_altname);
+ free(lrap->lra_fname);
+ if (close(lrap->lra_tmpfd) != 0)
+ abort();
+ if (close(lrap->lra_dirfd) != 0)
+ abort();
+ if (mutex_destroy(&lrap->lra_lock) != 0)
+ abort();
+ free(lrap);
+}
diff --git a/usr/src/lib/librename/common/librename.h b/usr/src/lib/librename/common/librename.h
new file mode 100644
index 0000000000..cb344f534c
--- /dev/null
+++ b/usr/src/lib/librename/common/librename.h
@@ -0,0 +1,43 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LIBRENAME_H
+#define _LIBRENAME_H
+
+/*
+ * librename(3RENAME) public interfaces
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct librename_atomic librename_atomic_t;
+
+#define LIBRENAME_ATOMIC_NOUNLINK 0x01
+#define LIBRENAME_ATOMIC_CLOEXEC 0x02
+extern int librename_atomic_init(const char *, const char *, const char *,
+ int, int, librename_atomic_t **);
+extern int librename_atomic_fdinit(int, const char *, const char *, int, int,
+ librename_atomic_t **);
+extern int librename_atomic_fd(librename_atomic_t *);
+extern int librename_atomic_commit(librename_atomic_t *);
+extern void librename_atomic_fini(librename_atomic_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBRENAME_H */
diff --git a/usr/src/lib/librename/common/llib-lrename b/usr/src/lib/librename/common/llib-lrename
new file mode 100644
index 0000000000..6f1dd81a7b
--- /dev/null
+++ b/usr/src/lib/librename/common/llib-lrename
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+#include <librename.h>
diff --git a/usr/src/lib/librename/common/mapfile-vers b/usr/src/lib/librename/common/mapfile-vers
new file mode 100644
index 0000000000..2b117f32f9
--- /dev/null
+++ b/usr/src/lib/librename/common/mapfile-vers
@@ -0,0 +1,41 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION ILLUMOS_1.0 {
+ global:
+ librename_atomic_commit;
+ librename_atomic_fd;
+ librename_atomic_fdinit;
+ librename_atomic_fini;
+ librename_atomic_init;
+ local:
+ *;
+};
diff --git a/usr/src/lib/librename/i386/Makefile b/usr/src/lib/librename/i386/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/librename/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/librename/sparc/Makefile b/usr/src/lib/librename/sparc/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/librename/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/librename/sparcv9/Makefile b/usr/src/lib/librename/sparcv9/Makefile
new file mode 100644
index 0000000000..15d904c616
--- /dev/null
+++ b/usr/src/lib/librename/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libsocket/common/mapfile-vers b/usr/src/lib/libsocket/common/mapfile-vers
index 2f7777f395..471774b462 100644
--- a/usr/src/lib/libsocket/common/mapfile-vers
+++ b/usr/src/lib/libsocket/common/mapfile-vers
@@ -39,6 +39,12 @@
$mapfile_version 2
+SYMBOL_VERSION ILLUMOS_0.2 { # reentrant ethers(3SOCKET)
+ global:
+ ether_aton_r;
+ ether_ntoa_r;
+} ILLUMOS_0.1;
+
SYMBOL_VERSION ILLUMOS_0.1 { # Illumos additions
global:
accept4;
diff --git a/usr/src/lib/libsocket/inet/ether_addr.c b/usr/src/lib/libsocket/inet/ether_addr.c
index 37105bb302..523e7b472d 100644
--- a/usr/src/lib/libsocket/inet/ether_addr.c
+++ b/usr/src/lib/libsocket/inet/ether_addr.c
@@ -22,6 +22,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -37,8 +38,6 @@
* contributors.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* All routines necessary to deal the "ethers" database. The sources
* contain mappings between 48 bit ethernet addresses and corresponding
@@ -258,38 +257,42 @@ ea_buf(void)
}
/*
- * Converts a 48 bit ethernet number to its string representation.
+ * Converts a 48 bit ethernet number to its string representation using a user
+ * defined buffer.
+ */
+char *
+ether_ntoa_r(const struct ether_addr *e, char *buf)
+{
+ (void) sprintf(buf, "%x:%x:%x:%x:%x:%x",
+ e->ether_addr_octet[0], e->ether_addr_octet[1],
+ e->ether_addr_octet[2], e->ether_addr_octet[3],
+ e->ether_addr_octet[4], e->ether_addr_octet[5]);
+ return (buf);
+}
+
+/*
+ * Converts a 48 bit ethernet number to its string representation using a
+ * per-thread buffer.
*/
char *
ether_ntoa(const struct ether_addr *e)
{
eabuf_t *eabuf;
- char *s;
if ((eabuf = ea_buf()) == NULL)
return (NULL);
- s = eabuf->ea_string;
- (void) sprintf(s, "%x:%x:%x:%x:%x:%x",
- e->ether_addr_octet[0], e->ether_addr_octet[1],
- e->ether_addr_octet[2], e->ether_addr_octet[3],
- e->ether_addr_octet[4], e->ether_addr_octet[5]);
- return (s);
+ return (ether_ntoa_r(e, eabuf->ea_string));
}
/*
- * Converts an ethernet address representation back into its 48 bits.
+ * Converts an ethernet address representation back into its 48 bits using a
+ * user defined buffer.
*/
struct ether_addr *
-ether_aton(const char *s)
+ether_aton_r(const char *s, struct ether_addr *e)
{
- eabuf_t *eabuf;
- struct ether_addr *e;
int i;
uint_t t[6];
-
- if ((eabuf = ea_buf()) == NULL)
- return (NULL);
- e = &eabuf->ea_addr;
i = sscanf(s, " %x:%x:%x:%x:%x:%x",
&t[0], &t[1], &t[2], &t[3], &t[4], &t[5]);
if (i != 6)
@@ -298,3 +301,17 @@ ether_aton(const char *s)
e->ether_addr_octet[i] = (uchar_t)t[i];
return (e);
}
+
+/*
+ * Converts an ethernet address representation back into its 48 bits using a
+ * per-thread buffer.
+ */
+struct ether_addr *
+ether_aton(const char *s)
+{
+ eabuf_t *eabuf;
+
+ if ((eabuf = ea_buf()) == NULL)
+ return (NULL);
+ return (ether_aton_r(s, &eabuf->ea_addr));
+}
diff --git a/usr/src/lib/varpd/Makefile b/usr/src/lib/varpd/Makefile
new file mode 100644
index 0000000000..5fb179c1fe
--- /dev/null
+++ b/usr/src/lib/varpd/Makefile
@@ -0,0 +1,33 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+SUBDIRS = libvarpd .WAIT direct files svp
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+check := TARGET = check
+install := TARGET = install
+install_h := TARGET = install_h
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install install_h check lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/varpd/Makefile.plugin b/usr/src/lib/varpd/Makefile.plugin
new file mode 100644
index 0000000000..67410742df
--- /dev/null
+++ b/usr/src/lib/varpd/Makefile.plugin
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+ROOTLIBDIR = $(ROOT)/usr/lib/varpd
+ROOTLIBDIR64 = $(ROOT)/usr/lib/varpd/$(MACH64)
+
+MAPFILES += ../../libvarpd/common/mapfile-plugin
diff --git a/usr/src/lib/varpd/direct/Makefile b/usr/src/lib/varpd/direct/Makefile
new file mode 100644
index 0000000000..f026c620e6
--- /dev/null
+++ b/usr/src/lib/varpd/direct/Makefile
@@ -0,0 +1,40 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../../Makefile.lib
+
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+install_h:
+
+check:
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/varpd/direct/Makefile.com b/usr/src/lib/varpd/direct/Makefile.com
new file mode 100644
index 0000000000..e48efcfcc0
--- /dev/null
+++ b/usr/src/lib/varpd/direct/Makefile.com
@@ -0,0 +1,35 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+LIBRARY = libvarpd_direct.a
+VERS = .1
+OBJECTS = libvarpd_direct.o
+
+include ../../../Makefile.lib
+include ../../Makefile.plugin
+
+LIBS = $(DYNLIB)
+LDLIBS += -lc -lvarpd -lumem -lnvpair -lnsl
+CPPFLAGS += -I../common
+
+SRCDIR = ../common
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../../Makefile.targ
diff --git a/usr/src/lib/varpd/direct/amd64/Makefile b/usr/src/lib/varpd/direct/amd64/Makefile
new file mode 100644
index 0000000000..5c586c1d40
--- /dev/null
+++ b/usr/src/lib/varpd/direct/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/direct/common/libvarpd_direct.c b/usr/src/lib/varpd/direct/common/libvarpd_direct.c
new file mode 100644
index 0000000000..fd2ee0154a
--- /dev/null
+++ b/usr/src/lib/varpd/direct/common/libvarpd_direct.c
@@ -0,0 +1,395 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Point to point plug-in for varpd.
+ */
+
+#include <libvarpd_provider.h>
+#include <umem.h>
+#include <errno.h>
+#include <thread.h>
+#include <synch.h>
+#include <strings.h>
+#include <assert.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <libnvpair.h>
+
+typedef struct varpd_direct {
+ overlay_plugin_dest_t vad_dest; /* RO */
+ mutex_t vad_lock; /* Protects the rest */
+ boolean_t vad_hip;
+ boolean_t vad_hport;
+ struct in6_addr vad_ip;
+ uint16_t vad_port;
+} varpd_direct_t;
+
+static const char *varpd_direct_props[] = {
+ "direct/dest_ip",
+ "direct/dest_port"
+};
+
+static boolean_t
+varpd_direct_valid_dest(overlay_plugin_dest_t dest)
+{
+ if (dest & ~(OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
+ return (B_FALSE);
+
+ if (!(dest & (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT)))
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+static int
+varpd_direct_create(varpd_provider_handle_t *hdl, void **outp,
+ overlay_plugin_dest_t dest)
+{
+ int ret;
+ varpd_direct_t *vdp;
+
+ if (varpd_direct_valid_dest(dest) == B_FALSE)
+ return (ENOTSUP);
+
+ vdp = umem_alloc(sizeof (varpd_direct_t), UMEM_DEFAULT);
+ if (vdp == NULL)
+ return (ENOMEM);
+
+ if ((ret = mutex_init(&vdp->vad_lock, USYNC_THREAD, NULL)) != 0) {
+ umem_free(vdp, sizeof (varpd_direct_t));
+ return (ret);
+ }
+
+ vdp->vad_dest = dest;
+ vdp->vad_hip = B_FALSE;
+ vdp->vad_hport = B_FALSE;
+ *outp = vdp;
+ return (0);
+}
+
+static int
+varpd_direct_start(void *arg)
+{
+ varpd_direct_t *vdp = arg;
+
+ mutex_lock(&vdp->vad_lock);
+ if (vdp->vad_hip == B_FALSE ||((vdp->vad_dest & OVERLAY_PLUGIN_D_IP) &&
+ vdp->vad_hport == B_FALSE)) {
+ mutex_unlock(&vdp->vad_lock);
+ return (EAGAIN);
+ }
+ mutex_unlock(&vdp->vad_lock);
+
+ return (0);
+}
+
+static void
+varpd_direct_stop(void *arg)
+{
+}
+
+static void
+varpd_direct_destroy(void *arg)
+{
+ varpd_direct_t *vdp = arg;
+
+ if (mutex_destroy(&vdp->vad_lock) != 0)
+ abort();
+ umem_free(vdp, sizeof (varpd_direct_t));
+}
+
+static int
+varpd_direct_default(void *arg, overlay_target_point_t *otp)
+{
+ varpd_direct_t *vdp = arg;
+
+ mutex_lock(&vdp->vad_lock);
+ bcopy(&vdp->vad_ip, &otp->otp_ip, sizeof (struct in6_addr));
+ otp->otp_port = vdp->vad_port;
+ mutex_unlock(&vdp->vad_lock);
+
+ return (VARPD_LOOKUP_OK);
+}
+
+static int
+varpd_direct_nprops(void *arg, uint_t *nprops)
+{
+ const varpd_direct_t *vdp = arg;
+
+ *nprops = 0;
+ if (vdp->vad_dest & OVERLAY_PLUGIN_D_ETHERNET)
+ *nprops += 1;
+
+ if (vdp->vad_dest & OVERLAY_PLUGIN_D_IP)
+ *nprops += 1;
+
+ if (vdp->vad_dest & OVERLAY_PLUGIN_D_PORT)
+ *nprops += 1;
+
+ assert(*nprops == 1 || *nprops == 2);
+
+ return (0);
+}
+
+static int
+varpd_direct_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
+{
+ varpd_direct_t *vdp = arg;
+
+ /*
+ * Because we only support IP + port combos right now, prop 0 should
+ * always be the IP. We don't support a port without an IP.
+ */
+ assert(vdp->vad_dest & OVERLAY_PLUGIN_D_IP);
+ if (propid == 0) {
+ libvarpd_prop_set_name(vph, varpd_direct_props[0]);
+ libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+ libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP);
+ libvarpd_prop_set_nodefault(vph);
+ return (0);
+ }
+
+ if (propid == 1 && vdp->vad_dest & OVERLAY_PLUGIN_D_PORT) {
+ libvarpd_prop_set_name(vph, varpd_direct_props[1]);
+ libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+ libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
+ libvarpd_prop_set_nodefault(vph);
+ libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static int
+varpd_direct_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
+{
+ varpd_direct_t *vdp = arg;
+
+ /* direct/dest_ip */
+ if (strcmp(pname, varpd_direct_props[0]) == 0) {
+ if (*sizep < sizeof (struct in6_addr))
+ return (EOVERFLOW);
+ mutex_lock(&vdp->vad_lock);
+ if (vdp->vad_hip == B_FALSE) {
+ *sizep = 0;
+ } else {
+ bcopy(&vdp->vad_ip, buf, sizeof (struct in6_addr));
+ *sizep = sizeof (struct in6_addr);
+ }
+ mutex_unlock(&vdp->vad_lock);
+ return (0);
+ }
+
+ /* direct/dest_port */
+ if (strcmp(pname, varpd_direct_props[1]) == 0) {
+ uint64_t val;
+
+ if (*sizep < sizeof (uint64_t))
+ return (EOVERFLOW);
+ mutex_lock(&vdp->vad_lock);
+ if (vdp->vad_hport == B_FALSE) {
+ *sizep = 0;
+ } else {
+ val = vdp->vad_port;
+ bcopy(&val, buf, sizeof (uint64_t));
+ *sizep = sizeof (uint64_t);
+ }
+ mutex_unlock(&vdp->vad_lock);
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static int
+varpd_direct_setprop(void *arg, const char *pname, const void *buf,
+ const uint32_t size)
+{
+ varpd_direct_t *vdp = arg;
+
+ /* direct/dest_ip */
+ if (strcmp(pname, varpd_direct_props[0]) == 0) {
+ const struct in6_addr *ipv6 = buf;
+
+ if (size < sizeof (struct in6_addr))
+ return (EOVERFLOW);
+ /*
+ * XXX What else should be disallowed?
+ */
+ if (IN6_IS_ADDR_V4COMPAT(ipv6))
+ return (EINVAL);
+
+ mutex_lock(&vdp->vad_lock);
+ bcopy(buf, &vdp->vad_ip, sizeof (struct in6_addr));
+ vdp->vad_hip = B_TRUE;
+ mutex_unlock(&vdp->vad_lock);
+ return (0);
+ }
+
+ /* direct/dest_port */
+ if (strcmp(pname, varpd_direct_props[1]) == 0) {
+ const uint64_t *valp = buf;
+ if (size < sizeof (uint64_t))
+ return (EOVERFLOW);
+
+ if (*valp == 0 || *valp > UINT16_MAX)
+ return (EINVAL);
+
+ mutex_lock(&vdp->vad_lock);
+ vdp->vad_port = (uint16_t)*valp;
+ vdp->vad_hport = B_TRUE;
+ mutex_unlock(&vdp->vad_lock);
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static int
+varpd_direct_save(void *arg, nvlist_t *nvp)
+{
+ int ret;
+ varpd_direct_t *vdp = arg;
+
+ mutex_lock(&vdp->vad_lock);
+ if (vdp->vad_hport == B_TRUE) {
+ if ((ret = nvlist_add_uint16(nvp, varpd_direct_props[1],
+ vdp->vad_port)) != 0) {
+ mutex_unlock(&vdp->vad_lock);
+ return (ret);
+ }
+ }
+
+ if (vdp->vad_hip == B_TRUE) {
+ char buf[INET6_ADDRSTRLEN];
+
+ if (inet_ntop(AF_INET6, &vdp->vad_ip, buf, sizeof (buf)) ==
+ NULL)
+ abort();
+ if ((ret = nvlist_add_string(nvp, varpd_direct_props[0],
+ buf)) != 0) {
+ mutex_unlock(&vdp->vad_lock);
+ return (ret);
+ }
+ }
+ mutex_unlock(&vdp->vad_lock);
+
+ return (0);
+}
+
+static int
+varpd_direct_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
+ overlay_plugin_dest_t dest, void **outp)
+{
+ int ret;
+ char *ipstr;
+ varpd_direct_t *vdp;
+
+ if (varpd_direct_valid_dest(dest) == B_FALSE)
+ return (ENOTSUP);
+
+ vdp = umem_alloc(sizeof (varpd_direct_t), UMEM_DEFAULT);
+ if (vdp == NULL)
+ return (ENOMEM);
+
+ if ((ret = mutex_init(&vdp->vad_lock, USYNC_THREAD, NULL)) != 0) {
+ umem_free(vdp, sizeof (varpd_direct_t));
+ return (ret);
+ }
+
+ if ((ret = nvlist_lookup_uint16(nvp, varpd_direct_props[1],
+ &vdp->vad_port)) != 0) {
+ if (ret != ENOENT) {
+ if (mutex_destroy(&vdp->vad_lock) != 0)
+ abort();
+ umem_free(vdp, sizeof (varpd_direct_t));
+ return (ret);
+ }
+ vdp->vad_hport = B_FALSE;
+ } else {
+ vdp->vad_hport = B_TRUE;
+ }
+
+ if ((ret = nvlist_lookup_string(nvp, varpd_direct_props[0],
+ &ipstr)) != 0) {
+ if (ret != ENOENT) {
+ if (mutex_destroy(&vdp->vad_lock) != 0)
+ abort();
+ umem_free(vdp, sizeof (varpd_direct_t));
+ return (ret);
+ }
+ vdp->vad_hip = B_FALSE;
+ } else {
+ ret = inet_pton(AF_INET6, ipstr, &vdp->vad_ip);
+ /*
+ * inet_pton is only defined to return -1 with errno set to
+ * EAFNOSUPPORT, which really, shouldn't happen.
+ */
+ if (ret == -1) {
+ assert(errno == EAFNOSUPPORT);
+ abort();
+ }
+ if (ret == 0) {
+ if (mutex_destroy(&vdp->vad_lock) != 0)
+ abort();
+ umem_free(vdp, sizeof (varpd_direct_t));
+ return (EINVAL);
+ }
+ }
+
+ *outp = vdp;
+ return (0);
+}
+
+static const varpd_plugin_ops_t varpd_direct_ops = {
+ 0,
+ varpd_direct_create,
+ varpd_direct_start,
+ varpd_direct_stop,
+ varpd_direct_destroy,
+ varpd_direct_default,
+ NULL,
+ varpd_direct_nprops,
+ varpd_direct_propinfo,
+ varpd_direct_getprop,
+ varpd_direct_setprop,
+ varpd_direct_save,
+ varpd_direct_restore
+};
+
+#pragma init(varpd_direct_init)
+static void
+varpd_direct_init(void)
+{
+ int err;
+ varpd_plugin_register_t *vpr;
+
+ vpr = libvarpd_plugin_alloc(VARPD_VERSION_ONE, &err);
+ /* XXX How should we communicate this failure? */
+ if (vpr == NULL)
+ return;
+
+ vpr->vpr_mode = OVERLAY_TARGET_POINT;
+ vpr->vpr_name = "direct";
+ vpr->vpr_ops = &varpd_direct_ops;
+ /* XXX We care about failure, but what do we do? */
+ (void) libvarpd_plugin_register(vpr);
+ libvarpd_plugin_free(vpr);
+}
diff --git a/usr/src/lib/varpd/direct/common/llib-lvarpd_direct b/usr/src/lib/varpd/direct/common/llib-lvarpd_direct
new file mode 100644
index 0000000000..31b3d36fbe
--- /dev/null
+++ b/usr/src/lib/varpd/direct/common/llib-lvarpd_direct
@@ -0,0 +1,18 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
diff --git a/usr/src/lib/varpd/direct/common/mapfile-vers b/usr/src/lib/varpd/direct/common/mapfile-vers
new file mode 100644
index 0000000000..642ef72adc
--- /dev/null
+++ b/usr/src/lib/varpd/direct/common/mapfile-vers
@@ -0,0 +1,35 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION SUNWprivate {
+ local:
+ *;
+};
diff --git a/usr/src/lib/varpd/direct/i386/Makefile b/usr/src/lib/varpd/direct/i386/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/varpd/direct/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/direct/sparc/Makefile b/usr/src/lib/varpd/direct/sparc/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/varpd/direct/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/direct/sparcv9/Makefile b/usr/src/lib/varpd/direct/sparcv9/Makefile
new file mode 100644
index 0000000000..5c586c1d40
--- /dev/null
+++ b/usr/src/lib/varpd/direct/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/files/Makefile b/usr/src/lib/varpd/files/Makefile
new file mode 100644
index 0000000000..f026c620e6
--- /dev/null
+++ b/usr/src/lib/varpd/files/Makefile
@@ -0,0 +1,40 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../../Makefile.lib
+
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+install_h:
+
+check:
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/varpd/files/Makefile.com b/usr/src/lib/varpd/files/Makefile.com
new file mode 100644
index 0000000000..5433f79fd1
--- /dev/null
+++ b/usr/src/lib/varpd/files/Makefile.com
@@ -0,0 +1,36 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+LIBRARY = libvarpd_files.a
+VERS = .1
+OBJECTS = libvarpd_files.o \
+ libvarpd_files_json.o
+
+include ../../../Makefile.lib
+include ../../Makefile.plugin
+
+LIBS = $(DYNLIB)
+LDLIBS += -lc -lvarpd -lumem -lnvpair -lsocket -lnsl
+CPPFLAGS += -I../common
+
+SRCDIR = ../common
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../../Makefile.targ
diff --git a/usr/src/lib/varpd/files/amd64/Makefile b/usr/src/lib/varpd/files/amd64/Makefile
new file mode 100644
index 0000000000..5c586c1d40
--- /dev/null
+++ b/usr/src/lib/varpd/files/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/files/common/libvarpd_files.c b/usr/src/lib/varpd/files/common/libvarpd_files.c
new file mode 100644
index 0000000000..0b6812bfc0
--- /dev/null
+++ b/usr/src/lib/varpd/files/common/libvarpd_files.c
@@ -0,0 +1,598 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Files based plug in for varpd
+ *
+ * This is a dynamic varpd plug-in that has a static backing store. In this
+ * case, the idea here is that the full set of mappings is fixed at creation
+ * time and specified in a single file which is currently expected to be in a
+ * JSON format of the following form:
+ *
+ * {
+ * "aa:bb:cc:dd:ee:ff": {
+ * "arp": "10.23.69.1",
+ * "ndp": "2600:3c00::f03c:91ff:fe96:a264",
+ * "ip": "192.168.1.1",
+ * "port": 8080
+ * }
+ * }
+ */
+
+#include <libvarpd_provider.h>
+#include <umem.h>
+#include <errno.h>
+#include <thread.h>
+#include <synch.h>
+#include <strings.h>
+#include <assert.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <libnvpair.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/ethernet.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include <libvarpd_files_json.h>
+
+typedef struct varpd_files {
+ overlay_plugin_dest_t vaf_dest; /* RO */
+ varpd_provider_handle_t *vaf_hdl; /* RO */
+ char *vaf_path; /* WO */
+ nvlist_t *vaf_nvl; /* WO */
+ uint64_t vaf_nmisses; /* Atomic */
+ uint64_t vaf_narp; /* Atomic */
+} varpd_files_t;
+
+static const char *varpd_files_props[] = {
+ "files/config"
+};
+
+static boolean_t
+varpd_files_valid_dest(overlay_plugin_dest_t dest)
+{
+ if (dest & ~(OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
+ return (B_FALSE);
+
+ if (!(dest & (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT)))
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+static int
+varpd_files_create(varpd_provider_handle_t *hdl, void **outp,
+ overlay_plugin_dest_t dest)
+{
+ varpd_files_t *vaf;
+
+ if (varpd_files_valid_dest(dest) == B_FALSE)
+ return (ENOTSUP);
+
+ vaf = umem_alloc(sizeof (varpd_files_t), UMEM_DEFAULT);
+ if (vaf == NULL)
+ return (ENOMEM);
+
+ bzero(vaf, sizeof (varpd_files_t));
+ vaf->vaf_dest = dest;
+ vaf->vaf_path = NULL;
+ vaf->vaf_nvl = NULL;
+ vaf->vaf_hdl = hdl;
+ *outp = vaf;
+ return (0);
+}
+
+static int
+varpd_files_normalize_nvlist(varpd_files_t *vaf, nvlist_t *nvl)
+{
+ int ret;
+ nvlist_t *out;
+ nvpair_t *pair;
+
+ if ((ret = nvlist_alloc(&out, NV_UNIQUE_NAME, 0)) != 0)
+ return (ret);
+
+ for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(nvl, pair)) {
+ char *name, fname[ETHERADDRSTRL];
+ nvlist_t *data;
+ struct ether_addr ether, *e;
+ e = &ether;
+
+ if (nvpair_type(pair) != DATA_TYPE_NVLIST) {
+ nvlist_free(out);
+ return (EINVAL);
+ }
+
+ name = nvpair_name(pair);
+ if ((ret = nvpair_value_nvlist(pair, &data)) != 0) {
+ nvlist_free(out);
+ return (EINVAL);
+ }
+
+ if (ether_aton_r(name, e) == NULL) {
+ nvlist_free(out);
+ return (EINVAL);
+ }
+
+ if (ether_ntoa_r(e, fname) == NULL) {
+ nvlist_free(out);
+ return (ENOMEM);
+ }
+
+ if ((ret = nvlist_add_nvlist(out, fname, data)) != 0) {
+ nvlist_free(out);
+ return (EINVAL);
+ }
+ }
+
+ vaf->vaf_nvl = out;
+ return (0);
+}
+
+static int
+varpd_files_start(void *arg)
+{
+ int fd, ret;
+ void *maddr;
+ struct stat st;
+ nvlist_t *nvl;
+ varpd_files_t *vaf = arg;
+
+ if (vaf->vaf_path == NULL)
+ return (EAGAIN);
+
+ if ((fd = open(vaf->vaf_path, O_RDONLY)) < 0)
+ return (errno);
+
+ if (fstat(fd, &st) != 0) {
+ ret = errno;
+ if (close(fd) != 0)
+ abort();
+ return (ret);
+ }
+
+ maddr = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+ fd, 0);
+ if (maddr == NULL) {
+ ret = errno;
+ if (close(fd) != 0)
+ abort();
+ return (ret);
+ }
+
+ ret = nvlist_parse_json(maddr, st.st_size, &nvl,
+ NVJSON_FORCE_INTEGER);
+ if (ret == 0) {
+ ret = varpd_files_normalize_nvlist(vaf, nvl);
+ nvlist_free(nvl);
+ }
+ if (munmap(maddr, st.st_size) != 0)
+ abort();
+ if (close(fd) != 0)
+ abort();
+
+ return (ret);
+}
+
+static void
+varpd_files_stop(void *arg)
+{
+ varpd_files_t *vaf = arg;
+
+ nvlist_free(vaf->vaf_nvl);
+ vaf->vaf_nvl = NULL;
+}
+
+static void
+varpd_files_destroy(void *arg)
+{
+ varpd_files_t *vaf = arg;
+
+ assert(vaf->vaf_nvl == NULL);
+ if (vaf->vaf_path != NULL) {
+ umem_free(vaf->vaf_path, strlen(vaf->vaf_path) + 1);
+ vaf->vaf_path = NULL;
+ }
+ umem_free(vaf, sizeof (varpd_files_t));
+}
+
+static void
+varpd_files_lookup(void *arg, varpd_query_handle_t *qh,
+ const overlay_targ_lookup_t *otl, overlay_target_point_t *otp)
+{
+ char macstr[ETHERADDRSTRL], *ipstr;
+ nvlist_t *nvl;
+ varpd_files_t *vaf = arg;
+ int32_t port;
+ static const uint8_t bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+ /* We don't support a default */
+ if (otl == NULL) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (otl->otl_sap == ETHERTYPE_ARP) {
+ libvarpd_plugin_proxy_arp(vaf->vaf_hdl, qh, otl);
+ return;
+ }
+
+ if (otl->otl_sap == ETHERTYPE_IPV6 &&
+ otl->otl_dstaddr[0] == 0x33 &&
+ otl->otl_dstaddr[1] == 0x33) {
+ libvarpd_plugin_proxy_ndp(vaf->vaf_hdl, qh, otl);
+ return;
+ }
+
+ if (otl->otl_sap == ETHERTYPE_IP &&
+ bcmp(otl->otl_dstaddr, bcast, ETHERADDRL) == 0) {
+ char *mac;
+ struct ether_addr a, *addr;
+
+ addr = &a;
+ if (ether_ntoa_r((struct ether_addr *)otl->otl_srcaddr,
+ macstr) == NULL) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_nvlist(vaf->vaf_nvl, macstr, &nvl) != 0) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_string(nvl, "dhcp-proxy", &mac) != 0) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (ether_aton_r(mac, addr) == NULL) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ libvarpd_plugin_proxy_dhcp(vaf->vaf_hdl, qh, otl);
+ return;
+ }
+
+ if (ether_ntoa_r((struct ether_addr *)otl->otl_dstaddr,
+ macstr) == NULL) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_nvlist(vaf->vaf_nvl, macstr, &nvl) != 0) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_int32(nvl, "port", &port) != 0) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (port <= 0 || port > UINT16_MAX) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+ otp->otp_port = port;
+
+ if (nvlist_lookup_string(nvl, "ip", &ipstr) != 0) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ /*
+ * Try to parse it as a v6 address and then if it's not, try to
+ * transform it into a v4 address which we'll then wrap it into a v4
+ * mapped address.
+ */
+ if (inet_pton(AF_INET6, ipstr, &otp->otp_ip) != 1) {
+ uint32_t v4;
+ if (inet_pton(AF_INET, ipstr, &v4) != 1) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+ IN6_IPADDR_TO_V4MAPPED(v4, &otp->otp_ip);
+ }
+
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_OK);
+}
+
+static int
+varpd_files_nprops(void *arg, uint_t *nprops)
+{
+ *nprops = 1;
+ return (0);
+}
+
+static int
+varpd_files_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
+{
+ if (propid != 0)
+ return (EINVAL);
+
+ libvarpd_prop_set_name(vph, varpd_files_props[0]);
+ libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+ libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING);
+ libvarpd_prop_set_nodefault(vph);
+ return (0);
+}
+
+static int
+varpd_files_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
+{
+ varpd_files_t *vaf = arg;
+
+ if (strcmp(pname, varpd_files_props[0]) != 0)
+ return (EINVAL);
+
+ if (vaf->vaf_path != NULL) {
+ size_t len = strlen(vaf->vaf_path) + 1;
+ if (*sizep < len)
+ return (EOVERFLOW);
+ *sizep = len;
+ (void) strlcpy(buf, vaf->vaf_path, *sizep);
+
+ } else {
+ *sizep = 0;
+ }
+
+ return (0);
+}
+
+static int
+varpd_files_setprop(void *arg, const char *pname, const void *buf,
+ const uint32_t size)
+{
+ varpd_files_t *vaf = arg;
+
+ if (strcmp(pname, varpd_files_props[0]) != 0)
+ return (EINVAL);
+
+ if (vaf->vaf_path != NULL)
+ umem_free(vaf->vaf_path, strlen(vaf->vaf_path) + 1);
+
+ vaf->vaf_path = umem_alloc(size, UMEM_DEFAULT);
+ if (vaf->vaf_path == NULL)
+ return (ENOMEM);
+ (void) strlcpy(vaf->vaf_path, buf, size);
+ return (0);
+}
+
+static int
+varpd_files_save(void *arg, nvlist_t *nvp)
+{
+ int ret;
+ varpd_files_t *vaf = arg;
+
+ if (vaf->vaf_path == NULL)
+ return (0);
+
+ if ((ret = nvlist_add_string(nvp, varpd_files_props[0],
+ vaf->vaf_path)) != 0)
+ return (ret);
+
+ if ((ret = nvlist_add_uint64(nvp, "files/vaf_nmisses",
+ vaf->vaf_nmisses)) != 0)
+ return (ret);
+
+ if ((ret = nvlist_add_uint64(nvp, "files/vaf_narp",
+ vaf->vaf_narp)) != 0)
+ return (ret);
+ return (0);
+}
+
+static int
+varpd_files_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
+ overlay_plugin_dest_t dest, void **outp)
+{
+ varpd_files_t *vaf;
+ char *str;
+ int ret;
+ uint64_t nmisses, narp;
+
+ if (varpd_files_valid_dest(dest) == B_FALSE)
+ return (EINVAL);
+
+ ret = nvlist_lookup_string(nvp, varpd_files_props[0], &str);
+ if (ret != 0 && ret != ENOENT)
+ return (ret);
+ else if (ret == ENOENT)
+ str = NULL;
+
+ if (nvlist_lookup_uint64(nvp, "files/vaf_nmisses", &nmisses) != 0)
+ return (EINVAL);
+ if (nvlist_lookup_uint64(nvp, "files/vaf_narp", &narp) != 0)
+ return (EINVAL);
+
+ vaf = umem_alloc(sizeof (varpd_files_t), UMEM_DEFAULT);
+ if (vaf == NULL)
+ return (ENOMEM);
+
+ bzero(vaf, sizeof (varpd_files_t));
+ vaf->vaf_dest = dest;
+ if (str != NULL) {
+ size_t len = strlen(str) + 1;
+ vaf->vaf_path = umem_alloc(len, UMEM_DEFAULT);
+ if (vaf->vaf_path == NULL) {
+ umem_free(vaf, sizeof (varpd_files_t));
+ return (ENOMEM);
+ }
+ (void) strlcpy(vaf->vaf_path, str, len);
+ }
+
+ vaf->vaf_hdl = hdl;
+ *outp = vaf;
+ return (0);
+}
+
+static void
+varpd_files_proxy_arp(void *arg, varpd_arp_handle_t *vah, int kind,
+ const struct sockaddr *sock, uint8_t *out)
+{
+ varpd_files_t *vaf = arg;
+ const struct sockaddr_in *ip;
+ const struct sockaddr_in6 *ip6;
+ nvpair_t *pair;
+
+ if (kind != VARPD_QTYPE_ETHERNET) {
+ libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (sock->sa_family != AF_INET && sock->sa_family != AF_INET6) {
+ libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ ip = (const struct sockaddr_in *)sock;
+ ip6 = (const struct sockaddr_in6 *)sock;
+ for (pair = nvlist_next_nvpair(vaf->vaf_nvl, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(vaf->vaf_nvl, pair)) {
+ char *mac, *ipstr;
+ nvlist_t *data;
+ struct in_addr ia;
+ struct in6_addr ia6;
+ struct ether_addr ether, *e;
+ e = &ether;
+
+ if (nvpair_type(pair) != DATA_TYPE_NVLIST)
+ continue;
+
+ mac = nvpair_name(pair);
+ if (nvpair_value_nvlist(pair, &data) != 0)
+ continue;
+
+
+ if (sock->sa_family == AF_INET) {
+ if (nvlist_lookup_string(data, "arp", &ipstr) != 0)
+ continue;
+
+ if (inet_pton(AF_INET, ipstr, &ia) != 1)
+ continue;
+
+ if (bcmp(&ia, &ip->sin_addr,
+ sizeof (struct in_addr)) != 0)
+ continue;
+ } else {
+ if (nvlist_lookup_string(data, "ndp", &ipstr) != 0)
+ continue;
+
+ if (inet_pton(AF_INET6, ipstr, &ia6) != 1)
+ continue;
+
+ if (bcmp(&ia6, &ip6->sin6_addr,
+ sizeof (struct in6_addr)) != 0)
+ continue;
+ }
+
+ /* XXX Crappy errno */
+ if (ether_aton_r(mac, e) == NULL) {
+ libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ bcopy(e, out, ETHERADDRL);
+ libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_OK);
+ return;
+ }
+
+ libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+}
+
+static void
+varpd_files_proxy_dhcp(void *arg, varpd_dhcp_handle_t *vdh, int type,
+ const overlay_targ_lookup_t *otl, uint8_t *out)
+{
+ varpd_files_t *vaf = arg;
+ nvlist_t *nvl;
+ char macstr[ETHERADDRSTRL], *mac;
+ struct ether_addr a, *addr;
+
+ addr = &a;
+ if (type != VARPD_QTYPE_ETHERNET) {
+ libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (ether_ntoa_r((struct ether_addr *)otl->otl_srcaddr,
+ macstr) == NULL) {
+ libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_nvlist(vaf->vaf_nvl, macstr, &nvl) != 0) {
+ libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_string(nvl, "dhcp-proxy", &mac) != 0) {
+ libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (ether_aton_r(mac, addr) == NULL) {
+ libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ bcopy(addr, out, ETHERADDRL);
+ libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_OK);
+}
+
+static const varpd_plugin_ops_t varpd_files_ops = {
+ 0,
+ varpd_files_create,
+ varpd_files_start,
+ varpd_files_stop,
+ varpd_files_destroy,
+ NULL,
+ varpd_files_lookup,
+ varpd_files_nprops,
+ varpd_files_propinfo,
+ varpd_files_getprop,
+ varpd_files_setprop,
+ varpd_files_save,
+ varpd_files_restore,
+ varpd_files_proxy_arp,
+ varpd_files_proxy_dhcp
+};
+
+#pragma init(varpd_files_init)
+static void
+varpd_files_init(void)
+{
+ int err;
+ varpd_plugin_register_t *vpr;
+
+ vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err);
+ /* XXX How should we communicate this failure? */
+ if (vpr == NULL)
+ return;
+
+ vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC;
+ vpr->vpr_name = "files";
+ vpr->vpr_ops = &varpd_files_ops;
+ /* XXX We care about failure, but what do we do? */
+ (void) libvarpd_plugin_register(vpr);
+ libvarpd_plugin_free(vpr);
+}
diff --git a/usr/src/lib/varpd/files/common/libvarpd_files_json.c b/usr/src/lib/varpd/files/common/libvarpd_files_json.c
new file mode 100644
index 0000000000..ed1f34b9fe
--- /dev/null
+++ b/usr/src/lib/varpd/files/common/libvarpd_files_json.c
@@ -0,0 +1,744 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <strings.h>
+#include <errno.h>
+#include <libnvpair.h>
+
+#include <libvarpd_files_json.h>
+
+typedef enum json_type {
+ JSON_TYPE_NOTHING,
+ JSON_TYPE_STRING = 1,
+ JSON_TYPE_INTEGER,
+ JSON_TYPE_DOUBLE,
+ JSON_TYPE_BOOLEAN,
+ JSON_TYPE_NULL,
+ JSON_TYPE_OBJECT,
+ JSON_TYPE_ARRAY
+} json_type_t;
+
+typedef enum parse_state {
+ PARSE_ERROR = -1,
+ PARSE_DONE = 0,
+ PARSE_REST,
+ PARSE_OBJECT,
+ PARSE_KEY_STRING,
+ PARSE_COLON,
+ PARSE_STRING,
+ PARSE_OBJECT_COMMA,
+ PARSE_ARRAY,
+ PARSE_BAREWORD,
+ PARSE_NUMBER,
+ PARSE_ARRAY_VALUE,
+ PARSE_ARRAY_COMMA,
+} parse_state_t;
+
+#define JSON_MARKER ".__json_"
+#define JSON_MARKER_ARRAY JSON_MARKER "array"
+
+typedef struct parse_frame {
+ parse_state_t pf_ps;
+ nvlist_t *pf_nvl;
+
+ char *pf_key;
+ void *pf_value;
+ json_type_t pf_value_type;
+ int pf_array_index;
+
+ struct parse_frame *pf_next;
+} parse_frame_t;
+
+typedef struct state {
+ char *s_in;
+ off_t s_pos;
+ size_t s_len;
+
+ parse_frame_t *s_top;
+
+ nvlist_parse_json_flags_t s_flags;
+} state_t;
+
+typedef void (*parse_handler_t)(state_t *);
+
+static void
+movestate(state_t *s, parse_state_t ps)
+{
+#ifdef DEBUG
+ fprintf(stderr, "move state %d -> %d\n", s->s_top->pf_ps, ps);
+#endif
+ s->s_top->pf_ps = ps;
+}
+
+static void
+pushstate(state_t *s, parse_state_t ps, parse_state_t retps)
+{
+ parse_frame_t *n = calloc(1, sizeof (*n));
+
+#ifdef DEBUG
+ fprintf(stderr, "push state %d -> %d (ret %d)\n", s->s_top->pf_ps, ps,
+ retps);
+#endif
+
+ /*
+ * Store the state we'll return to when popping this
+ * frame:
+ */
+ s->s_top->pf_ps = retps;
+
+ /*
+ * Store the initial state for the new frame, and
+ * put it on top of the stack:
+ */
+ n->pf_ps = ps;
+ n->pf_value_type = JSON_TYPE_NOTHING;
+
+ n->pf_next = s->s_top;
+ s->s_top = n;
+}
+
+static void
+posterror(state_t *s, char *error)
+{
+ /*
+ * XXX do something better here.
+ */
+ if (s->s_flags & NVJSON_ERRORS_TO_STDERR)
+ fprintf(stderr, "error (pos %lld): %s\n", s->s_pos, error);
+ movestate(s, PARSE_ERROR);
+}
+
+static char
+popchar(state_t *s)
+{
+ if (s->s_pos > s->s_len) {
+ return (0);
+ }
+ return (s->s_in[s->s_pos++]);
+}
+
+static char
+peekchar(state_t *s)
+{
+ if (s->s_pos > s->s_len) {
+ return (0);
+ }
+ return (s->s_in[s->s_pos]);
+}
+
+static void
+discard_whitespace(state_t *s)
+{
+ while (isspace(peekchar(s)))
+ popchar(s);
+}
+
+static char *escape_pairs[] = {
+ "\"\"", "\\\\", "//", "b\b", "f\f", "n\n", "r\r", "t\t", NULL
+};
+
+static char
+collect_string_escape(state_t *s)
+{
+ int i;
+ char c = popchar(s);
+
+ if (c == '\0') {
+ fprintf(stderr, "ERROR: eof mid-escape\n");
+ return ('\0');
+ } else if (c == 'u') {
+ int res;
+ int ndigs = 0;
+ char digs[5];
+ /*
+ * Deal with 4-digit unicode escape.
+ */
+ while (ndigs < 4) {
+ if ((digs[ndigs++] = popchar(s)) == '\0') {
+ fprintf(stderr, "ERROR: eof mid-escape\n");
+ return ('\0');
+ }
+ }
+ digs[4] = '\0';
+ res = atoi(digs);
+ if (res > 127) {
+ fprintf(stderr, "ERROR: unicode escape above 0x7f\n");
+ return ('\0');
+ }
+ return (res);
+ }
+
+ for (i = 0; escape_pairs[i] != NULL; i++) {
+ char *ep = escape_pairs[i];
+ if (ep[0] == c)
+ return (ep[1]);
+ }
+
+ fprintf(stderr, "ERROR: unrecognised escape char %c\n", c);
+ return ('\0');
+}
+
+static char *
+collect_string(state_t *s)
+{
+ /* XXX make this not static: */
+ char buf[1000];
+ char *pos = buf;
+
+ for (;;) {
+ char c = popchar(s);
+ if (c == '\0') {
+ /*
+ * Unexpected EOF
+ */
+ fprintf(stderr, "ERROR: unexpected EOF mid-string\n");
+ return (NULL);
+ } else if (c == '\\') {
+ char esc;
+ /*
+ * Escape Character.
+ *
+ * XXX better error handling here.
+ */
+ if ((esc = collect_string_escape(s)) == '\0')
+ return (NULL);
+ *pos++ = esc;
+ } else if (c == '"') {
+ /*
+ * Legal End of String.
+ */
+ break;
+ } else {
+ *pos++ = c;
+ }
+ }
+ *pos = '\0';
+ return (strdup(buf));
+}
+
+static char *
+collect_bareword(state_t *s)
+{
+ /* XXX make this not static: */
+ char out[100];
+ char *pos = out;
+ char c;
+ for (;;) {
+ c = peekchar(s);
+ if (islower(c)) {
+ *pos++ = popchar(s);
+ } else {
+ /*
+ * We're done...
+ */
+ *pos = '\0';
+ break;
+ }
+ }
+ return (strdup(out));
+}
+
+static void
+hdlr_bareword(state_t *s)
+{
+ char *str = collect_bareword(s);
+ if (strcmp(str, "true") == 0) {
+ s->s_top->pf_value_type = JSON_TYPE_BOOLEAN;
+ s->s_top->pf_value = (void *) B_TRUE;
+ } else if (strcmp(str, "false") == 0) {
+ s->s_top->pf_value_type = JSON_TYPE_BOOLEAN;
+ s->s_top->pf_value = (void *) B_FALSE;
+ } else if (strcmp(str, "null") == 0) {
+ s->s_top->pf_value_type = JSON_TYPE_NULL;
+ } else {
+ free(str);
+ return (posterror(s, "expected 'true', 'false' or 'null'"));
+ }
+ free(str);
+ return (movestate(s, PARSE_DONE));
+}
+
+static int
+collect_number(state_t *s, boolean_t *isint, int32_t *result,
+ double *fresult __UNUSED)
+{
+ /* XXX make not static */
+ char out[100];
+ char *pos = out;
+ boolean_t neg = B_FALSE;
+ char c;
+
+ if (peekchar(s) == '-') {
+ neg = B_TRUE;
+ popchar(s);
+ }
+ /*
+ * Read the 'int' portion:
+ */
+ if (!isdigit(c = peekchar(s))) {
+ fprintf(stderr, "expected a digit (0-9)\n");
+ return (-1);
+ }
+ for (;;) {
+ if (!isdigit(peekchar(s)))
+ break;
+ *pos++ = popchar(s);
+ }
+ if (peekchar(s) == '.' || peekchar(s) == 'e' || peekchar(s) == 'E') {
+ fprintf(stderr, "do not yet support FRACs or EXPs\n");
+ return (-1);
+ }
+
+ *isint = B_TRUE;
+ *pos = '\0';
+ *result = neg == B_TRUE ? -atoi(out) : atoi(out);
+ return (0);
+}
+
+static void
+hdlr_number(state_t *s)
+{
+ boolean_t isint;
+ int32_t result;
+ double fresult;
+
+ if (collect_number(s, &isint, &result, &fresult) != 0) {
+ return (posterror(s, "malformed number"));
+ }
+
+ if (isint == B_TRUE) {
+ s->s_top->pf_value = (void *)(uintptr_t)result;
+ s->s_top->pf_value_type = JSON_TYPE_INTEGER;
+ } else {
+ s->s_top->pf_value = malloc(sizeof (fresult));
+ bcopy(&fresult, s->s_top->pf_value, sizeof (fresult));
+ s->s_top->pf_value_type = JSON_TYPE_DOUBLE;
+ }
+
+ return (movestate(s, PARSE_DONE));
+}
+
+static void
+hdlr_rest(state_t *s)
+{
+ char c;
+ discard_whitespace(s);
+ c = popchar(s);
+ switch (c) {
+ case '{':
+ return (movestate(s, PARSE_OBJECT));
+ case '[':
+ return (movestate(s, PARSE_ARRAY));
+ default:
+ return (posterror(s, "eof before object or array"));
+ }
+}
+
+static int
+add_empty_child(state_t *s)
+{
+ /*
+ * Here, we create an empty nvlist to represent this object
+ * or array:
+ */
+ nvlist_t *empty;
+ if (nvlist_alloc(&empty, NV_UNIQUE_NAME, 0) != 0)
+ return (-1);
+ if (s->s_top->pf_next != NULL) {
+ /*
+ * If we're a child of the frame above, we store ourselves in
+ * that frame's nvlist:
+ */
+ nvlist_t *nvl = s->s_top->pf_next->pf_nvl;
+ char *key = s->s_top->pf_next->pf_key;
+
+ if (nvlist_add_nvlist(nvl, key, empty) != 0) {
+ nvlist_free(empty);
+ return (-1);
+ }
+ nvlist_free(empty);
+ if (nvlist_lookup_nvlist(nvl, key, &empty) != 0) {
+ return (-1);
+ }
+ }
+ s->s_top->pf_nvl = empty;
+ return (0);
+}
+
+static int
+decorate_array(state_t *s)
+{
+ /*
+ * When we are done creating an array, we store a 'length'
+ * property on it, as well as an internal-use marker value.
+ */
+ if (nvlist_add_boolean(s->s_top->pf_nvl, JSON_MARKER_ARRAY) != 0 ||
+ nvlist_add_uint32(s->s_top->pf_nvl, "length",
+ s->s_top->pf_array_index))
+ return (-1);
+ return (0);
+}
+
+static void
+hdlr_array(state_t *s)
+{
+ char c;
+ s->s_top->pf_value_type = JSON_TYPE_ARRAY;
+
+ if (add_empty_child(s) == -1)
+ return (posterror(s, "nvlist error"));
+
+ discard_whitespace(s);
+ c = peekchar(s);
+ switch (c) {
+ case ']':
+ popchar(s);
+ decorate_array(s);
+ return (movestate(s, PARSE_DONE));
+ default:
+ return (movestate(s, PARSE_ARRAY_VALUE));
+ }
+}
+
+static void
+hdlr_array_comma(state_t *s)
+{
+ discard_whitespace(s);
+
+ switch (popchar(s)) {
+ case ']':
+ decorate_array(s);
+ return (movestate(s, PARSE_DONE));
+ case ',':
+ return (movestate(s, PARSE_ARRAY_VALUE));
+ default:
+ return (posterror(s, "expected ',' or ']'"));
+ }
+}
+
+static void
+hdlr_array_value(state_t *s)
+{
+ char c;
+ discard_whitespace(s);
+
+ /*
+ * Generate keyname from the next array index:
+ */
+ if (s->s_top->pf_key != NULL) {
+ fprintf(stderr, "pf_key not null! was %s\n", s->s_top->pf_key);
+ abort();
+ }
+ s->s_top->pf_key = malloc(11); /* 10 digits in uint32_t */
+ if (s->s_top->pf_key == NULL)
+ return (posterror(s, "could not allocate memory"));
+ (void) snprintf(s->s_top->pf_key, 11, "%d", s->s_top->pf_array_index++);
+
+ /*
+ * Select which type handler we need for the next value:
+ */
+ switch (c = peekchar(s)) {
+ case '"':
+ popchar(s);
+ return (pushstate(s, PARSE_STRING, PARSE_ARRAY_COMMA));
+ case '{':
+ popchar(s);
+ return (pushstate(s, PARSE_OBJECT, PARSE_ARRAY_COMMA));
+ case '[':
+ popchar(s);
+ return (pushstate(s, PARSE_ARRAY, PARSE_ARRAY_COMMA));
+ default:
+ if (islower(c))
+ return (pushstate(s, PARSE_BAREWORD,
+ PARSE_ARRAY_COMMA));
+ else if (c == '-' || isdigit(c))
+ return (pushstate(s, PARSE_NUMBER, PARSE_ARRAY_COMMA));
+ else
+ return (posterror(s, "unexpected character at start "
+ "of value"));
+ }
+}
+
+static void
+hdlr_object(state_t *s)
+{
+ char c;
+ s->s_top->pf_value_type = JSON_TYPE_OBJECT;
+
+ if (add_empty_child(s) == -1)
+ return (posterror(s, "nvlist error"));
+
+ discard_whitespace(s);
+ c = popchar(s);
+ switch (c) {
+ case '}':
+ return (movestate(s, PARSE_DONE));
+ case '"':
+ return (movestate(s, PARSE_KEY_STRING));
+ default:
+ return (posterror(s, "expected key or '}'"));
+ }
+}
+
+static void
+hdlr_key_string(state_t *s)
+{
+ char *str = collect_string(s);
+ if (str == NULL)
+ return (posterror(s, "could not collect key string"));
+
+ /*
+ * Record the name of the next
+ */
+ s->s_top->pf_key = str;
+ return (movestate(s, PARSE_COLON));
+}
+
+static void
+hdlr_colon(state_t *s)
+{
+ char c;
+ discard_whitespace(s);
+
+ if ((c = popchar(s)) != ':')
+ return (posterror(s, "expected ':'"));
+
+ discard_whitespace(s);
+
+ /*
+ * Select which type handler we need for the value after the colon:
+ */
+ switch (c = peekchar(s)) {
+ case '"':
+ popchar(s);
+ return (pushstate(s, PARSE_STRING, PARSE_OBJECT_COMMA));
+ case '{':
+ popchar(s);
+ return (pushstate(s, PARSE_OBJECT, PARSE_OBJECT_COMMA));
+ case '[':
+ popchar(s);
+ return (pushstate(s, PARSE_ARRAY, PARSE_OBJECT_COMMA));
+ default:
+ if (islower(c))
+ return (pushstate(s, PARSE_BAREWORD,
+ PARSE_OBJECT_COMMA));
+ else if (c == '-' || isdigit(c))
+ return (pushstate(s, PARSE_NUMBER, PARSE_OBJECT_COMMA));
+ else
+ return (posterror(s, "unexpected character at start "
+ "of value"));
+ }
+}
+
+static void
+hdlr_object_comma(state_t *s)
+{
+ char c;
+ discard_whitespace(s);
+
+ switch (c = popchar(s)) {
+ case '}':
+ return (movestate(s, PARSE_DONE));
+ case ',':
+ discard_whitespace(s);
+ if ((c = popchar(s)) != '"')
+ return (posterror(s, "expected '\"'"));
+ return (movestate(s, PARSE_KEY_STRING));
+ default:
+ return (posterror(s, "expected ',' or '}'"));
+ }
+}
+
+static void
+hdlr_string(state_t *s)
+{
+ s->s_top->pf_value = collect_string(s);
+ if (s == NULL)
+ return (posterror(s, "could not collect string"));
+ s->s_top->pf_value_type = JSON_TYPE_STRING;
+ return (movestate(s, PARSE_DONE));
+}
+
+static int
+store_value(state_t *s)
+{
+ nvlist_t *targ = s->s_top->pf_next->pf_nvl;
+ char *key = s->s_top->pf_next->pf_key;
+ json_type_t type = s->s_top->pf_value_type;
+ int ret = 0;
+
+ switch (type) {
+ case JSON_TYPE_STRING:
+ ret = nvlist_add_string(targ, key, s->s_top->pf_value);
+ free(s->s_top->pf_value);
+ goto out;
+ case JSON_TYPE_BOOLEAN:
+ ret = nvlist_add_boolean_value(targ, key,
+ (boolean_t)s->s_top->pf_value);
+ goto out;
+ case JSON_TYPE_NULL:
+ ret = nvlist_add_boolean(targ, key);
+ goto out;
+ case JSON_TYPE_INTEGER:
+ ret = nvlist_add_int32(targ, key,
+ (int32_t)(uintptr_t)s->s_top->pf_value);
+ goto out;
+ case JSON_TYPE_ARRAY:
+ /* FALLTHRU */
+ case JSON_TYPE_OBJECT:
+ /*
+ * Objects and arrays are already 'stored' in their target
+ * nvlist on creation. See: hdlr_object, hdlr_array.
+ */
+ goto out;
+ default:
+ fprintf(stderr, "ERROR: could not store unknown type %d\n",
+ type);
+ abort();
+ }
+out:
+ s->s_top->pf_value = NULL;
+ free(s->s_top->pf_next->pf_key);
+ s->s_top->pf_next->pf_key = NULL;
+ return (ret);
+}
+
+static parse_frame_t *
+parse_frame_free(parse_frame_t *pf, boolean_t free_nvl)
+{
+ parse_frame_t *next = pf->pf_next;
+ if (pf->pf_key != NULL)
+ free(pf->pf_key);
+ if (pf->pf_value != NULL)
+ abort();
+ if (free_nvl && pf->pf_nvl != NULL)
+ nvlist_free(pf->pf_nvl);
+ free(pf);
+ return (next);
+}
+
+static parse_handler_t hdlrs[] = {
+ NULL, /* PARSE_DONE */
+ hdlr_rest, /* PARSE_REST */
+ hdlr_object, /* PARSE_OBJECT */
+ hdlr_key_string, /* PARSE_KEY_STRING */
+ hdlr_colon, /* PARSE_COLON */
+ hdlr_string, /* PARSE_STRING */
+ hdlr_object_comma, /* PARSE_OBJECT_COMMA */
+ hdlr_array, /* PARSE_ARRAY */
+ hdlr_bareword, /* PARSE_BAREWORD */
+ hdlr_number, /* PARSE_NUMBER */
+ hdlr_array_value, /* PARSE_ARRAY_VALUE */
+ hdlr_array_comma, /* PARSE_ARRAY_COMMA */
+};
+#define NUM_PARSE_HANDLERS (int)(sizeof (hdlrs) / sizeof (hdlrs[0]))
+
+int
+nvlist_parse_json(char *buf, size_t buflen, nvlist_t **nvlp,
+ nvlist_parse_json_flags_t flag)
+{
+ int ret = 0;
+ state_t s;
+
+ /*
+ * Check for valid flags:
+ */
+ if ((flag & (NVJSON_FORCE_INTEGER | NVJSON_FORCE_DOUBLE)) ==
+ (NVJSON_FORCE_INTEGER | NVJSON_FORCE_DOUBLE))
+ return (EINVAL);
+
+ /*
+ * Initialise parsing state structure:
+ */
+ bzero(&s, sizeof (s));
+ s.s_in = buf;
+ s.s_pos = 0;
+ s.s_len = buflen;
+ s.s_flags = flag;
+
+ /*
+ * Allocate top-most stack frame:
+ */
+ s.s_top = calloc(1, sizeof (*s.s_top));
+ if (s.s_top == NULL) {
+ ret = errno;
+ goto out;
+ }
+
+ s.s_top->pf_ps = PARSE_REST;
+ for (;;) {
+ if (s.s_top->pf_ps < 0) {
+ /*
+ * The parser reported an error.
+ */
+#if 0
+ fprintf(stderr, "parse error\n");
+#endif
+ ret = EFAULT;
+ goto out;
+ } else if (s.s_top->pf_ps == PARSE_DONE) {
+ if (s.s_top->pf_next == NULL) {
+ /*
+ * Last frame, so we're really
+ * done.
+ */
+ *nvlp = s.s_top->pf_nvl;
+ goto out;
+ } else {
+ /*
+ * Otherwise, pop a frame and continue
+ * in previous state.
+ */
+#if 0
+ parse_frame_t *t = s.s_top->pf_next;
+#endif
+
+ /*
+ * Copy out the value we created in the
+ * old frame:
+ */
+ if ((ret = store_value(&s)) != 0)
+ goto out;
+#if 0
+ fprintf(stderr, "pop state %d -> %d\n",
+ s.s_top->pf_ps, t->pf_ps);
+#endif
+ /*
+ * Free old frame:
+ */
+ s.s_top = parse_frame_free(s.s_top, B_FALSE);
+ }
+ }
+ /*
+ * Dispatch to parser handler routine for this state:
+ */
+ if (s.s_top->pf_ps >= NUM_PARSE_HANDLERS ||
+ hdlrs[s.s_top->pf_ps] == NULL) {
+ fprintf(stderr, "no handler for state %d\n",
+ s.s_top->pf_ps);
+ abort();
+ }
+ hdlrs[s.s_top->pf_ps](&s);
+ }
+
+out:
+ while (s.s_top != NULL)
+ s.s_top = parse_frame_free(s.s_top, ret == 0 ? B_FALSE :
+ B_TRUE);
+ return (ret);
+}
diff --git a/usr/src/lib/varpd/files/common/libvarpd_files_json.h b/usr/src/lib/varpd/files/common/libvarpd_files_json.h
new file mode 100644
index 0000000000..2a96e55206
--- /dev/null
+++ b/usr/src/lib/varpd/files/common/libvarpd_files_json.h
@@ -0,0 +1,40 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LIBVARPD_FILES_JSON_H
+#define _LIBVARPD_FILES_JSON_H
+
+#include <libnvpair.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum nvlist_parse_json_flags {
+ NVJSON_FORCE_INTEGER = 0x01,
+ NVJSON_FORCE_DOUBLE = 0x02,
+ NVJSON_ERRORS_TO_STDERR = 0x04
+} nvlist_parse_json_flags_t;
+
+extern int nvlist_parse_json(char *, size_t, nvlist_t **,
+ nvlist_parse_json_flags_t);
+
+#define __UNUSED __attribute__((unused))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_FILES_JSON_H */
diff --git a/usr/src/lib/varpd/files/common/llib-lvarpd_files b/usr/src/lib/varpd/files/common/llib-lvarpd_files
new file mode 100644
index 0000000000..31b3d36fbe
--- /dev/null
+++ b/usr/src/lib/varpd/files/common/llib-lvarpd_files
@@ -0,0 +1,18 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
diff --git a/usr/src/lib/varpd/files/common/mapfile-vers b/usr/src/lib/varpd/files/common/mapfile-vers
new file mode 100644
index 0000000000..642ef72adc
--- /dev/null
+++ b/usr/src/lib/varpd/files/common/mapfile-vers
@@ -0,0 +1,35 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION SUNWprivate {
+ local:
+ *;
+};
diff --git a/usr/src/lib/varpd/files/i386/Makefile b/usr/src/lib/varpd/files/i386/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/varpd/files/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/files/sparc/Makefile b/usr/src/lib/varpd/files/sparc/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/varpd/files/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/files/sparcv9/Makefile b/usr/src/lib/varpd/files/sparcv9/Makefile
new file mode 100644
index 0000000000..5c586c1d40
--- /dev/null
+++ b/usr/src/lib/varpd/files/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/libvarpd/Makefile b/usr/src/lib/varpd/libvarpd/Makefile
new file mode 100644
index 0000000000..60c9cfa07f
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/Makefile
@@ -0,0 +1,54 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../../Makefile.lib
+
+HDRS = libvarpd.h libvarpd_client.h libvarpd_provider.h
+HDRDIR = common
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+TYPECHECK_LIB = libvarpd.so.1
+TYPELIST = \
+ varpd_client_instance_arg_t \
+ varpd_client_nprops_arg_t \
+ varpd_client_propinfo_arg_t \
+ varpd_client_eresp_t \
+ overlay_targ_cache_entry_t \
+ overlay_targ_cache_t \
+ overlay_targ_cache_iter_t
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber lint: $(SUBDIRS)
+
+install: $(SUBDIRS) $(VARPD_MAPFILES) install_h
+
+install_h: $(ROOTHDRS)
+
+check: $(CHECKHDRS) $(TYPECHECK)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/varpd/libvarpd/Makefile.com b/usr/src/lib/varpd/libvarpd/Makefile.com
new file mode 100644
index 0000000000..e0378f3229
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/Makefile.com
@@ -0,0 +1,44 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+LIBRARY = libvarpd.a
+VERS = .1
+OBJECTS = libvarpd.o \
+ libvarpd_arp.o \
+ libvarpd_client.o \
+ libvarpd_door.o \
+ libvarpd_overlay.o \
+ libvarpd_panic.o \
+ libvarpd_persist.o \
+ libvarpd_prop.o \
+ libvarpd_plugin.o \
+ libvarpd_util.o
+
+include ../../../Makefile.lib
+
+LIBS = $(DYNLIB)
+LDLIBS += -lc -lavl -lumem -lidspace -lnvpair -lmd5 -lrename \
+ -lbunyan
+CPPFLAGS += -I../common
+
+SRCDIR = ../common
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../../Makefile.targ
diff --git a/usr/src/lib/varpd/libvarpd/amd64/Makefile b/usr/src/lib/varpd/libvarpd/amd64/Makefile
new file mode 100644
index 0000000000..5c586c1d40
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd.c b/usr/src/lib/varpd/libvarpd/common/libvarpd.c
new file mode 100644
index 0000000000..3c08a3316b
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd.c
@@ -0,0 +1,355 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * varpd library
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <umem.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/avl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <strings.h>
+
+#include <libvarpd_impl.h>
+
+static int
+libvarpd_instance_comparator(const void *lp, const void *rp)
+{
+ const varpd_instance_t *lpp, *rpp;
+ lpp = lp;
+ rpp = rp;
+
+ if (lpp->vri_id > rpp->vri_id)
+ return (1);
+ if (lpp->vri_id < rpp->vri_id)
+ return (-1);
+ return (0);
+}
+
+static int
+libvarpd_instance_lcomparator(const void *lp, const void *rp)
+{
+ const varpd_instance_t *lpp, *rpp;
+ lpp = lp;
+ rpp = rp;
+
+ if (lpp->vri_linkid > rpp->vri_linkid)
+ return (1);
+ if (lpp->vri_linkid < rpp->vri_linkid)
+ return (-1);
+ return (0);
+}
+
+
+int
+libvarpd_create(varpd_handle_t **vphp)
+{
+ int ret;
+ varpd_impl_t *vip;
+ char buf[32];
+
+ if (vphp == NULL)
+ return (EINVAL);
+
+ *vphp = NULL;
+ vip = umem_alloc(sizeof (varpd_impl_t), UMEM_DEFAULT);
+ if (vip == NULL)
+ return (errno);
+
+ bzero(vip, sizeof (varpd_impl_t));
+ (void) snprintf(buf, sizeof (buf), "varpd_%p", vip);
+ vip->vdi_idspace = id_space_create(buf, LIBVARPD_ID_MIN,
+ LIBVARPD_ID_MAX);
+ if (vip->vdi_idspace == NULL) {
+ int ret = errno;
+ umem_free(vip, sizeof (varpd_impl_t));
+ return (ret);
+ }
+
+ vip->vdi_qcache = umem_cache_create("query", sizeof (varpd_query_t), 0,
+ NULL, NULL, NULL, NULL, NULL, 0);
+ if (vip->vdi_qcache == NULL) {
+ int ret = errno;
+ id_space_destroy(vip->vdi_idspace);
+ umem_free(vip, sizeof (varpd_impl_t));
+ return (ret);
+ }
+
+ if ((ret = libvarpd_overlay_init(vip)) != 0) {
+ umem_cache_destroy(vip->vdi_qcache);
+ id_space_destroy(vip->vdi_idspace);
+ umem_free(vip, sizeof (varpd_impl_t));
+ return (ret);
+ }
+
+ if ((ret = bunyan_init("varpd", &vip->vdi_bunyan)) != 0) {
+ libvarpd_overlay_fini(vip);
+ umem_cache_destroy(vip->vdi_qcache);
+ id_space_destroy(vip->vdi_idspace);
+ umem_free(vip, sizeof (varpd_impl_t));
+ return (ret);
+ }
+
+ libvarpd_persist_init(vip);
+
+ avl_create(&vip->vdi_plugins, libvarpd_plugin_comparator,
+ sizeof (varpd_plugin_t), offsetof(varpd_plugin_t, vpp_node));
+
+ avl_create(&vip->vdi_instances, libvarpd_instance_comparator,
+ sizeof (varpd_instance_t), offsetof(varpd_instance_t, vri_inode));
+ avl_create(&vip->vdi_linstances, libvarpd_instance_lcomparator,
+ sizeof (varpd_instance_t), offsetof(varpd_instance_t, vri_lnode));
+
+ if (mutex_init(&vip->vdi_lock, USYNC_THREAD, NULL) != 0)
+ libvarpd_panic("failed to create mutex: %d", errno);
+
+ vip->vdi_doorfd = -1;
+ *vphp = (varpd_handle_t *)vip;
+ return (0);
+}
+
+void
+libvarpd_destroy(varpd_handle_t *vhp)
+{
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ if (mutex_destroy(&vip->vdi_lock) != 0)
+ libvarpd_panic("failed to destroy mutex: %d", errno);
+ libvarpd_persist_fini(vip);
+ libvarpd_overlay_fini(vip);
+ umem_cache_destroy(vip->vdi_qcache);
+ id_space_destroy(vip->vdi_idspace);
+ umem_free(vip, sizeof (varpd_impl_t));
+}
+
+int
+libvarpd_instance_create(varpd_handle_t *vhp, datalink_id_t linkid,
+ const char *pname, varpd_instance_handle_t **outp)
+{
+ int ret;
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+ varpd_plugin_t *plugin;
+ varpd_instance_t *inst, lookup;
+ overlay_plugin_dest_t dest;
+ uint64_t vid;
+
+ /* XXX Really want our own errnos */
+ plugin = libvarpd_plugin_lookup(vip, pname);
+ if (plugin == NULL)
+ return (ENOENT);
+
+ if ((ret = libvarpd_overlay_info(vip, linkid, &dest, NULL, &vid)) != 0)
+ return (ret);
+
+ inst = umem_alloc(sizeof (varpd_instance_t), UMEM_DEFAULT);
+ if (inst == NULL)
+ return (ENOMEM);
+
+ inst->vri_id = id_alloc(vip->vdi_idspace);
+ if (inst->vri_id == -1)
+ libvarpd_panic("failed to allocate id from vdi_idspace: %d",
+ errno);
+ inst->vri_linkid = linkid;
+ inst->vri_vnetid = vid;
+ inst->vri_mode = plugin->vpp_mode;
+ inst->vri_dest = dest;
+ inst->vri_plugin = plugin;
+ inst->vri_impl = vip;
+ inst->vri_flags = 0;
+ if ((ret = plugin->vpp_ops->vpo_create((varpd_provider_handle_t *)inst,
+ &inst->vri_private, dest)) != 0) {
+ id_free(vip->vdi_idspace, inst->vri_id);
+ umem_free(inst, sizeof (varpd_instance_t));
+ return (ret);
+ }
+
+ if (mutex_init(&inst->vri_lock, USYNC_THREAD, NULL) != 0)
+ libvarpd_panic("failed to create mutex: %d", errno);
+
+ mutex_lock(&vip->vdi_lock);
+ lookup.vri_id = inst->vri_id;
+ if (avl_find(&vip->vdi_instances, &lookup, NULL) != NULL)
+ libvarpd_panic("found duplicate instance with id %d",
+ lookup.vri_id);
+ avl_add(&vip->vdi_instances, inst);
+ lookup.vri_linkid = inst->vri_linkid;
+ if (avl_find(&vip->vdi_linstances, &lookup, NULL) != NULL)
+ libvarpd_panic("found duplicate linstance with id %d",
+ lookup.vri_linkid);
+ avl_add(&vip->vdi_linstances, inst);
+ mutex_unlock(&vip->vdi_lock);
+ *outp = (varpd_instance_handle_t *)inst;
+ return (0);
+}
+
+uint64_t
+libvarpd_instance_id(varpd_instance_handle_t *ihp)
+{
+ varpd_instance_t *inst = (varpd_instance_t *)ihp;
+ return (inst->vri_id);
+}
+
+uint64_t
+libvarpd_plugin_vnetid(varpd_provider_handle_t *vhp)
+{
+ varpd_instance_t *inst = (varpd_instance_t *)vhp;
+ return (inst->vri_vnetid);
+}
+
+varpd_instance_handle_t *
+libvarpd_instance_lookup(varpd_handle_t *vhp, uint64_t id)
+{
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+ varpd_instance_t lookup, *retp;
+
+ lookup.vri_id = id;
+ mutex_lock(&vip->vdi_lock);
+ retp = avl_find(&vip->vdi_instances, &lookup, NULL);
+ mutex_unlock(&vip->vdi_lock);
+ return ((varpd_instance_handle_t *)retp);
+}
+
+/*
+ * If this function becomes external to varpd, we need to change it to return a
+ * varpd_instance_handle_t.
+ */
+varpd_instance_t *
+libvarpd_instance_lookup_by_dlid(varpd_impl_t *vip, datalink_id_t linkid)
+{
+ varpd_instance_t lookup, *retp;
+
+ lookup.vri_linkid = linkid;
+ mutex_lock(&vip->vdi_lock);
+ retp = avl_find(&vip->vdi_linstances, &lookup, NULL);
+ mutex_unlock(&vip->vdi_lock);
+ return (retp);
+}
+
+/*
+ * When an instance is being destroyed, that means we should deactivate it, as
+ * well as clean it up. That means here, the proper order is calling the plug-in
+ * stop.
+ */
+void
+libvarpd_instance_destroy(varpd_instance_handle_t *ihp)
+{
+ varpd_instance_t *inst = (varpd_instance_t *)ihp;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ /*
+ * First things first, remove it from global visibility.
+ */
+ mutex_lock(&vip->vdi_lock);
+ avl_remove(&vip->vdi_instances, inst);
+ avl_remove(&vip->vdi_linstances, inst);
+ mutex_unlock(&vip->vdi_lock);
+
+ /*
+ * XXX We probably need a reference counting strategy here so we know
+ * it's safe to remove.
+ */
+ mutex_lock(&inst->vri_lock);
+
+ /*
+ * We need to clean up this instance, that means remove it from
+ * persistence and stopping it. Then finally we'll have to clean it up
+ * entirely.
+ */
+ if (inst->vri_flags & VARPD_INSTANCE_F_ACTIVATED) {
+ inst->vri_flags &= ~VARPD_INSTANCE_F_ACTIVATED;
+ libvarpd_torch_instance(vip, inst);
+ inst->vri_plugin->vpp_ops->vpo_stop(inst->vri_private);
+ inst->vri_plugin->vpp_ops->vpo_destroy(inst->vri_private);
+ inst->vri_private = NULL;
+ }
+ mutex_unlock(&inst->vri_lock);
+
+ /* Do the full clean up of the instance */
+ if (mutex_destroy(&inst->vri_lock) != 0)
+ libvarpd_panic("failed to destroy instance vri_lock");
+ id_free(vip->vdi_idspace, inst->vri_id);
+ umem_free(inst, sizeof (varpd_instance_t));
+}
+
+int
+libvarpd_instance_activate(varpd_instance_handle_t *ihp)
+{
+ int ret;
+ varpd_instance_t *inst = (varpd_instance_t *)ihp;
+
+ mutex_lock(&inst->vri_lock);
+
+ if (inst->vri_flags & VARPD_INSTANCE_F_ACTIVATED) {
+ ret = EEXIST;
+ goto out;
+ }
+
+ if ((ret = inst->vri_plugin->vpp_ops->vpo_start(inst->vri_private)) !=
+ 0)
+ goto out;
+
+ if ((ret = libvarpd_persist_instance(inst->vri_impl, inst)) != 0)
+ goto out;
+
+ /* XXX We should call stop if this fails */
+ if ((ret = libvarpd_overlay_associate(inst)) != 0)
+ goto out;
+
+ inst->vri_flags |= VARPD_INSTANCE_F_ACTIVATED;
+
+out:
+ mutex_unlock(&inst->vri_lock);
+ return (ret);
+}
+
+const bunyan_logger_t *
+libvarpd_plugin_bunyan(varpd_provider_handle_t *vhp)
+{
+ varpd_instance_t *inst = (varpd_instance_t *)vhp;
+ return (inst->vri_impl->vdi_bunyan);
+}
+
+static void
+libvarpd_prefork(void)
+{
+ libvarpd_plugin_prefork();
+}
+
+static void
+libvarpd_postfork(void)
+{
+ libvarpd_plugin_postfork();
+}
+
+#pragma init(libvarpd_init)
+static void
+libvarpd_init(void)
+{
+ libvarpd_plugin_init();
+ if (pthread_atfork(NULL, libvarpd_prefork, libvarpd_postfork) != 0)
+ libvarpd_panic("failed to create varpd atfork: %d", errno);
+}
+
+#pragma fini(libvarpd_fini)
+static void
+libvarpd_fini(void)
+{
+ libvarpd_plugin_fini();
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd.h b/usr/src/lib/varpd/libvarpd/common/libvarpd.h
new file mode 100644
index 0000000000..b2dc57dd4e
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd.h
@@ -0,0 +1,78 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_H
+#define _LIBVARPD_H
+
+/*
+ * varpd interfaces
+ */
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <sys/mac.h>
+#include <libvarpd_client.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct __varpd_handle varpd_handle_t;
+typedef struct __varpd_prop_handle varpd_prop_handle_t;
+typedef struct __varpd_instance_handle varpd_instance_handle_t;
+
+extern int libvarpd_create(varpd_handle_t **);
+extern void libvarpd_destroy(varpd_handle_t *);
+
+extern int libvarpd_persist_enable(varpd_handle_t *, const char *);
+extern int libvarpd_persist_restore(varpd_handle_t *);
+extern int libvarpd_persist_disable(varpd_handle_t *);
+
+extern int libvarpd_instance_create(varpd_handle_t *, datalink_id_t,
+ const char *, varpd_instance_handle_t **);
+extern uint64_t libvarpd_instance_id(varpd_instance_handle_t *);
+extern varpd_instance_handle_t *libvarpd_instance_lookup(varpd_handle_t *,
+ uint64_t);
+extern void libvarpd_instance_destroy(varpd_instance_handle_t *);
+extern int libvarpd_instance_activate(varpd_instance_handle_t *);
+
+extern int libvarpd_plugin_load(varpd_handle_t *, const char *);
+typedef int (*libvarpd_plugin_walk_f)(varpd_handle_t *, const char *, void *);
+extern int libvarpd_plugin_walk(varpd_handle_t *, libvarpd_plugin_walk_f,
+ void *);
+
+extern int libvarpd_prop_handle_alloc(varpd_handle_t *,
+ varpd_instance_handle_t *, varpd_prop_handle_t **);
+extern void libvarpd_prop_handle_free(varpd_prop_handle_t *);
+extern int libvarpd_prop_nprops(varpd_instance_handle_t *, uint_t *);
+/* XXX Do we need the next two from a server perspective? */
+extern int libvarpd_prop_info_fill(varpd_prop_handle_t *, uint_t);
+extern int libvarpd_prop_info(varpd_prop_handle_t *, const char **, uint_t *,
+ uint_t *, const void **, uint32_t *, const mac_propval_range_t **);
+extern int libvarpd_prop_get(varpd_prop_handle_t *, void *, uint32_t *);
+extern int libvarpd_prop_set(varpd_prop_handle_t *, const void *, uint32_t);
+
+extern int libvarpd_door_server_create(varpd_handle_t *, const char *);
+extern void libvarpd_door_server_destroy(varpd_handle_t *);
+
+extern void libvarpd_overlay_lookup_run(varpd_handle_t *);
+extern void libvarpd_overlay_lookup_quiesce(varpd_handle_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_H */
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c
new file mode 100644
index 0000000000..04fbe0e05b
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c
@@ -0,0 +1,645 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Common routines for implmeenting proxy arp
+ */
+
+#include <sys/types.h>
+#include <net/if.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet/udp.h>
+#include <netinet/dhcp.h>
+#include <libvarpd_impl.h>
+#include <sys/vlan.h>
+#include <strings.h>
+#include <assert.h>
+
+#define IPV6_VERSION 6
+
+typedef struct varpd_arp_query {
+ int vaq_type;
+ char vaq_buf[ETHERMAX + VLAN_TAGSZ];
+ size_t vaq_bsize;
+ uint8_t vaq_lookup[ETHERADDRL];
+ struct sockaddr_storage vaq_sock;
+ varpd_instance_t *vaq_inst;
+ struct ether_arp *vaq_ea;
+ varpd_query_handle_t *vaq_query;
+ const overlay_targ_lookup_t *vaq_otl;
+ ip6_t *vaq_ip6;
+ nd_neighbor_solicit_t *vaq_ns;
+} varpd_arp_query_t;
+
+typedef struct varpd_dhcp_query {
+ char vdq_buf[ETHERMAX + VLAN_TAGSZ];
+ size_t vdq_bsize;
+ uint8_t vdq_lookup[ETHERADDRL];
+ const overlay_targ_lookup_t *vdq_otl;
+ varpd_instance_t *vdq_inst;
+ varpd_query_handle_t *vdq_query;
+ struct ether_header *vdq_ether;
+} varpd_dhcp_query_t;
+
+static const uint8_t libvarpd_arp_bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff };
+
+void
+libvarpd_plugin_proxy_arp(varpd_provider_handle_t *hdl,
+ varpd_query_handle_t *vqh, const overlay_targ_lookup_t *otl)
+{
+ varpd_arp_query_t *vaq;
+ varpd_instance_t *inst = (varpd_instance_t *)hdl;
+ struct ether_arp *ea;
+ struct sockaddr_in *ip;
+
+ vaq = umem_alloc(sizeof (varpd_arp_query_t), UMEM_DEFAULT);
+ if (vaq == NULL) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ return;
+ }
+ vaq->vaq_bsize = sizeof (vaq->vaq_buf);
+
+ if (otl->otl_sap != ETHERTYPE_ARP) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ /*
+ * An ARP packet should not be very large because it's definited to only
+ * be allowed to have a single entry at a given time. But our data must
+ * be at least as large as an ether_arp and our header must be at least
+ * as large as a standard ethernet header.
+ */
+ if (otl->otl_hdrsize + otl->otl_pktsize > vaq->vaq_bsize ||
+ otl->otl_pktsize < sizeof (struct ether_arp) ||
+ otl->otl_hdrsize < sizeof (struct ether_header)) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ if (libvarpd_overlay_packet(inst->vri_impl, otl, vaq->vaq_buf,
+ &vaq->vaq_bsize) != 0) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ /* XXX Check how many bytes we actually have */
+
+ ea = (void *)((uintptr_t)vaq->vaq_buf + (uintptr_t)otl->otl_hdrsize);
+
+ /*
+ * Make sure it matches something that we know about.
+ */
+ if (ntohs(ea->ea_hdr.ar_hrd) != ARPHRD_ETHER ||
+ ntohs(ea->ea_hdr.ar_pro) != ETHERTYPE_IP ||
+ ea->ea_hdr.ar_hln != ETHERADDRL ||
+ ea->ea_hdr.ar_pln != sizeof (ea->arp_spa) ||
+ ntohs(ea->ea_hdr.ar_op) != ARPOP_REQUEST) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ /*
+ * Now that we've verified that our data is sane, see if we're doing a
+ * gratuitous arp and if so, drop it. Otherwise, we may end up
+ * triggering duplicate address detection.
+ */
+ if (bcmp(ea->arp_spa, ea->arp_tpa, sizeof (ea->arp_spa)) == 0) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ bzero(&vaq->vaq_sock, sizeof (struct sockaddr_storage));
+ ip = (struct sockaddr_in *)&vaq->vaq_sock;
+ ip->sin_family = AF_INET;
+ bcopy(ea->arp_tpa, &ip->sin_addr, sizeof (ea->arp_tpa));
+
+ vaq->vaq_type = AF_INET;
+ vaq->vaq_inst = inst;
+ vaq->vaq_ea = ea;
+ vaq->vaq_query = vqh;
+ vaq->vaq_otl = otl;
+
+ if (inst->vri_plugin->vpp_ops->vpo_arp == NULL)
+ libvarpd_panic("%s plugin asked to do arp, but has no method",
+ inst->vri_plugin->vpp_name);
+
+ inst->vri_plugin->vpp_ops->vpo_arp(inst->vri_private,
+ (varpd_arp_handle_t *)vaq, VARPD_QTYPE_ETHERNET,
+ (struct sockaddr *)ip, vaq->vaq_lookup);
+}
+
+static void
+libvarpd_proxy_arp_fini(varpd_arp_query_t *vaq)
+{
+ struct ether_header *ether;
+ struct sockaddr_in *ip;
+
+ ip = (struct sockaddr_in *)&vaq->vaq_sock;
+ /*
+ * Modify our packet in place for a reply. We need to swap around the
+ * sender and target addresses.
+ */
+ vaq->vaq_ea->ea_hdr.ar_op = htons(ARPOP_REPLY);
+ bcopy(vaq->vaq_ea->arp_sha, vaq->vaq_ea->arp_tha, ETHERADDRL);
+ bcopy(vaq->vaq_lookup, vaq->vaq_ea->arp_sha, ETHERADDRL);
+ bcopy(vaq->vaq_ea->arp_spa, &ip->sin_addr,
+ sizeof (vaq->vaq_ea->arp_spa));
+ bcopy(vaq->vaq_ea->arp_tpa, vaq->vaq_ea->arp_spa,
+ sizeof (vaq->vaq_ea->arp_spa));
+ bcopy(&ip->sin_addr, vaq->vaq_ea->arp_tpa,
+ sizeof (vaq->vaq_ea->arp_spa));
+
+ /*
+ * Finally go ahead and fix up the mac header and reply to the sender
+ * explicitly.
+ */
+ ether = (struct ether_header *)vaq->vaq_buf;
+ bcopy(&ether->ether_shost, &ether->ether_dhost, ETHERADDRL);
+ bcopy(vaq->vaq_lookup, &ether->ether_shost, ETHERADDRL);
+
+ (void) libvarpd_overlay_inject(vaq->vaq_inst->vri_impl, vaq->vaq_otl,
+ vaq->vaq_buf, vaq->vaq_bsize);
+
+ libvarpd_plugin_query_reply(vaq->vaq_query, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+}
+
+static uint16_t
+libvarpd_icmpv6_checksum(const ip6_t *v6hdr, const uint16_t *buf, uint16_t mlen)
+{
+ int i;
+ uint16_t *v;
+ uint32_t sum = 0;
+
+ assert(mlen % 2 == 0);
+ v = (uint16_t *)&v6hdr->ip6_src;
+ for (i = 0; i < sizeof (struct in6_addr); i += 2, v++)
+ sum += *v;
+ v = (uint16_t *)&v6hdr->ip6_dst;
+ for (i = 0; i < sizeof (struct in6_addr); i += 2, v++)
+ sum += *v;
+ sum += htons(mlen);
+#ifdef _BIG_ENDIAN
+ sum += IPPROTO_ICMPV6;
+#else
+ sum += IPPROTO_ICMPV6 << 8;
+#endif /* _BIG_ENDIAN */
+
+ for (i = 0; i < mlen; i += 2, buf++)
+ sum += *buf;
+
+ while ((sum >> 16) != 0)
+ sum = (sum & 0xffff) + (sum >> 16);
+
+ return (sum & 0xffff);
+}
+
+/*
+ * Proxying NDP is much more involved than proxying ARP. For starters, NDP
+ * neighbor solicitations are implemented in terms of IPv6 ICMP as opposed to
+ * its own Ethertype. Therefore, we're going to have to grab a packet if it's a
+ * multicast packet and then determine if we actually want to do anything with
+ * it.
+ */
+void
+libvarpd_plugin_proxy_ndp(varpd_provider_handle_t *hdl,
+ varpd_query_handle_t *vqh, const overlay_targ_lookup_t *otl)
+{
+ size_t bsize, plen;
+ varpd_arp_query_t *vaq;
+ ip6_t *v6hdr;
+ nd_neighbor_solicit_t *ns;
+ nd_opt_hdr_t *opt;
+ struct sockaddr_in6 *s6;
+
+ varpd_instance_t *inst = (varpd_instance_t *)hdl;
+ uint8_t *eth = NULL;
+
+ vaq = umem_alloc(sizeof (varpd_arp_query_t), UMEM_DEFAULT);
+ if (vaq == NULL) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ return;
+ }
+ vaq->vaq_bsize = sizeof (vaq->vaq_buf);
+
+ if (otl->otl_dstaddr[0] != 0x33 ||
+ otl->otl_dstaddr[1] != 0x33) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ /*
+ * If we have more than a standard frame size for the ICMP neighbor
+ * solicitation, drop it. Similarly if there isn't enough data present
+ * for us, drop it.
+ */
+ if (otl->otl_hdrsize + otl->otl_pktsize > vaq->vaq_bsize) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ if (otl->otl_pktsize < sizeof (ip6_t) +
+ sizeof (nd_neighbor_solicit_t)) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ if (libvarpd_overlay_packet(inst->vri_impl, otl, vaq->vaq_buf,
+ &vaq->vaq_bsize) != 0) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ bsize = vaq->vaq_bsize;
+ bsize -= otl->otl_hdrsize;
+ assert(bsize > sizeof (ip6_t));
+
+ v6hdr = (ip6_t *)(vaq->vaq_buf + otl->otl_hdrsize);
+ if (((v6hdr->ip6_vfc & 0xf0) >> 4) != IPV6_VERSION) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ if (v6hdr->ip6_nxt != IPPROTO_ICMPV6) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ /*
+ * In addition to getting these requests on the multicast address for
+ * node solicitation, we may also end up getting them on a generic
+ * multicast address due to timeouts or other choices by various OSes.
+ * We should fairly liberal and accept both, even though the standard
+ * wants them to a solicitation address.
+ */
+ if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&v6hdr->ip6_dst) &&
+ !IN6_IS_ADDR_MC_LINKLOCAL(&v6hdr->ip6_dst)) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ bsize -= sizeof (ip6_t);
+ plen = ntohs(v6hdr->ip6_plen);
+ if (bsize < plen) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ /*
+ * Now we know that this is an ICMPv6 request targetting the right
+ * IPv6 multicast prefix. Let's go through and verify that ICMPv6
+ * indicates that we have the real thing and ensure that per RFC 4861
+ * the target address is not a multicast address. Further, because this
+ * is a multicast on Ethernet, we must have a source link-layer address.
+ *
+ * XXX We should probably validate the checksum here...
+ */
+ ns = (nd_neighbor_solicit_t *)(vaq->vaq_buf + otl->otl_hdrsize +
+ sizeof (ip6_t));
+ if (ns->nd_ns_type != ND_NEIGHBOR_SOLICIT && ns->nd_ns_code != 0) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ if (IN6_IS_ADDR_MULTICAST(&ns->nd_ns_target) ||
+ IN6_IS_ADDR_V4MAPPED(&ns->nd_ns_target) ||
+ IN6_IS_ADDR_LOOPBACK(&ns->nd_ns_target)) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ plen -= sizeof (nd_neighbor_solicit_t);
+ opt = (nd_opt_hdr_t *)(ns+1);
+ while (plen >= sizeof (struct nd_opt_hdr)) {
+ /* If we have an option with no lenght, that's clear bogus */
+ if (opt->nd_opt_len == 0) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ if (opt->nd_opt_type == ND_OPT_SOURCE_LINKADDR) {
+ eth = (uint8_t *)((uintptr_t)opt +
+ sizeof (nd_opt_hdr_t));
+ }
+ plen -= opt->nd_opt_len * 8;
+ opt = (nd_opt_hdr_t *)((uintptr_t)opt +
+ opt->nd_opt_len * 8);
+ }
+
+ if (eth == NULL) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ bzero(&vaq->vaq_sock, sizeof (struct sockaddr_storage));
+ s6 = (struct sockaddr_in6 *)&vaq->vaq_sock;
+ s6->sin6_family = AF_INET6;
+ bcopy(&ns->nd_ns_target, &s6->sin6_addr, sizeof (s6->sin6_addr));
+
+ if (inst->vri_plugin->vpp_ops->vpo_arp == NULL)
+ libvarpd_panic("%s plugin asked to do arp, but has no method",
+ inst->vri_plugin->vpp_name);
+
+ vaq->vaq_type = AF_INET6;
+ vaq->vaq_inst = inst;
+ vaq->vaq_ea = NULL;
+ vaq->vaq_query = vqh;
+ vaq->vaq_otl = otl;
+ vaq->vaq_ns = ns;
+ vaq->vaq_ip6 = v6hdr;
+ inst->vri_plugin->vpp_ops->vpo_arp(inst->vri_private,
+ (varpd_arp_handle_t *)vaq, VARPD_QTYPE_ETHERNET,
+ (struct sockaddr *)s6, vaq->vaq_lookup);
+}
+
+static void
+libvarpd_proxy_ndp_fini(varpd_arp_query_t *vaq)
+{
+ char resp[ETHERMAX + VLAN_TAGSZ];
+ struct ether_header *ether;
+ nd_neighbor_advert_t *na;
+ nd_opt_hdr_t *opt;
+ ip6_t *v6hdr;
+ size_t roff = 0;
+
+ /*
+ * Now we need to assemble an RA as a response. Unlike with arp, we opt
+ * to use a new packet just to make things a bit simpler saner here.
+ */
+ v6hdr = vaq->vaq_ip6;
+ bcopy(vaq->vaq_buf, resp, vaq->vaq_otl->otl_hdrsize);
+ ether = (struct ether_header *)resp;
+ bcopy(&ether->ether_shost, &ether->ether_dhost, ETHERADDRL);
+ bcopy(vaq->vaq_lookup, &ether->ether_shost, ETHERADDRL);
+ roff += vaq->vaq_otl->otl_hdrsize;
+ bcopy(v6hdr, resp + roff, sizeof (ip6_t));
+ v6hdr = (ip6_t *)(resp + roff);
+ bcopy(&v6hdr->ip6_src, &v6hdr->ip6_dst, sizeof (struct in6_addr));
+ bcopy(&vaq->vaq_ns->nd_ns_target, &v6hdr->ip6_src,
+ sizeof (struct in6_addr));
+ roff += sizeof (ip6_t);
+ na = (nd_neighbor_advert_t *)(resp + roff);
+ na->nd_na_type = ND_NEIGHBOR_ADVERT;
+ na->nd_na_code = 0;
+ /*
+ * RFC 4443 defines that we should set the checksum to zero before we
+ * calculate the checksumat we should set the checksum to zero before we
+ * calculate it.
+ */
+ na->nd_na_cksum = 0;
+ /*
+ * Nota bene, the header <netinet/icmp6.h> has already transformed this
+ * into the appropriate host order. Don't use htonl.
+ */
+ na->nd_na_flags_reserved = ND_NA_FLAG_SOLICITED | ND_NA_FLAG_OVERRIDE;
+ bcopy(&vaq->vaq_ns->nd_ns_target, &na->nd_na_target,
+ sizeof (struct in6_addr));
+ roff += sizeof (nd_neighbor_advert_t);
+
+ opt = (nd_opt_hdr_t *)(resp + roff);
+ opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
+ opt->nd_opt_len = 1;
+ roff += sizeof (nd_opt_hdr_t);
+ bcopy(vaq->vaq_lookup, resp + roff, ETHERADDRL);
+ roff += ETHERADDRL;
+
+ /*
+ * Now that we've filled in the packet, go back and compute the checksum
+ * and fill in the IPv6 payload size.
+ */
+ v6hdr->ip6_plen = htons(roff - sizeof (ip6_t) -
+ vaq->vaq_otl->otl_hdrsize);
+ na->nd_na_cksum = ~libvarpd_icmpv6_checksum(v6hdr, (uint16_t *)na,
+ ntohs(v6hdr->ip6_plen)) & 0xffff;
+
+ (void) libvarpd_overlay_inject(vaq->vaq_inst->vri_impl, vaq->vaq_otl,
+ resp, roff);
+
+ libvarpd_plugin_query_reply(vaq->vaq_query, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+}
+
+void
+libvarpd_plugin_arp_reply(varpd_arp_handle_t *vah, int action)
+{
+ varpd_arp_query_t *vaq = (varpd_arp_query_t *)vah;
+
+ if (vaq == NULL)
+ libvarpd_panic("unknown plugin passed invalid "
+ "varpd_arp_handle_t");
+
+ if (action == VARPD_LOOKUP_DROP) {
+ libvarpd_plugin_query_reply(vaq->vaq_query, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ } else if (action != VARPD_LOOKUP_OK)
+ libvarpd_panic("%s plugin returned invalid action %d",
+ vaq->vaq_inst->vri_plugin->vpp_name, action);
+
+ switch (vaq->vaq_type) {
+ case AF_INET:
+ libvarpd_proxy_arp_fini(vaq);
+ break;
+ case AF_INET6:
+ libvarpd_proxy_ndp_fini(vaq);
+ break;
+ default:
+ libvarpd_panic("encountered unknown vaq_type: %d",
+ vaq->vaq_type);
+ }
+}
+
+void
+libvarpd_plugin_proxy_dhcp(varpd_provider_handle_t *hdl,
+ varpd_query_handle_t *vqh, const overlay_targ_lookup_t *otl)
+{
+ varpd_dhcp_query_t *vdq;
+ struct ether_header *ether;
+ struct ip *ip;
+ struct udphdr *udp;
+ varpd_instance_t *inst = (varpd_instance_t *)hdl;
+
+ vdq = umem_alloc(sizeof (varpd_dhcp_query_t), UMEM_DEFAULT);
+ if (vdq == NULL) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ return;
+ }
+ vdq->vdq_bsize = sizeof (vdq->vdq_buf);
+
+ if (otl->otl_sap != ETHERTYPE_IP) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ if (bcmp(otl->otl_dstaddr, libvarpd_arp_bcast, ETHERADDRL) != 0) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ if (otl->otl_hdrsize + otl->otl_pktsize > vdq->vdq_bsize ||
+ otl->otl_pktsize < sizeof (struct ip) + sizeof (struct udphdr) +
+ sizeof (struct dhcp) ||
+ otl->otl_hdrsize < sizeof (struct ether_header)) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ if (libvarpd_overlay_packet(inst->vri_impl, otl, vdq->vdq_buf,
+ &vdq->vdq_bsize) != 0) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ if (vdq->vdq_bsize != otl->otl_hdrsize + otl->otl_pktsize) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ ether = (struct ether_header *)vdq->vdq_buf;
+ ip = (struct ip *)(vdq->vdq_buf + otl->otl_hdrsize);
+
+ if (ip->ip_v != IPVERSION && ip->ip_p != IPPROTO_UDP) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ if (otl->otl_hdrsize + ip->ip_hl * 4 + sizeof (struct udphdr) >
+ vdq->vdq_bsize) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ udp = (struct udphdr *)(vdq->vdq_buf + otl->otl_hdrsize +
+ ip->ip_hl * 4);
+
+ if (ntohs(udp->uh_sport) != IPPORT_BOOTPC ||
+ ntohs(udp->uh_dport) != IPPORT_BOOTPS) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ vdq->vdq_ether = ether;
+ vdq->vdq_inst = inst;
+ vdq->vdq_query = vqh;
+ vdq->vdq_otl = otl;
+
+ if (inst->vri_plugin->vpp_ops->vpo_dhcp == NULL)
+ libvarpd_panic("%s plugin asked to do dhcp, but has no method",
+ inst->vri_plugin->vpp_name);
+
+ inst->vri_plugin->vpp_ops->vpo_dhcp(inst->vri_private,
+ (varpd_dhcp_handle_t *)vdq, VARPD_QTYPE_ETHERNET, otl,
+ vdq->vdq_lookup);
+}
+
+void
+libvarpd_plugin_dhcp_reply(varpd_dhcp_handle_t *vdh, int action)
+{
+ varpd_dhcp_query_t *vdq = (varpd_dhcp_query_t *)vdh;
+
+ if (vdq == NULL)
+ libvarpd_panic("unknown plugin passed invalid "
+ "varpd_dhcp_handle_t");
+
+ if (action == VARPD_LOOKUP_DROP) {
+ libvarpd_plugin_query_reply(vdq->vdq_query, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ } else if (action != VARPD_LOOKUP_OK)
+ libvarpd_panic("%s plugin returned invalid action %d",
+ vdq->vdq_inst->vri_plugin->vpp_name, action);
+
+ bcopy(vdq->vdq_lookup, &vdq->vdq_ether->ether_dhost, ETHERADDRL);
+ (void) libvarpd_overlay_resend(vdq->vdq_inst->vri_impl, vdq->vdq_otl,
+ vdq->vdq_buf, vdq->vdq_bsize);
+
+ libvarpd_plugin_query_reply(vdq->vdq_query, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+}
+
+/*
+ * Inject a gratuitious ARP packet to the specified mac address.
+ */
+void
+libvarpd_inject_arp(varpd_provider_handle_t *vph, const uint16_t vlan,
+ const uint8_t *srcmac, const struct in_addr *srcip, const uint8_t *dstmac)
+{
+ char buf[1500];
+ size_t bsize = 0;
+ struct ether_arp *ea;
+ varpd_instance_t *inst = (varpd_instance_t *)vph;
+
+ if (vlan != 0) {
+ struct ether_vlan_header *eh;
+ eh = (struct ether_vlan_header *)(buf + bsize);
+ bsize += sizeof (struct ether_vlan_header);
+ bcopy(dstmac, &eh->ether_dhost, ETHERADDRL);
+ bcopy(srcmac, &eh->ether_shost, ETHERADDRL);
+ eh->ether_tpid = htons(ETHERTYPE_VLAN);
+ eh->ether_tci = htons(VLAN_TCI(0, ETHER_CFI, vlan));
+ eh->ether_type = htons(ETHERTYPE_ARP);
+ } else {
+ struct ether_header *eh;
+ eh = (struct ether_header *)(buf + bsize);
+ bsize += sizeof (struct ether_header);
+ bcopy(dstmac, &eh->ether_dhost, ETHERADDRL);
+ bcopy(srcmac, &eh->ether_shost, ETHERADDRL);
+ eh->ether_type = htons(ETHERTYPE_ARP);
+ }
+
+ ea = (struct ether_arp *)(buf + bsize);
+ bsize += sizeof (struct ether_arp);
+ ea->ea_hdr.ar_hrd = htons(ARPHRD_ETHER);
+ ea->ea_hdr.ar_pro = htons(ETHERTYPE_IP);
+ ea->ea_hdr.ar_hln = ETHERADDRL;
+ ea->ea_hdr.ar_pln = sizeof (struct in_addr);
+ ea->ea_hdr.ar_op = htons(ARPOP_REQUEST);
+ bcopy(srcmac, ea->arp_sha, ETHERADDRL);
+ bcopy(srcip, ea->arp_spa, sizeof (struct in_addr));
+ bcopy(libvarpd_arp_bcast, ea->arp_tha, ETHERADDRL);
+ bcopy(srcip, ea->arp_tpa, sizeof (struct in_addr));
+
+ (void) libvarpd_overlay_instance_inject(inst, buf, bsize);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_client.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.c
new file mode 100644
index 0000000000..0f616b9533
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.c
@@ -0,0 +1,626 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * varpd client interfaces
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <umem.h>
+#include <unistd.h>
+#include <string.h>
+#include <strings.h>
+#include <door.h>
+
+#include <libvarpd_impl.h>
+
+typedef struct varpd_client {
+ int vcl_doorfd;
+} varpd_client_t;
+
+typedef struct varpd_client_prop_info {
+ varpd_client_t *vcprop_client;
+ uint64_t vcprop_instance;
+ uint_t vcprop_propid;
+ uint_t vcprop_type;
+ uint_t vcprop_prot;
+ uint32_t vcprop_defsize;
+ uint32_t vcprop_psize;
+ char vcprop_name[LIBVARPD_PROP_NAMELEN];
+ uint8_t vcprop_default[LIBVARPD_PROP_SIZEMAX];
+ uint8_t vcprop_poss[LIBVARPD_PROP_SIZEMAX];
+} varpd_client_prop_info_t;
+
+static int
+libvarpd_c_door_call(varpd_client_t *client, varpd_client_arg_t *argp,
+ size_t altsize)
+{
+ int ret;
+ door_arg_t darg;
+
+ darg.data_ptr = (char *)argp;
+ darg.desc_ptr = NULL;
+ darg.desc_num = 0;
+ darg.rbuf = (char *)argp;
+ if (altsize != 0) {
+ darg.data_size = altsize;
+ darg.rsize = altsize;
+ } else {
+ darg.data_size = sizeof (varpd_client_arg_t);
+ darg.rsize = sizeof (varpd_client_arg_t);
+ }
+
+ do {
+ ret = door_call(client->vcl_doorfd, &darg);
+ } while (ret != 0 && errno == EINTR);
+ if (ret != 0) {
+ switch (errno) {
+ case E2BIG:
+ case EFAULT:
+ case EINVAL:
+ case ENOTSUP:
+ case EOVERFLOW:
+ case ENFILE:
+ libvarpd_panic("unhandalable errno from door_call: %d",
+ errno);
+ }
+ ret = errno;
+ }
+
+ return (ret);
+}
+
+int
+libvarpd_c_create(varpd_client_handle_t **chpp, const char *doorname)
+{
+ varpd_client_t *client;
+
+ client = umem_alloc(sizeof (varpd_client_t), UMEM_DEFAULT);
+ if (client == NULL)
+ return (ENOMEM);
+
+ client->vcl_doorfd = open(doorname, O_RDWR);
+ if (client->vcl_doorfd < 0) {
+ int ret = errno;
+ umem_free(client, sizeof (varpd_client_t));
+ return (ret);
+ }
+
+ *chpp = (varpd_client_handle_t *)client;
+ return (0);
+}
+
+int
+libvarpd_c_destroy(varpd_client_handle_t *chp)
+{
+ varpd_client_t *client = (varpd_client_t *)chp;
+ if (close(client->vcl_doorfd) != 0)
+ libvarpd_panic("failed to close door fd %d: %d",
+ client->vcl_doorfd, errno);
+
+ umem_free(chp, sizeof (varpd_client_handle_t *));
+ return (0);
+}
+
+int
+libvarpd_c_instance_create(varpd_client_handle_t *chp, datalink_id_t linkid,
+ const char *search, uint64_t *cidp)
+{
+ int ret;
+ varpd_client_t *client = (varpd_client_t *)chp;
+ varpd_client_arg_t carg;
+ varpd_client_create_arg_t *cap = &carg.vca_un.vca_create;
+
+ if (strlen(search) >= LIBVARPD_PROP_NAMELEN)
+ return (EINVAL);
+ carg.vca_command = VARPD_CLIENT_CREATE;
+ carg.vca_errno = 0;
+ cap->vcca_linkid = linkid;
+ (void) strlcpy(cap->vcca_plugin, search, LIBVARPD_PROP_NAMELEN);
+
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ *cidp = cap->vcca_id;
+
+ return (0);
+}
+
+int
+libvarpd_c_instance_activate(varpd_client_handle_t *chp, uint64_t cid)
+{
+ int ret;
+ varpd_client_t *client = (varpd_client_t *)chp;
+ varpd_client_arg_t carg;
+ varpd_client_instance_arg_t *vciap = &carg.vca_un.vca_instance;
+
+ carg.vca_command = VARPD_CLIENT_ACTIVATE;
+ carg.vca_errno = 0;
+ vciap->vcia_id = cid;
+
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ return (0);
+}
+
+int
+libvarpd_c_instance_destroy(varpd_client_handle_t *chp, uint64_t cid)
+{
+ int ret;
+ varpd_client_t *client = (varpd_client_t *)chp;
+ varpd_client_arg_t carg;
+ varpd_client_instance_arg_t *vciap = &carg.vca_un.vca_instance;
+
+ carg.vca_command = VARPD_CLIENT_DESTROY;
+ carg.vca_errno = 0;
+ vciap->vcia_id = cid;
+
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ return (0);
+}
+
+int
+libvarpd_c_prop_nprops(varpd_client_handle_t *chp, uint64_t cid, uint_t *nprops)
+{
+ int ret;
+ varpd_client_t *client = (varpd_client_t *)chp;
+ varpd_client_arg_t carg;
+ varpd_client_nprops_arg_t *vcnap = &carg.vca_un.vca_nprops;
+
+ carg.vca_command = VARPD_CLIENT_NPROPS;
+ carg.vca_errno = 0;
+ vcnap->vcna_id = cid;
+ vcnap->vcna_nprops = 0;
+
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+ *nprops = vcnap->vcna_nprops;
+ return (0);
+}
+
+int
+libvarpd_c_prop_handle_alloc(varpd_client_handle_t *chp, uint64_t cid,
+ varpd_client_prop_handle_t **phdlp)
+{
+ varpd_client_prop_info_t *infop;
+
+ infop = umem_alloc(sizeof (varpd_client_prop_info_t), UMEM_DEFAULT);
+ if (infop == NULL)
+ return (ENOMEM);
+
+ bzero(infop, sizeof (varpd_client_prop_info_t));
+ infop->vcprop_client = (varpd_client_t *)chp;
+ infop->vcprop_instance = cid;
+ infop->vcprop_propid = UINT_MAX;
+ *phdlp = (varpd_client_prop_handle_t *)infop;
+ return (0);
+}
+
+void
+libvarpd_c_prop_handle_free(varpd_client_prop_handle_t *phdl)
+{
+ umem_free(phdl, sizeof (varpd_client_prop_info_t));
+ phdl = NULL;
+}
+
+static void
+libvarpd_c_prop_info_from_door(varpd_client_prop_info_t *infop,
+ const varpd_client_propinfo_arg_t *vcfap)
+{
+ infop->vcprop_propid = vcfap->vcfa_propid;
+ infop->vcprop_type = vcfap->vcfa_type;
+ infop->vcprop_prot = vcfap->vcfa_prot;
+ infop->vcprop_defsize = vcfap->vcfa_defsize;
+ infop->vcprop_psize = vcfap->vcfa_psize;
+ bcopy(vcfap->vcfa_name, infop->vcprop_name, LIBVARPD_PROP_NAMELEN);
+ bcopy(vcfap->vcfa_default, infop->vcprop_default,
+ LIBVARPD_PROP_SIZEMAX);
+ bcopy(vcfap->vcfa_poss, infop->vcprop_poss, LIBVARPD_PROP_SIZEMAX);
+}
+
+int
+libvarpd_c_prop_info_fill_by_name(varpd_client_prop_handle_t *phdl,
+ const char *name)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_propinfo_arg_t *vcfap = &carg.vca_un.vca_info;
+ varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl;
+
+ if (strlen(name) >= LIBVARPD_PROP_NAMELEN)
+ return (EINVAL);
+ bzero(&carg, sizeof (varpd_client_arg_t));
+ carg.vca_command = VARPD_CLIENT_PROPINFO;
+ carg.vca_errno = 0;
+ vcfap->vcfa_id = infop->vcprop_instance;
+ vcfap->vcfa_propid = UINT_MAX;
+ (void) strlcpy(vcfap->vcfa_name, name, LIBVARPD_PROP_NAMELEN);
+
+ ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ libvarpd_c_prop_info_from_door(infop, vcfap);
+ return (0);
+}
+
+int
+libvarpd_c_prop_info_fill(varpd_client_prop_handle_t *phdl, uint_t propid)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_propinfo_arg_t *vcfap = &carg.vca_un.vca_info;
+ varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl;
+
+ bzero(&carg, sizeof (varpd_client_arg_t));
+ carg.vca_command = VARPD_CLIENT_PROPINFO;
+ carg.vca_errno = 0;
+ vcfap->vcfa_id = infop->vcprop_instance;
+ vcfap->vcfa_propid = propid;
+
+ ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ libvarpd_c_prop_info_from_door(infop, vcfap);
+ return (0);
+}
+
+int
+libvarpd_c_prop_info(varpd_client_prop_handle_t *phdl, const char **namep,
+ uint_t *typep, uint_t *protp, const void **defp, uint32_t *defsizep,
+ const mac_propval_range_t **possp)
+{
+ varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl;
+ if (infop->vcprop_propid == UINT_MAX)
+ return (EINVAL);
+
+ if (namep != NULL)
+ *namep = infop->vcprop_name;
+ if (typep != NULL)
+ *typep = infop->vcprop_type;
+ if (protp != NULL)
+ *protp = infop->vcprop_prot;
+ if (defp != NULL)
+ *defp = infop->vcprop_default;
+ if (defsizep != NULL)
+ *defsizep = infop->vcprop_defsize;
+ if (possp != NULL)
+ *possp = (const mac_propval_range_t *)infop->vcprop_poss;
+ return (0);
+}
+
+int
+libvarpd_c_prop_get(varpd_client_prop_handle_t *phdl, void *buf, uint32_t *len)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_prop_arg_t *vcpap = &carg.vca_un.vca_prop;
+ varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl;
+
+ if (len == NULL || buf == NULL || infop->vcprop_propid == UINT_MAX)
+ return (EINVAL);
+ if (*len < LIBVARPD_PROP_SIZEMAX)
+ return (EOVERFLOW);
+
+ bzero(&carg, sizeof (varpd_client_arg_t));
+ carg.vca_command = VARPD_CLIENT_GETPROP;
+ carg.vca_errno = 0;
+ vcpap->vcpa_id = infop->vcprop_instance;
+ vcpap->vcpa_propid = infop->vcprop_propid;
+
+ ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ /*
+ * XXX We should really abort, as the server shouldn't send bad input,
+ * but a library really shouldn't kill the client. Therefore we have a
+ * shitty, shitty, error case.
+ */
+ if (vcpap->vcpa_bufsize > LIBVARPD_PROP_SIZEMAX)
+ return (E2BIG);
+
+ bcopy(vcpap->vcpa_buf, buf, vcpap->vcpa_bufsize);
+ *len = vcpap->vcpa_bufsize;
+ return (0);
+}
+
+int
+libvarpd_c_prop_set(varpd_client_prop_handle_t *phdl, const void *buf,
+ uint32_t len)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_prop_arg_t *vcpap = &carg.vca_un.vca_prop;
+ varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl;
+
+ if (len == NULL || buf == NULL || infop->vcprop_propid == UINT_MAX)
+ return (EINVAL);
+ if (len > LIBVARPD_PROP_SIZEMAX)
+ return (EOVERFLOW);
+
+ carg.vca_command = VARPD_CLIENT_SETPROP;
+ carg.vca_errno = 0;
+ vcpap->vcpa_id = infop->vcprop_instance;
+ vcpap->vcpa_propid = infop->vcprop_propid;
+ vcpap->vcpa_bufsize = len;
+ bcopy(buf, vcpap->vcpa_buf, len);
+
+ ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ return (0);
+}
+
+int
+libvarpd_c_instance_lookup(varpd_client_handle_t *chp, datalink_id_t linkid,
+ uint64_t *instp)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_lookup_arg_t *vclap = &carg.vca_un.vca_lookup;
+ varpd_client_t *client = (varpd_client_t *)chp;
+
+ carg.vca_command = VARPD_CLIENT_LOOKUP;
+ carg.vca_errno = 0;
+ vclap->vcla_linkid = linkid;
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+ if (instp != NULL)
+ *instp = vclap->vcla_id;
+
+ return (0);
+}
+
+int
+libvarpd_c_instance_target_mode(varpd_client_handle_t *chp, uint64_t cid,
+ uint_t *dtype, uint_t *mtype)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_target_mode_arg_t *vctmap = &carg.vca_un.vca_mode;
+ varpd_client_t *client = (varpd_client_t *)chp;
+
+ carg.vca_command = VARPD_CLIENT_TARGET_MODE;
+ carg.vca_errno = 0;
+ vctmap->vtma_id = cid;
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+ if (ret == 0) {
+ if (mtype != NULL)
+ *mtype = vctmap->vtma_mode;
+ if (dtype != NULL)
+ *dtype = vctmap->vtma_dest;
+ }
+
+ return (ret);
+}
+
+int
+libvarpd_c_instance_cache_flush(varpd_client_handle_t *chp, uint64_t cid)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache;
+ varpd_client_t *client = (varpd_client_t *)chp;
+
+ carg.vca_command = VARPD_CLIENT_CACHE_FLUSH;
+ carg.vca_errno = 0;
+
+ vctcap->vtca_id = cid;
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ return (0);
+}
+
+int
+libvarpd_c_instance_cache_delete(varpd_client_handle_t *chp, uint64_t cid,
+ const struct ether_addr *key)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache;
+ varpd_client_t *client = (varpd_client_t *)chp;
+
+ if (key == NULL)
+ return (EINVAL);
+
+ carg.vca_command = VARPD_CLIENT_CACHE_DELETE;
+ carg.vca_errno = 0;
+ vctcap->vtca_id = cid;
+ bcopy(key, vctcap->vtca_key, ETHERADDRL);
+
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ return (0);
+}
+
+int
+libvarpd_c_instance_cache_get(varpd_client_handle_t *chp, uint64_t cid,
+ const struct ether_addr *key, varpd_client_cache_entry_t *entry)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache;
+ varpd_client_t *client = (varpd_client_t *)chp;
+
+ if (key == NULL || entry == NULL)
+ return (EINVAL);
+
+ carg.vca_command = VARPD_CLIENT_CACHE_GET;
+ carg.vca_errno = 0;
+ vctcap->vtca_id = cid;
+ bcopy(key, vctcap->vtca_key, ETHERADDRL);
+ bzero(&vctcap->vtca_entry, sizeof (varpd_client_cache_entry_t));
+
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ bcopy(&vctcap->vtca_entry, entry, sizeof (varpd_client_cache_entry_t));
+ return (0);
+}
+
+int
+libvarpd_c_instance_cache_set(varpd_client_handle_t *chp, uint64_t cid,
+ const struct ether_addr *key, const varpd_client_cache_entry_t *entry)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache;
+ varpd_client_t *client = (varpd_client_t *)chp;
+
+ if (key == NULL || entry == NULL)
+ return (EINVAL);
+
+ carg.vca_command = VARPD_CLIENT_CACHE_SET;
+ carg.vca_errno = 0;
+ vctcap->vtca_id = cid;
+ bcopy(key, vctcap->vtca_key, ETHERADDRL);
+ bcopy(entry, &vctcap->vtca_entry, sizeof (varpd_client_cache_entry_t));
+
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ return (0);
+}
+
+int
+libvarpd_c_instance_cache_walk(varpd_client_handle_t *chp, uint64_t cid,
+ varpd_client_cache_f func, void *arg)
+{
+ int ret = 0;
+ size_t bufsize = sizeof (varpd_client_arg_t) +
+ 100 * sizeof (varpd_client_cache_entry_t);
+ varpd_client_t *client = (varpd_client_t *)chp;
+ varpd_client_arg_t *cargp;
+ varpd_client_target_walk_arg_t *vctwap;
+
+ /*
+ * Because the number of entries involved in a walk may be large, we
+ * dynamically allocate a number of queries to make at a single time.
+ * This also means that the average door request doesn't inflate by the
+ * number of entries we want. For now, let's always grab 100 entries in
+ * a request.
+ */
+ cargp = umem_zalloc(bufsize, UMEM_DEFAULT);
+ if (cargp == NULL)
+ return (errno);
+ vctwap = &cargp->vca_un.vca_walk;
+ for (;;) {
+ int i;
+
+ cargp->vca_command = VARPD_CLIENT_CACHE_WALK;
+ cargp->vca_errno = 0;
+ vctwap->vtcw_id = cid;
+ vctwap->vtcw_count = 100;
+
+ ret = libvarpd_c_door_call(client, cargp, bufsize);
+ if (ret != 0)
+ break;
+
+ if (cargp->vca_errno != 0) {
+ ret = cargp->vca_errno;
+ break;
+ }
+
+ if (vctwap->vtcw_count == 0) {
+ ret = 0;
+ break;
+ }
+
+ for (i = 0; i < vctwap->vtcw_count; i++) {
+ varpd_client_cache_entry_t ent;
+
+ ent.vcp_flags = vctwap->vtcw_ents[i].otce_flags;
+ bcopy(vctwap->vtcw_ents[i].otce_dest.otp_mac,
+ &ent.vcp_mac, ETHERADDRL);
+ ent.vcp_ip = vctwap->vtcw_ents[i].otce_dest.otp_ip;
+ ent.vcp_port = vctwap->vtcw_ents[i].otce_dest.otp_port;
+ ret = func(chp, cid,
+ (struct ether_addr *)vctwap->vtcw_ents[i].otce_mac,
+ &ent, arg);
+ if (ret != 0) {
+ ret = 0;
+ goto done;
+ }
+ }
+ }
+
+done:
+ umem_free(cargp, bufsize);
+ return (ret);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_client.h b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.h
new file mode 100644
index 0000000000..b794472bc1
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.h
@@ -0,0 +1,94 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_CLIENT_H
+#define _LIBVARPD_CLIENT_H
+
+/*
+ * varpd interfaces
+ */
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <sys/mac.h>
+/* XXX Should we have our own, but compatible types? */
+#include <sys/overlay_target.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct __varpd_client_handle varpd_client_handle_t;
+typedef struct __varpd_client_prop_handle varpd_client_prop_handle_t;
+
+typedef struct varpd_client_cache_entry {
+ struct ether_addr vcp_mac;
+ uint16_t vcp_flags;
+ struct in6_addr vcp_ip;
+ uint16_t vcp_port;
+} varpd_client_cache_entry_t;
+
+/*
+ * This should stay in sync with OVERLAY_PROP_*
+ */
+#define LIBVARPD_PROP_SIZEMAX 256
+#define LIBVARPD_PROP_NAMELEN 32
+
+extern int libvarpd_c_create(varpd_client_handle_t **, const char *);
+extern int libvarpd_c_destroy(varpd_client_handle_t *);
+extern int libvarpd_c_instance_create(varpd_client_handle_t *, datalink_id_t,
+ const char *, uint64_t *);
+extern int libvarpd_c_instance_activate(varpd_client_handle_t *, uint64_t);
+extern int libvarpd_c_instance_destroy(varpd_client_handle_t *, uint64_t);
+
+extern int libvarpd_c_prop_nprops(varpd_client_handle_t *, uint64_t, uint_t *);
+extern int libvarpd_c_prop_handle_alloc(varpd_client_handle_t *, uint64_t,
+ varpd_client_prop_handle_t **);
+extern void libvarpd_c_prop_handle_free(varpd_client_prop_handle_t *);
+extern int libvarpd_c_prop_info_fill(varpd_client_prop_handle_t *, uint_t);
+extern int libvarpd_c_prop_info_fill_by_name(varpd_client_prop_handle_t *,
+ const char *);
+extern int libvarpd_c_prop_info(varpd_client_prop_handle_t *, const char **,
+ uint_t *, uint_t *, const void **, uint32_t *,
+ const mac_propval_range_t **);
+extern int libvarpd_c_prop_get(varpd_client_prop_handle_t *, void *,
+ uint32_t *);
+extern int libvarpd_c_prop_set(varpd_client_prop_handle_t *, const void *,
+ uint32_t);
+
+/* XXX Strawman */
+extern int libvarpd_c_instance_lookup(varpd_client_handle_t *, datalink_id_t,
+ uint64_t *);
+extern int libvarpd_c_instance_target_mode(varpd_client_handle_t *, uint64_t,
+ uint_t *, uint_t *);
+extern int libvarpd_c_instance_cache_flush(varpd_client_handle_t *, uint64_t);
+extern int libvarpd_c_instance_cache_delete(varpd_client_handle_t *, uint64_t,
+ const struct ether_addr *);
+extern int libvarpd_c_instance_cache_get(varpd_client_handle_t *, uint64_t,
+ const struct ether_addr *, varpd_client_cache_entry_t *);
+extern int libvarpd_c_instance_cache_set(varpd_client_handle_t *, uint64_t,
+ const struct ether_addr *, const varpd_client_cache_entry_t *);
+
+typedef int (*varpd_client_cache_f)(varpd_client_handle_t *, uint64_t,
+ const struct ether_addr *, const varpd_client_cache_entry_t *, void *);
+extern int libvarpd_c_instance_cache_walk(varpd_client_handle_t *, uint64_t,
+ varpd_client_cache_f, void *);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_CLIENT_H */
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_door.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_door.c
new file mode 100644
index 0000000000..f2f93562ac
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_door.c
@@ -0,0 +1,457 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL. *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/*
+ * varpd door server logic
+ */
+
+#include <door.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <priv.h>
+#include <libvarpd_impl.h>
+
+typedef int (libvarpd_door_f)(varpd_impl_t *, varpd_client_arg_t *, ucred_t *);
+
+static boolean_t
+libvarpd_door_privileged(ucred_t *credp)
+{
+ const priv_set_t *ps;
+
+ ps = ucred_getprivset(credp, PRIV_EFFECTIVE);
+ if (ps == NULL)
+ return (B_FALSE);
+
+ return (priv_ismember(ps, PRIV_SYS_NET_CONFIG));
+}
+
+static int
+libvarpd_door_f_create(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ int ret;
+ varpd_instance_handle_t *ihdl;
+ varpd_client_create_arg_t *vccap = &vcap->vca_un.vca_create;
+
+ vccap->vcca_plugin[LIBVARPD_PROP_NAMELEN-1] = '\0';
+ ret = libvarpd_instance_create((varpd_handle_t *)vip,
+ vccap->vcca_linkid, vccap->vcca_plugin, &ihdl);
+ if (ret == 0)
+ vccap->vcca_id = libvarpd_instance_id(ihdl);
+
+ return (ret);
+}
+
+static int
+libvarpd_door_f_activate(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_instance_arg_t *vciap = &vcap->vca_un.vca_instance;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vciap->vcia_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ return (libvarpd_instance_activate(ihp));
+}
+
+static int
+libvarpd_door_f_destroy(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_instance_arg_t *vciap = &vcap->vca_un.vca_instance;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vciap->vcia_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ libvarpd_instance_destroy(ihp);
+ return (0);
+}
+
+static int
+libvarpd_door_f_nprops(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_nprops_arg_t *vcnap = &vcap->vca_un.vca_nprops;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcnap->vcna_id);
+ if (ihp == NULL)
+ return (ENOENT);
+
+ return (libvarpd_prop_nprops(ihp, &vcnap->vcna_nprops));
+}
+
+static int
+libvarpd_door_f_propinfo(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ int ret;
+ varpd_instance_handle_t *ihp;
+ varpd_prop_handle_t *phdl;
+ varpd_client_propinfo_arg_t *vcfap = &vcap->vca_un.vca_info;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcfap->vcfa_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ ret = libvarpd_prop_handle_alloc((varpd_handle_t *)vip, ihp, &phdl);
+ if (ret != 0)
+ return (ret);
+
+ if (vcfap->vcfa_propid != UINT_MAX) {
+ ret = libvarpd_prop_info_fill(phdl, vcfap->vcfa_propid);
+ if (ret != 0) {
+ libvarpd_prop_handle_free(phdl);
+ return (ret);
+ }
+ } else {
+ uint_t i, nprop;
+ const char *name;
+
+ vcfap->vcfa_name[LIBVARPD_PROP_NAMELEN-1] = '\0';
+ ret = libvarpd_prop_nprops(ihp, &nprop);
+ if (ret != 0) {
+ libvarpd_prop_handle_free(phdl);
+ return (ret);
+ }
+ for (i = 0; i < nprop; i++) {
+ ret = libvarpd_prop_info_fill(phdl, i);
+ if (ret != 0) {
+ libvarpd_prop_handle_free(phdl);
+ return (ret);
+ }
+ ret = libvarpd_prop_info(phdl, &name, NULL, NULL, NULL,
+ NULL, NULL);
+ if (ret != 0) {
+ libvarpd_prop_handle_free(phdl);
+ return (ret);
+ }
+ if (strcmp(vcfap->vcfa_name, name) == 0)
+ break;
+ }
+
+ if (i == nprop) {
+ libvarpd_prop_handle_free(phdl);
+ return (ENOENT);
+ }
+ vcfap->vcfa_propid = i;
+ }
+ libvarpd_prop_door_convert(phdl, vcfap);
+ libvarpd_prop_handle_free(phdl);
+ return (0);
+}
+
+static int
+libvarpd_door_f_getprop(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ int ret;
+ uint32_t size;
+ varpd_instance_handle_t *ihp;
+ varpd_prop_handle_t *phdl;
+ varpd_client_prop_arg_t *vcpap = &vcap->vca_un.vca_prop;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcpap->vcpa_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ ret = libvarpd_prop_handle_alloc((varpd_handle_t *)vip, ihp, &phdl);
+ if (ret != 0)
+ return (ret);
+
+ ret = libvarpd_prop_info_fill(phdl, vcpap->vcpa_propid);
+ if (ret != 0) {
+ libvarpd_prop_handle_free(phdl);
+ return (ret);
+ }
+
+ ret = libvarpd_prop_get(phdl, vcpap->vcpa_buf, &size);
+ if (ret == 0)
+ vcpap->vcpa_bufsize = size;
+ libvarpd_prop_handle_free(phdl);
+ return (0);
+}
+
+static int
+libvarpd_door_f_setprop(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ int ret;
+ varpd_instance_handle_t *ihp;
+ varpd_prop_handle_t *phdl;
+ varpd_client_prop_arg_t *vcpap = &vcap->vca_un.vca_prop;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcpap->vcpa_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ ret = libvarpd_prop_handle_alloc((varpd_handle_t *)vip, ihp, &phdl);
+ if (ret != 0)
+ return (ret);
+
+ ret = libvarpd_prop_info_fill(phdl, vcpap->vcpa_propid);
+ if (ret != 0) {
+ libvarpd_prop_handle_free(phdl);
+ return (ret);
+ }
+
+ ret = libvarpd_prop_set(phdl, vcpap->vcpa_buf, vcpap->vcpa_bufsize);
+ libvarpd_prop_handle_free(phdl);
+ return (ret);
+}
+
+static int
+libvarpd_door_f_lookup(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_t *inst;
+ varpd_client_lookup_arg_t *vclap = &vcap->vca_un.vca_lookup;
+
+ inst = libvarpd_instance_lookup_by_dlid(vip, vclap->vcla_linkid);
+ if (inst == NULL)
+ return (ENOENT);
+
+ vclap->vcla_id = inst->vri_id;
+ return (0);
+}
+
+static int
+libvarpd_door_f_target(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_instance_t *inst;
+ varpd_client_target_mode_arg_t *vtmap = &vcap->vca_un.vca_mode;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtmap->vtma_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ inst = (varpd_instance_t *)ihp;
+ vtmap->vtma_dest = inst->vri_dest;
+ vtmap->vtma_mode = inst->vri_mode;
+ return (0);
+}
+
+static int
+libvarpd_door_f_flush(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache;
+
+ if (libvarpd_door_privileged(credp) == B_FALSE)
+ return (EPERM);
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ return (libvarpd_overlay_cache_flush((varpd_instance_t *)ihp));
+}
+
+static int
+libvarpd_door_f_delete(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache;
+
+ if (libvarpd_door_privileged(credp) == B_FALSE)
+ return (EPERM);
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ return (libvarpd_overlay_cache_delete((varpd_instance_t *)ihp,
+ vtcap->vtca_key));
+}
+
+static int
+libvarpd_door_f_get(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache;
+
+ /* XXX Should this be privileged? */
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ return (libvarpd_overlay_cache_get((varpd_instance_t *)ihp,
+ vtcap->vtca_key, &vtcap->vtca_entry));
+}
+
+static int
+libvarpd_door_f_set(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache;
+
+ if (libvarpd_door_privileged(credp) == B_FALSE)
+ return (EPERM);
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id);
+ if (ihp == NULL)
+ return (ENOENT);
+
+ return (libvarpd_overlay_cache_set((varpd_instance_t *)ihp,
+ vtcap->vtca_key, &vtcap->vtca_entry));
+}
+
+static int
+libvarpd_door_f_walk(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_target_walk_arg_t *vctwp = &vcap->vca_un.vca_walk;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vctwp->vtcw_id);
+ if (ihp == NULL)
+ return (ENOENT);
+
+ return (libvarpd_overlay_cache_walk_fill((varpd_instance_t *)ihp,
+ &vctwp->vtcw_marker, &vctwp->vtcw_count, vctwp->vtcw_ents));
+}
+
+static libvarpd_door_f *libvarpd_door_table[] = {
+ libvarpd_door_f_create,
+ libvarpd_door_f_activate,
+ libvarpd_door_f_destroy,
+ libvarpd_door_f_nprops,
+ libvarpd_door_f_propinfo,
+ libvarpd_door_f_getprop,
+ libvarpd_door_f_setprop,
+ libvarpd_door_f_lookup,
+ libvarpd_door_f_target,
+ libvarpd_door_f_flush,
+ libvarpd_door_f_delete,
+ libvarpd_door_f_get,
+ libvarpd_door_f_set,
+ libvarpd_door_f_walk
+};
+
+static void
+libvarpd_door_server(void *cookie, char *argp, size_t argsz, door_desc_t *dp,
+ uint_t ndesc)
+{
+ int ret;
+ varpd_client_eresp_t err;
+ ucred_t *credp = NULL;
+ varpd_impl_t *vip = cookie;
+ varpd_client_arg_t *vcap = (varpd_client_arg_t *)argp;
+
+ err.vce_command = VARPD_CLIENT_INVALID;
+ /* XXX Get a check for max size */
+ if (argsz < sizeof (varpd_client_arg_t)) {
+ err.vce_errno = EINVAL;
+ goto errout;
+ }
+
+ if ((ret = door_ucred(&credp)) != 0) {
+ err.vce_errno = ret;
+ goto errout;
+ }
+
+ if (vcap->vca_command <= 0 || vcap->vca_command >= VARPD_CLIENT_MAX) {
+ err.vce_errno = EINVAL;
+ goto errout;
+ }
+
+ vcap->vca_errno = 0;
+ ret = libvarpd_door_table[vcap->vca_command - 1](vip, vcap, credp);
+ if (ret != 0)
+ vcap->vca_errno = ret;
+
+ ucred_free(credp);
+ (void) door_return(argp, argsz, NULL, 0);
+ return;
+
+errout:
+ /* XXX Should we do something here? */
+ (void) door_return((char *)&err, sizeof (err), NULL, 0);
+}
+
+int
+libvarpd_door_server_create(varpd_handle_t *vhp, const char *path)
+{
+ int fd, ret;
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ mutex_lock(&vip->vdi_lock);
+ if (vip->vdi_doorfd >= 0) {
+ mutex_unlock(&vip->vdi_lock);
+ return (EEXIST);
+ }
+
+ vip->vdi_doorfd = door_create(libvarpd_door_server, vip,
+ DOOR_REFUSE_DESC | DOOR_NO_CANCEL);
+ if (vip->vdi_doorfd == -1) {
+ mutex_unlock(&vip->vdi_lock);
+ return (errno);
+ }
+
+ if ((fd = open(path, O_CREAT | O_RDWR, 0666)) == -1) {
+ ret = errno;
+ if (door_revoke(vip->vdi_doorfd) != 0)
+ libvarpd_panic("failed to revoke door: %d",
+ errno);
+ mutex_unlock(&vip->vdi_lock);
+ return (errno);
+ }
+ /* XXX Really? */
+ if (fchown(fd, UID_NETADM, GID_NETADM) != 0) {
+ ret = errno;
+ if (door_revoke(vip->vdi_doorfd) != 0)
+ libvarpd_panic("failed to revoke door: %d",
+ errno);
+ mutex_unlock(&vip->vdi_lock);
+ return (ret);
+ }
+
+ if (close(fd) != 0)
+ libvarpd_panic("failed to close door fd %d: %d",
+ fd, errno);
+ (void) fdetach(path);
+ if (fattach(vip->vdi_doorfd, path) != 0) {
+ ret = errno;
+ if (door_revoke(vip->vdi_doorfd) != 0)
+ libvarpd_panic("failed to revoke door: %d",
+ errno);
+ mutex_unlock(&vip->vdi_lock);
+ return (ret);
+ }
+
+ mutex_unlock(&vip->vdi_lock);
+ return (0);
+}
+
+void
+libvarpd_door_server_destroy(varpd_handle_t *vhp)
+{
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ mutex_lock(&vip->vdi_lock);
+ if (vip->vdi_doorfd != 0) {
+ if (door_revoke(vip->vdi_doorfd) != 0)
+ libvarpd_panic("failed to revoke door: %d",
+ errno);
+ vip->vdi_doorfd = -1;
+ }
+ mutex_unlock(&vip->vdi_lock);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h b/usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h
new file mode 100644
index 0000000000..44bee7d92a
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h
@@ -0,0 +1,247 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_IMPL_H
+#define _LIBVARPD_IMPL_H
+
+/*
+ * varpd internal interfaces
+ */
+
+#include <libvarpd.h>
+#include <libvarpd_provider.h>
+#include <sys/avl.h>
+#include <thread.h>
+#include <synch.h>
+#include <limits.h>
+#include <libidspace.h>
+#include <umem.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LIBVARPD_ID_MIN 1
+#define LIBVARPD_ID_MAX INT32_MAX
+
+typedef struct varpd_plugin {
+ avl_node_t vpp_node;
+ const char *vpp_name;
+ overlay_target_mode_t vpp_mode;
+ const varpd_plugin_ops_t *vpp_ops;
+ mutex_t vpp_lock;
+ uint_t vpp_active;
+} varpd_plugin_t;
+
+typedef struct varpd_impl {
+ mutex_t vdi_lock;
+ mutex_t vdi_loglock;
+ rwlock_t vdi_pfdlock;
+ avl_tree_t vdi_plugins; /* vdi_lock */
+ avl_tree_t vdi_instances; /* vdi_lock */
+ avl_tree_t vdi_linstances; /* vdi_lock */
+ id_space_t *vdi_idspace; /* RO */
+ umem_cache_t *vdi_qcache; /* RO */
+ bunyan_logger_t *vdi_bunyan; /* RO */
+ int vdi_overlayfd; /* RO */
+ int vdi_doorfd; /* vdi_lock */
+ int vdi_persistfd; /* vdi_plock */
+ cond_t vdi_lthr_cv; /* vdi_lock */
+ boolean_t vdi_lthr_quiesce; /* vdi_lock */
+ uint_t vdi_lthr_count; /* vdi_lock */
+} varpd_impl_t;
+
+typedef enum varpd_instance_flags {
+ VARPD_INSTANCE_F_ACTIVATED = 0x01
+} varpd_instance_flags_t;
+
+typedef struct varpd_instance {
+ avl_node_t vri_inode;
+ avl_node_t vri_lnode;
+ uint64_t vri_id; /* RO */
+ uint64_t vri_vnetid; /* RO */
+ datalink_id_t vri_linkid; /* RO */
+ overlay_target_mode_t vri_mode; /* RO */
+ overlay_plugin_dest_t vri_dest; /* RO */
+ varpd_impl_t *vri_impl; /* RO */
+ varpd_plugin_t *vri_plugin; /* RO */
+ void *vri_private; /* RO */
+ mutex_t vri_lock;
+ varpd_instance_flags_t vri_flags; /* vri_lock */
+} varpd_instance_t;
+
+typedef struct varpd_query {
+ overlay_targ_lookup_t vq_lookup;
+ overlay_targ_resp_t vq_response;
+ varpd_instance_t *vq_instance;
+} varpd_query_t;
+
+typedef struct varpd_client_create_arg {
+ datalink_id_t vcca_linkid;
+ uint64_t vcca_id;
+ char vcca_plugin[LIBVARPD_PROP_NAMELEN];
+} varpd_client_create_arg_t;
+
+typedef struct varpd_client_instance_arg {
+ uint64_t vcia_id;
+} varpd_client_instance_arg_t;
+
+typedef struct varpd_client_nprops_arg {
+ uint64_t vcna_id;
+ uint_t vcna_nprops;
+} varpd_client_nprops_arg_t;
+
+typedef struct varpd_client_propinfo_arg {
+ uint64_t vcfa_id;
+ uint_t vcfa_propid;
+ uint_t vcfa_type;
+ uint_t vcfa_prot;
+ uint32_t vcfa_defsize;
+ uint32_t vcfa_psize;
+ char vcfa_name[LIBVARPD_PROP_NAMELEN];
+ uint8_t vcfa_default[LIBVARPD_PROP_SIZEMAX];
+ uint8_t vcfa_poss[LIBVARPD_PROP_SIZEMAX];
+} varpd_client_propinfo_arg_t;
+
+typedef struct varpd_client_prop_arg {
+ uint64_t vcpa_id;
+ uint_t vcpa_propid;
+ uint8_t vcpa_buf[LIBVARPD_PROP_SIZEMAX];
+ size_t vcpa_bufsize;
+} varpd_client_prop_arg_t;
+
+typedef struct varpd_client_lookup_arg {
+ datalink_id_t vcla_linkid;
+ uint32_t vcla_pad;
+ uint64_t vcla_id;
+} varpd_client_lookup_arg_t;
+
+typedef struct varpd_client_target_mode_arg {
+ uint64_t vtma_id;
+ uint32_t vtma_dest;
+ uint32_t vtma_mode;
+} varpd_client_target_mode_arg_t;
+
+typedef struct varpd_client_target_cache_arg {
+ uint64_t vtca_id;
+ uint8_t vtca_key[ETHERADDRL];
+ uint8_t vtca_pad[2];
+ varpd_client_cache_entry_t vtca_entry;
+} varpd_client_target_cache_arg_t;
+
+typedef struct varpd_client_target_walk_arg {
+ uint64_t vtcw_id;
+ uint64_t vtcw_marker;
+ uint64_t vtcw_count;
+ overlay_targ_cache_entry_t vtcw_ents[];
+} varpd_client_target_walk_arg_t;
+
+typedef enum varpd_client_command {
+ VARPD_CLIENT_INVALID = 0x0,
+ VARPD_CLIENT_CREATE,
+ VARPD_CLIENT_ACTIVATE,
+ VARPD_CLIENT_DESTROY,
+ VARPD_CLIENT_NPROPS,
+ VARPD_CLIENT_PROPINFO,
+ VARPD_CLIENT_GETPROP,
+ VARPD_CLIENT_SETPROP,
+ VARPD_CLIENT_LOOKUP,
+ VARPD_CLIENT_TARGET_MODE,
+ VARPD_CLIENT_CACHE_FLUSH,
+ VARPD_CLIENT_CACHE_DELETE,
+ VARPD_CLIENT_CACHE_GET,
+ VARPD_CLIENT_CACHE_SET,
+ VARPD_CLIENT_CACHE_WALK,
+ VARPD_CLIENT_MAX
+} varpd_client_command_t;
+
+typedef struct varpd_client_arg {
+ uint_t vca_command;
+ uint_t vca_errno;
+ union {
+ varpd_client_create_arg_t vca_create;
+ varpd_client_instance_arg_t vca_instance;
+ varpd_client_nprops_arg_t vca_nprops;
+ varpd_client_propinfo_arg_t vca_info;
+ varpd_client_prop_arg_t vca_prop;
+ varpd_client_lookup_arg_t vca_lookup;
+ varpd_client_target_mode_arg_t vca_mode;
+ varpd_client_target_cache_arg_t vca_cache;
+ varpd_client_target_walk_arg_t vca_walk;
+ } vca_un;
+} varpd_client_arg_t;
+
+typedef struct varpd_client_eresp {
+ uint_t vce_command;
+ uint_t vce_errno;
+} varpd_client_eresp_t;
+
+extern void libvarpd_plugin_init(void);
+extern void libvarpd_plugin_prefork(void);
+extern void libvarpd_plugin_postfork(void);
+extern void libvarpd_plugin_fini(void);
+extern int libvarpd_plugin_comparator(const void *, const void *);
+extern varpd_plugin_t *libvarpd_plugin_lookup(varpd_impl_t *, const char *);
+extern varpd_instance_t *libvarpd_instance_lookup_by_dlid(varpd_impl_t *,
+ datalink_id_t);
+
+extern void libvarpd_prop_door_convert(const varpd_prop_handle_t *,
+ varpd_client_propinfo_arg_t *);
+
+extern const char *libvarpd_isaext(void);
+typedef int (*libvarpd_dirwalk_f)(varpd_impl_t *, const char *, void *);
+extern int libvarpd_dirwalk(varpd_impl_t *, const char *, const char *,
+ libvarpd_dirwalk_f, void *);
+
+extern int libvarpd_overlay_init(varpd_impl_t *);
+extern void libvarpd_overlay_fini(varpd_impl_t *);
+extern int libvarpd_overlay_info(varpd_impl_t *, datalink_id_t,
+ overlay_plugin_dest_t *, uint64_t *, uint64_t *);
+extern int libvarpd_overlay_associate(varpd_instance_t *);
+extern int libvarpd_overlay_disassociate(varpd_instance_t *);
+extern int libvarpd_overlay_degrade(varpd_instance_t *, const char *);
+extern int libvarpd_overlay_degrade_datalink(varpd_impl_t *, datalink_id_t,
+ const char *);
+extern int libvarpd_overlay_restore(varpd_instance_t *);
+extern int libvarpd_overlay_packet(varpd_impl_t *,
+ const overlay_targ_lookup_t *, void *, size_t *);
+extern int libvarpd_overlay_inject(varpd_impl_t *,
+ const overlay_targ_lookup_t *, void *, size_t);
+extern int libvarpd_overlay_instance_inject(varpd_instance_t *, void *, size_t);
+extern int libvarpd_overlay_resend(varpd_impl_t *,
+ const overlay_targ_lookup_t *, void *, size_t);
+typedef int (*libvarpd_overlay_iter_f)(varpd_impl_t *, datalink_id_t, void *);
+extern int libvarpd_overlay_iter(varpd_impl_t *, libvarpd_overlay_iter_f,
+ void *);
+extern int libvarpd_overlay_cache_flush(varpd_instance_t *);
+extern int libvarpd_overlay_cache_delete(varpd_instance_t *, const uint8_t *);
+extern int libvarpd_overlay_cache_delete(varpd_instance_t *, const uint8_t *);
+extern int libvarpd_overlay_cache_get(varpd_instance_t *, const uint8_t *,
+ varpd_client_cache_entry_t *);
+extern int libvarpd_overlay_cache_set(varpd_instance_t *, const uint8_t *,
+ const varpd_client_cache_entry_t *);
+extern int libvarpd_overlay_cache_walk_fill(varpd_instance_t *, uint64_t *,
+ uint64_t *, overlay_targ_cache_entry_t *);
+
+extern void libvarpd_persist_init(varpd_impl_t *);
+extern void libvarpd_persist_fini(varpd_impl_t *);
+extern int libvarpd_persist_instance(varpd_impl_t *, varpd_instance_t *);
+extern void libvarpd_torch_instance(varpd_impl_t *, varpd_instance_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_IMPL_H */
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c
new file mode 100644
index 0000000000..f2cad47394
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c
@@ -0,0 +1,574 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc.
+ */
+
+/*
+ * Interactions with /dev/overlay
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stropts.h>
+#include <strings.h>
+#include <umem.h>
+
+#include <libvarpd_impl.h>
+#include <sys/overlay_target.h>
+
+#define OVERLAY_PATH "/dev/overlay"
+
+int
+libvarpd_overlay_init(varpd_impl_t *vip)
+{
+ vip->vdi_overlayfd = open(OVERLAY_PATH, O_RDWR);
+ if (vip->vdi_overlayfd == -1)
+ return (errno);
+ return (0);
+}
+
+void
+libvarpd_overlay_fini(varpd_impl_t *vip)
+{
+ assert(vip->vdi_overlayfd > 0);
+ if (close(vip->vdi_overlayfd) != 0)
+ libvarpd_panic("failed to close /dev/overlay fd %d: %d",
+ vip->vdi_overlayfd, errno);
+}
+
+int
+libvarpd_overlay_info(varpd_impl_t *vip, datalink_id_t linkid,
+ overlay_plugin_dest_t *destp, uint64_t *flags, uint64_t *vnetid)
+{
+ overlay_targ_info_t oti;
+
+ oti.oti_linkid = linkid;
+ if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_INFO, &oti) != 0)
+ return (errno);
+
+ if (destp != NULL)
+ *destp = oti.oti_needs;
+ if (flags != NULL)
+ *flags = oti.oti_flags;
+ if (vnetid != NULL)
+ *vnetid = oti.oti_vnetid;
+ return (0);
+}
+
+int
+libvarpd_overlay_associate(varpd_instance_t *inst)
+{
+ overlay_targ_associate_t ota;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ bzero(&ota, sizeof (overlay_targ_associate_t));
+ ota.ota_linkid = inst->vri_linkid;
+ ota.ota_mode = inst->vri_mode;
+ ota.ota_id = inst->vri_id;
+ ota.ota_provides = inst->vri_dest;
+
+ if (ota.ota_mode == OVERLAY_TARGET_POINT) {
+ int ret;
+ ret = inst->vri_plugin->vpp_ops->vpo_default(inst->vri_private,
+ &ota.ota_point);
+ if (ret != VARPD_LOOKUP_OK)
+ return (ret);
+ }
+
+ if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_ASSOCIATE, &ota) != 0)
+ return (errno);
+
+ return (0);
+}
+
+int
+libvarpd_overlay_disassociate(varpd_instance_t *inst)
+{
+ overlay_targ_id_t otid;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ otid.otid_linkid = inst->vri_linkid;
+ if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_DISASSOCIATE, &otid) != 0)
+ return (errno);
+ return (0);
+}
+
+int
+libvarpd_overlay_degrade_datalink(varpd_impl_t *vip, datalink_id_t linkid,
+ const char *msg)
+{
+ overlay_targ_degrade_t otd;
+
+ otd.otd_linkid = linkid;
+ (void) strlcpy(otd.otd_buf, msg, OVERLAY_STATUS_BUFLEN);
+ if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_DEGRADE, &otd) != 0)
+ return (errno);
+ return (0);
+
+}
+
+int
+libvarpd_overlay_degrade(varpd_instance_t *inst, const char *msg)
+{
+ return (libvarpd_overlay_degrade_datalink(inst->vri_impl,
+ inst->vri_linkid, msg));
+}
+
+int
+libvarpd_overlay_restore(varpd_instance_t *inst)
+{
+ overlay_targ_id_t otid;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ otid.otid_linkid = inst->vri_linkid;
+ if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_RESTORE, &otid) != 0)
+ return (errno);
+ return (0);
+}
+
+int
+libvarpd_overlay_packet(varpd_impl_t *vip, const overlay_targ_lookup_t *otl,
+ void *buf, size_t *buflen)
+{
+ int ret;
+ overlay_targ_pkt_t otp;
+
+ otp.otp_linkid = UINT64_MAX;
+ otp.otp_reqid = otl->otl_reqid;
+ otp.otp_size = *buflen;
+ otp.otp_buf = buf;
+
+ do {
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_PKT, &otp);
+ } while (ret != 0 && errno == EINTR);
+ if (ret != 0 && errno == EFAULT)
+ libvarpd_panic("OVERLAY_TARG_PKT ioctl efault");
+ else if (ret != 0)
+ ret = errno;
+
+ if (ret == 0)
+ *buflen = otp.otp_size;
+
+ return (ret);
+}
+
+static int
+libvarpd_overlay_inject_common(varpd_impl_t *vip, varpd_instance_t *inst,
+ const overlay_targ_lookup_t *otl, void *buf, size_t buflen, int cmd)
+{
+ int ret;
+ overlay_targ_pkt_t otp;
+
+ if (otl == NULL) {
+ otp.otp_linkid = inst->vri_linkid;
+ otp.otp_reqid = 0;
+ } else {
+ otp.otp_linkid = UINT64_MAX;
+ otp.otp_reqid = otl->otl_reqid;
+ }
+ otp.otp_size = buflen;
+ otp.otp_buf = buf;
+
+ do {
+ ret = ioctl(vip->vdi_overlayfd, cmd, &otp);
+ } while (ret != 0 && errno == EINTR);
+ if (ret != 0 && errno == EFAULT)
+ libvarpd_panic("overlay_inject_common ioctl efault");
+ else if (ret != 0)
+ ret = errno;
+
+ return (ret);
+}
+
+int
+libvarpd_overlay_inject(varpd_impl_t *vip, const overlay_targ_lookup_t *otl,
+ void *buf, size_t buflen)
+{
+ return (libvarpd_overlay_inject_common(vip, NULL, otl, buf, buflen,
+ OVERLAY_TARG_INJECT));
+}
+
+int
+libvarpd_overlay_instance_inject(varpd_instance_t *inst, void *buf,
+ size_t buflen)
+{
+ return (libvarpd_overlay_inject_common(inst->vri_impl, inst, NULL, buf,
+ buflen, OVERLAY_TARG_INJECT));
+}
+
+int
+libvarpd_overlay_resend(varpd_impl_t *vip, const overlay_targ_lookup_t *otl,
+ void *buf, size_t buflen)
+{
+ return (libvarpd_overlay_inject_common(vip, NULL, otl, buf, buflen,
+ OVERLAY_TARG_RESEND));
+}
+
+static void
+libvarpd_overlay_lookup_reply(varpd_impl_t *vip,
+ const overlay_targ_lookup_t *otl, overlay_targ_resp_t *otr, int cmd)
+{
+ int ret;
+
+ otr->otr_reqid = otl->otl_reqid;
+ do {
+ ret = ioctl(vip->vdi_overlayfd, cmd, otr);
+ } while (ret != 0 && errno == EINTR);
+ /* XXX abort feels wrong here */
+ if (ret != 0 && errno != EINVAL)
+ libvarpd_panic("receieved bad errno from lookup_reply "
+ "(cmd %d): %d\n", cmd, errno);
+}
+
+static void
+libvarpd_overlay_lookup_handle(varpd_impl_t *vip)
+{
+ int ret;
+ varpd_query_t *vqp;
+ overlay_targ_lookup_t *otl;
+ overlay_targ_resp_t *otr;
+ varpd_instance_t *inst;
+
+ vqp = umem_cache_alloc(vip->vdi_qcache, UMEM_DEFAULT);
+ otl = &vqp->vq_lookup;
+ otr = &vqp->vq_response;
+ /*
+ * XXX abort doesn't really help here, we should figure out what to do.
+ * Try and force a reap, then some?
+ */
+ if (vqp == NULL)
+ libvarpd_panic("failed to allocate memory for lookup "
+ "handle..., we should not panic()");
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_LOOKUP, otl);
+ if (ret != 0 && errno != ETIME && errno != EINTR)
+ libvarpd_panic("received bad errno from OVERLAY_TARG_LOOKUP: "
+ "%d", errno);
+
+ if (ret != 0) {
+ umem_cache_free(vip->vdi_qcache, vqp);
+ return;
+ }
+
+ inst = (varpd_instance_t *)libvarpd_instance_lookup(
+ (varpd_handle_t *)vip, otl->otl_varpdid);
+ if (inst == NULL) {
+ libvarpd_overlay_lookup_reply(vip, otl, otr,
+ OVERLAY_TARG_DROP);
+ umem_cache_free(vip->vdi_qcache, vqp);
+ return;
+ }
+ vqp->vq_instance = inst;
+
+ inst->vri_plugin->vpp_ops->vpo_lookup(inst->vri_private,
+ (varpd_query_handle_t *)vqp, otl, &otr->otr_answer);
+}
+
+void
+libvarpd_overlay_lookup_run(varpd_handle_t *vhp)
+{
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ mutex_lock(&vip->vdi_lock);
+ if (vip->vdi_lthr_quiesce == B_TRUE) {
+ mutex_unlock(&vip->vdi_lock);
+ return;
+ }
+ vip->vdi_lthr_count++;
+
+ for (;;) {
+ mutex_unlock(&vip->vdi_lock);
+ libvarpd_overlay_lookup_handle(vip);
+ mutex_lock(&vip->vdi_lock);
+ if (vip->vdi_lthr_quiesce == B_TRUE)
+ break;
+ }
+ assert(vip->vdi_lthr_count > 0);
+ vip->vdi_lthr_count--;
+ cond_signal(&vip->vdi_lthr_cv);
+ mutex_unlock(&vip->vdi_lock);
+}
+
+void
+libvarpd_overlay_lookup_quiesce(varpd_handle_t *vhp)
+{
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ mutex_lock(&vip->vdi_lock);
+ if (vip->vdi_lthr_count == 0) {
+ mutex_unlock(&vip->vdi_lock);
+ return;
+ }
+ vip->vdi_lthr_quiesce = B_TRUE;
+ while (vip->vdi_lthr_count > 0)
+ (void) cond_wait(&vip->vdi_lthr_cv, &vip->vdi_lock);
+ vip->vdi_lthr_quiesce = B_FALSE;
+ mutex_unlock(&vip->vdi_lock);
+}
+
+int
+libvarpd_overlay_iter(varpd_impl_t *vip, libvarpd_overlay_iter_f func,
+ void *arg)
+{
+ uint32_t curents = 0, i;
+ size_t size;
+ overlay_targ_list_t *otl;
+
+ for (;;) {
+ size = sizeof (overlay_targ_list_t) +
+ sizeof (uint32_t) * curents;
+ otl = umem_alloc(size, UMEM_DEFAULT);
+ if (otl == NULL)
+ return (ENOMEM);
+
+ otl->otl_nents = curents;
+ if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_LIST, otl) != 0) {
+ if (errno == EFAULT)
+ libvarpd_panic("OVERLAY_TARG_LIST ioctl "
+ "efault");
+ umem_free(otl, size);
+ if (errno == EINTR)
+ continue;
+ else
+ return (errno);
+ }
+
+ if (otl->otl_nents == curents)
+ break;
+
+ curents = otl->otl_nents;
+ umem_free(otl, size);
+ }
+
+ for (i = 0; i < otl->otl_nents; i++) {
+ if (func(vip, otl->otl_ents[i], arg) != 0)
+ break;
+ }
+ umem_free(otl, size);
+ return (0);
+}
+
+int
+libvarpd_overlay_cache_flush(varpd_instance_t *inst)
+{
+ int ret;
+ overlay_targ_cache_t cache;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ bzero(&cache, sizeof (overlay_targ_cache_t));
+ cache.otc_linkid = inst->vri_linkid;
+
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_FLUSH, &cache);
+ if (ret != 0 && errno == EFAULT)
+ libvarpd_panic("OVERLAY_TARG_CACHE_FLUSH ioctl efault");
+ else if (ret != 0)
+ ret = errno;
+
+ return (ret);
+}
+
+int
+libvarpd_overlay_cache_delete(varpd_instance_t *inst, const uint8_t *key)
+{
+ int ret;
+ overlay_targ_cache_t cache;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ bzero(&cache, sizeof (overlay_targ_cache_t));
+ cache.otc_linkid = inst->vri_linkid;
+ bcopy(key, cache.otc_entry.otce_mac, ETHERADDRL);
+
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_REMOVE, &cache);
+ if (ret != 0 && errno == EFAULT)
+ libvarpd_panic("OVERLAY_TARG_CACHE_REMOVE ioctl efault");
+ else if (ret != 0)
+ ret = errno;
+
+ return (ret);
+
+}
+
+int
+libvarpd_overlay_cache_get(varpd_instance_t *inst, const uint8_t *key,
+ varpd_client_cache_entry_t *entry)
+{
+ int ret;
+ overlay_targ_cache_t cache;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ bzero(&cache, sizeof (overlay_targ_cache_t));
+ cache.otc_linkid = inst->vri_linkid;
+ bcopy(key, cache.otc_entry.otce_mac, ETHERADDRL);
+
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_GET, &cache);
+ if (ret != 0 && errno == EFAULT)
+ libvarpd_panic("OVERLAY_TARG_CACHE_GET ioctl efault");
+ else if (ret != 0)
+ return (errno);
+
+ bcopy(cache.otc_entry.otce_dest.otp_mac, &entry->vcp_mac, ETHERADDRL);
+ entry->vcp_flags = cache.otc_entry.otce_flags;
+ entry->vcp_ip = cache.otc_entry.otce_dest.otp_ip;
+ entry->vcp_port = cache.otc_entry.otce_dest.otp_port;
+
+ return (0);
+}
+
+int
+libvarpd_overlay_cache_set(varpd_instance_t *inst, const uint8_t *key,
+ const varpd_client_cache_entry_t *entry)
+{
+ int ret;
+ overlay_targ_cache_t cache;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ bzero(&cache, sizeof (overlay_targ_cache_t));
+ cache.otc_linkid = inst->vri_linkid;
+ bcopy(key, cache.otc_entry.otce_mac, ETHERADDRL);
+ bcopy(&entry->vcp_mac, cache.otc_entry.otce_dest.otp_mac, ETHERADDRL);
+ cache.otc_entry.otce_flags = entry->vcp_flags;
+ cache.otc_entry.otce_dest.otp_ip = entry->vcp_ip;
+ cache.otc_entry.otce_dest.otp_port = entry->vcp_port;
+
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_SET, &cache);
+ if (ret != 0 && errno == EFAULT)
+ libvarpd_panic("OVERLAY_TARG_CACHE_SET ioctl efault");
+ else if (ret != 0)
+ return (errno);
+
+ return (0);
+}
+
+int
+libvarpd_overlay_cache_walk_fill(varpd_instance_t *inst, uint64_t *markerp,
+ uint64_t *countp, overlay_targ_cache_entry_t *ents)
+{
+ int ret;
+ size_t asize;
+ overlay_targ_cache_iter_t *iter;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ if (*countp > 200)
+ return (E2BIG);
+
+ asize = sizeof (overlay_targ_cache_iter_t) +
+ *countp * sizeof (overlay_targ_cache_entry_t);
+ iter = umem_alloc(asize, UMEM_DEFAULT);
+ if (iter == NULL)
+ return (ENOMEM);
+
+ iter->otci_linkid = inst->vri_linkid;
+ iter->otci_marker = *markerp;
+ iter->otci_count = *countp;
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_ITER, iter);
+ if (ret != 0 && errno == EFAULT)
+ libvarpd_panic("OVERLAY_TARG_CACHE_ITER ioctl efault");
+ else if (ret != 0) {
+ ret = errno;
+ goto out;
+ }
+
+ *markerp = iter->otci_marker;
+ *countp = iter->otci_count;
+ bcopy(iter->otci_ents, ents,
+ *countp * sizeof (overlay_targ_cache_entry_t));
+out:
+ umem_free(iter, asize);
+ return (ret);
+}
+
+void
+libvarpd_plugin_query_reply(varpd_query_handle_t *vqh, int action)
+{
+ varpd_query_t *vqp = (varpd_query_t *)vqh;
+
+ if (vqp == NULL)
+ libvarpd_panic("unkonwn plugin passed invalid "
+ "varpd_query_handle_t");
+
+ if (action == VARPD_LOOKUP_DROP)
+ libvarpd_overlay_lookup_reply(vqp->vq_instance->vri_impl,
+ &vqp->vq_lookup, &vqp->vq_response, OVERLAY_TARG_DROP);
+ else if (action == VARPD_LOOKUP_OK)
+ libvarpd_overlay_lookup_reply(vqp->vq_instance->vri_impl,
+ &vqp->vq_lookup, &vqp->vq_response, OVERLAY_TARG_RESPOND);
+ else
+ libvarpd_panic("plugin %s passed in an invalid action: %d",
+ vqp->vq_instance->vri_plugin->vpp_name, action);
+}
+
+void
+libvarpd_inject_varp(varpd_provider_handle_t *vph, const uint8_t *mac,
+ const overlay_target_point_t *otp)
+{
+ int ret;
+ overlay_targ_cache_t otc;
+ varpd_instance_t *inst = (varpd_instance_t *)vph;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ otc.otc_linkid = inst->vri_linkid;
+ otc.otc_entry.otce_flags = 0;
+ bcopy(mac, otc.otc_entry.otce_mac, ETHERADDRL);
+ bcopy(otp, &otc.otc_entry.otce_dest, sizeof (overlay_target_point_t));
+
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_SET, &otc);
+ if (ret != 0) {
+ switch (errno) {
+ case EBADF:
+ case EFAULT:
+ case ENOTSUP:
+ libvarpd_panic("received bad errno from "
+ "OVERLAY_TARG_CACHE_SET: %d", errno);
+ default:
+ break;
+ }
+ }
+}
+
+void
+libvarpd_fma_degrade(varpd_provider_handle_t *vph, const char *msg)
+{
+ int ret;
+ varpd_instance_t *inst = (varpd_instance_t *)vph;
+
+ ret = libvarpd_overlay_degrade(inst, msg);
+ switch (ret) {
+ case ENOENT:
+ case EFAULT:
+ libvarpd_panic("received bad errno from degrade ioctl: %d",
+ errno);
+ default:
+ break;
+ }
+}
+
+void
+libvarpd_fma_restore(varpd_provider_handle_t *vph)
+{
+ int ret;
+ varpd_instance_t *inst = (varpd_instance_t *)vph;
+
+ ret = libvarpd_overlay_restore(inst);
+ switch (ret) {
+ case ENOENT:
+ case EFAULT:
+ libvarpd_panic("received bad errno from restore ioctl: %d",
+ errno);
+ default:
+ break;
+ }
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c
new file mode 100644
index 0000000000..9d02504139
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c
@@ -0,0 +1,48 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+/*
+ * No, 'tis not so deep as a well, nor so wide as a church door; but 'tis
+ * enough,'twill serve. Ask for me tomorrow, and you shall find me a grave man.
+ *
+ * This file maintains various routines for handling when we die.
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <thread.h>
+#include <stdlib.h>
+
+static int varpd_panic_errno;
+static char varpd_panic_buf[1024];
+static thread_t varpd_panic_thread;
+
+void
+libvarpd_panic(const char *fmt, ...)
+{
+ va_list ap;
+
+ /* Always save errno first! */
+ varpd_panic_errno = errno;
+ varpd_panic_thread = thr_self();
+
+ if (fmt != NULL) {
+ va_start(ap, fmt);
+ (void) vsnprintf(varpd_panic_buf, sizeof (varpd_panic_buf), fmt,
+ ap);
+ }
+ abort();
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c
new file mode 100644
index 0000000000..255622b63b
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c
@@ -0,0 +1,590 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * varpd persistence backend
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <strings.h>
+#include <librename.h>
+#include <md5.h>
+#include <sys/sysmacros.h>
+#include <dirent.h>
+#include <sys/mman.h>
+#include <umem.h>
+
+#include <libvarpd_impl.h>
+
+static uint8_t varpd_persist_magic[4] = {
+ 'v',
+ 'a',
+ 'r',
+ 'p',
+};
+
+#define VARPD_PERSIST_MAXWRITE 4096
+#define VARPD_PERSIST_VERSION_ONE 1
+#define VARPD_PERSIST_SUFFIX ".varpd"
+
+/*
+ * XXX ctfdiff this structure
+ */
+typedef struct varpd_persist_header {
+ uint8_t vph_magic[4];
+ uint32_t vph_version;
+ uint8_t vph_md5[16];
+} varpd_persist_header_t;
+
+void
+libvarpd_persist_init(varpd_impl_t *vip)
+{
+ vip->vdi_persistfd = -1;
+ if (rwlock_init(&vip->vdi_pfdlock, USYNC_THREAD, NULL) != 0)
+ libvarpd_panic("failed to create rw vdi_pfdlock");
+}
+
+void
+libvarpd_persist_fini(varpd_impl_t *vip)
+{
+ /*
+ * Clean up for someone that left something behind.
+ */
+ if (vip->vdi_persistfd != -1) {
+ if (close(vip->vdi_persistfd) != 0)
+ libvarpd_panic("failed to close persist fd %d: %d",
+ vip->vdi_persistfd, errno);
+ vip->vdi_persistfd = -1;
+ }
+ if (rwlock_destroy(&vip->vdi_pfdlock) != 0)
+ libvarpd_panic("failed to destroy rw vdi_pfdlock");
+}
+
+int
+libvarpd_persist_enable(varpd_handle_t *vhp, const char *rootdir)
+{
+ int fd;
+ struct stat st;
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ fd = open(rootdir, O_RDONLY);
+ if (fd < 0)
+ return (errno);
+
+ if (fstat(fd, &st) != 0) {
+ int ret = errno;
+ if (close(fd) != 0)
+ libvarpd_panic("failed to close rootdir fd (%s) %d: %d",
+ rootdir, fd, errno);
+ return (ret);
+ }
+
+ if (!S_ISDIR(st.st_mode)) {
+ if (close(fd) != 0)
+ libvarpd_panic("failed to close rootdir fd (%s) %d: %d",
+ rootdir, fd, errno);
+ return (EINVAL);
+ }
+
+
+ rw_wrlock(&vip->vdi_pfdlock);
+ if (vip->vdi_persistfd != -1) {
+ rw_unlock(&vip->vdi_pfdlock);
+ if (close(fd) != 0)
+ libvarpd_panic("failed to close rootdir fd (%s) %d: %d",
+ rootdir, fd, errno);
+ return (EEXIST);
+ }
+ vip->vdi_persistfd = fd;
+ rw_unlock(&vip->vdi_pfdlock);
+
+ return (0);
+}
+
+static int
+libvarpd_persist_write(int fd, const void *buf, size_t buflen)
+{
+ size_t ret;
+ off_t off = 0;
+
+ while (buflen > 0) {
+ ret = write(fd, buf + off,
+ MIN(buflen, VARPD_PERSIST_MAXWRITE));
+ if (ret == -1 && errno == EINTR)
+ continue;
+ if (ret == -1)
+ return (errno);
+
+ off += ret;
+ buflen -= ret;
+ }
+
+ return (0);
+}
+
+static int
+libvarpd_persist_nvlist(int dirfd, uint64_t id, nvlist_t *nvl)
+{
+ int err, fd;
+ size_t size;
+ varpd_persist_header_t hdr;
+ librename_atomic_t *lrap;
+ char *buf = NULL, *name;
+
+ if ((err = nvlist_pack(nvl, &buf, &size, NV_ENCODE_XDR, 0)) != 0)
+ return (err);
+
+ if (asprintf(&name, "%lld%s", id, ".varpd") == -1) {
+ err = errno;
+ free(buf);
+ return (err);
+ }
+
+ if ((err = librename_atomic_fdinit(dirfd, name, NULL, 0600, 0,
+ &lrap)) != 0) {
+ free(name);
+ free(buf);
+ return (err);
+ }
+
+ fd = librename_atomic_fd(lrap);
+
+ bzero(&hdr, sizeof (varpd_persist_header_t));
+ bcopy(varpd_persist_magic, hdr.vph_magic, sizeof (varpd_persist_magic));
+ hdr.vph_version = VARPD_PERSIST_VERSION_ONE;
+ md5_calc(hdr.vph_md5, buf, size);
+
+ if ((err = libvarpd_persist_write(fd, &hdr,
+ sizeof (varpd_persist_header_t))) != 0) {
+ librename_atomic_fini(lrap);
+ free(name);
+ free(buf);
+ return (err);
+ }
+
+ if ((err = libvarpd_persist_write(fd, buf, size)) != 0) {
+ librename_atomic_fini(lrap);
+ free(name);
+ free(buf);
+ return (err);
+ }
+
+ do {
+ err = librename_atomic_commit(lrap);
+ } while (err == EINTR);
+
+ librename_atomic_fini(lrap);
+ free(name);
+ free(buf);
+ return (err);
+}
+
+int
+libvarpd_persist_instance(varpd_impl_t *vip, varpd_instance_t *inst)
+{
+ int err = 0;
+ nvlist_t *nvl = NULL, *cvl = NULL;
+
+ rw_rdlock(&vip->vdi_pfdlock);
+ /* Check if persistence exists */
+ if (vip->vdi_persistfd == -1)
+ goto out;
+
+ if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0)
+ goto out;
+
+ if ((err = nvlist_alloc(&cvl, NV_UNIQUE_NAME, 0)) != 0)
+ goto out;
+
+ if ((err = nvlist_add_uint64(nvl, "vri_id", inst->vri_id)) != 0)
+ goto out;
+
+ if ((err = nvlist_add_uint32(nvl, "vri_linkid", inst->vri_linkid)) != 0)
+ goto out;
+
+ if ((err = nvlist_add_uint32(nvl, "vri_dest",
+ (uint32_t)inst->vri_dest)) != 0)
+ goto out;
+ if ((err = nvlist_add_uint32(nvl, "vri_mode",
+ (uint32_t)inst->vri_mode)) != 0)
+ goto out;
+
+ if ((err = nvlist_add_string(nvl, "vri_plugin",
+ inst->vri_plugin->vpp_name)) != 0)
+ goto out;
+
+ err = inst->vri_plugin->vpp_ops->vpo_save(inst->vri_private, cvl);
+ if (err != 0)
+ goto out;
+
+ if ((err = nvlist_add_nvlist(nvl, "vri_private", cvl)) != 0)
+ goto out;
+
+ err = libvarpd_persist_nvlist(vip->vdi_persistfd, inst->vri_id, nvl);
+out:
+ nvlist_free(nvl);
+ nvlist_free(cvl);
+ rw_unlock(&vip->vdi_pfdlock);
+ return (err);
+}
+
+void
+libvarpd_torch_instance(varpd_impl_t *vip, varpd_instance_t *inst)
+{
+ char buf[32];
+ int ret;
+
+ rw_rdlock(&vip->vdi_pfdlock);
+ if (vip->vdi_persistfd == -1) {
+ rw_unlock(&vip->vdi_pfdlock);
+ return;
+ }
+
+ if (snprintf(buf, sizeof (buf), "%lld.varpd", inst->vri_id) >= 32)
+ libvarpd_panic("somehow exceeded static value for "
+ "libvarpd_torch_instance buffer");
+
+ do {
+ ret = unlinkat(vip->vdi_persistfd, buf, 0);
+ } while (ret == -1 && errno == EINTR);
+ if (ret != 0) {
+ switch (errno) {
+ case ENOENT:
+ break;
+ default:
+ libvarpd_panic("failed to unlinkat %d`%s: %s",
+ vip->vdi_persistfd, buf, strerror(errno));
+ }
+ }
+
+ rw_unlock(&vip->vdi_pfdlock);
+}
+
+static int
+libvarpd_persist_restore_instance(varpd_impl_t *vip, nvlist_t *nvl)
+{
+ nvlist_t *pvl;
+ uint64_t id, flags, vid;
+ uint32_t linkid, dest, mode;
+ char *pluginstr;
+ varpd_plugin_t *plugin;
+ overlay_plugin_dest_t adest;
+ varpd_instance_t *inst, lookup;
+
+ if (nvlist_lookup_uint64(nvl, "vri_id", &id) != 0)
+ return (EINVAL);
+
+ if (nvlist_lookup_uint32(nvl, "vri_linkid", &linkid) != 0)
+ return (EINVAL);
+
+ if (nvlist_lookup_uint32(nvl, "vri_dest", &dest) != 0)
+ return (EINVAL);
+
+ if (nvlist_lookup_uint32(nvl, "vri_mode", &mode) != 0)
+ return (EINVAL);
+
+ if (nvlist_lookup_string(nvl, "vri_plugin", &pluginstr) != 0)
+ return (EINVAL);
+
+ if (nvlist_lookup_nvlist(nvl, "vri_private", &pvl) != 0)
+ return (EINVAL);
+
+ plugin = libvarpd_plugin_lookup(vip, pluginstr);
+ if (plugin == NULL)
+ return (EINVAL);
+
+ if (plugin->vpp_mode != mode)
+ return (EINVAL);
+
+ if (libvarpd_overlay_info(vip, linkid, &adest, &flags, &vid) != 0)
+ return (EINVAL);
+
+ if (dest != adest)
+ return (EINVAL);
+
+ /* XXX This failure shouldn't cause us to unlink... */
+ inst = umem_alloc(sizeof (varpd_instance_t), UMEM_DEFAULT);
+ if (inst == NULL)
+ return (ENOMEM);
+
+ inst->vri_id = id_alloc_specific(vip->vdi_idspace, id);
+ if (inst->vri_id != id) {
+ umem_free(inst, sizeof (varpd_instance_t));
+ return (EINVAL);
+ }
+
+ inst->vri_linkid = linkid;
+ inst->vri_vnetid = vid;
+ inst->vri_mode = plugin->vpp_mode;
+ inst->vri_dest = dest;
+ inst->vri_plugin = plugin;
+ inst->vri_impl = vip;
+ inst->vri_flags = 0;
+ if (plugin->vpp_ops->vpo_restore(pvl, (varpd_provider_handle_t *)inst,
+ dest, &inst->vri_private) != 0) {
+ id_free(vip->vdi_idspace, id);
+ umem_free(inst, sizeof (varpd_instance_t));
+ return (EINVAL);
+ }
+
+ if (mutex_init(&inst->vri_lock, USYNC_THREAD, NULL) != 0)
+ libvarpd_panic("failed to create vri_lock mutex");
+
+ mutex_lock(&vip->vdi_lock);
+ lookup.vri_id = inst->vri_id;
+ if (avl_find(&vip->vdi_instances, &lookup, NULL) != NULL)
+ libvarpd_panic("found duplicate instance with id %d",
+ lookup.vri_id);
+ avl_add(&vip->vdi_instances, inst);
+ lookup.vri_linkid = inst->vri_linkid;
+ if (avl_find(&vip->vdi_linstances, &lookup, NULL) != NULL)
+ libvarpd_panic("found duplicate linstance with id %d",
+ lookup.vri_linkid);
+ avl_add(&vip->vdi_linstances, inst);
+ mutex_unlock(&vip->vdi_lock);
+
+ if (plugin->vpp_ops->vpo_start(inst->vri_private) != 0) {
+ libvarpd_instance_destroy((varpd_instance_handle_t *)inst);
+ return (EINVAL);
+ }
+
+ if (flags & OVERLAY_TARG_INFO_F_ACTIVE)
+ libvarpd_overlay_disassociate(inst);
+
+ if (libvarpd_overlay_associate(inst) != 0) {
+ libvarpd_instance_destroy((varpd_instance_handle_t *)inst);
+ return (EINVAL);
+ }
+
+ if (flags & OVERLAY_TARG_INFO_F_DEGRADED)
+ libvarpd_overlay_restore(inst);
+
+ mutex_lock(&inst->vri_lock);
+ inst->vri_flags |= VARPD_INSTANCE_F_ACTIVATED;
+ mutex_unlock(&inst->vri_lock);
+
+ return (0);
+}
+
+static int
+libvarpd_persist_restore_one(varpd_impl_t *vip, int fd)
+{
+ int err;
+ size_t fsize;
+ struct stat st;
+ void *buf, *datap;
+ varpd_persist_header_t *hdr;
+ uint8_t md5[16];
+ nvlist_t *nvl;
+
+ if (fstat(fd, &st) != 0)
+ return (errno);
+
+ if (st.st_size <= sizeof (varpd_persist_header_t))
+ return (EINVAL);
+ fsize = st.st_size - sizeof (varpd_persist_header_t);
+
+ buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (buf == MAP_FAILED)
+ return (errno);
+
+ hdr = buf;
+ if (bcmp(varpd_persist_magic, hdr->vph_magic,
+ sizeof (varpd_persist_magic)) != 0) {
+ if (munmap(buf, st.st_size) != 0)
+ libvarpd_panic("failed to munmap %p: %d", buf, errno);
+ return (EINVAL);
+ }
+
+ if (hdr->vph_version != VARPD_PERSIST_VERSION_ONE) {
+ if (munmap(buf, st.st_size) != 0)
+ libvarpd_panic("failed to munmap %p: %d", buf, errno);
+ return (EINVAL);
+ }
+
+ datap = (void *)((uintptr_t)buf + sizeof (varpd_persist_header_t));
+ md5_calc(md5, datap, fsize);
+ if (bcmp(md5, hdr->vph_md5, sizeof (uint8_t) * 16) != 0) {
+ if (munmap(buf, st.st_size) != 0)
+ libvarpd_panic("failed to munmap %p: %d", buf, errno);
+ return (EINVAL);
+ }
+
+ err = nvlist_unpack(datap, fsize, &nvl, 0);
+ if (munmap(buf, st.st_size) != 0)
+ libvarpd_panic("failed to munmap %p: %d", buf, errno);
+
+ if (err != 0)
+ return (EINVAL);
+
+ err = libvarpd_persist_restore_instance(vip, nvl);
+ nvlist_free(nvl);
+ return (err);
+}
+
+/*
+ * XXX ew, O(n^2)
+ */
+static int
+libvarpd_check_degrade_cb(varpd_impl_t *vip, datalink_id_t linkid, void *arg)
+{
+ varpd_instance_t *inst;
+
+ mutex_lock(&vip->vdi_lock);
+ for (inst = avl_first(&vip->vdi_instances); inst != NULL;
+ inst = AVL_NEXT(&vip->vdi_instances, inst)) {
+ if (inst->vri_linkid == linkid) {
+ mutex_unlock(&vip->vdi_lock);
+ return (0);
+ }
+ }
+
+ mutex_unlock(&vip->vdi_lock);
+
+ (void) libvarpd_overlay_degrade_datalink(vip, linkid,
+ "no varpd instance exists");
+ return (0);
+}
+
+static void
+libvarpd_check_degrade(varpd_impl_t *vip)
+{
+ libvarpd_overlay_iter(vip, libvarpd_check_degrade_cb, NULL);
+}
+
+/*
+ * XXX We need to go through and mark any kernel devices that we don't know
+ * about as degraded.
+ */
+int
+libvarpd_persist_restore(varpd_handle_t *vhp)
+{
+ int dirfd;
+ int ret = 0;
+ DIR *dirp = NULL;
+ struct dirent *dp;
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ rw_rdlock(&vip->vdi_pfdlock);
+ if ((dirfd = dup(vip->vdi_persistfd)) < 0) {
+ ret = errno;
+ goto out;
+ }
+
+ if ((dirp = fdopendir(dirfd)) == NULL) {
+ ret = errno;
+ if (close(dirfd) != 0)
+ libvarpd_panic("failed to close dirfd %d: %d",
+ dirfd, errno);
+ goto out;
+ }
+
+ for (;;) {
+ int fd;
+ uint64_t id;
+ char *eptr;
+ struct stat st;
+
+ errno = 0;
+ dp = readdir(dirp);
+ if (dp == NULL) {
+ ret = errno;
+ break;
+ }
+
+ if (strcmp(dp->d_name, ".") == 0 ||
+ strcmp(dp->d_name, "..") == 0)
+ continue;
+
+ /*
+ * Leave files that we don't recognize alone. A valid file has
+ * the format `%llu.varpd`.
+ */
+ errno = 0;
+ id = strtoull(dp->d_name, &eptr, 10);
+ if ((id == 0 && errno == EINVAL) ||
+ (id == ULLONG_MAX && errno == ERANGE))
+ continue;
+
+ if (strcmp(eptr, VARPD_PERSIST_SUFFIX) != 0)
+ continue;
+
+ fd = openat(vip->vdi_persistfd, dp->d_name, O_RDONLY);
+ if (fd < 0) {
+ ret = errno;
+ break;
+ }
+
+ if (fstat(fd, &st) != 0) {
+ ret = errno;
+ break;
+ }
+
+ if (!S_ISREG(st.st_mode)) {
+ if (close(fd) != 0)
+ libvarpd_panic("failed to close fd (%s) %d: "
+ "%d\n", dp->d_name, fd, errno);
+ continue;
+ }
+
+ ret = libvarpd_persist_restore_one(vip, fd);
+ if (close(fd) != 0)
+ libvarpd_panic("failed to close fd (%s) %d: "
+ "%d\n", dp->d_name, fd, errno);
+ /*
+ * This is an invalid file. We'll unlink it to save us this
+ * trouble in the future. XXX We shouldn't unlink on all
+ * failures presumably...
+ */
+ if (ret != 0) {
+ if (unlinkat(vip->vdi_persistfd, dp->d_name, 0) != 0) {
+ ret = errno;
+ break;
+ }
+ }
+ }
+
+ libvarpd_check_degrade(vip);
+
+out:
+ if (dirp != NULL)
+ closedir(dirp);
+ rw_unlock(&vip->vdi_pfdlock);
+ return (ret);
+}
+
+int
+libvarpd_persist_disable(varpd_handle_t *vhp)
+{
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ rw_wrlock(&vip->vdi_pfdlock);
+ if (vip->vdi_persistfd == -1) {
+ mutex_unlock(&vip->vdi_lock);
+ rw_unlock(&vip->vdi_pfdlock);
+ return (ENOENT);
+ }
+ if (close(vip->vdi_persistfd) != 0)
+ libvarpd_panic("failed to close persist fd %d: %d",
+ vip->vdi_persistfd, errno);
+ vip->vdi_persistfd = -1;
+ rw_unlock(&vip->vdi_pfdlock);
+ return (0);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c
new file mode 100644
index 0000000000..df53ee5d1d
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c
@@ -0,0 +1,233 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/*
+ * varpd plugin management
+ */
+
+#include <libvarpd_impl.h>
+#include <errno.h>
+#include <umem.h>
+#include <assert.h>
+#include <strings.h>
+#include <dlfcn.h>
+#include <link.h>
+#include <stdio.h>
+
+static varpd_impl_t *varpd_load_handle;
+static mutex_t varpd_load_lock;
+static cond_t varpd_load_cv;
+
+int
+libvarpd_plugin_comparator(const void *lp, const void *rp)
+{
+ int ret;
+ const varpd_plugin_t *lpp, *rpp;
+
+ lpp = lp;
+ rpp = rp;
+
+ ret = strcmp(lpp->vpp_name, rpp->vpp_name);
+ if (ret > 0)
+ return (1);
+ if (ret < 0)
+ return (-1);
+ return (0);
+}
+
+varpd_plugin_register_t *
+libvarpd_plugin_alloc(uint_t version, int *errp)
+{
+ int err;
+ varpd_plugin_register_t *vprp;
+
+ if (errp == NULL)
+ errp = &err;
+
+ if (version != VARPD_VERSION_ONE) {
+ *errp = EINVAL;
+ return (NULL);
+ }
+
+ vprp = umem_alloc(sizeof (varpd_plugin_register_t), UMEM_DEFAULT);
+ if (vprp == NULL) {
+ *errp = ENOMEM;
+ return (NULL);
+ }
+
+ vprp->vpr_version = VARPD_VERSION_ONE;
+
+ return (vprp);
+}
+
+void
+libvarpd_plugin_free(varpd_plugin_register_t *vprp)
+{
+ umem_free(vprp, sizeof (varpd_plugin_register_t));
+}
+
+int
+libvarpd_plugin_register(varpd_plugin_register_t *vprp)
+{
+ varpd_plugin_t *vpp;
+ varpd_plugin_t lookup;
+
+ vpp = umem_alloc(sizeof (varpd_plugin_t), UMEM_DEFAULT);
+ if (vpp == NULL)
+ return (ENOMEM);
+
+ /* Watch out for an evil plugin */
+ if (vprp->vpr_version != VARPD_VERSION_ONE)
+ return (EINVAL);
+
+ mutex_lock(&varpd_load_lock);
+ if (varpd_load_handle == NULL)
+ libvarpd_panic("varpd_load_handle was unexpectedly null");
+
+ mutex_lock(&varpd_load_handle->vdi_lock);
+ lookup.vpp_name = vprp->vpr_name;
+ if (avl_find(&varpd_load_handle->vdi_plugins, &lookup, NULL) != NULL) {
+ mutex_unlock(&varpd_load_handle->vdi_lock);
+ mutex_unlock(&varpd_load_lock);
+ umem_free(vpp, sizeof (varpd_plugin_t));
+ return (EEXIST);
+ }
+ vpp->vpp_name = strdup(vprp->vpr_name);
+ if (vpp->vpp_name == NULL) {
+ mutex_unlock(&varpd_load_handle->vdi_lock);
+ mutex_unlock(&varpd_load_lock);
+ umem_free(vpp, sizeof (varpd_plugin_t));
+ return (EEXIST);
+ }
+
+ vpp->vpp_mode = vprp->vpr_mode;
+ vpp->vpp_ops = vprp->vpr_ops;
+ if (mutex_init(&vpp->vpp_lock, USYNC_THREAD, NULL) != 0)
+ libvarpd_panic("failed to create plugin's vpp_lock");
+ vpp->vpp_active = 0;
+ avl_add(&varpd_load_handle->vdi_plugins, vpp);
+ mutex_unlock(&varpd_load_handle->vdi_lock);
+ mutex_unlock(&varpd_load_lock);
+
+ return (0);
+}
+
+varpd_plugin_t *
+libvarpd_plugin_lookup(varpd_impl_t *vip, const char *name)
+{
+ varpd_plugin_t lookup, *ret;
+
+ lookup.vpp_name = name;
+ mutex_lock(&vip->vdi_lock);
+ ret = avl_find(&vip->vdi_plugins, &lookup, NULL);
+ mutex_unlock(&vip->vdi_lock);
+
+ return (ret);
+}
+
+static int
+libvarpd_plugin_load_cb(varpd_impl_t *vip, const char *path, void *unused)
+{
+ void *dlp;
+
+ dlp = dlopen(path, RTLD_LOCAL | RTLD_NOW);
+ if (dlp == NULL) {
+ /* XXX This should be a real error */
+ fprintf(stderr, "dlopen failed: %s\n", dlerror());
+ }
+
+ return (0);
+}
+
+int
+libvarpd_plugin_load(varpd_handle_t *vph, const char *path)
+{
+ int ret = 0;
+ varpd_impl_t *vip = (varpd_impl_t *)vph;
+
+ if (vip == NULL || path == NULL)
+ return (EINVAL);
+ mutex_lock(&varpd_load_lock);
+ while (varpd_load_handle != NULL)
+ cond_wait(&varpd_load_cv, &varpd_load_lock);
+ varpd_load_handle = vip;
+ mutex_unlock(&varpd_load_lock);
+
+ ret = libvarpd_dirwalk(vip, path, ".so", libvarpd_plugin_load_cb, NULL);
+
+ mutex_lock(&varpd_load_lock);
+ varpd_load_handle = NULL;
+ cond_signal(&varpd_load_cv);
+ mutex_unlock(&varpd_load_lock);
+
+ return (ret);
+}
+
+int
+libvarpd_plugin_walk(varpd_handle_t *vph, libvarpd_plugin_walk_f func,
+ void *arg)
+{
+ varpd_impl_t *vip = (varpd_impl_t *)vph;
+ varpd_plugin_t *vpp;
+
+ mutex_lock(&vip->vdi_lock);
+ for (vpp = avl_first(&vip->vdi_plugins); vpp != NULL;
+ vpp = AVL_NEXT(&vip->vdi_plugins, vpp)) {
+ if (func(vph, vpp->vpp_name, arg) != 0) {
+ mutex_unlock(&vip->vdi_lock);
+ return (1);
+ }
+ }
+ mutex_unlock(&vip->vdi_lock);
+ return (0);
+}
+
+void
+libvarpd_plugin_init(void)
+{
+ if (mutex_init(&varpd_load_lock, USYNC_THREAD | LOCK_RECURSIVE |
+ LOCK_ERRORCHECK, NULL) != 0)
+ libvarpd_panic("failed to create varpd_load_lock");
+
+ if (cond_init(&varpd_load_cv, USYNC_THREAD, NULL) != 0)
+ libvarpd_panic("failed to create varpd_load_cv");
+
+ varpd_load_handle = NULL;
+}
+
+void
+libvarpd_plugin_fini(void)
+{
+ assert(varpd_load_handle == NULL);
+ if (mutex_destroy(&varpd_load_lock) != 0)
+ libvarpd_panic("failed to destroy varpd_load_lock");
+ if (cond_destroy(&varpd_load_cv) != 0)
+ libvarpd_panic("failed to destroy varpd_load_cv");
+}
+
+void
+libvarpd_plugin_prefork(void)
+{
+ mutex_lock(&varpd_load_lock);
+ while (varpd_load_handle != NULL)
+ cond_wait(&varpd_load_cv, &varpd_load_lock);
+}
+
+void
+libvarpd_plugin_postfork(void)
+{
+ cond_signal(&varpd_load_cv);
+ mutex_unlock(&varpd_load_lock);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c
new file mode 100644
index 0000000000..32ab9e81ab
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c
@@ -0,0 +1,238 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/*
+ * varpd property management
+ */
+
+#include <libvarpd_impl.h>
+#include <errno.h>
+#include <strings.h>
+#include <sys/mac.h>
+#include <umem.h>
+
+typedef struct varpd_prop_info {
+ varpd_impl_t *vprop_vip;
+ varpd_instance_t *vprop_instance;
+ uint_t vprop_type;
+ uint_t vprop_prot;
+ uint32_t vprop_defsize;
+ uint32_t vprop_psize;
+ char vprop_name[LIBVARPD_PROP_NAMELEN];
+ uint8_t vprop_default[LIBVARPD_PROP_SIZEMAX];
+ uint8_t vprop_poss[LIBVARPD_PROP_SIZEMAX];
+} varpd_prop_info_t;
+
+void
+libvarpd_prop_set_name(varpd_prop_handle_t *phdl, const char *name)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ (void) strlcpy(infop->vprop_name, name, OVERLAY_PROP_NAMELEN);
+}
+
+void
+libvarpd_prop_set_prot(varpd_prop_handle_t *phdl, overlay_prop_prot_t perm)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ infop->vprop_prot = perm;
+}
+
+void
+libvarpd_prop_set_type(varpd_prop_handle_t *phdl, overlay_prop_type_t type)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ infop->vprop_type = type;
+}
+
+int
+libvarpd_prop_set_default(varpd_prop_handle_t *phdl, void *buf, ssize_t len)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+
+ if (len > LIBVARPD_PROP_SIZEMAX)
+ return (E2BIG);
+
+ if (len < 0)
+ return (EOVERFLOW);
+
+ bcopy(buf, infop->vprop_default, len);
+ infop->vprop_defsize = len;
+ return (0);
+}
+
+void
+libvarpd_prop_set_nodefault(varpd_prop_handle_t *phdl)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+
+ infop->vprop_default[0] = '\0';
+ infop->vprop_defsize = 0;
+}
+
+void
+libvarpd_prop_set_range_uint32(varpd_prop_handle_t *phdl, uint32_t min,
+ uint32_t max)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ mac_propval_range_t *rangep = (mac_propval_range_t *)infop->vprop_poss;
+
+ /* XXX We should probably set some kind of error here... */
+ if (rangep->mpr_count != 0 && rangep->mpr_type != MAC_PROPVAL_UINT32)
+ return;
+
+ /* XXX We should probably set some kind of error here... */
+ if (infop->vprop_psize + sizeof (mac_propval_uint32_range_t) >
+ sizeof (infop->vprop_poss))
+ return;
+
+ infop->vprop_psize += sizeof (mac_propval_uint32_range_t);
+ rangep->mpr_count++;
+ rangep->mpr_type = MAC_PROPVAL_UINT32;
+ rangep->u.mpr_uint32[rangep->mpr_count-1].mpur_min = min;
+ rangep->u.mpr_uint32[rangep->mpr_count-1].mpur_max = max;
+}
+
+void
+libvarpd_prop_set_range_str(varpd_prop_handle_t *phdl, const char *str)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ size_t len = strlen(str) + 1; /* Account for a null terminator */
+ mac_propval_range_t *rangep = (mac_propval_range_t *)infop->vprop_poss;
+ mac_propval_str_range_t *pstr = &rangep->u.mpr_str;
+
+ /* XXX Errors */
+ if (rangep->mpr_count != 0 && rangep->mpr_type != MAC_PROPVAL_STR)
+ return;
+
+ /* XXX Errors */
+ if (infop->vprop_psize + len > sizeof (infop->vprop_poss))
+ return;
+
+ rangep->mpr_count++;
+ rangep->mpr_type = MAC_PROPVAL_STR;
+ strlcpy((char *)&pstr->mpur_data[pstr->mpur_nextbyte], str,
+ sizeof (infop->vprop_poss) - infop->vprop_psize);
+ pstr->mpur_nextbyte += len;
+ infop->vprop_psize += len;
+}
+
+int
+libvarpd_prop_handle_alloc(varpd_handle_t *vph, varpd_instance_handle_t *inst,
+ varpd_prop_handle_t **phdlp)
+{
+ varpd_prop_info_t *infop;
+
+ infop = umem_alloc(sizeof (varpd_prop_info_t), UMEM_DEFAULT);
+ if (infop == NULL)
+ return (ENOMEM);
+
+ bzero(infop, sizeof (varpd_prop_info_t));
+ infop->vprop_vip = (varpd_impl_t *)vph;
+ infop->vprop_instance = (varpd_instance_t *)inst;
+
+ *phdlp = (varpd_prop_handle_t *)infop;
+ return (0);
+}
+
+void
+libvarpd_prop_handle_free(varpd_prop_handle_t *phdl)
+{
+ umem_free(phdl, sizeof (varpd_prop_info_t));
+}
+
+int
+libvarpd_prop_nprops(varpd_instance_handle_t *ihdl, uint_t *np)
+{
+ varpd_instance_t *instp = (varpd_instance_t *)ihdl;
+
+ return (instp->vri_plugin->vpp_ops->vpo_nprops(instp->vri_private, np));
+}
+
+int
+libvarpd_prop_info_fill(varpd_prop_handle_t *phdl, uint_t propid)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ varpd_instance_t *instp = infop->vprop_instance;
+ mac_propval_range_t *rangep = (mac_propval_range_t *)infop->vprop_poss;
+
+ infop->vprop_psize = sizeof (mac_propval_range_t);
+ bzero(rangep, sizeof (mac_propval_range_t));
+ return (instp->vri_plugin->vpp_ops->vpo_propinfo(instp->vri_private,
+ propid, phdl));
+}
+
+int
+libvarpd_prop_info(varpd_prop_handle_t *phdl, const char **namep,
+ uint_t *typep, uint_t *protp, const void **defp, uint32_t *sizep,
+ const mac_propval_range_t **possp)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ if (namep != NULL)
+ *namep = infop->vprop_name;
+ if (typep != NULL)
+ *typep = infop->vprop_type;
+ if (protp != NULL)
+ *protp = infop->vprop_prot;
+ if (defp != NULL)
+ *defp = infop->vprop_default;
+ if (sizep != NULL)
+ *sizep = infop->vprop_psize;
+ if (possp != NULL)
+ *possp = (mac_propval_range_t *)infop->vprop_poss;
+ return (0);
+}
+
+int
+libvarpd_prop_get(varpd_prop_handle_t *phdl, void *buf, uint32_t *sizep)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ varpd_instance_t *instp = infop->vprop_instance;
+
+ /* XXX We should maybe keep a boolean to keep track of this? */
+ if (infop->vprop_name[0] == '\0')
+ return (EINVAL);
+
+ return (instp->vri_plugin->vpp_ops->vpo_getprop(instp->vri_private,
+ infop->vprop_name, buf, sizep));
+}
+
+int
+libvarpd_prop_set(varpd_prop_handle_t *phdl, const void *buf, uint32_t size)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ varpd_instance_t *instp = infop->vprop_instance;
+
+ /* XXX We should maybe keep a boolean to keep track of this? */
+ if (infop->vprop_name[0] == '\0')
+ return (EINVAL);
+
+ return (instp->vri_plugin->vpp_ops->vpo_setprop(instp->vri_private,
+ infop->vprop_name, buf, size));
+}
+
+void
+libvarpd_prop_door_convert(const varpd_prop_handle_t *phdl,
+ varpd_client_propinfo_arg_t *vcfap)
+{
+ const varpd_prop_info_t *infop = (const varpd_prop_info_t *)phdl;
+
+ vcfap->vcfa_type = infop->vprop_type;
+ vcfap->vcfa_prot = infop->vprop_prot;
+ vcfap->vcfa_defsize = infop->vprop_defsize;
+ vcfap->vcfa_psize = infop->vprop_psize;
+ bcopy(infop->vprop_name, vcfap->vcfa_name, LIBVARPD_PROP_NAMELEN);
+ bcopy(infop->vprop_default, vcfap->vcfa_default, LIBVARPD_PROP_SIZEMAX);
+ bcopy(infop->vprop_poss, vcfap->vcfa_poss, LIBVARPD_PROP_SIZEMAX);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h b/usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h
new file mode 100644
index 0000000000..232d92e82e
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h
@@ -0,0 +1,226 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_PROVIDER_H
+#define _LIBVARPD_PROVIDER_H
+
+/*
+ * varpd provider interface
+ *
+ * This header file defines all the structures and functions that a given plugin
+ * should register.
+ */
+
+#include <bunyan.h>
+#include <libvarpd.h>
+#include <libnvpair.h>
+#include <sys/socket.h>
+#include <sys/overlay_target.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define VARPD_VERSION_ONE 1
+#define VARPD_CURRENT_VERSION VARPD_VERSION_ONE
+
+typedef struct __varpd_provier_handle varpd_provider_handle_t;
+typedef struct __varpd_query_handle varpd_query_handle_t;
+typedef struct __varpd_arp_handle varpd_arp_handle_t;
+typedef struct __varpd_dhcp_handle varpd_dhcp_handle_t;
+
+/*
+ * Create a new instance of a plugin.
+ */
+typedef int (*varpd_plugin_create_f)(varpd_provider_handle_t *, void **,
+ overlay_plugin_dest_t);
+
+/*
+ * Upon the return of this, the lookup function will be called.
+ */
+typedef int (*varpd_plugin_start_f)(void *);
+
+/*
+ * Upon the entry of this function, the lookup function will not be called.
+ */
+typedef void (*varpd_plugin_stop_f)(void *);
+
+/*
+ * Destroy an instance of a plugin.
+ */
+typedef void (*varpd_plugin_destroy_f)(void *);
+
+/*
+ * The varpd_plugin_default_f and varpd_plugin_lookup_f both look up
+ * destinations and should have them written into the overlay_target_point_t.
+ * The varpd_plugin_default_f should only be implemented for plugins which are
+ * of type OVERLAY_TARGET_POINT, where as only the lookup function should be
+ * implemented by plugins that are of type OVERLAY_TARGET_DYNAMIC.
+ *
+ * In both cases, the answer should be filled into the overlay_target_point_t.
+ * In the case of the varpd_plugin_default_f, one of the VARPD_LOOKUP_* values
+ * should be returned by the function.
+ *
+ * In the case of the varpd_plugin_lookup_f, no value is returned. Instead, this
+ * is allowed to be an asynchronous operation and therefore any thread may call
+ * back the status by using the function varpd_plugin_reply. Again, specifying
+ * the appropriate VARPD_LOOKUP_* flags.
+ *
+ * The flag, VARPD_LOOKUP_OK indicates that the overlay_target_point_t has been
+ * filled in completely. The flag, VARPD_LOOKUP_DROP indicates that the packet
+ * in question should be dropped.
+ */
+#define VARPD_LOOKUP_OK (0)
+#define VARPD_LOOKUP_DROP (-1)
+typedef int (*varpd_plugin_default_f)(void *, overlay_target_point_t *);
+typedef void (*varpd_plugin_lookup_f)(void *, varpd_query_handle_t *,
+ const overlay_targ_lookup_t *, overlay_target_point_t *);
+
+/*
+ * Do a proxy ARP/NDP lookup.
+ */
+#define VARPD_QTYPE_ETHERNET 0x0
+typedef void (*varpd_plugin_arp_f)(void *, varpd_arp_handle_t *, int,
+ const struct sockaddr *, uint8_t *);
+
+typedef void (*varpd_plugin_dhcp_f)(void *, varpd_dhcp_handle_t *, int,
+ const overlay_targ_lookup_t *, uint8_t *);
+
+/*
+ * The following four functions all revolve around properties that exist for
+ * varpd. A plugin should strive to have a uniform set of properties that exist,
+ * however a given plugin may not always support every property. For example, in
+ * a vxlan world, the target IP address and port are both required; however,
+ * there are other encapsulation protocols which only require an IP address, or
+ * maybe require something else.
+ */
+
+/*
+ * Obtain a total number of properties.
+ */
+typedef int (*varpd_plugin_nprops_f)(void *, uint_t *);
+
+/*
+ * Obtain information about a property.
+ */
+typedef int (*varpd_plugin_propinfo_f)(void *, const uint_t,
+ varpd_prop_handle_t *);
+
+/*
+ * Get the value for a single property.
+ */
+typedef int (*varpd_plugin_getprop_f)(void *, const char *, void *, uint32_t *);
+
+/*
+ * Set the value for a single property.
+ */
+typedef int (*varpd_plugin_setprop_f)(void *, const char *, const void *,
+ const uint32_t);
+
+/*
+ * Save a plugin's private data into an nvlist.
+ */
+typedef int (*varpd_plugin_save_f)(void *, nvlist_t *);
+
+/*
+ * Restore a plugin's private data to an nvlist.
+ */
+typedef int (*varpd_plugin_restore_f)(nvlist_t *, varpd_provider_handle_t *,
+ overlay_plugin_dest_t, void **);
+
+typedef struct varpd_plugin_ops {
+ uint_t vpo_callbacks;
+ varpd_plugin_create_f vpo_create;
+ varpd_plugin_start_f vpo_start;
+ varpd_plugin_stop_f vpo_stop;
+ varpd_plugin_destroy_f vpo_destroy;
+ varpd_plugin_default_f vpo_default;
+ varpd_plugin_lookup_f vpo_lookup;
+ varpd_plugin_nprops_f vpo_nprops;
+ varpd_plugin_propinfo_f vpo_propinfo;
+ varpd_plugin_getprop_f vpo_getprop;
+ varpd_plugin_setprop_f vpo_setprop;
+ varpd_plugin_save_f vpo_save;
+ varpd_plugin_restore_f vpo_restore;
+ varpd_plugin_arp_f vpo_arp;
+ varpd_plugin_dhcp_f vpo_dhcp;
+} varpd_plugin_ops_t;
+
+typedef struct varpd_plugin_register {
+ uint_t vpr_version;
+ uint_t vpr_mode;
+ const char *vpr_name;
+ const varpd_plugin_ops_t *vpr_ops;
+} varpd_plugin_register_t;
+
+extern varpd_plugin_register_t *libvarpd_plugin_alloc(uint_t, int *);
+extern void libvarpd_plugin_free(varpd_plugin_register_t *);
+extern int libvarpd_plugin_register(varpd_plugin_register_t *);
+
+/*
+ * Blowing up and logging
+ */
+extern void libvarpd_panic(const char *, ...) __NORETURN;
+extern const bunyan_logger_t *libvarpd_plugin_bunyan(varpd_provider_handle_t *);
+
+/*
+ * Misc. Information APIs
+ */
+extern uint64_t libvarpd_plugin_vnetid(varpd_provider_handle_t *);
+
+/*
+ * Lookup Replying query and proxying
+ */
+extern void libvarpd_plugin_query_reply(varpd_query_handle_t *, int);
+
+extern void libvarpd_plugin_proxy_arp(varpd_provider_handle_t *,
+ varpd_query_handle_t *, const overlay_targ_lookup_t *);
+extern void libvarpd_plugin_proxy_ndp(varpd_provider_handle_t *,
+ varpd_query_handle_t *, const overlay_targ_lookup_t *);
+extern void libvarpd_plugin_arp_reply(varpd_arp_handle_t *, int);
+
+extern void libvarpd_plugin_proxy_dhcp(varpd_provider_handle_t *,
+ varpd_query_handle_t *, const overlay_targ_lookup_t *);
+extern void libvarpd_plugin_dhcp_reply(varpd_dhcp_handle_t *, int);
+
+
+/*
+ * Property information callbacks
+ */
+extern void libvarpd_prop_set_name(varpd_prop_handle_t *, const char *);
+extern void libvarpd_prop_set_prot(varpd_prop_handle_t *, overlay_prop_prot_t);
+extern void libvarpd_prop_set_type(varpd_prop_handle_t *, overlay_prop_type_t);
+extern int libvarpd_prop_set_default(varpd_prop_handle_t *, void *, ssize_t);
+extern void libvarpd_prop_set_nodefault(varpd_prop_handle_t *);
+extern void libvarpd_prop_set_range_uint32(varpd_prop_handle_t *, uint32_t,
+ uint32_t);
+extern void libvarpd_prop_set_range_str(varpd_prop_handle_t *, const char *);
+
+/*
+ * Various injecting and invalidation routines
+ */
+extern void libvarpd_inject_varp(varpd_provider_handle_t *, const uint8_t *,
+ const overlay_target_point_t *);
+extern void libvarpd_inject_arp(varpd_provider_handle_t *, const uint16_t,
+ const uint8_t *, const struct in_addr *, const uint8_t *);
+extern void libvarpd_fma_degrade(varpd_provider_handle_t *, const char *);
+extern void libvarpd_fma_restore(varpd_provider_handle_t *);
+/* TODO NDP */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_PROVIDER_H */
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_util.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_util.c
new file mode 100644
index 0000000000..f5d8f3c796
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_util.c
@@ -0,0 +1,98 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#include <libvarpd_impl.h>
+#include <assert.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+const char *
+libvarpd_isaext(void)
+{
+#if defined(__sparc)
+#if defined(__sparcv9)
+ return ("64");
+#else /* __sparcv9 */
+ return ("");
+#endif /* __sparvc9 */
+#elif defined(__amd64)
+ return ("64");
+#elif defined(__i386)
+ return ("");
+#else
+#error "unkonwn ISA"
+#endif
+}
+
+int
+libvarpd_dirwalk(varpd_impl_t *vip, const char *path, const char *suffix,
+ libvarpd_dirwalk_f func, void *arg)
+{
+ int ret;
+ size_t slen;
+ char *dirpath, *filepath;
+ DIR *dirp;
+ struct dirent *dp;
+ assert(vip != NULL && path != NULL);
+
+ if (asprintf(&dirpath, "%s/%s", path, libvarpd_isaext()) == -1)
+ return (errno);
+
+ if ((dirp = opendir(dirpath)) == NULL) {
+ ret = errno;
+ free(filepath);
+ return (ret);
+ }
+
+ slen = strlen(suffix);
+ for (;;) {
+ size_t len;
+
+ errno = 0;
+ dp = readdir(dirp);
+ if (dp == NULL) {
+ ret = errno;
+ break;
+ }
+
+ len = strlen(dp->d_name);
+ if (len <= slen)
+ continue;
+
+ if (strcmp(suffix, dp->d_name + (len - slen)) != 0)
+ continue;
+
+ if (asprintf(&filepath, "%s/%s", dirpath, dp->d_name) == -1) {
+ ret = errno;
+ break;
+ }
+
+ if (func(vip, filepath, arg) != 0) {
+ free(filepath);
+ ret = 0;
+ break;
+ }
+
+ free(filepath);
+ }
+
+ closedir(dirp);
+ free(dirpath);
+ return (ret);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/llib-lvarpd b/usr/src/lib/varpd/libvarpd/common/llib-lvarpd
new file mode 100644
index 0000000000..24d819d290
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/llib-lvarpd
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+#include <libvarpd.h>
diff --git a/usr/src/lib/varpd/libvarpd/common/mapfile-plugin b/usr/src/lib/varpd/libvarpd/common/mapfile-plugin
new file mode 100644
index 0000000000..7d208168e8
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/mapfile-plugin
@@ -0,0 +1,47 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_SCOPE {
+ global:
+ libvarpd_plugin_alloc { FLAGS = EXTERN };
+ libvarpd_plugin_free { FLAGS = EXTERN };
+ libvarpd_plugin_proxy_arp { FLAGS = EXTERN };
+ libvarpd_plugin_proxy_dhcp { FLAGS = EXTERN };
+ libvarpd_plugin_proxy_ndp { FLAGS = EXTERN };
+ libvarpd_plugin_register { FLAGS = EXTERN };
+ libvarpd_prop_set_name { FLAGS = EXTERN };
+ libvarpd_prop_set_prot { FLAGS = EXTERN };
+ libvarpd_prop_set_type { FLAGS = EXTERN };
+ libvarpd_prop_set_default { FLAGS = EXTERN };
+ libvarpd_prop_set_nodefault { FLAGS = EXTERN };
+ libvarpd_prop_set_range_uint32 { FLAGS = EXTERN };
+ libvarpd_prop_set_rangestr { FLAGS = EXTERN };
+};
diff --git a/usr/src/lib/varpd/libvarpd/common/mapfile-vers b/usr/src/lib/varpd/libvarpd/common/mapfile-vers
new file mode 100644
index 0000000000..62fbb5e879
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/mapfile-vers
@@ -0,0 +1,113 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION SUNWprivate {
+ global:
+ libvarpd_c_create;
+ libvarpd_c_destroy;
+ libvarpd_c_instance_activate;
+ libvarpd_c_instance_create;
+ libvarpd_c_instance_destroy;
+ libvarpd_c_prop_nprops;
+ libvarpd_c_prop_handle_alloc;
+ libvarpd_c_prop_handle_free;
+ libvarpd_c_prop_info_fill;
+ libvarpd_c_prop_info_fill_by_name;
+ libvarpd_c_prop_info;
+ libvarpd_c_prop_get;
+ libvarpd_c_prop_set;
+
+ libvarpd_c_instance_lookup;
+ libvarpd_c_instance_target_mode;
+ libvarpd_c_instance_cache_flush;
+ libvarpd_c_instance_cache_delete;
+ libvarpd_c_instance_cache_get;
+ libvarpd_c_instance_cache_set;
+ libvarpd_c_instance_cache_walk;
+
+ libvarpd_create;
+ libvarpd_destroy;
+
+ libvarpd_door_server_create;
+ libvarpd_door_server_destroy;
+
+ libvarpd_fma_degrade;
+ libvarpd_fma_restore;
+
+ libvarpd_inject_varp;
+ libvarpd_inject_arp;
+
+ libvarpd_instance_activate;
+ libvarpd_instance_create;
+ libvarpd_instance_destroy;
+ libvarpd_instance_lookup;
+ libvarpd_instance_id;
+
+ libvarpd_panic;
+
+ libvarpd_persist_disable;
+ libvarpd_persist_enable;
+ libvarpd_persist_restore;
+
+ libvarpd_plugin_alloc;
+ libvarpd_plugin_load;
+ libvarpd_plugin_free;
+ libvarpd_plugin_arp_reply;
+ libvarpd_plugin_dhcp_reply;
+ libvarpd_plugin_query_reply;
+ libvarpd_plugin_proxy_arp;
+ libvarpd_plugin_proxy_dhcp;
+ libvarpd_plugin_proxy_ndp;
+ libvarpd_plugin_register;
+ libvarpd_plugin_walk;
+ libvarpd_plugin_vnetid;
+
+ libvarpd_prop_set_default;
+ libvarpd_prop_set_nodefault;
+ libvarpd_prop_set_name;
+ libvarpd_prop_set_prot;
+ libvarpd_prop_set_range_uint32;
+ libvarpd_prop_set_range_str;
+ libvarpd_prop_set_type;
+
+ libvarpd_prop_handle_alloc;
+ libvarpd_prop_handle_free;
+ libvarpd_prop_nprops;
+ libvarpd_prop_info_fill;
+ libvarpd_prop_info;
+ libvarpd_prop_get;
+ libvarpd_prop_set;
+
+ libvarpd_overlay_lookup_quiesce;
+ libvarpd_overlay_lookup_run;
+ local:
+ *;
+};
diff --git a/usr/src/lib/varpd/libvarpd/i386/Makefile b/usr/src/lib/varpd/libvarpd/i386/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/libvarpd/sparc/Makefile b/usr/src/lib/varpd/libvarpd/sparc/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/libvarpd/sparcv9/Makefile b/usr/src/lib/varpd/libvarpd/sparcv9/Makefile
new file mode 100644
index 0000000000..5c586c1d40
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/svp/Makefile b/usr/src/lib/varpd/svp/Makefile
new file mode 100644
index 0000000000..f026c620e6
--- /dev/null
+++ b/usr/src/lib/varpd/svp/Makefile
@@ -0,0 +1,40 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../../Makefile.lib
+
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+install_h:
+
+check:
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/varpd/svp/Makefile.com b/usr/src/lib/varpd/svp/Makefile.com
new file mode 100644
index 0000000000..814b70354e
--- /dev/null
+++ b/usr/src/lib/varpd/svp/Makefile.com
@@ -0,0 +1,48 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+LIBRARY = libvarpd_svp.a
+VERS = .1
+OBJECTS = libvarpd_svp.o \
+ libvarpd_svp_conn.o \
+ libvarpd_svp_crc.o \
+ libvarpd_svp_host.o \
+ libvarpd_svp_loop.o \
+ libvarpd_svp_remote.o \
+ libvarpd_svp_timer.o
+
+include ../../../Makefile.lib
+include ../../Makefile.plugin
+
+LIBS = $(DYNLIB)
+
+#
+# Yes, this isn't a command, but libcmdutils does have the list(9F)
+# functions and better to use that then compile list.o yet again
+# ourselves... probably.
+#
+LDLIBS += -lc -lvarpd -lumem -lnvpair -lsocket -lnsl -lavl \
+ -lcmdutils -lidspace -lbunyan
+CPPFLAGS += -I../common
+
+SRCDIR = ../common
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../../Makefile.targ
diff --git a/usr/src/lib/varpd/svp/amd64/Makefile b/usr/src/lib/varpd/svp/amd64/Makefile
new file mode 100644
index 0000000000..b64b830ddd
--- /dev/null
+++ b/usr/src/lib/varpd/svp/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp.c b/usr/src/lib/varpd/svp/common/libvarpd_svp.c
new file mode 100644
index 0000000000..23f9586ba6
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp.c
@@ -0,0 +1,755 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+/*
+ * This plugin implements the SDC VXLAN Protocol (SVP).
+ *
+ * XXX Expand on everything.
+ */
+
+#include <umem.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <libnvpair.h>
+#include <strings.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+
+#include <libvarpd_provider.h>
+#include "libvarpd_svp.h"
+
+bunyan_logger_t *svp_bunyan;
+static int svp_defport = 1296;
+static int svp_defuport = 1339;
+static umem_cache_t *svp_lookup_cache;
+
+typedef enum svp_lookup_type {
+ SVP_L_UNKNOWN = 0x0,
+ SVP_L_VL2 = 0x1,
+ SVP_L_VL3 = 0x2
+} svp_lookup_type_t;
+
+typedef struct svp_lookup {
+ int svl_type;
+ union {
+ struct svl_lookup_vl2 {
+ varpd_query_handle_t *svl_handle;
+ overlay_target_point_t *svl_point;
+ } svl_vl2;
+ struct svl_lookup_vl3 {
+ varpd_arp_handle_t *svl_vah;
+ uint8_t *svl_out;
+ } svl_vl3;
+ } svl_u;
+ svp_query_t svl_query;
+} svp_lookup_t;
+
+static const char *varpd_svp_props[] = {
+ "svp/host",
+ "svp/port",
+ "svp/underlay_ip",
+ "svp/underlay_port"
+};
+
+int
+svp_comparator(const void *l, const void *r)
+{
+ const svp_t *ls = l;
+ const svp_t *rs = r;
+
+ if (ls->svp_vid > rs->svp_vid)
+ return (1);
+ if (ls->svp_vid < rs->svp_vid)
+ return (-1);
+ return (0);
+}
+
+static void
+svp_vl2_lookup_cb(svp_t *svp, svp_status_t status, const struct in6_addr *uip,
+ const uint16_t uport, void *arg)
+{
+ svp_lookup_t *svl = arg;
+ overlay_target_point_t *otp;
+
+ assert(svp != NULL);
+ assert(arg != NULL);
+
+ if (status != SVP_S_OK) {
+ libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
+ VARPD_LOOKUP_DROP);
+ umem_cache_free(svp_lookup_cache, svl);
+ return;
+ }
+
+ otp = svl->svl_u.svl_vl2.svl_point;
+ bcopy(uip, &otp->otp_ip, sizeof (struct in6_addr));
+ otp->otp_port = uport;
+ libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
+ VARPD_LOOKUP_OK);
+ umem_cache_free(svp_lookup_cache, svl);
+}
+
+static void
+svp_vl3_lookup_cb(svp_t *svp, svp_status_t status, const uint8_t *vl2mac,
+ const struct in6_addr *uip, const uint16_t uport, void *arg)
+{
+ overlay_target_point_t point;
+ svp_lookup_t *svl = arg;
+
+ assert(svp != NULL);
+ assert(svl != NULL);
+
+ if (status != SVP_S_OK) {
+ libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
+ VARPD_LOOKUP_DROP);
+ umem_cache_free(svp_lookup_cache, svl);
+ return;
+ }
+
+ /* Inject the L2 mapping before the L3 */
+ bcopy(uip, &point.otp_ip, sizeof (struct in6_addr));
+ point.otp_port = uport;
+ libvarpd_inject_varp(svp->svp_hdl, vl2mac, &point);
+
+ bcopy(vl2mac, svl->svl_u.svl_vl3.svl_out, ETHERADDRL);
+ libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
+ VARPD_LOOKUP_OK);
+ umem_cache_free(svp_lookup_cache, svl);
+}
+
+static void
+svp_vl2_invalidate_cb(svp_t *svp, const uint8_t *vl2mac)
+{
+ libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
+}
+
+static void
+svp_vl3_inject_cb(svp_t *svp, const uint16_t vlan, const struct in6_addr *vl3ip,
+ const uint8_t *vl2mac, const uint8_t *targmac)
+{
+ struct in_addr v4;
+
+ if (IN6_IS_ADDR_V4MAPPED(vl3ip) == 0)
+ libvarpd_panic("implement libvarpd_inject_ndp");
+ IN6_V4MAPPED_TO_INADDR(vl3ip, &v4);
+ libvarpd_inject_arp(svp->svp_hdl, vlan, vl2mac, &v4, targmac);
+}
+
+static void
+svp_shootdown_cb(svp_t *svp, const uint8_t *vl2mac, const struct in6_addr *uip,
+ const uint16_t uport)
+{
+ /*
+ * XXX We should probably do a conditional invlaidation here.
+ */
+ libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
+}
+
+static svp_cb_t svp_defops = {
+ svp_vl2_lookup_cb,
+ svp_vl3_lookup_cb,
+ svp_vl2_invalidate_cb,
+ svp_vl3_inject_cb,
+ svp_shootdown_cb
+};
+
+static boolean_t
+varpd_svp_valid_dest(overlay_plugin_dest_t dest)
+{
+ if (dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+static int
+varpd_svp_create(varpd_provider_handle_t *hdl, void **outp,
+ overlay_plugin_dest_t dest)
+{
+ int ret;
+ svp_t *svp;
+
+ if (varpd_svp_valid_dest(dest) == B_FALSE)
+ return (ENOTSUP);
+
+ svp = umem_zalloc(sizeof (svp_t), UMEM_DEFAULT);
+ if (svp == NULL)
+ return (ENOMEM);
+
+ if ((ret = mutex_init(&svp->svp_lock, USYNC_THREAD, NULL) != 0)) {
+ umem_free(svp, sizeof (svp_t));
+ return (ret);
+ }
+
+ svp->svp_port = svp_defport;
+ svp->svp_uport = svp_defuport;
+ svp->svp_cb = svp_defops;
+ svp->svp_hdl = hdl;
+ svp->svp_vid = libvarpd_plugin_vnetid(svp->svp_hdl);
+ *outp = svp;
+ return (0);
+}
+
+static int
+varpd_svp_start(void *arg)
+{
+ int ret;
+ svp_remote_t *srp;
+ svp_t *svp = arg;
+
+ mutex_lock(&svp->svp_lock);
+ if (svp->svp_host == NULL || svp->svp_port == 0 ||
+ svp->svp_huip == B_FALSE || svp->svp_uport == 0) {
+ mutex_unlock(&svp->svp_lock);
+ return (EAGAIN);
+ }
+ mutex_unlock(&svp->svp_lock);
+
+ if ((ret = svp_remote_find(svp->svp_host, svp->svp_port, &srp)) != 0)
+ return (ret);
+
+ if ((ret = svp_remote_attach(srp, svp)) != 0) {
+ svp_remote_release(srp);
+ return (ret);
+ }
+
+ return (0);
+}
+
+static void
+varpd_svp_stop(void *arg)
+{
+ svp_t *svp = arg;
+
+ svp_remote_detach(svp);
+}
+
+static void
+varpd_svp_destroy(void *arg)
+{
+ svp_t *svp = arg;
+
+ if (svp->svp_host != NULL)
+ umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
+
+ if (mutex_destroy(&svp->svp_lock) != 0)
+ libvarpd_panic("failed to destroy svp_t`svp_lock");
+
+ umem_free(svp, sizeof (svp_t));
+}
+
+static void
+varpd_svp_lookup(void *arg, varpd_query_handle_t *vqh,
+ const overlay_targ_lookup_t *otl, overlay_target_point_t *otp)
+{
+ svp_lookup_t *slp;
+ svp_t *svp = arg;
+
+ /*
+ * Check if this is something that we need to proxy, eg. arp or ndp.
+ */
+ if (otl->otl_sap == ETHERTYPE_ARP) {
+ libvarpd_plugin_proxy_arp(svp->svp_hdl, vqh, otl);
+ return;
+ }
+
+ if (otl->otl_sap == ETHERTYPE_IPV6 &&
+ otl->otl_dstaddr[0] == 0x33 &&
+ otl->otl_dstaddr[1] == 0x33) {
+ libvarpd_plugin_proxy_ndp(svp->svp_hdl, vqh, otl);
+ }
+
+ /* XXX CACHES */
+
+ /*
+ * If we have a failure to allocate memory for this, that's not good.
+ * However, telling the kernel to just drop this packet is much better
+ * than the alternative at this moment. At least we'll try again and we
+ * may have something more available to us in a little bit.
+ *
+ * TODO We need to have observability around this case.
+ */
+ slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
+ if (slp == NULL) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ slp->svl_type = SVP_L_VL2;
+ slp->svl_u.svl_vl2.svl_handle = vqh;
+ slp->svl_u.svl_vl2.svl_point = otp;
+
+ svp_remote_vl2_lookup(svp, &slp->svl_query, otl->otl_dstaddr, slp);
+}
+
+static int
+varpd_svp_nprops(void *arg, uint_t *nprops)
+{
+ *nprops = sizeof (varpd_svp_props) / sizeof (char *);
+ return (0);
+}
+
+static int
+varpd_svp_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
+{
+ switch (propid) {
+ case 0:
+ /* svp/host */
+ libvarpd_prop_set_name(vph, varpd_svp_props[0]);
+ libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+ libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING);
+ libvarpd_prop_set_nodefault(vph);
+ break;
+ case 1:
+ /* svp/port */
+ libvarpd_prop_set_name(vph, varpd_svp_props[1]);
+ libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+ libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
+ libvarpd_prop_set_default(vph, &svp_defport,
+ sizeof (svp_defport));
+ libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
+ break;
+ case 2:
+ /* svp/underlay_ip */
+ libvarpd_prop_set_name(vph, varpd_svp_props[2]);
+ libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+ libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP);
+ libvarpd_prop_set_nodefault(vph);
+ break;
+ case 3:
+ /* svp/underlay_port */
+ libvarpd_prop_set_name(vph, varpd_svp_props[3]);
+ libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+ libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
+ libvarpd_prop_set_default(vph, &svp_defuport,
+ sizeof (svp_defuport));
+ libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
+ break;
+ default:
+ return (EINVAL);
+ }
+ return (0);
+}
+
+static int
+varpd_svp_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
+{
+ svp_t *svp = arg;
+
+ /* svp/host */
+ if (strcmp(pname, varpd_svp_props[0]) == 0) {
+ size_t len;
+
+ mutex_lock(&svp->svp_lock);
+ if (svp->svp_host == NULL) {
+ *sizep = 0;
+ } else {
+ len = strlen(svp->svp_host) + 1;
+ if (*sizep < len) {
+ mutex_unlock(&svp->svp_lock);
+ return (EOVERFLOW);
+ }
+ *sizep = len;
+ (void) strlcpy(buf, svp->svp_host, *sizep);
+ }
+ mutex_unlock(&svp->svp_lock);
+ return (0);
+ }
+
+ /* svp/port */
+ if (strcmp(pname, varpd_svp_props[1]) == 0) {
+ uint64_t val;
+
+ if (*sizep < sizeof (uint64_t))
+ return (EOVERFLOW);
+
+ mutex_lock(&svp->svp_lock);
+ if (svp->svp_port == 0) {
+ *sizep = 0;
+ } else {
+ val = svp->svp_port;
+ bcopy(&val, buf, sizeof (uint64_t));
+ *sizep = sizeof (uint64_t);
+ }
+
+ mutex_unlock(&svp->svp_lock);
+ return (0);
+ }
+
+ /* svp/underlay_ip */
+ if (strcmp(pname, varpd_svp_props[2]) == 0) {
+ if (*sizep > sizeof (struct in6_addr))
+ return (EOVERFLOW);
+ mutex_lock(&svp->svp_lock);
+ if (svp->svp_huip == B_FALSE) {
+ *sizep = 0;
+ } else {
+ bcopy(&svp->svp_uip, buf, sizeof (struct in6_addr));
+ *sizep = sizeof (struct in6_addr);
+ }
+ return (0);
+ }
+
+ /* svp/underlay_port */
+ if (strcmp(pname, varpd_svp_props[3]) == 0) {
+ uint64_t val;
+
+ if (*sizep < sizeof (uint64_t))
+ return (EOVERFLOW);
+
+ mutex_lock(&svp->svp_lock);
+ if (svp->svp_uport == 0) {
+ *sizep = 0;
+ } else {
+ val = svp->svp_uport;
+ bcopy(&val, buf, sizeof (uint64_t));
+ *sizep = sizeof (uint64_t);
+ }
+
+ mutex_unlock(&svp->svp_lock);
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static int
+varpd_svp_setprop(void *arg, const char *pname, const void *buf,
+ const uint32_t size)
+{
+ svp_t *svp = arg;
+
+ /* svp/host */
+ if (strcmp(pname, varpd_svp_props[0]) == 0) {
+ char *dup;
+ /* XXX Validate hostname characters, maybe grab a C locale */
+ dup = umem_alloc(size, UMEM_DEFAULT);
+ (void) strlcpy(dup, buf, size);
+ if (dup == NULL)
+ return (ENOMEM);
+ mutex_lock(&svp->svp_lock);
+ if (svp->svp_host != NULL)
+ umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
+ svp->svp_host = dup;
+ mutex_unlock(&svp->svp_lock);
+ return (0);
+ }
+
+ /* svp/port */
+ if (strcmp(pname, varpd_svp_props[1]) == 0) {
+ const uint64_t *valp = buf;
+ if (size < sizeof (uint64_t))
+ return (EOVERFLOW);
+
+ if (*valp == 0 || *valp > UINT16_MAX)
+ return (EINVAL);
+
+ mutex_lock(&svp->svp_lock);
+ svp->svp_port = (uint16_t)*valp;
+ mutex_unlock(&svp->svp_lock);
+ return (0);
+ }
+
+ /* svp/underlay_ip */
+ if (strcmp(pname, varpd_svp_props[2]) == 0) {
+ const struct in6_addr *ipv6 = buf;
+
+ if (size < sizeof (struct in6_addr))
+ return (EOVERFLOW);
+
+ /*
+ * XXX Is anything else disallowed?
+ */
+ if (IN6_IS_ADDR_V4COMPAT(ipv6))
+ return (EINVAL);
+ mutex_lock(&svp->svp_lock);
+ bcopy(buf, &svp->svp_uip, sizeof (struct in6_addr));
+ svp->svp_huip = B_TRUE;
+ mutex_unlock(&svp->svp_lock);
+ return (0);
+ }
+
+ /* svp/underlay_port */
+ if (strcmp(pname, varpd_svp_props[3]) == 0) {
+ const uint64_t *valp = buf;
+ if (size < sizeof (uint64_t))
+ return (EOVERFLOW);
+
+ if (*valp == 0 || *valp > UINT16_MAX)
+ return (EINVAL);
+
+ mutex_lock(&svp->svp_lock);
+ svp->svp_uport = (uint16_t)*valp;
+ mutex_unlock(&svp->svp_lock);
+
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static int
+varpd_svp_save(void *arg, nvlist_t *nvp)
+{
+ int ret;
+ svp_t *svp = arg;
+
+ mutex_lock(&svp->svp_lock);
+ if (svp->svp_host != NULL) {
+ if ((ret = nvlist_add_string(nvp, varpd_svp_props[0],
+ svp->svp_host)) != 0) {
+ mutex_unlock(&svp->svp_lock);
+ return (ret);
+ }
+ }
+
+ if (svp->svp_port != 0) {
+ if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[1],
+ svp->svp_port)) != 0) {
+ mutex_unlock(&svp->svp_lock);
+ return (ret);
+ }
+ }
+
+ if (svp->svp_huip == B_TRUE) {
+ char buf[INET6_ADDRSTRLEN];
+
+ if (inet_ntop(AF_INET6, &svp->svp_uip, buf, sizeof (buf)) ==
+ NULL)
+ libvarpd_panic("unexpected inet_ntop failure: %d",
+ errno);
+
+ if ((ret = nvlist_add_string(nvp, varpd_svp_props[2],
+ buf)) != 0) {
+ mutex_unlock(&svp->svp_lock);
+ return (ret);
+ }
+ }
+
+ if (svp->svp_uport != 0) {
+ if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[3],
+ svp->svp_uport)) != 0) {
+ mutex_unlock(&svp->svp_lock);
+ return (ret);
+ }
+ }
+
+ mutex_unlock(&svp->svp_lock);
+ return (0);
+}
+
+static int
+varpd_svp_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
+ overlay_plugin_dest_t dest, void **outp)
+{
+ int ret;
+ svp_t *svp;
+ char *ipstr, *hstr;
+
+ if (varpd_svp_valid_dest(dest) == B_FALSE)
+ return (ENOTSUP);
+
+ if ((ret = varpd_svp_create(hdl, (void **)&svp, dest)) != 0)
+ return (ret);
+
+ /* XXX Validate hostname */
+ if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[0],
+ &hstr)) != 0) {
+ if (ret != ENOENT) {
+ varpd_svp_destroy(svp);
+ return (ret);
+ }
+ svp->svp_host = NULL;
+ } else {
+ size_t blen = strlen(hstr) + 1;
+ svp->svp_host = umem_alloc(blen, UMEM_DEFAULT);
+ (void) strlcpy(svp->svp_host, hstr, blen);
+ }
+
+ if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[1],
+ &svp->svp_port)) != 0) {
+ if (ret != ENOENT) {
+ varpd_svp_destroy(svp);
+ return (ret);
+ }
+ svp->svp_port = 0;
+ }
+
+ if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[2],
+ &ipstr)) != 0) {
+ if (ret != ENOENT) {
+ varpd_svp_destroy(svp);
+ return (ret);
+ }
+ svp->svp_huip = B_FALSE;
+ } else {
+ ret = inet_pton(AF_INET6, ipstr, &svp->svp_uip);
+ if (ret == -1) {
+ assert(errno == EAFNOSUPPORT);
+ libvarpd_panic("unexpected inet_pton failure: %d",
+ errno);
+ }
+
+ if (ret == 0) {
+ varpd_svp_destroy(svp);
+ return (EINVAL);
+ }
+ svp->svp_huip = B_TRUE;
+ }
+
+ if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[3],
+ &svp->svp_uport)) != 0) {
+ if (ret != ENOENT) {
+ varpd_svp_destroy(svp);
+ return (ret);
+ }
+ svp->svp_uport = 0;
+ }
+
+ svp->svp_hdl = hdl;
+ *outp = svp;
+ return (0);
+}
+
+static void
+varpd_svp_arp(void *arg, varpd_arp_handle_t *vah, int type,
+ const struct sockaddr *sock, uint8_t *out)
+{
+ svp_t *svp = arg;
+ svp_lookup_t *svl;
+
+ if (type != VARPD_QTYPE_ETHERNET) {
+ libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ /* XXX CACHES */
+
+ svl = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
+ if (svl == NULL) {
+ libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ svl->svl_type = SVP_L_VL3;
+ svl->svl_u.svl_vl3.svl_vah = vah;
+ svl->svl_u.svl_vl3.svl_out = out;
+ svp_remote_vl3_lookup(svp, &svl->svl_query, sock, svl);
+}
+
+static const varpd_plugin_ops_t varpd_svp_ops = {
+ 0,
+ varpd_svp_create,
+ varpd_svp_start,
+ varpd_svp_stop,
+ varpd_svp_destroy,
+ NULL,
+ varpd_svp_lookup,
+ varpd_svp_nprops,
+ varpd_svp_propinfo,
+ varpd_svp_getprop,
+ varpd_svp_setprop,
+ varpd_svp_save,
+ varpd_svp_restore,
+ varpd_svp_arp,
+ NULL
+};
+
+static int
+svp_bunyan_init(void)
+{
+ int ret;
+
+ if ((ret = bunyan_init("svp", &svp_bunyan)) != 0)
+ return (ret);
+ ret = bunyan_stream_add(svp_bunyan, "stderr", BUNYAN_L_INFO,
+ bunyan_stream_fd, (void *)STDERR_FILENO);
+ if (ret != 0)
+ bunyan_fini(svp_bunyan);
+ return (ret);
+}
+
+static void
+svp_bunyan_fini(void)
+{
+ if (svp_bunyan != NULL)
+ bunyan_fini(svp_bunyan);
+}
+
+#pragma init(varpd_svp_init)
+static void
+varpd_svp_init(void)
+{
+ int err;
+ varpd_plugin_register_t *vpr;
+
+ /* XXX Revisit and make sure we have proper clean up */
+ if (svp_bunyan_init() != 0)
+ return;
+
+ if ((err == svp_host_init()) != 0) {
+ svp_bunyan_fini();
+ return;
+ }
+
+ /* XXX Communicate failure */
+ svp_lookup_cache = umem_cache_create("svp_lookup",
+ sizeof (svp_lookup_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+ if (svp_lookup_cache == NULL) {
+ svp_bunyan_fini();
+ return;
+ }
+
+ if ((err = svp_event_init()) != 0) {
+ svp_bunyan_fini();
+ umem_cache_destroy(svp_lookup_cache);
+ return;
+ }
+
+ if ((err = svp_timer_init()) != 0) {
+ svp_event_fini();
+ umem_cache_destroy(svp_lookup_cache);
+ svp_bunyan_fini();
+ return;
+ }
+
+ if ((err = svp_remote_init()) != 0) {
+ svp_event_fini();
+ umem_cache_destroy(svp_lookup_cache);
+ svp_bunyan_fini();
+ return;
+ }
+
+ /* XXX Revisit failure semantics here */
+ vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err);
+ if (vpr == NULL) {
+ svp_remote_fini();
+ svp_event_fini();
+ umem_cache_destroy(svp_lookup_cache);
+ return;
+ }
+
+ vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC;
+ vpr->vpr_name = "svp";
+ vpr->vpr_ops = &varpd_svp_ops;
+
+ (void) libvarpd_plugin_register(vpr);
+ libvarpd_plugin_free(vpr);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp.h b/usr/src/lib/varpd/svp/common/libvarpd_svp.h
new file mode 100644
index 0000000000..90acf325aa
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp.h
@@ -0,0 +1,377 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_SVP_H
+#define _LIBVARPD_SVP_H
+
+/*
+ * Implementation details of the SVP plugin and the SVP protocol.
+ */
+
+#include <netinet/in.h>
+#include <sys/ethernet.h>
+#include <thread.h>
+#include <synch.h>
+#include <libvarpd_provider.h>
+#include <sys/avl.h>
+#include <port.h>
+#include <sys/list.h>
+#include <bunyan.h>
+
+#include <libvarpd_svp_prot.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct svp svp_t;
+typedef struct svp_remote svp_remote_t;
+typedef struct svp_conn svp_conn_t;
+typedef struct svp_query svp_query_t;
+
+typedef void (*svp_event_f)(port_event_t *, void *);
+
+typedef struct svp_event {
+ svp_event_f se_func;
+ void *se_arg;
+ int se_events;
+} svp_event_t;
+
+typedef void (*svp_timer_f)(void *);
+
+typedef struct svp_timer {
+ svp_timer_f st_func;
+ void *st_arg;
+ boolean_t st_oneshot;
+ uint32_t st_value;
+ /* Fields below here are private to the svp_timer implementaiton */
+ uint64_t st_expire;
+ boolean_t st_delivering;
+ avl_node_t st_link;
+} svp_timer_t;
+
+typedef union svp_query_data {
+ svp_vl2_req_t sqd_vl2r;
+ svp_vl2_ack_t sqd_vl2a;
+ svp_vl3_req_t sdq_vl3r;
+ svp_vl3_ack_t sdq_vl3a;
+} svp_query_data_t;
+
+typedef void (*svp_query_f)(svp_query_t *, void *);
+
+typedef enum svp_query_state {
+ SVP_QUERY_INIT = 0x00,
+ SVP_QUERY_WRITING = 0x01,
+ SVP_QUERY_READING = 0x02,
+ SVP_QUERY_FINISHED = 0x03
+} svp_query_state_t;
+
+/*
+ * The query structure is usable for all forms of svp queries that end up
+ * getting passed across. Right now it's optimized for the fixed size data
+ * requests as opposed to requests whose responses will always be streaming in
+ * nature. Though, the streaming requests are the less common ones we have.
+ *
+ * XXX Fix that and make this streaming friendly
+ */
+struct svp_query {
+ list_node_t sq_lnode;
+ svp_query_f sq_func;
+ svp_query_state_t sq_state;
+ void *sq_arg;
+ svp_t *sq_svp;
+ svp_req_t sq_header;
+ svp_query_data_t sq_rdun;
+ svp_query_data_t sq_wdun;
+ svp_status_t sq_status;
+ void *sq_rdata;
+ size_t sq_rsize;
+ void *sq_wdata;
+ size_t sq_wsize;
+ hrtime_t sq_acttime;
+};
+
+/*
+ * XXX Centralize this somewhere more generally, big theroy statement, where are
+ * you?
+ *
+ * We have a connection pool that's built upon DNS records. DNS describes the
+ * membership of the set of remote peers that make up our pool and we maintain
+ * one connection to each of them. In addition, we maintain an exponential
+ * backoff for each peer and will attempt to reconect immediately before backing
+ * off. The following are the valid states that a connection can be in:
+ *
+ * SVP_CS_INITIAL This is the initial state of a connection, all
+ * that should exist is an unbound socket.
+ *
+ * SVP_CS_CONNECTING A call to connect has been made and we are
+ * polling for it to complete.
+ *
+ * SVP_CS_BACKOFF A connect attempt has failed and we are
+ * currently backing off, waiting to try again.
+ *
+ * SVP_CS_ACTIVE We have successfully connected to the remote
+ * system.
+ *
+ * SVP_CS_WINDDOWN This connection is going to valhalla. In other
+ * words, a previously active connection is no
+ * longer valid in DNS, so we should curb our use
+ * of it, and reap it as soon as we have other
+ * active connections.
+ *
+ * SVP_CS_REAPING This connection object will be freed and reaped.
+ * It will no longer be used.
+ *
+ * The following diagram attempts to describe our state transition scheme, and
+ * when we transition from one state to the next.
+ *
+ * |
+ * * New remote IP from DNS resolution,
+ * | not currently active in the system.
+ * |
+ * v Socket Error,
+ * +----------------+ still in DNS
+ * | SVP_CS_INITIAL |<----------------------*-----+
+ * +----------------+ |
+ * | |
+ * Connection failed .. Always * Successful |
+ * backoff limit . | connect() |
+ * not exceeded +----*---------+ | +-----------*--+ |
+ * | | | | | |
+ * V ^ v ^ V ^
+ * +----------------+ +-------------------+ +---------------+
+ * +-<-| SVP_CS_BACKOFF | | SVP_CS_CONNECTING | | SVP_CS_ACTIVE |
+ * | +----------------+ +-------------------+ +---------------+
+ * | V ^ | ^ V
+ * | Backoff wait * | v | * Removed
+ * | interval +--------------+ | Added to * | from DNS
+ * | finished | DNS | |
+ * | | | |
+ * | | ^ V
+ * | | +-----------------+
+ * +---->---------------+-----<-------+ +-<-| SVP_CS_WINDDOWN |
+ * v Conn * +-----------------+
+ * | Error | V
+ * Removed from * v |
+ * DNS | +----------------+ * Connection
+ * +----------->| SVP_CS_REAPING |<--------+ Quiesced
+ * +----------------+
+ *
+ */
+typedef enum svp_conn_state {
+ SVP_CS_ERROR = 0x00,
+ SVP_CS_INITIAL = 0x01,
+ SVP_CS_CONNECTING = 0x02,
+ SVP_CS_BACKOFF = 0x03,
+ SVP_CS_ACTIVE = 0x04,
+ SVP_CS_WINDDOWN = 0x05
+} svp_conn_state_t;
+
+typedef enum svp_conn_error {
+ SVP_CE_NONE = 0x00,
+ SVP_CE_ASSOCIATE = 0x01,
+ SVP_CE_NOPOLLOUT = 0x02,
+ SVP_CE_SOCKET = 0x03
+} svp_conn_error_t;
+
+typedef enum svp_conn_flags {
+ SVP_CF_ADDED = 0x01,
+ SVP_CF_DEGRADED = 0x02,
+ SVP_CF_REAP = 0x04,
+ SVP_CF_TEARDOWN = 0x08,
+ SVP_CF_UFLAG = 0x0c,
+ SVP_CF_USER = 0x10
+} svp_conn_flags_t;
+
+typedef struct svp_conn_out {
+ svp_query_t *sco_query;
+ size_t sco_offset;
+} svp_conn_out_t;
+
+typedef struct svp_conn_in {
+ svp_query_t *sci_query;
+ svp_req_t sci_req;
+ size_t sci_offset;
+} svp_conn_in_t;
+
+struct svp_conn {
+ svp_remote_t *sc_remote; /* RO */
+ struct in6_addr sc_addr; /* RO */
+ list_node_t sc_rlist; /* svp_remote_t`sr_lock */
+ mutex_t sc_lock;
+ svp_event_t sc_event;
+ svp_timer_t sc_btimer;
+ svp_timer_t sc_qtimer;
+ int sc_socket;
+ uint_t sc_gen;
+ uint_t sc_nbackoff;
+ svp_conn_flags_t sc_flags;
+ svp_conn_state_t sc_cstate;
+ svp_conn_error_t sc_error;
+ int sc_errno;
+ hrtime_t sc_lastact;
+ list_t sc_queries;
+ svp_conn_out_t sc_output;
+ svp_conn_in_t sc_input;
+};
+
+typedef enum svp_remote_state {
+ SVP_RS_LOOKUP_SCHEDULED = 0x01, /* On the DNS Queue */
+ SVP_RS_LOOKUP_INPROGRESS = 0x02, /* Doing a DNS lookup */
+ SVP_RS_LOOKUP_VALID = 0x04 /* addrinfo valid */
+} svp_remote_state_t;
+
+/*
+ * These series of bit-based flags should be ordered such that the most severe
+ * is first. We only can set one message that user land can see, so if more than
+ * one is set we want to make sure that one is there.
+ */
+typedef enum svp_degrade_state {
+ SVP_RD_DNS_FAIL = 0x01, /* DNS Resolution Failure */
+ SVP_RD_REMOTE_FAIL = 0x02, /* cannot reach any remote peers */
+ SVP_RD_ALL = 0x03 /* Only suitable for restore */
+} svp_degrade_state_t;
+
+struct svp_remote {
+ char *sr_hostname; /* RO */
+ uint16_t sr_rport; /* RO */
+ avl_node_t sr_gnode; /* svp_remote_lock */
+ svp_remote_t *sr_nexthost; /* svp_host_lock */
+ mutex_t sr_lock;
+ svp_remote_state_t sr_state;
+ svp_degrade_state_t sr_degrade;
+ struct addrinfo *sr_addrinfo;
+ avl_tree_t sr_tree;
+ uint_t sr_count; /* active count */
+ uint_t sr_gen;
+ uint_t sr_tconns; /* total conns + dconns */
+ uint_t sr_ndconns; /* number of degraded conns */
+ list_t sr_conns; /* all conns */
+};
+
+/*
+ * We have a bunch of different things that we get back from the API at the
+ * plug-in layer. These include:
+ *
+ * o OOB Shootdowns
+ * o VL3->VL2 Lookups
+ * o VL2->UL3 Lookups
+ * o VL2 Log invalidations
+ * o VL3 Log injections
+ */
+typedef void (*svp_vl2_lookup_f)(svp_t *, svp_status_t, const struct in6_addr *,
+ const uint16_t, void *);
+typedef void (*svp_vl3_lookup_f)(svp_t *, svp_status_t, const uint8_t *,
+ const struct in6_addr *, const uint16_t, void *);
+typedef void (*svp_vl2_invalidation_f)(svp_t *, const uint8_t *);
+typedef void (*svp_vl3_inject_f)(svp_t *, const uint16_t,
+ const struct in6_addr *, const uint8_t *, const uint8_t *);
+typedef void (*svp_shootdown_f)(svp_t *, const uint8_t *,
+ const struct in6_addr *, const uint16_t uport);
+
+typedef struct svp_cb {
+ svp_vl2_lookup_f scb_vl2_lookup;
+ svp_vl3_lookup_f scb_vl3_lookup;
+ svp_vl2_invalidation_f scb_vl2_invalidate;
+ svp_vl3_inject_f scb_vl3_inject;
+ svp_shootdown_f scb_shootdown;
+} svp_cb_t;
+
+/*
+ * Core implementation structure.
+ */
+struct svp {
+ overlay_plugin_dest_t svp_dest; /* RO */
+ varpd_provider_handle_t *svp_hdl; /* RO */
+ svp_cb_t svp_cb; /* RO */
+ uint64_t svp_vid; /* RO? */
+ avl_node_t svp_rlink; /* Owned by svp_remote */
+ svp_remote_t *svp_remote; /* ROish XXX */
+ mutex_t svp_lock;
+ char *svp_host;
+ uint16_t svp_port;
+ uint16_t svp_uport;
+ boolean_t svp_huip;
+ struct in6_addr svp_uip;
+};
+
+extern bunyan_logger_t *svp_bunyan;
+
+/*
+ * XXX Strawman backend APIs
+ */
+extern int svp_remote_find(char *, uint16_t, svp_remote_t **);
+extern int svp_remote_attach(svp_remote_t *, svp_t *);
+extern void svp_remote_detach(svp_t *);
+extern void svp_remote_release(svp_remote_t *);
+extern void svp_remote_vl3_lookup(svp_t *, svp_query_t *,
+ const struct sockaddr *, void *);
+extern void svp_remote_vl2_lookup(svp_t *, svp_query_t *, const uint8_t *,
+ void *);
+
+/*
+ * Init functions
+ */
+extern int svp_remote_init(void);
+extern void svp_remote_fini(void);
+extern int svp_event_init(void);
+extern int svp_event_timer_init(svp_event_t *);
+extern void svp_event_fini(void);
+extern int svp_host_init(void);
+extern int svp_timer_init(void);
+
+/*
+ * Timers
+ */
+extern int svp_tickrate;
+extern void svp_timer_add(svp_timer_t *);
+extern void svp_timer_remove(svp_timer_t *);
+
+/*
+ * Event loop management
+ */
+extern int svp_event_associate(svp_event_t *, int);
+extern int svp_event_dissociate(svp_event_t *, int);
+extern int svp_event_inject(void *);
+
+/*
+ * Connection manager
+ */
+extern int svp_conn_create(svp_remote_t *, const struct in6_addr *);
+extern void svp_conn_destroy(svp_conn_t *);
+extern void svp_conn_fallout(svp_conn_t *);
+extern void svp_conn_queue(svp_conn_t *, svp_query_t *);
+
+/*
+ * FMA related
+ */
+extern void svp_remote_degrade(svp_remote_t *, svp_degrade_state_t);
+extern void svp_remote_restore(svp_remote_t *, svp_degrade_state_t);
+
+/*
+ * Misc.
+ */
+extern int svp_comparator(const void *, const void *);
+extern void svp_remote_reassign(svp_remote_t *, svp_conn_t *);
+extern void svp_remote_resolved(svp_remote_t *, struct addrinfo *);
+extern void svp_host_queue(svp_remote_t *);
+extern void svp_query_release(svp_query_t *);
+extern void svp_query_crc32(svp_req_t *, void *, size_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_SVP_H */
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c
new file mode 100644
index 0000000000..b9d3925b64
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c
@@ -0,0 +1,945 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/*
+ * Logic to manage an individual connection to a remote host.
+ *
+ * Individual connections always are associated with an svp_remote_t from their
+ * creation to their destruction.
+ */
+
+#include <assert.h>
+#include <umem.h>
+#include <errno.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/uio.h>
+
+#include <libvarpd_svp.h>
+
+static int svp_conn_query_timeout = 30;
+static int svp_conn_backoff_tbl[] = { 1, 2, 4, 8, 16, 32 };
+static int svp_conn_nbackoff = sizeof (svp_conn_backoff_tbl) / sizeof (int);
+
+typedef enum svp_conn_act {
+ SVP_RA_NONE = 0x00,
+ SVP_RA_DEGRADE = 0x01,
+ SVP_RA_RESTORE = 0x02,
+ SVP_RA_ERROR = 0x03
+} svp_conn_act_t;
+
+static void
+svp_conn_inject(svp_conn_t *scp)
+{
+ int ret;
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ if (scp->sc_flags & SVP_CF_USER)
+ return;
+ scp->sc_flags |= SVP_CF_USER;
+ if ((ret = svp_event_inject(scp)) != 0)
+ libvarpd_panic("failed to inject event: %d\n", ret);
+}
+
+static void
+svp_conn_degrade(svp_conn_t *scp)
+{
+ svp_remote_t *srp = scp->sc_remote;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ if (scp->sc_flags & SVP_CF_DEGRADED)
+ return;
+
+ scp->sc_flags |= SVP_CF_DEGRADED;
+ srp->sr_ndconns++;
+ if (srp->sr_ndconns == srp->sr_tconns)
+ svp_remote_degrade(srp, SVP_RD_REMOTE_FAIL);
+}
+
+static void
+svp_conn_restore(svp_conn_t *scp)
+{
+ svp_remote_t *srp = scp->sc_remote;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ if (!(scp->sc_flags & SVP_CF_DEGRADED))
+ return;
+
+ scp->sc_flags &= ~SVP_CF_DEGRADED;
+ if (srp->sr_ndconns == srp->sr_tconns)
+ svp_remote_restore(srp, SVP_RD_REMOTE_FAIL);
+ srp->sr_ndconns--;
+}
+
+static void
+svp_conn_add(svp_conn_t *scp)
+{
+ svp_remote_t *srp = scp->sc_remote;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ if (scp->sc_flags & SVP_CF_ADDED)
+ return;
+
+ list_insert_tail(&srp->sr_conns, scp);
+ scp->sc_flags |= SVP_CF_ADDED;
+ srp->sr_tconns++;
+}
+
+static void
+svp_conn_remove(svp_conn_t *scp)
+{
+ svp_remote_t *srp = scp->sc_remote;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ if (!(scp->sc_flags & SVP_CF_ADDED))
+ return;
+
+ scp->sc_flags &= ~SVP_CF_ADDED;
+ if (scp->sc_flags & SVP_CF_DEGRADED)
+ srp->sr_ndconns--;
+ srp->sr_tconns--;
+ if (srp->sr_tconns == srp->sr_ndconns)
+ svp_remote_degrade(srp, SVP_RD_REMOTE_FAIL);
+}
+
+static svp_query_t *
+svp_conn_query_find(svp_conn_t *scp, uint32_t id)
+{
+ svp_query_t *sqp;
+
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ for (sqp = list_head(&scp->sc_queries); sqp != NULL;
+ sqp = list_next(&scp->sc_queries, sqp)) {
+ if (sqp->sq_header.svp_id == id)
+ break;
+ }
+
+ return (sqp);
+}
+
+static svp_conn_act_t
+svp_conn_backoff(svp_conn_t *scp)
+{
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ if (close(scp->sc_socket) != 0)
+ libvarpd_panic("failed to close socket %d: %d\n",
+ scp->sc_socket, errno);
+ scp->sc_socket = -1;
+
+ scp->sc_cstate = SVP_CS_BACKOFF;
+ scp->sc_nbackoff++;
+ if (scp->sc_nbackoff >= svp_conn_nbackoff) {
+ scp->sc_btimer.st_value =
+ svp_conn_backoff_tbl[svp_conn_nbackoff - 1];
+ } else {
+ scp->sc_btimer.st_value =
+ svp_conn_backoff_tbl[scp->sc_nbackoff - 1];
+ }
+ svp_timer_add(&scp->sc_btimer);
+
+ if (scp->sc_nbackoff > svp_conn_nbackoff)
+ return (SVP_RA_DEGRADE);
+ return (SVP_RA_NONE);
+}
+
+static svp_conn_act_t
+svp_conn_connect(svp_conn_t *scp)
+{
+ int ret;
+ struct sockaddr_in6 in6;
+
+ assert(MUTEX_HELD(&scp->sc_lock));
+ assert(scp->sc_cstate == SVP_CS_BACKOFF ||
+ scp->sc_cstate == SVP_CS_INITIAL);
+ assert(scp->sc_socket == -1);
+ if (scp->sc_cstate == SVP_CS_INITIAL)
+ scp->sc_nbackoff = 0;
+
+ scp->sc_socket = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
+ if (scp->sc_socket == -1) {
+ scp->sc_error = SVP_CE_SOCKET;
+ scp->sc_errno = errno;
+ scp->sc_cstate = SVP_CS_ERROR;
+ return (SVP_RA_DEGRADE);
+ }
+
+ bzero(&in6, sizeof (struct sockaddr_in6));
+ in6.sin6_family = AF_INET6;
+ in6.sin6_port = htons(scp->sc_remote->sr_rport);
+ bcopy(&scp->sc_addr, &in6.sin6_addr, sizeof (struct in6_addr));
+ ret = connect(scp->sc_socket, (struct sockaddr *)&in6,
+ sizeof (struct sockaddr_in6));
+ if (ret != 0) {
+ boolean_t async = B_FALSE;
+
+ switch (errno) {
+ case EACCES:
+ case EADDRINUSE:
+ case EAFNOSUPPORT:
+ case EALREADY:
+ case EBADF:
+ case EISCONN:
+ case ELOOP:
+ case ENOENT:
+ case ENOSR:
+ case EWOULDBLOCK:
+ libvarpd_panic("unanticipated connect errno %d", errno);
+ case EINPROGRESS:
+ case EINTR:
+ async = B_TRUE;
+ default:
+ break;
+ }
+
+ /*
+ * So, we will be connecting to this in the future, advance our
+ * state and make sure that we poll for the next round.
+ */
+ if (async == B_TRUE) {
+ scp->sc_cstate = SVP_CS_CONNECTING;
+ scp->sc_event.se_events = POLLOUT | POLLHUP;
+ ret = svp_event_associate(&scp->sc_event,
+ scp->sc_socket);
+ if (ret == 0)
+ return (SVP_RA_NONE);
+ scp->sc_error = SVP_CE_ASSOCIATE;
+ scp->sc_errno = ret;
+ scp->sc_cstate = SVP_CS_ERROR;
+ return (SVP_RA_DEGRADE);
+ } else {
+ /*
+ * This call failed, which means that we obtained one of
+ * the following:
+ *
+ * EADDRNOTAVAIL
+ * ECONNREFUSED
+ * EIO
+ * ENETUNREACH
+ * EHOSTUNREACH
+ * ENXIO
+ * ETIMEDOUT
+ *
+ * Therefore we need to set ourselves into backoff and
+ * wait for that to clear up.
+ */
+ return (svp_conn_backoff(scp));
+ }
+ }
+
+ /*
+ * We've connected. Successfully move ourselves to the bound
+ * state and start polling.
+ */
+ scp->sc_cstate = SVP_CS_ACTIVE;
+ scp->sc_event.se_events = POLLIN | POLLRDNORM | POLLHUP;
+ ret = svp_event_associate(&scp->sc_event, scp->sc_socket);
+ if (ret == 0)
+ return (SVP_RA_RESTORE);
+ scp->sc_error = SVP_CE_ASSOCIATE;
+ scp->sc_cstate = SVP_CS_ERROR;
+
+ return (SVP_RA_DEGRADE);
+}
+
+/*
+ * This should be the first call we get after a connect. If we have successfully
+ * connected, we should see a writeable event. We may also see an error or a
+ * hang up. In either of these cases, we transition to error mode. If there is
+ * also a readable event, we ignore it at the moment and just let a
+ * reassociation pick it up so we can simplify the set of state transitions that
+ * we have.
+ */
+static svp_conn_act_t
+svp_conn_poll_connect(port_event_t *pe, svp_conn_t *scp)
+{
+ int ret, err;
+ socklen_t sl = sizeof (err);
+ if (!(pe->portev_events & POLLOUT)) {
+ scp->sc_errno = 0;
+ scp->sc_error = SVP_CE_NOPOLLOUT;
+ scp->sc_cstate = SVP_CS_ERROR;
+ return (SVP_RA_DEGRADE);
+ }
+
+ ret = getsockopt(scp->sc_socket, SOL_SOCKET, SO_ERROR, &err, &sl);
+ /* XXX Really none of these? */
+ if (ret != 0)
+ libvarpd_panic("unanticipated getsockopt error");
+ if (err != 0) {
+ return (svp_conn_backoff(scp));
+ }
+
+ scp->sc_cstate = SVP_CS_ACTIVE;
+ scp->sc_event.se_events = POLLIN | POLLRDNORM | POLLHUP;
+ ret = svp_event_associate(&scp->sc_event, scp->sc_socket);
+ if (ret == 0)
+ return (SVP_RA_RESTORE);
+ scp->sc_error = SVP_CE_ASSOCIATE;
+ scp->sc_errno = ret;
+ scp->sc_cstate = SVP_CS_ERROR;
+ return (SVP_RA_DEGRADE);
+}
+
+static svp_conn_act_t
+svp_conn_pollout(svp_conn_t *scp)
+{
+ svp_query_t *sqp;
+ svp_req_t *req;
+ size_t off;
+ struct iovec iov[2];
+ int nvecs = 0;
+ ssize_t ret;
+
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ /*
+ * We need to find a query and start writing it out.
+ */
+ if (scp->sc_output.sco_query == NULL) {
+ for (sqp = list_head(&scp->sc_queries); sqp != NULL;
+ sqp = list_next(&scp->sc_queries, sqp)) {
+ if (sqp->sq_state != SVP_QUERY_INIT)
+ continue;
+ break;
+ }
+
+ if (sqp == NULL) {
+ scp->sc_event.se_events &= ~POLLOUT;
+ return (SVP_RA_NONE);
+ }
+
+ scp->sc_output.sco_query = sqp;
+ scp->sc_output.sco_offset = 0;
+ sqp->sq_state = SVP_QUERY_WRITING;
+ svp_query_crc32(&sqp->sq_header, sqp->sq_rdata, sqp->sq_rsize);
+ }
+
+ sqp = scp->sc_output.sco_query;
+ req = &sqp->sq_header;
+ off = scp->sc_output.sco_offset;
+ if (off < sizeof (svp_req_t)) {
+ iov[nvecs].iov_base = (void *)((uintptr_t)req + off);
+ iov[nvecs].iov_len = sizeof (svp_req_t) - off;
+ nvecs++;
+ off = 0;
+ } else {
+ off -= sizeof (svp_req_t);
+ }
+
+ iov[nvecs].iov_base = (void *)((uintptr_t)sqp->sq_rdata + off);
+ iov[nvecs].iov_len = sqp->sq_rsize - off;
+ nvecs++;
+
+ do {
+ ret = writev(scp->sc_socket, iov, nvecs);
+ } while (ret == -1 && errno == EAGAIN);
+ if (ret == -1) {
+ switch (errno) {
+ case EAGAIN:
+ scp->sc_event.se_events |= POLLOUT;
+ return (SVP_RA_NONE);
+ case EIO:
+ case ENXIO:
+ case ECONNRESET:
+ return (SVP_RA_ERROR);
+ default:
+ libvarpd_panic("unexpected errno: %d", errno);
+ }
+ }
+
+ scp->sc_output.sco_offset += ret;
+ if (ret >= sizeof (svp_req_t) + sqp->sq_rsize) {
+ sqp->sq_state = SVP_QUERY_READING;
+ scp->sc_output.sco_query = NULL;
+ scp->sc_output.sco_offset = 0;
+ scp->sc_event.se_events |= POLLOUT;
+ }
+ return (SVP_RA_NONE);
+}
+
+static boolean_t
+svp_conn_pollin_validate(svp_conn_t *scp)
+{
+ svp_query_t *sqp;
+ uint32_t nsize;
+ uint16_t nvers, nop;
+ svp_req_t *resp = &scp->sc_input.sci_req;
+
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ nvers = ntohs(resp->svp_ver);
+ nop = ntohs(resp->svp_op);
+ nsize = ntohl(resp->svp_size);
+
+ /* XXX Best practice around spaces in key names */
+ if (nvers != SVP_CURRENT_VERSION) {
+ bunyan_warn(svp_bunyan, "unsupported version",
+ BUNYAN_T_IP, "remote ip", &scp->sc_addr,
+ BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport,
+ BUNYAN_T_INT32, "version", nvers,
+ BUNYAN_T_INT32, "operation", nop,
+ BUNYAN_T_INT32, "response id", resp->svp_id,
+ BUNYAN_T_END);
+ return (B_FALSE);
+ }
+
+ if (nop != SVP_R_VL2_ACK && nop != SVP_R_VL3_ACK) {
+ bunyan_warn(svp_bunyan, "unsupported operation",
+ BUNYAN_T_IP, "remote ip", &scp->sc_addr,
+ BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport,
+ BUNYAN_T_INT32, "version", nvers,
+ BUNYAN_T_INT32, "operation", nop,
+ BUNYAN_T_INT32, "response id", resp->svp_id,
+ BUNYAN_T_END);
+ return (B_FALSE);
+ }
+
+ sqp = svp_conn_query_find(scp, resp->svp_id);
+ if (sqp == NULL) {
+ bunyan_warn(svp_bunyan, "unknown response id",
+ BUNYAN_T_IP, "remote ip", &scp->sc_addr,
+ BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport,
+ BUNYAN_T_INT32, "version", nvers,
+ BUNYAN_T_INT32, "operation", nop,
+ BUNYAN_T_INT32, "response id", resp->svp_id,
+ BUNYAN_T_END);
+ return (B_FALSE);
+ }
+
+ if (sqp->sq_state != SVP_QUERY_READING) {
+ bunyan_warn(svp_bunyan, "got response for unexpecting query",
+ BUNYAN_T_IP, "remote ip", &scp->sc_addr,
+ BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport,
+ BUNYAN_T_INT32, "version", nvers,
+ BUNYAN_T_INT32, "operation", nop,
+ BUNYAN_T_INT32, "response id", resp->svp_id,
+ BUNYAN_T_INT32, "query state", sqp->sq_state,
+ BUNYAN_T_END);
+ return (B_FALSE);
+ }
+
+ if ((nop == SVP_R_VL2_ACK && nsize != sizeof (svp_vl2_ack_t)) ||
+ (nop == SVP_R_VL3_ACK && nsize != sizeof (svp_vl3_ack_t))) {
+ bunyan_warn(svp_bunyan, "response size too large",
+ BUNYAN_T_IP, "remote ip", &scp->sc_addr,
+ BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport,
+ BUNYAN_T_INT32, "version", nvers,
+ BUNYAN_T_INT32, "operation", nop,
+ BUNYAN_T_INT32, "response id", resp->svp_id,
+ BUNYAN_T_INT32, "response size", nsize,
+ BUNYAN_T_INT32, "expected size", nop == SVP_R_VL2_ACK ?
+ sizeof (svp_vl2_ack_t) : sizeof (svp_vl3_ack_t),
+ BUNYAN_T_INT32, "query state", sqp->sq_state,
+ BUNYAN_T_END);
+ return (B_FALSE);
+ }
+
+ scp->sc_input.sci_query = sqp;
+ sqp->sq_wdata = &sqp->sq_wdun;
+ sqp->sq_wsize = sizeof (svp_query_data_t);
+
+ return (B_TRUE);
+}
+
+static svp_conn_act_t
+svp_conn_pollin(svp_conn_t *scp)
+{
+ size_t off, total;
+ ssize_t ret;
+ svp_query_t *sqp;
+ uint32_t crc;
+ uint16_t nop;
+
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ /*
+ * No query implies that we're reading in the header and that the offset
+ * is associted with it.
+ */
+ off = scp->sc_input.sci_offset;
+ sqp = scp->sc_input.sci_query;
+ if (scp->sc_input.sci_query == NULL) {
+ svp_req_t *resp = &scp->sc_input.sci_req;
+
+ assert(off < sizeof (svp_req_t));
+
+ do {
+ ret = read(scp->sc_socket,
+ (void *)((uintptr_t)resp + off),
+ sizeof (svp_req_t) - off);
+ } while (ret == -1 && errno == EINTR);
+ if (ret == -1) {
+ switch (errno) {
+ case EAGAIN:
+ scp->sc_event.se_events |= POLLIN | POLLRDNORM;
+ return (SVP_RA_NONE);
+ case EIO:
+ case ECONNRESET:
+ return (SVP_RA_ERROR);
+ break;
+ default:
+ libvarpd_panic("unexpeted read errno: %d",
+ errno);
+ }
+ } else if (ret == 0) {
+ /* Try to reconnect to the remote host */
+ return (SVP_RA_ERROR);
+ }
+
+ /* Didn't get all the data we need */
+ if (off + ret < sizeof (svp_req_t)) {
+ scp->sc_input.sci_offset += ret;
+ scp->sc_event.se_events |= POLLIN | POLLRDNORM;
+ return (SVP_RA_NONE);
+ }
+
+ if (svp_conn_pollin_validate(scp) != B_TRUE)
+ return (SVP_RA_ERROR);
+ }
+
+ sqp = scp->sc_input.sci_query;
+ assert(sqp != NULL);
+ total = ntohl(scp->sc_input.sci_req.svp_size);
+ do {
+ ret = read(scp->sc_socket, sqp->sq_wdata + off, total - off);
+ } while (ret == -1 && errno == EINTR);
+
+ if (ret == -1) {
+ switch (errno) {
+ case EAGAIN:
+ scp->sc_event.se_events |= POLLIN | POLLRDNORM;
+ return (SVP_RA_NONE);
+ case EIO:
+ case ECONNRESET:
+ return (SVP_RA_ERROR);
+ break;
+ default:
+ libvarpd_panic("unexpeted read errno: %d", errno);
+ }
+ } else if (ret == 0) {
+ /* Try to reconnect to the remote host */
+ return (SVP_RA_ERROR);
+ }
+
+ if (ret + off < total) {
+ scp->sc_input.sci_offset += ret;
+ return (SVP_RA_NONE);
+ }
+
+ nop = ntohs(scp->sc_input.sci_req.svp_op);
+ crc = scp->sc_input.sci_req.svp_crc32;
+ svp_query_crc32(&scp->sc_input.sci_req, sqp->sq_wdata, total);
+ if (crc != scp->sc_input.sci_req.svp_crc32) {
+ bunyan_info(svp_bunyan, "crc32 mismatch",
+ BUNYAN_T_IP, "remote ip", &scp->sc_addr,
+ BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport,
+ BUNYAN_T_INT32, "version",
+ ntohs(scp->sc_input.sci_req.svp_ver),
+ BUNYAN_T_INT32, "operation", nop,
+ BUNYAN_T_INT32, "response id",
+ ntohl(scp->sc_input.sci_req.svp_id),
+ BUNYAN_T_INT32, "query state", sqp->sq_state,
+ BUNYAN_T_UINT32, "msg_crc", ntohl(crc),
+ BUNYAN_T_UINT32, "calc_crc",
+ ntohl(scp->sc_input.sci_req.svp_crc32),
+ BUNYAN_T_END);
+ return (SVP_RA_ERROR);
+ }
+ scp->sc_input.sci_query = NULL;
+ scp->sc_input.sci_offset = 0;
+
+ if (nop == SVP_R_VL2_ACK) {
+ svp_vl2_ack_t *sl2a = sqp->sq_wdata;
+ sqp->sq_status = ntohs(sl2a->sl2a_status);
+ } else if (nop == SVP_R_VL3_ACK) {
+ svp_vl3_ack_t *sl3a = sqp->sq_wdata;
+ sqp->sq_status = ntohs(sl3a->sl3a_status);
+ } else {
+ libvarpd_panic("unhandled nop: %d", nop);
+ }
+
+ /*
+ * XXX What assumptions can now be violated?
+ */
+ list_remove(&scp->sc_queries, sqp);
+ mutex_unlock(&scp->sc_lock);
+
+ /*
+ * We have to release all of our resources associated with this entry
+ * before we call the callback. After we call it, the memory will be
+ * lost to time.
+ */
+ svp_query_release(sqp);
+ sqp->sq_func(sqp, sqp->sq_arg);
+ mutex_lock(&scp->sc_lock);
+ scp->sc_event.se_events |= POLLIN | POLLRDNORM;
+
+ return (SVP_RA_NONE);
+}
+
+static svp_conn_act_t
+svp_conn_reset(svp_conn_t *scp)
+{
+ svp_remote_t *srp = scp->sc_remote;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ assert(svp_event_dissociate(&scp->sc_event, scp->sc_socket) ==
+ ENOENT);
+ if (close(scp->sc_socket) != 0)
+ libvarpd_panic("failed to close socket %d: %d", scp->sc_socket,
+ errno);
+ scp->sc_socket = -1;
+ scp->sc_cstate = SVP_CS_INITIAL;
+ scp->sc_input.sci_query = NULL;
+ scp->sc_output.sco_query = NULL;
+
+ svp_remote_reassign(srp, scp);
+
+ return (svp_conn_connect(scp));
+}
+
+/*
+ * This is our general state transition function. We're called here when we want
+ * to advance part of our state machine as well as to re-arm ourselves. We can
+ * also end up here from the standard event loop as a result of having a user
+ * event posted.
+ */
+static void
+svp_conn_handler(port_event_t *pe, void *arg)
+{
+ svp_conn_t *scp = arg;
+ svp_remote_t *srp = scp->sc_remote;
+ svp_conn_act_t ret = SVP_RA_NONE;
+
+ mutex_lock(&scp->sc_lock);
+
+ /*
+ * Check if one of our event interrupts is set. An event interrupt, such
+ * as having to be reaped or be torndown is notified by a
+ * PORT_SOURCE_USER event that tries to take care of this. However,
+ * because of the fact that the event loop can be ongoing despite this,
+ * we may get here before the PORT_SOURCE_USER has casued us to get
+ * here. In such a case, if the PORT_SOURCE_USER event is tagged, then
+ * we're going to opt to do nothing here and wait for it to come and
+ * tear us down. That will also indicate to us that we have nothing to
+ * worry about as far as general timing and the like goes.
+ */
+
+ if ((scp->sc_flags & SVP_CF_UFLAG) != 0 &&
+ (scp->sc_flags & SVP_CF_USER) != 0 &&
+ pe != NULL &&
+ pe->portev_source != PORT_SOURCE_USER) {
+ mutex_unlock(&scp->sc_lock);
+ return;
+ }
+
+ if (pe != NULL && pe->portev_source == PORT_SOURCE_USER) {
+ scp->sc_flags &= ~SVP_CF_USER;
+ if ((scp->sc_flags & SVP_CF_UFLAG) == 0) {
+ mutex_unlock(&scp->sc_lock);
+ return;
+ }
+ }
+
+ /* Check if this needs to be freed */
+ if (scp->sc_flags & SVP_CF_REAP) {
+ mutex_unlock(&scp->sc_lock);
+ svp_conn_destroy(scp);
+ return;
+ }
+
+ /* Check if this needs to be reset */
+ if (scp->sc_flags & SVP_CF_TEARDOWN) {
+ ret = SVP_RA_ERROR;
+ goto out;
+ }
+
+ switch (scp->sc_cstate) {
+ case SVP_CS_INITIAL:
+ case SVP_CS_BACKOFF:
+ assert(pe == NULL);
+ ret = svp_conn_connect(scp);
+ break;
+ case SVP_CS_CONNECTING:
+ assert(pe != NULL);
+ ret = svp_conn_poll_connect(pe, scp);
+ break;
+ case SVP_CS_ACTIVE:
+ assert(pe != NULL);
+ if (pe->portev_events & POLLOUT)
+ ret = svp_conn_pollout(scp);
+ if (ret == SVP_RA_NONE && (pe->portev_events & POLLIN))
+ ret = svp_conn_pollin(scp);
+ if (ret == SVP_RA_NONE) {
+ int err;
+ if ((err = svp_event_associate(&scp->sc_event,
+ scp->sc_socket)) != 0) {
+ scp->sc_error = SVP_CE_ASSOCIATE;
+ scp->sc_errno = ret;
+ scp->sc_cstate = SVP_CS_ERROR;
+ ret = SVP_RA_DEGRADE;
+ }
+ }
+ break;
+ default:
+ libvarpd_panic("svp_conn_handler encountered unexpected "
+ "state: %d", scp->sc_cstate);
+ }
+out:
+ mutex_unlock(&scp->sc_lock);
+
+ if (ret == SVP_RA_NONE)
+ return;
+
+ mutex_lock(&srp->sr_lock);
+ mutex_lock(&scp->sc_lock);
+ if (ret == SVP_RA_ERROR)
+ ret = svp_conn_reset(scp);
+
+ if (ret == SVP_RA_DEGRADE)
+ svp_conn_degrade(scp);
+ else if (ret == SVP_RA_RESTORE)
+ svp_conn_restore(scp);
+ mutex_unlock(&scp->sc_lock);
+ mutex_unlock(&srp->sr_lock);
+}
+
+static void
+svp_conn_backtimer(void *arg)
+{
+ svp_conn_t *scp = arg;
+
+ svp_conn_handler(NULL, scp);
+}
+
+/*
+ * This fires every svp_conn_query_timeout seconds. Its purpos is to determine
+ * if we haven't heard back on a request with in svp_conn_query_timeout seconds.
+ * If any of the svp_conn_query_t's that have been started (indicated by
+ * svp_query_t`sq_acttime != -1), and more than svp_conn_query_timeout seconds
+ * have passed, we basically tear this connection down and reassign outstanding
+ * queries.
+ */
+static void
+svp_conn_querytimer(void *arg)
+{
+ svp_query_t *sqp;
+ svp_conn_t *scp = arg;
+ hrtime_t now = gethrtime();
+
+ mutex_lock(&scp->sc_lock);
+
+ /*
+ * If we're not in the active state, then we don't care about this as
+ * we're already either going to die or we have no connections to worry
+ * about.
+ */
+ if (scp->sc_cstate != SVP_CS_ACTIVE) {
+ mutex_unlock(&scp->sc_lock);
+ return;
+ }
+
+ for (sqp = list_head(&scp->sc_queries); sqp != NULL;
+ sqp = list_next(&scp->sc_queries, sqp)) {
+ if (sqp->sq_acttime == -1)
+ continue;
+ if ((sqp->sq_acttime - now) / NANOSEC > svp_conn_query_timeout)
+ break;
+ }
+
+ /* Nothing timed out, we're good here */
+ if (sqp == NULL) {
+ mutex_unlock(&scp->sc_lock);
+ return;
+ }
+
+ scp->sc_flags |= SVP_CF_TEARDOWN;
+ svp_conn_inject(scp);
+
+ mutex_unlock(&scp->sc_lock);
+}
+
+/*
+ * This connection has fallen out of DNS, figure out what we need to do with it.
+ */
+void
+svp_conn_fallout(svp_conn_t *scp)
+{
+ svp_remote_t *srp = scp->sc_remote;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+
+ mutex_lock(&scp->sc_lock);
+ switch (scp->sc_cstate) {
+ case SVP_CS_ERROR:
+ /*
+ * Connection is already inactive, so it's safe to tear down.
+ * Fire it off through the state machine to tear down via the
+ * backoff timer.
+ */
+ svp_conn_remove(scp);
+ scp->sc_flags |= SVP_CF_REAP;
+ svp_conn_inject(scp);
+ break;
+ case SVP_CS_INITIAL:
+ case SVP_CS_BACKOFF:
+ case SVP_CS_CONNECTING:
+ /*
+ * Here, we have something actively going on, so we'll let it be
+ * clean up the next time we hit the event loop by the event
+ * loop itself. As it has no connections, there isn't much to
+ * really do, though we'll take this chance to go ahead and
+ * remove it from the remote.
+ */
+ svp_conn_remove(scp);
+ scp->sc_flags |= SVP_CF_REAP;
+ svp_conn_inject(scp);
+ break;
+ case SVP_CS_ACTIVE:
+ scp->sc_cstate = SVP_CS_WINDDOWN;
+ /*
+ * XXX We need to look at what's currently outstanding. If
+ * nothing is going on at the moment, we should try to
+ * port disassociate, and if succsesful, eg. not ENOENT, clean
+ * up right here and now.
+ */
+ break;
+ case SVP_CS_WINDDOWN:
+ /*
+ * Nothing specific to do here, we'e finishing up with this,
+ * just haven't finished yet.
+ */
+ break;
+ default:
+ libvarpd_panic("svp_conn_fallout encountered"
+ "unkonwn state");
+ }
+ mutex_unlock(&scp->sc_lock);
+ mutex_unlock(&srp->sr_lock);
+}
+
+int
+svp_conn_create(svp_remote_t *srp, const struct in6_addr *addr)
+{
+ svp_conn_t *scp;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ scp = umem_zalloc(sizeof (svp_conn_t), UMEM_DEFAULT);
+ if (scp == NULL)
+ return (ENOMEM);
+
+ scp->sc_remote = srp;
+ scp->sc_event.se_func = svp_conn_handler;
+ scp->sc_event.se_arg = scp;
+ scp->sc_btimer.st_func = svp_conn_backtimer;
+ scp->sc_btimer.st_arg = scp;
+ scp->sc_btimer.st_oneshot = B_TRUE;
+ scp->sc_btimer.st_value = 1;
+
+ scp->sc_qtimer.st_func = svp_conn_querytimer;
+ scp->sc_qtimer.st_arg = scp;
+ scp->sc_qtimer.st_oneshot = B_FALSE;
+ scp->sc_qtimer.st_value = svp_conn_query_timeout;
+
+ scp->sc_socket = -1;
+
+ list_create(&scp->sc_queries, sizeof (svp_query_t),
+ offsetof(svp_query_t, sq_lnode));
+ scp->sc_gen = srp->sr_gen;
+ bcopy(addr, &scp->sc_addr, sizeof (struct in6_addr));
+ scp->sc_cstate = SVP_CS_INITIAL;
+ mutex_lock(&scp->sc_lock);
+ svp_conn_add(scp);
+ mutex_unlock(&scp->sc_lock);
+
+ /* Now that we're locked and loaded, add our timers */
+ svp_timer_add(&scp->sc_qtimer);
+ svp_timer_add(&scp->sc_btimer);
+
+ return (0);
+}
+
+/*
+ * At the time of calling, the entry has been removed from all lists. In
+ * addition, the entries state should be SVP_CS_ERROR, therefore, we know that
+ * the fd should not be associated with the event loop. We'll double check that
+ * just in case. We should also have already been removed from the remote's
+ * list.
+ */
+void
+svp_conn_destroy(svp_conn_t *scp)
+{
+ int ret;
+
+ mutex_lock(&scp->sc_lock);
+ if (scp->sc_cstate != SVP_CS_ERROR)
+ libvarpd_panic("asked to tear down an active connection");
+ if (scp->sc_flags & SVP_CF_ADDED)
+ libvarpd_panic("asked to remove a connection still in "
+ "the remote list\n");
+ if (!list_is_empty(&scp->sc_queries))
+ libvarpd_panic("asked to remove a connection with non-empty "
+ "query list");
+
+ if ((ret = svp_event_dissociate(&scp->sc_event, scp->sc_socket)) !=
+ ENOENT) {
+ libvarpd_panic("dissociate failed or was actually "
+ "associated: %d", ret);
+ }
+ mutex_unlock(&scp->sc_lock);
+
+ /* Verify our timers are killed */
+ svp_timer_remove(&scp->sc_btimer);
+ svp_timer_remove(&scp->sc_qtimer);
+
+ if (scp->sc_socket != -1 && close(scp->sc_socket) != 0)
+ libvarpd_panic("failed to close svp_conn_t`scp_socket fd "
+ "%d: %d", scp->sc_socket, errno);
+
+ list_destroy(&scp->sc_queries);
+ umem_free(scp, sizeof (svp_conn_t));
+}
+
+void
+svp_conn_queue(svp_conn_t *scp, svp_query_t *sqp)
+{
+ assert(MUTEX_HELD(&scp->sc_lock));
+ assert(scp->sc_cstate == SVP_CS_ACTIVE);
+
+ sqp->sq_acttime = -1;
+ list_insert_tail(&scp->sc_queries, sqp);
+ if (!(scp->sc_event.se_events & POLLOUT)) {
+ scp->sc_event.se_events |= POLLOUT;
+ /*
+ * XXX If this fails, we should give up this set of conns or
+ * something... For now, abort.
+ */
+ if (svp_event_associate(&scp->sc_event, scp->sc_socket) != 0)
+ libvarpd_panic("svp_event_associate failed somehow");
+ }
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c
new file mode 100644
index 0000000000..43d064d64d
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c
@@ -0,0 +1,50 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+/*
+ * Perform standard crc32 functions.
+ *
+ * XXX This should probably be a library
+ */
+
+#include <sys/crc32.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <inttypes.h>
+#include <libvarpd_svp.h>
+
+static uint32_t svp_crc32_tab[] = { CRC32_TABLE };
+
+static uint32_t
+svp_crc32(uint32_t old, const uint8_t *buf, size_t len)
+{
+ uint32_t out;
+
+ CRC32(out, buf, len, old, svp_crc32_tab);
+ return (out);
+}
+
+void
+svp_query_crc32(svp_req_t *shp, void *buf, size_t data)
+{
+ uint32_t crc = -1U;
+
+ shp->svp_crc32 = 0;
+ crc = svp_crc32(crc, (uint8_t *)shp, sizeof (svp_req_t));
+ crc = svp_crc32(crc, buf, data);
+ crc = ~crc;
+ shp->svp_crc32 = htonl(crc);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_host.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_host.c
new file mode 100644
index 0000000000..2c80de097e
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_host.c
@@ -0,0 +1,173 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/*
+ * DNS Host-name related functions.
+ *
+ * Every backend is stored in DNS. To find out memebership, we query DNS and use
+ * that to update our world. We update our DNS records on both a timer
+ * granularity and immediately after creation. We'll also XXX go through and do
+ * this after all of our valid entries have disappeared.
+ *
+ * Unfortuantely, doing host name resolution in a way that allows us to leverage
+ * the system resolvers and the system's caching, require us to use blocking
+ * calls in libc. If we can't reach a given server, that will tie up a thread
+ * for quite some time. To work around that fact, we're going to create a fixed
+ * number of threads and we'll use them to service this kind of work. While not
+ * great, we don't have many better options.
+ */
+
+#include <sys/socket.h>
+#include <netdb.h>
+#include <thread.h>
+#include <synch.h>
+#include <assert.h>
+#include <errno.h>
+
+#include <libvarpd_svp.h>
+
+int svp_host_nthreads = 8;
+
+static mutex_t svp_host_lock = DEFAULTMUTEX;
+static cond_t svp_host_cv = DEFAULTCV;
+static svp_remote_t *svp_host_head;
+
+static void *
+svp_host_loop(void *unused)
+{
+ for (;;) {
+ int err;
+ svp_remote_t *srp;
+ struct addrinfo *addrs;
+
+ mutex_lock(&svp_host_lock);
+ while (svp_host_head == NULL)
+ cond_wait(&svp_host_cv, &svp_host_lock);
+ srp = svp_host_head;
+ svp_host_head = srp->sr_nexthost;
+ if (svp_host_head != NULL)
+ cond_signal(&svp_host_cv);
+ mutex_unlock(&svp_host_lock);
+
+ mutex_lock(&srp->sr_lock);
+ assert(srp->sr_state & SVP_RS_LOOKUP_SCHEDULED);
+ srp->sr_state &= ~SVP_RS_LOOKUP_SCHEDULED;
+ if (srp->sr_state & SVP_RS_LOOKUP_INPROGRESS) {
+ mutex_unlock(&srp->sr_lock);
+ continue;
+ }
+ srp->sr_state |= SVP_RS_LOOKUP_INPROGRESS;
+ mutex_unlock(&srp->sr_lock);
+
+ for (;;) {
+ err = getaddrinfo(srp->sr_hostname, NULL, NULL, &addrs);
+ if (err == 0)
+ break;
+ if (err != 0) {
+ switch (err) {
+ case EAI_ADDRFAMILY:
+ case EAI_BADFLAGS:
+ case EAI_FAMILY:
+ case EAI_SERVICE:
+ case EAI_SOCKTYPE:
+ case EAI_OVERFLOW:
+ default:
+ libvarpd_panic("unexpected getaddrinfo "
+ "failure: %d", err);
+ case EAI_AGAIN:
+ case EAI_MEMORY:
+ case EAI_SYSTEM:
+ continue;
+ case EAI_FAIL:
+ case EAI_NODATA:
+ case EAI_NONAME:
+ /*
+ * XXX At this point in time we have
+ * something which isn't very good. This
+ * may have been a typo or something may
+ * have been destroyed. We should go
+ * ahead and degrade this overall
+ * instance, because we're not going to
+ * make much forward progress... It'd be
+ * great if we could actually issue more
+ * of an EREPORT to describe what
+ * happened...
+ */
+ mutex_lock(&srp->sr_lock);
+ svp_remote_degrade(srp, SVP_RD_DNS_FAIL);
+ mutex_unlock(&srp->sr_lock);
+ break;
+ }
+ }
+ break;
+ }
+
+ if (err == 0) {
+ /*
+ * We've successfully resolved something, mark this
+ * degredation over for now.
+ */
+ mutex_lock(&srp->sr_lock);
+ svp_remote_restore(srp, SVP_RD_DNS_FAIL);
+ mutex_unlock(&srp->sr_lock);
+ svp_remote_resolved(srp, addrs);
+ }
+
+ mutex_lock(&srp->sr_lock);
+ srp->sr_state &= ~SVP_RS_LOOKUP_INPROGRESS;
+ mutex_unlock(&srp->sr_lock);
+ }
+}
+
+void
+svp_host_queue(svp_remote_t *srp)
+{
+ svp_remote_t *s;
+ mutex_lock(&svp_host_lock);
+ mutex_lock(&srp->sr_lock);
+ if (srp->sr_state & SVP_RS_LOOKUP_SCHEDULED) {
+ mutex_unlock(&srp->sr_lock);
+ mutex_unlock(&svp_host_lock);
+ return;
+ }
+ srp->sr_state |= SVP_RS_LOOKUP_SCHEDULED;
+ s = svp_host_head;
+ while (s != NULL && s->sr_nexthost != NULL)
+ s = s->sr_nexthost;
+ if (s == NULL) {
+ assert(s == svp_host_head);
+ svp_host_head = srp;
+ } else {
+ s->sr_nexthost = srp;
+ }
+ srp->sr_nexthost = NULL;
+ cond_signal(&svp_host_cv);
+ mutex_unlock(&srp->sr_lock);
+ mutex_unlock(&svp_host_lock);
+}
+
+int
+svp_host_init(void)
+{
+ int i;
+
+ for (i = 0; i < svp_host_nthreads; i++) {
+ if (thr_create(NULL, 0, svp_host_loop, NULL,
+ THR_DETACHED | THR_DAEMON, NULL) != 0)
+ return (errno);
+ }
+
+ return (0);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c
new file mode 100644
index 0000000000..a3579320ff
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c
@@ -0,0 +1,206 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/*
+ * Event loop mechanism for our backend.
+ */
+
+#include <unistd.h>
+#include <thread.h>
+#include <port.h>
+#include <signal.h>
+#include <time.h>
+#include <errno.h>
+#include <umem.h>
+
+#include <libvarpd_svp.h>
+
+typedef struct svp_event_loop {
+ int sel_port; /* RO */
+ int sel_nthread; /* RO */
+ thread_t *sel_threads; /* RO */
+ boolean_t sel_stop; /* svp_elock */
+ timer_t sel_hosttimer;
+} svp_event_loop_t;
+
+static svp_event_loop_t svp_event;
+static mutex_t svp_elock = DEFAULTMUTEX;
+
+static void *
+svp_event_thr(void *arg)
+{
+ for (;;) {
+ int ret;
+ port_event_t pe;
+ svp_event_t *sep;
+
+ mutex_lock(&svp_elock);
+ if (svp_event.sel_stop == B_TRUE) {
+ mutex_unlock(&svp_elock);
+ break;
+ }
+ mutex_unlock(&svp_elock);
+
+ ret = port_get(svp_event.sel_port, &pe, NULL);
+ if (ret != 0) {
+ switch (errno) {
+ case EFAULT:
+ case EBADF:
+ case EINVAL:
+ libvarpd_panic("unexpected port_get errno: %d",
+ errno);
+ default:
+ break;
+ }
+ }
+
+ /* TODO Process the event */
+ if (pe.portev_user == NULL)
+ libvarpd_panic("received event (%p) without "
+ "protev_user set", &pe);
+ sep = (svp_event_t *)pe.portev_user;
+ sep->se_func(&pe, sep->se_arg);
+ }
+
+ return (NULL);
+}
+
+int
+svp_event_associate(svp_event_t *sep, int fd)
+{
+ int ret;
+
+ ret = port_associate(svp_event.sel_port, PORT_SOURCE_FD, fd,
+ sep->se_events, sep);
+ if (ret != 0) {
+ switch (errno) {
+ case EBADF:
+ case EBADFD:
+ case EINVAL:
+ case EAGAIN:
+ libvarpd_panic("unexpected port_associate error: %d",
+ errno);
+ default:
+ ret = errno;
+ break;
+ }
+ }
+
+ return (ret);
+}
+
+int
+svp_event_dissociate(svp_event_t *sep, int fd)
+{
+ int ret;
+
+ ret = port_dissociate(svp_event.sel_port, PORT_SOURCE_FD, fd);
+ if (ret != 0) {
+ if (errno != ENOENT)
+ libvarpd_panic("unexpected port_dissociate error: %d",
+ errno);
+ ret = errno;
+ }
+ return (ret);
+}
+
+int
+svp_event_inject(void *user)
+{
+ return (port_send(svp_event.sel_port, 0, user));
+}
+
+int
+svp_event_timer_init(svp_event_t *sep)
+{
+ port_notify_t pn;
+ struct sigevent evp;
+ struct itimerspec ts;
+
+ pn.portnfy_port = svp_event.sel_port;
+ pn.portnfy_user = sep;
+ evp.sigev_notify = SIGEV_PORT;
+ evp.sigev_value.sival_ptr = &pn;
+
+ if (timer_create(CLOCK_REALTIME, &evp, &svp_event.sel_hosttimer) != 0)
+ return (errno);
+
+ ts.it_value.tv_sec = svp_tickrate;
+ ts.it_value.tv_nsec = 0;
+ ts.it_interval.tv_sec = svp_tickrate;
+ ts.it_interval.tv_nsec = 0;
+
+ if (timer_settime(svp_event.sel_hosttimer, TIMER_RELTIME, &ts,
+ NULL) != 0) {
+ int ret = errno;
+ (void) timer_delete(svp_event.sel_hosttimer);
+ return (ret);
+ }
+
+ return (0);
+}
+
+int
+svp_event_init(void)
+{
+ long i, ncpus;
+
+ svp_event.sel_port = port_create();
+ if (svp_event.sel_port == -1)
+ return (errno);
+
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN) * 2 + 1;
+ if (ncpus <= 0)
+ libvarpd_panic("sysconf for nprocs failed... %d/%d",
+ ncpus, errno);
+
+ svp_event.sel_threads = umem_alloc(sizeof (thread_t) * ncpus,
+ UMEM_DEFAULT);
+ if (svp_event.sel_threads == NULL) {
+ int ret = errno;
+ (void) timer_delete(svp_event.sel_hosttimer);
+ (void) close(svp_event.sel_port);
+ svp_event.sel_port = -1;
+ return (ret);
+ }
+
+ for (i = 0; i < ncpus; i++) {
+ int ret;
+ thread_t *thr = &svp_event.sel_threads[i];
+
+ ret = thr_create(NULL, 0, svp_event_thr, NULL,
+ THR_DETACHED | THR_DAEMON, thr);
+ if (ret != 0) {
+ ret = errno;
+ (void) timer_delete(svp_event.sel_hosttimer);
+ (void) close(svp_event.sel_port);
+ svp_event.sel_port = -1;
+ return (errno);
+ }
+ }
+
+ return (0);
+}
+
+void
+svp_event_fini(void)
+{
+ mutex_lock(&svp_elock);
+ svp_event.sel_stop = B_TRUE;
+ mutex_unlock(&svp_elock);
+
+ (void) timer_delete(svp_event.sel_hosttimer);
+ (void) close(svp_event.sel_port);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h b/usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h
new file mode 100644
index 0000000000..e95f3e4c61
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h
@@ -0,0 +1,172 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_SVP_PROT_H
+#define _LIBVARPD_SVP_PROT_H
+
+/*
+ * SVP protocol Definitions
+ */
+
+#include <sys/types.h>
+#include <inttypes.h>
+#include <sys/ethernet.h>
+#include <netinet/in.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * SDC VXLAN Protocol Definitions
+ */
+
+#define SVP_VERSION_ONE 1
+#define SVP_CURRENT_VERSION SVP_VERSION_ONE
+
+typedef struct svp_req {
+ uint16_t svp_ver;
+ uint16_t svp_op;
+ uint32_t svp_size;
+ uint32_t svp_id;
+ uint32_t svp_crc32;
+} svp_req_t;
+
+typedef enum svp_op {
+ SVP_R_UNKNOWN = 0x00,
+ SVP_R_PING = 0x01,
+ SVP_R_PONG = 0x02,
+ SVP_R_VL2_REQ = 0x03,
+ SVP_R_VL2_ACK = 0x04,
+ SVP_R_VL3_REQ = 0x05,
+ SVP_R_VL3_ACK = 0x06,
+ SVP_R_BULK_REQ = 0x07,
+ SVP_R_BULK_ACK = 0x08,
+ SVP_R_LOG_REQ = 0x09,
+ SVP_R_LOG_ACK = 0x0A,
+ SVP_R_LOG_RM = 0x0B,
+ SVP_R_LOG_RACK = 0x0C,
+ SVP_R_SHOOTDOWN = 0x0D
+} svp_op_t;
+
+typedef enum svp_status {
+ SVP_S_OK = 0x00, /* Everything OK */
+ SVP_S_FATAL = 0x01, /* Fatal error, close connection */
+ SVP_S_NOTFOUND = 0x02, /* Entry not found */
+ SVP_S_BADL3TYPE = 0x03, /* Unknown svp_vl3_type_t */
+ SVP_S_BADBULK = 0x04, /* Unknown svp_bulk_type_t */
+ SVP_S_BADLOG = 0x05, /* Unknown svp_log_type_t */
+ SVP_S_LOGAGAIN = 0x06 /* Nothing in the log yet */
+} svp_status_t;
+
+typedef struct svp_vl2_req {
+ uint8_t sl2r_mac[ETHERADDRL];
+ uint8_t sl2r_pad[2];
+ uint32_t sl2r_vnetid;
+} svp_vl2_req_t;
+
+typedef struct svp_vl2_ack {
+ uint16_t sl2a_status;
+ uint16_t sl2a_port;
+ uint8_t sl2a_addr[16];
+} svp_vl2_ack_t;
+
+typedef enum svp_vl3_type {
+ SVP_VL3_IP = 0x01,
+ SVP_VL3_IPV6 = 0x02
+} svp_vl3_type_t;
+
+typedef struct svp_vl3_req {
+ uint8_t sl3r_ip[16];
+ uint32_t sl3r_type;
+ uint32_t sl3r_vnetid;
+} svp_vl3_req_t;
+
+typedef struct svp_vl3_ack {
+ uint32_t sl3a_status;
+ uint8_t sl3a_mac[ETHERADDRL];
+ uint16_t sl3a_uport;
+ uint8_t sl3a_uip[16];
+} svp_vl3_ack_t;
+
+typedef enum svp_bulk_type {
+ SVP_BULK_VL2 = 0x01,
+ SVP_BULK_VL3 = 0x02
+} svp_bulk_type_t;
+
+typedef struct svp_bulk_req {
+ uint32_t svbr_type;
+} svp_bulk_req_t;
+
+typedef struct svp_bulk_ack {
+ uint32_t svba_status;
+ uint32_t svba_type;
+ uint8_t svba_data[];
+} svp_bulk_ack_t;
+
+typedef enum svp_log_type {
+ SVP_LOG_VL2 = 0x01,
+ SVP_LOG_VL3 = 0x02
+} svp_log_type_t;
+
+typedef struct svp_log_req {
+ uint32_t svlr_type;
+ uint32_t svlr_count;
+} svp_log_req_t;
+
+typedef struct svp_log_vl2 {
+ uint8_t svl2_id[16]; /* 16-byte UUID */
+ uint8_t svl2_mac[ETHERADDRL];
+ uint8_t svl2_pad[2];
+ uint32_t svl2_vnetid;
+} svp_log_vl2_t;
+
+typedef struct svp_log_vl3 {
+ uint8_t svl3_id[16]; /* 16-byte UUID */
+ uint8_t slv3_ip[16];
+ uint8_t svl3_mac[ETHERADDRL];
+ uint16_t svl3_vlan;
+ uint8_t svl3_tmac[ETHERADDRL];
+ uint8_t svl3_tpad[2];
+ uint32_t svl3_vnetid;
+} svp_log_vl3_t;
+
+typedef struct svp_log_ack {
+ uint32_t svla_status;
+ uint32_t svla_type;
+ uint8_t svla_data[];
+} svp_log_ack_t;
+
+typedef struct svp_lrm_req {
+ uint32_t svrr_type;
+ uint32_t svrr_pad;
+ uint8_t svrr_ids[];
+} svp_lrm_req_t;
+
+typedef struct svp_lrm_ack {
+ uint32_t svra_status;
+} svp_lrm_ack_t;
+
+typedef struct svp_shootdown {
+ uint8_t svsd_mac[ETHERADDRL];
+ uint8_t svsd_pad[2];
+ uint32_t svsd_vnetid;
+} svp_shootdown_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_SVP_PROT_H */
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c
new file mode 100644
index 0000000000..6da565246c
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c
@@ -0,0 +1,596 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/*
+ * This file encapsulates all of the logic for dealing with a given remote host
+ * that is being used to service requests. Multiple different overlay devices
+ * all share the same single device here.
+ */
+
+#include <umem.h>
+#include <strings.h>
+#include <string.h>
+#include <stddef.h>
+#include <thread.h>
+#include <synch.h>
+#include <assert.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <errno.h>
+#include <libidspace.h>
+
+#include <libvarpd_provider.h>
+#include <libvarpd_svp.h>
+
+static mutex_t svp_remote_lock = DEFAULTMUTEX;
+static avl_tree_t svp_remote_tree;
+static svp_timer_t svp_dns_timer;
+static id_space_t *svp_idspace;
+static int svp_dns_timer_rate = 30; /* seconds */
+
+static void
+svp_remote_mkfmamsg(svp_remote_t *srp, svp_degrade_state_t state, char *buf,
+ size_t buflen)
+{
+ switch (state) {
+ case SVP_RD_DNS_FAIL:
+ (void) snprintf(buf, buflen, "failed to resolve or find "
+ "entries for hostname %s", srp->sr_hostname);
+ break;
+ case SVP_RD_REMOTE_FAIL:
+ (void) snprintf(buf, buflen, "cannot reach any remote peers");
+ break;
+ default:
+ (void) snprintf(buf, buflen, "unkonwn error state: %d", state);
+ }
+}
+
+static int
+svp_remote_comparator(const void *l, const void *r)
+{
+ int ret;
+ const svp_remote_t *lr = l, *rr = r;
+
+ ret = strcmp(lr->sr_hostname, rr->sr_hostname);
+ if (ret > 0)
+ return (1);
+ else if (ret < 0)
+ return (-1);
+
+ if (lr->sr_rport > rr->sr_rport)
+ return (1);
+ else if (lr->sr_rport < rr->sr_rport)
+ return (-1);
+ else
+ return (0);
+}
+
+void
+svp_query_release(svp_query_t *sqp)
+{
+ id_free(svp_idspace, sqp->sq_header.svp_id);
+}
+
+static void
+svp_remote_destroy(svp_remote_t *srp)
+{
+ size_t len;
+
+ /*
+ * XXX Clean up DNS related information, eg. make sure we're not in the
+ * queue. Likely need a flag cv...
+ */
+
+ if (mutex_destroy(&srp->sr_lock) != 0)
+ libvarpd_panic("failed to destroy mutex sr_lock");
+
+ if (srp->sr_addrinfo != NULL)
+ freeaddrinfo(srp->sr_addrinfo);
+ len = strlen(srp->sr_hostname) + 1;
+ umem_free(srp->sr_hostname, len);
+ umem_free(srp, sizeof (svp_remote_t));
+}
+
+static int
+svp_remote_create(const char *host, uint16_t port, svp_remote_t **outp)
+{
+ size_t hlen;
+ svp_remote_t *remote;
+
+ assert(MUTEX_HELD(&svp_remote_lock));
+
+ remote = umem_zalloc(sizeof (svp_remote_t), UMEM_DEFAULT);
+ if (remote == NULL) {
+ mutex_unlock(&svp_remote_lock);
+ return (ENOMEM);
+ }
+ hlen = strlen(host) + 1;
+ remote->sr_hostname = umem_alloc(hlen, UMEM_DEFAULT);
+ if (remote->sr_hostname == NULL) {
+ umem_free(remote, sizeof (svp_remote_t));
+ mutex_unlock(&svp_remote_lock);
+ return (ENOMEM);
+ }
+ remote->sr_rport = port;
+ if (mutex_init(&remote->sr_lock, USYNC_THREAD, NULL) != 0)
+ libvarpd_panic("failed to create mutex sr_lock");
+ list_create(&remote->sr_conns, sizeof (svp_conn_t),
+ offsetof(svp_conn_t, sc_rlist));
+ avl_create(&remote->sr_tree, svp_comparator, sizeof (svp_t),
+ offsetof(svp_t, svp_rlink));
+ (void) strlcpy(remote->sr_hostname, host, hlen);
+ remote->sr_count = 1;
+
+ *outp = remote;
+ return (0);
+}
+
+int
+svp_remote_find(char *host, uint16_t port, svp_remote_t **outp)
+{
+ int ret;
+ svp_remote_t lookup, *remote;
+
+ lookup.sr_hostname = host;
+ lookup.sr_rport = port;
+ mutex_lock(&svp_remote_lock);
+ remote = avl_find(&svp_remote_tree, &lookup, NULL);
+ if (remote != NULL) {
+ assert(remote->sr_count > 0);
+ remote->sr_count++;
+ *outp = remote;
+ mutex_unlock(&svp_remote_lock);
+ return (0);
+ }
+
+ if ((ret = svp_remote_create(host, port, outp)) != 0) {
+ mutex_unlock(&svp_remote_lock);
+ return (ret);
+ }
+
+ avl_add(&svp_remote_tree, *outp);
+ mutex_unlock(&svp_remote_lock);
+
+ /* Make sure DNS is up to date */
+ svp_host_queue(*outp);
+
+ return (0);
+}
+
+void
+svp_remote_release(svp_remote_t *srp)
+{
+ mutex_lock(&svp_remote_lock);
+ mutex_lock(&srp->sr_lock);
+ srp->sr_count--;
+ if (srp->sr_count != 0) {
+ mutex_unlock(&srp->sr_lock);
+ mutex_unlock(&svp_remote_lock);
+ return;
+ }
+ mutex_unlock(&srp->sr_lock);
+
+ avl_remove(&svp_remote_tree, srp);
+ mutex_unlock(&svp_remote_lock);
+ svp_remote_destroy(srp);
+}
+
+int
+svp_remote_attach(svp_remote_t *srp, svp_t *svp)
+{
+ svp_t check;
+ avl_index_t where;
+
+ mutex_lock(&srp->sr_lock);
+ if (svp->svp_remote != NULL)
+ libvarpd_panic("failed to create mutex sr_lock");
+
+ /*
+ * We require everything except shootdowns
+ */
+ if (svp->svp_cb.scb_vl2_lookup == NULL)
+ libvarpd_panic("missing callback scb_vl2_lookup");
+ if (svp->svp_cb.scb_vl3_lookup == NULL)
+ libvarpd_panic("missing callback scb_vl3_lookup");
+ if (svp->svp_cb.scb_vl2_invalidate == NULL)
+ libvarpd_panic("missing callback scb_vl2_invalidate");
+ if (svp->svp_cb.scb_vl3_inject == NULL)
+ libvarpd_panic("missing callback scb_vl3_inject");
+
+ check.svp_vid = svp->svp_vid;
+ if (avl_find(&srp->sr_tree, &check, &where) != NULL)
+ libvarpd_panic("found duplicate entry with vid %ld",
+ svp->svp_vid);
+ avl_insert(&srp->sr_tree, svp, where);
+ svp->svp_remote = srp;
+ mutex_unlock(&srp->sr_lock);
+
+ return (0);
+}
+
+void
+svp_remote_detach(svp_t *svp)
+{
+ svp_t *lookup;
+ svp_remote_t *srp = svp->svp_remote;
+
+ if (srp == NULL)
+ libvarpd_panic("trying to detach remote when none exists");
+
+ mutex_lock(&srp->sr_lock);
+ lookup = avl_find(&srp->sr_tree, svp, NULL);
+ if (lookup == NULL || lookup != svp)
+ libvarpd_panic("inconsitent remote avl tree...");
+ avl_remove(&srp->sr_tree, svp);
+ svp->svp_remote = NULL;
+ mutex_unlock(&srp->sr_lock);
+ svp_remote_release(srp);
+}
+
+/*
+ * Walk the list of connections and find the first one that's available, the
+ * move it to the back of the list so it's less likely to be used again.
+ */
+static boolean_t
+svp_remote_conn_queue(svp_remote_t *srp, svp_query_t *sqp)
+{
+ svp_conn_t *scp;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ for (scp = list_head(&srp->sr_conns); scp != NULL;
+ scp = list_next(&srp->sr_conns, scp)) {
+ mutex_lock(&scp->sc_lock);
+ if (scp->sc_cstate != SVP_CS_ACTIVE) {
+ mutex_unlock(&scp->sc_lock);
+ continue;
+ }
+ svp_conn_queue(scp, sqp);
+ mutex_unlock(&scp->sc_lock);
+ list_remove(&srp->sr_conns, scp);
+ list_insert_tail(&srp->sr_conns, scp);
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+static void
+svp_remote_vl2_lookup_cb(svp_query_t *sqp, void *arg)
+{
+ svp_t *svp = sqp->sq_svp;
+ svp_vl2_ack_t *vl2a = (svp_vl2_ack_t *)sqp->sq_wdata;
+
+ if (sqp->sq_status == SVP_S_OK)
+ svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status,
+ (struct in6_addr *)vl2a->sl2a_addr, ntohs(vl2a->sl2a_port),
+ arg);
+ else
+ svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, NULL, 0, arg);
+}
+
+void
+svp_remote_vl2_lookup(svp_t *svp, svp_query_t *sqp, const uint8_t *mac,
+ void *arg)
+{
+ svp_remote_t *srp;
+ svp_vl2_req_t *vl2r = &sqp->sq_rdun.sqd_vl2r;
+
+ srp = svp->svp_remote;
+ sqp->sq_func = svp_remote_vl2_lookup_cb;
+ sqp->sq_arg = arg;
+ sqp->sq_svp = svp;
+ sqp->sq_state = SVP_QUERY_INIT;
+ sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
+ sqp->sq_header.svp_op = htons(SVP_R_VL2_REQ);
+ sqp->sq_header.svp_size = htonl(sizeof (svp_vl2_req_t));
+ /*
+ * XXX ID, crc32 need real values
+ */
+ sqp->sq_header.svp_id = id_alloc(svp_idspace);
+ if (sqp->sq_header.svp_id == -1)
+ libvarpd_panic("failed to allcoate from svp_idspace: %d",
+ errno);
+ sqp->sq_header.svp_crc32 = htonl(0);
+ sqp->sq_rdata = vl2r;
+ sqp->sq_rsize = sizeof (svp_vl2_req_t);
+ sqp->sq_wdata = NULL;
+ sqp->sq_wsize = 0;
+
+ bcopy(mac, vl2r->sl2r_mac, ETHERADDRL);
+ vl2r->sl2r_vnetid = ntohl(svp->svp_vid);
+
+ mutex_lock(&srp->sr_lock);
+ if (svp_remote_conn_queue(srp, sqp) == B_FALSE)
+ svp->svp_cb.scb_vl2_lookup(svp, SVP_S_FATAL, NULL, NULL, arg);
+ mutex_unlock(&srp->sr_lock);
+}
+
+static void
+svp_remote_vl3_lookup_cb(svp_query_t *sqp, void *arg)
+{
+ svp_t *svp = sqp->sq_svp;
+ svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
+
+ if (sqp->sq_status == SVP_S_OK)
+ svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, vl3a->sl3a_mac,
+ (struct in6_addr *)vl3a->sl3a_uip, ntohs(vl3a->sl3a_uport),
+ arg);
+ else
+ svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, NULL, NULL, 0,
+ arg);
+}
+
+void
+svp_remote_vl3_lookup(svp_t *svp, svp_query_t *sqp,
+ const struct sockaddr *addr, void *arg)
+{
+ svp_remote_t *srp;
+ svp_vl3_req_t *vl3r = &sqp->sq_rdun.sdq_vl3r;
+
+ if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
+ libvarpd_panic("unexpected sa_family for the vl3 lookup");
+
+ srp = svp->svp_remote;
+ sqp->sq_func = svp_remote_vl3_lookup_cb;
+ sqp->sq_arg = arg;
+ sqp->sq_svp = svp;
+ sqp->sq_state = SVP_QUERY_INIT;
+ sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
+ sqp->sq_header.svp_op = htons(SVP_R_VL3_REQ);
+ sqp->sq_header.svp_size = htons(sizeof (svp_vl3_req_t));
+ /*
+ * XXX ID, crc32 need real values
+ */
+ sqp->sq_header.svp_id = id_alloc(svp_idspace);
+ if (sqp->sq_header.svp_id == -1)
+ libvarpd_panic("failed to allcoate from svp_idspace: %d",
+ errno);
+ sqp->sq_header.svp_crc32 = htonl(0);
+ sqp->sq_rdata = vl3r;
+ sqp->sq_rsize = sizeof (svp_vl3_req_t);
+ sqp->sq_wdata = NULL;
+ sqp->sq_wsize = 0;
+
+ if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)addr;
+ vl3r->sl3r_type = SVP_VL3_IPV6;
+ bcopy(&s6->sin6_addr, vl3r->sl3r_ip,
+ sizeof (struct in6_addr));
+ } else {
+ struct sockaddr_in *s4 = (struct sockaddr_in *)addr;
+ struct in6_addr v6;
+
+ vl3r->sl3r_type = SVP_VL3_IP;
+ IN6_INADDR_TO_V4MAPPED(&s4->sin_addr, &v6);
+ bcopy(&v6, vl3r->sl3r_ip, sizeof (struct in6_addr));
+ }
+ vl3r->sl3r_vnetid = ntohl(svp->svp_vid);
+
+ mutex_lock(&srp->sr_lock);
+ if (svp_remote_conn_queue(srp, sqp) == B_FALSE)
+ svp->svp_cb.scb_vl3_lookup(svp, SVP_S_FATAL, NULL, NULL, NULL,
+ arg);
+ mutex_unlock(&srp->sr_lock);
+}
+
+void
+svp_remote_dns_timer(void *unused)
+{
+ svp_remote_t *s;
+ mutex_lock(&svp_remote_lock);
+ for (s = avl_first(&svp_remote_tree); s != NULL;
+ s = AVL_NEXT(&svp_remote_tree, s)) {
+ svp_host_queue(s);
+ }
+ mutex_unlock(&svp_remote_lock);
+}
+
+void
+svp_remote_resolved(svp_remote_t *srp, struct addrinfo *newaddrs)
+{
+ struct addrinfo *a;
+ svp_conn_t *scp;
+ int ngen;
+
+ mutex_lock(&srp->sr_lock);
+ srp->sr_gen++;
+ ngen = srp->sr_gen;
+ mutex_unlock(&srp->sr_lock);
+
+ for (a = newaddrs; a != NULL; a = a->ai_next) {
+ struct in6_addr in6;
+ struct in6_addr *addrp;
+
+ if (a->ai_family != AF_INET && a->ai_family != AF_INET6)
+ continue;
+
+ if (a->ai_family == AF_INET) {
+ struct sockaddr_in *v4;
+ v4 = (struct sockaddr_in *)a->ai_addr;
+ addrp = &in6;
+ IN6_INADDR_TO_V4MAPPED(&v4->sin_addr, addrp);
+ } else {
+ struct sockaddr_in6 *v6;
+ v6 = (struct sockaddr_in6 *)a->ai_addr;
+ addrp = &v6->sin6_addr;
+ }
+
+ mutex_lock(&srp->sr_lock);
+ for (scp = list_head(&srp->sr_conns); scp != NULL;
+ scp = list_next(&srp->sr_conns, scp)) {
+ mutex_lock(&scp->sc_lock);
+ if (bcmp(addrp, &scp->sc_addr,
+ sizeof (struct in6_addr)) == 0) {
+ scp->sc_gen = ngen;
+ mutex_unlock(&scp->sc_lock);
+ break;
+ }
+ mutex_unlock(&scp->sc_lock);
+ }
+
+ /*
+ * We need to be careful in the assumptions that we make here,
+ * as there's a good chance that svp_conn_create will
+ * drop the svp_remote_t`sr_lock to kick off its effective event
+ * loop.
+ */
+ if (scp == NULL)
+ svp_conn_create(srp, addrp);
+ mutex_unlock(&srp->sr_lock);
+ }
+
+ /*
+ * Now it's time to clean things up. We do not actively clean up the
+ * current connections that we have, instead allowing them to stay
+ * around assuming that they're still useful. Instead, we go through and
+ * purge the degraded list for anything that's from an older generation.
+ */
+ mutex_lock(&srp->sr_lock);
+ for (scp = list_head(&srp->sr_conns); scp != NULL;
+ scp = list_next(&srp->sr_conns, scp)) {
+ boolean_t fall = B_FALSE;
+ mutex_lock(&scp->sc_lock);
+ if (scp->sc_gen < srp->sr_gen)
+ fall = B_TRUE;
+ mutex_unlock(&scp->sc_lock);
+ if (fall == B_TRUE)
+ svp_conn_fallout(scp);
+ }
+ mutex_unlock(&srp->sr_lock);
+}
+
+/*
+ * This connection is in the process of being reset, we need to reassign all of
+ * its queries to other places or mark them as fatal.
+ */
+void
+svp_remote_reassign(svp_remote_t *srp, svp_conn_t *scp)
+{
+ assert(MUTEX_HELD(&srp->sr_lock));
+ svp_query_t *sqp;
+
+ /*
+ * As we try to reassing all of its queries, remove it from the list.
+ */
+ list_remove(&srp->sr_conns, scp);
+
+ while ((sqp = list_remove_head(&scp->sc_queries)) != NULL) {
+ sqp->sq_wdata = NULL;
+ sqp->sq_wsize = 0;
+ sqp->sq_acttime = -1;
+
+ /*
+ * XXX We probably want to maintain a queue of these for some
+ * time.
+ */
+ if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
+ sqp->sq_status = SVP_S_FATAL;
+ sqp->sq_func(sqp, sqp->sq_arg);
+ }
+ }
+
+ /*
+ * Now that we're done, go ahead and re-insert.
+ */
+ list_insert_tail(&srp->sr_conns, scp);
+}
+
+void
+svp_remote_degrade(svp_remote_t *srp, svp_degrade_state_t flag)
+{
+ int sf, nf;
+ char buf[256];
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+
+ if (flag == SVP_RD_ALL || flag == 0)
+ libvarpd_panic("invalid flag passed to degrade");
+
+ if ((flag & srp->sr_degrade) != 0) {
+ return;
+ }
+
+ sf = ffs(srp->sr_degrade);
+ nf = ffs(flag);
+ srp->sr_degrade |= flag;
+ if (sf == 0 || sf > nf) {
+ svp_t *svp;
+ svp_remote_mkfmamsg(srp, flag, buf, sizeof (buf));
+
+ for (svp = avl_first(&srp->sr_tree); svp != NULL;
+ svp = AVL_NEXT(&srp->sr_tree, svp)) {
+ libvarpd_fma_degrade(svp->svp_hdl, buf);
+ }
+ }
+}
+
+void
+svp_remote_restore(svp_remote_t *srp, svp_degrade_state_t flag)
+{
+ int sf, nf;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ sf = ffs(srp->sr_degrade);
+ if ((srp->sr_degrade & flag) != flag)
+ return;
+ srp->sr_degrade &= ~flag;
+ nf = ffs(srp->sr_degrade);
+
+ /*
+ * If we're now empty, restore the device. If we still are degraded, but
+ * we now have a higher base than we used to, change the message.
+ */
+ if (srp->sr_degrade == 0) {
+ svp_t *svp;
+ for (svp = avl_first(&srp->sr_tree); svp != NULL;
+ svp = AVL_NEXT(&srp->sr_tree, svp)) {
+ libvarpd_fma_restore(svp->svp_hdl);
+ }
+ } else if (nf != sf) {
+ svp_t *svp;
+ char buf[256];
+
+ svp_remote_mkfmamsg(srp, 1U << (nf - 1), buf, sizeof (buf));
+ for (svp = avl_first(&srp->sr_tree); svp != NULL;
+ svp = AVL_NEXT(&srp->sr_tree, svp)) {
+ libvarpd_fma_degrade(svp->svp_hdl, buf);
+ }
+ }
+}
+
+int
+svp_remote_init(void)
+{
+ svp_idspace = id_space_create("svp_req_ids", 1, INT32_MAX);
+ if (svp_idspace == NULL)
+ return (errno);
+ avl_create(&svp_remote_tree, svp_remote_comparator,
+ sizeof (svp_remote_t), offsetof(svp_remote_t, sr_gnode));
+ svp_dns_timer.st_func = svp_remote_dns_timer;
+ svp_dns_timer.st_arg = NULL;
+ svp_dns_timer.st_oneshot = B_FALSE;
+ svp_dns_timer.st_value = svp_dns_timer_rate;
+ svp_timer_add(&svp_dns_timer);
+ return (0);
+}
+
+void
+svp_remote_fini(void)
+{
+ svp_timer_remove(&svp_dns_timer);
+ avl_destroy(&svp_remote_tree);
+ if (svp_idspace == NULL)
+ id_space_destroy(svp_idspace);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c
new file mode 100644
index 0000000000..f1fb2908c7
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c
@@ -0,0 +1,144 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+#include <stddef.h>
+#include <libvarpd_svp.h>
+
+/*
+ * This implements all of the logic of maintaining a timer for the svp backend.
+ * We have a timer that fires at a one second tick. We maintain all of our
+ * events in avl tree, sorted by the tick that they need to be processed at.
+ */
+
+int svp_tickrate = 1;
+static svp_event_t svp_timer_event;
+static mutex_t svp_timer_lock = DEFAULTMUTEX;
+static cond_t svp_timer_cv = DEFAULTCV;
+static avl_tree_t svp_timer_tree;
+static uint64_t svp_timer_nticks;
+
+static int
+svp_timer_comparator(const void *l, const void *r)
+{
+ const svp_timer_t *lt, *rt;
+
+ lt = l;
+ rt = r;
+
+ if (lt->st_expire > rt->st_expire)
+ return (1);
+ else if (lt->st_expire < rt->st_expire)
+ return (-1);
+
+ /*
+ * Multiple timers can have the same delivery time, so sort within that
+ * by the address of the timer itself.
+ */
+ if ((uintptr_t)lt > (uintptr_t)rt)
+ return (1);
+ else if ((uintptr_t)lt < (uintptr_t)rt)
+ return (-1);
+
+ return (0);
+}
+
+static void
+svp_timer_tick(port_event_t *pe, void *arg)
+{
+ mutex_lock(&svp_timer_lock);
+ svp_timer_nticks++;
+
+ for (;;) {
+ svp_timer_t *t;
+
+ t = avl_first(&svp_timer_tree);
+ if (t == NULL || t->st_expire > svp_timer_nticks)
+ break;
+
+ avl_remove(&svp_timer_tree, t);
+
+ /*
+ * We drop this while performing an operation so that way state
+ * can advance in the face of a long-running callback.
+ */
+ t->st_delivering = B_TRUE;
+ mutex_unlock(&svp_timer_lock);
+ t->st_func(t->st_arg);
+ mutex_lock(&svp_timer_lock);
+ t->st_delivering = B_FALSE;
+ cond_broadcast(&svp_timer_cv);
+ if (t->st_oneshot == B_FALSE) {
+ t->st_expire += t->st_value;
+ avl_add(&svp_timer_tree, t);
+ }
+ }
+ mutex_unlock(&svp_timer_lock);
+}
+
+void
+svp_timer_add(svp_timer_t *stp)
+{
+ if (stp->st_value == 0)
+ libvarpd_panic("tried to add svp timer with zero value");
+
+ mutex_lock(&svp_timer_lock);
+ stp->st_delivering = B_FALSE;
+ stp->st_expire = svp_timer_nticks + stp->st_value;
+ avl_add(&svp_timer_tree, stp);
+ mutex_unlock(&svp_timer_lock);
+}
+
+void
+svp_timer_remove(svp_timer_t *stp)
+{
+ mutex_lock(&svp_timer_lock);
+
+ /*
+ * If the event in question is not currently being delivered, then we
+ * can stop it before it next fires. If it is currently being delivered,
+ * we need to wait for that to finish. Because we hold the timer lock,
+ * we know that it cannot be rearmed. Therefore, we make sure the one
+ * shot is set to zero, and wait until it's no longer set to delivering.
+ */
+ if (stp->st_delivering == B_FALSE) {
+ avl_remove(&svp_timer_tree, stp);
+ mutex_unlock(&svp_timer_lock);
+ return;
+ }
+
+ stp->st_oneshot = B_TRUE;
+ while (stp->st_delivering == B_TRUE)
+ cond_wait(&svp_timer_cv, &svp_timer_lock);
+
+ mutex_unlock(&svp_timer_lock);
+}
+
+int
+svp_timer_init(void)
+{
+ int ret;
+
+ svp_timer_event.se_func = svp_timer_tick;
+ svp_timer_event.se_arg = NULL;
+
+ avl_create(&svp_timer_tree, svp_timer_comparator, sizeof (svp_timer_t),
+ offsetof(svp_timer_t, st_link));
+
+ if ((ret = svp_event_timer_init(&svp_timer_event)) != 0) {
+ avl_destroy(&svp_timer_tree);
+ }
+
+ return (ret);
+}
diff --git a/usr/src/lib/varpd/svp/common/llib-lvarpd_svp b/usr/src/lib/varpd/svp/common/llib-lvarpd_svp
new file mode 100644
index 0000000000..31b3d36fbe
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/llib-lvarpd_svp
@@ -0,0 +1,18 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
diff --git a/usr/src/lib/varpd/svp/common/mapfile-vers b/usr/src/lib/varpd/svp/common/mapfile-vers
new file mode 100644
index 0000000000..642ef72adc
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/mapfile-vers
@@ -0,0 +1,35 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION SUNWprivate {
+ local:
+ *;
+};
diff --git a/usr/src/lib/varpd/svp/i386/Makefile b/usr/src/lib/varpd/svp/i386/Makefile
new file mode 100644
index 0000000000..cf2f2487af
--- /dev/null
+++ b/usr/src/lib/varpd/svp/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/svp/sparc/Makefile b/usr/src/lib/varpd/svp/sparc/Makefile
new file mode 100644
index 0000000000..cf2f2487af
--- /dev/null
+++ b/usr/src/lib/varpd/svp/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/svp/sparcv9/Makefile b/usr/src/lib/varpd/svp/sparcv9/Makefile
new file mode 100644
index 0000000000..b64b830ddd
--- /dev/null
+++ b/usr/src/lib/varpd/svp/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/man/man1m/ping.1m b/usr/src/man/man1m/ping.1m
index 6aa5ecc604..3c40dd9b00 100644
--- a/usr/src/man/man1m/ping.1m
+++ b/usr/src/man/man1m/ping.1m
@@ -1,5 +1,6 @@
'\" te
.\" Copyright (C) 2006, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright (C) 2015, Joyent, Inc.
.\" Copyright 1989 AT&T
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
@@ -15,7 +16,7 @@ ping \- send ICMP (ICMP6) ECHO_REQUEST packets to network hosts
.LP
.nf
-\fB/usr/sbin/ping\fR \fB-s\fR [\fB-l\fR | \fB-U\fR] [\fB-abdlLnrRv\fR] [\fB-A\fR \fIaddr_family\fR]
+\fB/usr/sbin/ping\fR \fB-s\fR [\fB-l\fR | \fB-U\fR] [\fB-abdDlLnrRv\fR] [\fB-A\fR \fIaddr_family\fR]
[\fB-c\fR \fItraffic_class\fR] [\fB-g\fR \fIgateway\fR [\fB-g\fR \fIgateway\fR...]]
[\fB-N\fR \fInext_hop_router\fR] [\fB-F\fR \fIflow_label\fR] [\fB-I\fR \fIinterval\fR]
[\fB-i\fR \fIinterface\fR] [\fB-P\fR \fItos\fR] [\fB-p\fR \fIport\fR] [\fB-t\fR \fIttl\fR] \fIhost\fR
@@ -23,7 +24,6 @@ ping \- send ICMP (ICMP6) ECHO_REQUEST packets to network hosts
.fi
.SH DESCRIPTION
-.sp
.LP
The utility \fBping\fR utilizes the \fBICMP\fR (\fBICMP6\fR in IPv6) protocol's
\fBECHO_REQUEST\fR datagram to elicit an \fBICMP\fR (\fBICMP6\fR)
@@ -68,7 +68,6 @@ displays a summary of this information upon termination or timeout. The default
When using \fBping\fR for fault isolation, first \fBping\fR the local host to
verify that the local network interface is running.
.SH OPTIONS
-.sp
.LP
The following options are supported:
.sp
@@ -100,6 +99,15 @@ host is unknown and exits.
.sp
.ne 2
.na
+.B -D
+.ad
+.RS 24n
+Specifies that the don't fragment bit should be set.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-F\fR\ \fIflow_label\fR\fR
.ad
.RS 24n
@@ -336,7 +344,6 @@ from the target host.
.RE
.SH OPERANDS
-.sp
.ne 2
.na
\fB\fIhost\fR\fR
@@ -389,7 +396,6 @@ This example shows the \fBndd\fR module, \fB/dev/icmp\fR, used to set the
.sp
.SH EXIT STATUS
-.sp
.LP
The following exit values are returned:
.sp
@@ -412,7 +418,6 @@ machine was not alive.
.RE
.SH SEE ALSO
-.sp
.LP
\fBifconfig\fR(1M), \fBin.routed\fR(1M), \fBndd\fR(1M), \fBnetstat\fR(1M),
\fBrpcinfo\fR(1M), \fBtraceroute\fR(1M), \fBattributes\fR(5), \fBicmp\fR(7P),
diff --git a/usr/src/man/man3socket/Makefile b/usr/src/man/man3socket/Makefile
index c4bbd116c7..53a6e475e8 100644
--- a/usr/src/man/man3socket/Makefile
+++ b/usr/src/man/man3socket/Makefile
@@ -155,9 +155,11 @@ ntohll.3socket := LINKSRC = byteorder.3socket
ntohs.3socket := LINKSRC = byteorder.3socket
ether_aton.3socket := LINKSRC = ethers.3socket
+ether_aton_r.3socket := LINKSRC = ethers.3socket
ether_hostton.3socket := LINKSRC = ethers.3socket
ether_line.3socket := LINKSRC = ethers.3socket
ether_ntoa.3socket := LINKSRC = ethers.3socket
+ether_ntoa_r.3socket := LINKSRC = ethers.3socket
ether_ntohost.3socket := LINKSRC = ethers.3socket
freeaddrinfo.3socket := LINKSRC = getaddrinfo.3socket
diff --git a/usr/src/man/man3socket/ethers.3socket b/usr/src/man/man3socket/ethers.3socket
index 94cec9c957..377a0b884f 100644
--- a/usr/src/man/man3socket/ethers.3socket
+++ b/usr/src/man/man3socket/ethers.3socket
@@ -1,12 +1,13 @@
'\" te
.\" Copyright 1989 AT&T.
.\" Copyright (C) 2004, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (C) 2014, Joyent, Inc. All Rights Reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH ETHERS 3SOCKET "Apr 5, 2004"
+.TH ETHERS 3SOCKET "Aug 9, 2014"
.SH NAME
-ethers, ether_ntoa, ether_aton, ether_ntohost, ether_hostton, ether_line \-
+ethers, ether_ntoa, ether_ntoa_r, ether_aton, ether_aton_r, ether_ntohost, ether_hostton, ether_line \-
Ethernet address mapping operations
.SH SYNOPSIS
.LP
@@ -20,11 +21,21 @@ Ethernet address mapping operations
.LP
.nf
+\fBchar *\fR\fBether_ntoa_r\fR(\fBconst struct ether_addr *\fR\fIe\fR, \fBchar *\fR\fIs\fR);
+.fi
+
+.LP
+.nf
\fBstruct ether_addr *\fR\fBether_aton\fR(\fBconst char *\fR\fIs\fR);
.fi
.LP
.nf
+\fBstruct ether_addr *\fR\fBether_aton_r\fR(\fBconst char *\fR\fIs\fR, \fBstruct ether_addr *\fR\fIe\fR);
+.fi
+
+.LP
+.nf
\fBint\fR \fBether_ntohost\fR(\fBchar *\fR\fIhostname\fR, \fBconst struct ether_addr *\fR\fIe\fR);
.fi
@@ -55,6 +66,18 @@ representation back to a 48 bit Ethernet number; the function returns
\fINULL\fR if the string cannot be scanned successfully.
.sp
.LP
+The functions \fBether_ntoa()\fR and \fBether_aton()\fR return values in
+per-thread buffers, one for each function. A second call to one of these
+functions will overwrite the previous value. The functions
+\fBether_ntoa_r()\fR and \fBether_aton_r()\fR behave identically to
+their non-reentrant versions; however, instead of using a per-thread
+buffer, they use caller-supplied buffers. It is the caller's
+responsibility to ensure that the character buffer passed to
+\fBether_ntoa_r()\fR is at least \fBETHERADDRSTRL\fR bytes large -- the
+minimum size to hold the ASCII representation of a 48 bit Ethernet
+number and a \fINULL\fR terminator.
+.sp
+.LP
The function \fBether_ntohost()\fR maps an Ethernet number (pointed to by
\fIe\fR) to its associated hostname. The string pointed to by hostname must be
long enough to hold the hostname and a \fINULL\fR character. The function
diff --git a/usr/src/test/util-tests/runfiles/default.run b/usr/src/test/util-tests/runfiles/default.run
index 3f7498e23b..c8a3f290f1 100644
--- a/usr/src/test/util-tests/runfiles/default.run
+++ b/usr/src/test/util-tests/runfiles/default.run
@@ -24,6 +24,7 @@ outputdir = /var/tmp/test_results
[/opt/util-tests/tests/printf_test]
[/opt/util-tests/tests/allowed-ips]
+[/opt/util-tests/tests/bunyan]
[/opt/util-tests/tests/xargs_test]
diff --git a/usr/src/test/util-tests/tests/bunyan/Makefile b/usr/src/test/util-tests/tests/bunyan/Makefile
new file mode 100644
index 0000000000..1ddf7b254d
--- /dev/null
+++ b/usr/src/test/util-tests/tests/bunyan/Makefile
@@ -0,0 +1,69 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc.
+#
+
+include $(SRC)/Makefile.master
+
+ROOTOPTPKG = $(ROOT)/opt/util-tests
+TESTDIR = $(ROOTOPTPKG)/tests/bunyan
+ROOTBINDIR = $(ROOTOPTPKG)/bin
+
+PROG = btest
+
+SCRIPTS = \
+ bunyan
+
+include $(SRC)/cmd/Makefile.cmd
+include $(SRC)/test/Makefile.com
+
+OBJS = $(PROG:%=%.o)
+SRCS = $(OBJS:%.o=%.c)
+
+CMDS = $(PROG:%=$(ROOTBINDIR)/%) $(SCRIPTS:%=$(TESTDIR)/%)
+$(CMDS) := FILEMODE = 0555
+
+LDLIBS += -lbunyan
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+install: all $(CMDS)
+
+lint:
+
+clobber: clean
+ -$(RM) $(PROG)
+
+clean:
+ -$(RM) $(OBJS)
+
+$(CMDS): $(TESTDIR) $(PROG)
+
+$(ROOTBINDIR):
+ $(INS.dir)
+
+$(ROOTBINDIR)/%: %
+ $(INS.file)
+
+$(TESTDIR):
+ $(INS.dir)
+
+$(TESTDIR)/%: %.ksh
+ $(INS.rename)
+
+$(TESTDIR)/%: %
+ $(INS.file)
diff --git a/usr/src/test/util-tests/tests/bunyan/btest.c b/usr/src/test/util-tests/tests/bunyan/btest.c
new file mode 100644
index 0000000000..5239e91f1e
--- /dev/null
+++ b/usr/src/test/util-tests/tests/bunyan/btest.c
@@ -0,0 +1,312 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include <bunyan.h>
+#include <netinet/in.h>
+#include <strings.h>
+
+static void
+create_handles(void)
+{
+ bunyan_logger_t *a, *b, *c;
+
+ assert(bunyan_init("foo", &a) == 0);
+ assert(bunyan_init("foo", &b) == 0);
+ assert(bunyan_init("foo", &c) == 0);
+ bunyan_fini(a);
+ bunyan_fini(b);
+ bunyan_fini(c);
+}
+
+static void
+create_stream(void)
+{
+ bunyan_logger_t *a;
+
+ assert(bunyan_init("foo", &a) == 0);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_stream_add(a, "baz", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == EEXIST);
+ assert(bunyan_stream_add(a, "baz", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == EEXIST);
+ assert(bunyan_stream_remove(a, "baz") == 0);
+ assert(bunyan_stream_remove(a, "baz") == ENOENT);
+ assert(bunyan_stream_remove(a, "foobaz") == ENOENT);
+ assert(bunyan_stream_remove(a, "blah") == ENOENT);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == EEXIST);
+ assert(bunyan_stream_add(a, "baz", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_stream_add(a, "debug", BUNYAN_L_DEBUG,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_stream_add(a, "info", BUNYAN_L_INFO,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_stream_add(a, "warn", BUNYAN_L_WARN,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_stream_add(a, "error", BUNYAN_L_ERROR,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_stream_add(a, "fatal", BUNYAN_L_FATAL,
+ bunyan_stream_fd, (void *)1) == 0);
+
+ bunyan_fini(a);
+}
+
+static void
+create_key(void)
+{
+ bunyan_logger_t *a;
+ struct in_addr v4;
+ struct in6_addr v6;
+
+ assert(bunyan_init("foo", &a) == 0);
+ assert(bunyan_key_remove(a, "blah") == ENOENT);
+ assert(bunyan_key_add(a, BUNYAN_T_END) == 0);
+
+ assert(bunyan_key_add(a, BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+
+ assert(bunyan_key_remove(a, "s") == 0);
+ assert(bunyan_key_remove(a, "s") == ENOENT);
+ assert(bunyan_key_remove(a, "p") == 0);
+ assert(bunyan_key_remove(a, "p") == ENOENT);
+ assert(bunyan_key_remove(a, "v4") == 0);
+ assert(bunyan_key_remove(a, "v4") == ENOENT);
+ assert(bunyan_key_remove(a, "v6") == 0);
+ assert(bunyan_key_remove(a, "v6") == ENOENT);
+ assert(bunyan_key_remove(a, "b") == 0);
+ assert(bunyan_key_remove(a, "b") == ENOENT);
+ assert(bunyan_key_remove(a, "i32") == 0);
+ assert(bunyan_key_remove(a, "i32") == ENOENT);
+ assert(bunyan_key_remove(a, "i64") == 0);
+ assert(bunyan_key_remove(a, "i64") == ENOENT);
+ assert(bunyan_key_remove(a, "u32") == 0);
+ assert(bunyan_key_remove(a, "u32") == ENOENT);
+ assert(bunyan_key_remove(a, "u64") == 0);
+ assert(bunyan_key_remove(a, "u64") == ENOENT);
+ assert(bunyan_key_remove(a, "d") == 0);
+ assert(bunyan_key_remove(a, "d") == ENOENT);
+ assert(bunyan_key_remove(a, "i64s") == 0);
+ assert(bunyan_key_remove(a, "i64s") == ENOENT);
+ assert(bunyan_key_remove(a, "u64s") == 0);
+ assert(bunyan_key_remove(a, "u64s") == ENOENT);
+
+ bunyan_fini(a);
+}
+
+static void
+bad_level(void)
+{
+ bunyan_logger_t *a;
+
+ assert(bunyan_init("bad level", &a) == 0);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_TRACE - 1,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_TRACE + 1,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_DEBUG - 1,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_INFO + 1,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_WARN - 1,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_ERROR + 1,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_FATAL - 1,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+ assert(bunyan_stream_add(a, "bar", -5,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+
+ bunyan_fini(a);
+}
+
+static void
+basic_log(void)
+{
+ bunyan_logger_t *a;
+
+ assert(bunyan_init("basic", &a) == 0);
+ assert(bunyan_stream_add(a, "foo", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_trace(a, "trace", BUNYAN_T_END) == 0);
+ assert(bunyan_debug(a, "debug", BUNYAN_T_END) == 0);
+ assert(bunyan_info(a, "info", BUNYAN_T_END) == 0);
+ assert(bunyan_warn(a, "warn", BUNYAN_T_END) == 0);
+ assert(bunyan_error(a, "error", BUNYAN_T_END) == 0);
+ assert(bunyan_fatal(a, "fatal", BUNYAN_T_END) == 0);
+
+ bunyan_fini(a);
+}
+
+static void
+crazy_log(void)
+{
+ bunyan_logger_t *a;
+ struct in_addr v4;
+ struct in6_addr v6;
+
+ bzero(&v4, sizeof (struct in_addr));
+ bzero(&v6, sizeof (struct in6_addr));
+
+ assert(bunyan_init("basic", &a) == 0);
+ assert(bunyan_stream_add(a, "foo", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_trace(a, "trace", BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+ assert(bunyan_debug(a, "debug", BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+ assert(bunyan_info(a, "info", BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+ assert(bunyan_warn(a, "warn", BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+ assert(bunyan_error(a, "error", BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+ assert(bunyan_fatal(a, "fatal", BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+
+ bunyan_fini(a);
+}
+
+static void
+child_log(void)
+{
+ bunyan_logger_t *a, *child;
+ struct in_addr v4;
+ struct in6_addr v6;
+
+ bzero(&v4, sizeof (struct in_addr));
+ bzero(&v6, sizeof (struct in6_addr));
+
+ assert(bunyan_init("child", &a) == 0);
+ assert(bunyan_stream_add(a, "foo", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_child(a, &child, BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+
+ bunyan_fini(a);
+ assert(bunyan_trace(child, "trace", BUNYAN_T_END) == 0);
+ assert(bunyan_debug(child, "debug", BUNYAN_T_END) == 0);
+ assert(bunyan_info(child, "info", BUNYAN_T_END) == 0);
+ assert(bunyan_warn(child, "warn", BUNYAN_T_END) == 0);
+ assert(bunyan_error(child, "error", BUNYAN_T_END) == 0);
+ assert(bunyan_fatal(child, "fatal", BUNYAN_T_END) == 0);
+
+ bunyan_fini(child);
+}
+
+static void
+crazy_child(void)
+{
+ bunyan_logger_t *a, *child;
+ struct in_addr v4;
+ struct in6_addr v6;
+
+ bzero(&v4, sizeof (struct in_addr));
+ bzero(&v6, sizeof (struct in6_addr));
+
+ assert(bunyan_init("crazy child", &a) == 0);
+ assert(bunyan_stream_add(a, "foo", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_key_add(a, BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+ assert(bunyan_child(a, &child, BUNYAN_T_END) == 0);
+ bunyan_fini(a);
+
+ assert(bunyan_stream_remove(child, "foo") == 0);
+ assert(bunyan_stream_add(child, "bar", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_key_remove(child, "u64s") == 0);
+ assert(bunyan_trace(child, "trace", BUNYAN_T_END) == 0);
+ assert(bunyan_debug(child, "debug", BUNYAN_T_END) == 0);
+ assert(bunyan_info(child, "info", BUNYAN_T_END) == 0);
+ assert(bunyan_warn(child, "warn", BUNYAN_T_END) == 0);
+ assert(bunyan_error(child, "error", BUNYAN_T_END) == 0);
+ assert(bunyan_fatal(child, "fatal", BUNYAN_T_END) == 0);
+
+ bunyan_fini(child);
+}
+
+int
+main(void)
+{
+ create_handles();
+ create_stream();
+ create_key();
+ bad_level();
+ basic_log();
+ crazy_log();
+ child_log();
+ crazy_child();
+
+ return (0);
+}
diff --git a/usr/src/test/util-tests/tests/bunyan/bunyan.ksh b/usr/src/test/util-tests/tests/bunyan/bunyan.ksh
new file mode 100644
index 0000000000..4ec0f4bd62
--- /dev/null
+++ b/usr/src/test/util-tests/tests/bunyan/bunyan.ksh
@@ -0,0 +1,26 @@
+#! /usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014, Joyent, Inc.
+#
+
+#
+# Simple wrapper for the classic test.out
+#
+
+set -o errexit
+
+btest_root=$(dirname $0)/../..
+btest_bin=$btest_root/bin/btest
+
+LD_PRELOAD=libumem.so UMEM_DEBUG=default $btest_bin >/dev/null
diff --git a/usr/src/uts/Makefile.targ b/usr/src/uts/Makefile.targ
index 034817dc31..86ef21bc3c 100644
--- a/usr/src/uts/Makefile.targ
+++ b/usr/src/uts/Makefile.targ
@@ -205,6 +205,9 @@ $(ROOT_FONT_DIR)/%: $(OBJS_DIR)/% $(ROOT_MOD_DIR) $(ROOT_FONT_DIR) FRC
$(ROOT_MAC_DIR)/%: $(OBJS_DIR)/% $(ROOT_MOD_DIR) $(ROOT_MAC_DIR) FRC
$(INS.file)
+$(ROOT_OVERLAY_DIR)/%: $(OBJS_DIR)/% $(ROOT_MOD_DIR) $(ROOT_OVERLAY_DIR) FRC
+ $(INS.file)
+
$(USR_DRV_DIR)/%: $(OBJS_DIR)/% $(USR_DRV_DIR) FRC
$(INS.file)
diff --git a/usr/src/uts/Makefile.uts b/usr/src/uts/Makefile.uts
index 33de8780bd..219a310dc3 100644
--- a/usr/src/uts/Makefile.uts
+++ b/usr/src/uts/Makefile.uts
@@ -457,6 +457,7 @@ ROOT_FONT_DIR_32 = $(ROOT_MOD_DIR)/fonts
ROOT_DACF_DIR_32 = $(ROOT_MOD_DIR)/dacf
ROOT_CRYPTO_DIR_32 = $(ROOT_MOD_DIR)/crypto
ROOT_MAC_DIR_32 = $(ROOT_MOD_DIR)/mac
+ROOT_OVERLAY_DIR_32 = $(ROOT_MOD_DIR)/overlay
ROOT_KICONV_DIR_32 = $(ROOT_MOD_DIR)/kiconv
ROOT_KERN_DIR_64 = $(ROOT_MOD_DIR)/$(SUBDIR64)
@@ -484,6 +485,7 @@ ROOT_FONT_DIR_64 = $(ROOT_MOD_DIR)/fonts/$(SUBDIR64)
ROOT_DACF_DIR_64 = $(ROOT_MOD_DIR)/dacf/$(SUBDIR64)
ROOT_CRYPTO_DIR_64 = $(ROOT_MOD_DIR)/crypto/$(SUBDIR64)
ROOT_MAC_DIR_64 = $(ROOT_MOD_DIR)/mac/$(SUBDIR64)
+ROOT_OVERLAY_DIR_64 = $(ROOT_MOD_DIR)/overlay/$(SUBDIR64)
ROOT_KICONV_DIR_64 = $(ROOT_MOD_DIR)/kiconv/$(SUBDIR64)
ROOT_KERN_DIR = $(ROOT_KERN_DIR_$(CLASS))
@@ -511,6 +513,7 @@ ROOT_FONT_DIR = $(ROOT_FONT_DIR_$(CLASS))
ROOT_DACF_DIR = $(ROOT_DACF_DIR_$(CLASS))
ROOT_CRYPTO_DIR = $(ROOT_CRYPTO_DIR_$(CLASS))
ROOT_MAC_DIR = $(ROOT_MAC_DIR_$(CLASS))
+ROOT_OVERLAY_DIR = $(ROOT_OVERLAY_DIR_$(CLASS))
ROOT_KICONV_DIR = $(ROOT_KICONV_DIR_$(CLASS))
ROOT_MOD_DIRS_32 = $(ROOT_BRAND_DIR_32) $(ROOT_DRV_DIR_32)
@@ -528,6 +531,7 @@ ROOT_MOD_DIRS_32 += $(ROOT_EMLXS_FW_DIR_32)
ROOT_MOD_DIRS_32 += $(ROOT_CPU_DIR_32) $(ROOT_FONT_DIR_32)
ROOT_MOD_DIRS_32 += $(ROOT_TOD_DIR_32) $(ROOT_DACF_DIR_32)
ROOT_MOD_DIRS_32 += $(ROOT_CRYPTO_DIR_32) $(ROOT_MAC_DIR_32)
+ROOT_MOD_DIRS_32 += $(ROOT_OVERLAY_DIR_32)
ROOT_MOD_DIRS_32 += $(ROOT_KICONV_DIR_32)
USR_MOD_DIR = $(ROOT)/usr/kernel
@@ -620,7 +624,7 @@ PARALLEL_KMODS = $(DRV_KMODS) $(EXEC_KMODS) $(FS_KMODS) $(SCHED_KMODS) \
$(MMU_KMODS) $(DACF_KMODS) $(EXPORT_KMODS) $(IPP_KMODS) \
$(CRYPTO_KMODS) $(PCBE_KMODS) \
$(DRV_KMODS_$(CLASS)) $(MISC_KMODS_$(CLASS)) $(MAC_KMODS) \
- $(BRAND_KMODS) $(KICONV_KMODS) \
+ $(BRAND_KMODS) $(KICONV_KMODS) $(OVERLAY_KMODS) \
$(SOCKET_KMODS)
KMODS = $(GENUNIX_KMODS) $(PARALLEL_KMODS)
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 4de1edf971..4f0b76f33d 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -280,6 +280,7 @@ GENUNIX_OBJS += \
rctl.o \
rctlsys.o \
readlink.o \
+ refhash.o \
refstr.o \
rename.o \
resolvepath.o \
@@ -682,6 +683,15 @@ NET80211_OBJS += net80211.o net80211_proto.o net80211_input.o \
VNIC_OBJS += vnic_ctl.o vnic_dev.o
+OVERLAY_OBJS += overlay.o overlay_fm.o overlay_mux.o overlay_plugin.o \
+ overlay_prop.o overlay_target.o
+
+OVERLAY_GENEVE_OBJS += overlay_geneve.o
+
+OVERLAY_NVGRE_OBJS += overlay_nvgre.o
+
+OVERLAY_VXLAN_OBJS += overlay_vxlan.o
+
VND_OBJS += vnd.o frameio.o
GSQUEUE_OBJS += gsqueue.o
@@ -1873,7 +1883,7 @@ ZYD_OBJS += zyd.o zyd_usb.o zyd_hw.o zyd_fw.o
MXFE_OBJS += mxfe.o
-MPTSAS_OBJS += mptsas.o mptsas_hash.o mptsas_impl.o mptsas_init.o \
+MPTSAS_OBJS += mptsas.o mptsas_impl.o mptsas_init.o \
mptsas_raid.o mptsas_smhba.o
SFE_OBJS += sfe.o sfe_util.o
diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules
index 9b523bbec4..55eaa38697 100644
--- a/usr/src/uts/common/Makefile.rules
+++ b/usr/src/uts/common/Makefile.rules
@@ -994,6 +994,14 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/nxge/npi/%.c
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/nxge/%.s
$(COMPILE.s) -o $@ $<
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/overlay/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/overlay/plugins/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/pci-ide/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1494,6 +1502,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/vioblk/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(COMMONBASE)/idspace/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
#
# krtld must refer to its own bzero/bcopy until the kernel is fully linked
#
@@ -1562,6 +1574,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/pcmcia/pcs/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/refhash/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/rpc/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -2337,6 +2353,12 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/nxge/%.s
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/nxge/npi/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/overlay/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/overlay/plugins/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/pci-ide/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -2724,6 +2746,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/pcmcia/nexus/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/pcmcia/pcs/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/refhash/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/rpc/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -2802,3 +2827,6 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/skd/%.c
$(LINTS_DIR)/%.ln: $(COMMONBASE)/fsreparse/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(COMMONBASE)/idspace/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/common/fs/dev/sdev_netops.c b/usr/src/uts/common/fs/dev/sdev_netops.c
index 3f637f4cf5..41441ec52d 100644
--- a/usr/src/uts/common/fs/dev/sdev_netops.c
+++ b/usr/src/uts/common/fs/dev/sdev_netops.c
@@ -487,8 +487,10 @@ devnet_filldir(struct sdev_node *ddv)
/*
* Clean out everything underneath before we remove ourselves.
*/
- ret = sdev_cleandir(dv, NULL, 0);
- ASSERT(ret == 0);
+ if (SDEVTOV(ddv)->v_type == VDIR) {
+ ret = sdev_cleandir(dv, NULL, 0);
+ ASSERT(ret == 0);
+ }
/* remove the cache node */
(void) sdev_cache_update(ddv, &dv, dv->sdev_name,
SDEV_CACHE_DELETE);
diff --git a/usr/src/uts/common/fs/sockfs/sockcommon.c b/usr/src/uts/common/fs/sockfs/sockcommon.c
index 703e26ea61..0b58def71c 100644
--- a/usr/src/uts/common/fs/sockfs/sockcommon.c
+++ b/usr/src/uts/common/fs/sockfs/sockcommon.c
@@ -501,6 +501,9 @@ sonode_constructor(void *buf, void *cdrarg, int kmflags)
cv_init(&so->so_copy_cv, NULL, CV_DEFAULT, NULL);
cv_init(&so->so_closing_cv, NULL, CV_DEFAULT, NULL);
+ so->so_krecv_cb = NULL;
+ so->so_krecv_arg = NULL;
+
return (0);
}
@@ -654,6 +657,10 @@ sonode_fini(struct sonode *so)
if (so->so_filter_top != NULL)
sof_sonode_cleanup(so);
+ /* Clean up any remnants of krecv callbacks */
+ so->so_krecv_cb = NULL;
+ so->so_krecv_arg = NULL;
+
ASSERT(list_is_empty(&so->so_acceptq_list));
ASSERT(list_is_empty(&so->so_acceptq_defer));
ASSERT(!list_link_active(&so->so_acceptq_node));
diff --git a/usr/src/uts/common/fs/sockfs/sockcommon_sops.c b/usr/src/uts/common/fs/sockfs/sockcommon_sops.c
index d3ff264eef..652dcf9245 100644
--- a/usr/src/uts/common/fs/sockfs/sockcommon_sops.c
+++ b/usr/src/uts/common/fs/sockfs/sockcommon_sops.c
@@ -586,11 +586,6 @@ so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));
- if ((so->so_mode & SM_SENDFILESUPP) == 0) {
- SO_UNBLOCK_FALLBACK(so);
- return (EOPNOTSUPP);
- }
-
error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
B_FALSE);
@@ -1327,6 +1322,26 @@ so_queue_msg_impl(struct sonode *so, mblk_t *mp,
}
}
+ mutex_enter(&so->so_lock);
+ if (so->so_krecv_cb != NULL) {
+ boolean_t cont;
+ so_krecv_f func = so->so_krecv_cb;
+ void *arg = so->so_krecv_arg;
+
+ mutex_exit(&so->so_lock);
+ cont = so->so_krecv_cb(so, mp, msg_size, flags & MSG_OOB, arg);
+ mutex_enter(&so->so_lock);
+ if (cont == B_TRUE) {
+ space_left = so->so_rcvbuf;
+ } else {
+ so->so_rcv_queued = so->so_rcvlowat;
+ *errorp = ENOSPC;
+ space_left = -1;
+ }
+ goto done_unlock;
+ }
+ mutex_exit(&so->so_lock);
+
if (flags & MSG_OOB) {
so_queue_oob(so, mp, msg_size);
mutex_enter(&so->so_lock);
@@ -1605,6 +1620,13 @@ so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
return (ENOTCONN);
}
+ mutex_enter(&so->so_lock);
+ if (so->so_krecv_cb != NULL) {
+ mutex_exit(&so->so_lock);
+ return (EOPNOTSUPP);
+ }
+ mutex_exit(&so->so_lock);
+
if (msg->msg_flags & MSG_PEEK)
msg->msg_flags &= ~MSG_WAITALL;
diff --git a/usr/src/uts/common/fs/sockfs/sockcommon_subr.c b/usr/src/uts/common/fs/sockfs/sockcommon_subr.c
index 957c8f93b4..cec64a19d6 100644
--- a/usr/src/uts/common/fs/sockfs/sockcommon_subr.c
+++ b/usr/src/uts/common/fs/sockfs/sockcommon_subr.c
@@ -2276,9 +2276,9 @@ so_tpi_fallback(struct sonode *so, struct cred *cr)
fbfunc = sp->sp_smod_info->smod_proto_fallback_func;
/*
- * Cannot fallback if the socket has active filters
+ * Cannot fallback if the socket has active filters or a krecv callback.
*/
- if (so->so_filter_active > 0)
+ if (so->so_filter_active > 0 || so->so_krecv_cb != NULL)
return (EINVAL);
switch (so->so_family) {
@@ -2456,3 +2456,49 @@ out:
return (error);
}
+
+int
+so_krecv_set(sonode_t *so, so_krecv_f cb, void *arg)
+{
+ int ret;
+
+ if (cb == NULL && arg != NULL)
+ return (EINVAL);
+
+ SO_BLOCK_FALLBACK(so, so_krecv_set(so, cb, arg));
+
+ mutex_enter(&so->so_lock);
+ if (so->so_state & SS_FALLBACK_COMP) {
+ mutex_exit(&so->so_lock);
+ SO_UNBLOCK_FALLBACK(so);
+ return (ENOTSUP);
+ }
+
+ ret = so_lock_read(so, 0);
+ VERIFY(ret == 0);
+ /*
+ * XXX We should really deliver this data to the user in a
+ * flush-friendly way.
+ */
+ so_rcv_flush(so);
+
+ so->so_krecv_cb = cb;
+ so->so_krecv_arg = arg;
+
+ so_unlock_read(so);
+ mutex_exit(&so->so_lock);
+ SO_UNBLOCK_FALLBACK(so);
+
+ return (0);
+}
+
+void
+so_krecv_unblock(sonode_t *so)
+{
+ mutex_enter(&so->so_lock);
+ VERIFY(so->so_krecv_cb != NULL);
+
+ so->so_rcv_queued = 0;
+ (void) so_check_flow_control(so);
+ mutex_exit(&so->so_lock);
+}
diff --git a/usr/src/uts/common/io/dld/dld_drv.c b/usr/src/uts/common/io/dld/dld_drv.c
index 2152ce0baa..21080529ad 100644
--- a/usr/src/uts/common/io/dld/dld_drv.c
+++ b/usr/src/uts/common/io/dld/dld_drv.c
@@ -1378,7 +1378,8 @@ static dld_ioc_modentry_t dld_ioc_modtable[] = {
{SIMNET_IOC, "simnet", 0, NULL, 0},
{BRIDGE_IOC, "bridge", 0, NULL, 0},
{IPTUN_IOC, "iptun", 0, NULL, 0},
- {IBPART_IOC, "ibp", -1, NULL, 0}
+ {IBPART_IOC, "ibp", -1, NULL, 0},
+ {OVERLAY_IOC, "overlay", 0, NULL, 0}
};
#define DLDIOC_CNT \
(sizeof (dld_ioc_modtable) / sizeof (dld_ioc_modentry_t))
diff --git a/usr/src/uts/common/io/ksocket/ksocket.c b/usr/src/uts/common/io/ksocket/ksocket.c
index 8944fcbff3..d83b10dcca 100644
--- a/usr/src/uts/common/io/ksocket/ksocket.c
+++ b/usr/src/uts/common/io/ksocket/ksocket.c
@@ -932,3 +932,15 @@ ksocket_rele(ksocket_t ks)
cv_signal(&so->so_closing_cv);
}
}
+
+int
+ksocket_krecv_set(ksocket_t ks, ksocket_krecv_f cb, void *arg)
+{
+ return (so_krecv_set(KSTOSO(ks), (so_krecv_f)cb, arg));
+}
+
+void
+ksocket_krecv_unblock(ksocket_t ks)
+{
+ return (so_krecv_unblock(KSTOSO(ks)));
+}
diff --git a/usr/src/uts/common/io/ksocket/ksocket_impl.h b/usr/src/uts/common/io/ksocket/ksocket_impl.h
index ac5251540f..7dce836dd0 100644
--- a/usr/src/uts/common/io/ksocket/ksocket_impl.h
+++ b/usr/src/uts/common/io/ksocket/ksocket_impl.h
@@ -27,6 +27,11 @@
#ifndef _INET_KSOCKET_KSOCKET_IMPL_H
#define _INET_KSOCKET_KSOCKET_IMPL_H
+/*
+ * Note that if this relationship ever changes, the logic in ksocket_krecv_set
+ * must be updated and we must maintain local state about this on whatever the
+ * new ksocket object is.
+ */
#define KSTOSO(ks) ((struct sonode *)(ks))
#define SOTOKS(so) ((ksocket_t)(uintptr_t)(so))
diff --git a/usr/src/uts/common/io/overlay/overlay.c b/usr/src/uts/common/io/overlay/overlay.c
new file mode 100644
index 0000000000..b0d64e7f71
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay.c
@@ -0,0 +1,1432 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * XXX
+ */
+
+#include <sys/conf.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/modctl.h>
+#include <sys/policy.h>
+#include <sys/stream.h>
+#include <sys/strsubr.h>
+#include <sys/strsun.h>
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/ddifm.h>
+
+#include <sys/dls.h>
+#include <sys/dld_ioc.h>
+#include <sys/mac_provider.h>
+#include <sys/mac_client_priv.h>
+#include <sys/mac_ether.h>
+#include <sys/vlan.h>
+/* XXX Should we really need this? */
+#include <sys/socket.h>
+#include <inet/ip.h>
+
+#include <sys/overlay_impl.h>
+
+dev_info_t *overlay_dip;
+static kmutex_t overlay_dev_lock;
+static list_t overlay_dev_list;
+static uint8_t overlay_macaddr[ETHERADDRL] =
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+
+typedef enum overlay_dev_prop {
+ OVERLAY_DEV_P_MTU = 0,
+ OVERLAY_DEV_P_VNETID,
+ OVERLAY_DEV_P_ENCAP,
+ OVERLAY_DEV_P_VARPDID
+} overlay_dev_prop_t;
+
+#define OVERLAY_DEV_NPROPS 4
+static const char *overlay_dev_props[] = {
+ "mtu",
+ "vnetid",
+ "encap",
+ "varpd/id"
+};
+
+/*
+ * XXX Dear Future rm, these are obviously kind of janky values, going from the
+ * minimum size an IP implementation can handle to 100 less than MTU. While
+ * having a default of 1400 is a bit unfortunate, it really does help things.
+ * However, we should determine the largest MTU in a given netstack and use that
+ * to bracket these. eg. min should be min(576, max(netstack mtus)-100) and max
+ * should be max(netstack mtus) - 100).
+ *
+ * There are also some open questions on how we should handle interacting with
+ * PMTU. I'd suggest that in general, we set the don't fragment bit on our
+ * selves, or make that a property of a given protocol and then always ensure
+ * that we listen for errors and emit them in a form of FMA ireport/erprort.
+ */
+#define OVERLAY_MTU_MIN 576
+#define OVERLAY_MTU_DEF 1400
+#define OVERLAY_MTU_MAX 8900
+
+overlay_dev_t *
+overlay_hold_by_dlid(datalink_id_t id)
+{
+ overlay_dev_t *o;
+
+ mutex_enter(&overlay_dev_lock);
+ for (o = list_head(&overlay_dev_list); o != NULL;
+ o = list_next(&overlay_dev_list, o)) {
+ if (id == o->odd_linkid) {
+ mutex_enter(&o->odd_lock);
+ o->odd_ref++;
+ mutex_exit(&o->odd_lock);
+ mutex_exit(&overlay_dev_lock);
+ return (o);
+ }
+ }
+
+ mutex_exit(&overlay_dev_lock);
+ return (NULL);
+}
+
+void
+overlay_hold_rele(overlay_dev_t *odd)
+{
+ mutex_enter(&odd->odd_lock);
+ ASSERT(odd->odd_ref > 0);
+ odd->odd_ref--;
+ mutex_exit(&odd->odd_lock);
+}
+
+void
+overlay_io_start(overlay_dev_t *odd, overlay_dev_flag_t flag)
+{
+ ASSERT(flag == OVERLAY_F_IN_RX || flag == OVERLAY_F_IN_TX);
+ ASSERT(MUTEX_HELD(&odd->odd_lock));
+
+ /* XXX Stat tracking */
+ if (flag & OVERLAY_F_IN_RX)
+ odd->odd_rxcount++;
+ if (flag & OVERLAY_F_IN_TX)
+ odd->odd_txcount++;
+ odd->odd_flags |= flag;
+}
+
+void
+overlay_io_done(overlay_dev_t *odd, overlay_dev_flag_t flag)
+{
+ boolean_t signal = B_FALSE;
+
+ ASSERT(flag == OVERLAY_F_IN_RX || flag == OVERLAY_F_IN_TX);
+ ASSERT(MUTEX_HELD(&odd->odd_lock));
+
+ /* XXX Stat tracking */
+ if (flag & OVERLAY_F_IN_RX) {
+ ASSERT(odd->odd_rxcount > 0);
+ odd->odd_rxcount--;
+ if (odd->odd_rxcount == 0) {
+ signal = B_TRUE;
+ odd->odd_flags &= ~OVERLAY_F_IN_RX;
+ }
+ }
+ if (flag & OVERLAY_F_IN_TX) {
+ ASSERT(odd->odd_txcount > 0);
+ odd->odd_txcount--;
+ if (odd->odd_txcount == 0) {
+ signal = B_TRUE;
+ odd->odd_flags &= ~OVERLAY_F_IN_TX;
+ }
+ }
+
+ /*
+ * XXX Work out semantics here so we don't have to broadcast.
+ */
+ if (signal == B_TRUE)
+ cv_broadcast(&odd->odd_iowait);
+}
+
+static void
+overlay_io_wait(overlay_dev_t *odd, overlay_dev_flag_t flag)
+{
+ ASSERT((flag & ~OVERLAY_F_IOMASK) == 0);
+ ASSERT(MUTEX_HELD(&odd->odd_lock));
+
+ while (odd->odd_flags & flag) {
+ cv_wait(&odd->odd_iowait, &odd->odd_lock);
+ }
+}
+
+void
+overlay_dev_iter(overlay_dev_iter_f func, void *arg)
+{
+ overlay_dev_t *odd;
+
+ mutex_enter(&overlay_dev_lock);
+ for (odd = list_head(&overlay_dev_list); odd != NULL;
+ odd = list_next(&overlay_dev_list, odd)) {
+ if (func(odd, arg) != 0) {
+ mutex_exit(&overlay_dev_lock);
+ return;
+ }
+ }
+ mutex_exit(&overlay_dev_lock);
+}
+
+static int
+overlay_m_stat(void *arg, uint_t stat, uint64_t *val)
+{
+ return (ENOTSUP);
+}
+
+/*
+ * XXX We should use this as a means of lazily opening the lower level
+ * port that we end up using.
+ */
+static int
+overlay_m_start(void *arg)
+{
+ overlay_dev_t *odd = arg;
+ overlay_mux_t *mux;
+ int ret, domain, family, prot;
+ struct sockaddr_storage storage;
+ socklen_t slen;
+
+ mutex_enter(&odd->odd_lock);
+ if ((odd->odd_flags & OVERLAY_F_ACTIVATED) == 0) {
+ mutex_exit(&odd->odd_lock);
+ return (EAGAIN);
+ }
+ mutex_exit(&odd->odd_lock);
+
+ ret = odd->odd_plugin->ovp_ops->ovpo_socket(odd->odd_pvoid, &domain,
+ &family, &prot, (struct sockaddr *)&storage, &slen);
+ if (ret != 0)
+ return (ret);
+
+ mux = overlay_mux_open(odd->odd_plugin, domain, family, prot,
+ (struct sockaddr *)&storage, slen, &ret);
+ if (mux == NULL)
+ return (ret);
+
+ overlay_mux_add_dev(mux, odd);
+ odd->odd_mux = mux;
+ mutex_enter(&odd->odd_lock);
+ ASSERT(!(odd->odd_flags & OVERLAY_F_IN_MUX));
+ odd->odd_flags |= OVERLAY_F_IN_MUX;
+ mutex_exit(&odd->odd_lock);
+
+ return (0);
+}
+
+static void
+overlay_m_stop(void *arg)
+{
+ overlay_dev_t *odd = arg;
+
+ /*
+ * The MAC Perimeter is held here, so we don't have to worry about
+ * synchornizing this with respect to metadata operations.
+ */
+ mutex_enter(&odd->odd_lock);
+ VERIFY(odd->odd_flags & OVERLAY_F_IN_MUX);
+ VERIFY(!(odd->odd_flags & OVERLAY_F_MDDROP));
+ odd->odd_flags |= OVERLAY_F_MDDROP;
+ overlay_io_wait(odd, OVERLAY_F_IOMASK);
+ mutex_exit(&odd->odd_lock);
+
+ overlay_mux_remove_dev(odd->odd_mux, odd);
+ overlay_mux_close(odd->odd_mux);
+ odd->odd_mux = NULL;
+
+ mutex_enter(&odd->odd_lock);
+ odd->odd_flags &= ~OVERLAY_F_IN_MUX;
+ odd->odd_flags &= ~OVERLAY_F_MDDROP;
+ VERIFY((odd->odd_flags & OVERLAY_F_STOPMASK) == 0);
+ mutex_exit(&odd->odd_lock);
+}
+
+static int
+overlay_m_promisc(void *arg, boolean_t on)
+{
+ /* XXX Is there anything for us to do for promisc, I don't think so */
+ return (0);
+}
+
+static int
+overlay_m_multicast(void *arg, boolean_t add, const uint8_t *addrp)
+{
+ /*
+ * XXX Semantically we support an unlimited number of multicast mac
+ * addresses. I think the real long term question is should this hit
+ * varpd for notification and/or approval? Likely only the former.
+ */
+ return (0);
+}
+
+static int
+overlay_m_unicast(void *arg, const uint8_t *macaddr)
+{
+ /*
+ * XXX Semantically we support an unlimited number of multicast mac
+ * addresses. I think the real long term question is should this hit
+ * varpd for notification and/or approval? Likely only the former.
+ */
+ return (0);
+}
+
+mblk_t *
+overlay_m_tx(void *arg, mblk_t *mp_chain)
+{
+ overlay_dev_t *odd = arg;
+ mblk_t *mp, *ep;
+ int ret;
+ ovep_encap_info_t einfo;
+ struct msghdr hdr;
+
+ mutex_enter(&odd->odd_lock);
+ if ((odd->odd_flags & OVERLAY_F_MDDROP) ||
+ !(odd->odd_flags & OVERLAY_F_IN_MUX)) {
+ /* XXX Stats, etc. */
+ mutex_exit(&odd->odd_lock);
+ freemsgchain(mp_chain);
+ return (NULL);
+ }
+ overlay_io_start(odd, OVERLAY_F_IN_TX);
+ mutex_exit(&odd->odd_lock);
+
+ bzero(&hdr, sizeof (struct msghdr));
+
+ /* XXX Zero this out */
+ einfo.ovdi_id = odd->odd_vid;
+ mp = mp_chain;
+ while (mp != NULL) {
+ socklen_t slen;
+ struct sockaddr_storage storage;
+
+ mp_chain = mp->b_next;
+ mp->b_next = NULL;
+ ep = NULL;
+
+ ret = overlay_target_lookup(odd, mp,
+ (struct sockaddr *)&storage, &slen);
+ if (ret != OVERLAY_TARGET_OK) {
+ if (ret == OVERLAY_TARGET_DROP)
+ freemsg(mp);
+ mp = mp_chain;
+ continue;
+ }
+
+ hdr.msg_name = &storage;
+ hdr.msg_namelen = slen;
+
+ ret = odd->odd_plugin->ovp_ops->ovpo_encap(odd->odd_mh, mp,
+ &einfo, &ep);
+ if (ret != 0 || ep == NULL) {
+ freemsg(mp);
+ goto out;
+ }
+
+ ep->b_cont = mp;
+ ret = overlay_mux_tx(odd->odd_mux, &hdr, ep);
+ if (ret != 0)
+ goto out;
+
+ mp = mp_chain;
+ }
+
+out:
+ mutex_enter(&odd->odd_lock);
+ overlay_io_done(odd, OVERLAY_F_IN_TX);
+ mutex_exit(&odd->odd_lock);
+ return (mp_chain);
+}
+
+static void
+overlay_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
+{
+ miocnak(q, mp, 0, ENOTSUP);
+}
+
+static boolean_t
+overlay_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
+{
+ return (B_FALSE);
+}
+
+static int
+overlay_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, const void *pr_val)
+{
+ uint32_t mtu, old;
+ int err;
+ overlay_dev_t *odd = arg;
+
+ if (pr_num != MAC_PROP_MTU)
+ return (ENOTSUP);
+
+ bcopy(pr_val, &mtu, sizeof (mtu));
+ if (mtu < OVERLAY_MTU_MIN || mtu > OVERLAY_MTU_MAX)
+ return (EINVAL);
+
+ mutex_enter(&odd->odd_lock);
+ old = odd->odd_mtu;
+ odd->odd_mtu = mtu;
+ err = mac_maxsdu_update(odd->odd_mh, mtu);
+ if (err != 0)
+ odd->odd_mtu = old;
+ mutex_exit(&odd->odd_lock);
+
+ return (err);
+}
+
+/*
+ * XXX Come back and figure out what other properties to emulate. Don't forget,
+ * MAC handles a bunch of these for us.
+ */
+static int
+overlay_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, void *pr_val)
+{
+ return (ENOTSUP);
+}
+
+static void
+overlay_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ mac_prop_info_handle_t prh)
+{
+ if (pr_num != MAC_PROP_MTU)
+ return;
+
+ mac_prop_info_set_default_uint32(prh, OVERLAY_MTU_DEF);
+ mac_prop_info_set_range_uint32(prh, OVERLAY_MTU_MIN, OVERLAY_MTU_MAX);
+}
+
+static mac_callbacks_t overlay_m_callbacks = {
+ .mc_callbacks = (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP |
+ MC_PROPINFO),
+ .mc_getstat = overlay_m_stat,
+ .mc_start = overlay_m_start,
+ .mc_stop = overlay_m_stop,
+ .mc_setpromisc = overlay_m_promisc,
+ .mc_multicst = overlay_m_multicast,
+ .mc_unicst = overlay_m_unicast,
+ .mc_tx = overlay_m_tx,
+ .mc_ioctl = overlay_m_ioctl,
+ .mc_getcapab = overlay_m_getcapab,
+ .mc_getprop = overlay_m_getprop,
+ .mc_setprop = overlay_m_setprop,
+ .mc_propinfo = overlay_m_propinfo
+};
+
+static boolean_t
+overlay_valid_name(const char *name, size_t buflen)
+{
+ size_t actlen;
+ int err, i;
+
+ for (i = 0; i < buflen; i++) {
+ if (name[i] == '\0')
+ break;
+ }
+
+ if (i == 0 || i == buflen)
+ return (B_FALSE);
+ actlen = i;
+ if (strchr(name, '/') != NULL)
+ return (B_FALSE);
+ if (u8_validate((char *)name, actlen, NULL,
+ U8_VALIDATE_ENTIRE, &err) < 0)
+ return (B_FALSE);
+ return (B_TRUE);
+}
+
+static int
+overlay_i_create(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
+{
+ int err;
+ uint64_t maxid;
+ overlay_dev_t *odd, *o;
+ mac_register_t *mac;
+ char name[MAXLINKNAMELEN];
+ overlay_ioc_create_t *oicp = karg;
+
+ if (overlay_valid_name(oicp->oic_encap, MAXLINKNAMELEN) == B_FALSE)
+ return (EINVAL);
+
+ odd = kmem_zalloc(sizeof (overlay_dev_t), KM_SLEEP);
+ odd->odd_linkid = oicp->oic_linkid;
+ odd->odd_plugin = overlay_plugin_lookup(oicp->oic_encap);
+ if (odd->odd_plugin == NULL) {
+ kmem_free(odd, sizeof (overlay_dev_t));
+ /* XXX Better errno */
+ return (ENOENT);
+ }
+ err = odd->odd_plugin->ovp_ops->ovpo_init((overlay_handle_t)odd,
+ &odd->odd_pvoid);
+ if (err != 0) {
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+ /* XXX Better errno */
+ return (EINVAL);
+ }
+
+ /*
+ * Make sure that our virtual network id is valid for the given plugin
+ * that we're working with.
+ */
+ ASSERT(odd->odd_plugin->ovp_id_size <= 8);
+ maxid = UINT64_MAX;
+ if (odd->odd_plugin->ovp_id_size != 8)
+ maxid = (1ULL << (odd->odd_plugin->ovp_id_size * 8)) - 1ULL;
+ if (oicp->oic_vnetid > maxid) {
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+ /* XXX Better errno */
+ return (EINVAL);
+ }
+ odd->odd_vid = oicp->oic_vnetid;
+
+ mac = mac_alloc(MAC_VERSION);
+ if (mac == NULL) {
+ mutex_exit(&overlay_dev_lock);
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+ return (EINVAL);
+ }
+
+ /* TODO These are always good props */
+ mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
+ mac->m_driver = odd;
+ mac->m_dip = overlay_dip;
+ mac->m_dst_addr = NULL;
+ mac->m_callbacks = &overlay_m_callbacks;
+ mac->m_pdata = NULL;
+ mac->m_pdata_size = 0;
+
+ /* XXX This will almost certainly change */
+ mac->m_priv_props = NULL;
+
+ /* XXX I think we should let mac handle this itself */
+ mac->m_instance = (uint_t)-1;
+
+ /*
+ * XXX This is definitely wrong. There is no real source address that
+ * should be used here, but saying that we're not ethernet is going to
+ * cause its own problems.
+ */
+ mac->m_src_addr = overlay_macaddr;
+
+ /*
+ * XXX These should come from the underlying device that we've been
+ * created over and be influenced based on the encap method.
+ */
+ mac->m_min_sdu = 1;
+ mac->m_max_sdu = 1400;
+
+ /*
+ * XXX This needs to come from the encapsulation protocol, well, really,
+ * we'll ask if it it supports vlans then go from there. In this case
+ * the underlying device doesn't matter, since it will be encapsulated.
+ */
+ mac->m_margin = VLAN_TAGSZ;
+
+ /*
+ * XXX While it seems like we should say that we have no virt
+ * assistence, we should figure out what that implies elsewhere and
+ * whether we can leverage existing software.
+ */
+ mac->m_v12n = MAC_VIRT_NONE;
+
+ /*
+ * XXX I'm not sure that we should bother emulating a separate multicast
+ * sdu. While we will have to take it into account in our min and max
+ * sdu that we give because some of our traffic may be arbitrarily sent
+ * over multicast, I'm pretty sure vnics above us don't need it.
+ */
+ mac->m_multicast_sdu = 0;
+
+ mutex_enter(&overlay_dev_lock);
+ for (o = list_head(&overlay_dev_list); o != NULL;
+ o = list_next(&overlay_dev_list, o)) {
+ if (o->odd_linkid == oicp->oic_linkid) {
+ mutex_exit(&overlay_dev_lock);
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+ return (EEXIST);
+ }
+
+ if (o->odd_vid == oicp->oic_vnetid &&
+ o->odd_plugin == odd->odd_plugin) {
+ mutex_exit(&overlay_dev_lock);
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+ return (EEXIST);
+ }
+ }
+
+ err = mac_register(mac, &odd->odd_mh);
+ mac_free(mac);
+ if (err != 0) {
+ mutex_exit(&overlay_dev_lock);
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+ return (err);
+ }
+
+ err = dls_devnet_create(odd->odd_mh, odd->odd_linkid,
+ crgetzoneid(cred));
+ if (err != 0) {
+ mutex_exit(&overlay_dev_lock);
+ (void) mac_unregister(odd->odd_mh);
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+ return (err);
+ }
+
+ mutex_init(&odd->odd_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&odd->odd_iowait, NULL, CV_DRIVER, NULL);
+ odd->odd_ref = 0;
+ odd->odd_flags = 0;
+ list_insert_tail(&overlay_dev_list, odd);
+ mutex_exit(&overlay_dev_lock);
+
+ return (0);
+}
+
+static int
+overlay_i_activate(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
+{
+ int i, ret;
+ overlay_dev_t *odd;
+ mac_perim_handle_t mph;
+ overlay_ioc_activate_t *oiap = karg;
+ overlay_ioc_propinfo_t *infop;
+ overlay_ioc_prop_t *oip;
+ overlay_prop_handle_t phdl;
+
+ odd = overlay_hold_by_dlid(oiap->oia_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ infop = kmem_alloc(sizeof (overlay_ioc_propinfo_t), KM_SLEEP);
+ oip = kmem_alloc(sizeof (overlay_ioc_prop_t), KM_SLEEP);
+ phdl = (overlay_prop_handle_t)infop;
+
+ mac_perim_enter_by_mh(odd->odd_mh, &mph);
+ mutex_enter(&odd->odd_lock);
+ if (odd->odd_flags & OVERLAY_F_ACTIVATED) {
+ mutex_exit(&odd->odd_lock);
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ kmem_free(infop, sizeof (overlay_ioc_propinfo_t));
+ kmem_free(oip, sizeof (overlay_ioc_prop_t));
+ return (EEXIST);
+ }
+ mutex_exit(&odd->odd_lock);
+
+ for (i = 0; i < odd->odd_plugin->ovp_nprops; i++) {
+ const char *pname = odd->odd_plugin->ovp_props[i];
+ bzero(infop, sizeof (overlay_ioc_propinfo_t));
+ overlay_prop_init(phdl);
+ ret = odd->odd_plugin->ovp_ops->ovpo_propinfo(pname, phdl);
+ if (ret != 0) {
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ kmem_free(infop, sizeof (overlay_ioc_propinfo_t));
+ kmem_free(oip, sizeof (overlay_ioc_prop_t));
+ return (ret);
+ }
+
+ if ((infop->oipi_prot & OVERLAY_PROP_PERM_REQ) == 0)
+ continue;
+ bzero(oip, sizeof (overlay_ioc_prop_t));
+ oip->oip_size = sizeof (oip->oip_value);
+ ret = odd->odd_plugin->ovp_ops->ovpo_getprop(odd->odd_pvoid,
+ pname, oip->oip_value, &oip->oip_size);
+ if (ret != 0) {
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ kmem_free(infop, sizeof (overlay_ioc_propinfo_t));
+ kmem_free(oip, sizeof (overlay_ioc_prop_t));
+ return (ret);
+ }
+ if (oip->oip_size == 0) {
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ kmem_free(infop, sizeof (overlay_ioc_propinfo_t));
+ kmem_free(oip, sizeof (overlay_ioc_prop_t));
+ return (EINVAL);
+ }
+ }
+
+ mutex_enter(&odd->odd_lock);
+ if ((odd->odd_flags & OVERLAY_F_VARPD) == 0) {
+ mutex_exit(&odd->odd_lock);
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ kmem_free(infop, sizeof (overlay_ioc_propinfo_t));
+ kmem_free(oip, sizeof (overlay_ioc_prop_t));
+ return (ENXIO);
+ }
+
+ ASSERT((odd->odd_flags & OVERLAY_F_ACTIVATED) == 0);
+ odd->odd_flags |= OVERLAY_F_ACTIVATED;
+ mutex_exit(&odd->odd_lock);
+
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ kmem_free(infop, sizeof (overlay_ioc_propinfo_t));
+ kmem_free(oip, sizeof (overlay_ioc_prop_t));
+
+ return (0);
+}
+
+static int
+overlay_i_delete(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
+{
+ overlay_ioc_delete_t *oidp = karg;
+ overlay_dev_t *odd;
+ datalink_id_t tid;
+ int ret;
+
+ /*
+ * XXX Need to better understand the semantics of what gld is doing with
+ * respect to our data structure... This probably needs to be entirely
+ * reworked.
+ */
+ odd = overlay_hold_by_dlid(oidp->oid_linkid);
+ if (odd == NULL) {
+ return (ENOENT);
+ }
+
+ mutex_enter(&odd->odd_lock);
+ /* If we're not the only hold, we're busy */
+ if (odd->odd_ref != 1) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (EBUSY);
+ }
+
+ if (odd->odd_flags & OVERLAY_F_IN_MUX) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (EBUSY);
+ }
+
+ /*
+ * To remove this, we need to first remove it from dls and then remove
+ * it from mac. The act of removing it from mac will check if there are
+ * devices on top of this, eg. vnics. If there are, then that will fail
+ * and we'll have to go through and recreate the dls entry. Only after
+ * mac_unregister has succeeded, then we'll go through and actually free
+ * everything and drop the dev lock.
+ */
+ ret = dls_devnet_destroy(odd->odd_mh, &tid, B_TRUE);
+ if (ret != 0) {
+ overlay_hold_rele(odd);
+ return (ret);
+ }
+
+ ASSERT(oidp->oid_linkid == tid);
+ ret = mac_disable(odd->odd_mh);
+ if (ret != 0) {
+ (void) dls_devnet_create(odd->odd_mh, odd->odd_linkid,
+ crgetzoneid(cred));
+ overlay_hold_rele(odd);
+ return (ret);
+ }
+
+ overlay_target_quiesce(odd->odd_target);
+
+ mutex_enter(&overlay_dev_lock);
+ list_remove(&overlay_dev_list, odd);
+ mutex_exit(&overlay_dev_lock);
+
+ cv_destroy(&odd->odd_iowait);
+ mutex_destroy(&odd->odd_lock);
+ overlay_target_free(odd);
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+
+ return (0);
+}
+
+static int
+overlay_i_nprops(void *karg, intptr_t arg, int mode, cred_t *cred,
+ int *rvalp)
+{
+ overlay_dev_t *odd;
+ overlay_ioc_nprops_t *on = karg;
+
+ odd = overlay_hold_by_dlid(on->oipn_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+ on->oipn_nprops = odd->odd_plugin->ovp_nprops + OVERLAY_DEV_NPROPS;
+ overlay_hold_rele(odd);
+
+ return (0);
+}
+
+static int
+overlay_propinfo_plugin_cb(overlay_plugin_t *opp, void *arg)
+{
+ overlay_prop_handle_t phdl = arg;
+ overlay_prop_set_range_str(phdl, opp->ovp_name);
+ return (0);
+}
+
+static int
+overlay_i_name_to_propid(overlay_dev_t *odd, const char *name, uint_t *id)
+{
+ int i;
+
+ for (i = 0; i < OVERLAY_DEV_NPROPS; i++) {
+ if (strcmp(overlay_dev_props[i], name) == 0) {
+ *id = i;
+ return (0);
+ }
+ }
+
+ for (i = 0; i < odd->odd_plugin->ovp_nprops; i++) {
+ if (strcmp(odd->odd_plugin->ovp_props[i], name) == 0) {
+ *id = i + OVERLAY_DEV_NPROPS;
+ return (0);
+ }
+ }
+
+ return (ENOENT);
+}
+
+static void
+overlay_i_propinfo_mtu(overlay_dev_t *odd, overlay_prop_handle_t phdl)
+{
+ uint32_t def;
+ mac_propval_range_t range;
+ uint_t perm;
+
+ ASSERT(MAC_PERIM_HELD(odd->odd_mh));
+
+ bzero(&range, sizeof (mac_propval_range_t));
+ range.mpr_count = 1;
+ if (mac_prop_info(odd->odd_mh, MAC_PROP_MTU, "mtu", &def,
+ sizeof (def), &range, &perm) != 0)
+ return;
+
+ if (perm == MAC_PROP_PERM_READ)
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_READ);
+ else if (perm == MAC_PROP_PERM_WRITE)
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_WRITE);
+ else if (perm == MAC_PROP_PERM_RW)
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_RW);
+
+ overlay_prop_set_type(phdl, OVERLAY_PROP_T_UINT);
+ overlay_prop_set_default(phdl, &def, sizeof (def));
+ overlay_prop_set_range_uint32(phdl, range.mpr_range_uint32[0].mpur_min,
+ range.mpr_range_uint32[0].mpur_max);
+}
+
+static int
+overlay_i_propinfo(void *karg, intptr_t arg, int mode, cred_t *cred,
+ int *rvalp)
+{
+ overlay_dev_t *odd;
+ const char *pname;
+ int ret;
+ mac_perim_handle_t mph;
+ uint_t propid = UINT_MAX;
+ overlay_ioc_propinfo_t *oip = karg;
+ overlay_prop_handle_t phdl = (overlay_prop_handle_t)oip;
+
+ odd = overlay_hold_by_dlid(oip->oipi_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ overlay_prop_init(phdl);
+ mac_perim_enter_by_mh(odd->odd_mh, &mph);
+
+ /*
+ * If the id is -1, then the property that we're looking for is named in
+ * oipi_name and we should fill in its id. Otherwise, we've been given
+ * an id and we need to turn that into a name for our plugin's sake. The
+ * id is our own fabrication for property discovery.
+ */
+ if (oip->oipi_id == -1) {
+ int i;
+
+ /*
+ * Determine if it's a known generic property or it belongs to a
+ * module by checking against the list of known names.
+ */
+ oip->oipi_name[OVERLAY_PROP_NAMELEN-1] = '\0';
+ if ((ret = overlay_i_name_to_propid(odd, oip->oipi_name,
+ &propid)) != 0) {
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ret);
+ }
+ oip->oipi_id = propid;
+ if (propid >= OVERLAY_DEV_NPROPS) {
+ ret = odd->odd_plugin->ovp_ops->ovpo_propinfo(
+ oip->oipi_name, phdl);
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ret);
+
+ }
+ } else if (oip->oipi_id >= OVERLAY_DEV_NPROPS) {
+ uint_t id = oip->oipi_id - OVERLAY_DEV_NPROPS;
+
+ if (id >= odd->odd_plugin->ovp_nprops) {
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (EINVAL);
+ }
+ ret = odd->odd_plugin->ovp_ops->ovpo_propinfo(
+ odd->odd_plugin->ovp_props[id], phdl);
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ret);
+ } else if (oip->oipi_id < -1) {
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (EINVAL);
+ } else {
+ ASSERT(oip->oipi_id < OVERLAY_DEV_NPROPS);
+ ASSERT(oip->oipi_id >= 0);
+ propid = oip->oipi_id;
+ (void) strlcpy(oip->oipi_name, overlay_dev_props[propid],
+ sizeof (oip->oipi_name));
+ }
+
+ switch (propid) {
+ case OVERLAY_DEV_P_MTU:
+ overlay_i_propinfo_mtu(odd, phdl);
+ break;
+ case OVERLAY_DEV_P_VNETID:
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_RW);
+ overlay_prop_set_type(phdl, OVERLAY_PROP_T_UINT);
+ overlay_prop_set_nodefault(phdl);
+ break;
+ case OVERLAY_DEV_P_ENCAP:
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_READ);
+ overlay_prop_set_type(phdl, OVERLAY_PROP_T_STRING);
+ overlay_prop_set_nodefault(phdl);
+ overlay_plugin_walk(overlay_propinfo_plugin_cb, phdl);
+ break;
+ case OVERLAY_DEV_P_VARPDID:
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_READ);
+ overlay_prop_set_type(phdl, OVERLAY_PROP_T_UINT);
+ overlay_prop_set_nodefault(phdl);
+ break;
+ default:
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ENOENT);
+ }
+
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (0);
+}
+
+static int
+overlay_i_getprop(void *karg, intptr_t arg, int mode, cred_t *cred,
+ int *rvalp)
+{
+ int ret;
+ overlay_dev_t *odd;
+ mac_perim_handle_t mph;
+ overlay_ioc_prop_t *oip = karg;
+ uint_t propid, mtu;
+
+ odd = overlay_hold_by_dlid(oip->oip_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mac_perim_enter_by_mh(odd->odd_mh, &mph);
+ oip->oip_size = OVERLAY_PROP_SIZEMAX;
+ oip->oip_name[OVERLAY_PROP_NAMELEN-1] = '\0';
+ if (oip->oip_id == -1) {
+ int i;
+
+ for (i = 0; i < OVERLAY_DEV_NPROPS; i++) {
+ if (strcmp(overlay_dev_props[i], oip->oip_name) == 0)
+ break;
+ if (i == OVERLAY_DEV_NPROPS) {
+ ret = odd->odd_plugin->ovp_ops->ovpo_getprop(
+ odd->odd_pvoid, oip->oip_name,
+ oip->oip_value, &oip->oip_size);
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ret);
+ }
+ }
+
+ propid = i;
+ } else if (oip->oip_id >= OVERLAY_DEV_NPROPS) {
+ uint_t id = oip->oip_id - OVERLAY_DEV_NPROPS;
+
+ if (id > odd->odd_plugin->ovp_nprops) {
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (EINVAL);
+ }
+ ret = odd->odd_plugin->ovp_ops->ovpo_getprop(odd->odd_pvoid,
+ odd->odd_plugin->ovp_props[id], oip->oip_value,
+ &oip->oip_size);
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ret);
+ } else if (oip->oip_id < -1) {
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (EINVAL);
+ } else {
+ ASSERT(oip->oip_id < OVERLAY_DEV_NPROPS);
+ ASSERT(oip->oip_id >= 0);
+ propid = oip->oip_id;
+ }
+
+ ret = 0;
+ switch (propid) {
+ case OVERLAY_DEV_P_MTU:
+ /*
+ * The MTU is always set and retrieved through MAC, to allow for
+ * MAC to do whatever it wants, as really that property belongs
+ * to MAC. This is important for things where vnics have hold on
+ * the MTU.
+ */
+ mac_sdu_get(odd->odd_mh, NULL, &mtu);
+ bcopy(&mtu, oip->oip_value, sizeof (uint_t));
+ oip->oip_size = sizeof (uint_t);
+ break;
+ case OVERLAY_DEV_P_VNETID:
+ /*
+ * While it's read-only while inside of a mux, we're not in a
+ * context that can guarantee that. Therefore we always grab the
+ * overlay_dev_t's odd_lock.
+ */
+ mutex_enter(&odd->odd_lock);
+ bcopy(&odd->odd_vid, oip->oip_value, sizeof (uint64_t));
+ mutex_exit(&odd->odd_lock);
+ oip->oip_size = sizeof (uint64_t);
+ break;
+ case OVERLAY_DEV_P_ENCAP:
+ oip->oip_size = strlcpy((char *)oip->oip_value,
+ odd->odd_plugin->ovp_name, oip->oip_size);
+ break;
+ case OVERLAY_DEV_P_VARPDID:
+ mutex_enter(&odd->odd_lock);
+ if (odd->odd_flags & OVERLAY_F_VARPD) {
+ const uint64_t val = odd->odd_target->ott_id;
+ bcopy(&val, oip->oip_value, sizeof (uint64_t));
+ oip->oip_size = sizeof (uint64_t);
+ } else {
+ oip->oip_size = 0;
+ }
+ mutex_exit(&odd->odd_lock);
+ break;
+ default:
+ ret = ENOENT;
+ }
+
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ret);
+}
+
+static void
+overlay_setprop_vnetid(overlay_dev_t *odd, uint64_t vnetid)
+{
+ mutex_enter(&odd->odd_lock);
+
+ /* Simple case, not active */
+ if (!(odd->odd_flags & OVERLAY_F_IN_MUX)) {
+ odd->odd_vid = vnetid;
+ mutex_exit(&odd->odd_lock);
+ return;
+ }
+
+ /*
+ * In the hard case, we need to set the drop flag, quiesce I/O and then
+ * we can go ahead and do everything.
+ */
+ odd->odd_flags |= OVERLAY_F_MDDROP;
+ overlay_io_wait(odd, OVERLAY_F_IOMASK);
+ mutex_exit(&odd->odd_lock);
+
+ overlay_mux_remove_dev(odd->odd_mux, odd);
+ mutex_enter(&odd->odd_lock);
+ odd->odd_vid = vnetid;
+ mutex_exit(&odd->odd_lock);
+ overlay_mux_add_dev(odd->odd_mux, odd);
+
+ mutex_enter(&odd->odd_lock);
+ ASSERT(odd->odd_flags & OVERLAY_F_IN_MUX);
+ odd->odd_flags &= ~OVERLAY_F_IN_MUX;
+ mutex_exit(&odd->odd_lock);
+}
+
+static int
+overlay_i_setprop(void *karg, intptr_t arg, int mode, cred_t *cred,
+ int *rvalp)
+{
+ int ret;
+ overlay_dev_t *odd;
+ overlay_ioc_prop_t *oip = karg;
+ uint_t propid = UINT_MAX;
+ mac_perim_handle_t mph;
+ uint64_t maxid, *vidp;
+
+ if (oip->oip_size > OVERLAY_PROP_SIZEMAX)
+ return (EINVAL);
+
+ odd = overlay_hold_by_dlid(oip->oip_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ oip->oip_name[OVERLAY_PROP_NAMELEN-1] = '\0';
+ mac_perim_enter_by_mh(odd->odd_mh, &mph);
+ mutex_enter(&odd->odd_lock);
+ if (odd->odd_flags & OVERLAY_F_ACTIVATED) {
+ mac_perim_exit(mph);
+ mutex_exit(&odd->odd_lock);
+ return (ENOTSUP);
+ }
+ mutex_exit(&odd->odd_lock);
+ if (oip->oip_id == -1) {
+ int i;
+
+ for (i = 0; i < OVERLAY_DEV_NPROPS; i++) {
+ if (strcmp(overlay_dev_props[i], oip->oip_name) == 0)
+ break;
+ if (i == OVERLAY_DEV_NPROPS) {
+ ret = odd->odd_plugin->ovp_ops->ovpo_setprop(
+ odd->odd_pvoid, oip->oip_name,
+ oip->oip_value, oip->oip_size);
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ret);
+ }
+ }
+
+ propid = i;
+ } else if (oip->oip_id >= OVERLAY_DEV_NPROPS) {
+ uint_t id = oip->oip_id - OVERLAY_DEV_NPROPS;
+
+ if (id > odd->odd_plugin->ovp_nprops) {
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ return (EINVAL);
+ }
+ ret = odd->odd_plugin->ovp_ops->ovpo_setprop(odd->odd_pvoid,
+ odd->odd_plugin->ovp_props[id], oip->oip_value,
+ oip->oip_size);
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ return (ret);
+ } else if (oip->oip_id < -1) {
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ return (EINVAL);
+ } else {
+ ASSERT(oip->oip_id < OVERLAY_DEV_NPROPS);
+ ASSERT(oip->oip_id >= 0);
+ propid = oip->oip_id;
+ }
+
+ ret = 0;
+ switch (propid) {
+ case OVERLAY_DEV_P_MTU:
+ ret = mac_set_prop(odd->odd_mh, MAC_PROP_MTU, "mtu",
+ oip->oip_value, oip->oip_size);
+ break;
+ case OVERLAY_DEV_P_VNETID:
+ if (oip->oip_size != sizeof (uint64_t)) {
+ ret = EINVAL;
+ break;
+ }
+ vidp = (uint64_t *)oip->oip_value;
+ ASSERT(odd->odd_plugin->ovp_id_size <= 8);
+ maxid = UINT64_MAX;
+ if (odd->odd_plugin->ovp_id_size != 8)
+ maxid = (1ULL << (odd->odd_plugin->ovp_id_size * 8)) -
+ 1ULL;
+ if (*vidp >= maxid) {
+ ret = EINVAL;
+ break;
+ }
+ overlay_setprop_vnetid(odd, *vidp);
+ break;
+ case OVERLAY_DEV_P_ENCAP:
+ case OVERLAY_DEV_P_VARPDID:
+ ret = EPERM;
+ break;
+ default:
+ ret = ENOENT;
+ }
+
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ return (ret);
+}
+
+static int
+overlay_i_status(void *karg, intptr_t arg, int mode, cred_t *cred,
+ int *rvalp)
+{
+ overlay_dev_t *odd;
+ overlay_ioc_status_t *os = karg;
+
+ odd = overlay_hold_by_dlid(os->ois_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ if ((odd->odd_flags & OVERLAY_F_DEGRADED) != 0) {
+ os->ois_status = OVERLAY_I_DEGRADED;
+ if (odd->odd_fmamsg != NULL) {
+ (void) strlcpy(os->ois_message, odd->odd_fmamsg,
+ OVERLAY_STATUS_BUFLEN);
+ } else {
+ os->ois_message[0] = '\0';
+ }
+
+ } else {
+ os->ois_status = OVERLAY_I_OK;
+ os->ois_message[0] = '\0';
+ }
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+
+ return (0);
+}
+
+static dld_ioc_info_t overlay_ioc_list[] = {
+ { OVERLAY_IOC_CREATE, DLDCOPYIN, sizeof (overlay_ioc_create_t),
+ overlay_i_create, secpolicy_dl_config },
+ { OVERLAY_IOC_ACTIVATE, DLDCOPYIN, sizeof (overlay_ioc_activate_t),
+ overlay_i_activate, secpolicy_dl_config },
+ { OVERLAY_IOC_DELETE, DLDCOPYIN, sizeof (overlay_ioc_delete_t),
+ overlay_i_delete, secpolicy_dl_config },
+ { OVERLAY_IOC_PROPINFO, DLDCOPYIN | DLDCOPYOUT,
+ sizeof (overlay_ioc_propinfo_t), overlay_i_propinfo,
+ secpolicy_dl_config },
+ { OVERLAY_IOC_GETPROP, DLDCOPYIN | DLDCOPYOUT,
+ sizeof (overlay_ioc_prop_t), overlay_i_getprop,
+ secpolicy_dl_config },
+ { OVERLAY_IOC_SETPROP, DLDCOPYIN,
+ sizeof (overlay_ioc_prop_t), overlay_i_setprop,
+ secpolicy_dl_config },
+ { OVERLAY_IOC_NPROPS, DLDCOPYIN | DLDCOPYOUT,
+ sizeof (overlay_ioc_nprops_t), overlay_i_nprops,
+ secpolicy_dl_config },
+ { OVERLAY_IOC_STATUS, DLDCOPYIN | DLDCOPYOUT,
+ sizeof (overlay_ioc_status_t), overlay_i_status,
+ NULL }
+};
+
+static int
+overlay_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int fmcap = DDI_FM_EREPORT_CAPABLE;
+ if (cmd != DDI_ATTACH)
+ return (DDI_FAILURE);
+
+ if (overlay_dip != NULL || ddi_get_instance(dip) != 0)
+ return (DDI_FAILURE);
+
+ /*
+ * XXX The nexus driver doesn't actually support DDI_FM_REPORT, that's
+ * unfortunate.
+ */
+ ddi_fm_init(dip, &fmcap, NULL);
+ if (fmcap != DDI_FM_EREPORT_CAPABLE)
+ cmn_err(CE_WARN, "XXX didn't get DDI_FM_EREPORT");
+
+ if (ddi_create_minor_node(dip, OVERLAY_CTL, S_IFCHR,
+ ddi_get_instance(dip), DDI_PSEUDO, 0) == DDI_FAILURE)
+ return (DDI_FAILURE);
+
+ if (dld_ioc_register(OVERLAY_IOC, overlay_ioc_list,
+ DLDIOCCNT(overlay_ioc_list)) != 0) {
+ ddi_remove_minor_node(dip, OVERLAY_CTL);
+ return (DDI_FAILURE);
+ }
+
+ overlay_dip = dip;
+ return (DDI_SUCCESS);
+}
+
+static int
+overlay_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp)
+{
+ int error;
+
+ switch (cmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ *resp = (void *)overlay_dip;
+ error = DDI_SUCCESS;
+ break;
+ case DDI_INFO_DEVT2INSTANCE:
+ *resp = (void *)0;
+ error = DDI_SUCCESS;
+ break;
+ default:
+ error = DDI_FAILURE;
+ break;
+ }
+
+ return (error);
+}
+
+static int
+overlay_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
+
+ mutex_enter(&overlay_dev_lock);
+ if (!list_is_empty(&overlay_dev_list) || overlay_target_busy()) {
+ mutex_exit(&overlay_dev_lock);
+ return (EBUSY);
+ }
+ mutex_exit(&overlay_dev_lock);
+
+
+ dld_ioc_unregister(VNIC_IOC);
+ ddi_remove_minor_node(dip, OVERLAY_CTL);
+ ddi_fm_fini(dip);
+ overlay_dip = NULL;
+ return (DDI_SUCCESS);
+}
+
+static struct cb_ops overlay_cbops = {
+ overlay_target_open, /* cb_open */
+ overlay_target_close, /* cb_close */
+ nodev, /* cb_strategy */
+ nodev, /* cb_print */
+ nodev, /* cb_dump */
+ nodev, /* cb_read */
+ nodev, /* cb_write */
+ overlay_target_ioctl, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ NULL, /* cb_stream */
+ D_MP, /* cb_flag */
+ CB_REV, /* cb_rev */
+ nodev, /* cb_aread */
+ nodev, /* cb_awrite */
+};
+
+static struct dev_ops overlay_dev_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ overlay_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ overlay_attach, /* devo_attach */
+ overlay_detach, /* devo_detach */
+ nulldev, /* devo_reset */
+ &overlay_cbops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ NULL, /* devo_power */
+ ddi_quiesce_not_supported /* devo_quiesce */
+};
+
+static struct modldrv overlay_modldrv = {
+ &mod_driverops,
+ "Overlay Network Driver",
+ &overlay_dev_ops
+};
+
+static struct modlinkage overlay_linkage = {
+ MODREV_1,
+ &overlay_modldrv
+};
+
+static int
+overlay_init(void)
+{
+ mutex_init(&overlay_dev_lock, NULL, MUTEX_DRIVER, NULL);
+ list_create(&overlay_dev_list, sizeof (overlay_dev_t),
+ offsetof(overlay_dev_t, odd_link));
+ overlay_mux_init();
+ overlay_plugin_init();
+ overlay_target_init();
+
+ return (DDI_SUCCESS);
+}
+
+static void
+overlay_fini(void)
+{
+ overlay_target_fini();
+ overlay_plugin_fini();
+ overlay_mux_fini();
+ mutex_destroy(&overlay_dev_lock);
+ list_destroy(&overlay_dev_list);
+}
+
+int
+_init(void)
+{
+ int err;
+
+ if ((err = overlay_init()) != DDI_SUCCESS)
+ return (err);
+
+ mac_init_ops(NULL, "overlay");
+ err = mod_install(&overlay_linkage);
+ if (err != DDI_SUCCESS) {
+ overlay_fini();
+ return (err);
+ }
+
+ return (0);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&overlay_linkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int err;
+
+ err = mod_remove(&overlay_linkage);
+ if (err != 0)
+ return (err);
+
+ overlay_fini();
+ return (0);
+}
diff --git a/usr/src/uts/common/io/overlay/overlay.conf b/usr/src/uts/common/io/overlay/overlay.conf
new file mode 100644
index 0000000000..3b30393113
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay.conf
@@ -0,0 +1,16 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
+#
+
+name="overlay" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/io/overlay/overlay.mapfile b/usr/src/uts/common/io/overlay/overlay.mapfile
new file mode 100644
index 0000000000..c7611702d2
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay.mapfile
@@ -0,0 +1,46 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION ILLUMOSprivate {
+ global:
+ # DDI Interfaces
+ _fini;
+ _init;
+ _info;
+
+ # Encapsualation Plugin interfaces
+ overlay_plugin_alloc;
+ overlay_plugin_free;
+ overlay_plugin_register;
+ overlay_plugin_unregister;
+ local:
+ *;
+};
diff --git a/usr/src/uts/common/io/overlay/overlay_fm.c b/usr/src/uts/common/io/overlay/overlay_fm.c
new file mode 100644
index 0000000000..a061870257
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay_fm.c
@@ -0,0 +1,80 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Overlay device FMA operations
+ */
+
+#include <sys/ddifm.h>
+#include <sys/overlay_impl.h>
+
+kmutex_t overlay_fm_lock;
+uint_t overlay_fm_count;
+
+void
+overlay_fm_init(void)
+{
+ overlay_fm_count = 0;
+ mutex_init(&overlay_fm_lock, NULL, MUTEX_DRIVER, NULL);
+}
+
+void
+overlay_fm_fini(void)
+{
+ VERIFY(overlay_fm_count == 0);
+ mutex_destroy(&overlay_fm_lock);
+}
+
+void
+overlay_fm_degrade(overlay_dev_t *odd, const char *msg)
+{
+ mutex_enter(&overlay_fm_lock);
+ mutex_enter(&odd->odd_lock);
+
+ if (msg != NULL)
+ (void) strlcpy(odd->odd_fmamsg, msg, OVERLAY_STATUS_BUFLEN);
+
+ if (odd->odd_flags & OVERLAY_F_DEGRADED)
+ goto out;
+
+ odd->odd_flags |= OVERLAY_F_DEGRADED;
+ overlay_fm_count++;
+ if (overlay_fm_count == 1) {
+ ddi_fm_service_impact(overlay_dip, DDI_SERVICE_DEGRADED);
+ mac_link_update(odd->odd_mh, LINK_STATE_DOWN);
+ }
+out:
+ mutex_exit(&odd->odd_lock);
+ mutex_exit(&overlay_fm_lock);
+}
+
+void
+overlay_fm_restore(overlay_dev_t *odd)
+{
+ mutex_enter(&overlay_fm_lock);
+ mutex_enter(&odd->odd_lock);
+ if (!(odd->odd_flags & OVERLAY_F_DEGRADED))
+ goto out;
+
+ odd->odd_flags &= ~OVERLAY_F_DEGRADED;
+ overlay_fm_count--;
+ if (overlay_fm_count == 0) {
+ ddi_fm_service_impact(overlay_dip, DDI_SERVICE_RESTORED);
+ mac_link_update(odd->odd_mh, LINK_STATE_UP);
+ }
+out:
+ mutex_exit(&odd->odd_lock);
+ mutex_exit(&overlay_fm_lock);
+}
diff --git a/usr/src/uts/common/io/overlay/overlay_mux.c b/usr/src/uts/common/io/overlay/overlay_mux.c
new file mode 100644
index 0000000000..000911e706
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay_mux.c
@@ -0,0 +1,345 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Overlay device multiplexer. Handles dealing with devices
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ksynch.h>
+#include <sys/ksocket.h>
+#include <sys/avl.h>
+#include <sys/list.h>
+#include <sys/sysmacros.h>
+#include <sys/strsubr.h>
+#include <sys/strsun.h>
+#include <sys/tihdr.h>
+
+#include <sys/overlay_impl.h>
+
+#include <sys/sdt.h>
+
+#define OVERLAY_FREEMSG(mp, reason) \
+ DTRACE_PROBE2(overlay__fremsg, mblk_t *, mp, char *, reason)
+
+static list_t overlay_mux_list;
+static kmutex_t overlay_mux_lock;
+
+void
+overlay_mux_init(void)
+{
+ list_create(&overlay_mux_list, sizeof (overlay_mux_t),
+ offsetof(overlay_mux_t, omux_lnode));
+ mutex_init(&overlay_mux_lock, NULL, MUTEX_DRIVER, NULL);
+}
+
+void
+overlay_mux_fini(void)
+{
+ mutex_destroy(&overlay_mux_lock);
+ list_destroy(&overlay_mux_list);
+}
+
+static int
+overlay_mux_comparator(const void *a, const void *b)
+{
+ const overlay_dev_t *odl, *odr;
+ odl = a;
+ odr = b;
+ if (odl->odd_vid > odr->odd_vid)
+ return (1);
+ else if (odl->odd_vid < odr->odd_vid)
+ return (-1);
+ else
+ return (0);
+}
+
+/*
+ * This is the central receive data path. We need to decode the packet, if we
+ * can, and then deliver it to the appropriate overlay.
+ */
+static boolean_t
+overlay_mux_recv(ksocket_t ks, mblk_t *mpchain, size_t msgsize, int oob,
+ void *arg)
+{
+ mblk_t *mp, *nmp, *fmp;
+ overlay_mux_t *mux = arg;
+
+ /*
+ * We may have a received a chain of messages. Each messsage in the
+ * chain will likely have a T_unitdata_ind attached to it as an M_PROTO.
+ * If we aren't getting that, we should probably drop that for the
+ * moment.
+ *
+ * XXX kstats and D Probes
+ */
+ for (mp = mpchain; mp != NULL; mp = nmp) {
+ struct T_unitdata_ind *tudi;
+ ovep_encap_info_t infop;
+ overlay_dev_t od, *odd;
+ int ret;
+
+ nmp = mp->b_next;
+ mp->b_next = NULL;
+
+ if (DB_TYPE(mp) != M_PROTO) {
+ OVERLAY_FREEMSG(mp, "first one isn't M_PROTO");
+ freemsg(mp);
+ continue;
+ }
+
+ if (mp->b_cont == NULL) {
+ OVERLAY_FREEMSG(mp, "missing a b_cont");
+ freemsg(mp);
+ continue;
+ }
+
+ tudi = (struct T_unitdata_ind *)mp->b_rptr;
+ if (tudi->PRIM_type != T_UNITDATA_IND) {
+ OVERLAY_FREEMSG(mp, "Not a T_unitdata_ind *");
+ freemsg(mp);
+ continue;
+ }
+
+ /*
+ * XXX In the future, we'll care about the source information
+ * for purposes of telling varpd for oob invalidation. But for
+ * now, just drop that block.
+ */
+ fmp = mp;
+ mp = fmp->b_cont;
+ fmp->b_cont = NULL;
+ freemsg(fmp);
+
+ /*
+ * Decap and deliver.
+ *
+ * XXX Zero out infop probably?
+ */
+ ret = mux->omux_plugin->ovp_ops->ovpo_decap(NULL, mp, &infop);
+ if (ret != 0) {
+ OVERLAY_FREEMSG(mp, "decap failed");
+ freemsg(mp);
+ continue;
+ }
+ if (MBLKL(mp) > infop.ovdi_hdr_size) {
+ mp->b_rptr += infop.ovdi_hdr_size;
+ } else {
+ while (infop.ovdi_hdr_size != 0) {
+ size_t rem, blkl;
+
+ if (mp == NULL)
+ break;
+
+ blkl = MBLKL(mp);
+ rem = MIN(infop.ovdi_hdr_size, blkl);
+ infop.ovdi_hdr_size -= rem;
+ mp->b_rptr += rem;
+ if (rem == blkl) {
+ fmp = mp;
+ mp = fmp->b_cont;
+ fmp->b_cont = NULL;
+ OVERLAY_FREEMSG(mp,
+ "freed a fmp block");
+ freemsg(fmp);
+ }
+ }
+ if (mp == NULL) {
+ OVERLAY_FREEMSG(mp, "freed it all...");
+ continue;
+ }
+ }
+
+
+ od.odd_vid = infop.ovdi_id;
+ mutex_enter(&mux->omux_lock);
+ odd = avl_find(&mux->omux_devices, &od, NULL);
+ if (odd == NULL) {
+ mutex_exit(&mux->omux_lock);
+ OVERLAY_FREEMSG(mp, "no matching vid");
+ freemsg(mp);
+ continue;
+ }
+ mutex_enter(&odd->odd_lock);
+ if ((odd->odd_flags & OVERLAY_F_MDDROP) ||
+ !(odd->odd_flags & OVERLAY_F_IN_MUX)) {
+ mutex_exit(&odd->odd_lock);
+ mutex_exit(&mux->omux_lock);
+ OVERLAY_FREEMSG(mp, "dev dropped");
+ freemsg(mp);
+ continue;
+ }
+ overlay_io_start(odd, OVERLAY_F_IN_RX);
+ mutex_exit(&odd->odd_lock);
+ mutex_exit(&mux->omux_lock);
+
+ mac_rx(odd->odd_mh, NULL, mp);
+
+ mutex_enter(&odd->odd_lock);
+ overlay_io_done(odd, OVERLAY_F_IN_RX);
+ mutex_exit(&odd->odd_lock);
+ }
+
+ return (B_TRUE);
+}
+
+/*
+ * Register a given device with a socket backend. If no such device socket
+ * exists, create a new one.
+ */
+overlay_mux_t *
+overlay_mux_open(overlay_plugin_t *opp, int domain, int family, int protocol,
+ struct sockaddr *addr, socklen_t len, int *errp)
+{
+ int err;
+ overlay_mux_t *mux;
+ ksocket_t ksock;
+
+ if (errp == NULL)
+ errp = &err;
+
+ mutex_enter(&overlay_mux_lock);
+ for (mux = list_head(&overlay_mux_list); mux != NULL;
+ mux = list_next(&overlay_mux_list, mux)) {
+ if (domain == mux->omux_domain &&
+ family == mux->omux_family &&
+ protocol == mux->omux_protocol &&
+ len == mux->omux_alen &&
+ bcmp(addr, mux->omux_addr, len) == 0) {
+
+ if (opp != mux->omux_plugin) {
+ *errp = EEXIST;
+ return (NULL);
+ }
+
+ mutex_enter(&mux->omux_lock);
+ mux->omux_count++;
+ mutex_exit(&mux->omux_lock);
+ mutex_exit(&overlay_mux_lock);
+ *errp = 0;
+ return (mux);
+ }
+ }
+
+ /*
+ * XXX This is entirely the wrong cred to use. Needs to be specific to
+ * the zone, etc.
+ */
+ *errp = ksocket_socket(&ksock, domain, family, protocol, KSOCKET_SLEEP,
+ kcred);
+ if (*errp != 0) {
+ mutex_exit(&overlay_mux_lock);
+ return (NULL);
+ }
+
+ /* XXX Again, wrong credp */
+ *errp = ksocket_bind(ksock, addr, len, kcred);
+ if (*errp != 0) {
+ mutex_exit(&overlay_mux_lock);
+ /* XXX This is the wrong cred */
+ ksocket_close(ksock, kcred);
+ return (NULL);
+ }
+
+ mux = kmem_alloc(sizeof (overlay_mux_t), KM_SLEEP);
+ list_link_init(&mux->omux_lnode);
+ mux->omux_ksock = ksock;
+ mux->omux_plugin = opp;
+ mux->omux_domain = domain;
+ mux->omux_family = family;
+ mux->omux_protocol = protocol;
+ mux->omux_addr = kmem_alloc(len, KM_SLEEP);
+ bcopy(addr, mux->omux_addr, len);
+ mux->omux_alen = len;
+ mux->omux_count = 1;
+ avl_create(&mux->omux_devices, overlay_mux_comparator,
+ sizeof (overlay_dev_t), offsetof(overlay_dev_t, odd_muxnode));
+ mutex_init(&mux->omux_lock, NULL, MUTEX_DRIVER, NULL);
+
+
+ /* XXX Probably should have a credp here */
+ /* XXX Once this is called, we need to expect to rx data */
+ *errp = ksocket_krecv_set(ksock, overlay_mux_recv, mux);
+ if (*errp != 0) {
+ /* XXX Wrong credp */
+ ksocket_close(ksock, kcred);
+ mutex_destroy(&mux->omux_lock);
+ avl_destroy(&mux->omux_devices);
+ kmem_free(mux->omux_addr, len);
+ kmem_free(mux, sizeof (overlay_mux_t));
+ return (NULL);
+ }
+
+ list_insert_tail(&overlay_mux_list, mux);
+ mutex_exit(&overlay_mux_lock);
+
+ *errp = 0;
+ return (mux);
+}
+
+void
+overlay_mux_close(overlay_mux_t *mux)
+{
+ mutex_enter(&overlay_mux_lock);
+ mutex_enter(&mux->omux_lock);
+ mux->omux_count--;
+ if (mux->omux_count != 0) {
+ mutex_exit(&mux->omux_lock);
+ mutex_exit(&overlay_mux_lock);
+ return;
+ }
+ list_remove(&overlay_mux_list, mux);
+ mutex_exit(&mux->omux_lock);
+ mutex_exit(&overlay_mux_lock);
+
+ /* XXX This is the wrong cred */
+ ksocket_close(mux->omux_ksock, kcred);
+ avl_destroy(&mux->omux_devices);
+ kmem_free(mux->omux_addr, mux->omux_alen);
+ kmem_free(mux, sizeof (overlay_mux_t));
+}
+
+void
+overlay_mux_add_dev(overlay_mux_t *mux, overlay_dev_t *odd)
+{
+ mutex_enter(&mux->omux_lock);
+ avl_add(&mux->omux_devices, odd);
+ mutex_exit(&mux->omux_lock);
+}
+
+void
+overlay_mux_remove_dev(overlay_mux_t *mux, overlay_dev_t *odd)
+{
+ /* XXX We should verify it's in the tree */
+ mutex_enter(&mux->omux_lock);
+ avl_remove(&mux->omux_devices, odd);
+ mutex_exit(&mux->omux_lock);
+}
+
+int
+overlay_mux_tx(overlay_mux_t *mux, struct msghdr *hdr, mblk_t *mp)
+{
+ int ret;
+ /*
+ * XXX We probably want MSG_MBLK_QUICKRELE, but that doesn't work by
+ * default with UDP
+ */
+ ret = ksocket_sendmblk(mux->omux_ksock, hdr, 0, &mp, kcred);
+ if (ret != 0)
+ freemsg(mp);
+
+ return (ret);
+}
diff --git a/usr/src/uts/common/io/overlay/overlay_plugin.c b/usr/src/uts/common/io/overlay/overlay_plugin.c
new file mode 100644
index 0000000000..6772e71646
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay_plugin.c
@@ -0,0 +1,261 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/errno.h>
+#include <sys/sysmacros.h>
+#include <sys/modctl.h>
+
+#include <sys/overlay_impl.h>
+
+static kmem_cache_t *overlay_plugin_cache;
+static kmutex_t overlay_plugin_lock;
+static list_t overlay_plugin_list;
+
+#define OVERLAY_MODDIR "overlay"
+
+static int
+overlay_plugin_cache_constructor(void *buf, void *arg, int kmflags)
+{
+ overlay_plugin_t *opp = buf;
+
+ mutex_init(&opp->ovp_mutex, NULL, MUTEX_DRIVER, NULL);
+ list_link_init(&opp->ovp_link);
+
+ return (0);
+}
+
+static void
+overlay_plugin_cache_destructor(void *buf, void *arg)
+{
+ overlay_plugin_t *opp = buf;
+ ASSERT(list_link_active(&opp->ovp_link) == 0);
+ mutex_destroy(&opp->ovp_mutex);
+}
+
+void
+overlay_plugin_init(void)
+{
+ mutex_init(&overlay_plugin_lock, NULL, MUTEX_DRIVER, 0);
+ /*
+ * XXX Should we have a reclaim function to effectively reap unused
+ * modules or just let it happen some other way?
+ */
+ overlay_plugin_cache = kmem_cache_create("overlay_plugin_cache",
+ sizeof (overlay_plugin_t), 0, overlay_plugin_cache_constructor,
+ overlay_plugin_cache_destructor, NULL, NULL, NULL, 0);
+ list_create(&overlay_plugin_list, sizeof (overlay_plugin_t),
+ offsetof(overlay_plugin_t, ovp_link));
+}
+
+void
+overlay_plugin_fini(void)
+{
+ /*
+ * XXX 22 May 2014
+ *
+ * This gets called from the general overlay module's fini path. The
+ * question is can the overlay module actually unload if plugins still
+ * exist. I would assum that because of dependencies, it can't. This
+ * raises several questions. Should we basically force unload some of
+ * these modules somehow and call modunload ourselves? But if we can't
+ * get called until that happens, then what...
+ */
+ mutex_enter(&overlay_plugin_lock);
+ VERIFY(list_is_empty(&overlay_plugin_list));
+ mutex_exit(&overlay_plugin_lock);
+
+ list_destroy(&overlay_plugin_list);
+ kmem_cache_destroy(overlay_plugin_cache);
+ mutex_destroy(&overlay_plugin_lock);
+}
+
+overlay_plugin_register_t *
+overlay_plugin_alloc(uint_t version)
+{
+ overlay_plugin_register_t *ovrp;
+ /* Version 1 is the only one that exists */
+ if (version != OVEP_VERSION_ONE)
+ return (NULL);
+
+ ovrp = kmem_zalloc(sizeof (overlay_plugin_register_t), KM_SLEEP);
+ ovrp->ovep_version = version;
+ return (ovrp);
+}
+
+void
+overlay_plugin_free(overlay_plugin_register_t *ovrp)
+{
+ kmem_free(ovrp, sizeof (overlay_plugin_register_t));
+}
+
+int
+overlay_plugin_register(overlay_plugin_register_t *ovrp)
+{
+ overlay_plugin_t *opp, *ipp;
+
+ /* Sanity check parameters of the registration */
+ if (ovrp->ovep_version != OVEP_VERSION_ONE)
+ return (EINVAL);
+
+ if (ovrp->ovep_name == NULL || ovrp->ovep_ops == NULL)
+ return (EINVAL);
+
+ if ((ovrp->ovep_flags & ~(OVEP_F_VLAN_TAG | OVEP_F_STRIP_TAG)) != 0)
+ return (EINVAL);
+
+ if (ovrp->ovep_id_size < 1)
+ return (EINVAL);
+
+ if (ovrp->ovep_hdr_max < ovrp->ovep_hdr_min)
+ return (EINVAL);
+
+ if (ovrp->ovep_dest == OVERLAY_PLUGIN_D_INVALID)
+ return (EINVAL);
+
+ if ((ovrp->ovep_dest & ~OVERLAY_PLUGIN_D_MASK) != 0)
+ return (EINVAL);
+
+ /* Dont' support anything that has an id size larger than 8 bytes */
+ if (ovrp->ovep_id_size > 8)
+ return (ENOTSUP);
+
+ /* XXX Check to make sure overlay property namelen is okay */
+
+ opp = kmem_cache_alloc(overlay_plugin_cache, KM_SLEEP);
+ opp->ovp_active = 0;
+ opp->ovp_name = ovrp->ovep_name;
+ opp->ovp_ops = ovrp->ovep_ops;
+ opp->ovp_props = ovrp->ovep_props;
+ opp->ovp_id_size = ovrp->ovep_id_size;
+ opp->ovp_flags = ovrp->ovep_flags;
+ opp->ovp_hdr_min = ovrp->ovep_hdr_min;
+ opp->ovp_hdr_max = ovrp->ovep_hdr_max;
+ opp->ovp_dest = ovrp->ovep_dest;
+
+ opp->ovp_nprops = 0;
+ if (ovrp->ovep_props != NULL) {
+ while (ovrp->ovep_props[opp->ovp_nprops] != NULL)
+ opp->ovp_nprops++;
+ }
+
+ mutex_enter(&overlay_plugin_lock);
+ for (ipp = list_head(&overlay_plugin_list); ipp != NULL;
+ ipp = list_next(&overlay_plugin_list, ipp)) {
+ if (strcmp(ipp->ovp_name, opp->ovp_name) == 0) {
+ mutex_exit(&overlay_plugin_lock);
+ kmem_cache_free(overlay_plugin_cache, opp);
+ return (EEXIST);
+ }
+ }
+ list_insert_tail(&overlay_plugin_list, opp);
+ mutex_exit(&overlay_plugin_lock);
+
+ return (0);
+}
+
+int
+overlay_plugin_unregister(const char *name)
+{
+ overlay_plugin_t *opp;
+
+ mutex_enter(&overlay_plugin_lock);
+ for (opp = list_head(&overlay_plugin_list); opp != NULL;
+ opp = list_next(&overlay_plugin_list, opp)) {
+ if (strcmp(opp->ovp_name, name) == 0)
+ break;
+ }
+
+ if (opp == NULL) {
+ mutex_exit(&overlay_plugin_lock);
+ return (ENOENT);
+ }
+
+ mutex_enter(&opp->ovp_mutex);
+ if (opp->ovp_active > 0) {
+ mutex_exit(&opp->ovp_mutex);
+ mutex_exit(&overlay_plugin_lock);
+ return (EBUSY);
+ }
+ mutex_exit(&opp->ovp_mutex);
+
+ list_remove(&overlay_plugin_list, opp);
+ mutex_exit(&overlay_plugin_lock);
+
+ kmem_cache_free(overlay_plugin_cache, opp);
+ return (0);
+}
+
+overlay_plugin_t *
+overlay_plugin_lookup(const char *name)
+{
+ overlay_plugin_t *opp;
+ boolean_t trymodload = B_FALSE;
+
+ for (;;) {
+ mutex_enter(&overlay_plugin_lock);
+ for (opp = list_head(&overlay_plugin_list); opp != NULL;
+ opp = list_next(&overlay_plugin_list, opp)) {
+ if (strcmp(name, opp->ovp_name) == 0) {
+ mutex_enter(&opp->ovp_mutex);
+ opp->ovp_active++;
+ mutex_exit(&opp->ovp_mutex);
+ mutex_exit(&overlay_plugin_lock);
+ return (opp);
+ }
+ }
+ mutex_exit(&overlay_plugin_lock);
+
+ if (trymodload == B_TRUE)
+ return (NULL);
+
+ /*
+ * If we didn't find it, it may still exist, but just not have
+ * been a loaded module. In that case, we'll do one attempt to
+ * load it.
+ */
+ if (modload(OVERLAY_MODDIR, (char *)name) == -1)
+ return (NULL);
+ trymodload = B_TRUE;
+ }
+
+}
+
+void
+overlay_plugin_rele(overlay_plugin_t *opp)
+{
+ mutex_enter(&opp->ovp_mutex);
+ ASSERT(opp->ovp_active > 0);
+ opp->ovp_active--;
+ mutex_exit(&opp->ovp_mutex);
+}
+
+void
+overlay_plugin_walk(overlay_plugin_walk_f func, void *arg)
+{
+ overlay_plugin_t *opp;
+ mutex_enter(&overlay_plugin_lock);
+ for (opp = list_head(&overlay_plugin_list); opp != NULL;
+ opp = list_next(&overlay_plugin_list, opp)) {
+ if (func(opp, arg) != 0) {
+ mutex_exit(&overlay_plugin_lock);
+ return;
+ }
+ }
+ mutex_exit(&overlay_plugin_lock);
+}
diff --git a/usr/src/uts/common/io/overlay/overlay_prop.c b/usr/src/uts/common/io/overlay/overlay_prop.c
new file mode 100644
index 0000000000..3fb380e6ea
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay_prop.c
@@ -0,0 +1,123 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Routines for manipulating property information structures.
+ */
+
+#include <sys/overlay_impl.h>
+
+void
+overlay_prop_init(overlay_prop_handle_t phdl)
+{
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+ mac_propval_range_t *rangep = (mac_propval_range_t *)infop->oipi_poss;
+
+ infop->oipi_posssize = sizeof (mac_propval_range_t);
+ bzero(rangep, sizeof (mac_propval_range_t));
+}
+
+void
+overlay_prop_set_name(overlay_prop_handle_t phdl, const char *name)
+{
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+ (void) strlcpy(infop->oipi_name, name, OVERLAY_PROP_NAMELEN);
+}
+
+void
+overlay_prop_set_prot(overlay_prop_handle_t phdl, overlay_prop_prot_t prot)
+{
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+ infop->oipi_prot = prot;
+}
+
+void
+overlay_prop_set_type(overlay_prop_handle_t phdl, overlay_prop_type_t type)
+{
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+ infop->oipi_type = type;
+}
+
+int
+overlay_prop_set_default(overlay_prop_handle_t phdl, void *def, ssize_t len)
+{
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+
+ if (len > OVERLAY_PROP_SIZEMAX)
+ return (E2BIG);
+
+ if (len < 0)
+ return (EOVERFLOW);
+
+ bcopy(def, infop->oipi_default, len);
+ infop->oipi_defsize = len;
+
+ return (0);
+}
+
+void
+overlay_prop_set_nodefault(overlay_prop_handle_t phdl)
+{
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+ infop->oipi_default[0] = '\0';
+ infop->oipi_defsize = 0;
+}
+
+void
+overlay_prop_set_range_uint32(overlay_prop_handle_t phdl, uint32_t min,
+ uint32_t max)
+{
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+ mac_propval_range_t *rangep = (mac_propval_range_t *)infop->oipi_poss;
+
+ /* XXX We should probably set some kind of error here... */
+ if (rangep->mpr_count != 0 && rangep->mpr_type != MAC_PROPVAL_UINT32)
+ return;
+
+ /* XXX We should probably set some kind of error here... */
+ if (infop->oipi_posssize + sizeof (mac_propval_uint32_range_t) >
+ sizeof (infop->oipi_poss))
+ return;
+
+ infop->oipi_posssize += sizeof (mac_propval_uint32_range_t);
+ rangep->mpr_count++;
+ rangep->mpr_type = MAC_PROPVAL_UINT32;
+ rangep->u.mpr_uint32[rangep->mpr_count-1].mpur_min = min;
+ rangep->u.mpr_uint32[rangep->mpr_count-1].mpur_max = max;
+}
+
+void
+overlay_prop_set_range_str(overlay_prop_handle_t phdl, const char *str)
+{
+ size_t len = strlen(str) + 1; /* Account for a null terminator */
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+ mac_propval_range_t *rangep = (mac_propval_range_t *)infop->oipi_poss;
+ mac_propval_str_range_t *pstr = &rangep->u.mpr_str;
+
+ /* XXX Errors */
+ if (rangep->mpr_count != 0 && rangep->mpr_type != MAC_PROPVAL_STR)
+ return;
+
+ /* XXX Errors */
+ if (infop->oipi_posssize + len > sizeof (infop->oipi_poss))
+ return;
+
+ rangep->mpr_count++;
+ rangep->mpr_type = MAC_PROPVAL_STR;
+ strlcpy((char *)&pstr->mpur_data[pstr->mpur_nextbyte], str,
+ sizeof (infop->oipi_poss) - infop->oipi_posssize);
+ pstr->mpur_nextbyte += len;
+ infop->oipi_posssize += len;
+}
diff --git a/usr/src/uts/common/io/overlay/overlay_target.c b/usr/src/uts/common/io/overlay/overlay_target.c
new file mode 100644
index 0000000000..905e19413d
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay_target.c
@@ -0,0 +1,1590 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc.
+ */
+
+/*
+ * Overlay devices can operate in one of many modes. They may be a point to
+ * point tunnel, they may be on a single multicast group, or they may have
+ * dynamic destinations. All of these are programmed via varpd.
+ *
+ * XXX This all probably won't remain true.
+ */
+
+#include <sys/types.h>
+#include <sys/ethernet.h>
+#include <sys/kmem.h>
+#include <sys/policy.h>
+#include <sys/sysmacros.h>
+#include <sys/stream.h>
+#include <sys/strsun.h>
+#include <sys/strsubr.h>
+#include <sys/mac_provider.h>
+#include <sys/mac_client.h>
+#include <sys/mac_client_priv.h>
+#include <sys/vlan.h>
+#include <sys/crc32.h>
+#include <sys/cred.h>
+#include <sys/file.h>
+#include <sys/errno.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+
+#include <sys/overlay_impl.h>
+#include <sys/sdt.h>
+
+/*
+ * XXX This is total straw man, but at least it's a prime number. Here we're
+ * going to have to go through and do a lot of evaluation and understanding as
+ * to how these target caches should grow and shrink, as well as, memory
+ * pressure and evictions. This just gives us a starting point that'll be 'good
+ * enough'.
+ */
+#define OVERLAY_HSIZE 823
+
+/*
+ * We use this data structure to keep track of what requests have been actively
+ * allocated to a given instance so we know what to put back on the pending
+ * list.
+ */
+typedef struct overlay_target_hdl {
+ minor_t oth_minor; /* RO */
+ zoneid_t oth_zoneid; /* RO */
+ list_node_t oth_link; /* overlay_target_lock */
+ kmutex_t oth_lock;
+ list_t oth_outstanding; /* oth_lock */
+} overlay_target_hdl_t;
+
+typedef int (*overlay_target_copyin_f)(const void *, void **, size_t *, int);
+typedef int (*overlay_target_ioctl_f)(overlay_target_hdl_t *, void *);
+typedef int (*overlay_target_copyout_f)(void *, void *, size_t, int);
+
+typedef struct overaly_target_ioctl {
+ int oti_cmd; /* ioctl id */
+ boolean_t oti_write; /* ioctl requires FWRITE */
+ boolean_t oti_ncopyout; /* copyout data? */
+ overlay_target_copyin_f oti_copyin; /* copyin func */
+ overlay_target_ioctl_f oti_func; /* function to call */
+ overlay_target_copyout_f oti_copyout; /* copyin func */
+ size_t oti_size; /* size of user level structure */
+} overlay_target_ioctl_t;
+
+static kmem_cache_t *overlay_target_cache;
+static kmem_cache_t *overlay_entry_cache;
+static id_space_t *overlay_thdl_idspace;
+static list_t overlay_thdl_list;
+static void *overlay_thdl_state;
+/*
+ * XXX This all needs to become zone/netstack aware.
+ */
+static kmutex_t overlay_target_lock;
+static kcondvar_t overlay_target_condvar;
+static list_t overlay_target_list;
+
+static int
+overlay_target_cache_constructor(void *buf, void *arg, int kmflgs)
+{
+ overlay_target_t *ott = buf;
+
+ mutex_init(&ott->ott_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&ott->ott_cond, NULL, CV_DRIVER, NULL);
+ return (0);
+}
+
+static void
+overlay_target_cache_destructor(void *buf, void *arg)
+{
+ overlay_target_t *ott = buf;
+
+ cv_destroy(&ott->ott_cond);
+ mutex_destroy(&ott->ott_lock);
+}
+
+static int
+overlay_entry_cache_constructor(void *buf, void *arg, int kmflgs)
+{
+ overlay_target_entry_t *ote = buf;
+
+ bzero(ote, sizeof (overlay_target_entry_t));
+ mutex_init(&ote->ote_lock, NULL, MUTEX_DRIVER, NULL);
+ return (0);
+}
+
+static void
+overlay_entry_cache_destructor(void *buf, void *arg)
+{
+ overlay_target_entry_t *ote = buf;
+
+ mutex_destroy(&ote->ote_lock);
+}
+
+static uint64_t
+overlay_mac_hash(const void *v)
+{
+ uint32_t crc;
+ return (CRC32(crc, v, ETHERADDRL, -1U, crc32_table));
+}
+
+static int
+overlay_mac_cmp(const void *a, const void *b)
+{
+ return (bcmp(a, b, ETHERADDRL));
+}
+
+static void
+overlay_target_entry_dtor(void *arg)
+{
+ mblk_t *mp;
+ overlay_target_entry_t *ote = arg;
+
+ ote->ote_flags = 0;
+ bzero(ote->ote_addr, ETHERADDRL);
+ ote->ote_ott = NULL;
+ ote->ote_odd = NULL;
+ freemsgchain(ote->ote_chead);
+ ote->ote_chead = ote->ote_ctail = NULL;
+ ote->ote_mbsize = 0;
+ ote->ote_vtime = 0;
+ kmem_cache_free(overlay_entry_cache, ote);
+}
+
+static int
+overlay_mac_avl(const void *a, const void *b)
+{
+ int i;
+ const overlay_target_entry_t *l, *r;
+ l = a;
+ r = b;
+
+ for (i = 0; i < ETHERADDRL; i++) {
+ if (l->ote_addr[i] > r->ote_addr[i])
+ return (1);
+ else if (l->ote_addr[i] < r->ote_addr[i])
+ return (-1);
+ }
+
+ return (0);
+}
+
+void
+overlay_target_init(void)
+{
+ int ret;
+ ret = ddi_soft_state_init(&overlay_thdl_state,
+ sizeof (overlay_target_hdl_t), 1);
+ VERIFY(ret == 0);
+ overlay_target_cache = kmem_cache_create("overlay_target",
+ sizeof (overlay_target_t), 0, overlay_target_cache_constructor,
+ overlay_target_cache_destructor, NULL, NULL, NULL, 0);
+ overlay_entry_cache = kmem_cache_create("overlay_entry",
+ sizeof (overlay_target_entry_t), 0, overlay_entry_cache_constructor,
+ overlay_entry_cache_destructor, NULL, NULL, NULL, 0);
+ mutex_init(&overlay_target_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&overlay_target_condvar, NULL, CV_DRIVER, NULL);
+ list_create(&overlay_target_list, sizeof (overlay_target_entry_t),
+ offsetof(overlay_target_entry_t, ote_qlink));
+ list_create(&overlay_thdl_list, sizeof (overlay_target_hdl_t),
+ offsetof(overlay_target_hdl_t, oth_link));
+ overlay_thdl_idspace = id_space_create("overlay_target_minors",
+ 1, INT32_MAX);
+}
+
+void
+overlay_target_fini(void)
+{
+ id_space_destroy(overlay_thdl_idspace);
+ list_destroy(&overlay_thdl_list);
+ list_destroy(&overlay_target_list);
+ cv_destroy(&overlay_target_condvar);
+ mutex_destroy(&overlay_target_lock);
+ kmem_cache_destroy(overlay_entry_cache);
+ kmem_cache_destroy(overlay_target_cache);
+ ddi_soft_state_fini(&overlay_thdl_state);
+}
+
+void
+overlay_target_free(overlay_dev_t *odd)
+{
+ if (odd->odd_target == NULL)
+ return;
+
+ if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) {
+ refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash;
+ avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree;
+ overlay_target_entry_t *ote;
+
+ /*
+ * Our AVL tree and hashtable contain the same elements,
+ * therefore we should just remove it from the tree, but then
+ * delete the entries when we remove them from the hash table
+ * (which happens through the refhash dtor).
+ */
+ while ((ote = avl_first(ap)) != NULL)
+ avl_remove(ap, ote);
+
+ avl_destroy(ap);
+ for (ote = refhash_first(rp); ote != NULL;
+ ote = refhash_next(rp, ote)) {
+ refhash_remove(rp, ote);
+ }
+ refhash_destroy(rp);
+ }
+
+ ASSERT(odd->odd_target->ott_ocount == 0);
+ kmem_cache_free(overlay_target_cache, odd->odd_target);
+}
+
+int
+overlay_target_busy()
+{
+ int ret;
+
+ mutex_enter(&overlay_target_lock);
+ ret = !list_is_empty(&overlay_thdl_list);
+ mutex_exit(&overlay_target_lock);
+
+ return (ret);
+}
+
+static void
+overlay_target_queue(overlay_target_entry_t *entry)
+{
+ mutex_enter(&overlay_target_lock);
+ mutex_enter(&entry->ote_ott->ott_lock);
+ if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) {
+ mutex_exit(&entry->ote_ott->ott_lock);
+ mutex_exit(&overlay_target_lock);
+ return;
+ }
+ entry->ote_ott->ott_ocount++;
+ mutex_exit(&entry->ote_ott->ott_lock);
+ list_insert_tail(&overlay_target_list, entry);
+ cv_signal(&overlay_target_condvar);
+ mutex_exit(&overlay_target_lock);
+}
+
+void
+overlay_target_quiesce(overlay_target_t *ott)
+{
+ if (ott == NULL)
+ return;
+ mutex_enter(&ott->ott_lock);
+ ott->ott_flags |= OVERLAY_T_TEARDOWN;
+ while (ott->ott_ocount != 0)
+ cv_wait(&ott->ott_cond, &ott->ott_lock);
+ mutex_exit(&ott->ott_lock);
+}
+
+/*
+ * XXX This is assuming a non-gre style bits
+ */
+int
+overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock,
+ socklen_t *slenp)
+{
+ int ret;
+ struct sockaddr_in6 *v6;
+ overlay_target_t *ott;
+ mac_header_info_t mhi;
+ overlay_target_entry_t *entry;
+
+ ASSERT(odd->odd_target != NULL);
+
+ ott = odd->odd_target;
+ if (ott->ott_dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
+ panic("implement me rm...");
+
+ v6 = (struct sockaddr_in6 *)sock;
+ bzero(v6, sizeof (struct sockaddr_in6));
+ v6->sin6_family = AF_INET6;
+
+ /* XXX Can we go lockless here, aka RO when in a mux? */
+ if (ott->ott_mode == OVERLAY_TARGET_POINT) {
+ mutex_enter(&ott->ott_lock);
+ bcopy(&ott->ott_u.ott_point.otp_ip, &v6->sin6_addr,
+ sizeof (struct in6_addr));
+ v6->sin6_port = htons(ott->ott_u.ott_point.otp_port);
+ mutex_exit(&ott->ott_lock);
+ *slenp = sizeof (struct sockaddr_in6);
+
+ return (OVERLAY_TARGET_OK);
+ }
+
+ ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC);
+ if (mac_header_info(odd->odd_mh, mp, &mhi) != 0)
+ return (OVERLAY_TARGET_DROP);
+ mutex_enter(&ott->ott_lock);
+ entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
+ mhi.mhi_daddr);
+ if (entry == NULL) {
+ /*
+ * XXX Create a new entry here and send it off to lookup
+ */
+ entry = kmem_cache_alloc(overlay_entry_cache,
+ KM_NOSLEEP | KM_NORMALPRI);
+ if (entry == NULL) {
+ mutex_exit(&ott->ott_lock);
+ return (OVERLAY_TARGET_DROP);
+ }
+ bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL);
+ entry->ote_chead = entry->ote_ctail = mp;
+ entry->ote_mbsize = msgsize(mp);
+ entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
+ entry->ote_ott = ott;
+ entry->ote_odd = odd;
+ refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry);
+ avl_add(&ott->ott_u.ott_dyn.ott_tree, entry);
+ mutex_exit(&ott->ott_lock);
+ overlay_target_queue(entry);
+ return (OVERLAY_TARGET_ASYNC);
+ }
+ refhash_hold(ott->ott_u.ott_dyn.ott_dhash, entry);
+ mutex_exit(&ott->ott_lock);
+
+ mutex_enter(&entry->ote_lock);
+ if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) {
+ ret = OVERLAY_TARGET_DROP;
+ } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
+ /* XXX Check valid expiration at some point */
+ bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr,
+ sizeof (struct in6_addr));
+ v6->sin6_port = htons(entry->ote_dest.otp_port);
+ *slenp = sizeof (struct sockaddr_in6);
+ ret = OVERLAY_TARGET_OK;
+ } else {
+ size_t mlen = msgsize(mp);
+
+ if (mlen + entry->ote_mbsize > 1024 * 1024) {
+ ret = OVERLAY_TARGET_DROP;
+ } else {
+ if (entry->ote_ctail != NULL) {
+ ASSERT(entry->ote_ctail->b_next ==
+ NULL);
+ entry->ote_ctail->b_next = mp;
+ entry->ote_ctail = mp;
+ } else {
+ entry->ote_chead = mp;
+ entry->ote_ctail = mp;
+ }
+ entry->ote_mbsize += mlen;
+ if ((entry->ote_flags &
+ OVERLAY_ENTRY_F_PENDING) == 0) {
+ entry->ote_flags |=
+ OVERLAY_ENTRY_F_PENDING;
+ overlay_target_queue(entry);
+ }
+ ret = OVERLAY_TARGET_ASYNC;
+ }
+ }
+ mutex_exit(&entry->ote_lock);
+
+ mutex_enter(&ott->ott_lock);
+ refhash_rele(ott->ott_u.ott_dyn.ott_dhash, entry);
+ mutex_exit(&ott->ott_lock);
+
+ return (ret);
+}
+
+static int
+overlay_target_info(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_dev_t *odd;
+ overlay_targ_info_t *oti = arg;
+
+ odd = overlay_hold_by_dlid(oti->oti_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ oti->oti_flags = 0;
+ oti->oti_needs = odd->odd_plugin->ovp_dest;
+ if (odd->odd_flags & OVERLAY_F_DEGRADED)
+ oti->oti_flags |= OVERLAY_TARG_INFO_F_DEGRADED;
+ if (odd->odd_flags & OVERLAY_F_ACTIVATED)
+ oti->oti_flags |= OVERLAY_TARG_INFO_F_ACTIVE;
+ oti->oti_vnetid = odd->odd_vid;
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (0);
+}
+
+static int
+overlay_target_associate(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_dev_t *odd;
+ overlay_target_t *ott;
+ overlay_targ_associate_t *ota = arg;
+
+ odd = overlay_hold_by_dlid(ota->ota_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ if (ota->ota_id == 0) {
+ overlay_hold_rele(odd);
+ return (EINVAL);
+ }
+
+ if (ota->ota_mode != OVERLAY_TARGET_POINT &&
+ ota->ota_mode != OVERLAY_TARGET_DYNAMIC) {
+ overlay_hold_rele(odd);
+ return (EINVAL);
+ }
+
+ if (ota->ota_provides != odd->odd_plugin->ovp_dest) {
+ overlay_hold_rele(odd);
+ return (EINVAL);
+ }
+
+ if (ota->ota_mode == OVERLAY_TARGET_POINT) {
+ /* XXX What checks make sense for Ethernet? */
+ if (ota->ota_provides & OVERLAY_PLUGIN_D_IP) {
+ if (IN6_IS_ADDR_UNSPECIFIED(&ota->ota_point.otp_ip) ||
+ IN6_IS_ADDR_V4COMPAT(&ota->ota_point.otp_ip) ||
+ IN6_IS_ADDR_V4MAPPED_ANY(&ota->ota_point.otp_ip)) {
+ overlay_hold_rele(odd);
+ return (EINVAL);
+ }
+ }
+
+ if (ota->ota_provides & OVERLAY_PLUGIN_D_PORT) {
+ if (ota->ota_point.otp_port == 0) {
+ overlay_hold_rele(odd);
+ return (EINVAL);
+ }
+ }
+ }
+
+ ott = kmem_cache_alloc(overlay_target_cache, KM_SLEEP);
+ ott->ott_flags = 0;
+ ott->ott_ocount = 0;
+ ott->ott_mode = ota->ota_mode;
+ ott->ott_dest = ota->ota_provides;
+ ott->ott_id = ota->ota_id;
+
+ if (ott->ott_mode == OVERLAY_TARGET_POINT) {
+ bcopy(&ota->ota_point, &ott->ott_u.ott_point,
+ sizeof (overlay_target_point_t));
+ } else {
+ ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE,
+ overlay_mac_hash, overlay_mac_cmp,
+ overlay_target_entry_dtor, sizeof (overlay_target_entry_t),
+ offsetof(overlay_target_entry_t, ote_reflink),
+ offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP);
+ avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl,
+ sizeof (overlay_target_entry_t),
+ offsetof(overlay_target_entry_t, ote_avllink));
+ }
+ mutex_enter(&odd->odd_lock);
+ if (odd->odd_flags & OVERLAY_F_VARPD) {
+ mutex_exit(&odd->odd_lock);
+ kmem_cache_free(overlay_target_cache, ott);
+ overlay_hold_rele(odd);
+ return (EEXIST);
+ }
+
+ odd->odd_flags |= OVERLAY_F_VARPD;
+ odd->odd_target = ott;
+ mutex_exit(&odd->odd_lock);
+
+ /*
+ * Now that we've successfully integrated ourselves here, we should note
+ * that the link state is now up, and transition it away from UKNOWN.
+ */
+ mac_link_update(odd->odd_mh, LINK_STATE_UP);
+
+ overlay_hold_rele(odd);
+
+
+ return (0);
+}
+
+
+static int
+overlay_target_degrade(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_dev_t *odd;
+ overlay_targ_degrade_t *otd = arg;
+
+ odd = overlay_hold_by_dlid(otd->otd_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ overlay_fm_degrade(odd, otd->otd_buf);
+ overlay_hold_rele(odd);
+ return (0);
+}
+
+static int
+overlay_target_restore(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_dev_t *odd;
+ overlay_targ_id_t *otid = arg;
+
+ odd = overlay_hold_by_dlid(otid->otid_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ overlay_fm_restore(odd);
+ overlay_hold_rele(odd);
+ return (0);
+}
+
+static int
+overlay_target_disassociate(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_dev_t *odd;
+ overlay_targ_id_t *otid = arg;
+
+ odd = overlay_hold_by_dlid(otid->otid_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ odd->odd_flags &= ~OVERLAY_F_VARPD;
+ mutex_exit(&odd->odd_lock);
+
+ /*
+ * Without a varpd instance, we should consider ourselves link down.
+ */
+ mac_link_update(odd->odd_mh, LINK_STATE_DOWN);
+
+ overlay_hold_rele(odd);
+ return (0);
+
+}
+
+static int
+overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_targ_lookup_t *otl = arg;
+ overlay_target_entry_t *entry;
+ clock_t ret, timeout;
+ mac_header_info_t mhi;
+
+ timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC);
+again:
+ mutex_enter(&overlay_target_lock);
+ while (list_is_empty(&overlay_target_list)) {
+ ret = cv_timedwait(&overlay_target_condvar,
+ &overlay_target_lock, timeout);
+ if (ret == -1) {
+ mutex_exit(&overlay_target_lock);
+ return (ETIME);
+ }
+ }
+ entry = list_remove_head(&overlay_target_list);
+ mutex_exit(&overlay_target_lock);
+ mutex_enter(&entry->ote_lock);
+ if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
+ ASSERT(entry->ote_chead == NULL);
+ mutex_exit(&entry->ote_lock);
+ goto again;
+ }
+ ASSERT(entry->ote_chead != NULL);
+
+ /* XXX We probably shouldn't assume it's valid */
+ (void) mac_header_info(entry->ote_odd->odd_mh, entry->ote_chead, &mhi);
+
+ otl->otl_dlid = entry->ote_odd->odd_linkid;
+ otl->otl_reqid = (uintptr_t)entry;
+ otl->otl_varpdid = entry->ote_ott->ott_id;
+ otl->otl_vnetid = entry->ote_odd->odd_vid;
+
+ otl->otl_hdrsize = mhi.mhi_hdrsize;
+ otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize;
+ bcopy(mhi.mhi_daddr, otl->otl_dstaddr, ETHERADDRL);
+ bcopy(mhi.mhi_saddr, otl->otl_srcaddr, ETHERADDRL);
+ otl->otl_dsttype = mhi.mhi_dsttype;
+ otl->otl_sap = mhi.mhi_bindsap;
+ otl->otl_vlan = VLAN_ID(mhi.mhi_tci);
+ mutex_exit(&entry->ote_lock);
+
+ mutex_enter(&thdl->oth_lock);
+ list_insert_tail(&thdl->oth_outstanding, entry);
+ mutex_exit(&thdl->oth_lock);
+
+ return (0);
+}
+
+static int
+overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg)
+{
+ const overlay_targ_resp_t *otr = arg;
+ overlay_target_entry_t *entry;
+ mblk_t *mp;
+
+ mutex_enter(&thdl->oth_lock);
+ for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
+ entry = list_next(&thdl->oth_outstanding, entry)) {
+ if ((uintptr_t)entry == otr->otr_reqid)
+ break;
+ }
+
+ if (entry == NULL) {
+ mutex_exit(&thdl->oth_lock);
+ return (EINVAL);
+ }
+ list_remove(&thdl->oth_outstanding, entry);
+ mutex_exit(&thdl->oth_lock);
+
+ mutex_enter(&entry->ote_lock);
+ bcopy(&otr->otr_answer, &entry->ote_dest,
+ sizeof (overlay_target_point_t));
+ entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
+ entry->ote_flags |= OVERLAY_ENTRY_F_VALID;
+ mp = entry->ote_chead;
+ entry->ote_chead = NULL;
+ entry->ote_ctail = NULL;
+ entry->ote_mbsize = 0;
+ entry->ote_vtime = gethrtime();
+ mutex_exit(&entry->ote_lock);
+
+ /*
+ * XXX Should we drain this in situ or some other way? For now, just
+ * call tx and then drop anything we get back...
+ */
+ mp = overlay_m_tx(entry->ote_odd, mp);
+ freemsgchain(mp);
+
+ mutex_enter(&entry->ote_ott->ott_lock);
+ entry->ote_ott->ott_ocount--;
+ cv_signal(&entry->ote_ott->ott_cond);
+ mutex_exit(&entry->ote_ott->ott_lock);
+
+ return (0);
+}
+
+static int
+overlay_target_lookup_drop(overlay_target_hdl_t *thdl, void *arg)
+{
+ const overlay_targ_resp_t *otr = arg;
+ overlay_target_entry_t *entry;
+ mblk_t *mp;
+ boolean_t queue = B_FALSE;
+
+ mutex_enter(&thdl->oth_lock);
+ for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
+ entry = list_next(&thdl->oth_outstanding, entry)) {
+ if ((uintptr_t)entry == otr->otr_reqid)
+ break;
+ }
+
+ if (entry == NULL) {
+ mutex_exit(&thdl->oth_lock);
+ return (EINVAL);
+ }
+ list_remove(&thdl->oth_outstanding, entry);
+ mutex_exit(&thdl->oth_lock);
+
+ mutex_enter(&entry->ote_lock);
+
+ /* Safeguard against a confused varpd */
+ if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
+ entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
+ DTRACE_PROBE1(overlay__target__valid__drop,
+ overlay_target_entry_t *, entry);
+ mutex_exit(&entry->ote_lock);
+ goto done;
+ }
+
+ mp = entry->ote_chead;
+ if (mp != NULL) {
+ entry->ote_chead = mp->b_next;
+ mp->b_next = NULL;
+ if (entry->ote_ctail == mp)
+ entry->ote_ctail = entry->ote_chead;
+ entry->ote_mbsize -= msgsize(mp);
+ }
+ if (entry->ote_chead != NULL) {
+ queue = B_TRUE;
+ entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
+ } else {
+ entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
+ }
+ mutex_exit(&entry->ote_lock);
+
+ if (queue == B_TRUE)
+ overlay_target_queue(entry);
+ freemsg(mp);
+
+done:
+ mutex_enter(&entry->ote_ott->ott_lock);
+ entry->ote_ott->ott_ocount--;
+ cv_signal(&entry->ote_ott->ott_cond);
+ mutex_exit(&entry->ote_ott->ott_lock);
+
+ return (0);
+}
+
+static int
+overlay_target_pkt_copyin(const void *ubuf, void **outp, size_t *bsize,
+ int flags)
+{
+ int ret;
+ overlay_targ_pkt_t *pkt;
+ overlay_targ_pkt32_t *pkt32;
+
+ pkt = kmem_alloc(sizeof (overlay_targ_pkt_t), KM_SLEEP);
+ *outp = pkt;
+ *bsize = sizeof (overlay_targ_pkt_t);
+ if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) {
+ uintptr_t addr;
+
+ if (ddi_copyin(ubuf, pkt, sizeof (overlay_targ_pkt32_t),
+ flags & FKIOCTL) != 0) {
+ kmem_free(pkt, *bsize);
+ return (EFAULT);
+ }
+ pkt32 = (overlay_targ_pkt32_t *)pkt;
+ addr = pkt32->otp_buf;
+ pkt->otp_buf = (void *)addr;
+ } else {
+ if (ddi_copyin(ubuf, pkt, *bsize, flags & FKIOCTL) != 0) {
+ kmem_free(pkt, *bsize);
+ return (EFAULT);
+ }
+ }
+ return (0);
+}
+
+static int
+overlay_target_pkt_copyout(void *ubuf, void *buf, size_t bufsize,
+ int flags)
+{
+ if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) {
+ overlay_targ_pkt_t *pkt = buf;
+ overlay_targ_pkt32_t *pkt32 = buf;
+ uintptr_t addr = (uintptr_t)pkt->otp_buf;
+ pkt32->otp_buf = (caddr32_t)addr;
+ if (ddi_copyout(buf, ubuf, sizeof (overlay_targ_pkt32_t),
+ flags & FKIOCTL) != 0)
+ return (EFAULT);
+ } else {
+ if (ddi_copyout(buf, ubuf, bufsize, flags & FKIOCTL) != 0)
+ return (EFAULT);
+ }
+ return (0);
+}
+
+static int
+overlay_target_packet(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_targ_pkt_t *pkt = arg;
+ overlay_target_entry_t *entry;
+ mblk_t *mp;
+ size_t mlen;
+ size_t boff;
+
+ mutex_enter(&thdl->oth_lock);
+ for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
+ entry = list_next(&thdl->oth_outstanding, entry)) {
+ if ((uintptr_t)entry == pkt->otp_reqid)
+ break;
+ }
+
+ if (entry == NULL) {
+ mutex_exit(&thdl->oth_lock);
+ return (EINVAL);
+ }
+ mutex_enter(&entry->ote_lock);
+ mutex_exit(&thdl->oth_lock);
+ mp = entry->ote_chead;
+ /* XXX We should protect against a rouge varpd, don't assert */
+ ASSERT(mp != NULL);
+ mlen = MIN(msgsize(mp), pkt->otp_size);
+ pkt->otp_size = mlen;
+ boff = 0;
+ while (mlen > 0) {
+ size_t wlen = MIN(MBLKL(mp), mlen);
+ if (ddi_copyout(mp->b_rptr,
+ (void *)((uintptr_t)pkt->otp_buf + boff),
+ wlen, 0) != 0) {
+ mutex_exit(&entry->ote_lock);
+ return (EFAULT);
+ }
+ mlen -= wlen;
+ boff += wlen;
+ mp = mp->b_cont;
+ }
+ mutex_exit(&entry->ote_lock);
+ return (0);
+}
+
+static int
+overlay_target_inject(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_targ_pkt_t *pkt = arg;
+ overlay_target_entry_t *entry;
+ overlay_dev_t *odd;
+ mblk_t *mp;
+
+ /* XXX No support for injecting jumbo frames */
+ if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ)
+ return (EINVAL);
+
+ mp = allocb(pkt->otp_size, 0);
+ if (mp == NULL)
+ return (ENOMEM);
+
+ if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) {
+ freeb(mp);
+ return (EFAULT);
+ }
+ mp->b_wptr += pkt->otp_size;
+
+ if (pkt->otp_linkid != UINT64_MAX) {
+ odd = overlay_hold_by_dlid(pkt->otp_linkid);
+ if (odd == NULL) {
+ freeb(mp);
+ return (ENOENT);
+ }
+ } else {
+ mutex_enter(&thdl->oth_lock);
+ for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
+ entry = list_next(&thdl->oth_outstanding, entry)) {
+ if ((uintptr_t)entry == pkt->otp_reqid)
+ break;
+ }
+
+ if (entry == NULL) {
+ mutex_exit(&thdl->oth_lock);
+ freeb(mp);
+ return (ENOENT);
+ }
+ odd = entry->ote_odd;
+ mutex_exit(&thdl->oth_lock);
+ }
+
+ mutex_enter(&odd->odd_lock);
+ overlay_io_start(odd, OVERLAY_F_IN_RX);
+ mutex_exit(&odd->odd_lock);
+
+ mac_rx(odd->odd_mh, NULL, mp);
+
+ mutex_enter(&odd->odd_lock);
+ overlay_io_done(odd, OVERLAY_F_IN_RX);
+ mutex_exit(&odd->odd_lock);
+
+ return (0);
+}
+
+static int
+overlay_target_resend(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_targ_pkt_t *pkt = arg;
+ overlay_target_entry_t *entry;
+ overlay_dev_t *odd;
+ mblk_t *mp;
+
+ /* XXX No support for injecting jumbo frames */
+ if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ)
+ return (EINVAL);
+
+ mp = allocb(pkt->otp_size, 0);
+ if (mp == NULL)
+ return (ENOMEM);
+
+ if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) {
+ freeb(mp);
+ return (EFAULT);
+ }
+ mp->b_wptr += pkt->otp_size;
+
+ if (pkt->otp_linkid != UINT64_MAX) {
+ odd = overlay_hold_by_dlid(pkt->otp_linkid);
+ if (odd == NULL) {
+ freeb(mp);
+ return (ENOENT);
+ }
+ } else {
+ mutex_enter(&thdl->oth_lock);
+ for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
+ entry = list_next(&thdl->oth_outstanding, entry)) {
+ if ((uintptr_t)entry == pkt->otp_reqid)
+ break;
+ }
+
+ if (entry == NULL) {
+ mutex_exit(&thdl->oth_lock);
+ freeb(mp);
+ return (ENOENT);
+ }
+ odd = entry->ote_odd;
+ mutex_exit(&thdl->oth_lock);
+ }
+
+ mp = overlay_m_tx(odd, mp);
+ freemsgchain(mp);
+
+ return (0);
+}
+
+typedef struct overlay_targ_list_int {
+ boolean_t otli_count;
+ uint32_t otli_cur;
+ uint32_t otli_nents;
+ uint32_t otli_ents[];
+} overlay_targ_list_int_t;
+
+static int
+overlay_target_list_copyin(const void *ubuf, void **outp, size_t *bsize,
+ int flags)
+{
+ overlay_targ_list_t n;
+ overlay_targ_list_int_t *otl;
+
+ if (ddi_copyin(ubuf, &n, sizeof (overlay_targ_list_t),
+ flags & FKIOCTL) != 0)
+ return (EFAULT);
+
+ /*
+ * XXX We should do something to limit the number of entries we
+ * support, or at least more than this...
+ */
+ if (n.otl_nents >= INT32_MAX / sizeof (uint32_t))
+ return (EINVAL);
+ *bsize = sizeof (overlay_targ_list_int_t) +
+ sizeof (uint32_t) * n.otl_nents;
+ otl = kmem_zalloc(*bsize, KM_SLEEP);
+ otl->otli_cur = 0;
+ otl->otli_nents = n.otl_nents;
+ if (otl->otli_nents != 0) {
+ otl->otli_count = B_FALSE;
+ if (ddi_copyin(ubuf + offsetof(overlay_targ_list_t, otl_ents),
+ otl->otli_ents, n.otl_nents * sizeof (uint32_t),
+ flags & FKIOCTL) != 0) {
+ kmem_free(otl, *bsize);
+ return (EFAULT);
+ }
+ } else {
+ otl->otli_count = B_TRUE;
+ }
+
+ *outp = otl;
+ return (0);
+}
+
+static int
+overlay_target_ioctl_list_cb(overlay_dev_t *odd, void *arg)
+{
+ overlay_targ_list_int_t *otl = arg;
+
+ if (otl->otli_cur < otl->otli_nents)
+ otl->otli_ents[otl->otli_cur] = odd->odd_linkid;
+ otl->otli_cur++;
+ return (0);
+}
+
+static int
+overlay_target_ioctl_list(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_targ_list_int_t *otl = arg;
+ overlay_dev_iter(overlay_target_ioctl_list_cb, arg);
+ return (0);
+}
+
+static int
+overlay_target_list_copyout(void *ubuf, void *buf, size_t bufsize, int flags)
+{
+ overlay_targ_list_int_t *otl = buf;
+
+ if (ddi_copyout(&otl->otli_cur, ubuf, sizeof (uint32_t),
+ flags & FKIOCTL) != 0)
+ return (EFAULT);
+
+ if (otl->otli_count == B_FALSE) {
+ if (ddi_copyout(otl->otli_ents,
+ ubuf + offsetof(overlay_targ_list_t, otl_ents),
+ sizeof (uint32_t *) * otl->otli_nents,
+ flags & FKIOCTL) != 0)
+ return (EFAULT);
+ }
+ return (0);
+}
+
+static int
+overlay_target_cache_get(overlay_target_hdl_t *thdl, void *arg)
+{
+ int ret = 0;
+ overlay_dev_t *odd;
+ overlay_target_t *ott;
+ overlay_targ_cache_t *otc = arg;
+
+ odd = overlay_hold_by_dlid(otc->otc_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENXIO);
+ }
+ ott = odd->odd_target;
+ if (ott->ott_mode != OVERLAY_TARGET_POINT &&
+ ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENOTSUP);
+ }
+ mutex_enter(&ott->ott_lock);
+ mutex_exit(&odd->odd_lock);
+
+ if (ott->ott_mode == OVERLAY_TARGET_POINT) {
+ otc->otc_entry.otce_flags = 0;
+ bcopy(&ott->ott_u.ott_point, &otc->otc_entry.otce_dest,
+ sizeof (overlay_target_point_t));
+ } else {
+ overlay_target_entry_t *ote;
+ ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
+ otc->otc_entry.otce_mac);
+ if (ote != NULL) {
+ mutex_enter(&ote->ote_lock);
+ if ((ote->ote_flags &
+ OVERLAY_ENTRY_F_VALID_MASK) != 0) {
+ if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
+ otc->otc_entry.otce_flags =
+ OVERLAY_TARGET_CACHE_DROP;
+ } else {
+ otc->otc_entry.otce_flags = 0;
+ bcopy(&ote->ote_dest,
+ &otc->otc_entry.otce_dest,
+ sizeof (overlay_target_point_t));
+ }
+ ret = 0;
+ } else {
+ ret = ENOENT;
+ }
+ mutex_exit(&ote->ote_lock);
+ } else {
+ ret = ENOENT;
+ }
+ }
+
+ mutex_exit(&ott->ott_lock);
+ overlay_hold_rele(odd);
+
+ return (ret);
+}
+
+static int
+overlay_target_cache_set(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_dev_t *odd;
+ overlay_target_t *ott;
+ overlay_target_entry_t *ote;
+ overlay_targ_cache_t *otc = arg;
+ mblk_t *mp = NULL;
+
+ if (otc->otc_entry.otce_flags & ~OVERLAY_TARGET_CACHE_DROP)
+ return (EINVAL);
+
+ odd = overlay_hold_by_dlid(otc->otc_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENXIO);
+ }
+ ott = odd->odd_target;
+ if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENOTSUP);
+ }
+ mutex_enter(&ott->ott_lock);
+ mutex_exit(&odd->odd_lock);
+
+ ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
+ otc->otc_entry.otce_mac);
+ if (ote == NULL) {
+ ote = kmem_cache_alloc(overlay_entry_cache, KM_SLEEP);
+ bcopy(otc->otc_entry.otce_mac, ote->ote_addr, ETHERADDRL);
+ ote->ote_chead = ote->ote_ctail = NULL;
+ ote->ote_mbsize = 0;
+ ote->ote_ott = ott;
+ ote->ote_odd = odd;
+ mutex_enter(&ote->ote_lock);
+ refhash_insert(ott->ott_u.ott_dyn.ott_dhash, ote);
+ avl_add(&ott->ott_u.ott_dyn.ott_tree, ote);
+ } else {
+ mutex_enter(&ote->ote_lock);
+ }
+
+ if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) {
+ ote->ote_flags |= OVERLAY_ENTRY_F_DROP;
+ } else {
+ ote->ote_flags |= OVERLAY_ENTRY_F_VALID;
+ bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest,
+ sizeof (overlay_target_point_t));
+ mp = ote->ote_chead;
+ ote->ote_chead = NULL;
+ ote->ote_ctail = NULL;
+ ote->ote_mbsize = 0;
+ ote->ote_vtime = gethrtime();
+ }
+
+ mutex_exit(&ote->ote_lock);
+ mutex_exit(&ott->ott_lock);
+
+ if (mp != NULL) {
+ mp = overlay_m_tx(ote->ote_odd, mp);
+ freemsgchain(mp);
+ }
+
+ overlay_hold_rele(odd);
+
+ return (0);
+}
+
+static int
+overlay_target_cache_remove(overlay_target_hdl_t *thdl, void *arg)
+{
+ int ret = 0;
+ overlay_dev_t *odd;
+ overlay_target_t *ott;
+ overlay_target_entry_t *ote;
+ overlay_targ_cache_t *otc = arg;
+
+ odd = overlay_hold_by_dlid(otc->otc_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENXIO);
+ }
+ ott = odd->odd_target;
+ if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENOTSUP);
+ }
+ mutex_enter(&ott->ott_lock);
+ mutex_exit(&odd->odd_lock);
+
+ ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
+ otc->otc_entry.otce_mac);
+ if (ote != NULL) {
+ mutex_enter(&ote->ote_lock);
+ ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK;
+ mutex_exit(&ote->ote_lock);
+ ret = 0;
+ } else {
+ ret = ENOENT;
+ }
+
+ mutex_exit(&ott->ott_lock);
+ overlay_hold_rele(odd);
+
+ return (ret);
+}
+
+static int
+overlay_target_cache_flush(overlay_target_hdl_t *thdl, void *arg)
+{
+ avl_tree_t *avl;
+ overlay_dev_t *odd;
+ overlay_target_t *ott;
+ overlay_target_entry_t *ote;
+ overlay_targ_cache_t *otc = arg;
+
+ odd = overlay_hold_by_dlid(otc->otc_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENXIO);
+ }
+ ott = odd->odd_target;
+ if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENOTSUP);
+ }
+ mutex_enter(&ott->ott_lock);
+ mutex_exit(&odd->odd_lock);
+ avl = &ott->ott_u.ott_dyn.ott_tree;
+
+ for (ote = avl_first(avl); ote != NULL; ote = AVL_NEXT(avl, ote)) {
+ mutex_enter(&ote->ote_lock);
+ ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK;
+ mutex_exit(&ote->ote_lock);
+ }
+ ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
+ otc->otc_entry.otce_mac);
+
+ mutex_exit(&ott->ott_lock);
+ overlay_hold_rele(odd);
+
+ return (0);
+}
+
+static int
+overlay_target_cache_iter_copyin(const void *ubuf, void **outp, size_t *bsize,
+ int flags)
+{
+ overlay_targ_cache_iter_t base, *iter;
+
+ if (ddi_copyin(ubuf, &base, sizeof (overlay_targ_cache_iter_t),
+ flags & FKIOCTL) != 0)
+ return (EFAULT);
+
+ if (base.otci_count > OVERLAY_TARGET_ITER_MAX)
+ return (E2BIG);
+
+ if (base.otci_count == 0)
+ return (EINVAL);
+
+ *bsize = sizeof (overlay_targ_cache_iter_t) +
+ base.otci_count * sizeof (overlay_targ_cache_entry_t);
+ iter = kmem_alloc(*bsize, KM_SLEEP);
+ bcopy(&base, iter, sizeof (overlay_targ_cache_iter_t));
+ *outp = iter;
+
+ return (0);
+}
+
+typedef struct overlay_targ_cache_marker {
+ uint8_t otcm_mac[ETHERADDRL];
+ uint16_t otcm_done;
+} overlay_targ_cache_marker_t;
+
+static int
+overlay_target_cache_iter(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_dev_t *odd;
+ overlay_target_t *ott;
+ overlay_target_entry_t lookup, *ent;
+ overlay_targ_cache_marker_t *mark;
+ avl_index_t where;
+ avl_tree_t *avl;
+ uint16_t written = 0;
+
+ overlay_targ_cache_iter_t *iter = arg;
+ mark = (void *)&iter->otci_marker;
+
+ if (mark->otcm_done != 0) {
+ iter->otci_count = 0;
+ return (0);
+ }
+
+ odd = overlay_hold_by_dlid(iter->otci_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENXIO);
+ }
+ ott = odd->odd_target;
+ if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC &&
+ ott->ott_mode != OVERLAY_TARGET_POINT) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENOTSUP);
+ }
+
+ /*
+ * XXX Holding this lock across the entire iteration probably isn't very
+ * good. We should perhaps add an r/w lock for the avl tree.
+ */
+ mutex_enter(&ott->ott_lock);
+ mutex_exit(&odd->odd_lock);
+
+ if (ott->ott_mode == OVERLAY_TARGET_POINT) {
+ overlay_targ_cache_entry_t *out = &iter->otci_ents[0];
+ bzero(out->otce_mac, ETHERADDRL);
+ out->otce_flags = 0;
+ bcopy(&ott->ott_u.ott_point, &out->otce_dest,
+ sizeof (overlay_target_point_t));
+ written++;
+ mark->otcm_done = 1;
+ }
+
+ avl = &ott->ott_u.ott_dyn.ott_tree;
+ bcopy(mark->otcm_mac, lookup.ote_addr, ETHERADDRL);
+ ent = avl_find(avl, &lookup, &where);
+
+ /*
+ * NULL ent means that the entry does not exist, so we want to start
+ * with the closest node in the tree. This means that we implicitly rely
+ * on the tree's order and the first node will be the mac 00:00:00:00:00
+ * and the last will be ff:ff:ff:ff:ff:ff.
+ */
+ if (ent == NULL) {
+ ent = avl_nearest(avl, where, AVL_AFTER);
+ if (ent == NULL) {
+ mark->otcm_done = 1;
+ goto done;
+ }
+ }
+
+ for (; ent != NULL && written < iter->otci_count;
+ ent = AVL_NEXT(avl, ent)) {
+ overlay_targ_cache_entry_t *out = &iter->otci_ents[written];
+ mutex_enter(&ent->ote_lock);
+ if ((ent->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) == 0) {
+ mutex_exit(&ent->ote_lock);
+ continue;
+ }
+ bcopy(ent->ote_addr, out->otce_mac, ETHERADDRL);
+ out->otce_flags = 0;
+ if (ent->ote_flags & OVERLAY_ENTRY_F_DROP)
+ out->otce_flags |= OVERLAY_TARGET_CACHE_DROP;
+ if (ent->ote_flags & OVERLAY_ENTRY_F_VALID)
+ bcopy(&ent->ote_dest, &out->otce_dest,
+ sizeof (overlay_target_point_t));
+ written++;
+ mutex_exit(&ent->ote_lock);
+ }
+
+ if (ent != NULL) {
+ bcopy(ent->ote_addr, mark->otcm_mac, ETHERADDRL);
+ } else {
+ mark->otcm_done = 1;
+ }
+
+done:
+ iter->otci_count = written;
+ mutex_exit(&ott->ott_lock);
+ overlay_hold_rele(odd);
+
+ return (0);
+}
+
+static int
+overlay_target_cache_iter_copyout(void *ubuf, void *buf, size_t bufsize,
+ int flags)
+{
+ size_t outsize;
+ const overlay_targ_cache_iter_t *iter = buf;
+
+ outsize = sizeof (overlay_targ_cache_iter_t) +
+ iter->otci_count * sizeof (overlay_targ_cache_entry_t);
+
+ if (ddi_copyout(buf, ubuf, outsize, flags & FKIOCTL) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+static overlay_target_ioctl_t overlay_target_ioctab[] = {
+ { OVERLAY_TARG_INFO, B_TRUE, B_TRUE,
+ NULL, overlay_target_info,
+ NULL, sizeof (overlay_targ_info_t) },
+ { OVERLAY_TARG_ASSOCIATE, B_TRUE, B_FALSE,
+ NULL, overlay_target_associate,
+ NULL, sizeof (overlay_targ_associate_t) },
+ { OVERLAY_TARG_DISASSOCIATE, B_TRUE, B_FALSE,
+ NULL, overlay_target_disassociate,
+ NULL, sizeof (overlay_targ_id_t) },
+ { OVERLAY_TARG_DEGRADE, B_TRUE, B_FALSE,
+ NULL, overlay_target_degrade,
+ NULL, sizeof (overlay_targ_degrade_t) },
+ { OVERLAY_TARG_RESTORE, B_TRUE, B_FALSE,
+ NULL, overlay_target_restore,
+ NULL, sizeof (overlay_targ_id_t) },
+ { OVERLAY_TARG_LOOKUP, B_FALSE, B_TRUE,
+ NULL, overlay_target_lookup_request,
+ NULL, sizeof (overlay_targ_lookup_t) },
+ { OVERLAY_TARG_RESPOND, B_TRUE, B_FALSE,
+ NULL, overlay_target_lookup_respond,
+ NULL, sizeof (overlay_targ_resp_t) },
+ { OVERLAY_TARG_DROP, B_TRUE, B_FALSE,
+ NULL, overlay_target_lookup_drop,
+ NULL, sizeof (overlay_targ_resp_t) },
+ { OVERLAY_TARG_PKT, B_TRUE, B_TRUE,
+ overlay_target_pkt_copyin,
+ overlay_target_packet,
+ overlay_target_pkt_copyout,
+ sizeof (overlay_targ_pkt_t) },
+ { OVERLAY_TARG_INJECT, B_TRUE, B_FALSE,
+ overlay_target_pkt_copyin,
+ overlay_target_inject,
+ NULL, sizeof (overlay_targ_pkt_t) },
+ { OVERLAY_TARG_RESEND, B_TRUE, B_FALSE,
+ overlay_target_pkt_copyin,
+ overlay_target_resend,
+ NULL, sizeof (overlay_targ_pkt_t) },
+ { OVERLAY_TARG_LIST, B_FALSE, B_TRUE,
+ overlay_target_list_copyin,
+ overlay_target_ioctl_list,
+ overlay_target_list_copyout,
+ sizeof (overlay_targ_list_t) },
+ { OVERLAY_TARG_CACHE_GET, B_FALSE, B_TRUE,
+ NULL, overlay_target_cache_get,
+ NULL, sizeof (overlay_targ_cache_t) },
+ { OVERLAY_TARG_CACHE_SET, B_TRUE, B_TRUE,
+ NULL, overlay_target_cache_set,
+ NULL, sizeof (overlay_targ_cache_t) },
+ { OVERLAY_TARG_CACHE_REMOVE, B_TRUE, B_TRUE,
+ NULL, overlay_target_cache_remove,
+ NULL, sizeof (overlay_targ_cache_t) },
+ { OVERLAY_TARG_CACHE_FLUSH, B_TRUE, B_TRUE,
+ NULL, overlay_target_cache_flush,
+ NULL, sizeof (overlay_targ_cache_t) },
+ { OVERLAY_TARG_CACHE_ITER, B_FALSE, B_TRUE,
+ overlay_target_cache_iter_copyin,
+ overlay_target_cache_iter,
+ overlay_target_cache_iter_copyout,
+ sizeof (overlay_targ_cache_iter_t) },
+ { 0 }
+};
+
+int
+overlay_target_open(dev_t *devp, int flags, int otype, cred_t *credp)
+{
+ minor_t mid;
+ overlay_target_hdl_t *thdl;
+
+ if (secpolicy_dl_config(credp) != 0)
+ return (EPERM);
+
+ if (getminor(*devp) != 0)
+ return (ENXIO);
+
+ if (otype & OTYP_BLK)
+ return (EINVAL);
+
+ /* XXX nonblock, excl? */
+ if (flags & ~(FREAD | FWRITE))
+ return (EINVAL);
+
+ /* XXX Really, you need FREAD and FWRITE? */
+ if (!(flags & FREAD) || !(flags & FWRITE))
+ return (EINVAL);
+
+ /* XXX Relax this */
+ if (crgetzoneid(credp) != GLOBAL_ZONEID)
+ return (EPERM);
+
+ mid = id_alloc(overlay_thdl_idspace);
+ if (ddi_soft_state_zalloc(overlay_thdl_state, mid) != 0) {
+ id_free(overlay_thdl_idspace, mid);
+ return (ENXIO);
+ }
+
+ thdl = ddi_get_soft_state(overlay_thdl_state, mid);
+ VERIFY(thdl != NULL);
+ thdl->oth_minor = mid;
+ thdl->oth_zoneid = crgetzoneid(credp);
+ mutex_init(&thdl->oth_lock, NULL, MUTEX_DRIVER, NULL);
+ list_create(&thdl->oth_outstanding, sizeof (overlay_target_entry_t),
+ offsetof(overlay_target_entry_t, ote_qlink));
+ *devp = makedevice(getmajor(*devp), mid);
+
+ mutex_enter(&overlay_target_lock);
+ list_insert_tail(&overlay_thdl_list, thdl);
+ mutex_exit(&overlay_target_lock);
+
+ return (0);
+}
+
+int
+overlay_target_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+ int *rvalp)
+{
+ overlay_target_ioctl_t *ioc;
+ overlay_target_hdl_t *thdl;
+
+ if (secpolicy_dl_config(credp) != 0)
+ return (EPERM);
+
+ if ((thdl = ddi_get_soft_state(overlay_thdl_state,
+ getminor(dev))) == NULL)
+ return (ENXIO);
+
+ for (ioc = &overlay_target_ioctab[0]; ioc->oti_cmd != 0; ioc++) {
+ int ret;
+ caddr_t buf;
+ size_t bufsize;
+
+ if (ioc->oti_cmd != cmd)
+ continue;
+
+ /* XXX Really the write errno? */
+ if (ioc->oti_write == B_TRUE && !(mode & FWRITE))
+ return (EBADF);
+
+ if (ioc->oti_copyin == NULL) {
+ bufsize = ioc->oti_size;
+ buf = kmem_alloc(bufsize, KM_SLEEP);
+ if (ddi_copyin((void *)(uintptr_t)arg, buf, bufsize,
+ mode & FKIOCTL) != 0) {
+ kmem_free(buf, bufsize);
+ return (EFAULT);
+ }
+ } else {
+ if ((ret = ioc->oti_copyin((void *)(uintptr_t)arg,
+ (void **)&buf, &bufsize, mode)) != 0)
+ return (ret);
+ }
+
+ ret = ioc->oti_func(thdl, buf);
+ if (ret == 0 && ioc->oti_size != 0 &&
+ ioc->oti_ncopyout == B_TRUE) {
+ if (ioc->oti_copyout == NULL) {
+ if (ddi_copyout(buf, (void *)(uintptr_t)arg,
+ bufsize, mode & FKIOCTL) != 0)
+ ret = EFAULT;
+ } else {
+ ret = ioc->oti_copyout((void *)(uintptr_t)arg,
+ buf, bufsize, mode);
+ }
+ }
+
+ kmem_free(buf, bufsize);
+ return (ret);
+ }
+
+ return (ENOTTY);
+}
+
+int
+overlay_target_close(dev_t dev, int flags, int otype, cred_t *credp)
+{
+ overlay_target_hdl_t *thdl;
+ overlay_target_entry_t *entry;
+ minor_t mid = getminor(dev);
+
+ if ((thdl = ddi_get_soft_state(overlay_thdl_state, mid)) == NULL)
+ return (ENXIO);
+
+ mutex_enter(&overlay_target_lock);
+ list_remove(&overlay_thdl_list, thdl);
+ mutex_enter(&thdl->oth_lock);
+ while ((entry = list_remove_head(&thdl->oth_outstanding)) != NULL)
+ list_insert_tail(&overlay_target_list, entry);
+ cv_signal(&overlay_target_condvar);
+ mutex_exit(&thdl->oth_lock);
+ mutex_exit(&overlay_target_lock);
+
+ list_destroy(&thdl->oth_outstanding);
+ mutex_destroy(&thdl->oth_lock);
+ mid = thdl->oth_minor;
+ ddi_soft_state_free(overlay_thdl_state, mid);
+ id_free(overlay_thdl_idspace, mid);
+
+ return (0);
+}
diff --git a/usr/src/uts/common/io/overlay/plugins/overlay_geneve.c b/usr/src/uts/common/io/overlay/plugins/overlay_geneve.c
new file mode 100644
index 0000000000..f96b64076b
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/plugins/overlay_geneve.c
@@ -0,0 +1,171 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Geneve encapsulation module
+ */
+
+#include <sys/overlay_plugin.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/geneve.h>
+
+static const char *geneve_ident = "geneve";
+
+static const char *geneve_props[] = {
+ "geneve/listen_ip",
+ "geneve/listen_port",
+ NULL
+};
+
+static int
+geneve_o_init(overlay_handle_t oh, void **outp)
+{
+ *outp = NULL;
+ return (0);
+}
+
+static void
+geneve_o_fini(void *arg)
+{
+}
+
+int
+geneve_o_encap(void *arg, mblk_t *mp, ovep_encap_info_t *einfop,
+ mblk_t **outp)
+{
+ mblk_t *op;
+ geneve_hdr_t *hp;
+
+ ASSERT(einfop->ovdi_id < (1 << 24));
+
+ op = allocb(GENEVE_HDR_MIN, 0);
+ if (op == NULL)
+ return (ENOMEM);
+
+ hp = (geneve_hdr_t *)op->b_rptr;
+ hp->geneve_flags = 0;
+ hp->geneve_prot = htons(GENEVE_PROT_ETHERNET);
+ hp->geneve_id = htonl(einfop->ovdi_id << GENEVE_ID_SHIFT);
+ op->b_wptr += GENEVE_HDR_MIN;
+ *outp = op;
+
+ return (0);
+}
+
+int
+geneve_o_decap(void *arg, mblk_t *mp, ovep_encap_info_t *dinfop)
+{
+ geneve_hdr_t *hp;
+ uint16_t flags;
+ uint8_t len;
+
+ hp = (geneve_hdr_t *)mp->b_rptr;
+ flags = ntohs(hp->geneve_flags);
+
+ if ((flags & GENEVE_VERS_MASK) != GENEVE_VERSION)
+ return (EINVAL);
+
+ len = (flags & GENEVE_OPT_MASK) >> GENEVE_OPT_SHIFT;
+
+ /*
+ * Today we have no notion of control messages, so we'll need to drop
+ * those. We also support no options. Therefore if the critical options
+ * flag has been turned on and the spec says we MAY drop it, we shall.
+ * The idea being that if this option is critical and we don't
+ * understand it, well, we shouldn't go forward.
+ */
+ if ((flags & GENEVE_F_OAM) || (flags & GENEVE_F_COPT))
+ return (EINVAL);
+
+ /*
+ * We may some day support non-Ethernet encapsulations, but that day is
+ * not today.
+ */
+ if (ntohs(hp->geneve_prot) != GENEVE_PROT_ETHERNET)
+ return (EINVAL);
+
+ dinfop->ovdi_id = ntohl(hp->geneve_id) >> GENEVE_ID_SHIFT;
+ dinfop->ovdi_hdr_size = GENEVE_HDR_MIN + len;
+
+ /* XXX Fields we should opt to ignore probably */
+ dinfop->ovdi_vlan = -1;
+ dinfop->ovdi_encap_type = GENEVE_PROT_ETHERNET;
+
+ return (0);
+}
+
+static struct overlay_plugin_ops geneve_o_ops = {
+ 0,
+ geneve_o_init,
+ geneve_o_fini,
+ geneve_o_encap,
+ geneve_o_decap
+};
+
+static struct modlmisc geneve_modlmisc = {
+ &mod_miscops,
+ "Geneve encap plugin"
+};
+
+static struct modlinkage geneve_modlinkage = {
+ MODREV_1,
+ &geneve_modlmisc
+};
+
+int
+_init(void)
+{
+ int err;
+ overlay_plugin_register_t *ovrp;
+
+ ovrp = overlay_plugin_alloc(OVEP_VERSION);
+ if (ovrp == NULL)
+ return (ENOTSUP);
+ ovrp->ovep_name = geneve_ident;
+ ovrp->ovep_ops = &geneve_o_ops;
+ ovrp->ovep_id_size = GENEVE_ID_LEN;
+ ovrp->ovep_flags = OVEP_F_VLAN_TAG;
+ ovrp->ovep_hdr_min = GENEVE_HDR_MIN;
+ ovrp->ovep_hdr_max = GENEVE_HDR_MAX;
+ ovrp->ovep_dest = OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT;
+ ovrp->ovep_props = geneve_props;
+
+ if ((err = overlay_plugin_register(ovrp)) == 0) {
+ if ((err = mod_install(&geneve_modlinkage)) != 0) {
+ (void) overlay_plugin_unregister(geneve_ident);
+ }
+ }
+
+ overlay_plugin_free(ovrp);
+ return (err);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&geneve_modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int err;
+
+ if ((err = overlay_plugin_unregister(geneve_ident)) != 0)
+ return (err);
+
+ return (mod_remove(&geneve_modlinkage));
+}
diff --git a/usr/src/uts/common/io/overlay/plugins/overlay_nvgre.c b/usr/src/uts/common/io/overlay/plugins/overlay_nvgre.c
new file mode 100644
index 0000000000..a097a56358
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/plugins/overlay_nvgre.c
@@ -0,0 +1,200 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * NVGRE encapsulation module
+ */
+
+#include <sys/overlay_plugin.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/nvgre.h>
+#include <sys/byteorder.h>
+#include <inet/ip.h>
+
+static const char *nvgre_ident = "nvgre";
+/* XXX this should probably be global across all plugin-modules */
+static char *nvgre_defip = "::ffff:0.0.0.0";
+
+/* XXX We're locking here, should the higher level serialize this for us? */
+typedef struct nvgre {
+ kmutex_t nvg_lock;
+ struct in6_addr nvg_laddr;
+} nvgre_t;
+
+static const char *nvgre_props[] = {
+ "nvgre/listen_ip",
+ NULL
+};
+
+static int
+nvgre_o_init(overlay_handle_t oh, void **outp)
+{
+ nvgre_t *nvg;
+
+ nvg = kmem_alloc(sizeof (nvgre_t), KM_SLEEP);
+ mutex_init(&nvg->nvg_lock, NULL, MUTEX_DRIVER, NULL);
+ /* XXX Assert return? */
+ (void) inet_pton(AF_INET6, nvgre_defip, &nvg->nvg_laddr);
+
+ return (0);
+}
+
+static void
+nvgre_o_fini(void *arg)
+{
+ nvgre_t *nvg = arg;
+ mutex_destroy(&nvg->nvg_lock);
+ kmem_free(arg, sizeof (nvgre_t));
+}
+
+static int
+nvgre_o_socket(void *arg, int *dp, int *fp, int *pp, struct sockaddr *addr,
+ socklen_t *slenp)
+{
+ nvgre_t *nvg = arg;
+ struct sockaddr_in6 *in = (struct sockaddr_in6 *)addr;
+
+ *dp = AF_INET6;
+ *fp = SOCK_RAW;
+ *pp = 0x2f; /* XXX Move into a IPPPROTO_* def */
+
+ bzero(in, sizeof (struct sockaddr_in6));
+ in->sin6_family = AF_INET6;
+
+ mutex_enter(&nvg->nvg_lock);
+ in->sin6_addr = nvg->nvg_laddr;
+ mutex_exit(&nvg->nvg_lock);
+ in->sin6_port = 0;
+ *slenp = sizeof (struct sockaddr_in6);
+
+ return (0);
+}
+
+/* XXX Should we keep track of kstats here? */
+
+int
+nvgre_o_encap(void *arg, mblk_t *mp, ovep_encap_info_t *einfop,
+ mblk_t **outp)
+{
+ mblk_t *op;
+ nvgre_hdr_t *hp;
+ uint32_t id;
+
+ ASSERT(einfop->ovdi_id < 1<<24);
+ op = allocb(NVGRE_HDR_LEN, 0);
+ if (op == NULL)
+ return (ENOMEM);
+
+ hp = (nvgre_hdr_t *)op->b_rptr;
+ hp->nvgre_flags = htons(NVGRE_FLAG_VALUE);
+ hp->nvgre_prot = htons(NVGRE_PROTOCOL);
+ id = (uint32_t)einfop->ovdi_id << NVGRE_ID_SHIFT;
+ id |= einfop->ovdi_hash & NVGRE_FLOW_MASK;
+ hp->nvgre_id = htonl(id);
+ op->b_wptr += NVGRE_HDR_LEN;
+ *outp = op;
+
+ return (0);
+}
+
+int
+nvgre_o_decap(void *arg, mblk_t *mp, ovep_encap_info_t *dinfop)
+{
+ uint32_t id;
+ nvgre_hdr_t *hp;
+
+ hp = (nvgre_hdr_t *)mp->b_rptr;
+ if ((ntohs(hp->nvgre_flags) & NVGRE_FLAG_MASK) != NVGRE_FLAG_VALUE)
+ return (EINVAL);
+
+ if (ntohs(hp->nvgre_prot) != NVGRE_PROTOCOL)
+ return (EINVAL);
+
+ id = ntohl(hp->nvgre_id);
+ dinfop->ovdi_id = (id & NVGRE_ID_MASK) >> NVGRE_ID_SHIFT;
+ dinfop->ovdi_hdr_size = NVGRE_HDR_LEN;
+ /* XXX I'm not really sure why we'd want to save the flow */
+ dinfop->ovdi_hash = id & NVGRE_FLOW_MASK;
+ /* XXX These fields may not be necessary */
+ dinfop->ovdi_encap_type = -1;
+ dinfop->ovdi_vlan = -1;
+
+ return (0);
+}
+
+static struct overlay_plugin_ops nvgre_o_ops = {
+ 0,
+ nvgre_o_init,
+ nvgre_o_fini,
+ nvgre_o_encap,
+ nvgre_o_decap,
+ nvgre_o_socket
+};
+
+static struct modlmisc nvgre_modlmisc = {
+ &mod_miscops,
+ "NVGRE encap plugin"
+};
+
+static struct modlinkage nvgre_modlinkage = {
+ MODREV_1,
+ &nvgre_modlmisc
+};
+
+int
+_init(void)
+{
+ int err;
+ overlay_plugin_register_t *ovrp;
+
+ ovrp = overlay_plugin_alloc(OVEP_VERSION);
+ if (ovrp == NULL)
+ return (ENOTSUP);
+ ovrp->ovep_name = nvgre_ident;
+ ovrp->ovep_ops = &nvgre_o_ops;
+ ovrp->ovep_id_size = NVGRE_ID_LEN;
+ ovrp->ovep_flags = 0;
+ ovrp->ovep_hdr_min = NVGRE_HDR_LEN;
+ ovrp->ovep_hdr_max = NVGRE_HDR_LEN;
+ ovrp->ovep_dest = OVERLAY_PLUGIN_D_IP;
+ ovrp->ovep_props = nvgre_props;
+
+ if ((err = overlay_plugin_register(ovrp)) == 0) {
+ if ((err = mod_install(&nvgre_modlinkage)) != 0) {
+ (void) overlay_plugin_unregister(nvgre_ident);
+ }
+ }
+
+ overlay_plugin_free(ovrp);
+ return (err);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&nvgre_modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int err;
+
+ if ((err = overlay_plugin_unregister(nvgre_ident)) != 0)
+ return (err);
+
+ return (mod_remove(&nvgre_modlinkage));
+}
diff --git a/usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c b/usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c
new file mode 100644
index 0000000000..f6a5467ec5
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c
@@ -0,0 +1,337 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * VXLAN encapsulation module
+ *
+ *
+ * The VXLAN header looks as follows in network byte order:
+ *
+ * |0 3| 4 |5 31|
+ * +----------+---+------------------------+
+ * | Reserved | I | Reserved |
+ * +---------------------------------------+
+ * | Virtual Network ID | Reserved |
+ * +----------------------------+----------+
+ * |0 23|24 31|
+ * END CSTYLED
+ *
+ * All reserved values must be 0. The I bit must be 1. We call the top
+ * word the VXLAN magic field for the time being. The second word is
+ * definitely not the most friendly way to operate. Specifically, the ID
+ * is a 24-bit big endian value, but we have to make sure not to use the
+ * lower word.
+ */
+
+#include <sys/overlay_plugin.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/byteorder.h>
+#include <sys/vxlan.h>
+#include <inet/ip.h>
+#include <netinet/in.h>
+
+static const char *vxlan_ident = "vxlan";
+static uint16_t vxlan_defport = IPPORT_VXLAN;
+
+static const char *vxlan_props[] = {
+ "vxlan/listen_ip",
+ "vxlan/listen_port",
+ NULL
+};
+
+/* XXX Should we do locking or let the higher level do it for us? */
+typedef struct vxlan {
+ kmutex_t vxl_lock;
+ overlay_handle_t vxl_oh;
+ uint16_t vxl_lport;
+ boolean_t vxl_hladdr;
+ struct in6_addr vxl_laddr;
+} vxlan_t;
+
+static int
+vxlan_o_init(overlay_handle_t oh, void **outp)
+{
+ vxlan_t *vxl;
+
+ vxl = kmem_alloc(sizeof (vxlan_t), KM_SLEEP);
+ *outp = vxl;
+ mutex_init(&vxl->vxl_lock, NULL, MUTEX_DRIVER, NULL);
+ vxl->vxl_oh = oh;
+ vxl->vxl_lport = vxlan_defport;
+ vxl->vxl_hladdr = B_FALSE;
+
+ return (0);
+}
+
+static void
+vxlan_o_fini(void *arg)
+{
+ vxlan_t *vxl = arg;
+
+ mutex_destroy(&vxl->vxl_lock);
+ kmem_free(arg, sizeof (vxlan_t));
+}
+
+static int
+vxlan_o_socket(void *arg, int *dp, int *fp, int *pp, struct sockaddr *addr,
+ socklen_t *slenp)
+{
+ vxlan_t *vxl = arg;
+ struct sockaddr_in6 *in = (struct sockaddr_in6 *)addr;
+
+ *dp = AF_INET6;
+ *fp = SOCK_DGRAM;
+ *pp = 0;
+ bzero(in, sizeof (struct sockaddr_in6));
+ in->sin6_family = AF_INET6;
+
+ mutex_enter(&vxl->vxl_lock);
+ if (vxl->vxl_hladdr == B_FALSE) {
+ mutex_exit(&vxl->vxl_lock);
+ /* XXX This errno sucks and is now about trains */
+ return (EINVAL);
+ }
+ in->sin6_port = htons(vxl->vxl_lport);
+ in->sin6_addr = vxl->vxl_laddr;
+ mutex_exit(&vxl->vxl_lock);
+ *slenp = sizeof (struct sockaddr_in6);
+
+ return (0);
+}
+
+/*
+ * XXX Stats?
+ */
+static int
+vxlan_o_encap(void *arg, mblk_t *mp, ovep_encap_info_t *einfop,
+ mblk_t **outp)
+{
+ mblk_t *ob;
+ vxlan_hdr_t *vxh;
+
+ ASSERT(einfop->ovdi_id < (1 << 24));
+
+ /*
+ * XXX We probably want a good way to cache and handle the allocation
+ * and destruction of these message blocks.
+ */
+ ob = allocb(VXLAN_HDR_LEN, 0);
+ if (ob == NULL)
+ return (ENOMEM);
+
+
+
+ vxh = (vxlan_hdr_t *)ob->b_rptr;
+ vxh->vxlan_flags = ntohl(VXLAN_MAGIC);
+ vxh->vxlan_id = htonl((uint32_t)einfop->ovdi_id << VXLAN_ID_SHIFT);
+ ob->b_wptr += VXLAN_HDR_LEN;
+ *outp = ob;
+
+ return (0);
+}
+
+/* XXX Stats */
+static int
+vxlan_o_decap(void *arg, mblk_t *mp, ovep_encap_info_t *dinfop)
+{
+ vxlan_hdr_t *vxh;
+
+ /* XXX This assumes that we have a pulled up block, which is false */
+ vxh = (vxlan_hdr_t *)mp->b_rptr;
+ if ((ntohl(vxh->vxlan_flags) & VXLAN_MAGIC) == 0)
+ return (EINVAL);
+
+ dinfop->ovdi_id = ntohl(vxh->vxlan_id) >> VXLAN_ID_SHIFT;
+ dinfop->ovdi_hdr_size = VXLAN_HDR_LEN;
+ /* XXX Probably don't need these fields in the long run */
+ dinfop->ovdi_encap_type = -1;
+ dinfop->ovdi_vlan = -1;
+
+ return (0);
+}
+
+static int
+vxlan_o_getprop(void *arg, const char *pr_name, void *buf, uint32_t *bufsize)
+{
+ vxlan_t *vxl = arg;
+
+ /* vxlan/listen_ip */
+ if (strcmp(pr_name, vxlan_props[0]) == 0) {
+ if (*bufsize < sizeof (struct in6_addr))
+ return (EOVERFLOW);
+
+ mutex_enter(&vxl->vxl_lock);
+ if (vxl->vxl_hladdr == B_FALSE) {
+ *bufsize = 0;
+ } else {
+ bcopy(&vxl->vxl_laddr, buf, sizeof (struct in6_addr));
+ *bufsize = sizeof (struct in6_addr);
+ }
+ mutex_exit(&vxl->vxl_lock);
+ return (0);
+ }
+
+ /* vxlan/listen_port */
+ if (strcmp(pr_name, vxlan_props[1]) == 0) {
+ uint64_t val;
+ if (*bufsize < sizeof (uint64_t))
+ return (EOVERFLOW);
+
+ mutex_enter(&vxl->vxl_lock);
+ val = vxl->vxl_lport;
+ bcopy(&val, buf, sizeof (uint64_t));
+ *bufsize = sizeof (uint64_t);
+ mutex_exit(&vxl->vxl_lock);
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static int
+vxlan_o_setprop(void *arg, const char *pr_name, const void *buf,
+ uint32_t bufsize)
+{
+ vxlan_t *vxl = arg;
+
+ /* vxlan/listen_ip */
+ if (strcmp(pr_name, vxlan_props[0]) == 0) {
+ const struct in6_addr *ipv6 = buf;
+ if (bufsize != sizeof (struct in6_addr))
+ return (EINVAL);
+
+ /*
+ * XXX What else should be disallowed?
+ */
+ if (IN6_IS_ADDR_V4COMPAT(ipv6))
+ return (EINVAL);
+
+ mutex_enter(&vxl->vxl_lock);
+ vxl->vxl_hladdr = B_TRUE;
+ bcopy(ipv6, &vxl->vxl_laddr, sizeof (struct in6_addr));
+ mutex_exit(&vxl->vxl_lock);
+
+ return (0);
+ }
+
+ /* vxlan/listen_port */
+ if (strcmp(pr_name, vxlan_props[1]) == 0) {
+ const uint64_t *valp = buf;
+ if (bufsize != 8)
+ return (EINVAL);
+
+ if (*valp == 0 || *valp > UINT16_MAX)
+ return (EINVAL);
+
+ mutex_enter(&vxl->vxl_lock);
+ vxl->vxl_lport = *valp;
+ mutex_exit(&vxl->vxl_lock);
+ return (0);
+ }
+ return (EINVAL);
+}
+
+static int
+vxlan_o_propinfo(const char *pr_name, overlay_prop_handle_t phdl)
+{
+ /* vxlan/listen_ip */
+ if (strcmp(pr_name, vxlan_props[0]) == 0) {
+ overlay_prop_set_name(phdl, vxlan_props[0]);
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_RRW);
+ overlay_prop_set_type(phdl, OVERLAY_PROP_T_IP);
+ overlay_prop_set_nodefault(phdl);
+ return (0);
+ }
+
+ if (strcmp(pr_name, vxlan_props[1]) == 0) {
+ overlay_prop_set_name(phdl, vxlan_props[1]);
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_RRW);
+ overlay_prop_set_type(phdl, OVERLAY_PROP_T_UINT);
+ overlay_prop_set_default(phdl, &vxlan_defport,
+ sizeof (vxlan_defport));
+ overlay_prop_set_range_uint32(phdl, 1, UINT16_MAX);
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static struct overlay_plugin_ops vxlan_o_ops = {
+ 0,
+ vxlan_o_init,
+ vxlan_o_fini,
+ vxlan_o_encap,
+ vxlan_o_decap,
+ vxlan_o_socket,
+ vxlan_o_getprop,
+ vxlan_o_setprop,
+ vxlan_o_propinfo
+};
+
+static struct modlmisc vxlan_modlmisc = {
+ &mod_miscops,
+ "VXLAN encap plugin"
+};
+
+static struct modlinkage vxlan_modlinkage = {
+ MODREV_1,
+ &vxlan_modlmisc
+};
+
+int
+_init(void)
+{
+ int err;
+ overlay_plugin_register_t *ovrp;
+
+ ovrp = overlay_plugin_alloc(OVEP_VERSION);
+ if (ovrp == NULL)
+ return (ENOTSUP);
+ ovrp->ovep_name = vxlan_ident;
+ ovrp->ovep_ops = &vxlan_o_ops;
+ ovrp->ovep_id_size = VXLAN_ID_LEN;
+ ovrp->ovep_flags = OVEP_F_VLAN_TAG;
+ ovrp->ovep_hdr_min = VXLAN_HDR_LEN;
+ ovrp->ovep_hdr_max = VXLAN_HDR_LEN;
+ ovrp->ovep_dest = OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT;
+ ovrp->ovep_props = vxlan_props;
+
+ if ((err = overlay_plugin_register(ovrp)) == 0) {
+ if ((err = mod_install(&vxlan_modlinkage)) != 0) {
+ (void) overlay_plugin_unregister(vxlan_ident);
+ }
+ }
+
+ overlay_plugin_free(ovrp);
+ return (err);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&vxlan_modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int err;
+
+ if ((err = overlay_plugin_unregister(vxlan_ident)) != 0)
+ return (err);
+
+ return (mod_remove(&vxlan_modlinkage));
+}
diff --git a/usr/src/uts/common/io/pseudonex.c b/usr/src/uts/common/io/pseudonex.c
index f83b0abf39..c8808fdf73 100644
--- a/usr/src/uts/common/io/pseudonex.c
+++ b/usr/src/uts/common/io/pseudonex.c
@@ -83,6 +83,8 @@ static int pseudonex_detach(dev_info_t *, ddi_detach_cmd_t);
static int pseudonex_open(dev_t *, int, int, cred_t *);
static int pseudonex_close(dev_t, int, int, cred_t *);
static int pseudonex_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+static int pseudonex_fm_init(dev_info_t *, dev_info_t *, int,
+ ddi_iblock_cookie_t *);
static int pseudonex_ctl(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
void *);
@@ -90,6 +92,8 @@ static void *pseudonex_state;
typedef struct pseudonex_state {
dev_info_t *pnx_devi;
+ int pnx_fmcap;
+ ddi_iblock_cookie_t pnx_fm_ibc;
} pseudonex_state_t;
static struct bus_ops pseudonex_bus_ops = {
@@ -116,7 +120,7 @@ static struct bus_ops pseudonex_bus_ops = {
NULL, /* bus_intr_ctl */
NULL, /* bus_config */
NULL, /* bus_unconfig */
- NULL, /* bus_fm_init */
+ pseudonex_fm_init, /* bus_fm_init */
NULL, /* bus_fm_fini */
NULL, /* bus_fm_access_enter */
NULL, /* bus_fm_access_exit */
@@ -228,6 +232,9 @@ pseudonex_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
pnx_state = ddi_get_soft_state(pseudonex_state, instance);
pnx_state->pnx_devi = devi;
+ pnx_state->pnx_fmcap = DDI_FM_EREPORT_CAPABLE;
+ ddi_fm_init(devi, &pnx_state->pnx_fmcap, &pnx_state->pnx_fm_ibc);
+
if (ddi_create_minor_node(devi, "devctl", S_IFCHR, instance,
DDI_NT_NEXUS, 0) != DDI_SUCCESS) {
ddi_remove_minor_node(devi, NULL);
@@ -375,6 +382,19 @@ pseudonex_auto_assign(dev_info_t *child)
}
static int
+pseudonex_fm_init(dev_info_t *dip, dev_info_t *tdip, int cap,
+ ddi_iblock_cookie_t *ibc)
+{
+ pseudonex_state_t *pnx_state;
+
+ pnx_state = ddi_get_soft_state(pseudonex_state, ddi_get_instance(dip));
+ ASSERT(pnx_state != NULL);
+ ASSERT(ibc != NULL);
+ *ibc = pnx_state->pnx_fm_ibc;
+ return (pnx_state->pnx_fmcap & cap);
+}
+
+static int
pseudonex_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
void *arg, void *result)
{
diff --git a/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas.c b/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas.c
index f094c9a510..78a6a67056 100644
--- a/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas.c
+++ b/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas.c
@@ -72,6 +72,7 @@
#include <sys/file.h>
#include <sys/policy.h>
#include <sys/model.h>
+#include <sys/refhash.h>
#include <sys/sysevent.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dr.h>
@@ -98,7 +99,6 @@
#include <sys/scsi/adapters/mpt_sas/mptsas_var.h>
#include <sys/scsi/adapters/mpt_sas/mptsas_ioctl.h>
#include <sys/scsi/adapters/mpt_sas/mptsas_smhba.h>
-#include <sys/scsi/adapters/mpt_sas/mptsas_hash.h>
#include <sys/raidioctl.h>
#include <sys/fs/dv_node.h> /* devfs_clean */
diff --git a/usr/src/uts/common/netinet/in.h b/usr/src/uts/common/netinet/in.h
index d530b7f36e..a64095c6c2 100644
--- a/usr/src/uts/common/netinet/in.h
+++ b/usr/src/uts/common/netinet/in.h
@@ -225,6 +225,7 @@ typedef uint16_t sa_family_t;
#define IPPORT_SLP 427
#define IPPORT_MIP 434
#define IPPORT_SMB 445 /* a.k.a. microsoft-ds */
+#define IPPORT_VXLAN 4789
/*
* Internet Key Exchange (IKE) ports
diff --git a/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas_hash.c b/usr/src/uts/common/refhash/refhash.c
index 8f96c2d9f1..7eb8d967c6 100644
--- a/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas_hash.c
+++ b/usr/src/uts/common/refhash/refhash.c
@@ -13,13 +13,15 @@
* Copyright 2014 Joyent, Inc. All rights reserved.
*/
-#include <sys/scsi/adapters/mpt_sas/mptsas_hash.h>
+#include <sys/refhash.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/list.h>
#include <sys/ddi.h>
+#define RHL_F_DEAD 0x01
+
#ifdef lint
extern refhash_link_t *obj_to_link(refhash_t *, void *);
extern void *link_to_obj(refhash_t *, refhash_link_t *);
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index 7acdd7ebe8..7651028768 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -422,6 +422,9 @@ CHKHDRS= \
ontrap.h \
open.h \
openpromio.h \
+ overlay.h \
+ overlay_common.h \
+ overlay_target.h \
panic.h \
param.h \
pathconf.h \
@@ -654,6 +657,7 @@ CHKHDRS= \
vuid_queue.h \
vuid_state.h \
vuid_store.h \
+ vxlan.h \
wait.h \
waitq.h \
wanboot_impl.h \
diff --git a/usr/src/uts/common/sys/dld_ioc.h b/usr/src/uts/common/sys/dld_ioc.h
index 2f519a8eda..6c864f4700 100644
--- a/usr/src/uts/common/sys/dld_ioc.h
+++ b/usr/src/uts/common/sys/dld_ioc.h
@@ -59,6 +59,8 @@ extern "C" {
#define IPTUN_IOC 0x454A
#define BRIDGE_IOC 0xB81D
#define IBPART_IOC 0x6171
+/* XXX Should we do something better than the date */
+#define OVERLAY_IOC 0x2005
/* GLDv3 modules use these macros to generate unique ioctl commands */
#define DLDIOC(cmdid) DLD_IOC_CMD(DLD_IOC, (cmdid))
@@ -68,6 +70,7 @@ extern "C" {
#define IPTUNIOC(cmdid) DLD_IOC_CMD(IPTUN_IOC, (cmdid))
#define BRIDGEIOC(cmdid) DLD_IOC_CMD(BRIDGE_IOC, (cmdid))
#define IBPARTIOC(cmdid) DLD_IOC_CMD(IBPART_IOC, (cmdid))
+#define OVERLAYIOC(cmdid) DLD_IOC_CMD(OVERLAY_IOC, (cmdid))
#ifdef _KERNEL
diff --git a/usr/src/uts/common/sys/dls_mgmt.h b/usr/src/uts/common/sys/dls_mgmt.h
index 4f73d92118..ff3de32e9d 100644
--- a/usr/src/uts/common/sys/dls_mgmt.h
+++ b/usr/src/uts/common/sys/dls_mgmt.h
@@ -47,13 +47,15 @@ typedef enum {
DATALINK_CLASS_SIMNET = 0x20,
DATALINK_CLASS_BRIDGE = 0x40,
DATALINK_CLASS_IPTUN = 0x80,
- DATALINK_CLASS_PART = 0x100
+ DATALINK_CLASS_PART = 0x100,
+ DATALINK_CLASS_OVERLAY = 0x200
} datalink_class_t;
#define DATALINK_CLASS_ALL (DATALINK_CLASS_PHYS | \
DATALINK_CLASS_VLAN | DATALINK_CLASS_AGGR | DATALINK_CLASS_VNIC | \
DATALINK_CLASS_ETHERSTUB | DATALINK_CLASS_SIMNET | \
- DATALINK_CLASS_BRIDGE | DATALINK_CLASS_IPTUN | DATALINK_CLASS_PART)
+ DATALINK_CLASS_BRIDGE | DATALINK_CLASS_IPTUN | DATALINK_CLASS_PART | \
+ DATALINK_CLASS_OVERLAY)
/*
* A combination of flags and media.
diff --git a/usr/src/uts/common/sys/ethernet.h b/usr/src/uts/common/sys/ethernet.h
index 6c9aeef4af..5b9de2f2bf 100644
--- a/usr/src/uts/common/sys/ethernet.h
+++ b/usr/src/uts/common/sys/ethernet.h
@@ -38,6 +38,7 @@ extern "C" {
#define ETHERADDRL (6) /* ethernet address length in octets */
#define ETHERFCSL (4) /* ethernet FCS length in octets */
+#define ETHERADDRSTRL (18) /* char size of ETHERADDRL with null */
/*
* Ethernet address - 6 octets
@@ -144,7 +145,9 @@ extern char *ether_sprintf(struct ether_addr *);
extern int ether_aton(char *, uchar_t *);
#else /* _KERNEL */
extern char *ether_ntoa(const struct ether_addr *);
+extern char *ether_ntoa_r(const struct ether_addr *, char *);
extern struct ether_addr *ether_aton(const char *);
+extern struct ether_addr *ether_aton_r(const char *, struct ether_addr *);
extern int ether_ntohost(char *, const struct ether_addr *);
extern int ether_hostton(const char *, struct ether_addr *);
extern int ether_line(const char *, struct ether_addr *, char *);
diff --git a/usr/src/uts/common/sys/geneve.h b/usr/src/uts/common/sys/geneve.h
new file mode 100644
index 0000000000..6a9dc5ecda
--- /dev/null
+++ b/usr/src/uts/common/sys/geneve.h
@@ -0,0 +1,64 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_GENEVE_H
+#define _SYS_GENEVE_H
+
+/*
+ * Common GENEVE information
+ */
+
+#include <sys/inttypes.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+/* Sizes in bytes */
+#define GENEVE_HDR_MIN 8
+#define GENEVE_HDR_MAX 20
+#define GENEVE_ID_LEN 3
+
+#define GENEVE_OPT_MASK 0x3f00
+#define GENEVE_OPT_SHIFT 8
+
+#define GENEVE_VERSION 0
+#define GENEVE_VERS_MASK 0xc000
+#define GENEVE_VERS_SHIFT 14
+
+#define GENEVE_F_OAM 0x0080
+#define GENEVE_F_COPT 0x0040
+
+#define GENEVE_ID_SHIFT 8
+#define GENEVE_PROT_ETHERNET 0x6558
+
+#pragma pack(1)
+typedef struct geneve_hdr {
+ uint16_t geneve_flags;
+ uint16_t geneve_prot;
+ uint32_t geneve_id;
+ uint8_t geneve_opts[];
+} geneve_hdr_t;
+#pragma pack()
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_GENEVE_H */
diff --git a/usr/src/uts/common/sys/id_space.h b/usr/src/uts/common/sys/id_space.h
index d56fcceb5a..46d25f207f 100644
--- a/usr/src/uts/common/sys/id_space.h
+++ b/usr/src/uts/common/sys/id_space.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All Rights reserved.
*/
#ifndef _ID_SPACE_H
@@ -34,8 +35,6 @@ extern "C" {
#include <sys/mutex.h>
#include <sys/vmem.h>
-#ifdef _KERNEL
-
typedef vmem_t id_space_t;
id_space_t *id_space_create(const char *, id_t, id_t);
@@ -48,8 +47,6 @@ id_t id_allocff_nosleep(id_space_t *);
id_t id_alloc_specific_nosleep(id_space_t *, id_t);
void id_free(id_space_t *, id_t);
-#endif /* _KERNEL */
-
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/ksocket.h b/usr/src/uts/common/sys/ksocket.h
index dfe25eec76..f602e1e1dd 100644
--- a/usr/src/uts/common/sys/ksocket.h
+++ b/usr/src/uts/common/sys/ksocket.h
@@ -121,6 +121,10 @@ extern int ksocket_close(ksocket_t, struct cred *);
extern void ksocket_hold(ksocket_t);
extern void ksocket_rele(ksocket_t);
+typedef boolean_t (*ksocket_krecv_f)(ksocket_t, mblk_t *, size_t, int, void *);
+extern int ksocket_krecv_set(ksocket_t, ksocket_krecv_f, void *);
+extern void ksocket_krecv_unblock(ksocket_t);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h
index 5803ad58d4..1490a4330d 100644
--- a/usr/src/uts/common/sys/mac.h
+++ b/usr/src/uts/common/sys/mac.h
@@ -100,6 +100,14 @@ typedef struct mac_propval_uint32_range_s {
} mac_propval_uint32_range_t;
/*
+ * Defines ranges which are a series of C style strings.
+ */
+typedef struct mac_propval_str_range_s {
+ uint32_t mpur_nextbyte;
+ char mpur_data[1];
+} mac_propval_str_range_t;
+
+/*
* Data type of property values.
*/
typedef enum {
@@ -119,6 +127,7 @@ typedef struct mac_propval_range_s {
mac_propval_type_t mpr_type; /* type of value */
union {
mac_propval_uint32_range_t mpr_uint32[1];
+ mac_propval_str_range_t mpr_str;
} u;
} mac_propval_range_t;
diff --git a/usr/src/uts/common/sys/nvgre.h b/usr/src/uts/common/sys/nvgre.h
new file mode 100644
index 0000000000..dc47db4f85
--- /dev/null
+++ b/usr/src/uts/common/sys/nvgre.h
@@ -0,0 +1,56 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_NVGRE_H
+#define _SYS_NVGRE_H
+
+/*
+ * Common NVGRE information
+ */
+
+#include <sys/inttypes.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+/* Sizes in bytes */
+#define NVGRE_HDR_LEN 8
+#define NVGRE_ID_LEN 3
+
+#define NVGRE_FLAG_MASK 0xb007
+#define NVGRE_FLAG_VALUE 0x2000
+#define NVGRE_PROTOCOL 0x6558
+#define NVGRE_ID_MASK 0xffffff00
+#define NVGRE_ID_SHIFT 8
+#define NVGRE_FLOW_MASK 0x000000ff
+
+#pragma pack(1)
+typedef struct nvgre_hdr {
+ uint16_t nvgre_flags;
+ uint16_t nvgre_prot;
+ uint32_t nvgre_id;
+} nvgre_hdr_t;
+#pragma pack()
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_NVGRE_H */
diff --git a/usr/src/uts/common/sys/overlay.h b/usr/src/uts/common/sys/overlay.h
new file mode 100644
index 0000000000..db161cd7f8
--- /dev/null
+++ b/usr/src/uts/common/sys/overlay.h
@@ -0,0 +1,96 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_OVERLAY_H
+#define _SYS_OVERLAY_H
+
+/*
+ * Overlay device support
+ */
+
+#include <sys/param.h>
+#include <sys/dld_ioc.h>
+#include <sys/mac.h>
+#include <sys/overlay_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define OVERLAY_IOC_CREATE OVERLAYIOC(1)
+#define OVERLAY_IOC_DELETE OVERLAYIOC(2)
+#define OVERLAY_IOC_PROPINFO OVERLAYIOC(3)
+#define OVERLAY_IOC_GETPROP OVERLAYIOC(4)
+#define OVERLAY_IOC_SETPROP OVERLAYIOC(5)
+#define OVERLAY_IOC_NPROPS OVERLAYIOC(6)
+#define OVERLAY_IOC_ACTIVATE OVERLAYIOC(7)
+#define OVERLAY_IOC_STATUS OVERLAYIOC(8)
+
+typedef struct overlay_ioc_create {
+ datalink_id_t oic_linkid;
+ uint32_t oic_filler;
+ uint64_t oic_vnetid;
+ char oic_encap[MAXLINKNAMELEN];
+} overlay_ioc_create_t;
+
+typedef struct overlay_ioc_activate {
+ datalink_id_t oia_linkid;
+} overlay_ioc_activate_t;
+
+typedef struct overlay_ioc_delete {
+ datalink_id_t oid_linkid;
+} overlay_ioc_delete_t;
+
+typedef struct overlay_ioc_nprops {
+ datalink_id_t oipn_linkid;
+ int32_t oipn_nprops;
+} overlay_ioc_nprops_t;
+
+typedef struct overlay_ioc_propinfo {
+ datalink_id_t oipi_linkid;
+ int32_t oipi_id;
+ char oipi_name[OVERLAY_PROP_NAMELEN];
+ uint_t oipi_type;
+ uint_t oipi_prot;
+ uint8_t oipi_default[OVERLAY_PROP_SIZEMAX];
+ uint32_t oipi_defsize;
+ uint32_t oipi_posssize;
+ uint8_t oipi_poss[OVERLAY_PROP_SIZEMAX];
+} overlay_ioc_propinfo_t;
+
+typedef struct overlay_ioc_prop {
+ datalink_id_t oip_linkid;
+ int32_t oip_id;
+ char oip_name[OVERLAY_PROP_NAMELEN];
+ uint8_t oip_value[OVERLAY_PROP_SIZEMAX];
+ uint32_t oip_size;
+} overlay_ioc_prop_t;
+
+typedef enum overlay_status {
+ OVERLAY_I_OK = 0x00,
+ OVERLAY_I_DEGRADED = 0x01
+} overlay_status_t;
+
+typedef struct overlay_ioc_status {
+ datalink_id_t ois_linkid;
+ uint_t ois_status;
+ char ois_message[OVERLAY_STATUS_BUFLEN];
+} overlay_ioc_status_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_OVERLAY_H */
diff --git a/usr/src/uts/common/sys/overlay_common.h b/usr/src/uts/common/sys/overlay_common.h
new file mode 100644
index 0000000000..f96cb4c39e
--- /dev/null
+++ b/usr/src/uts/common/sys/overlay_common.h
@@ -0,0 +1,65 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_OVERLAY_COMMON_H
+#define _SYS_OVERLAY_COMMON_H
+
+/*
+ * Common overlay definitions
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum overlay_target_mode {
+ OVERLAY_TARGET_NONE = 0x0,
+ OVERLAY_TARGET_POINT,
+ OVERLAY_TARGET_DYNAMIC
+} overlay_target_mode_t;
+
+typedef enum overlay_plugin_dest {
+ OVERLAY_PLUGIN_D_INVALID = 0x0,
+ OVERLAY_PLUGIN_D_ETHERNET = 0x1,
+ OVERLAY_PLUGIN_D_IP = 0x2,
+ OVERLAY_PLUGIN_D_PORT = 0x4,
+ OVERLAY_PLUGIN_D_MASK = 0x7
+} overlay_plugin_dest_t;
+
+typedef enum overlay_prop_type {
+ OVERLAY_PROP_T_INT = 0x1, /* signed int */
+ OVERLAY_PROP_T_UINT, /* unsigned int */
+ OVERLAY_PROP_T_IP, /* sinaddr6 */
+ OVERLAY_PROP_T_STRING /* OVERLAY_PROPS_SIZEMAX */
+} overlay_prop_type_t;
+
+typedef enum overlay_prop_prot {
+ OVERLAY_PROP_PERM_REQ = 0x1,
+ OVERLAY_PROP_PERM_READ = 0x2,
+ OVERLAY_PROP_PERM_WRITE = 0x4,
+ OVERLAY_PROP_PERM_RW = 0x6,
+ OVERLAY_PROP_PERM_RRW = 0x7,
+ OVERLAY_PROP_PERM_MASK = 0x7
+} overlay_prop_prot_t;
+
+#define OVERLAY_PROP_NAMELEN 64
+#define OVERLAY_PROP_SIZEMAX 256
+#define OVERLAY_STATUS_BUFLEN 256
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_OVERLAY_COMMON_H */
diff --git a/usr/src/uts/common/sys/overlay_impl.h b/usr/src/uts/common/sys/overlay_impl.h
new file mode 100644
index 0000000000..1e0007c5a5
--- /dev/null
+++ b/usr/src/uts/common/sys/overlay_impl.h
@@ -0,0 +1,207 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_OVERLAY_IMPL_H
+#define _SYS_OVERLAY_IMPL_H
+
+/*
+ * Overlay device support
+ */
+
+#include <sys/overlay.h>
+#include <sys/overlay_common.h>
+#include <sys/overlay_plugin.h>
+#include <sys/overlay_target.h>
+#include <sys/ksynch.h>
+#include <sys/list.h>
+#include <sys/avl.h>
+#include <sys/ksocket.h>
+#include <sys/socket.h>
+#include <sys/refhash.h>
+#include <sys/ethernet.h>
+#include <sys/list.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define OVEP_VERSION_ONE 0x1
+
+typedef struct overlay_plugin {
+ kmutex_t ovp_mutex;
+ list_node_t ovp_link; /* overlay_plugin_lock */
+ uint_t ovp_active; /* ovp_mutex */
+ const char *ovp_name; /* RO */
+ const overlay_plugin_ops_t *ovp_ops; /* RO */
+ const char *const *ovp_props; /* RO */
+ uint_t ovp_nprops; /* RO */
+ uint_t ovp_id_size; /* RO */
+ overlay_plugin_flags_t ovp_flags; /* RO */
+ uint_t ovp_hdr_min; /* RO */
+ uint_t ovp_hdr_max; /* RO */
+ overlay_plugin_dest_t ovp_dest; /* RO */
+} overlay_plugin_t;
+
+typedef struct overlay_mux {
+ list_node_t omux_lnode;
+ ksocket_t omux_ksock; /* RO */
+ overlay_plugin_t *omux_plugin; /* RO: associated encap */
+ int omux_domain; /* RO: socket domain */
+ int omux_family; /* RO: socket family */
+ int omux_protocol; /* RO: socket protocol */
+ struct sockaddr *omux_addr; /* RO: socket address */
+ socklen_t omux_alen; /* RO: sockaddr len */
+ kmutex_t omux_lock; /* Protects everything below */
+ uint_t omux_count; /* Active instances */
+ avl_tree_t omux_devices; /* Tree of devices */
+} overlay_mux_t;
+
+typedef enum overlay_target_flag {
+ OVERLAY_T_TEARDOWN = 0x1
+} overlay_target_flag_t;
+
+typedef struct overlay_target {
+ kmutex_t ott_lock;
+ kcondvar_t ott_cond;
+ overlay_target_mode_t ott_mode; /* RO */
+ overlay_plugin_dest_t ott_dest; /* RO */
+ uint64_t ott_id; /* RO */
+ overlay_target_flag_t ott_flags; /* ott_lock */
+ uint_t ott_ocount; /* ott_lock */
+ union { /* ott_lock */
+ overlay_target_point_t ott_point;
+ struct overlay_target_dyn {
+ refhash_t *ott_dhash;
+ avl_tree_t ott_tree;
+ } ott_dyn;
+ } ott_u;
+} overlay_target_t;
+
+typedef enum overlay_dev_flag {
+ OVERLAY_F_ACTIVATED = 0x01, /* Activate ioctl completed */
+ OVERLAY_F_IN_MUX = 0x02, /* Currently in a mux */
+ OVERLAY_F_IN_TX = 0x04, /* Currently doing tx */
+ OVERLAY_F_IN_RX = 0x08, /* Currently doing rx */
+ OVERLAY_F_IOMASK = 0x0c, /* A mask for rx and tx */
+ OVERLAY_F_MDDROP = 0x10, /* Drop traffic for metadata update */
+ OVERLAY_F_STOPMASK = 0x1e, /* None set when stopping */
+ OVERLAY_F_VARPD = 0x20, /* varpd plugin exists */
+ OVERLAY_F_DEGRADED = 0x40, /* device is degraded */
+ OVERLAY_F_MASK = 0x7f /* mask of everything */
+} overlay_dev_flag_t;
+
+typedef struct overlay_dev {
+ kmutex_t odd_lock;
+ kcondvar_t odd_iowait;
+ list_node_t odd_link; /* overlay_dev_lock */
+ mac_handle_t odd_mh; /* RO */
+ overlay_plugin_t *odd_plugin; /* RO */
+ datalink_id_t odd_linkid; /* RO */
+ void *odd_pvoid; /* RO -- only used by plugin */
+ uint_t odd_ref; /* protected by odd_lock */
+ uint_t odd_mtu; /* protected by odd_lock */
+ overlay_dev_flag_t odd_flags; /* protected by odd_lock */
+ uint_t odd_rxcount; /* protected by odd_lock */
+ uint_t odd_txcount; /* protected by odd_lock */
+ overlay_mux_t *odd_mux; /* protected by odd_lock */
+ uint64_t odd_vid; /* RO if active else odd_lock */
+ avl_node_t odd_muxnode; /* managed by mux */
+ overlay_target_t *odd_target; /* XXX Write once? */
+ char odd_fmamsg[OVERLAY_STATUS_BUFLEN]; /* odd_lock */
+} overlay_dev_t;
+
+typedef enum overlay_target_entry_flags {
+ OVERLAY_ENTRY_F_PENDING = 0x01, /* lookup in progress */
+ OVERLAY_ENTRY_F_VALID = 0x02, /* entry is currently valid */
+ OVERLAY_ENTRY_F_DROP = 0x04, /* always drop target */
+ OVERLAY_ENTRY_F_VALID_MASK = 0x06
+} overlay_target_entry_flags_t;
+
+typedef struct overlay_target_entry {
+ kmutex_t ote_lock;
+ refhash_link_t ote_reflink; /* hashtable link */
+ avl_node_t ote_avllink; /* iteration link */
+ list_node_t ote_qlink;
+ overlay_target_entry_flags_t ote_flags; /* RW: state flags */
+ uint8_t ote_addr[ETHERADDRL]; /* RO: mac addr */
+ overlay_target_t *ote_ott; /* RO */
+ overlay_dev_t *ote_odd; /* RO */
+ overlay_target_point_t ote_dest; /* RW: destination */
+ mblk_t *ote_chead; /* RW: blocked mb chain head */
+ mblk_t *ote_ctail; /* RW: blocked mb chain tail */
+ size_t ote_mbsize; /* RW: outstanding mblk size */
+ hrtime_t ote_vtime; /* RW: valid timestamp */
+} overlay_target_entry_t;
+
+
+#define OVERLAY_CTL "overlay"
+
+extern dev_info_t *overlay_dip;
+
+extern mblk_t *overlay_m_tx(void *, mblk_t *);
+
+typedef int (*overlay_dev_iter_f)(overlay_dev_t *, void *);
+extern void overlay_dev_iter(overlay_dev_iter_f, void *);
+
+extern void overlay_plugin_init(void);
+extern overlay_plugin_t *overlay_plugin_lookup(const char *);
+extern void overlay_plugin_rele(overlay_plugin_t *);
+extern void overlay_plugin_fini(void);
+typedef int (*overlay_plugin_walk_f)(overlay_plugin_t *, void *);
+extern void overlay_plugin_walk(overlay_plugin_walk_f, void *);
+
+extern void overlay_io_start(overlay_dev_t *, overlay_dev_flag_t);
+extern void overlay_io_done(overlay_dev_t *, overlay_dev_flag_t);
+
+extern void overlay_mux_init(void);
+extern void overlay_mux_fini(void);
+
+extern overlay_mux_t *overlay_mux_open(overlay_plugin_t *, int, int, int,
+ struct sockaddr *, socklen_t, int *);
+extern void overlay_mux_close(overlay_mux_t *);
+extern void overlay_mux_add_dev(overlay_mux_t *, overlay_dev_t *);
+extern void overlay_mux_remove_dev(overlay_mux_t *, overlay_dev_t *);
+extern int overlay_mux_tx(overlay_mux_t *, struct msghdr *, mblk_t *);
+
+extern void overlay_prop_init(overlay_prop_handle_t);
+
+extern void overlay_target_init(void);
+extern int overlay_target_busy(void);
+extern int overlay_target_open(dev_t *, int, int, cred_t *);
+extern int overlay_target_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+extern int overlay_target_close(dev_t, int, int, cred_t *);
+extern void overlay_target_free(overlay_dev_t *);
+
+#define OVERLAY_TARGET_OK 0
+#define OVERLAY_TARGET_DROP 1
+#define OVERLAY_TARGET_ASYNC 2
+extern int overlay_target_lookup(overlay_dev_t *, mblk_t *, struct sockaddr *,
+ socklen_t *);
+extern void overlay_target_quiesce(overlay_target_t *);
+extern void overlay_target_fini(void);
+
+extern void overlay_fm_init(void);
+extern void overlay_fm_fini(void);
+extern void overlay_fm_degrade(overlay_dev_t *, const char *);
+extern void overlay_fm_restore(overlay_dev_t *);
+
+extern overlay_dev_t *overlay_hold_by_dlid(datalink_id_t);
+extern void overlay_hold_rele(overlay_dev_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_OVERLAY_IMPL_H */
diff --git a/usr/src/uts/common/sys/overlay_plugin.h b/usr/src/uts/common/sys/overlay_plugin.h
new file mode 100644
index 0000000000..bc35935c3c
--- /dev/null
+++ b/usr/src/uts/common/sys/overlay_plugin.h
@@ -0,0 +1,150 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_OVERLAY_PLUGIN_H
+#define _SYS_OVERLAY_PLUGIN_H
+
+/*
+ * Plugin interface for encapsulation/decapsulation modules
+ * XXX This is probably totally wrong
+ */
+
+#include <sys/stream.h>
+#include <sys/mac_provider.h>
+#include <sys/overlay_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define OVEP_VERSION 0x1
+
+typedef enum overlay_plugin_flags {
+ OVEP_F_VLAN_TAG = 0x01, /* Supports VLAN Tags */
+ /*
+ * XXX STT has a weird property where it can have a VLAN ID, but it is
+ * not allowed to be a part of the encapsulated packet. Though compared
+ * to its abuse of tcp of sorts, it shouldn't really surprise us. We
+ * probably won't care about this for real, but it's here for now.
+ */
+ OVEP_F_STRIP_TAG = 0x02 /* VLAN tag should be stripped for encap */
+} overlay_plugin_flags_t;
+
+typedef struct ovep_encap_info {
+ /*
+ * XXX The ID space could easily be more than a 64-bit number, even
+ * though today it's either a 24-64 bit value. How should we future
+ * proof ourselves here?
+ */
+ uint64_t ovdi_id;
+ size_t ovdi_hdr_size;
+ /* XXX Geneve supports non-Ethernet, not sure why we would */
+ int ovdi_encap_type;
+ /* XXX STT Doesn't have a vlan present in it by default */
+ int ovdi_vlan;
+ /*
+ * XXX NVGRE requires an 8-bit hash, UDP ports generally want a 16-bit
+ * hash which is used for entropy. I guess we should pass in a 16-64 bit
+ * hash and truncate it for NVGRE? We can also set it to zero, but given
+ * its use by ECMP or theoretical use at least, we probably shouldn't.
+ * XXX Is a hash space going to be uniform when truncated in half?
+ */
+ uint16_t ovdi_hash;
+} ovep_encap_info_t;
+
+/* XXX These are total strawmen */
+/*
+ * XXX Some of these protocols, aka geneve, have defined themselves to have
+ * options available to them. Of course, none of them are currently defined, but
+ * that likely means that we're going to want to have a way to instantiate and
+ * set properties on these things, so we'll probably want to have the first
+ * argument turn into a void * and add a create and destroy endpoint that gets
+ * given the corresponding mac handle.
+ */
+typedef struct __overlay_prop_handle *overlay_prop_handle_t;
+typedef struct __overlay_handle *overlay_handle_t;
+
+/*
+ * Plugins are guaranteed that calls to setprop are serialized. However, any
+ * number of calls can be going on in parallel otherwise.
+ */
+typedef int (*overlay_plugin_encap_t)(void *, mblk_t *,
+ ovep_encap_info_t *, mblk_t **);
+typedef int (*overlay_plugin_decap_t)(void *, mblk_t *,
+ ovep_encap_info_t *);
+typedef int (*overlay_plugin_init_t)(overlay_handle_t, void **);
+typedef void (*overlay_plugin_fini_t)(void *);
+typedef int (*overlay_plugin_socket_t)(void *, int *, int *, int *,
+ struct sockaddr *, socklen_t *);
+typedef int (*overlay_plugin_getprop_t)(void *, const char *, void *,
+ uint32_t *);
+typedef int (*overlay_plugin_setprop_t)(void *, const char *, const void *,
+ uint32_t);
+typedef int (*overlay_plugin_propinfo_t)(const char *, overlay_prop_handle_t);
+
+typedef struct overlay_plugin_ops {
+ uint_t ovpo_callbacks;
+ overlay_plugin_init_t ovpo_init;
+ overlay_plugin_fini_t ovpo_fini;
+ overlay_plugin_encap_t ovpo_encap;
+ overlay_plugin_decap_t ovpo_decap;
+ overlay_plugin_socket_t ovpo_socket;
+ overlay_plugin_getprop_t ovpo_getprop;
+ overlay_plugin_setprop_t ovpo_setprop;
+ overlay_plugin_propinfo_t ovpo_propinfo;
+} overlay_plugin_ops_t;
+
+typedef struct overlay_plugin_register {
+ uint_t ovep_version;
+ const char *ovep_name;
+ const overlay_plugin_ops_t *ovep_ops;
+ const char **ovep_props;
+ uint_t ovep_id_size;
+ uint_t ovep_flags;
+ uint_t ovep_hdr_min;
+ uint_t ovep_hdr_max;
+ uint_t ovep_dest;
+} overlay_plugin_register_t;
+
+/*
+ * Functions that interact with registration
+ */
+extern overlay_plugin_register_t *overlay_plugin_alloc(uint_t);
+extern void overlay_plugin_free(overlay_plugin_register_t *);
+extern int overlay_plugin_register(overlay_plugin_register_t *);
+extern int overlay_plugin_unregister(const char *);
+
+/*
+ * Property information callbacks
+ */
+extern void overlay_prop_set_name(overlay_prop_handle_t, const char *);
+extern void overlay_prop_set_prot(overlay_prop_handle_t, overlay_prop_prot_t);
+extern void overlay_prop_set_type(overlay_prop_handle_t, overlay_prop_type_t);
+extern int overlay_prop_set_default(overlay_prop_handle_t, void *, ssize_t);
+extern void overlay_prop_set_nodefault(overlay_prop_handle_t);
+extern void overlay_prop_set_range_uint32(overlay_prop_handle_t, uint32_t,
+ uint32_t);
+extern void overlay_prop_set_range_str(overlay_prop_handle_t, const char *);
+
+/*
+ * Callbacks that should be made -- without locks held by the user.
+ */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_OVERLAY_PLUGIN_H */
diff --git a/usr/src/uts/common/sys/overlay_target.h b/usr/src/uts/common/sys/overlay_target.h
new file mode 100644
index 0000000000..8fe3824dbf
--- /dev/null
+++ b/usr/src/uts/common/sys/overlay_target.h
@@ -0,0 +1,292 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#ifndef _OVERLAY_TARGET_H
+#define _OVERLAY_TARGET_H
+
+/*
+ * Overlay device varpd ioctl interface (/dev/overlay)
+ */
+
+#include <sys/types.h>
+#include <sys/ethernet.h>
+#include <netinet/in.h>
+#include <sys/overlay_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct overlay_target_point {
+ uint8_t otp_mac[ETHERADDRL];
+ struct in6_addr otp_ip;
+ uint16_t otp_port;
+} overlay_target_point_t;
+
+#define OVERLAY_TARG_IOCTL (('o' << 24) | ('v' << 16) | ('t' << 8))
+
+#define OVERLAY_TARG_INFO (OVERLAY_TARG_IOCTL | 0x01)
+
+typedef enum overlay_targ_info_flags {
+ OVERLAY_TARG_INFO_F_ACTIVE = 0x01,
+ OVERLAY_TARG_INFO_F_DEGRADED = 0x02
+} overlay_targ_info_flags_t;
+
+/*
+ * Get target information about an overlay device
+ */
+typedef struct overlay_targ_info {
+ datalink_id_t oti_linkid;
+ uint32_t oti_needs;
+ uint64_t oti_flags;
+ uint64_t oti_vnetid;
+} overlay_targ_info_t;
+
+/*
+ * Declare an association between a given varpd instance and a datalink.
+ */
+#define OVERLAY_TARG_ASSOCIATE (OVERLAY_TARG_IOCTL | 0x02)
+
+typedef struct overlay_targ_associate {
+ datalink_id_t ota_linkid;
+ uint32_t ota_mode;
+ uint64_t ota_id;
+ uint32_t ota_provides;
+ overlay_target_point_t ota_point;
+} overlay_targ_associate_t;
+
+/*
+ * Remove an association from a device. If the device has already been started,
+ * this implies OVERLAY_TARG_DEGRADE.
+ */
+#define OVERLAY_TARG_DISASSOCIATE (OVERLAY_TARG_IOCTL | 0x3)
+
+/*
+ * Tells the kernel that while a varpd instance still exists, it basically isn't
+ * making any forward progress, so the device should consider itself degraded.
+ */
+#define OVERLAY_TARG_DEGRADE (OVERLAY_TARG_IOCTL | 0x4)
+
+typedef struct overlay_targ_degrade {
+ datalink_id_t otd_linkid;
+ uint32_t otd_pad;
+ char otd_buf[OVERLAY_STATUS_BUFLEN];
+} overlay_targ_degrade_t;
+
+/*
+ * Tells the kernel to remove the degraded status that it set on a device.
+ */
+#define OVERLAY_TARG_RESTORE (OVERLAY_TARG_IOCTL | 0x5)
+
+typedef struct overlay_targ_id {
+ datalink_id_t otid_linkid;
+} overlay_targ_id_t;
+
+/*
+ * The following ioctls are all used to support dynamic lookups from userland,
+ * generally serviced by varpd.
+ *
+ * The way this is designed to work is that user land will have threads sitting
+ * in OVERLAY_TARG_LOOKUP ioctls waiting to service requests. A thread will sit
+ * waiting for work for up to approximately one second of time before they will
+ * be sent back out to user land to give user land a chance to clean itself up
+ * or more generally, come back into the kernel for work. Once these threads
+ * return, they will have a request with which more action can be done. The
+ * following ioctls can all be used to answer the request.
+ *
+ * OVERLAY_TARG_RESPOND - overlay_targ_resp_t
+ *
+ * The overlay_targ_resp_t has the appropriate information from
+ * which a reply can be generated. The information is filled into
+ * an overlay_targ_point_t as appropriate based on the
+ * overlay_plugin_dest_t type.
+ *
+ *
+ * OVERLAY_TARG_DROP - overlay_targ_resp_t
+ *
+ * The overlay_targ_resp_t should identify a request for which to
+ * drop a packet.
+ *
+ *
+ * OVERLAY_TARG_INJECT - overlay_targ_pkt_t
+ *
+ * The overlay_targ_pkt_t injects a fully formed packet into the
+ * virtual network. It may either be identified by its data link id
+ * or by the request id. If both are specified, the
+ * datalink id will be used. Note, that an injection is not
+ * considered a reply and if this corresponds to a requeset, then
+ * that individual packet must still be dropped.
+ *
+ *
+ * OVERLAY_TARG_PKT - overlay_targ_pkt_t
+ *
+ * This ioctl can be used to copy data from a given request into a
+ * user buffer. This can be used in combination with
+ * OVERLAY_TARG_INJECT to implemnt services such as a proxy-arp.
+ *
+ *
+ * OVERLAY_TARG_RESEND - overlay_targ_pkt_t
+ *
+ * This ioctl is similar to the OVERLAY_TARG_INJECT, except instead
+ * of receiving it on the local mac handle, it queues it for
+ * retransmission again. This is useful if you have a packet that
+ * was originally destined for some broadcast or multicast address
+ * that you now want to send to a unicast address.
+ */
+#define OVERLAY_TARG_LOOKUP (OVERLAY_TARG_IOCTL | 0x10)
+#define OVERLAY_TARG_RESPOND (OVERLAY_TARG_IOCTL | 0x11)
+#define OVERLAY_TARG_DROP (OVERLAY_TARG_IOCTL | 0x12)
+#define OVERLAY_TARG_INJECT (OVERLAY_TARG_IOCTL | 0x13)
+#define OVERLAY_TARG_PKT (OVERLAY_TARG_IOCTL | 0x14)
+#define OVERLAY_TARG_RESEND (OVERLAY_TARG_IOCTL | 0x15)
+
+typedef struct overlay_targ_lookup {
+ uint64_t otl_dlid;
+ uint64_t otl_reqid;
+ uint64_t otl_varpdid;
+ uint64_t otl_vnetid;
+ uint64_t otl_hdrsize;
+ uint64_t otl_pktsize;
+ uint8_t otl_srcaddr[ETHERADDRL];
+ uint8_t otl_dstaddr[ETHERADDRL];
+ uint32_t otl_dsttype;
+ uint32_t otl_sap;
+ int32_t otl_vlan;
+} overlay_targ_lookup_t;
+
+typedef struct overlay_targ_resp {
+ uint64_t otr_reqid;
+ overlay_target_point_t otr_answer;
+} overlay_targ_resp_t;
+
+typedef struct overlay_targ_pkt {
+ uint64_t otp_linkid;
+ uint64_t otp_reqid;
+ uint64_t otp_size;
+ void *otp_buf;
+} overlay_targ_pkt_t;
+
+#ifdef _KERNEL
+
+typedef struct overlay_targ_pkt32 {
+ uint64_t otp_linkid;
+ uint64_t otp_reqid;
+ uint64_t otp_size;
+ caddr32_t otp_buf;
+} overlay_targ_pkt32_t;
+
+#endif /* _KERNEL */
+
+/*
+ * This provides a way to get a list of active overlay devices independently
+ * from dlmgmtd. At the end of the day the kernel always knows what will exist
+ * and this allows varpd which is an implementation of libdladm not to end up
+ * needing to call back into dlmgmtd via libdladm and create an unfortunate
+ * dependency cycle.
+ */
+
+#define OVERLAY_TARG_LIST (OVERLAY_TARG_IOCTL | 0x20)
+
+typedef struct overlay_targ_list {
+ uint32_t otl_nents;
+ uint32_t otl_ents[];
+} overlay_targ_list_t;
+
+/*
+ * The following family of ioctls all manipulate the target cache of a given
+ * device.
+ *
+ * OVERLAY_TARG_CACHE_GET - overlay_targ_cache_t
+ *
+ * The overlay_targ_cache_t should be have its link identifier and
+ * the desired mac address filled in. On return, it will fill in
+ * the otc_dest member, if the entry exists in the table.
+ *
+ *
+ * OVERLAY_TARG_CACHE_SET - overlay_targ_cache_t
+ *
+ * The cache table entry of the mac address referred to by otc_mac
+ * and otd_linkid will be filled in with the details provided by in
+ * the otc_dest member.
+ *
+ * OVERLAY_TARG_CACHE_REMOVE - overlay_targ_cache_t
+ *
+ * Removes the cache entry identified by otc_mac from the table.
+ * Note that this does not stop any in-flight lookups or deal with
+ * any data that is awaiting a lookup.
+ *
+ *
+ * OVERLAY_TARG_CACHE_FLUSH - overlay_targ_cache_t
+ *
+ * Similar to OVERLAY_TARG_CACHE_REMOVE, but functions on the
+ * entire table identified by otc_linkid. All other parameters are
+ * ignored.
+ *
+ *
+ * OVERLAY_TARG_CACHE_ITER - overlay_targ_cache_iter_t
+ *
+ * Iterates over the contents of a target cache identified by
+ * otci_linkid. Iteration is guaranteed to be exactly once for
+ * items which are in the hashtable at the beginning and end of
+ * iteration. For items which are added or removed after iteration
+ * has begun, only at most once semantics are guaranteed. Consumers
+ * should ensure that otci_marker is zeroed before starting
+ * iteration and should preserve its contents across calls.
+ *
+ * Before calling in, otci_count should be set to the number of
+ * entries that space has been allocated for in otci_ents. The
+ * value will be updated to indicate the total number written out.
+ */
+
+#define OVERLAY_TARG_CACHE_GET (OVERLAY_TARG_IOCTL | 0x30)
+#define OVERLAY_TARG_CACHE_SET (OVERLAY_TARG_IOCTL | 0x31)
+#define OVERLAY_TARG_CACHE_REMOVE (OVERLAY_TARG_IOCTL | 0x32)
+#define OVERLAY_TARG_CACHE_FLUSH (OVERLAY_TARG_IOCTL | 0x33)
+#define OVERLAY_TARG_CACHE_ITER (OVERLAY_TARG_IOCTL | 0x34)
+
+/*
+ * XXX This is a pretty arbitrary number that we're constraining ourselves to
+ * for iteration. Basically the goal is to make sure that we can't have a user
+ * ask us to allocate too much memory on their behalf. We should revisit this
+ * and think through what a good number here is.
+ */
+#define OVERLAY_TARGET_ITER_MAX 500
+
+#define OVERLAY_TARGET_CACHE_DROP 0x01
+
+typedef struct overlay_targ_cache_entry {
+ uint8_t otce_mac[ETHERADDRL];
+ uint16_t otce_flags;
+ overlay_target_point_t otce_dest;
+} overlay_targ_cache_entry_t;
+
+typedef struct overlay_targ_cache {
+ datalink_id_t otc_linkid;
+ overlay_targ_cache_entry_t otc_entry;
+} overlay_targ_cache_t;
+
+typedef struct overlay_targ_cache_iter {
+ datalink_id_t otci_linkid;
+ uint32_t otci_pad;
+ uint64_t otci_marker;
+ uint16_t otci_count;
+ overlay_targ_cache_entry_t otci_ents[];
+} overlay_targ_cache_iter_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _OVERLAY_TARGET_H */
diff --git a/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_hash.h b/usr/src/uts/common/sys/refhash.h
index 2069e6d3f1..22653cfedf 100644
--- a/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_hash.h
+++ b/usr/src/uts/common/sys/refhash.h
@@ -13,8 +13,8 @@
* Copyright 2014 Joyent, Inc. All rights reserved.
*/
-#ifndef _SYS_SCSI_ADAPTERS_MPTHASH_H
-#define _SYS_SCSI_ADAPTERS_MPTHASH_H
+#ifndef _SYS_REFHASH_H
+#define _SYS_REFHASH_H
#include <sys/types.h>
#include <sys/list.h>
@@ -58,4 +58,4 @@ extern void *refhash_first(refhash_t *);
extern void *refhash_next(refhash_t *, void *);
extern boolean_t refhash_obj_valid(refhash_t *hp, const void *);
-#endif /* _SYS_SCSI_ADAPTERS_MPTHASH_H */
+#endif /* _SYS_REFHASH_H */
diff --git a/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h b/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h
index 836548aa30..530a0bbbc0 100644
--- a/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h
+++ b/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h
@@ -58,10 +58,10 @@
#include <sys/byteorder.h>
#include <sys/queue.h>
+#include <sys/refhash.h>
#include <sys/isa_defs.h>
#include <sys/sunmdi.h>
#include <sys/mdi_impldefs.h>
-#include <sys/scsi/adapters/mpt_sas/mptsas_hash.h>
#include <sys/scsi/adapters/mpt_sas/mptsas_ioctl.h>
#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_tool.h>
#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_cnfg.h>
diff --git a/usr/src/uts/common/sys/socketvar.h b/usr/src/uts/common/sys/socketvar.h
index 8221c620a8..7b053b0cff 100644
--- a/usr/src/uts/common/sys/socketvar.h
+++ b/usr/src/uts/common/sys/socketvar.h
@@ -99,6 +99,7 @@ struct sockaddr_ux {
typedef struct sonodeops sonodeops_t;
typedef struct sonode sonode_t;
+typedef boolean_t (*so_krecv_f)(sonode_t *, mblk_t *, size_t, int, void *);
struct sodirect_s;
@@ -241,6 +242,10 @@ struct sonode {
struct sof_instance *so_filter_top; /* top of stack */
struct sof_instance *so_filter_bottom; /* bottom of stack */
clock_t so_filter_defertime; /* time when deferred */
+
+ /* Kernel direct receive callbacks */
+ so_krecv_f so_krecv_cb; /* recv callback */
+ void *so_krecv_arg; /* recv cb arg */
};
#define SO_HAVE_DATA(so) \
@@ -942,6 +947,13 @@ extern struct sonode *socreate(struct sockparams *, int, int, int, int,
extern int so_copyin(const void *, void *, size_t, int);
extern int so_copyout(const void *, void *, size_t, int);
+/*
+ * Functions to manipulate the use of direct receive callbacks. This should not
+ * be used outside of sockfs and ksocket.
+ */
+extern int so_krecv_set(sonode_t *, so_krecv_f, void *);
+extern void so_krecv_unblock(sonode_t *);
+
#endif
/*
diff --git a/usr/src/uts/common/sys/stt.h b/usr/src/uts/common/sys/stt.h
new file mode 100644
index 0000000000..28c08e347d
--- /dev/null
+++ b/usr/src/uts/common/sys/stt.h
@@ -0,0 +1,67 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_STT_H
+#define _SYS_STT_H
+
+/*
+ * Common STT information
+ */
+
+#include <sys/inttypes.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+/* Sizes in bytes */
+#define STT_HDR_LEN 18
+#define STT_ID_LEN 8
+
+#define STT_SET_FLAG(val, f) ((val) |= 1 << (f))
+#define STT_F_CKSUM_VERIFY 0x80
+#define STT_F_CKSUM_PARTIAL 0x40
+#define STT_F_IPV4 0x20
+#define STT_F_ISTCP 0x10
+#define STT_F_RESERVED 0x0f
+
+#define STT_L4OFF_MAX 0xff
+
+#define STT_VLAN_VALID 0x1000
+
+#define STT_VERSION 0
+
+#pragma pack(1)
+typedef struct stt_hdr {
+ uint8_t stt_version;
+ uint8_t stt_flags;
+ uint8_t stt_l4off;
+ uint8_t stt_reserved;
+ uint16_t stt_mss;
+ uint16_t stt_vlan;
+ uint64_t stt_id;
+ uint16_t stt_padding;
+} stt_hdr_t;
+#pragma pack()
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_STT_H */
diff --git a/usr/src/uts/common/sys/vxlan.h b/usr/src/uts/common/sys/vxlan.h
new file mode 100644
index 0000000000..bc0efa5dd6
--- /dev/null
+++ b/usr/src/uts/common/sys/vxlan.h
@@ -0,0 +1,48 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_VXLAN_H
+#define _SYS_VXLAN_H
+
+/*
+ * Common VXLAN information
+ */
+
+#include <sys/inttypes.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Sizes in bytes */
+#define VXLAN_HDR_LEN 8
+#define VXLAN_ID_LEN 3
+
+#define VXLAN_MAGIC 0x08000000
+#define VXLAN_F_VDI 0x08000000
+#define VXLAN_ID_SHIFT 8
+
+#pragma pack(1)
+typedef struct vxlan_hdr {
+ uint32_t vxlan_flags;
+ uint32_t vxlan_id;
+} vxlan_hdr_t;
+#pragma pack()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VXLAN_H */
diff --git a/usr/src/uts/common/syscall/sendfile.c b/usr/src/uts/common/syscall/sendfile.c
index cb8246f584..ccceca7c6d 100644
--- a/usr/src/uts/common/syscall/sendfile.c
+++ b/usr/src/uts/common/syscall/sendfile.c
@@ -82,7 +82,7 @@ extern sotpi_info_t *sotpi_sototpi(struct sonode *);
* 64 bit kernel or 32 bit kernel. For 32 bit apps, we can't transfer
* more than 2GB of data.
*/
-int
+static int
sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
int copy_cnt, ssize32_t *count)
{
@@ -343,7 +343,7 @@ sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
return (0);
}
-ssize32_t
+static ssize32_t
sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
size32_t *xferred, int fildes)
{
@@ -390,7 +390,7 @@ sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
}
#endif
-int
+static int
sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
int copy_cnt, ssize_t total_size, int maxblk, ssize_t *count)
{
@@ -680,7 +680,7 @@ sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
}
-int
+static int
sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
int copy_cnt, ssize_t *count)
{
@@ -1160,6 +1160,17 @@ sendfilev(int opcode, int fildes, const struct sendfilevec *vec, int sfvcnt,
} else {
maxblk = (int)vp->v_stream->sd_maxblk;
}
+
+ /*
+ * We need to make sure that the socket that we're sending on
+ * supports sendfile behavior. sockfs doesn't know that the APIs
+ * we want to use are coming from sendfile, so we can't rely on
+ * it to check for us.
+ */
+ if ((so->so_mode & SM_SENDFILESUPP) == 0) {
+ error = EOPNOTSUPP;
+ goto err;
+ }
break;
case VREG:
break;
diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel
index 830580e390..2aeb0b89e4 100644
--- a/usr/src/uts/intel/Makefile.intel
+++ b/usr/src/uts/intel/Makefile.intel
@@ -717,6 +717,14 @@ MAC_KMODS += mac_wifi
MAC_KMODS += mac_ib
#
+# Overlay related modules (/kernel/overlay)
+#
+DRV_KMODS += overlay
+OVERLAY_KMODS += geneve
+OVERLAY_KMODS += nvgre
+OVERLAY_KMODS += vxlan
+
+#
# socketmod (kernel/socketmod)
#
SOCKET_KMODS += sockpfp
diff --git a/usr/src/uts/intel/geneve/Makefile b/usr/src/uts/intel/geneve/Makefile
new file mode 100644
index 0000000000..180a720a21
--- /dev/null
+++ b/usr/src/uts/intel/geneve/Makefile
@@ -0,0 +1,52 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+UTSBASE = ../..
+
+MODULE = geneve
+OBJECTS = $(OVERLAY_GENEVE_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(OVERLAY_GENEVE_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_OVERLAY_DIR)/$(MODULE)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
+LDFLAGS += -dy -Ndrv/overlay
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/nvgre/Makefile b/usr/src/uts/intel/nvgre/Makefile
new file mode 100644
index 0000000000..50bf4a3fbc
--- /dev/null
+++ b/usr/src/uts/intel/nvgre/Makefile
@@ -0,0 +1,49 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+UTSBASE = ../..
+
+MODULE = nvgre
+OBJECTS = $(OVERLAY_NVGRE_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(OVERLAY_NVGRE_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_OVERLAY_DIR)/$(MODULE)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+LDFLAGS += -dy -Ndrv/overlay
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/overlay/Makefile b/usr/src/uts/intel/overlay/Makefile
new file mode 100644
index 0000000000..4f24075de5
--- /dev/null
+++ b/usr/src/uts/intel/overlay/Makefile
@@ -0,0 +1,56 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+UTSBASE = ../..
+
+MODULE = overlay
+OBJECTS = $(OVERLAY_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(OVERLAY_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY) $(SRC_CONFFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+CONF_SRCDIR = $(UTSBASE)/common/io/overlay
+MAPFILE = $(UTSBASE)/common/io/overlay/overlay.mapfile
+
+LDFLAGS += -dy -Nmisc/mac -Ndrv/dld -Nmisc/dls -Nmisc/ksocket
+
+#
+# To ease development
+#
+CERRWARN += -_gcc=-Wno-unused-variable
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/vxlan/Makefile b/usr/src/uts/intel/vxlan/Makefile
new file mode 100644
index 0000000000..894e2f84c9
--- /dev/null
+++ b/usr/src/uts/intel/vxlan/Makefile
@@ -0,0 +1,49 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+UTSBASE = ../..
+
+MODULE = vxlan
+OBJECTS = $(OVERLAY_VXLAN_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(OVERLAY_VXLAN_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_OVERLAY_DIR)/$(MODULE)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+LDFLAGS += -dy -Ndrv/overlay -Ndrv/ip
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ