summaryrefslogtreecommitdiff
path: root/usr/src/common
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/common')
-rw-r--r--usr/src/common/brand/lx/lx_auxv.c96
-rw-r--r--usr/src/common/brand/lx/lx_auxv.h32
-rw-r--r--usr/src/common/brand/lx/lx_errno.c206
-rw-r--r--usr/src/common/brand/lx/lx_errno.h29
-rw-r--r--usr/src/common/brand/lx/lx_signum.c339
-rw-r--r--usr/src/common/brand/lx/lx_signum.h114
-rw-r--r--usr/src/common/brand/lx/lx_syscall.h123
-rw-r--r--usr/src/common/brand/lx/tools/Makefile47
-rw-r--r--usr/src/common/brand/lx/tools/README.md39
-rw-r--r--usr/src/common/brand/lx/tools/gen_errno.c444
-rw-r--r--usr/src/common/crypto/aes/aes_modes.c4
-rw-r--r--usr/src/common/crypto/chacha/chacha.c24
-rw-r--r--usr/src/common/crypto/chacha/chacha.h6
-rw-r--r--usr/src/common/crypto/modes/ctr.c280
-rw-r--r--usr/src/common/crypto/modes/modes.h15
-rw-r--r--usr/src/common/dis/i386/dis_tables.c26
-rw-r--r--usr/src/common/idspace/id_space.c184
-rw-r--r--usr/src/common/inet/inet_hash.c359
-rw-r--r--usr/src/common/mc/imc/imc_decode.c770
-rw-r--r--usr/src/common/mc/imc/imc_dump.c569
-rw-r--r--usr/src/common/net/dhcp/octet.c3
-rw-r--r--usr/src/common/pnglite/pnglite.c1
-rw-r--r--usr/src/common/zfs/zfs_prop.c17
-rw-r--r--usr/src/common/zfs/zfs_prop.h2
24 files changed, 3576 insertions, 153 deletions
diff --git a/usr/src/common/brand/lx/lx_auxv.c b/usr/src/common/brand/lx/lx_auxv.c
new file mode 100644
index 0000000000..2ed5fd0517
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_auxv.c
@@ -0,0 +1,96 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/auxv.h>
+#include <sys/lx_brand.h>
+
+/*
+ * Linux does not make the distinction between 'int' and 'long' when it comes
+ * to the format of the aux vector. In order to properly clear the struct
+ * padding present in the native auxv_t in 64-bit, we employ the Linux format.
+ */
+struct lx_auxv {
+ long la_type;
+ long la_val;
+};
+
+/*
+ * Translate one native aux vector entry (ap) into its Linux form, stored
+ * through oap using the two-long Linux layout. ELF load details (base, entry
+ * point, program headers) are taken from edp rather than from the native
+ * entry.
+ *
+ * Returns 0 when a translated entry was written to oap, or 1 when the native
+ * entry has no Linux counterpart and should be skipped; oap is left untouched
+ * in the latter case.
+ */
+int
+lx_auxv_stol(const auxv_t *ap, auxv_t *oap, const lx_elf_data_t *edp)
+{
+	struct lx_auxv *loap = (struct lx_auxv *)oap;
+	/* Default: pass the native type and value straight through. */
+	long lxtype = ap->a_type;
+	long lxval = ap->a_un.a_val;
+
+	switch (ap->a_type) {
+	/* Entries whose value comes from the Linux ELF data. */
+	case AT_BASE:
+		lxval = edp->ed_base;
+		break;
+	case AT_ENTRY:
+		lxval = edp->ed_entry;
+		break;
+	case AT_PHDR:
+		lxval = edp->ed_phdr;
+		break;
+	case AT_PHENT:
+		lxval = edp->ed_phent;
+		break;
+	case AT_PHNUM:
+		lxval = edp->ed_phnum;
+		break;
+
+	/* Entries that keep their value but are renumbered for Linux. */
+	case AT_SUN_BRAND_LX_SYSINFO_EHDR:
+		lxtype = AT_SYSINFO_EHDR;
+		break;
+	case AT_SUN_BRAND_LX_CLKTCK:
+		lxtype = AT_CLKTCK;
+		break;
+	case AT_SUN_GID:
+		lxtype = AT_LX_EGID;
+		break;
+	case AT_SUN_RGID:
+		lxtype = AT_LX_GID;
+		break;
+	case AT_SUN_UID:
+		lxtype = AT_LX_EUID;
+		break;
+	case AT_SUN_RUID:
+		lxtype = AT_LX_UID;
+		break;
+
+	case AT_SUN_AUXFLAGS:
+		/* Only the set-id flag is conveyed, as AT_SECURE = 1. */
+		if ((ap->a_un.a_val & AF_SUN_SETUGID) == 0)
+			return (1);
+		lxtype = AT_SECURE;
+		lxval = 1;
+		break;
+
+	case AT_EXECFD:
+	case AT_PAGESZ:
+	case AT_FLAGS:
+	case AT_RANDOM:
+	case AT_NULL:
+		/* No translate needed */
+		break;
+
+	default:
+		/* All other unrecognized entries are ignored */
+		return (1);
+	}
+
+	loap->la_type = lxtype;
+	loap->la_val = lxval;
+	return (0);
+}
diff --git a/usr/src/common/brand/lx/lx_auxv.h b/usr/src/common/brand/lx/lx_auxv.h
new file mode 100644
index 0000000000..190d939f35
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_auxv.h
@@ -0,0 +1,32 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LX_AUXV_H
+#define _LX_AUXV_H
+
+#include <sys/auxv.h>
+#include <sys/lx_brand.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int lx_auxv_stol(const auxv_t *, auxv_t *, const lx_elf_data_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_AUXV_H */
diff --git a/usr/src/common/brand/lx/lx_errno.c b/usr/src/common/brand/lx/lx_errno.c
new file mode 100644
index 0000000000..269ed470dc
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_errno.c
@@ -0,0 +1,206 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * This file contains a mapping table and lookup function for converting
+ * illumos native error numbers into error numbers appropriate for Linux
+ * emulation.
+ *
+ * The translation table is generated by the "gen_errno" tool, built from and
+ * documented in "usr/src/common/brand/lx/tools".
+ */
+
+#include <sys/debug.h>
+
+const int
+lx_stol_errno[] = {
+ 0, /* 0: No Error */
+ 1, /* 1: EPERM --> 1: EPERM */
+ 2, /* 2: ENOENT --> 2: ENOENT */
+ 3, /* 3: ESRCH --> 3: ESRCH */
+ 4, /* 4: EINTR --> 4: EINTR */
+ 5, /* 5: EIO --> 5: EIO */
+ 6, /* 6: ENXIO --> 6: ENXIO */
+ 7, /* 7: E2BIG --> 7: E2BIG */
+ 8, /* 8: ENOEXEC --> 8: ENOEXEC */
+ 9, /* 9: EBADF --> 9: EBADF */
+ 10, /* 10: ECHILD --> 10: ECHILD */
+ 11, /* 11: EAGAIN --> 11: EAGAIN */
+ 12, /* 12: ENOMEM --> 12: ENOMEM */
+ 13, /* 13: EACCES --> 13: EACCES */
+ 14, /* 14: EFAULT --> 14: EFAULT */
+ 15, /* 15: ENOTBLK --> 15: ENOTBLK */
+ 16, /* 16: EBUSY --> 16: EBUSY */
+ 17, /* 17: EEXIST --> 17: EEXIST */
+ 18, /* 18: EXDEV --> 18: EXDEV */
+ 19, /* 19: ENODEV --> 19: ENODEV */
+ 20, /* 20: ENOTDIR --> 20: ENOTDIR */
+ 21, /* 21: EISDIR --> 21: EISDIR */
+ 22, /* 22: EINVAL --> 22: EINVAL */
+ 23, /* 23: ENFILE --> 23: ENFILE */
+ 24, /* 24: EMFILE --> 24: EMFILE */
+ 25, /* 25: ENOTTY --> 25: ENOTTY */
+ 26, /* 26: ETXTBSY --> 26: ETXTBSY */
+ 27, /* 27: EFBIG --> 27: EFBIG */
+ 28, /* 28: ENOSPC --> 28: ENOSPC */
+ 29, /* 29: ESPIPE --> 29: ESPIPE */
+ 30, /* 30: EROFS --> 30: EROFS */
+ 31, /* 31: EMLINK --> 31: EMLINK */
+ 32, /* 32: EPIPE --> 32: EPIPE */
+ 33, /* 33: EDOM --> 33: EDOM */
+ 34, /* 34: ERANGE --> 34: ERANGE */
+ 42, /* 35: ENOMSG --> 42: ENOMSG */
+ 43, /* 36: EIDRM --> 43: EIDRM */
+ 44, /* 37: ECHRNG --> 44: ECHRNG */
+ 45, /* 38: EL2NSYNC --> 45: EL2NSYNC */
+ 46, /* 39: EL3HLT --> 46: EL3HLT */
+ 47, /* 40: EL3RST --> 47: EL3RST */
+ 48, /* 41: ELNRNG --> 48: ELNRNG */
+ 49, /* 42: EUNATCH --> 49: EUNATCH */
+ 50, /* 43: ENOCSI --> 50: ENOCSI */
+ 51, /* 44: EL2HLT --> 51: EL2HLT */
+ 35, /* 45: EDEADLK --> 35: EDEADLK */
+ 37, /* 46: ENOLCK --> 37: ENOLCK */
+ 125, /* 47: ECANCELED --> 125: ECANCELED */
+ 38, /* 48: ENOTSUP --> 38: ENOSYS */
+ 122, /* 49: EDQUOT --> 122: EDQUOT */
+ 52, /* 50: EBADE --> 52: EBADE */
+ 53, /* 51: EBADR --> 53: EBADR */
+ 54, /* 52: EXFULL --> 54: EXFULL */
+ 55, /* 53: ENOANO --> 55: ENOANO */
+ 56, /* 54: EBADRQC --> 56: EBADRQC */
+ 57, /* 55: EBADSLT --> 57: EBADSLT */
+ 35, /* 56: EDEADLOCK --> 35: EDEADLK */
+ 59, /* 57: EBFONT --> 59: EBFONT */
+ 130, /* 58: EOWNERDEAD --> 130: EOWNERDEAD */
+ 131, /* 59: ENOTRECOVERABLE --> 131: ENOTRECOVERABLE */
+ 60, /* 60: ENOSTR --> 60: ENOSTR */
+ 61, /* 61: ENODATA --> 61: ENODATA */
+ 62, /* 62: ETIME --> 62: ETIME */
+ 63, /* 63: ENOSR --> 63: ENOSR */
+ 64, /* 64: ENONET --> 64: ENONET */
+ 65, /* 65: ENOPKG --> 65: ENOPKG */
+ 66, /* 66: EREMOTE --> 66: EREMOTE */
+ 67, /* 67: ENOLINK --> 67: ENOLINK */
+ 68, /* 68: EADV --> 68: EADV */
+ 69, /* 69: ESRMNT --> 69: ESRMNT */
+ 70, /* 70: ECOMM --> 70: ECOMM */
+ 71, /* 71: EPROTO --> 71: EPROTO */
+ -2, /* 72: ELOCKUNMAPPED --> -2: No Analogue */
+ -2, /* 73: ENOTACTIVE --> -2: No Analogue */
+ 72, /* 74: EMULTIHOP --> 72: EMULTIHOP */
+ -1, /* 75: Unused Number */
+ -1, /* 76: Unused Number */
+ 74, /* 77: EBADMSG --> 74: EBADMSG */
+ 36, /* 78: ENAMETOOLONG --> 36: ENAMETOOLONG */
+ 75, /* 79: EOVERFLOW --> 75: EOVERFLOW */
+ 76, /* 80: ENOTUNIQ --> 76: ENOTUNIQ */
+ 77, /* 81: EBADFD --> 77: EBADFD */
+ 78, /* 82: EREMCHG --> 78: EREMCHG */
+ 79, /* 83: ELIBACC --> 79: ELIBACC */
+ 80, /* 84: ELIBBAD --> 80: ELIBBAD */
+ 81, /* 85: ELIBSCN --> 81: ELIBSCN */
+ 82, /* 86: ELIBMAX --> 82: ELIBMAX */
+ 83, /* 87: ELIBEXEC --> 83: ELIBEXEC */
+ 84, /* 88: EILSEQ --> 84: EILSEQ */
+ 38, /* 89: ENOSYS --> 38: ENOSYS */
+ 40, /* 90: ELOOP --> 40: ELOOP */
+ 85, /* 91: ERESTART --> 85: ERESTART */
+ 86, /* 92: ESTRPIPE --> 86: ESTRPIPE */
+ 39, /* 93: ENOTEMPTY --> 39: ENOTEMPTY */
+ 87, /* 94: EUSERS --> 87: EUSERS */
+ 88, /* 95: ENOTSOCK --> 88: ENOTSOCK */
+ 89, /* 96: EDESTADDRREQ --> 89: EDESTADDRREQ */
+ 90, /* 97: EMSGSIZE --> 90: EMSGSIZE */
+ 91, /* 98: EPROTOTYPE --> 91: EPROTOTYPE */
+ 92, /* 99: ENOPROTOOPT --> 92: ENOPROTOOPT */
+ -1, /* 100: Unused Number */
+ -1, /* 101: Unused Number */
+ -1, /* 102: Unused Number */
+ -1, /* 103: Unused Number */
+ -1, /* 104: Unused Number */
+ -1, /* 105: Unused Number */
+ -1, /* 106: Unused Number */
+ -1, /* 107: Unused Number */
+ -1, /* 108: Unused Number */
+ -1, /* 109: Unused Number */
+ -1, /* 110: Unused Number */
+ -1, /* 111: Unused Number */
+ -1, /* 112: Unused Number */
+ -1, /* 113: Unused Number */
+ -1, /* 114: Unused Number */
+ -1, /* 115: Unused Number */
+ -1, /* 116: Unused Number */
+ -1, /* 117: Unused Number */
+ -1, /* 118: Unused Number */
+ -1, /* 119: Unused Number */
+ 93, /* 120: EPROTONOSUPPORT --> 93: EPROTONOSUPPORT */
+ 94, /* 121: ESOCKTNOSUPPORT --> 94: ESOCKTNOSUPPORT */
+ 95, /* 122: EOPNOTSUPP --> 95: EOPNOTSUPP */
+ 96, /* 123: EPFNOSUPPORT --> 96: EPFNOSUPPORT */
+ 97, /* 124: EAFNOSUPPORT --> 97: EAFNOSUPPORT */
+ 98, /* 125: EADDRINUSE --> 98: EADDRINUSE */
+ 99, /* 126: EADDRNOTAVAIL --> 99: EADDRNOTAVAIL */
+ 100, /* 127: ENETDOWN --> 100: ENETDOWN */
+ 101, /* 128: ENETUNREACH --> 101: ENETUNREACH */
+ 102, /* 129: ENETRESET --> 102: ENETRESET */
+ 103, /* 130: ECONNABORTED --> 103: ECONNABORTED */
+ 104, /* 131: ECONNRESET --> 104: ECONNRESET */
+ 105, /* 132: ENOBUFS --> 105: ENOBUFS */
+ 106, /* 133: EISCONN --> 106: EISCONN */
+ 107, /* 134: ENOTCONN --> 107: ENOTCONN */
+ -1, /* 135: Unused Number */
+ -1, /* 136: Unused Number */
+ -1, /* 137: Unused Number */
+ -1, /* 138: Unused Number */
+ -1, /* 139: Unused Number */
+ -1, /* 140: Unused Number */
+ -1, /* 141: Unused Number */
+ -1, /* 142: Unused Number */
+ 108, /* 143: ESHUTDOWN --> 108: ESHUTDOWN */
+ 109, /* 144: ETOOMANYREFS --> 109: ETOOMANYREFS */
+ 110, /* 145: ETIMEDOUT --> 110: ETIMEDOUT */
+ 111, /* 146: ECONNREFUSED --> 111: ECONNREFUSED */
+ 112, /* 147: EHOSTDOWN --> 112: EHOSTDOWN */
+ 113, /* 148: EHOSTUNREACH --> 113: EHOSTUNREACH */
+ 114, /* 149: EALREADY --> 114: EALREADY */
+ 115, /* 150: EINPROGRESS --> 115: EINPROGRESS */
+ 116 /* 151: ESTALE --> 116: ESTALE */
+};
+
+/*
+ * Convert an illumos native error number to a Linux error number and return
+ * it. If no valid conversion is possible -- the number lies outside the
+ * translation table, or its table entry is negative (an unused native number
+ * or one with no Linux analogue) -- the function falls back to the value of
+ * "defval". In userland, passing a default error number of "-1" will abort
+ * the program if the error number could not be converted. In the kernel,
+ * "defval" must always be non-negative.
+ */
+int
+lx_errno(int native_errno, int defval)
+{
+	int rval;
+
+#ifdef _KERNEL
+	VERIFY3S(defval, >=, 0);
+#endif
+
+	/*
+	 * Negative table entries are placeholders, not error numbers; treat
+	 * them exactly like an out-of-range input so that a negative value is
+	 * never handed back as a translated errno.
+	 */
+	if (native_errno < 0 || native_errno >= (sizeof (lx_stol_errno) /
+	    sizeof (lx_stol_errno[0])) ||
+	    (rval = lx_stol_errno[native_errno]) < 0) {
+#ifndef _KERNEL
+		VERIFY3S(defval, >=, 0);
+#endif
+
+		return (defval);
+	}
+
+	return (rval);
+}
diff --git a/usr/src/common/brand/lx/lx_errno.h b/usr/src/common/brand/lx/lx_errno.h
new file mode 100644
index 0000000000..10b6b3066c
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_errno.h
@@ -0,0 +1,29 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LX_ERRNO_H
+#define _LX_ERRNO_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int lx_errno(int, int);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_ERRNO_H */
diff --git a/usr/src/common/brand/lx/lx_signum.c b/usr/src/common/brand/lx/lx_signum.c
new file mode 100644
index 0000000000..9c861c282a
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_signum.c
@@ -0,0 +1,339 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/signal.h>
+#include <sys/lx_siginfo.h>
+#include <lx_signum.h>
+#include <sys/debug.h>
+
+/*
+ * Delivering signals to a Linux process is complicated by differences in
+ * signal numbering, stack structure and contents, and the action taken when a
+ * signal handler exits. In addition, many signal-related structures, such as
+ * sigset_ts, vary between Solaris and Linux.
+ *
+ * The simplest transformation that must be done when sending signals is to
+ * translate between Linux and Solaris signal numbers.
+ *
+ * These are the major signal number differences between Linux and Solaris:
+ *
+ * ====================================
+ * | Number | Linux | Solaris |
+ * | ====== | ========= | ========== |
+ * | 7 | SIGBUS | SIGEMT |
+ * | 10 | SIGUSR1 | SIGBUS |
+ * | 12 | SIGUSR2 | SIGSYS |
+ * | 16 | SIGSTKFLT | SIGUSR1 |
+ * | 17 | SIGCHLD | SIGUSR2 |
+ * | 18 | SIGCONT | SIGCHLD |
+ * | 19 | SIGSTOP | SIGPWR |
+ * | 20 | SIGTSTP | SIGWINCH |
+ * | 21 | SIGTTIN | SIGURG |
+ * | 22 | SIGTTOU | SIGPOLL |
+ * | 23 | SIGURG | SIGSTOP |
+ * | 24 | SIGXCPU | SIGTSTP |
+ * | 25 | SIGXFSZ | SIGCONT |
+ * | 26 | SIGVTALARM | SIGTTIN |
+ * | 27 | SIGPROF | SIGTTOU |
+ * | 28 | SIGWINCH | SIGVTALARM |
+ * | 29 | SIGPOLL | SIGPROF |
+ * | 30 | SIGPWR | SIGXCPU |
+ * | 31 | SIGSYS | SIGXFSZ |
+ * ====================================
+ *
+ * Not every Linux signal maps to a Solaris signal, nor does every Solaris
+ * signal map to a Linux counterpart. However, when signals do map, the
+ * mapping is unique.
+ *
+ * One mapping issue is that Linux supports 33 real time signals, with SIGRTMIN
+ * typically starting at or near 32 (SIGRTMIN) and proceeding to 64 (SIGRTMAX)
+ * (SIGRTMIN is "at or near" 32 because glibc usually "steals" one or more of
+ * these signals for its own internal use, adjusting SIGRTMIN and SIGRTMAX as
+ * needed.) Conversely, Solaris actively uses signals 32-41 for other purposes
+ * and supports exactly 33 real time signals, in the range 42 (SIGRTMIN)
+ * to 74 (SIGRTMAX).
+ *
+ * At present, attempting to translate a Linux signal equal to 63
+ * will generate an error (we allow SIGRTMAX because a program
+ * should be able to send SIGRTMAX without getting an EINVAL, though obviously
+ * anything that loops through the signals from SIGRTMIN to SIGRTMAX will
+ * fail.)
+ *
+ * Similarly, attempting to translate a native Solaris signal in the range
+ * 32-41 will also generate an error as we don't want to support the receipt of
+ * those signals from the Solaris global zone.
+ */
+
+/*
+ * Linux to Solaris signal map
+ *
+ * Usage: solaris_signal = ltos_signo[lx_signal];
+ */
+const int
+ltos_signo[LX_NSIG + 1] = {
+ 0,
+ SIGHUP,
+ SIGINT,
+ SIGQUIT,
+ SIGILL,
+ SIGTRAP,
+ SIGABRT,
+ SIGBUS,
+ SIGFPE,
+ SIGKILL,
+ SIGUSR1,
+ SIGSEGV,
+ SIGUSR2,
+ SIGPIPE,
+ SIGALRM,
+ SIGTERM,
+ SIGEMT, /* 16: Linux SIGSTKFLT; use Solaris SIGEMT */
+ SIGCHLD,
+ SIGCONT,
+ SIGSTOP,
+ SIGTSTP,
+ SIGTTIN,
+ SIGTTOU,
+ SIGURG,
+ SIGXCPU,
+ SIGXFSZ,
+ SIGVTALRM,
+ SIGPROF,
+ SIGWINCH,
+ SIGPOLL,
+ SIGPWR,
+ SIGSYS,
+ _SIGRTMIN, /* 32: Linux SIGRTMIN */
+ _SIGRTMIN + 1,
+ _SIGRTMIN + 2,
+ _SIGRTMIN + 3,
+ _SIGRTMIN + 4,
+ _SIGRTMIN + 5,
+ _SIGRTMIN + 6,
+ _SIGRTMIN + 7,
+ _SIGRTMIN + 8,
+ _SIGRTMIN + 9,
+ _SIGRTMIN + 10,
+ _SIGRTMIN + 11,
+ _SIGRTMIN + 12,
+ _SIGRTMIN + 13,
+ _SIGRTMIN + 14,
+ _SIGRTMIN + 15,
+ _SIGRTMIN + 16,
+ _SIGRTMIN + 17,
+ _SIGRTMIN + 18,
+ _SIGRTMIN + 19,
+ _SIGRTMIN + 20,
+ _SIGRTMIN + 21,
+ _SIGRTMIN + 22,
+ _SIGRTMIN + 23,
+ _SIGRTMIN + 24,
+ _SIGRTMIN + 25,
+ _SIGRTMIN + 26,
+ _SIGRTMIN + 27,
+ _SIGRTMIN + 28,
+ _SIGRTMIN + 29,
+ _SIGRTMIN + 30,
+ _SIGRTMIN + 31,
+ _SIGRTMAX, /* 64: Linux SIGRTMAX */
+};
+
+/*
+ * Solaris to Linux signal map
+ *
+ * Usage: lx_signal = stol_signo[solaris_signal];
+ */
+const int
+stol_signo[NSIG] = {
+ 0,
+ LX_SIGHUP,
+ LX_SIGINT,
+ LX_SIGQUIT,
+ LX_SIGILL,
+ LX_SIGTRAP,
+ LX_SIGABRT,
+ LX_SIGSTKFLT, /* 7: Solaris SIGEMT; use for LX_SIGSTKFLT */
+ LX_SIGFPE,
+ LX_SIGKILL,
+ LX_SIGBUS,
+ LX_SIGSEGV,
+ LX_SIGSYS,
+ LX_SIGPIPE,
+ LX_SIGALRM,
+ LX_SIGTERM,
+ LX_SIGUSR1,
+ LX_SIGUSR2,
+ LX_SIGCHLD,
+ LX_SIGPWR,
+ LX_SIGWINCH,
+ LX_SIGURG,
+ LX_SIGPOLL,
+ LX_SIGSTOP,
+ LX_SIGTSTP,
+ LX_SIGCONT,
+ LX_SIGTTIN,
+ LX_SIGTTOU,
+ LX_SIGVTALRM,
+ LX_SIGPROF,
+ LX_SIGXCPU,
+ LX_SIGXFSZ,
+ -1, /* 32: Solaris SIGWAITING */
+ -1, /* 33: Solaris SIGLWP */
+ -1, /* 34: Solaris SIGFREEZE */
+ -1, /* 35: Solaris SIGTHAW */
+ -1, /* 36: Solaris SIGCANCEL */
+ -1, /* 37: Solaris SIGLOST */
+ -1, /* 38: Solaris SIGXRES */
+ -1, /* 39: Solaris SIGJVM1 */
+ -1, /* 40: Solaris SIGJVM2 */
+ -1, /* 41: Solaris SIGINFO */
+ LX_SIGRTMIN, /* 42: Solaris _SIGRTMIN */
+ LX_SIGRTMIN + 1,
+ LX_SIGRTMIN + 2,
+ LX_SIGRTMIN + 3,
+ LX_SIGRTMIN + 4,
+ LX_SIGRTMIN + 5,
+ LX_SIGRTMIN + 6,
+ LX_SIGRTMIN + 7,
+ LX_SIGRTMIN + 8,
+ LX_SIGRTMIN + 9,
+ LX_SIGRTMIN + 10,
+ LX_SIGRTMIN + 11,
+ LX_SIGRTMIN + 12,
+ LX_SIGRTMIN + 13,
+ LX_SIGRTMIN + 14,
+ LX_SIGRTMIN + 15,
+ LX_SIGRTMIN + 16,
+ LX_SIGRTMIN + 17,
+ LX_SIGRTMIN + 18,
+ LX_SIGRTMIN + 19,
+ LX_SIGRTMIN + 20,
+ LX_SIGRTMIN + 21,
+ LX_SIGRTMIN + 22,
+ LX_SIGRTMIN + 23,
+ LX_SIGRTMIN + 24,
+ LX_SIGRTMIN + 25,
+ LX_SIGRTMIN + 26,
+ LX_SIGRTMIN + 27,
+ LX_SIGRTMIN + 28,
+ LX_SIGRTMIN + 29,
+ LX_SIGRTMIN + 30,
+ LX_SIGRTMIN + 31,
+ LX_SIGRTMAX, /* 74: Solaris _SIGRTMAX */
+};
+
+/*
+ * Translate an illumos native signal number into the matching Linux signal
+ * number. When the native number is outside the table, or its table entry
+ * is less than 1 (no Linux counterpart), "defsig" is returned instead.
+ * In userland a "defsig" of "-1" causes the program to abort when the
+ * translation fails; in the kernel "defsig" must always be non-negative.
+ */
+int
+lx_stol_signo(int signo, int defsig)
+{
+	int lxsig;
+
+#ifdef _KERNEL
+	VERIFY3S(defsig, >=, 0);
+#endif
+
+	if (signo >= 0 && signo < NSIG) {
+		lxsig = stol_signo[signo];
+		if (lxsig >= 1) {
+			return (lxsig);
+		}
+	}
+
+	/* Translation failed: hand back the caller's default. */
+#ifndef _KERNEL
+	VERIFY3S(defsig, >=, 0);
+#endif
+	return (defsig);
+}
+
+
+/*
+ * Convert a Linux signal number to an illumos signal number and return it.
+ * Valid Linux signal numbers are 1 through LX_NSIG inclusive; anything else
+ * yields "defsig". Error behavior is otherwise identical to lx_stol_signo.
+ */
+int
+lx_ltos_signo(int signo, int defsig)
+{
+#ifdef _KERNEL
+	VERIFY3S(defsig, >=, 0);
+#endif
+
+	/*
+	 * The bound must be the Linux signal count, not the native NSIG:
+	 * ltos_signo[] has LX_NSIG + 1 entries, so checking against a larger
+	 * native NSIG would allow reads past the end of the table.
+	 */
+	if (signo < 1 || signo > LX_NSIG) {
+#ifndef _KERNEL
+		VERIFY3S(defsig, >=, 0);
+#endif
+		return (defsig);
+	}
+
+	return (ltos_signo[signo]);
+}
+
+/*
+ * Translate the "status" field of a SIGCLD siginfo_t. The low seven bits
+ * carry the illumos signal number, which is converted with lx_stol_signo();
+ * the remaining bits of the low two bytes (the ptrace(2) event in the second
+ * byte and the 0x80 PTRACE_O_TRACESYSGOOD marker) are carried over unchanged.
+ * "defsig" is passed through to lx_stol_signo() and has the same meaning,
+ * including the userland abort on "-1".
+ */
+int
+lx_stol_status(int s, int defsig)
+{
+	/* Everything in the low 16 bits except the signal number itself. */
+	const int preserved = s & 0xff80;
+	/* Signal number with the PTRACE_O_TRACESYSGOOD bit stripped off. */
+	const int lxsig = lx_stol_signo(s & 0x7f, defsig);
+
+	return (preserved | lxsig);
+}
+
+/*
+ * Translate a native siginfo si_code value into its Linux equivalent. Codes
+ * without an explicit mapping below are returned unchanged.
+ */
+int
+lx_stol_sigcode(int code)
+{
+	if (code == SI_USER) {
+		return (LX_SI_USER);
+	}
+	if (code == SI_LWP) {
+		return (LX_SI_TKILL);
+	}
+	if (code == SI_QUEUE) {
+		return (LX_SI_QUEUE);
+	}
+	if (code == SI_TIMER) {
+		return (LX_SI_TIMER);
+	}
+	if (code == SI_ASYNCIO) {
+		return (LX_SI_ASYNCIO);
+	}
+	if (code == SI_MESGQ) {
+		return (LX_SI_MESGQ);
+	}
+
+	return (code);
+}
diff --git a/usr/src/common/brand/lx/lx_signum.h b/usr/src/common/brand/lx/lx_signum.h
new file mode 100644
index 0000000000..b6c5f32731
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_signum.h
@@ -0,0 +1,114 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LX_SIGNUM_H
+#define _LX_SIGNUM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LX_SIGHUP 1
+#define LX_SIGINT 2
+#define LX_SIGQUIT 3
+#define LX_SIGILL 4
+#define LX_SIGTRAP 5
+#define LX_SIGABRT 6
+#define LX_SIGIOT 6
+#define LX_SIGBUS 7
+#define LX_SIGFPE 8
+#define LX_SIGKILL 9
+#define LX_SIGUSR1 10
+#define LX_SIGSEGV 11
+#define LX_SIGUSR2 12
+#define LX_SIGPIPE 13
+#define LX_SIGALRM 14
+#define LX_SIGTERM 15
+#define LX_SIGSTKFLT 16
+#define LX_SIGCHLD 17
+#define LX_SIGCONT 18
+#define LX_SIGSTOP 19
+#define LX_SIGTSTP 20
+#define LX_SIGTTIN 21
+#define LX_SIGTTOU 22
+#define LX_SIGURG 23
+#define LX_SIGXCPU 24
+#define LX_SIGXFSZ 25
+#define LX_SIGVTALRM 26
+#define LX_SIGPROF 27
+#define LX_SIGWINCH 28
+#define LX_SIGIO 29
+#define LX_SIGPOLL LX_SIGIO
+#define LX_SIGPWR 30
+#define LX_SIGSYS 31
+#define LX_SIGUNUSED 31
+
+#define LX_NSIG 64 /* Linux _NSIG */
+
+#define LX_SIGRTMIN 32
+#define LX_SIGRTMAX LX_NSIG
+
+extern const int ltos_signo[];
+extern const int stol_signo[];
+
+extern int lx_stol_signo(int, int);
+extern int lx_ltos_signo(int, int);
+extern int lx_stol_status(int, int);
+extern int lx_stol_sigcode(int);
+
+/*
+ * NOTE: Linux uses different definitions for 'sigset_t's and 'sigaction_t's
+ * depending on whether the definition is for user space or the kernel.
+ *
+ * The definitions below MUST correspond to the Linux kernel versions,
+ * as glibc will do the necessary translation from the Linux user
+ * versions.
+ */
+#if defined(_LP64)
+#define LX_NSIG_WORDS 1
+#define LX_WSHIFT 6
+#elif defined(_ILP32)
+#define LX_NSIG_WORDS 2
+#define LX_WSHIFT 5
+#else
+#error "LX only supports LP64 and ILP32"
+#endif
+
+typedef struct {
+ ulong_t __bits[LX_NSIG_WORDS];
+} lx_sigset_t;
+
+#define LX_NBITS (sizeof (ulong_t) * NBBY)
+#define lx_sigmask(n) (1UL << (((n) - 1) % LX_NBITS))
+#define lx_sigword(n) (((ulong_t)((n) - 1)) >> LX_WSHIFT)
+#define lx_sigismember(s, n) (lx_sigmask(n) & (s)->__bits[lx_sigword(n)])
+#define lx_sigaddset(s, n) ((s)->__bits[lx_sigword(n)] |= lx_sigmask(n))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_SIGNUM_H */
diff --git a/usr/src/common/brand/lx/lx_syscall.h b/usr/src/common/brand/lx/lx_syscall.h
new file mode 100644
index 0000000000..01e8b79512
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_syscall.h
@@ -0,0 +1,123 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LX_SYSCALL_H
+#define _LX_SYSCALL_H
+
+#include <sys/lx_brand.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The br_scall_args field of lx_lwp_data is going to be populated with
+ * pointers to structs. The types of these structs should be defined in this
+ * header file. These are Linux specific arguments to system calls that don't
+ * exist in illumos. Each section should be labelled with which system call it
+ * belongs to.
+ */
+
+/* arguments for waitpid(2) */
+/* see comments in usr/src/lib/brand/lx/lx_brand/common/wait.c */
+#define LX_WNOTHREAD 0x20000000 /* Do not wait on siblings' children */
+#define LX_WALL 0x40000000 /* Wait on all children */
+#define LX_WCLONE 0x80000000 /* Wait only on clone children */
+
+/* For arch_prctl(2) */
+#define LX_ARCH_SET_GS 0x1001
+#define LX_ARCH_SET_FS 0x1002
+#define LX_ARCH_GET_FS 0x1003
+#define LX_ARCH_GET_GS 0x1004
+
+/*
+ * For ptrace(2):
+ */
+#define LX_PTRACE_TRACEME 0
+#define LX_PTRACE_PEEKTEXT 1
+#define LX_PTRACE_PEEKDATA 2
+#define LX_PTRACE_PEEKUSER 3
+#define LX_PTRACE_POKETEXT 4
+#define LX_PTRACE_POKEDATA 5
+#define LX_PTRACE_POKEUSER 6
+#define LX_PTRACE_CONT 7
+#define LX_PTRACE_KILL 8
+#define LX_PTRACE_SINGLESTEP 9
+#define LX_PTRACE_GETREGS 12
+#define LX_PTRACE_SETREGS 13
+#define LX_PTRACE_GETFPREGS 14
+#define LX_PTRACE_SETFPREGS 15
+#define LX_PTRACE_ATTACH 16
+#define LX_PTRACE_DETACH 17
+#define LX_PTRACE_GETFPXREGS 18
+#define LX_PTRACE_SETFPXREGS 19
+#define LX_PTRACE_SYSCALL 24
+#define LX_PTRACE_SETOPTIONS 0x4200
+#define LX_PTRACE_GETEVENTMSG 0x4201
+#define LX_PTRACE_GETSIGINFO 0x4202
+
+/*
+ * For clone(2):
+ */
+#define LX_CSIGNAL 0x000000ff
+#define LX_CLONE_VM 0x00000100
+#define LX_CLONE_FS 0x00000200
+#define LX_CLONE_FILES 0x00000400
+#define LX_CLONE_SIGHAND 0x00000800
+#define LX_CLONE_PID 0x00001000
+#define LX_CLONE_PTRACE 0x00002000
+#define LX_CLONE_VFORK 0x00004000
+#define LX_CLONE_PARENT 0x00008000
+#define LX_CLONE_THREAD 0x00010000
+#define LX_CLONE_NEWNS 0x00020000
+#define LX_CLONE_SYSVSEM 0x00040000
+#define LX_CLONE_SETTLS 0x00080000
+#define LX_CLONE_PARENT_SETTID 0x00100000
+#define LX_CLONE_CHILD_CLEARTID 0x00200000
+#define LX_CLONE_DETACH 0x00400000
+#define LX_CLONE_UNTRACED 0x00800000
+#define LX_CLONE_CHILD_SETTID 0x01000000
+#define LX_CLONE_NEWCGROUP 0x02000000
+#define LX_CLONE_NEWUTS 0x04000000
+#define LX_CLONE_NEWIPC 0x08000000
+#define LX_CLONE_NEWUSER 0x10000000
+#define LX_CLONE_NEWPID 0x20000000
+#define LX_CLONE_NEWNET 0x40000000
+#define LX_CLONE_IO 0x80000000
+
+#define SHARED_AS \
+ (LX_CLONE_VM | LX_CLONE_FS | LX_CLONE_FILES | LX_CLONE_SIGHAND | \
+ LX_CLONE_THREAD)
+
+/*
+ * Valid clone flags when not a full process or full thread (SHARED_AS), This
+ * can be expanded as additional clone-group support is added.
+ */
+#define LX_CLONE_GRP_SUBSET (LX_CLONE_FS)
+
+/*
+ * True when X requests some, but not all, of the SHARED_AS flags and every
+ * requested one is within LX_CLONE_GRP_SUBSET. The argument is parenthesized
+ * so that expressions such as (a | b) are masked as a whole.
+ */
+#define LX_IS_CLONE_GRP(X) (((X) & SHARED_AS) != 0 && \
+	((X) & SHARED_AS) != SHARED_AS && \
+	(((X) & SHARED_AS) & ~LX_CLONE_GRP_SUBSET) == 0)
+
+#define LX_CLONE_NS_UNSUP (LX_CLONE_NEWNS | LX_CLONE_NEWCGROUP | \
+ LX_CLONE_NEWUTS | LX_CLONE_NEWIPC | \
+ LX_CLONE_NEWUSER | LX_CLONE_NEWPID | \
+ LX_CLONE_NEWNET | LX_CLONE_IO)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_SYSCALL_H */
diff --git a/usr/src/common/brand/lx/tools/Makefile b/usr/src/common/brand/lx/tools/Makefile
new file mode 100644
index 0000000000..2b5bb92251
--- /dev/null
+++ b/usr/src/common/brand/lx/tools/Makefile
@@ -0,0 +1,47 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Joyent, Inc.
+#
+
+PROG = gen_errno
+
+include ../../../../cmd/Makefile.cmd
+
+OBJS = gen_errno.o
+
+CLOBBERFILES += $(PROG)
+
+NATIVECC_CFLAGS += $(CFLAGS) $(CCVERBOSE)
+# As evidenced by the use of the NATIVE_ variables, gen_errno is intended
+# to be able to run on the build host. We continue to link it against
+# libcmdutils.so instead of libcustr.so in order to allow it to run on
+# older build hosts (relying on the libcmdutils filter entries if run on
+# newer hosts with libcustr.so).
+NATIVECC_LDLIBS += -lcmdutils -lnvpair
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all
+
+lint: lint_PROG
+
+clean:
+ $(RM) $(OBJS)
+
+$(PROG): $(OBJS)
+ $(NATIVECC) $(NATIVECC_CFLAGS) $(NATIVECC_LDLIBS) $(OBJS) -o $@
+ $(POST_PROCESS)
+
+include ../../../../cmd/Makefile.targ
diff --git a/usr/src/common/brand/lx/tools/README.md b/usr/src/common/brand/lx/tools/README.md
new file mode 100644
index 0000000000..5e4976f200
--- /dev/null
+++ b/usr/src/common/brand/lx/tools/README.md
@@ -0,0 +1,39 @@
+# Updating Error Number Translations
+
+To create an updated error number translation table, you can use the
+`gen_errno` tool. This tool requires, as input:
+
+* the illumos native `errno.h` file
+* a set of foreign operating system `errno.h` files
+
+The output is a set of translation table entries suitable for inclusion in a
+cstyled C array. The index of the array is the native error number and the
+value at each index is the translated error number for use with the foreign
+operating system.
+
+## Example
+
+To generate a translation table for the LX Brand, you will require two files
+from the current Linux source:
+
+* `include/uapi/asm-generic/errno-base.h` (low-valued, or base, error numbers)
+* `include/uapi/asm-generic/errno.h` (extended error numbers)
+
+Assuming the files are in the current directory, you should run the tool as
+follows:
+
+ $ dmake
+ ...
+ $ ./gen_errno -F errno-base.h -F errno.h \
+ -N $SRC/uts/common/sys/errno.h
+ 0, /* 0: No Error */
+ 1, /* 1: EPERM --> 1: EPERM */
+ 2, /* 2: ENOENT --> 2: ENOENT */
+ 3, /* 3: ESRCH --> 3: ESRCH */
+ 4, /* 4: EINTR --> 4: EINTR */
+ 5, /* 5: EIO --> 5: EIO */
+ 6, /* 6: ENXIO --> 6: ENXIO */
+ 7, /* 7: E2BIG --> 7: E2BIG */
+ ...
+
+The output may be used in the `$SRC/common/brand/lx/lx_errno.c` file.
diff --git a/usr/src/common/brand/lx/tools/gen_errno.c b/usr/src/common/brand/lx/tools/gen_errno.c
new file mode 100644
index 0000000000..6089fed3bd
--- /dev/null
+++ b/usr/src/common/brand/lx/tools/gen_errno.c
@@ -0,0 +1,444 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Joyent, Inc.
+ */
+
+/*
+ * Take the error number definitions from a foreign system and generate a
+ * translation table that converts illumos native error numbers to foreign
+ * system error numbers.
+ */
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <errno.h>
+#include <err.h>
+#include <sys/sysmacros.h>
+#include <libcustr.h>
+#include <libnvpair.h>
+
+/*
+ * Error number definitions parsed from the native (-N) and foreign (-F)
+ * header files.  Each pair maps a symbolic name either to an int32 error
+ * number or, for an alias, to the name of another definition (a string).
+ */
+static nvlist_t *native_errors;
+static nvlist_t *foreign_errors;
+
+/*
+ * Native error names that are looked up in the foreign definitions under a
+ * different name.  The table ends with an entry whose ovr_from is NULL.
+ */
+static const struct override {
+	const char *ovr_from;
+	const char *ovr_to;
+} overrides[] = {
+	{ "ENOTSUP", "ENOSYS" },
+	{ NULL, NULL }
+};
+
+/*
+ * Return the name that should be used in place of "from" when searching the
+ * foreign definitions, or NULL if the overrides table has no entry for it.
+ */
+static const char *
+lookup_override(const char *from)
+{
+	const struct override *ovr = overrides;
+
+	while (ovr->ovr_from != NULL) {
+		if (strcmp(ovr->ovr_from, from) == 0) {
+			return (ovr->ovr_to);
+		}
+		ovr++;
+	}
+
+	return (NULL);
+}
+
+/*
+ * Parse "number" as a base 10 integer and store the result in *rval.
+ *
+ * Returns 0 on success.  On failure, returns -1 with errno set: EINVAL if
+ * the string is empty or is not a single number, ERANGE (from strtol()) if
+ * the value does not fit in a long, or EOVERFLOW if it does not fit in an
+ * int.  *rval is not modified on failure.
+ */
+static int
+parse_int(const char *number, int *rval)
+{
+	long n;
+	char *endpos = NULL;
+
+	/*
+	 * strtol() reports overflow by returning LONG_MIN or LONG_MAX with
+	 * errno set, so errno must be checked regardless of the value that
+	 * was returned.
+	 */
+	errno = 0;
+	n = strtol(number, &endpos, 10);
+	if (errno != 0) {
+		return (-1);
+	}
+
+	/*
+	 * Reject strings that contain no digits at all (strtol() leaves
+	 * endpos at the start of the string) and strings with trailing
+	 * characters after the number.
+	 */
+	if (endpos == number || *endpos != '\0') {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	if (n > INT_MAX || n < INT_MIN) {
+		errno = EOVERFLOW;
+		return (-1);
+	}
+
+	*rval = (int)n;
+	return (0);
+}
+
+/*
+ * Record the definition "name" -> "number" in the list nvl.
+ *
+ * If "number" parses as an integer it is stored as an int32 value;
+ * otherwise it is taken to be the name of another definition and is stored
+ * as a string.
+ *
+ * Returns 0 on success.  On failure, prints a message to stderr and returns
+ * -1 with errno set: EEXIST if "name" is already present in the list, or
+ * the error code returned by libnvpair.
+ */
+static int
+errnum_add(nvlist_t *nvl, const char *name, const char *number)
+{
+	int val;
+	int ret;
+
+	if (nvlist_exists(nvl, name)) {
+		(void) fprintf(stderr, "ERROR: duplicate definition: %s -> "
+		    "%s\n", name, number);
+		errno = EEXIST;
+		return (-1);
+	}
+
+	/*
+	 * The nvlist_add_*() functions return an error code rather than
+	 * setting errno, so report the returned value and pass it on to the
+	 * caller through errno.
+	 */
+	if (parse_int(number, &val) == 0) {
+		/*
+		 * The name refers to a number.
+		 */
+		if ((ret = nvlist_add_int32(nvl, name, val)) != 0) {
+			(void) fprintf(stderr, "ERROR: nvlist_add_int32: %s\n",
+			    strerror(ret));
+			errno = ret;
+			return (-1);
+		}
+	} else {
+		/*
+		 * The name refers to another definition.
+		 */
+		if ((ret = nvlist_add_string(nvl, name, number)) != 0) {
+			(void) fprintf(stderr, "ERROR: nvlist_add_string: %s\n",
+			    strerror(ret));
+			errno = ret;
+			return (-1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Return the largest int32 value stored in nvl, or 0 if the list holds no
+ * int32 value greater than zero.  Pairs of any other type are ignored.
+ */
+static int
+errnum_max(nvlist_t *nvl)
+{
+	int highest = 0;
+	nvpair_t *pair;
+
+	for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(nvl, pair)) {
+		if (nvpair_type(pair) == DATA_TYPE_INT32) {
+			int num = fnvpair_value_int32(pair);
+
+			if (num > highest) {
+				highest = num;
+			}
+		}
+	}
+
+	return (highest);
+}
+
+/*
+ * Find the first int32 pair in nvl whose value is "num" and store its name
+ * in *name.  Returns 0 if one was found; otherwise returns -1 with errno
+ * set to ENOENT and leaves *name untouched.
+ */
+static int
+errname_by_num(nvlist_t *nvl, int num, const char **name)
+{
+	nvpair_t *pair;
+
+	for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(nvl, pair)) {
+		if (nvpair_type(pair) == DATA_TYPE_INT32 &&
+		    fnvpair_value_int32(pair) == num) {
+			*name = nvpair_name(pair);
+			return (0);
+		}
+	}
+
+	errno = ENOENT;
+	return (-1);
+}
+
+/*
+ * Look up the error number for "name" in nvl, following string-valued
+ * pairs (definitions that name another definition) until a number is
+ * reached.
+ *
+ * On success, returns 0, stores the number in *rval and, if rname is not
+ * NULL, stores the name of the definition that held the number in *rname.
+ * On failure, returns -1 with errno set to ENOENT if a name in the chain is
+ * not defined, or ELOOP if the chain of aliases is too long (for example,
+ * because two definitions refer to each other).
+ */
+static int
+errno_by_name(nvlist_t *nvl, const char *name, int *rval, const char **rname)
+{
+	enum { ALIAS_HOPS_MAX = 32 };
+	int hops;
+
+	for (hops = 0; hops < ALIAS_HOPS_MAX; hops++) {
+		nvpair_t *nvp = NULL;
+
+		if (nvlist_lookup_nvpair(nvl, name, &nvp) != 0) {
+			errno = ENOENT;
+			return (-1);
+		}
+
+		if (nvpair_type(nvp) != DATA_TYPE_STRING) {
+			*rval = fnvpair_value_int32(nvp);
+			if (rname != NULL) {
+				*rname = name;
+			}
+			return (0);
+		}
+
+		/*
+		 * This definition is an alias; continue with its target.
+		 */
+		name = fnvpair_value_string(nvp);
+	}
+
+	errno = ELOOP;
+	return (-1);
+}
+
+/*
+ * Parse one line of a header file.  Lines of the form
+ *
+ *	#define <name> <value> ...
+ *
+ * are passed to errnum_add() as name and value; anything after the value
+ * token is ignored.  Lines that do not begin with "#define", or that have
+ * no value token, are skipped.
+ *
+ * Returns 0 if the line was skipped or added, or -1 with errno set if
+ * memory could not be allocated or errnum_add() failed.
+ */
+static int
+process_line(const char *line, nvlist_t *nvl)
+{
+	static const char prefix[] = "#define";
+	custr_t *nam = NULL, *num = NULL;
+	const char *c = line;
+	int rval = 0;
+	int en;
+
+	/*
+	 * Valid lines begin with "#define".  Check this before allocating
+	 * anything so that uninteresting lines cost nothing.
+	 */
+	if (strncmp(c, prefix, sizeof (prefix) - 1) != 0) {
+		return (0);
+	}
+	c += sizeof (prefix) - 1;
+
+	if (custr_alloc(&nam) != 0 || custr_alloc(&num) != 0) {
+		rval = -1;
+		goto out;
+	}
+
+	/*
+	 * Eat whitespace:
+	 */
+	while (*c == ' ' || *c == '\t') {
+		c++;
+	}
+
+	/*
+	 * Read error name token:
+	 */
+	while (*c != '\0' && *c != ' ' && *c != '\t') {
+		if (custr_appendc(nam, *c) != 0) {
+			rval = -1;
+			goto out;
+		}
+		c++;
+	}
+
+	/*
+	 * Eat whitespace:
+	 */
+	while (*c == ' ' || *c == '\t') {
+		c++;
+	}
+
+	/*
+	 * A definition without a value is of no interest.
+	 */
+	if (*c == '\0') {
+		goto out;
+	}
+
+	/*
+	 * Read error number token:
+	 */
+	while (*c != '\0' && *c != ' ' && *c != '\t') {
+		if (custr_appendc(num, *c) != 0) {
+			rval = -1;
+			goto out;
+		}
+		c++;
+	}
+
+	rval = errnum_add(nvl, custr_cstr(nam), custr_cstr(num));
+
+out:
+	/*
+	 * Release the token buffers on every path, preserving the errno
+	 * value that describes any failure.
+	 */
+	en = errno;
+	custr_free(nam);
+	custr_free(num);
+	errno = en;
+	return (rval);
+}
+
+/*
+ * Read the file at "path" and pass each line to process_line() so that its
+ * definitions are added to nvl.  Carriage returns and NUL bytes in the file
+ * are ignored.  A final line without a trailing newline is processed too.
+ *
+ * Returns 0 on success, or -1 with errno set if the file could not be
+ * opened or read, memory could not be allocated, or a line could not be
+ * processed.
+ */
+static int
+read_file_into_list(const char *path, nvlist_t *nvl)
+{
+	int rval = 0, en = 0;
+	FILE *f;
+	custr_t *cu = NULL;
+
+	if (custr_alloc(&cu) != 0) {
+		return (-1);
+	}
+
+	if ((f = fopen(path, "r")) == NULL) {
+		/*
+		 * Keep the errno from fopen() intact for the caller.
+		 */
+		en = errno;
+		custr_free(cu);
+		errno = en;
+		return (-1);
+	}
+
+	for (;;) {
+		int c = fgetc(f);
+
+		if (c == EOF && ferror(f)) {
+			en = errno;
+			rval = -1;
+			break;
+		}
+
+		if (c == '\r' || c == '\0') {
+			/*
+			 * Ignore these characters.
+			 */
+			continue;
+		}
+
+		if (c != '\n' && c != EOF) {
+			if (custr_appendc(cu, c) != 0) {
+				en = errno;
+				rval = -1;
+				break;
+			}
+			continue;
+		}
+
+		/*
+		 * End of line, or end of file: process what was collected.
+		 */
+		if (process_line(custr_cstr(cu), nvl) != 0) {
+			en = errno;
+			rval = -1;
+			break;
+		}
+		custr_reset(cu);
+
+		if (c == EOF) {
+			break;
+		}
+	}
+
+	(void) fclose(f);
+	custr_free(cu);
+	errno = en;
+	return (rval);
+}
+
+/*
+ * Read native definitions from each -N file and foreign definitions from
+ * each -F file, then print one array entry for every native error number
+ * from 0 up to the largest one defined.
+ *
+ * Each entry is the foreign error number for that native number:
+ *	 0	for native number 0 ("No Error")
+ *	-1	for a native number with no defined name ("Unused Number")
+ *	-2	for a native error with no foreign definition ("No Analogue")
+ *
+ * Exits with status 1 if an option is invalid or a file cannot be read.
+ */
+int
+main(int argc, char **argv)
+{
+	int max;
+	int fval;
+	int c;
+
+	if (nvlist_alloc(&native_errors, NV_UNIQUE_NAME, 0) != 0 ||
+	    nvlist_alloc(&foreign_errors, NV_UNIQUE_NAME, 0) != 0) {
+		err(1, "could not allocate memory");
+	}
+
+	while ((c = getopt(argc, argv, ":N:F:")) != -1) {
+		switch (c) {
+		case 'N':
+			if (read_file_into_list(optarg, native_errors) != 0) {
+				err(1, "could not read file: %s", optarg);
+			}
+			break;
+
+		case 'F':
+			if (read_file_into_list(optarg, foreign_errors) != 0) {
+				err(1, "could not read file: %s", optarg);
+			}
+			break;
+
+		/*
+		 * For both error cases getopt() returns a marker character;
+		 * the option that caused the problem is in optopt.
+		 */
+		case ':':
+			errx(1, "option -%c requires an operand", optopt);
+			break;
+
+		case '?':
+			errx(1, "option -%c unrecognised", optopt);
+			break;
+		}
+	}
+
+	/*
+	 * Print an array entry for each error number:
+	 */
+	max = errnum_max(native_errors);
+	for (fval = 0; fval <= max; fval++) {
+		const char *fname = NULL;
+		const char *tname = NULL;
+		int32_t tval;
+		const char *msg = NULL;
+		/* The final array entry is not followed by a comma. */
+		const char *comma = (fval != max) ? "," : "";
+
+		if (errname_by_num(native_errors, fval, &fname) == -1) {
+			fname = NULL;
+		}
+
+		if (fval == 0) {
+			/*
+			 * The error number "0" is special: it means no worries.
+			 */
+			msg = "No Error";
+			tval = 0;
+		} else if (fname == NULL) {
+			/*
+			 * There is no defined name for this error number; it
+			 * is unused.
+			 */
+			msg = "Unused Number";
+			tval = -1;
+		} else {
+			/*
+			 * Check if we want to override the name of this error
+			 * in the foreign error number lookup:
+			 */
+			const char *oname = lookup_override(fname);
+
+			/*
+			 * Do the lookup:
+			 */
+			if (errno_by_name(foreign_errors, oname != NULL ?
+			    oname : fname, &tval, &tname) != 0) {
+				/*
+				 * There was no foreign error number by that
+				 * name.
+				 */
+				tname = "No Analogue";
+				tval = -2;
+			}
+		}
+
+		if (msg == NULL) {
+			/*
+			 * Choose the number of tabs after each name by its
+			 * length, so that the comment columns line up.
+			 */
+			size_t flen = strlen(fname);
+			size_t tlen = strlen(tname);
+			const char *t = flen > 7 ? "\t" : "\t\t";
+			const char *tt = tlen < 7 ? "\t\t\t" : tlen < 15 ?
+			    "\t\t" : "\t";
+
+			(void) fprintf(stdout, "\t%d%s\t/* %3d: %s%s--> %3d: "
+			    "%s%s*/\n", tval, comma, fval, fname, t, tval,
+			    tname, tt);
+		} else {
+			const char *t = "\t\t\t\t\t";
+
+			(void) fprintf(stdout, "\t%d%s\t/* %3d: %s%s*/\n", tval,
+			    comma, fval, msg, t);
+		}
+	}
+
+	(void) nvlist_free(native_errors);
+	(void) nvlist_free(foreign_errors);
+
+	return (0);
+}
diff --git a/usr/src/common/crypto/aes/aes_modes.c b/usr/src/common/crypto/aes/aes_modes.c
index b23c78d65c..8c7cc6b093 100644
--- a/usr/src/common/crypto/aes/aes_modes.c
+++ b/usr/src/common/crypto/aes/aes_modes.c
@@ -101,7 +101,7 @@ aes_encrypt_contiguous_blocks(void *ctx, char *data, size_t length,
if (aes_ctx->ac_flags & CTR_MODE) {
rv = ctr_mode_contiguous_blocks(ctx, data, length, out,
- AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
+ AES_BLOCK_LEN, aes_encrypt_block);
} else if (aes_ctx->ac_flags & CCM_MODE) {
rv = ccm_mode_encrypt_contiguous_blocks(ctx, data, length,
out, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
@@ -134,7 +134,7 @@ aes_decrypt_contiguous_blocks(void *ctx, char *data, size_t length,
if (aes_ctx->ac_flags & CTR_MODE) {
rv = ctr_mode_contiguous_blocks(ctx, data, length, out,
- AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
+ AES_BLOCK_LEN, aes_encrypt_block);
if (rv == CRYPTO_DATA_LEN_RANGE)
rv = CRYPTO_ENCRYPTED_DATA_LEN_RANGE;
} else if (aes_ctx->ac_flags & CCM_MODE) {
diff --git a/usr/src/common/crypto/chacha/chacha.c b/usr/src/common/crypto/chacha/chacha.c
index 5f9ef3b411..0a0b09919e 100644
--- a/usr/src/common/crypto/chacha/chacha.c
+++ b/usr/src/common/crypto/chacha/chacha.c
@@ -1,13 +1,25 @@
/*
+ * This implementation of ChaCha20 comes from the initial Dan Bernstein
+ * implementation, including a 256-bit key, a 64-bit nonce and a 64-bit
+ * counter. This is in contrast to ChaCha20 as defined in RFC 7539, which
+ * defines a 256-bit key, a 96-bit nonce and a 32-bit counter. In particular,
+ * kernel crash dump encryption relies on the fact that our larger counter
+ * allows for the encryption of very large messages (many gigabytes in
+ * length); any change to this implementation that reduces the size of the
+ * counter should be mindful of this use case.
+ */
+
+/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/
-/* $OpenBSD: chacha_private.h,v 1.2 2013/10/04 07:02:27 djm Exp $ */
+/* $OpenBSD: chacha.c,v 1.1 2013/11/21 00:45:44 djm Exp $ */
-#include <chacha.h>
-#include <stddef.h>
+#include "chacha.h"
+#include <sys/stddef.h>
+#include <sys/null.h>
typedef unsigned char u8;
typedef unsigned int u32;
@@ -76,10 +88,10 @@ chacha_keysetup(chacha_ctx_t *x,const u8 *k,u32 kbits,u32 ivbits)
}
void
-chacha_ivsetup(chacha_ctx_t *x,const u8 *iv)
+chacha_ivsetup(chacha_ctx_t *x,const u8 *iv, const u8 *counter)
{
- x->chacha_input[12] = 0;
- x->chacha_input[13] = 0;
+ x->chacha_input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
+ x->chacha_input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
x->chacha_input[14] = U8TO32_LITTLE(iv + 0);
x->chacha_input[15] = U8TO32_LITTLE(iv + 4);
}
diff --git a/usr/src/common/crypto/chacha/chacha.h b/usr/src/common/crypto/chacha/chacha.h
index ac9993a8a4..edadca4934 100644
--- a/usr/src/common/crypto/chacha/chacha.h
+++ b/usr/src/common/crypto/chacha/chacha.h
@@ -10,7 +10,7 @@
*/
/*
- * Copyright (c) 2015, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _CHACHA_H
@@ -27,7 +27,7 @@
* over the data and xoring it with the generated cipher.
*/
-#include <inttypes.h>
+#include <sys/inttypes.h>
#ifdef __cplusplus
extern "C" {
@@ -39,7 +39,7 @@ typedef struct chacha_ctx {
extern void chacha_keysetup(chacha_ctx_t *, const uint8_t *, uint32_t,
uint32_t);
-extern void chacha_ivsetup(chacha_ctx_t *, const uint8_t *);
+extern void chacha_ivsetup(chacha_ctx_t *, const uint8_t *, const uint8_t *);
extern void chacha_encrypt_bytes(chacha_ctx_t *, const uint8_t *, uint8_t *,
uint32_t);
diff --git a/usr/src/common/crypto/modes/ctr.c b/usr/src/common/crypto/modes/ctr.c
index 919ed3ab53..7bf0134bb4 100644
--- a/usr/src/common/crypto/modes/ctr.c
+++ b/usr/src/common/crypto/modes/ctr.c
@@ -21,6 +21,8 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _KERNEL
@@ -30,6 +32,7 @@
#include <security/cryptoki.h>
#endif
+#include <sys/debug.h>
#include <sys/types.h>
#include <modes/modes.h>
#include <sys/crypto/common.h>
@@ -37,164 +40,184 @@
#include <sys/byteorder.h>
/*
- * Encrypt and decrypt multiple blocks of data in counter mode.
+ * CTR (counter mode) is a stream cipher. That is, it generates a
+ * pseudo-random keystream that is used to XOR with the input to
+ * encrypt or decrypt. The pseudo-random keystream is generated by
+ * concatenating a nonce (supplied during initialization) with a
+ * counter (initialized to zero) to form an input block to the cipher
+ * mechanism. The resulting output of the cipher is used as a chunk
+ * of the pseudo-random keystream. Once all of the bytes of the
+ * keystream block have been used, the counter is incremented and
+ * the process repeats.
+ *
+ * Since this is a stream cipher, we do not accumulate input cipher
+ * text like we do for block modes. Instead we use ctr_ctx_t->ctr_offset
+ * to track the amount of bytes used in the current keystream block.
*/
-int
-ctr_mode_contiguous_blocks(ctr_ctx_t *ctx, char *data, size_t length,
- crypto_data_t *out, size_t block_size,
- int (*cipher)(const void *ks, const uint8_t *pt, uint8_t *ct),
- void (*xor_block)(uint8_t *, uint8_t *))
+
+static void
+ctr_new_keyblock(ctr_ctx_t *ctx,
+ int (*cipher)(const void *ks, const uint8_t *pt, uint8_t *ct))
{
- size_t remainder = length;
- size_t need;
- uint8_t *datap = (uint8_t *)data;
- uint8_t *blockp;
- uint8_t *lastp;
- void *iov_or_mp;
- offset_t offset;
- uint8_t *out_data_1;
- uint8_t *out_data_2;
- size_t out_data_1_len;
uint64_t lower_counter, upper_counter;
- if (length + ctx->ctr_remainder_len < block_size) {
- /* accumulate bytes here and return */
- bcopy(datap,
- (uint8_t *)ctx->ctr_remainder + ctx->ctr_remainder_len,
- length);
- ctx->ctr_remainder_len += length;
- ctx->ctr_copy_to = datap;
- return (CRYPTO_SUCCESS);
+ /* increment the counter */
+ lower_counter = ntohll(ctx->ctr_cb[1] & ctx->ctr_lower_mask);
+ lower_counter = htonll(lower_counter + 1);
+ lower_counter &= ctx->ctr_lower_mask;
+ ctx->ctr_cb[1] = (ctx->ctr_cb[1] & ~(ctx->ctr_lower_mask)) |
+ lower_counter;
+
+ /* wrap around */
+ if (lower_counter == 0) {
+ upper_counter = ntohll(ctx->ctr_cb[0] & ctx->ctr_upper_mask);
+ upper_counter = htonll(upper_counter + 1);
+ upper_counter &= ctx->ctr_upper_mask;
+ ctx->ctr_cb[0] = (ctx->ctr_cb[0] & ~(ctx->ctr_upper_mask)) |
+ upper_counter;
}
- lastp = (uint8_t *)ctx->ctr_cb;
- if (out != NULL)
- crypto_init_ptrs(out, &iov_or_mp, &offset);
-
- do {
- /* Unprocessed data from last call. */
- if (ctx->ctr_remainder_len > 0) {
- need = block_size - ctx->ctr_remainder_len;
-
- if (need > remainder)
- return (CRYPTO_DATA_LEN_RANGE);
-
- bcopy(datap, &((uint8_t *)ctx->ctr_remainder)
- [ctx->ctr_remainder_len], need);
-
- blockp = (uint8_t *)ctx->ctr_remainder;
- } else {
- blockp = datap;
- }
+ /* generate the new keyblock */
+ cipher(ctx->ctr_keysched, (uint8_t *)ctx->ctr_cb,
+ (uint8_t *)ctx->ctr_keystream);
+ ctx->ctr_offset = 0;
+}
- /* ctr_cb is the counter block */
- cipher(ctx->ctr_keysched, (uint8_t *)ctx->ctr_cb,
- (uint8_t *)ctx->ctr_tmp);
+#ifdef __x86
+/*
+ * It's not worth bothering to check for pointer alignment on X86 -- always
+ * try to do 32-bits at a time when enough data is available.
+ */
+#define TRY32(_src, _dst, _key, _keylen, _outlen) \
+ ((_keylen) > 3 && (_outlen) > 3)
+#else
+/*
+ * Other platforms (e.g. SPARC) require the pointers to be aligned to
+ * do 32-bits at a time.
+ */
+#define TRY32(_src, _dst, _key, _keylen, _outlen) \
+ ((_keylen) > 3 && (_outlen) > 3 && \
+ IS_P2ALIGNED((_src), sizeof (uint32_t)) && \
+ IS_P2ALIGNED((_dst), sizeof (uint32_t)) && \
+ IS_P2ALIGNED((_key), sizeof (uint32_t)))
+#endif
- lastp = (uint8_t *)ctx->ctr_tmp;
+/*
+ * XOR the input with the keystream and write the result to out.
+ * This requires that the amount of data in 'in' is >= outlen
+ * (ctr_mode_contiguous_blocks() guarantees this for us before we are
+ * called). As CTR mode is a stream cipher, we cannot use a cipher's
+ * xxx_xor_block function (e.g. aes_xor_block()) as we must handle
+ * arbitrary lengths of input and should not buffer/accumulate partial blocks
+ * between calls.
+ */
+static void
+ctr_xor(ctr_ctx_t *ctx, uint8_t *in, uint8_t *out, size_t outlen,
+ size_t block_size,
+ int (*cipher)(const void *ks, const uint8_t *pt, uint8_t *ct))
+{
+ uint8_t *keyp;
+ size_t keyamt;
+ while (outlen > 0) {
/*
- * Increment Counter.
+ * This occurs once we've consumed all the bytes in the
+ * current block of the keystream. ctr_init_ctx() creates
+ * the initial block of the keystream, so we always start
+ * with a full block of key data.
*/
- lower_counter = ntohll(ctx->ctr_cb[1] & ctx->ctr_lower_mask);
- lower_counter = htonll(lower_counter + 1);
- lower_counter &= ctx->ctr_lower_mask;
- ctx->ctr_cb[1] = (ctx->ctr_cb[1] & ~(ctx->ctr_lower_mask)) |
- lower_counter;
-
- /* wrap around */
- if (lower_counter == 0) {
- upper_counter =
- ntohll(ctx->ctr_cb[0] & ctx->ctr_upper_mask);
- upper_counter = htonll(upper_counter + 1);
- upper_counter &= ctx->ctr_upper_mask;
- ctx->ctr_cb[0] =
- (ctx->ctr_cb[0] & ~(ctx->ctr_upper_mask)) |
- upper_counter;
+ if (ctx->ctr_offset == block_size) {
+ ctr_new_keyblock(ctx, cipher);
}
+ keyp = (uint8_t *)ctx->ctr_keystream + ctx->ctr_offset;
+ keyamt = block_size - ctx->ctr_offset;
+
/*
- * XOR encrypted counter block with the current clear block.
+ * Try to process 32-bits at a time when possible.
*/
- xor_block(blockp, lastp);
-
- if (out == NULL) {
- if (ctx->ctr_remainder_len > 0) {
- bcopy(lastp, ctx->ctr_copy_to,
- ctx->ctr_remainder_len);
- bcopy(lastp + ctx->ctr_remainder_len, datap,
- need);
- }
- } else {
- crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
- &out_data_1_len, &out_data_2, block_size);
-
- /* copy block to where it belongs */
- bcopy(lastp, out_data_1, out_data_1_len);
- if (out_data_2 != NULL) {
- bcopy(lastp + out_data_1_len, out_data_2,
- block_size - out_data_1_len);
- }
- /* update offset */
- out->cd_offset += block_size;
+ if (TRY32(in, out, keyp, keyamt, outlen)) {
+ uint32_t *in32 = (uint32_t *)in;
+ uint32_t *out32 = (uint32_t *)out;
+ uint32_t *key32 = (uint32_t *)keyp;
+
+ do {
+ *out32++ = *in32++ ^ *key32++;
+ keyamt -= sizeof (uint32_t);
+ outlen -= sizeof (uint32_t);
+ } while (keyamt > 3 && outlen > 3);
+
+ in = (uint8_t *)in32;
+ out = (uint8_t *)out32;
+ keyp = (uint8_t *)key32;
}
- /* Update pointer to next block of data to be processed. */
- if (ctx->ctr_remainder_len != 0) {
- datap += need;
- ctx->ctr_remainder_len = 0;
- } else {
- datap += block_size;
+ while (keyamt > 0 && outlen > 0) {
+ *out++ = *in++ ^ *keyp++;
+ keyamt--;
+ outlen--;
}
- remainder = (size_t)&data[length] - (size_t)datap;
-
- /* Incomplete last block. */
- if (remainder > 0 && remainder < block_size) {
- bcopy(datap, ctx->ctr_remainder, remainder);
- ctx->ctr_remainder_len = remainder;
- ctx->ctr_copy_to = datap;
- goto out;
- }
- ctx->ctr_copy_to = NULL;
-
- } while (remainder > 0);
-
-out:
- return (CRYPTO_SUCCESS);
+ ctx->ctr_offset = block_size - keyamt;
+ }
}
+/*
+ * Encrypt and decrypt multiple blocks of data in counter mode.
+ */
int
-ctr_mode_final(ctr_ctx_t *ctx, crypto_data_t *out,
- int (*encrypt_block)(const void *, const uint8_t *, uint8_t *))
+ctr_mode_contiguous_blocks(ctr_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*cipher)(const void *ks, const uint8_t *pt, uint8_t *ct))
{
- uint8_t *lastp;
- uint8_t *p;
- int i;
- int rv;
+ size_t remainder = length;
+ uint8_t *datap = (uint8_t *)data;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
- if (out->cd_length < ctx->ctr_remainder_len)
- return (CRYPTO_DATA_LEN_RANGE);
+ if (block_size > sizeof (ctx->ctr_keystream))
+ return (CRYPTO_ARGUMENTS_BAD);
- encrypt_block(ctx->ctr_keysched, (uint8_t *)ctx->ctr_cb,
- (uint8_t *)ctx->ctr_tmp);
+ if (out == NULL)
+ return (CRYPTO_ARGUMENTS_BAD);
- lastp = (uint8_t *)ctx->ctr_tmp;
- p = (uint8_t *)ctx->ctr_remainder;
- for (i = 0; i < ctx->ctr_remainder_len; i++) {
- p[i] ^= lastp[i];
- }
+ /*
+ * This check guarantees 'out' contains sufficient space for
+ * the resulting output.
+ */
+ if (out->cd_offset + length > out->cd_length)
+ return (CRYPTO_BUFFER_TOO_SMALL);
- rv = crypto_put_output_data(p, out, ctx->ctr_remainder_len);
- if (rv == CRYPTO_SUCCESS) {
- out->cd_offset += ctx->ctr_remainder_len;
- ctx->ctr_remainder_len = 0;
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+
+ /* Now XOR the output with the keystream */
+ while (remainder > 0) {
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, remainder);
+
+ /*
+ * crypto_get_ptrs() should guarantee this, but act as a
+ * safeguard in case the behavior ever changes.
+ */
+ ASSERT3U(out_data_1_len, <=, remainder);
+ ctr_xor(ctx, datap, out_data_1, out_data_1_len, block_size,
+ cipher);
+
+ datap += out_data_1_len;
+ remainder -= out_data_1_len;
}
- return (rv);
+
+ out->cd_offset += length;
+
+ return (CRYPTO_SUCCESS);
}
int
ctr_init_ctx(ctr_ctx_t *ctr_ctx, ulong_t count, uint8_t *cb,
+ int (*cipher)(const void *ks, const uint8_t *pt, uint8_t *ct),
void (*copy_block)(uint8_t *, uint8_t *))
{
uint64_t upper_mask = 0;
@@ -217,6 +240,11 @@ ctr_init_ctx(ctr_ctx_t *ctr_ctx, ulong_t count, uint8_t *cb,
copy_block(cb, (uchar_t *)ctr_ctx->ctr_cb);
ctr_ctx->ctr_lastp = (uint8_t *)&ctr_ctx->ctr_cb[0];
+
+ /* Generate the first block of the keystream */
+ cipher(ctr_ctx->ctr_keysched, (uint8_t *)ctr_ctx->ctr_cb,
+ (uint8_t *)ctr_ctx->ctr_keystream);
+
ctr_ctx->ctr_flags |= CTR_MODE;
return (CRYPTO_SUCCESS);
}
diff --git a/usr/src/common/crypto/modes/modes.h b/usr/src/common/crypto/modes/modes.h
index 0ad18b0c25..0e8fb66c8a 100644
--- a/usr/src/common/crypto/modes/modes.h
+++ b/usr/src/common/crypto/modes/modes.h
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _COMMON_CRYPTO_MODES_H
@@ -51,6 +51,9 @@ extern "C" {
#define GMAC_MODE 0x00000040
#define CMAC_MODE 0x00000080
+/* Private flag for pkcs11_softtoken */
+#define P11_DECRYPTED 0x80000000
+
/*
* cc_keysched: Pointer to key schedule.
*
@@ -130,7 +133,8 @@ typedef struct ctr_ctx {
struct common_ctx ctr_common;
uint64_t ctr_lower_mask;
uint64_t ctr_upper_mask;
- uint32_t ctr_tmp[4];
+ size_t ctr_offset;
+ uint32_t ctr_keystream[4];
} ctr_ctx_t;
/*
@@ -307,8 +311,7 @@ extern int cbc_decrypt_contiguous_blocks(cbc_ctx_t *, char *, size_t,
extern int ctr_mode_contiguous_blocks(ctr_ctx_t *, char *, size_t,
crypto_data_t *, size_t,
- int (*cipher)(const void *, const uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *));
+ int (*cipher)(const void *, const uint8_t *, uint8_t *));
extern int ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *, char *, size_t,
crypto_data_t *, size_t,
@@ -356,15 +359,13 @@ extern int cmac_mode_final(cbc_ctx_t *, crypto_data_t *,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *));
-extern int ctr_mode_final(ctr_ctx_t *, crypto_data_t *,
- int (*encrypt_block)(const void *, const uint8_t *, uint8_t *));
-
extern int cbc_init_ctx(cbc_ctx_t *, char *, size_t, size_t,
void (*copy_block)(uint8_t *, uint64_t *));
extern int cmac_init_ctx(cbc_ctx_t *, size_t);
extern int ctr_init_ctx(ctr_ctx_t *, ulong_t, uint8_t *,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
void (*copy_block)(uint8_t *, uint8_t *));
extern int ccm_init_ctx(ccm_ctx_t *, char *, int, boolean_t, size_t,
diff --git a/usr/src/common/dis/i386/dis_tables.c b/usr/src/common/dis/i386/dis_tables.c
index 12a1112d8a..ddca678f1c 100644
--- a/usr/src/common/dis/i386/dis_tables.c
+++ b/usr/src/common/dis/i386/dis_tables.c
@@ -21,7 +21,7 @@
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2019, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
/*
@@ -562,6 +562,11 @@ const instable_t dis_opMOVSLD = TNS("movslq",MOVSXZ);
const instable_t dis_opPause = TNS("pause", NORM);
/*
+ * "decode table" for wbnoinvd instruction
+ */
+const instable_t dis_opWbnoinvd = TNS("wbnoinvd", NORM);
+
+/*
* Decode table for 0x0F00 opcodes
*/
const instable_t dis_op0F00[8] = {
@@ -2660,7 +2665,7 @@ dtrace_vex_adjust(uint_t vex_byte1, uint_t mode, uint_t *reg, uint_t *r_m)
*/
/* ARGSUSED */
static void
-dtrace_evex_mnem_adjust(dis86_t *x, instable_t *dp, uint_t vex_W,
+dtrace_evex_mnem_adjust(dis86_t *x, const instable_t *dp, uint_t vex_W,
uint_t evex_byte2)
{
#ifdef DIS_TEXT
@@ -3215,7 +3220,7 @@ dtrace_get_operand(dis86_t *x, uint_t mode, uint_t r_m, int wbit, int opindex)
int
dtrace_disx86(dis86_t *x, uint_t cpu_mode)
{
- instable_t *dp; /* decode table being used */
+ const instable_t *dp; /* decode table being used */
#ifdef DIS_TEXT
uint_t i;
#endif
@@ -3712,11 +3717,11 @@ not_avx512:
if (opnd_size_prefix == 0) {
/* SSSE3 MMX instructions */
dp_mmx = *dp;
- dp = &dp_mmx;
- dp->it_adrmode = MMOPM_66o;
+ dp_mmx.it_adrmode = MMOPM_66o;
#ifdef DIS_MEM
- dp->it_size = 8;
+ dp_mmx.it_size = 8;
#endif
+ dp = &dp_mmx;
}
break;
default:
@@ -3797,11 +3802,11 @@ not_avx512:
if (opnd_size_prefix == 0) {
/* SSSE3 MMX instructions */
dp_mmx = *dp;
- dp = &dp_mmx;
- dp->it_adrmode = MM;
+ dp_mmx.it_adrmode = MM;
#ifdef DIS_MEM
- dp->it_size = 8;
+ dp_mmx.it_size = 8;
#endif
+ dp = &dp_mmx;
}
break;
case CRC32:
@@ -3818,6 +3823,9 @@ not_avx512:
default:
goto error;
}
+ } else if (rep_prefix == 0xf3 && opcode4 == 0 && opcode5 == 9) {
+ rep_prefix = 0;
+ dp = (instable_t *)&dis_opWbnoinvd;
} else {
dp = (instable_t *)&dis_op0F[opcode4][opcode5];
}
diff --git a/usr/src/common/idspace/id_space.c b/usr/src/common/idspace/id_space.c
new file mode 100644
index 0000000000..7d28a8f533
--- /dev/null
+++ b/usr/src/common/idspace/id_space.c
@@ -0,0 +1,184 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/id_space.h>
+#include <sys/debug.h>
+
+/*
+ * ID Spaces
+ *
+ * The id_space_t provides a simple implementation of a managed range of
+ * integer identifiers using a vmem arena. An ID space guarantees that the
+ * next identifier returned by an allocation is larger than the previous one,
+ * unless there are no larger slots remaining in the range. In this case,
+ * the ID space will return the first available slot in the lower part of the
+ * range (viewing the previous identifier as a partitioning element). If no
+ * slots are available, id_alloc()/id_allocff() will sleep until an
+ * identifier becomes available. Accordingly, id_space allocations must be
+ * initiated from contexts where sleeping is acceptable. id_alloc_nosleep()/
+ * id_allocff_nosleep() will return -1 if no slots are available or if the
+ * system is low on memory. If id_alloc_nosleep() fails, callers should
+ * not try to extend the ID space. This is to avoid making a possible
+ * low-memory situation worse.
+ *
+ * As an ID space is designed for representing a range of id_t's, there
+ * is a preexisting maximal range: [0, MAXUID]. ID space requests outside
+ * that range will fail on a DEBUG kernel. The id_allocff*() functions
+ * return the first available id, and should be used when there is benefit
+ * to having a compact allocated range.
+ *
+ * (Presently, the id_space_t abstraction supports only direct allocations; ID
+ * reservation, in which an ID is allocated but placed in an internal
+ * dictionary for later use, should be added when a consuming subsystem
+ * arrives.)
+ *
+ * This code is also shared with userland. In userland, we don't have the same
+ * ability to have sleeping variants, so we effectively turn the normal
+ * versions without _nosleep into _nosleep.
+ */
+
+/*
+ * Convert between an id_t and the address that represents it in the arena.
+ * Addresses are the ID plus one, so ID 0 is never represented by a NULL
+ * address and ADDR_TO_ID(NULL) evaluates to -1 (presumably this is how a
+ * failed allocation becomes the documented -1 return value).
+ */
+#define	ID_TO_ADDR(id)		((void *)(uintptr_t)((id) + 1))
+#define	ADDR_TO_ID(addr)	((id_t)((uintptr_t)(addr) - 1))
+
+/*
+ * Create an ID space covering the identifiers [low, high), backed by a
+ * vmem arena called "name".  In the kernel the arena is created with
+ * VM_SLEEP, so the caller must be in a context where that is legal; in
+ * userland VM_NOSLEEP is always used.
+ */
+id_space_t *
+id_space_create(const char *name, id_t low, id_t high)
+{
+#ifndef _KERNEL
+	int vmflag = VM_NOSLEEP;
+#else
+	int vmflag = VM_SLEEP;
+#endif
+
+	ASSERT(low >= 0);
+	ASSERT(low < high);
+
+	vmflag |= VMC_IDENTIFIER;
+
+	return (vmem_create(name, ID_TO_ADDR(low), high - low, 1,
+	    NULL, NULL, NULL, 0, vmflag));
+}
+
+/*
+ * Destroy a previously created ID space by handing it to vmem_destroy().
+ * No restrictions on caller's context.
+ */
+void
+id_space_destroy(id_space_t *isp)
+{
+ vmem_destroy(isp);
+}
+
+/*
+ * Add the identifiers [low, high) to an existing ID space.  VM_SLEEP is
+ * used in the kernel and VM_NOSLEEP in userland.  The result of vmem_add()
+ * is discarded, so the caller is not told if the range could not be added.
+ */
+void
+id_space_extend(id_space_t *isp, id_t low, id_t high)
+{
+#ifndef _KERNEL
+	int vmflag = VM_NOSLEEP;
+#else
+	int vmflag = VM_SLEEP;
+#endif
+
+	(void) vmem_add(isp, ID_TO_ADDR(low), high - low, vmflag);
+}
+
+/*
+ * Allocate an id_t from the specified ID space using VM_NEXTFIT.
+ * In the kernel the allocation uses VM_SLEEP, so the caller must be in a
+ * context in which that is legal.  In userland it uses VM_NOSLEEP.
+ */
+id_t
+id_alloc(id_space_t *isp)
+{
+	void *addr;
+
+#ifdef _KERNEL
+	addr = vmem_alloc(isp, 1, VM_SLEEP | VM_NEXTFIT);
+#else
+	addr = vmem_alloc(isp, 1, VM_NOSLEEP | VM_NEXTFIT);
+#endif
+
+	return (ADDR_TO_ID(addr));
+}
+
+/*
+ * Allocate an id_t from the specified ID space using VM_NEXTFIT, without
+ * sleeping.  Returns -1 on failure (see the module block comment for more
+ * information on failure modes).
+ */
+id_t
+id_alloc_nosleep(id_space_t *isp)
+{
+	void *addr = vmem_alloc(isp, 1, VM_NOSLEEP | VM_NEXTFIT);
+
+	return (ADDR_TO_ID(addr));
+}
+
+/*
+ * Allocate an id_t from the specified ID space using VM_FIRSTFIT.
+ * In the kernel the allocation uses VM_SLEEP, so the caller must be in a
+ * context in which that is legal.  In userland it uses VM_NOSLEEP.
+ */
+id_t
+id_allocff(id_space_t *isp)
+{
+	void *addr;
+
+#ifdef _KERNEL
+	addr = vmem_alloc(isp, 1, VM_SLEEP | VM_FIRSTFIT);
+#else
+	addr = vmem_alloc(isp, 1, VM_NOSLEEP | VM_FIRSTFIT);
+#endif
+
+	return (ADDR_TO_ID(addr));
+}
+
+/*
+ * Allocate an id_t from the specified ID space using VM_FIRSTFIT, without
+ * sleeping.  Returns -1 on failure (see the module block comment for more
+ * information on failure modes).
+ */
+id_t
+id_allocff_nosleep(id_space_t *isp)
+{
+	void *addr = vmem_alloc(isp, 1, VM_NOSLEEP | VM_FIRSTFIT);
+
+	return (ADDR_TO_ID(addr));
+}
+
+/*
+ * Try to allocate exactly the identifier "id", without sleeping.
+ * Returns the id if successful, or -1 on failure.
+ */
+id_t
+id_alloc_specific_nosleep(id_space_t *isp, id_t id)
+{
+	void *addr;
+
+	/*
+	 * Constrain the allocation to the single address that represents
+	 * "id".  Note that even though we're vmem_free()ing this later, it
+	 * should be OK, since there's no quantum cache.
+	 */
+	addr = vmem_xalloc(isp, 1, 1, 0, 0,
+	    ID_TO_ADDR(id), ID_TO_ADDR(id + 1), VM_NOSLEEP);
+
+	return (ADDR_TO_ID(addr));
+}
+
+/*
+ * Free a previously allocated ID.  The ID is converted to its arena
+ * address with ID_TO_ADDR() and released with vmem_free().
+ * No restrictions on caller's context.
+ */
+void
+id_free(id_space_t *isp, id_t id)
+{
+ vmem_free(isp, ID_TO_ADDR(id), 1);
+}
diff --git a/usr/src/common/inet/inet_hash.c b/usr/src/common/inet/inet_hash.c
new file mode 100644
index 0000000000..3a511fe588
--- /dev/null
+++ b/usr/src/common/inet/inet_hash.c
@@ -0,0 +1,359 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
+ */
+
+/*
+ * Common routines usable by any part of the networking stack for hashing
+ * packets. The hashing logic originally was part of MAC, but it has more
+ * utility being usable by the rest of the broader system.
+ */
+
+#include <sys/types.h>
+#include <sys/mac.h>
+#include <sys/strsubr.h>
+#include <sys/strsun.h>
+#include <sys/vlan.h>
+#include <inet/ip.h>
+#include <inet/ip_impl.h>
+#include <inet/ip6.h>
+#include <sys/dlpi.h>
+#include <sys/sunndi.h>
+#include <inet/ipsec_impl.h>
+#include <inet/sadb.h>
+#include <inet/ipsecesp.h>
+#include <inet/ipsecah.h>
+#include <inet/inet_hash.h>
+
+/*
+ * Determines the IPv6 header length accounting for all the optional IPv6
+ * headers (hop-by-hop, destination, routing and fragment). The header length
+ * and next header value (a transport header) is captured.
+ *
+ * Arguments:
+ *   ip6h        start of the fixed IPv6 header
+ *   endptr      first byte past the data that may be examined
+ *   hdr_length  out: length of the fixed header plus every extension header
+ *               that was walked
+ *   next_hdr    out: next-header value following the last header walked
+ *   fragp       optional out (may be NULL): set to the fragment header if one
+ *               is encountered, otherwise set to NULL
+ *
+ * Returns B_FALSE if all the IP headers are not in the same mblk otherwise
+ * returns B_TRUE.
+ */
+static boolean_t
+inet_pkthash_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr,
+ uint16_t *hdr_length, uint8_t *next_hdr, ip6_frag_t **fragp)
+{
+ uint16_t length;
+ uint_t ehdrlen;
+ uint8_t *whereptr;
+ uint8_t *nexthdrp;
+ ip6_dest_t *desthdr;
+ ip6_rthdr_t *rthdr;
+ ip6_frag_t *fraghdr;
+
+ /* The fixed header itself must fit; no outputs are written otherwise. */
+ if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr)
+ return (B_FALSE);
+ ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
+ length = IPV6_HDR_LEN;
+ whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
+
+ if (fragp != NULL)
+ *fragp = NULL;
+
+ /*
+ * nexthdrp always points at the next-header field of the header most
+ * recently walked; whereptr points at the header that field describes.
+ */
+ nexthdrp = &ip6h->ip6_nxt;
+ while (whereptr < endptr) {
+ /* Is there enough left for len + nexthdr? */
+ if (whereptr + MIN_EHDR_LEN > endptr)
+ break;
+
+ switch (*nexthdrp) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_DSTOPTS:
+ /* Assumes the headers are identical for hbh and dst */
+ desthdr = (ip6_dest_t *)whereptr;
+ ehdrlen = 8 * (desthdr->ip6d_len + 1);
+ if ((uchar_t *)desthdr + ehdrlen > endptr)
+ return (B_FALSE);
+ nexthdrp = &desthdr->ip6d_nxt;
+ break;
+ case IPPROTO_ROUTING:
+ rthdr = (ip6_rthdr_t *)whereptr;
+ ehdrlen = 8 * (rthdr->ip6r_len + 1);
+ if ((uchar_t *)rthdr + ehdrlen > endptr)
+ return (B_FALSE);
+ nexthdrp = &rthdr->ip6r_nxt;
+ break;
+ case IPPROTO_FRAGMENT:
+ fraghdr = (ip6_frag_t *)whereptr;
+ ehdrlen = sizeof (ip6_frag_t);
+ if ((uchar_t *)&fraghdr[1] > endptr)
+ return (B_FALSE);
+ nexthdrp = &fraghdr->ip6f_nxt;
+ if (fragp != NULL)
+ *fragp = fraghdr;
+ break;
+ case IPPROTO_NONE:
+ /* No next header means we're finished */
+ default:
+ *hdr_length = length;
+ *next_hdr = *nexthdrp;
+ return (B_TRUE);
+ }
+ /* Step over the extension header and publish the running totals. */
+ length += ehdrlen;
+ whereptr += ehdrlen;
+ *hdr_length = length;
+ *next_hdr = *nexthdrp;
+ }
+ /*
+ * The loop ended because the examinable data ran out. What happens next
+ * depends on what kind of header was expected to follow.
+ */
+ switch (*nexthdrp) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_FRAGMENT:
+ /*
+ * If any known extension headers are still to be processed,
+ * the packet's malformed (or at least all the IP header(s) are
+ * not in the same mblk) - and that should never happen.
+ */
+ return (B_FALSE);
+
+ default:
+ /*
+ * If we get here, we know that all of the IP headers were in
+ * the same mblk, even if the ULP header is in the next mblk.
+ */
+ *hdr_length = length;
+ *next_hdr = *nexthdrp;
+ return (B_TRUE);
+ }
+}
+
+/*
+ * Byte-wise XOR folds over 2, 4 and 6 (one MAC address) consecutive bytes.
+ */
+#define	PKT_HASH_2BYTES(x) ((x)[0] ^ (x)[1])
+#define	PKT_HASH_4BYTES(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3])
+#define	PKT_HASH_MAC(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3] ^ (x)[4] ^ (x)[5])
+
+/*
+ * Compute a hash over selected header fields of an Ethernet packet.
+ *
+ *   media   DLPI media type; anything other than DL_ETHER hashes to 0.
+ *   mp      the packet, beginning with its Ethernet header.
+ *   policy  bit mask selecting which layers contribute. INET_PKT_HASH_L2 adds
+ *           the source and destination MAC addresses and INET_PKT_HASH_L3 adds
+ *           the IP addresses; any bits still set after those two are cleared
+ *           cause the first four bytes of the TCP/UDP/ESP/SCTP header to be
+ *           added (AH headers are stepped over first).
+ *
+ * Fragmented packets are always hashed on IP addresses plus the fragment
+ * identification and never on the transport header. Whenever a header is not
+ * fully contained in the current mblk, or is misaligned, parsing stops and the
+ * hash accumulated so far is returned.
+ */
+uint64_t
+inet_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy)
+{
+	struct ether_header *ehp;
+	uint64_t hash = 0;
+	uint16_t sap;
+	uint_t skip_len;
+	uint8_t proto;
+	boolean_t ip_fragmented;
+
+	/*
+	 * We may want to have one of these per MAC type plugin in the
+	 * future. For now supports only ethernet.
+	 */
+	if (media != DL_ETHER)
+		return (0L);
+
+	/* for now we support only outbound packets */
+	ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
+	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
+
+	/* compute L2 hash */
+
+	ehp = (struct ether_header *)mp->b_rptr;
+
+	if ((policy & INET_PKT_HASH_L2) != 0) {
+		uchar_t *mac_src = ehp->ether_shost.ether_addr_octet;
+		uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet;
+		hash = PKT_HASH_MAC(mac_src) ^ PKT_HASH_MAC(mac_dst);
+		policy &= ~INET_PKT_HASH_L2;
+	}
+
+	if (policy == 0)
+		goto done;
+
+	/* skip ethernet header */
+
+	sap = ntohs(ehp->ether_type);
+	if (sap == ETHERTYPE_VLAN) {
+		struct ether_vlan_header *evhp;
+		mblk_t *newmp = NULL;
+
+		skip_len = sizeof (struct ether_vlan_header);
+		if (MBLKL(mp) < skip_len) {
+			/* the vlan tag is the payload, pull up first */
+			newmp = msgpullup(mp, -1);
+			if ((newmp == NULL) || (MBLKL(newmp) < skip_len)) {
+				goto done;
+			}
+			evhp = (struct ether_vlan_header *)newmp->b_rptr;
+		} else {
+			evhp = (struct ether_vlan_header *)mp->b_rptr;
+		}
+
+		sap = ntohs(evhp->ether_type);
+		freemsg(newmp);
+	} else {
+		skip_len = sizeof (struct ether_header);
+	}
+
+	/* if ethernet header is in its own mblk, skip it */
+	if (MBLKL(mp) <= skip_len) {
+		skip_len -= MBLKL(mp);
+		mp = mp->b_cont;
+		if (mp == NULL)
+			goto done;
+	}
+
+	sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;
+
+	/* compute IP src/dst addresses hash and skip IPv{4,6} header */
+
+	switch (sap) {
+	case ETHERTYPE_IP: {
+		ipha_t *iphp;
+
+		/*
+		 * If the header is not aligned or the header doesn't fit
+		 * in the mblk, bail now. Note that this may cause packet
+		 * reordering.
+		 */
+		iphp = (ipha_t *)(mp->b_rptr + skip_len);
+		if (((unsigned char *)iphp + sizeof (ipha_t) > mp->b_wptr) ||
+		    !OK_32PTR((char *)iphp))
+			goto done;
+
+		proto = iphp->ipha_protocol;
+		skip_len += IPH_HDR_LENGTH(iphp);
+
+		/* Check if the packet is fragmented. */
+		ip_fragmented = ntohs(iphp->ipha_fragment_offset_and_flags) &
+		    IPH_OFFSET;
+
+		/*
+		 * For fragmented packets, use addresses in addition to
+		 * the frag_id to generate the hash in order to get
+		 * better distribution.
+		 */
+		if (ip_fragmented || (policy & INET_PKT_HASH_L3) != 0) {
+			uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src);
+			uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst);
+
+			hash ^= (PKT_HASH_4BYTES(ip_src) ^
+			    PKT_HASH_4BYTES(ip_dst));
+			policy &= ~INET_PKT_HASH_L3;
+		}
+
+		if (ip_fragmented) {
+			uint8_t *identp = (uint8_t *)&iphp->ipha_ident;
+			hash ^= PKT_HASH_2BYTES(identp);
+			goto done;
+		}
+		break;
+	}
+	case ETHERTYPE_IPV6: {
+		ip6_t *ip6hp;
+		ip6_frag_t *frag = NULL;
+		uint16_t hdr_length;
+
+		/*
+		 * If the header is not aligned or the header doesn't fit
+		 * in the mblk, bail now. Note that this may cause packet
+		 * reordering.
+		 */
+
+		ip6hp = (ip6_t *)(mp->b_rptr + skip_len);
+		if (((unsigned char *)ip6hp + IPV6_HDR_LEN > mp->b_wptr) ||
+		    !OK_32PTR((char *)ip6hp))
+			goto done;
+
+		if (!inet_pkthash_ip_hdr_length_v6(ip6hp, mp->b_wptr,
+		    &hdr_length, &proto, &frag))
+			goto done;
+		skip_len += hdr_length;
+
+		/*
+		 * For fragmented packets, use addresses in addition to
+		 * the frag_id to generate the hash in order to get
+		 * better distribution. Only the last four bytes of each
+		 * address are folded in.
+		 */
+		if (frag != NULL || (policy & INET_PKT_HASH_L3) != 0) {
+			uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]);
+			uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]);
+
+			hash ^= (PKT_HASH_4BYTES(ip_src) ^
+			    PKT_HASH_4BYTES(ip_dst));
+			policy &= ~INET_PKT_HASH_L3;
+		}
+
+		if (frag != NULL) {
+			uint8_t *identp = (uint8_t *)&frag->ip6f_ident;
+			hash ^= PKT_HASH_4BYTES(identp);
+			goto done;
+		}
+		break;
+	}
+	default:
+		goto done;
+	}
+
+	if (policy == 0)
+		goto done;
+
+	/* if ip header is in its own mblk, skip it */
+	if (MBLKL(mp) <= skip_len) {
+		skip_len -= MBLKL(mp);
+		mp = mp->b_cont;
+		if (mp == NULL)
+			goto done;
+	}
+
+	/* parse ULP header */
+again:
+	switch (proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_ESP:
+	case IPPROTO_SCTP:
+		/*
+		 * These Internet Protocols are intentionally designed
+		 * for hashing from the get-go. Port numbers are in the first
+		 * word for transports, SPI is first for ESP.
+		 */
+		if (mp->b_rptr + skip_len + 4 > mp->b_wptr)
+			goto done;
+		hash ^= PKT_HASH_4BYTES((mp->b_rptr + skip_len));
+		break;
+
+	case IPPROTO_AH: {
+		ah_t *ah = (ah_t *)(mp->b_rptr + skip_len);
+		uint_t ah_length;
+
+		/*
+		 * Make sure the fixed AH header is within this mblk before
+		 * reading any of its fields, including the length.
+		 */
+		if ((unsigned char *)ah + sizeof (ah_t) > mp->b_wptr)
+			goto done;
+
+		ah_length = AH_TOTAL_LEN(ah);
+		proto = ah->ah_nexthdr;
+		skip_len += ah_length;
+
+		/* if AH header is in its own mblk, skip it */
+		if (MBLKL(mp) <= skip_len) {
+			skip_len -= MBLKL(mp);
+			mp = mp->b_cont;
+			if (mp == NULL)
+				goto done;
+		}
+
+		goto again;
+	}
+	default:
+		/* Any other protocol contributes nothing further. */
+		break;
+	}
+
+done:
+	return (hash);
+}
diff --git a/usr/src/common/mc/imc/imc_decode.c b/usr/src/common/mc/imc/imc_decode.c
new file mode 100644
index 0000000000..7e52e9795e
--- /dev/null
+++ b/usr/src/common/mc/imc/imc_decode.c
@@ -0,0 +1,770 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+/*
+ * Memory decoding logic.
+ *
+ * This file is part of the 'imc' driver on x86. It supports taking a physical
+ * address and determining what the corresponding DIMM is. This is shared
+ * between the kernel and userland for easier testing.
+ *
+ * For more information about the different parts of the decoding process,
+ * please see the file 'uts/i86pc/io/imc/imc.c'.
+ */
+
+#include <sys/sysmacros.h>
+
+#ifndef _KERNEL
+#include <stdint.h>
+#include <strings.h>
+#define BITX(u, h, l) (((u) >> (l)) & ((1LU << ((h) - (l) + 1LU)) - 1LU))
+#endif /* !_KERNEL */
+
+#include "imc.h"
+
+/*
+ * Address ranges for decoding system addresses. There are three ranges that
+ * exist on x86: traditional DOS (conventional) memory, which is the first
+ * 640 KiB, low memory, and high memory. Low memory always starts at 1 MiB and
+ * high memory always starts at 4 GiB. The upper bounds of the low and high
+ * ranges are based on registers on the system.
+ */
+#define IMC_DECODE_CONV_BASE 0UL
+#define IMC_DECODE_CONV_MAX 0x00009ffffULL /* 640 KiB - 1 */
+#define IMC_DECODE_LOW_BASE 0x000100000ULL /* 1 M */
+#define IMC_DECODE_HIGH_BASE 0x100000000ULL /* 4 GiB */
+
+/*
+ * Describes a single reserved physical address range.
+ */
+typedef struct imc_legacy_range {
+ uint64_t ilr_base; /* first address in the range */
+ size_t ilr_len; /* length of the range in bytes */
+ const char *ilr_desc; /* short description of the range */
+} imc_legacy_range_t;
+
+/*
+ * These represent regions of memory that are reserved for use and will not be
+ * decoded by DRAM. The table is searched in order and the index of the first
+ * matching entry is what gets reported in ids_fail_data.
+ *
+ * NOTE(review): the 32 MiB "Unknown" entry at 0xFE000000 spans up to 4 GiB and
+ * so appears to cover every entry listed after it; with a first-match search
+ * those later, more specific entries would never be reported - confirm whether
+ * the ordering is intentional.
+ */
+static imc_legacy_range_t imc_legacy_ranges[] = {
+ { 0x00000A0000ULL, 128 * 1024, "VGA" },
+ { 0x00000C0000ULL, 256 * 1024, "PAM" },
+ { 0x0000F00000ULL, 1024 * 1024, "Reserved" },
+ { 0x00FE000000ULL, 32 * 1024 * 1024, "Unknown" },
+ { 0x00FF000000ULL, 16 * 1024 * 1024, "Firmware" },
+ { 0x00FED20000ULL, 384 * 1024, "TXT" },
+ { 0x00FED00000ULL, 1024 * 1024, "PCH" },
+ { 0x00FEC00000ULL, 1024 * 1024, "IOAPIC" },
+ { 0x00FEB80000ULL, 512 * 1024, "Reserved" },
+ { 0x00FEB00000ULL, 64 * 1024, "Reserved" }
+};
+
+/*
+ * Decide whether the physical address being decoded is off limits: either it
+ * lands in one of the reserved legacy regions or it lies outside of the
+ * explicit DRAM ranges. Returns B_TRUE (with dec->ids_fail set) when the
+ * address must not be decoded and B_FALSE when it is a DRAM address.
+ */
+static boolean_t
+imc_decode_addr_resvd(const imc_t *imc, imc_decode_state_t *dec)
+{
+	const uint64_t pa = dec->ids_pa;
+	const imc_sad_t *sad;
+	uint_t idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(imc_legacy_ranges); idx++) {
+		const imc_legacy_range_t *lr = &imc_legacy_ranges[idx];
+		uint64_t lr_end = lr->ilr_base + lr->ilr_len;
+
+		if (pa < lr->ilr_base || pa >= lr_end)
+			continue;
+
+		dec->ids_fail = IMC_DECODE_F_LEGACY_RANGE;
+		dec->ids_fail_data = idx;
+		return (B_TRUE);
+	}
+
+	/*
+	 * To know whether the address fits in DRAM we compare it against the
+	 * top of low memory and the top of high memory. That information is
+	 * technically per-socket, but we rely on every processor having been
+	 * programmed identically and simply use socket zero. Were that not
+	 * the case, the result would depend on which socket we ran on and
+	 * chaos would ensue.
+	 */
+	sad = &imc->imc_sockets[0].isock_sad;
+	if (sad->isad_valid != IMC_SAD_V_VALID) {
+		dec->ids_fail = IMC_DECODE_F_BAD_SAD;
+		return (B_TRUE);
+	}
+
+	/*
+	 * A DRAM address is in conventional, low, or high memory. The top of
+	 * conventional memory is inclusive while the tops of the other two
+	 * have been translated to be uniformly exclusive. Conventional memory
+	 * starts at zero, so there is no lower-bound comparison against
+	 * IMC_DECODE_CONV_BASE: it would always be true and the compiler
+	 * would be angry about it.
+	 */
+	if (pa <= IMC_DECODE_CONV_MAX ||
+	    (pa >= IMC_DECODE_LOW_BASE && pa < sad->isad_tolm) ||
+	    (pa >= IMC_DECODE_HIGH_BASE && pa < sad->isad_tohm)) {
+		return (B_FALSE);
+	}
+
+	/*
+	 * Not in any valid DRAM range, so it isn't ours to decode.
+	 */
+	dec->ids_fail = IMC_DECODE_F_OUTSIDE_DRAM;
+	return (B_TRUE);
+}
+
+/*
+ * Extract the interleave list index for a physical address according to the
+ * SAD rule's interleave mode. An unrecognized mode yields index zero.
+ */
+static uint_t
+imc_decode_sad_interleave(const imc_sad_rule_t *rule, uint64_t pa)
+{
+	uint_t tgt;
+
+	switch (rule->isr_imode) {
+	case IMC_SAD_IMODE_8t6:
+	case IMC_SAD_IMODE_8t6XOR:
+		/*
+		 * Both modes start from the same bits; a7mode uses bit 9 as
+		 * the low bit with bits 8:7 above it instead of bits 8:6.
+		 */
+		if (rule->isr_a7mode) {
+			tgt = BITX(pa, 9, 9) | (BITX(pa, 8, 7) << 1);
+		} else {
+			tgt = BITX(pa, 8, 6);
+		}
+
+		/* The XOR flavor additionally mixes in bits 18:16. */
+		if (rule->isr_imode == IMC_SAD_IMODE_8t6XOR) {
+			tgt ^= BITX(pa, 18, 16);
+		}
+		return (tgt);
+	case IMC_SAD_IMODE_10t8:
+		return (BITX(pa, 10, 8));
+	case IMC_SAD_IMODE_14t12:
+		return (BITX(pa, 14, 12));
+	case IMC_SAD_IMODE_32t30:
+		return (BITX(pa, 32, 30));
+	default:
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * Use the system address decoder to try and find a valid SAD entry for this
+ * address. We always start with socket zero's SAD as the SAD rules should be
+ * the same between the different sockets.
+ *
+ * On success this fills in the SAD, SAD rule, socket, node ID, TAD ID, channel
+ * ID, TAD, and memory controller members of dec and returns B_TRUE. On failure
+ * it records the reason in dec->ids_fail (and, for most failures, the
+ * offending value in dec->ids_fail_data) and returns B_FALSE.
+ */
+static boolean_t
+imc_decode_sad(const imc_t *imc, imc_decode_state_t *dec)
+{
+	uint_t i, ileaveidx;
+	uint8_t ileavetgt;
+	uint32_t nodeid, tadid, channelid;
+	uint64_t base;
+	const imc_socket_t *socket = &imc->imc_sockets[0];
+	const imc_sad_t *sad = &socket->isock_sad;
+	const imc_sad_rule_t *rule;
+	boolean_t loop = B_FALSE;
+
+	/*
+	 * Note, all SAD rules have been adjusted so that they are uniformly
+	 * exclusive. Each rule begins where the previous rule's limit ended.
+	 */
+start:
+	for (rule = NULL, i = 0, base = 0; i < sad->isad_nrules; i++) {
+		rule = &sad->isad_rules[i];
+
+		if (rule->isr_enable && dec->ids_pa >= base &&
+		    dec->ids_pa < rule->isr_limit) {
+			break;
+		}
+
+		base = rule->isr_limit;
+	}
+
+	if (rule == NULL || i == sad->isad_nrules) {
+		dec->ids_fail = IMC_DECODE_F_NO_SAD_RULE;
+		return (B_FALSE);
+	}
+
+	/*
+	 * Store the SAD rule in the decode information for debugging's sake.
+	 */
+	dec->ids_sad = sad;
+	dec->ids_sad_rule = rule;
+
+	/*
+	 * We have found a SAD rule. We now need to transform that into the
+	 * corresponding target based on its mode, etc. The way we do this
+	 * varies based on the generation.
+	 *
+	 * The first thing we need to do is to figure out the target in the
+	 * interleave list.
+	 */
+	ileaveidx = imc_decode_sad_interleave(rule, dec->ids_pa);
+	if (ileaveidx >= rule->isr_ntargets) {
+		dec->ids_fail = IMC_DECODE_F_BAD_SAD_INTERLEAVE;
+		dec->ids_fail_data = ileaveidx;
+		return (B_FALSE);
+	}
+	ileavetgt = rule->isr_targets[ileaveidx];
+	if (imc->imc_gen >= IMC_GEN_SKYLAKE &&
+	    IMC_SAD_ILEAVE_SKX_LOCAL(ileavetgt) == 0) {
+		/*
+		 * If we're in this case, the interleave rule said we had a
+		 * remote target. That means we need to find the correct SAD
+		 * based on the Node ID and then do all of this over again.
+		 */
+		nodeid = IMC_SAD_ILEAVE_SKX_TARGET(ileavetgt);
+
+		/* Only a single redirection to a remote SAD is tolerated. */
+		if (loop) {
+			dec->ids_fail = IMC_DECODE_F_SAD_SEARCH_LOOP;
+			return (B_FALSE);
+		}
+
+		for (i = 0; i < imc->imc_nsockets; i++) {
+			if (imc->imc_sockets[i].isock_valid ==
+			    IMC_SOCKET_V_VALID &&
+			    imc->imc_sockets[i].isock_nodeid == nodeid) {
+				socket = &imc->imc_sockets[i];
+				sad = &imc->imc_sockets[i].isock_sad;
+				loop = B_TRUE;
+				goto start;
+			}
+		}
+
+		dec->ids_fail = IMC_DECODE_F_BAD_REMOTE_MC_ROUTE;
+		dec->ids_fail_data = nodeid;
+		return (B_FALSE);
+	}
+
+	/*
+	 * On some platforms we need to derive the target channel based on the
+	 * physical address and additional rules in the SAD. If we do, do that
+	 * here. The idea is that this may overrule the memory channel route
+	 * table target that was determined from the SAD rule.
+	 */
+	if (rule->isr_need_mod3) {
+		uint64_t addr;
+		uint8_t channel;
+
+		switch (rule->isr_mod_mode) {
+		case IMC_SAD_MOD_MODE_45t6:
+			addr = dec->ids_pa >> 6;
+			break;
+		case IMC_SAD_MOD_MODE_45t8:
+			addr = dec->ids_pa >> 8;
+			break;
+		case IMC_SAD_MOD_MODE_45t12:
+			addr = dec->ids_pa >> 12;
+			break;
+		default:
+			dec->ids_fail = IMC_DECODE_F_SAD_BAD_MOD;
+			return (B_FALSE);
+		}
+
+		switch (rule->isr_mod_type) {
+		case IMC_SAD_MOD_TYPE_MOD3:
+			channel = (addr % 3) << 1;
+			channel |= ileavetgt & 1;
+			break;
+		case IMC_SAD_MOD_TYPE_MOD2_01:
+			channel = (addr % 2) << 1;
+			channel |= ileavetgt & 1;
+			break;
+		case IMC_SAD_MOD_TYPE_MOD2_12:
+			channel = (addr % 2) << 2;
+			channel |= (~addr % 2) << 1;
+			channel |= ileavetgt & 1;
+			break;
+		case IMC_SAD_MOD_TYPE_MOD2_02:
+			channel = (addr % 2) << 2;
+			channel |= ileavetgt & 1;
+			break;
+		default:
+			dec->ids_fail = IMC_DECODE_F_SAD_BAD_MOD;
+			return (B_FALSE);
+		}
+
+		ileavetgt = channel;
+	}
+
+	switch (imc->imc_gen) {
+	case IMC_GEN_SANDY:
+		/*
+		 * Sandy Bridge systems only have a single home agent, so the
+		 * interleave target is always the node id.
+		 */
+		nodeid = ileavetgt;
+		tadid = 0;
+		channelid = UINT32_MAX;
+		break;
+	case IMC_GEN_IVY:
+	case IMC_GEN_HASWELL:
+	case IMC_GEN_BROADWELL:
+		/*
+		 * On these generations, the interleave NodeID in the SAD
+		 * encodes both the nodeid and the home agent ID that we care
+		 * about.
+		 */
+		nodeid = IMC_NODEID_IVY_BRD_UPPER(ileavetgt) |
+		    IMC_NODEID_IVY_BRD_LOWER(ileavetgt);
+		tadid = IMC_NODEID_IVY_BRD_HA(ileavetgt);
+		channelid = UINT32_MAX;
+		break;
+	case IMC_GEN_SKYLAKE:
+		/*
+		 * On Skylake generation systems we take the interleave target
+		 * and use that to look up both the memory controller and the
+		 * physical channel in the route table. The nodeid is already
+		 * known because its SAD rules redirect us.
+		 */
+		nodeid = socket->isock_nodeid;
+		if (ileavetgt > IMC_SAD_ILEAVE_SKX_MAX) {
+			dec->ids_fail = IMC_DECODE_F_BAD_SAD_INTERLEAVE;
+			dec->ids_fail_data = ileavetgt;
+			return (B_FALSE);
+		}
+		ileavetgt = IMC_SAD_ILEAVE_SKX_TARGET(ileavetgt);
+
+		/*
+		 * ismc_nroutes is a count, so valid route indices are
+		 * strictly less than it.
+		 */
+		if (ileavetgt >= sad->isad_mcroute.ismc_nroutes) {
+			dec->ids_fail = IMC_DECODE_F_BAD_SAD_INTERLEAVE;
+			dec->ids_fail_data = ileavetgt;
+			return (B_FALSE);
+		}
+		tadid = sad->isad_mcroute.ismc_mcroutes[ileavetgt].ismce_imc;
+		channelid =
+		    sad->isad_mcroute.ismc_mcroutes[ileavetgt].ismce_pchannel;
+		break;
+	default:
+		/* Unknown generation: force the socket lookup below to fail. */
+		nodeid = tadid = channelid = UINT32_MAX;
+		break;
+	}
+
+	/*
+	 * Map to the correct socket based on the nodeid. Make sure that we have
+	 * a valid TAD.
+	 */
+	dec->ids_socket = NULL;
+	for (i = 0; i < imc->imc_nsockets; i++) {
+		if (imc->imc_sockets[i].isock_nodeid == nodeid) {
+			dec->ids_socket = &imc->imc_sockets[i];
+			break;
+		}
+	}
+	if (dec->ids_socket == NULL) {
+		dec->ids_fail = IMC_DECODE_F_SAD_BAD_SOCKET;
+		dec->ids_fail_data = nodeid;
+		return (B_FALSE);
+	}
+
+	if (tadid >= dec->ids_socket->isock_ntad) {
+		dec->ids_fail = IMC_DECODE_F_SAD_BAD_TAD;
+		dec->ids_fail_data = tadid;
+		return (B_FALSE);
+	}
+
+	dec->ids_nodeid = nodeid;
+	dec->ids_tadid = tadid;
+	dec->ids_channelid = channelid;
+	dec->ids_tad = &dec->ids_socket->isock_tad[tadid];
+	dec->ids_mc = &dec->ids_socket->isock_imcs[tadid];
+
+	return (B_TRUE);
+}
+
+/*
+ * Sandy Bridge through Broadwell require us to work out which memory channel
+ * is being targeted. The answer depends on how many ways the socket and the
+ * channel are interleaved; the TAD rule carries a list of target channels and
+ * the index into that list is roughly:
+ *
+ *	idx = [(dec->ids_pa >> 6) / socket-ways] % channel-ways
+ *
+ * The shift by six reflects the number of bits that are, in theory, covered by
+ * the cache line size. Reality adds a few wrinkles:
+ *
+ *  o With a7mode / MCChanShiftUpEnable, more cache lines are used, so one more
+ *    bit is shifted away.
+ *
+ *  o With MCChanHashEn, the address is hashed before the modulus by the number
+ *    of channel ways is taken.
+ *
+ *  o With three channel ways, the base address of the range may not start at
+ *    index zero. The nominal fix is to apply the per-channel programmed offset
+ *    to the system address, but that requires already knowing the channel,
+ *    which is the very thing being computed. Regretfully, we proclaim that we
+ *    can't handle that case.
+ */
+static boolean_t
+imc_decode_tad_channel(const imc_t *imc, imc_decode_state_t *dec)
+{
+	const imc_tad_rule_t *rule = dec->ids_tad_rule;
+	uint64_t idx = dec->ids_pa >> 6;
+
+	if ((dec->ids_tad->itad_flags & IMC_TAD_FLAG_CHANSHIFT) != 0) {
+		idx >>= 1;
+	}
+
+	/*
+	 * When performing a socket way equals three comparison, this would not
+	 * work.
+	 */
+	idx /= rule->itr_sock_way;
+
+	if ((dec->ids_tad->itad_flags & IMC_TAD_FLAG_CHANHASH) != 0) {
+		uint_t bit = 12;
+
+		/* Fold in successive two-bit groups from bits 12 up to 27. */
+		while (bit < 28) {
+			idx ^= (dec->ids_pa >> bit) & 0x3;
+			bit += 2;
+		}
+	}
+
+	idx %= rule->itr_chan_way;
+	if (idx < rule->itr_ntargets) {
+		dec->ids_channelid = rule->itr_targets[idx];
+		return (B_TRUE);
+	}
+
+	dec->ids_fail = IMC_DECODE_F_TAD_BAD_TARGET_INDEX;
+	dec->ids_fail_data = idx;
+	return (B_FALSE);
+}
+
+/*
+ * Translate a TAD interleave granularity into the equivalent shift amount
+ * (log2 of the granularity in bytes). The 64 byte granularity grows by one bit
+ * when the TAD has the channel-shift flag set. An unrecognized granularity
+ * yields zero.
+ */
+static uint_t
+imc_tad_gran_to_shift(const imc_tad_t *tad, imc_tad_gran_t gran)
+{
+	switch (gran) {
+	case IMC_TAD_GRAN_64B:
+		if ((tad->itad_flags & IMC_TAD_FLAG_CHANSHIFT) != 0) {
+			return (7);
+		}
+		return (6);
+	case IMC_TAD_GRAN_256B:
+		return (8);
+	case IMC_TAD_GRAN_4KB:
+		return (12);
+	case IMC_TAD_GRAN_1GB:
+		return (30);
+	default:
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * Use the TAD that imc_decode_sad() selected to find the TAD rule covering
+ * dec->ids_pa, the target channel, and the address within that channel. On
+ * success the TAD rule, channel, and channel address are recorded in dec and
+ * B_TRUE is returned. On failure dec->ids_fail describes the problem and
+ * B_FALSE is returned.
+ */
+static boolean_t
+imc_decode_tad(const imc_t *imc, imc_decode_state_t *dec)
+{
+ uint_t i, tadruleno;
+ uint_t sockshift, chanshift, sockmask, chanmask;
+ uint64_t off, chanaddr;
+ const imc_tad_t *tad = dec->ids_tad;
+ const imc_mc_t *mc = dec->ids_mc;
+ const imc_tad_rule_t *rule = NULL;
+ const imc_channel_t *chan;
+
+ /*
+ * The first step in all of this is to determine which TAD rule applies
+ * for this address.
+ */
+ for (i = 0; i < tad->itad_nrules; i++) {
+ rule = &tad->itad_rules[i];
+
+ if (dec->ids_pa >= rule->itr_base &&
+ dec->ids_pa < rule->itr_limit) {
+ break;
+ }
+ }
+
+ /* Either there were no rules at all or none of them matched. */
+ if (rule == NULL || i == tad->itad_nrules) {
+ dec->ids_fail = IMC_DECODE_F_NO_TAD_RULE;
+ return (B_FALSE);
+ }
+ tadruleno = i;
+ dec->ids_tad_rule = rule;
+
+ /*
+ * Check if our TAD rule requires 3-way interleaving on the channel. We
+ * basically can't do that right now. For more information, see the
+ * comment above imc_decode_tad_channel().
+ */
+ if (rule->itr_chan_way == 3) {
+ dec->ids_fail = IMC_DECODE_F_TAD_3_ILEAVE;
+ return (B_FALSE);
+ }
+
+ /*
+ * On some platforms, we need to now calculate the channel index from
+ * this. The way that we calculate this is nominally straightforward,
+ * but complicated by a number of different issues.
+ */
+ switch (imc->imc_gen) {
+ case IMC_GEN_SANDY:
+ case IMC_GEN_IVY:
+ case IMC_GEN_HASWELL:
+ case IMC_GEN_BROADWELL:
+ if (!imc_decode_tad_channel(imc, dec)) {
+ return (B_FALSE);
+ }
+ break;
+ default:
+ /*
+ * On Skylake and newer platforms we should have already decoded
+ * the target channel based on using the memory controller route
+ * table above.
+ */
+ break;
+ }
+
+ /*
+ * We initialize ids_channelid to UINT32_MAX, so this should make sure
+ * that we catch an incorrect channel as well.
+ */
+ if (dec->ids_channelid >= mc->icn_nchannels) {
+ dec->ids_fail = IMC_DECODE_F_BAD_CHANNEL_ID;
+ dec->ids_fail_data = dec->ids_channelid;
+ return (B_FALSE);
+ }
+ chan = &mc->icn_channels[dec->ids_channelid];
+ dec->ids_chan = chan;
+
+ /* The channel's offset table is indexed by the TAD rule number. */
+ if (tadruleno >= chan->ich_ntad_offsets) {
+ dec->ids_fail = IMC_DECODE_F_BAD_CHANNEL_TAD_OFFSET;
+ dec->ids_fail_data = tadruleno;
+ return (B_FALSE);
+ }
+
+ /*
+ * Now we can go ahead and calculate the channel address, which is
+ * roughly equal to:
+ *
+ * chan_addr = (sys_addr - off) / (chan way * sock way).
+ *
+ * The catch is that we want to preserve the low bits where possible.
+ * The number of bits is based on the interleaving granularities, the
+ * way that's calculated is based on information in the TAD rule.
+ * However, if a7mode is enabled on Ivy Bridge through Broadwell, then
+ * we need to add one to that. So we will save the smallest number of
+ * bits that are left after interleaving.
+ *
+ * Because the interleaving occurs at different granularities, we need
+ * to break this into two discrete steps, one where we apply the socket
+ * interleaving and one where we apply the channel interleaving,
+ * shifting and dividing at each step.
+ */
+ off = chan->ich_tad_offsets[tadruleno];
+ if (off > dec->ids_pa) {
+ dec->ids_fail = IMC_DECODE_F_CHANOFF_UNDERFLOW;
+ return (B_FALSE);
+ }
+ chanshift = imc_tad_gran_to_shift(tad, rule->itr_chan_gran);
+ sockshift = imc_tad_gran_to_shift(tad, rule->itr_sock_gran);
+ chanmask = (1 << chanshift) - 1;
+ sockmask = (1 << sockshift) - 1;
+
+ /*
+ * Socket step first, then channel step. In each step the bits below
+ * the granularity are dropped for the division and then restored from
+ * the original physical address.
+ */
+ chanaddr = dec->ids_pa - off;
+ chanaddr >>= sockshift;
+ chanaddr /= rule->itr_sock_way;
+ chanaddr <<= sockshift;
+ chanaddr |= dec->ids_pa & sockmask;
+ chanaddr >>= chanshift;
+ chanaddr /= rule->itr_chan_way;
+ chanaddr <<= chanshift;
+ chanaddr |= dec->ids_pa & chanmask;
+
+ dec->ids_chanaddr = chanaddr;
+
+ return (B_TRUE);
+}
+
+/*
+ * Use the channel's rank interleave rules to translate the channel address
+ * computed by imc_decode_tad() into a DIMM, a rank on that DIMM, and a rank
+ * address. On success those are recorded in dec and B_TRUE is returned. On
+ * failure dec->ids_fail describes the problem and B_FALSE is returned.
+ */
+static boolean_t
+imc_decode_rir(const imc_t *imc, imc_decode_state_t *dec)
+{
+ const imc_mc_t *mc = dec->ids_mc;
+ const imc_channel_t *chan = dec->ids_chan;
+ const imc_rank_ileave_t *rir = NULL;
+ const imc_rank_ileave_entry_t *rirtarg;
+ const imc_dimm_t *dimm;
+ uint32_t shift, index;
+ uint_t i, dimmid, rankid;
+ uint64_t mask, base, rankaddr;
+
+ /* The interleave granularity depends on the controller's page mode. */
+ if (mc->icn_closed) {
+ shift = IMC_PAGE_BITS_CLOSED;
+ } else {
+ shift = IMC_PAGE_BITS_OPEN;
+ }
+ mask = (1UL << shift) - 1;
+
+ /*
+ * Find the enabled rule whose range covers the channel address. Each
+ * rule's range starts at the previous rule's limit.
+ */
+ for (i = 0, base = 0; i < chan->ich_nrankileaves; i++) {
+ rir = &chan->ich_rankileaves[i];
+ if (rir->irle_enabled && dec->ids_chanaddr >= base &&
+ dec->ids_chanaddr < rir->irle_limit) {
+ break;
+ }
+
+ base = rir->irle_limit;
+ }
+
+ /* Either there were no rules at all or none of them matched. */
+ if (rir == NULL || i == chan->ich_nrankileaves) {
+ dec->ids_fail = IMC_DECODE_F_NO_RIR_RULE;
+ return (B_FALSE);
+ }
+ dec->ids_rir = rir;
+
+ /*
+ * Determine the index of the rule that we care about. This is done by
+ * shifting the address based on the open and closed page bits and then
+ * just modding it by the number of ways in question.
+ */
+ index = (dec->ids_chanaddr >> shift) % rir->irle_nways;
+ if (index >= rir->irle_nentries) {
+ dec->ids_fail = IMC_DECODE_F_BAD_RIR_ILEAVE_TARGET;
+ dec->ids_fail_data = index;
+ return (B_FALSE);
+ }
+ rirtarg = &rir->irle_entries[index];
+
+ /*
+ * The rank interleaving register has information about a physical rank
+ * target. This is within the notion of the physical chip selects that
+ * exist. While the memory controller only has eight actual chip
+ * selects, the physical values that are programmed depend a bit on the
+ * underlying hardware. Effectively, in this ID space, each DIMM has
+ * four ranks associated with it. Even when we only have two ranks with
+ * each physical channel, they'll be programmed so we can simply do the
+ * following match:
+ *
+ * DIMM = rank id / 4
+ * RANK = rank id % 4
+ */
+ dec->ids_physrankid = rirtarg->irle_target;
+ dimmid = dec->ids_physrankid / 4;
+ rankid = dec->ids_physrankid % 4;
+
+ if (dimmid >= chan->ich_ndimms) {
+ dec->ids_fail = IMC_DECODE_F_BAD_DIMM_INDEX;
+ dec->ids_fail_data = dimmid;
+ return (B_FALSE);
+ }
+
+ dimm = &chan->ich_dimms[dimmid];
+ if (!dimm->idimm_present) {
+ dec->ids_fail = IMC_DECODE_F_DIMM_NOT_PRESENT;
+ return (B_FALSE);
+ }
+ dec->ids_dimmid = dimmid;
+ dec->ids_dimm = dimm;
+
+ if (rankid >= dimm->idimm_nranks) {
+ dec->ids_fail = IMC_DECODE_F_BAD_DIMM_RANK;
+ dec->ids_fail_data = rankid;
+ return (B_FALSE);
+ }
+ dec->ids_rankid = rankid;
+
+ /*
+ * Calculate the rank address. We need to divide the address by the
+ * number of rank ways and then or in the lower bits.
+ */
+ rankaddr = dec->ids_chanaddr;
+ rankaddr >>= shift;
+ rankaddr /= rir->irle_nways;
+ rankaddr <<= shift;
+ rankaddr |= dec->ids_chanaddr & mask;
+
+ /* The target's offset is subtracted last; refuse to wrap below zero. */
+ if (rirtarg->irle_offset > rankaddr) {
+ dec->ids_fail = IMC_DECODE_F_RANKOFF_UNDERFLOW;
+ return (B_FALSE);
+ }
+ rankaddr -= rirtarg->irle_offset;
+ dec->ids_rankaddr = rankaddr;
+
+ return (B_TRUE);
+}
+
+/*
+ * Decode a physical address down to a DIMM, rank, and rank address.
+ *
+ * dec is cleared and then filled in as each stage of the decode proceeds.
+ * Returns B_TRUE when every stage succeeds; otherwise returns B_FALSE with the
+ * reason recorded in dec->ids_fail.
+ */
+boolean_t
+imc_decode_pa(const imc_t *imc, uint64_t pa, imc_decode_state_t *dec)
+{
+	bzero(dec, sizeof (*dec));
+	dec->ids_pa = pa;
+	dec->ids_channelid = UINT32_MAX;
+	dec->ids_tadid = UINT32_MAX;
+	dec->ids_nodeid = UINT32_MAX;
+
+	/*
+	 * Everything below leans on socket zero's information, so it has to
+	 * both exist and be marked valid.
+	 */
+	if (imc->imc_nsockets < 1 ||
+	    imc->imc_sockets[0].isock_valid != IMC_SOCKET_V_VALID) {
+		dec->ids_fail = IMC_DECODE_F_BAD_SOCKET;
+		dec->ids_fail_data = 0;
+		return (B_FALSE);
+	}
+
+	/*
+	 * Make sure that the PA is actually meant to target DRAM. It could
+	 * instead fall to MMIO, MMCFG, be outside of DRAM altogether, or
+	 * belong to a legacy address range that is interposed.
+	 */
+	if (imc_decode_addr_resvd(imc, dec)) {
+		return (B_FALSE);
+	}
+
+	/*
+	 * Run the remaining stages in order, stopping at the first failure:
+	 *
+	 *  1. The SAD points us at a specific socket and an IMC / home agent
+	 *     on that socket, which tells us which TAD to use.
+	 *  2. The TAD points us at the memory channel and the corresponding
+	 *     address on that channel.
+	 *  3. The rank interleaving data determines the DIMM, the relevant
+	 *     rank, and the rank address.
+	 */
+	if (!imc_decode_sad(imc, dec) ||
+	    !imc_decode_tad(imc, dec) ||
+	    !imc_decode_rir(imc, dec)) {
+		return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
diff --git a/usr/src/common/mc/imc/imc_dump.c b/usr/src/common/mc/imc/imc_dump.c
new file mode 100644
index 0000000000..05a2f72308
--- /dev/null
+++ b/usr/src/common/mc/imc/imc_dump.c
@@ -0,0 +1,569 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+/*
+ * This implements logic to allow us to dump IMC data for decoding purposes,
+ * such that we can later encode it elsewhere. In general, dumping is done by
+ * the kernel and reconstituting this data is done by user land.
+ */
+
+#include "imc.h"
+
+#ifndef _KERNEL
+#include <stdint.h>
+#include <strings.h>
+#endif /* !_KERNEL */
+
+
+/*
+ * Serialize the SAD pointed to by 'sad' into a newly allocated nvlist, which
+ * the caller is responsible for freeing.
+ *
+ * The scalar members are added first, followed by an "isad_rules" nvlist
+ * array holding one nvlist per rule. The "isad_mcroute" nvlist array is only
+ * added when there is at least one MC route.
+ */
+static nvlist_t *
+imc_dump_sad(imc_sad_t *sad)
+{
+	nvlist_t *rulenvls[IMC_MAX_SAD_RULES];
+	nvlist_t *routenvls[IMC_MAX_SAD_MCROUTES];
+	nvlist_t *out = fnvlist_alloc();
+	uint_t idx;
+
+	fnvlist_add_uint32(out, "isad_flags", sad->isad_flags);
+	fnvlist_add_uint32(out, "isad_valid", sad->isad_valid);
+	fnvlist_add_uint64(out, "isad_tolm", sad->isad_tolm);
+	fnvlist_add_uint64(out, "isad_tohm", sad->isad_tohm);
+
+	for (idx = 0; idx < sad->isad_nrules; idx++) {
+		imc_sad_rule_t *sr = &sad->isad_rules[idx];
+		nvlist_t *rn = fnvlist_alloc();
+
+		rulenvls[idx] = rn;
+		fnvlist_add_boolean_value(rn, "isr_enable", sr->isr_enable);
+		fnvlist_add_boolean_value(rn, "isr_a7mode", sr->isr_a7mode);
+		fnvlist_add_boolean_value(rn, "isr_need_mod3",
+		    sr->isr_need_mod3);
+		fnvlist_add_uint64(rn, "isr_limit", sr->isr_limit);
+		fnvlist_add_uint32(rn, "isr_type", sr->isr_type);
+		fnvlist_add_uint32(rn, "isr_imode", sr->isr_imode);
+		fnvlist_add_uint32(rn, "isr_mod_mode", sr->isr_mod_mode);
+		fnvlist_add_uint32(rn, "isr_mod_type", sr->isr_mod_type);
+		fnvlist_add_uint8_array(rn, "isr_targets", sr->isr_targets,
+		    sr->isr_ntargets);
+	}
+	fnvlist_add_nvlist_array(out, "isad_rules", rulenvls,
+	    sad->isad_nrules);
+
+	/* The per-rule nvlists are released once they have been added. */
+	for (idx = 0; idx < sad->isad_nrules; idx++) {
+		nvlist_free(rulenvls[idx]);
+	}
+
+	/* The MC routes are optional: only emit them when some exist. */
+	if (sad->isad_mcroute.ismc_nroutes != 0) {
+		for (idx = 0; idx < sad->isad_mcroute.ismc_nroutes; idx++) {
+			imc_sad_mcroute_entry_t *ent =
+			    &sad->isad_mcroute.ismc_mcroutes[idx];
+			nvlist_t *en = fnvlist_alloc();
+
+			routenvls[idx] = en;
+			fnvlist_add_uint8(en, "ismce_imc", ent->ismce_imc);
+			fnvlist_add_uint8(en, "ismce_pchannel",
+			    ent->ismce_pchannel);
+		}
+		fnvlist_add_nvlist_array(out, "isad_mcroute", routenvls, idx);
+
+		for (idx = 0; idx < sad->isad_mcroute.ismc_nroutes; idx++) {
+			nvlist_free(routenvls[idx]);
+		}
+	}
+
+	return (out);
+}
+
+/*
+ * Serialize the TAD pointed to by 'tad' into a newly allocated nvlist, which
+ * the caller is responsible for freeing.
+ *
+ * The valid and flags members are added first, followed by an "itad_rules"
+ * nvlist array holding one nvlist per rule.
+ */
+static nvlist_t *
+imc_dump_tad(imc_tad_t *tad)
+{
+	nvlist_t *rulenvls[IMC_MAX_TAD_RULES];
+	nvlist_t *out = fnvlist_alloc();
+	uint_t idx;
+
+	fnvlist_add_uint32(out, "itad_valid", tad->itad_valid);
+	fnvlist_add_uint32(out, "itad_flags", tad->itad_flags);
+
+	for (idx = 0; idx < tad->itad_nrules; idx++) {
+		imc_tad_rule_t *tr = &tad->itad_rules[idx];
+		nvlist_t *rn = fnvlist_alloc();
+
+		rulenvls[idx] = rn;
+		fnvlist_add_uint64(rn, "itr_base", tr->itr_base);
+		fnvlist_add_uint64(rn, "itr_limit", tr->itr_limit);
+		fnvlist_add_uint8(rn, "itr_sock_way", tr->itr_sock_way);
+		fnvlist_add_uint8(rn, "itr_chan_way", tr->itr_chan_way);
+		fnvlist_add_uint32(rn, "itr_sock_gran", tr->itr_sock_gran);
+		fnvlist_add_uint32(rn, "itr_chan_gran", tr->itr_chan_gran);
+		fnvlist_add_uint8_array(rn, "itr_targets", tr->itr_targets,
+		    tr->itr_ntargets);
+	}
+	fnvlist_add_nvlist_array(out, "itad_rules", rulenvls,
+	    tad->itad_nrules);
+
+	/* The per-rule nvlists are released once they have been added. */
+	for (idx = 0; idx < tad->itad_nrules; idx++) {
+		nvlist_free(rulenvls[idx]);
+	}
+
+	return (out);
+}
+
+/*
+ * Serialize the memory channel pointed to by 'chan' into a newly allocated
+ * nvlist, which the caller is responsible for freeing.
+ *
+ * The result carries "ich_valid", an "ich_dimms" nvlist array, the
+ * "ich_tad_offsets" uint64 array and an "ich_rankileaves" nvlist array. Each
+ * rank interleave entry in turn carries its own "irle_entries" nvlist array.
+ */
+static nvlist_t *
+imc_dump_channel(imc_channel_t *chan)
+{
+	nvlist_t *dimmnvls[IMC_MAX_DIMMPERCHAN];
+	nvlist_t *ranknvls[IMC_MAX_RANK_WAYS];
+	nvlist_t *out = fnvlist_alloc();
+	uint_t idx;
+
+	fnvlist_add_uint32(out, "ich_valid", chan->ich_valid);
+
+	for (idx = 0; idx < chan->ich_ndimms; idx++) {
+		imc_dimm_t *dimm = &chan->ich_dimms[idx];
+		nvlist_t *dn = fnvlist_alloc();
+
+		dimmnvls[idx] = dn;
+		fnvlist_add_uint32(dn, "idimm_valid", dimm->idimm_valid);
+		fnvlist_add_boolean_value(dn, "idimm_present",
+		    dimm->idimm_present);
+
+		/* Only a DIMM that is present has its geometry recorded. */
+		if (!dimm->idimm_present)
+			continue;
+
+		fnvlist_add_uint8(dn, "idimm_nbanks", dimm->idimm_nbanks);
+		fnvlist_add_uint8(dn, "idimm_nranks", dimm->idimm_nranks);
+		fnvlist_add_uint8(dn, "idimm_width", dimm->idimm_width);
+		fnvlist_add_uint8(dn, "idimm_density", dimm->idimm_density);
+		fnvlist_add_uint8(dn, "idimm_nrows", dimm->idimm_nrows);
+		fnvlist_add_uint8(dn, "idimm_ncolumns", dimm->idimm_ncolumns);
+		fnvlist_add_uint64(dn, "idimm_size", dimm->idimm_size);
+	}
+	fnvlist_add_nvlist_array(out, "ich_dimms", dimmnvls, idx);
+
+	for (idx = 0; idx < chan->ich_ndimms; idx++) {
+		nvlist_free(dimmnvls[idx]);
+	}
+
+	fnvlist_add_uint64_array(out, "ich_tad_offsets",
+	    chan->ich_tad_offsets, chan->ich_ntad_offsets);
+
+	for (idx = 0; idx < chan->ich_nrankileaves; idx++) {
+		nvlist_t *entnvls[IMC_MAX_RANK_INTERLEAVES];
+		imc_rank_ileave_t *ril = &chan->ich_rankileaves[idx];
+		nvlist_t *rn = fnvlist_alloc();
+		uint_t ent;
+
+		ranknvls[idx] = rn;
+		fnvlist_add_boolean_value(rn, "irle_enabled",
+		    ril->irle_enabled);
+		fnvlist_add_uint8(rn, "irle_nways", ril->irle_nways);
+		fnvlist_add_uint8(rn, "irle_nwaysbits", ril->irle_nwaysbits);
+		fnvlist_add_uint64(rn, "irle_limit", ril->irle_limit);
+
+		for (ent = 0; ent < ril->irle_nentries; ent++) {
+			nvlist_t *en = fnvlist_alloc();
+
+			entnvls[ent] = en;
+			fnvlist_add_uint8(en, "irle_target",
+			    ril->irle_entries[ent].irle_target);
+			fnvlist_add_uint64(en, "irle_offset",
+			    ril->irle_entries[ent].irle_offset);
+		}
+		fnvlist_add_nvlist_array(rn, "irle_entries", entnvls, ent);
+
+		for (ent = 0; ent < ril->irle_nentries; ent++) {
+			nvlist_free(entnvls[ent]);
+		}
+	}
+	fnvlist_add_nvlist_array(out, "ich_rankileaves", ranknvls, idx);
+
+	for (idx = 0; idx < chan->ich_nrankileaves; idx++) {
+		nvlist_free(ranknvls[idx]);
+	}
+
+	return (out);
+}
+
+/*
+ * Serialize the memory controller pointed to by 'mc' into a newly allocated
+ * nvlist, which the caller is responsible for freeing.
+ *
+ * The ECC, lockstep, closed and DIMM type members are added first, followed
+ * by an "icn_channels" nvlist array built with imc_dump_channel().
+ */
+static nvlist_t *
+imc_dump_mc(imc_mc_t *mc)
+{
+	nvlist_t *channvls[IMC_MAX_CHANPERMC];
+	nvlist_t *out = fnvlist_alloc();
+	uint_t idx;
+
+	fnvlist_add_boolean_value(out, "icn_ecc", mc->icn_ecc);
+	fnvlist_add_boolean_value(out, "icn_lockstep", mc->icn_lockstep);
+	fnvlist_add_boolean_value(out, "icn_closed", mc->icn_closed);
+	fnvlist_add_uint32(out, "icn_dimm_type", mc->icn_dimm_type);
+
+	for (idx = 0; idx < mc->icn_nchannels; idx++) {
+		channvls[idx] = imc_dump_channel(&mc->icn_channels[idx]);
+	}
+	fnvlist_add_nvlist_array(out, "icn_channels", channvls, idx);
+
+	/* The per-channel nvlists are released once they have been added. */
+	for (idx = 0; idx < mc->icn_nchannels; idx++) {
+		nvlist_free(channvls[idx]);
+	}
+
+	return (out);
+}
+
+/*
+ * Serialize the socket pointed to by 'sock' into a newly allocated nvlist,
+ * which the caller is responsible for freeing.
+ *
+ * The result carries, in this order: the "isock_sad" nvlist, the "isock_tad"
+ * nvlist array, the "isock_nodeid" value and the "isock_imcs" nvlist array.
+ * The intermediate nvlists are freed once they have been added.
+ */
+static nvlist_t *
+imc_dump_socket(imc_socket_t *sock)
+{
+	nvlist_t *tadnvls[IMC_MAX_TAD];
+	nvlist_t *mcnvls[IMC_MAX_IMCPERSOCK];
+	nvlist_t *out = fnvlist_alloc();
+	nvlist_t *sadnvl;
+	uint_t idx;
+
+	sadnvl = imc_dump_sad(&sock->isock_sad);
+	fnvlist_add_nvlist(out, "isock_sad", sadnvl);
+	nvlist_free(sadnvl);
+
+	for (idx = 0; idx < sock->isock_ntad; idx++) {
+		tadnvls[idx] = imc_dump_tad(&sock->isock_tad[idx]);
+	}
+	fnvlist_add_nvlist_array(out, "isock_tad", tadnvls, idx);
+
+	for (idx = 0; idx < sock->isock_ntad; idx++) {
+		fnvlist_free(tadnvls[idx]);
+	}
+
+	fnvlist_add_uint32(out, "isock_nodeid", sock->isock_nodeid);
+
+	for (idx = 0; idx < sock->isock_nimc; idx++) {
+		mcnvls[idx] = imc_dump_mc(&sock->isock_imcs[idx]);
+	}
+	fnvlist_add_nvlist_array(out, "isock_imcs", mcnvls, idx);
+
+	for (idx = 0; idx < sock->isock_nimc; idx++) {
+		fnvlist_free(mcnvls[idx]);
+	}
+
+	return (out);
+}
+
+/*
+ * Serialize all of the decoder data in 'imc' into a newly allocated nvlist
+ * and return it.
+ *
+ * The top-level nvlist carries "mc_dump_version" (always 0 here),
+ * "mc_dump_driver" (always "imc") and an "imc" nvlist. The latter holds
+ * "imc_gen" and an "imc_sockets" nvlist array with one entry per socket.
+ * imc_restore_decoder() consumes this layout.
+ */
+nvlist_t *
+imc_dump_decoder(imc_t *imc)
+{
+	nvlist_t *socknvls[IMC_MAX_SOCKETS];
+	nvlist_t *top, *inner;
+	uint_t idx;
+
+	top = fnvlist_alloc();
+	fnvlist_add_uint32(top, "mc_dump_version", 0);
+	fnvlist_add_string(top, "mc_dump_driver", "imc");
+
+	inner = fnvlist_alloc();
+	fnvlist_add_uint32(inner, "imc_gen", imc->imc_gen);
+
+	for (idx = 0; idx < imc->imc_nsockets; idx++) {
+		socknvls[idx] = imc_dump_socket(&imc->imc_sockets[idx]);
+	}
+
+	/*
+	 * The socket array has to be in place in the inner nvlist before the
+	 * inner nvlist itself is added to the top-level one.
+	 */
+	fnvlist_add_nvlist_array(inner, "imc_sockets", socknvls, idx);
+	fnvlist_add_nvlist(top, "imc", inner);
+
+	for (idx = 0; idx < imc->imc_nsockets; idx++) {
+		nvlist_free(socknvls[idx]);
+	}
+	nvlist_free(inner);
+
+	return (top);
+}
+
+/*
+ * Rebuild the SAD in 'sad' from 'nvl', an nvlist laid out the way
+ * imc_dump_sad() produces it.
+ *
+ * Returns B_TRUE on success. Returns B_FALSE if a required member is missing
+ * or if any element count found in the nvlist exceeds the capacity of the
+ * array it would be stored into. On failure 'sad' may have been partially
+ * filled in.
+ */
+static boolean_t
+imc_restore_sad(nvlist_t *nvl, imc_sad_t *sad)
+{
+	nvlist_t **rules, **routes;
+	uint_t i, nroutes;
+
+	/*
+	 * The rule count comes straight from the nvlist, so it has to be
+	 * bounded before it is used to index isad_rules below.
+	 */
+	if (nvlist_lookup_uint32(nvl, "isad_flags", &sad->isad_flags) != 0 ||
+	    nvlist_lookup_uint32(nvl, "isad_valid", &sad->isad_valid) != 0 ||
+	    nvlist_lookup_uint64(nvl, "isad_tolm", &sad->isad_tolm) != 0 ||
+	    nvlist_lookup_uint64(nvl, "isad_tohm", &sad->isad_tohm) != 0 ||
+	    nvlist_lookup_nvlist_array(nvl, "isad_rules",
+	    &rules, &sad->isad_nrules) != 0 ||
+	    sad->isad_nrules > IMC_MAX_SAD_RULES) {
+		return (B_FALSE);
+	}
+
+	for (i = 0; i < sad->isad_nrules; i++) {
+		imc_sad_rule_t *r = &sad->isad_rules[i];
+		uint8_t *targs;
+
+		/*
+		 * The target count is bounded by the capacity of isr_targets
+		 * itself, as that is the array the bcopy() below fills. The
+		 * number of SAD rules is not the right limit for it.
+		 */
+		if (nvlist_lookup_boolean_value(rules[i], "isr_enable",
+		    &r->isr_enable) != 0 ||
+		    nvlist_lookup_boolean_value(rules[i], "isr_a7mode",
+		    &r->isr_a7mode) != 0 ||
+		    nvlist_lookup_boolean_value(rules[i], "isr_need_mod3",
+		    &r->isr_need_mod3) != 0 ||
+		    nvlist_lookup_uint64(rules[i], "isr_limit",
+		    &r->isr_limit) != 0 ||
+		    nvlist_lookup_uint32(rules[i], "isr_type",
+		    &r->isr_type) != 0 ||
+		    nvlist_lookup_uint32(rules[i], "isr_imode",
+		    &r->isr_imode) != 0 ||
+		    nvlist_lookup_uint32(rules[i], "isr_mod_mode",
+		    &r->isr_mod_mode) != 0 ||
+		    nvlist_lookup_uint32(rules[i], "isr_mod_type",
+		    &r->isr_mod_type) != 0 ||
+		    nvlist_lookup_uint8_array(rules[i], "isr_targets", &targs,
+		    &r->isr_ntargets) != 0 ||
+		    r->isr_ntargets > sizeof (r->isr_targets) /
+		    sizeof (r->isr_targets[0])) {
+			return (B_FALSE);
+		}
+
+		bcopy(targs, r->isr_targets, r->isr_ntargets *
+		    sizeof (uint8_t));
+	}
+
+	/*
+	 * The mcroutes entry right now is only included conditionally. When
+	 * it is absent, the mcroute data in 'sad' is left untouched.
+	 */
+	if (nvlist_lookup_nvlist_array(nvl, "isad_mcroute", &routes,
+	    &nroutes) == 0) {
+		if (nroutes > IMC_MAX_SAD_MCROUTES)
+			return (B_FALSE);
+		sad->isad_mcroute.ismc_nroutes = nroutes;
+		for (i = 0; i < nroutes; i++) {
+			imc_sad_mcroute_entry_t *r =
+			    &sad->isad_mcroute.ismc_mcroutes[i];
+			if (nvlist_lookup_uint8(routes[i], "ismce_imc",
+			    &r->ismce_imc) != 0 ||
+			    nvlist_lookup_uint8(routes[i], "ismce_pchannel",
+			    &r->ismce_pchannel) != 0) {
+				return (B_FALSE);
+			}
+		}
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Rebuild the TAD in 'tad' from 'nvl', an nvlist laid out the way
+ * imc_dump_tad() produces it.
+ *
+ * Returns B_TRUE on success. Returns B_FALSE if a required member is missing,
+ * if there are more than IMC_MAX_TAD_RULES rules, or if a rule has more than
+ * IMC_MAX_TAD_TARGETS targets. On failure 'tad' may have been partially
+ * filled in.
+ */
+static boolean_t
+imc_restore_tad(nvlist_t *nvl, imc_tad_t *tad)
+{
+	nvlist_t **rulenvls;
+	uint_t idx;
+
+	if (nvlist_lookup_uint32(nvl, "itad_valid", &tad->itad_valid) != 0)
+		return (B_FALSE);
+
+	if (nvlist_lookup_uint32(nvl, "itad_flags", &tad->itad_flags) != 0)
+		return (B_FALSE);
+
+	if (nvlist_lookup_nvlist_array(nvl, "itad_rules", &rulenvls,
+	    &tad->itad_nrules) != 0) {
+		return (B_FALSE);
+	}
+
+	/* Bound the rule count before it is used to index itad_rules. */
+	if (tad->itad_nrules > IMC_MAX_TAD_RULES)
+		return (B_FALSE);
+
+	for (idx = 0; idx < tad->itad_nrules; idx++) {
+		imc_tad_rule_t *tr = &tad->itad_rules[idx];
+		nvlist_t *rn = rulenvls[idx];
+		uint8_t *targs;
+
+		if (nvlist_lookup_uint64(rn, "itr_base",
+		    &tr->itr_base) != 0 ||
+		    nvlist_lookup_uint64(rn, "itr_limit",
+		    &tr->itr_limit) != 0 ||
+		    nvlist_lookup_uint8(rn, "itr_sock_way",
+		    &tr->itr_sock_way) != 0 ||
+		    nvlist_lookup_uint8(rn, "itr_chan_way",
+		    &tr->itr_chan_way) != 0 ||
+		    nvlist_lookup_uint32(rn, "itr_sock_gran",
+		    &tr->itr_sock_gran) != 0 ||
+		    nvlist_lookup_uint32(rn, "itr_chan_gran",
+		    &tr->itr_chan_gran) != 0 ||
+		    nvlist_lookup_uint8_array(rn, "itr_targets", &targs,
+		    &tr->itr_ntargets) != 0) {
+			return (B_FALSE);
+		}
+
+		/* Bound the target count before copying into itr_targets. */
+		if (tr->itr_ntargets > IMC_MAX_TAD_TARGETS)
+			return (B_FALSE);
+
+		bcopy(targs, tr->itr_targets,
+		    sizeof (uint8_t) * tr->itr_ntargets);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Rebuild the memory channel in 'chan' from 'nvl', an nvlist laid out the way
+ * imc_dump_channel() produces it.
+ *
+ * Returns B_TRUE on success. Returns B_FALSE if a required member is missing
+ * or if any of the DIMM, TAD offset, rank interleave or rank interleave entry
+ * counts exceeds its limit. The geometry members of a DIMM are only looked up
+ * when that DIMM is marked present. On failure 'chan' may have been partially
+ * filled in.
+ */
+static boolean_t
+imc_restore_channel(nvlist_t *nvl, imc_channel_t *chan)
+{
+	nvlist_t **dimmnvls, **ranknvls;
+	uint64_t *offsets;
+	uint_t idx;
+
+	if (nvlist_lookup_uint32(nvl, "ich_valid", &chan->ich_valid) != 0)
+		return (B_FALSE);
+
+	/* Every count is bounded right after it has been looked up. */
+	if (nvlist_lookup_nvlist_array(nvl, "ich_dimms", &dimmnvls,
+	    &chan->ich_ndimms) != 0 ||
+	    chan->ich_ndimms > IMC_MAX_DIMMPERCHAN) {
+		return (B_FALSE);
+	}
+
+	if (nvlist_lookup_uint64_array(nvl, "ich_tad_offsets", &offsets,
+	    &chan->ich_ntad_offsets) != 0 ||
+	    chan->ich_ntad_offsets > IMC_MAX_TAD_RULES) {
+		return (B_FALSE);
+	}
+
+	if (nvlist_lookup_nvlist_array(nvl, "ich_rankileaves", &ranknvls,
+	    &chan->ich_nrankileaves) != 0 ||
+	    chan->ich_nrankileaves > IMC_MAX_RANK_WAYS) {
+		return (B_FALSE);
+	}
+
+	for (idx = 0; idx < chan->ich_ndimms; idx++) {
+		imc_dimm_t *dimm = &chan->ich_dimms[idx];
+		nvlist_t *dn = dimmnvls[idx];
+
+		if (nvlist_lookup_uint32(dn, "idimm_valid",
+		    &dimm->idimm_valid) != 0) {
+			return (B_FALSE);
+		}
+
+		if (nvlist_lookup_boolean_value(dn, "idimm_present",
+		    &dimm->idimm_present) != 0) {
+			return (B_FALSE);
+		}
+
+		/* The dump only records geometry for a present DIMM. */
+		if (!dimm->idimm_present)
+			continue;
+
+		if (nvlist_lookup_uint8(dn, "idimm_nbanks",
+		    &dimm->idimm_nbanks) != 0 ||
+		    nvlist_lookup_uint8(dn, "idimm_nranks",
+		    &dimm->idimm_nranks) != 0 ||
+		    nvlist_lookup_uint8(dn, "idimm_width",
+		    &dimm->idimm_width) != 0 ||
+		    nvlist_lookup_uint8(dn, "idimm_density",
+		    &dimm->idimm_density) != 0 ||
+		    nvlist_lookup_uint8(dn, "idimm_nrows",
+		    &dimm->idimm_nrows) != 0 ||
+		    nvlist_lookup_uint8(dn, "idimm_ncolumns",
+		    &dimm->idimm_ncolumns) != 0 ||
+		    nvlist_lookup_uint64(dn, "idimm_size",
+		    &dimm->idimm_size) != 0) {
+			return (B_FALSE);
+		}
+	}
+
+	bcopy(offsets, chan->ich_tad_offsets,
+	    sizeof (uint64_t) * chan->ich_ntad_offsets);
+
+	for (idx = 0; idx < chan->ich_nrankileaves; idx++) {
+		imc_rank_ileave_t *ril = &chan->ich_rankileaves[idx];
+		nvlist_t *rn = ranknvls[idx];
+		nvlist_t **entnvls;
+		uint_t ent;
+
+		if (nvlist_lookup_boolean_value(rn, "irle_enabled",
+		    &ril->irle_enabled) != 0 ||
+		    nvlist_lookup_uint8(rn, "irle_nways",
+		    &ril->irle_nways) != 0 ||
+		    nvlist_lookup_uint8(rn, "irle_nwaysbits",
+		    &ril->irle_nwaysbits) != 0 ||
+		    nvlist_lookup_uint64(rn, "irle_limit",
+		    &ril->irle_limit) != 0 ||
+		    nvlist_lookup_nvlist_array(rn, "irle_entries",
+		    &entnvls, &ril->irle_nentries) != 0) {
+			return (B_FALSE);
+		}
+
+		if (ril->irle_nentries > IMC_MAX_RANK_INTERLEAVES)
+			return (B_FALSE);
+
+		for (ent = 0; ent < ril->irle_nentries; ent++) {
+			imc_rank_ileave_entry_t *rie = &ril->irle_entries[ent];
+
+			if (nvlist_lookup_uint8(entnvls[ent], "irle_target",
+			    &rie->irle_target) != 0 ||
+			    nvlist_lookup_uint64(entnvls[ent], "irle_offset",
+			    &rie->irle_offset) != 0) {
+				return (B_FALSE);
+			}
+		}
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Rebuild the memory controller in 'mc' from 'nvl', an nvlist laid out the
+ * way imc_dump_mc() produces it.
+ *
+ * Returns B_TRUE on success. Returns B_FALSE if a required member is missing,
+ * if there are more than IMC_MAX_CHANPERMC channels, or if restoring any
+ * channel fails. On failure 'mc' may have been partially filled in.
+ */
+static boolean_t
+imc_restore_mc(nvlist_t *nvl, imc_mc_t *mc)
+{
+	nvlist_t **channvls;
+	uint_t idx;
+
+	if (nvlist_lookup_boolean_value(nvl, "icn_ecc", &mc->icn_ecc) != 0)
+		return (B_FALSE);
+
+	if (nvlist_lookup_boolean_value(nvl, "icn_lockstep",
+	    &mc->icn_lockstep) != 0) {
+		return (B_FALSE);
+	}
+
+	if (nvlist_lookup_boolean_value(nvl, "icn_closed",
+	    &mc->icn_closed) != 0) {
+		return (B_FALSE);
+	}
+
+	if (nvlist_lookup_uint32(nvl, "icn_dimm_type",
+	    &mc->icn_dimm_type) != 0) {
+		return (B_FALSE);
+	}
+
+	if (nvlist_lookup_nvlist_array(nvl, "icn_channels", &channvls,
+	    &mc->icn_nchannels) != 0) {
+		return (B_FALSE);
+	}
+
+	/* Bound the channel count before it indexes icn_channels. */
+	if (mc->icn_nchannels > IMC_MAX_CHANPERMC)
+		return (B_FALSE);
+
+	for (idx = 0; idx < mc->icn_nchannels; idx++) {
+		if (!imc_restore_channel(channvls[idx],
+		    &mc->icn_channels[idx])) {
+			return (B_FALSE);
+		}
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Rebuild the socket in 'sock' from 'nvl', an nvlist laid out the way
+ * imc_dump_socket() produces it.
+ *
+ * All of the top-level members are looked up and the TAD and IMC counts are
+ * bounded before anything nested is restored. The SAD is then restored
+ * first, followed by each TAD and each memory controller. Returns B_TRUE on
+ * success and B_FALSE on the first failure, in which case 'sock' may have
+ * been partially filled in.
+ */
+static boolean_t
+imc_restore_socket(nvlist_t *nvl, imc_socket_t *sock)
+{
+	nvlist_t *sadnvl, **tadnvls, **mcnvls;
+	uint_t idx;
+
+	if (nvlist_lookup_nvlist(nvl, "isock_sad", &sadnvl) != 0)
+		return (B_FALSE);
+
+	if (nvlist_lookup_nvlist_array(nvl, "isock_tad", &tadnvls,
+	    &sock->isock_ntad) != 0) {
+		return (B_FALSE);
+	}
+
+	if (nvlist_lookup_uint32(nvl, "isock_nodeid",
+	    &sock->isock_nodeid) != 0) {
+		return (B_FALSE);
+	}
+
+	if (nvlist_lookup_nvlist_array(nvl, "isock_imcs", &mcnvls,
+	    &sock->isock_nimc) != 0) {
+		return (B_FALSE);
+	}
+
+	/* Bound both counts before they index isock_tad and isock_imcs. */
+	if (sock->isock_ntad > IMC_MAX_TAD)
+		return (B_FALSE);
+
+	if (sock->isock_nimc > IMC_MAX_IMCPERSOCK)
+		return (B_FALSE);
+
+	if (!imc_restore_sad(sadnvl, &sock->isock_sad))
+		return (B_FALSE);
+
+	for (idx = 0; idx < sock->isock_ntad; idx++) {
+		if (!imc_restore_tad(tadnvls[idx], &sock->isock_tad[idx]))
+			return (B_FALSE);
+	}
+
+	for (idx = 0; idx < sock->isock_nimc; idx++) {
+		if (!imc_restore_mc(mcnvls[idx], &sock->isock_imcs[idx]))
+			return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Rebuild the decoder data in 'imc' from 'nvl', an nvlist laid out the way
+ * imc_dump_decoder() produces it.
+ *
+ * 'imc' is zeroed first. The nvlist is only accepted when "mc_dump_version"
+ * is 0 and "mc_dump_driver" is "imc". Returns B_TRUE on success. Returns
+ * B_FALSE if a required member is missing, if there are more than
+ * IMC_MAX_SOCKETS sockets, or if restoring any socket fails. On failure
+ * 'imc' may have been partially filled in.
+ */
+boolean_t
+imc_restore_decoder(nvlist_t *nvl, imc_t *imc)
+{
+	nvlist_t *inner, **socknvls;
+	uint32_t version;
+	char *drvname;
+	uint_t idx;
+
+	bzero(imc, sizeof (*imc));
+
+	/* Only version 0 dumps are understood. */
+	if (nvlist_lookup_uint32(nvl, "mc_dump_version", &version) != 0)
+		return (B_FALSE);
+
+	if (version != 0)
+		return (B_FALSE);
+
+	/* Only dumps that name the "imc" driver are accepted. */
+	if (nvlist_lookup_string(nvl, "mc_dump_driver", &drvname) != 0)
+		return (B_FALSE);
+
+	if (strcmp(drvname, "imc") != 0)
+		return (B_FALSE);
+
+	if (nvlist_lookup_nvlist(nvl, "imc", &inner) != 0)
+		return (B_FALSE);
+
+	if (nvlist_lookup_uint32(inner, "imc_gen", &imc->imc_gen) != 0)
+		return (B_FALSE);
+
+	if (nvlist_lookup_nvlist_array(inner, "imc_sockets", &socknvls,
+	    &imc->imc_nsockets) != 0) {
+		return (B_FALSE);
+	}
+
+	/* Bound the socket count before it indexes imc_sockets. */
+	if (imc->imc_nsockets > IMC_MAX_SOCKETS)
+		return (B_FALSE);
+
+	for (idx = 0; idx < imc->imc_nsockets; idx++) {
+		if (!imc_restore_socket(socknvls[idx],
+		    &imc->imc_sockets[idx])) {
+			return (B_FALSE);
+		}
+	}
+
+	return (B_TRUE);
+}
diff --git a/usr/src/common/net/dhcp/octet.c b/usr/src/common/net/dhcp/octet.c
index d8367bbf0b..370604c4e3 100644
--- a/usr/src/common/net/dhcp/octet.c
+++ b/usr/src/common/net/dhcp/octet.c
@@ -77,6 +77,9 @@ octet_to_hexascii(const void *nump, uint_t nlen, char *bufp, uint_t *blen)
* Converts an ASCII string into an octet string.
*
* Returns 0 for success, errno otherwise.
+ *
+ * If the string contains invalid hexadecimal characters, or an odd number of
+ * characters then this function returns EINVAL.
*/
int
hexascii_to_octet(const char *asp, uint_t alen, void *bufp, uint_t *blen)
diff --git a/usr/src/common/pnglite/pnglite.c b/usr/src/common/pnglite/pnglite.c
index 7a30bdc609..5d8b41f9e9 100644
--- a/usr/src/common/pnglite/pnglite.c
+++ b/usr/src/common/pnglite/pnglite.c
@@ -9,6 +9,7 @@
#else
#include <stdio.h>
#include <stdlib.h>
+#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
diff --git a/usr/src/common/zfs/zfs_prop.c b/usr/src/common/zfs/zfs_prop.c
index a4f02b18db..6f633147a3 100644
--- a/usr/src/common/zfs/zfs_prop.c
+++ b/usr/src/common/zfs/zfs_prop.c
@@ -547,6 +547,23 @@ zfs_prop_delegatable(zfs_prop_t prop)
return (pd->pd_attr != PROP_READONLY);
}
+/*
+ * Report whether 'prop' is one of the properties treated as cacheable:
+ * version, normalize, utf8only, case, volsize and volblocksize. Returns
+ * B_TRUE for those and B_FALSE for everything else.
+ */
+boolean_t
+zfs_prop_cacheable(zfs_prop_t prop)
+{
+	/*
+	 * It'd be nice if each prop had a flags field which could carry flags
+	 * like PROP_CACHEABLE, but since zprop_attr_t is an enum and this
+	 * setting is orthogonal to the concepts of PROP_READONLY, etc., the
+	 * set of cacheable properties is spelled out here instead.
+	 */
+	switch (prop) {
+	case ZFS_PROP_VERSION:
+	case ZFS_PROP_NORMALIZE:
+	case ZFS_PROP_UTF8ONLY:
+	case ZFS_PROP_CASE:
+	case ZFS_PROP_VOLSIZE:
+	case ZFS_PROP_VOLBLOCKSIZE:
+		return (B_TRUE);
+	default:
+		return (B_FALSE);
+	}
+}
+
/*
* Given a zfs dataset property name, returns the corresponding property ID.
*/
diff --git a/usr/src/common/zfs/zfs_prop.h b/usr/src/common/zfs/zfs_prop.h
index 45423cc72f..3f34ad64a6 100644
--- a/usr/src/common/zfs/zfs_prop.h
+++ b/usr/src/common/zfs/zfs_prop.h
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _ZFS_PROP_H
@@ -89,6 +90,7 @@ typedef struct {
void zfs_prop_init(void);
zprop_type_t zfs_prop_get_type(zfs_prop_t);
boolean_t zfs_prop_delegatable(zfs_prop_t prop);
+boolean_t zfs_prop_cacheable(zfs_prop_t prop);
zprop_desc_t *zfs_prop_get_table(void);
/*