summary | refs | log | tree | commit | diff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r-- usr/src/boot/Makefile.version 2
-rw-r--r-- usr/src/boot/lib/libstand/Makefile.inc 122
-rw-r--r-- usr/src/boot/lib/libstand/crypto/Makefile.inc 28
-rw-r--r-- usr/src/boot/lib/libstand/zfs/Makefile.inc 30
-rw-r--r-- usr/src/boot/sys/boot/Makefile.inc 8
-rw-r--r-- usr/src/boot/sys/boot/Makefile.lib 34
-rw-r--r-- usr/src/boot/sys/boot/efi/Makefile.inc 2
-rw-r--r-- usr/src/boot/sys/boot/efi/loader/Makefile.com 43
-rw-r--r-- usr/src/boot/sys/boot/efi/loader/Makefile.common 30
-rw-r--r-- usr/src/boot/sys/boot/efi/loader/amd64/Makefile 2
-rw-r--r-- usr/src/boot/sys/boot/efi/loader/i386/Makefile 4
-rw-r--r-- usr/src/boot/sys/boot/i386/libi386/Makefile 27
-rw-r--r-- usr/src/boot/sys/boot/libficl/Makefile.com 28
-rw-r--r-- usr/src/boot/sys/boot/libficl/amd64/Makefile 5
-rw-r--r-- usr/src/boot/sys/boot/libficl/i386/Makefile 6
-rw-r--r-- usr/src/boot/sys/boot/libstand/Makefile 6
-rw-r--r-- usr/src/boot/sys/boot/libstand/Makefile.com 57
-rw-r--r-- usr/src/boot/sys/boot/libstand/amd64/Makefile 22
-rw-r--r-- usr/src/boot/sys/boot/libstand/i386/Makefile 17
-rw-r--r-- usr/src/cmd/Makefile 1
-rw-r--r-- usr/src/cmd/bhyve/bhyverun.c 11
-rw-r--r-- usr/src/cmd/bhyvectl/bhyvectl.c 7
-rw-r--r-- usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c 15
-rw-r--r-- usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_impl.h 9
-rw-r--r-- usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_main.c 6
-rw-r--r-- usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_persist.c 15
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ipadm/ipadm.c 28
-rw-r--r-- usr/src/cmd/rcm_daemon/common/ip_rcm.c 9
-rw-r--r-- usr/src/cmd/rsrvrctl/Makefile 48
-rw-r--r-- usr/src/cmd/rsrvrctl/rsrvrctl.c 164
-rw-r--r-- usr/src/lib/libipadm/common/ipadm_if.c 96
-rw-r--r-- usr/src/lib/libipadm/common/libipadm.h 22
-rw-r--r-- usr/src/lib/libvmmapi/common/vmmapi.c 106
-rw-r--r-- usr/src/lib/libvmmapi/common/vmmapi.h 6
-rw-r--r-- usr/src/pkg/manifests/system-bhyve.mf 1
-rw-r--r-- usr/src/uts/common/fs/zfs/spa.c 18
-rw-r--r-- usr/src/uts/common/vm/page.h 12
-rw-r--r-- usr/src/uts/common/vm/vm_page.c 72
-rw-r--r-- usr/src/uts/i86pc/Makefile.files 4
-rw-r--r-- usr/src/uts/i86pc/io/vmm/seg_vmm.c (renamed from usr/src/uts/i86pc/vm/seg_vmm.c) 91
-rw-r--r-- usr/src/uts/i86pc/io/vmm/sys/seg_vmm.h 30
-rw-r--r-- usr/src/uts/i86pc/io/vmm/sys/vmm_impl.h 2
-rw-r--r-- usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h 2
-rw-r--r-- usr/src/uts/i86pc/io/vmm/sys/vmm_reservoir.h 40
-rw-r--r-- usr/src/uts/i86pc/io/vmm/sys/vmm_vm.h 7
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm.c 24
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm_reservoir.c 820
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c 103
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c 150
-rw-r--r-- usr/src/uts/i86pc/sys/vmm.h 26
-rw-r--r-- usr/src/uts/i86pc/sys/vmm_dev.h 27
-rw-r--r-- usr/src/uts/i86pc/vm/seg_vmm.h 50
52 files changed, 1847 insertions, 648 deletions
diff --git a/usr/src/boot/Makefile.version b/usr/src/boot/Makefile.version
index 5f2a2b4676..1f4b347809 100644
--- a/usr/src/boot/Makefile.version
+++ b/usr/src/boot/Makefile.version
@@ -34,4 +34,4 @@ LOADER_VERSION = 1.1
# Use date like formatting here, YYYY.MM.DD.XX, without leading zeroes.
# The version is processed from left to right, the version number can only
# be increased.
-BOOT_VERSION = $(LOADER_VERSION)-2021.06.15.1
+BOOT_VERSION = $(LOADER_VERSION)-2021.07.27.1
diff --git a/usr/src/boot/lib/libstand/Makefile.inc b/usr/src/boot/lib/libstand/Makefile.inc
index 633005d142..2acb9ef490 100644
--- a/usr/src/boot/lib/libstand/Makefile.inc
+++ b/usr/src/boot/lib/libstand/Makefile.inc
@@ -21,25 +21,25 @@
#
# standalone components and stuff we have modified locally
-SRCS= $(ZLIB)/gzguts.h $(ZLIB)/zutil.h
+SRCS += $(ZLIB)/gzguts.h $(ZLIB)/zutil.h
SRCS += $(SASRC)/__main.c $(SASRC)/abort.c $(SASRC)/assert.c
SRCS += $(SASRC)/bcd.c $(SASRC)/environment.c
SRCS += $(SASRC)/getopt.c $(SASRC)/random.c
SRCS += $(SASRC)/sbrk.c $(SASRC)/twiddle.c
SRCS += $(SASRC)/zalloc.c $(SASRC)/zalloc_malloc.c
-OBJS= __main.o abort.o assert.o bcd.o environment.o \
+OBJECTS += __main.o abort.o assert.o bcd.o environment.o \
getopt.o gets.o globals.o pager.o panic.o printf.o \
strdup.o strerror.o strtol.o strtoll.o strtoul.o strtoull.o random.o \
sbrk.o twiddle.o zalloc.o zalloc_malloc.o
# private (pruned) versions of libc string functions
SRCS += $(SASRC)/strcasecmp.c
-OBJS += strcasecmp.o
+OBJECTS += strcasecmp.o
# from libc
SRCS += $(LIBSRC)/libc/net/ntoh.c
-OBJS += ntoh.o
+OBJECTS += ntoh.o
# string functions from libc
SRCS += $(LIBSRC)/libc/string/bcmp.c $(LIBSRC)/libc/string/bcopy.c
@@ -61,7 +61,7 @@ SRCS += $(LIBSRC)/libc/string/swab.c
SRCS += $(SASRC)/qdivrem.c
-OBJS += bcmp.o bcopy.o bzero.o ffs.o fls.o \
+OBJECTS += bcmp.o bcopy.o bzero.o ffs.o fls.o \
memccpy.o memchr.o memcmp.o memcpy.o memmove.o memset.o \
qdivrem.o strcat.o strchr.o strcmp.o strcpy.o stpcpy.o stpncpy.o \
strcspn.o strlcat.o strlcpy.o strlen.o strncat.o strncmp.o strncpy.o \
@@ -75,22 +75,57 @@ SRCS += $(LIBSRC)/libc/uuid/uuid_is_nil.c
SRCS += $(SASRC)/uuid_from_string.c
SRCS += $(SASRC)/uuid_to_string.c
-OBJS += uuid_create_nil.o uuid_equal.o uuid_from_string.o uuid_is_nil.o \
+OBJECTS += uuid_create_nil.o uuid_equal.o uuid_from_string.o uuid_is_nil.o \
uuid_to_string.o
# decompression functionality from libbz2
# NOTE: to actually test this functionality after libbz2 upgrade compile
# loader(8) with LOADER_BZIP2_SUPPORT defined
-_bzlib.o _crctable.o _decompress.o _huffman.o _randtable.o bzipfs.o \
-:= CFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS
+objs/_bzlib.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS
+objs/_bzlib.o := CPPFLAGS += -I$(SRC)/common/bzip2
+objs/_bzlib.o: libstand_bzlib_private.h
+pics/_bzlib.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS
+pics/_bzlib.o := CPPFLAGS += -I$(SRC)/common/bzip2
+pics/_bzlib.o: libstand_bzlib_private.h
+objs/_crctable.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS
+objs/_crctable.o := CPPFLAGS += -I$(SRC)/common/bzip2
+objs/_crctable.o: libstand_bzlib_private.h
+pics/_crctable.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS
+pics/_crctable.o := CPPFLAGS += -I$(SRC)/common/bzip2
+pics/_crctable.o: libstand_bzlib_private.h
+objs/_decompress.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS
+objs/_decompress.o := CPPFLAGS += -I$(SRC)/common/bzip2
+objs/_decompress.o: libstand_bzlib_private.h
+pics/_decompress.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS
+pics/_decompress.o := CPPFLAGS += -I$(SRC)/common/bzip2
+pics/_decompress.o: libstand_bzlib_private.h
+objs/_huffman.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS
+objs/_huffman.o := CPPFLAGS += -I$(SRC)/common/bzip2
+objs/_huffman.o: libstand_bzlib_private.h
+pics/_huffman.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS
+pics/_huffman.o := CPPFLAGS += -I$(SRC)/common/bzip2
+pics/_huffman.o: libstand_bzlib_private.h
+objs/_randtable.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS
+objs/_randtable.o := CPPFLAGS += -I$(SRC)/common/bzip2
+objs/_randtable.o: libstand_bzlib_private.h
+pics/_randtable.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS
+pics/_randtable.o := CPPFLAGS += -I$(SRC)/common/bzip2
+pics/_randtable.o: libstand_bzlib_private.h
+objs/bzipfs.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS
+objs/bzipfs.o := CPPFLAGS += -I$(SRC)/common/bzip2
+objs/bzipfs.o: libstand_bzlib_private.h
+pics/bzipfs.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS
+pics/bzipfs.o := CPPFLAGS += -I$(SRC)/common/bzip2
+pics/bzipfs.o: libstand_bzlib_private.h
SRCS += libstand_bzlib_private.h
# too hairy
-_inflate.o := SMATCH=off
+objs/_inflate.o := SMATCH=off
+pics/_inflate.o := SMATCH=off
SRCS += _bzlib.c _crctable.c _decompress.c _huffman.c _randtable.c
-OBJS += _bzlib.o _crctable.o _decompress.o _huffman.o _randtable.o
-CLEANFILES += _bzlib.c _crctable.c _decompress.c _huffman.c _randtable.c
+OBJECTS += _bzlib.o _crctable.o _decompress.o _huffman.o _randtable.o
+CLEANFILES += _bzlib.c _crctable.c _decompress.c _huffman.c _randtable.c
_bzlib.c: $(SRC)/common/bzip2/bzlib.c
sed "s|bzlib_private\.h|libstand_bzlib_private.h|" $^ > $@
@@ -112,11 +147,33 @@ libstand_bzlib_private.h: $(SRC)/common/bzip2/bzlib_private.h
sed -e 's|<stdlib.h>|"stand.h"|' $^ > $@
# decompression functionality from zlib
-adler32.o crc32.o _infback.o _inffast.o _inflate.o _inftrees.o _zutil.o \
-gzipfs.o gzip.o := CPPFLAGS += -I$(ZLIB)
+objs/adler32.o := CPPFLAGS += -I$(ZLIB)
+pics/adler32.o := CPPFLAGS += -I$(ZLIB)
+objs/crc32.o := CPPFLAGS += -I$(ZLIB)
+pics/crc32.o := CPPFLAGS += -I$(ZLIB)
+objs/_infback.o := CPPFLAGS += -I$(ZLIB)
+pics/_infback.o := CPPFLAGS += -I$(ZLIB)
+objs/_infback.o pics/_infback.o: libstand_zutil.h libstand_gzguts.h
+objs/_inffast.o := CPPFLAGS += -I$(ZLIB)
+pics/_inffast.o := CPPFLAGS += -I$(ZLIB)
+objs/_inffast.o pics/_inffast.o: libstand_zutil.h libstand_gzguts.h
+objs/_inflate.o := CPPFLAGS += -I$(ZLIB)
+pics/_inflate.o := CPPFLAGS += -I$(ZLIB)
+objs/_inflate.o pics/_inflate.o: libstand_zutil.h libstand_gzguts.h
+objs/_inftrees.o := CPPFLAGS += -I$(ZLIB)
+pics/_inftrees.o := CPPFLAGS += -I$(ZLIB)
+objs/_inftrees.o pics/_inftrees.o: libstand_zutil.h libstand_gzguts.h
+objs/_zutil.o := CPPFLAGS += -I$(ZLIB)
+pics/_zutil.o := CPPFLAGS += -I$(ZLIB)
+objs/_zutil.o pics/_zutil.o: libstand_zutil.h libstand_gzguts.h
+objs/gzipfs.o := CPPFLAGS += -I$(ZLIB)
+pics/gzipfs.o := CPPFLAGS += -I$(ZLIB)
+objs/gzip.o := CPPFLAGS += -I$(ZLIB)
+pics/gzip.o := CPPFLAGS += -I$(ZLIB)
+
SRCS += $(ZLIB)/adler32.c $(ZLIB)/crc32.c \
libstand_zutil.h libstand_gzguts.h
-OBJS += adler32.o crc32.o
+OBJECTS += adler32.o crc32.o
_infback.c: $(ZLIB)/infback.c
sed -e "s|zutil\.h|libstand_zutil.h|" \
@@ -140,8 +197,8 @@ _zutil.c: $(ZLIB)/zutil.c
$^ > $@
SRCS += _infback.c _inffast.c _inflate.c _inftrees.c _zutil.c
-OBJS += _infback.o _inffast.o _inflate.o _inftrees.o _zutil.o
-CLEANFILES += _infback.c _inffast.c _inflate.c _inftrees.c _zutil.c
+OBJECTS += _infback.o _inffast.o _inflate.o _inftrees.o _zutil.o
+CLEANFILES += _infback.c _inffast.c _inflate.c _inftrees.c _zutil.c
# depend on stand.h being able to be included multiple times
libstand_zutil.h: $(ZLIB)/zutil.h
@@ -163,9 +220,10 @@ libstand_gzguts.h: $(ZLIB)/gzguts.h
CLEANFILES += libstand_zutil.h libstand_gzguts.h
# lz4 decompression functionality
-lz4.o := CPPFLAGS += -I$(LZ4)
+pics/lz4.o := CPPFLAGS += -I$(LZ4)
+objs/lz4.o := CPPFLAGS += -I$(LZ4)
SRCS += $(LZ4)/lz4.c
-OBJS += lz4.o
+OBJECTS += lz4.o
# io routines
SRCS += $(SASRC)/closeall.c $(SASRC)/dev.c \
@@ -174,7 +232,7 @@ SRCS += $(SASRC)/closeall.c $(SASRC)/dev.c \
$(SASRC)/lseek.c $(SASRC)/open.c $(SASRC)/read.c \
$(SASRC)/write.c $(SASRC)/readdir.c
-OBJS += closeall.o dev.o ioctl.o nullfs.o stat.o fstat.o close.o lseek.o \
+OBJECTS += closeall.o dev.o ioctl.o nullfs.o stat.o fstat.o close.o lseek.o \
open.o read.o write.o readdir.o
# network routines
@@ -182,12 +240,12 @@ SRCS += $(SASRC)/arp.c $(SASRC)/ether.c $(SASRC)/ip.c \
$(SASRC)/inet_ntoa.c $(SASRC)/in_cksum.c \
$(SASRC)/net.c $(SASRC)/udp.c $(SASRC)/netif.c \
$(SASRC)/rpc.c
-OBJS += arp.o ether.o ip.o inet_ntoa.o in_cksum.o net.o udp.o netif.o rpc.o
+OBJECTS += arp.o ether.o ip.o inet_ntoa.o in_cksum.o net.o udp.o netif.o rpc.o
# network info services:
SRCS += $(SASRC)/bootp.c $(SASRC)/rarp.c \
$(SASRC)/bootparam.c
-OBJS += bootp.o rarp.o bootparam.o
+OBJECTS += bootp.o rarp.o bootparam.o
# boot filesystems
SRCS += $(SASRC)/ufs.c
@@ -197,12 +255,16 @@ SRCS += $(SASRC)/tftp.c
SRCS += $(SASRC)/gzipfs.c
SRCS += $(SASRC)/bzipfs.c
SRCS += $(SASRC)/dosfs.c
-OBJS += ufs.o
-OBJS += nfs.o
-OBJS += cd9660.o
-OBJS += tftp.o
-OBJS += gzipfs.o
-OBJS += bzipfs.o
-OBJS += dosfs.o
-#
-.PARALLEL:
+OBJECTS += ufs.o
+OBJECTS += nfs.o
+OBJECTS += cd9660.o
+OBJECTS += tftp.o
+OBJECTS += gzipfs.o
+OBJECTS += bzipfs.o
+OBJECTS += dosfs.o
+
+# utility
+SRCS += $(SRC)/common/util/explicit_bzero.c
+SRCS += $(SRC)/common/util/memmem.c
+OBJECTS += explicit_bzero.o
+OBJECTS += memmem.o
diff --git a/usr/src/boot/lib/libstand/crypto/Makefile.inc b/usr/src/boot/lib/libstand/crypto/Makefile.inc
index 6a8dadc313..3cff3ecdb6 100644
--- a/usr/src/boot/lib/libstand/crypto/Makefile.inc
+++ b/usr/src/boot/lib/libstand/crypto/Makefile.inc
@@ -21,32 +21,32 @@ SRCS += $(COMDIR)/edonr/edonr.c
SRCS += $(COMDIR)/skein/skein.c
SRCS += $(COMDIR)/skein/skein_iv.c
SRCS += $(COMDIR)/skein/skein_block.c
-OBJS += digest.o
-OBJS += sha1.o
-OBJS += edonr.o
-OBJS += skein.o
-OBJS += skein_iv.o
-OBJS += skein_block.o
+OBJECTS += digest.o
+OBJECTS += sha1.o
+OBJECTS += edonr.o
+OBJECTS += skein.o
+OBJECTS += skein_iv.o
+OBJECTS += skein_block.o
-digest.o := CPPFLAGS += -I../../common
+objs/digest.o pics/digest.o := CPPFLAGS += -I../../common
# Do not unroll skein loops, reduce code size
-skein_block.o := CPPFLAGS += -DSKEIN_LOOP=111
+objs/skein_block.o pics/skein_block.o := CPPFLAGS += -DSKEIN_LOOP=111
-%.o: $(COMDIR)/edonr/%.c
+objs/%.o pics/%.o: $(COMDIR)/edonr/%.c
$(COMPILE.c) -o $@ $<
-%.o: $(COMDIR)/skein/%.c
+objs/%.o pics/%.o: $(COMDIR)/skein/%.c
$(COMPILE.c) -o $@ $<
-%.o: $(CRYPTOSRC)/%.c
+objs/%.o pics/%.o: $(CRYPTOSRC)/%.c
$(COMPILE.c) -o $@ $<
-%.o: $(COMDIR)/sha1/%.c
- $(COMPILE.c) $<
+objs/%.o pics/%.o: $(COMDIR)/sha1/%.c
+ $(COMPILE.c) -o $@ $<
sha1-x86_64.s: $(COMDIR)/sha1/amd64/sha1-x86_64.pl
$(PERL) $? $@
-sha1-x86_64.o: sha1-x86_64.s
+pics/sha1-x86_64.o: sha1-x86_64.s
$(COMPILE.s) -o $@ ${@F:.o=.s}
diff --git a/usr/src/boot/lib/libstand/zfs/Makefile.inc b/usr/src/boot/lib/libstand/zfs/Makefile.inc
index fd054a9411..a33a42d703 100644
--- a/usr/src/boot/lib/libstand/zfs/Makefile.inc
+++ b/usr/src/boot/lib/libstand/zfs/Makefile.inc
@@ -13,23 +13,25 @@
# Copyright 2021 Toomas Soome <tsoome@me.com>
#
-SRCS += $(ZFSSRC)/zfs.c
-SRCS += $(ZFSSRC)/gzip.c
-SRCS += $(SRC)/common/list/list.c
-OBJS += zfs.o
-OBJS += gzip.o
-OBJS += nvlist.o
-OBJS += list.o
+SRCS += $(ZFSSRC)/zfs.c
+SRCS += $(ZFSSRC)/gzip.c
+SRCS += $(SRC)/common/list/list.c
+OBJECTS += zfs.o
+OBJECTS += gzip.o
+OBJECTS += nvlist.o
+OBJECTS += list.o
-zfs.o := CPPFLAGS += -I../../common
-zfs.o := CPPFLAGS += -I../../../cddl/boot/zfs -I$(LZ4)
-zfs.o := CPPFLAGS += -I$(SRC)/uts/common/fs/zfs
-nvlist.o := CPPFLAGS += -I../../common -I../../../cddl/boot/zfs
+objs/zfs.o pics/zfs.o := CPPFLAGS += -I../../common
+objs/zfs.o pics/zfs.o := CPPFLAGS += -I../../../cddl/boot/zfs -I$(LZ4)
+objs/zfs.o pics/zfs.o := CPPFLAGS += -I$(SRC)/uts/common/fs/zfs
+objs/zfs.o pics/zfs.o := CPPFLAGS += -I$(CRYPTOSRC)
+objs/nvlist.o pics/nvlist.o := CPPFLAGS += -I../../common
+objs/nvlist.o pics/nvlist.o := CPPFLAGS += -I../../../cddl/boot/zfs
-%.o: $(ZFSSRC)/%.c
+pics/%.o objs/%.o: $(ZFSSRC)/%.c
$(COMPILE.c) -o $@ $<
-%.o: $(SRC)/common/list/%.c
- $(COMPILE.c) -DNDEBUG $<
+pics/%.o objs/%.o: $(SRC)/common/list/%.c
+ $(COMPILE.c) -DNDEBUG -o $@ $<
zfs.o: $(ZFSSRC)/zfsimpl.c
diff --git a/usr/src/boot/sys/boot/Makefile.inc b/usr/src/boot/sys/boot/Makefile.inc
index 83a480c156..c6b5320866 100644
--- a/usr/src/boot/sys/boot/Makefile.inc
+++ b/usr/src/boot/sys/boot/Makefile.inc
@@ -49,14 +49,14 @@ AS_CPPFLAGS=
CPPFLAGS= -D_STANDALONE -_gcc=-nostdinc
CFLAGS64= -_gcc=-mno-red-zone
-CFLAGS= -_gcc=-Os -_gcc=-fPIC -_gcc=-ffreestanding -_gcc=-fno-builtin
+CFLAGS= -_gcc=-Os -_gcc=-ffreestanding -_gcc=-fno-builtin
CFLAGS += -_gcc=-ffunction-sections -_gcc=-fdata-sections
CFLAGS += -_gcc=-mno-mmx -_gcc=-mno-3dnow -_gcc=-mno-sse -_gcc=-mno-sse2
CFLAGS += -_gcc=-mno-sse3 -_gcc=-msoft-float
CFLAGS += -_gcc=-mno-avx -_gcc=-mno-aes
CFLAGS += -_gcc=-Wall
CFLAGS += $(CCNOAUTOINLINE) $(CCNOREORDER) $(CSTD_GNU99)
-CCASFLAGS= -fPIC -Wa,--divide
+CCASFLAGS= -Wa,--divide
ASFLAGS= --divide
SMATCH_ =
@@ -64,11 +64,11 @@ SMATCH_on =
SMATCH_off = -_smatch=off
# SMATCH_ARGS will bring in set of -Wno-* options.
-#CFLAGS += $(SMATCH_ARGS:%=-_smatch=%)
+SMATCH_ARGS = --timeout=0
+CFLAGS += $(SMATCH_ARGS:%=-_smatch=%)
CFLAGS += $(SMOFF:%=-_smatch=--disable=%)
CFLAGS += $(SMATCH_$(MACHINE))
CFLAGS += $(SMATCH_$(SMATCH))
-CFLAGS += -_smatch=--timeout=0
COMPILE.S= $(CC) $(SMATCH_off) $(CCASFLAGS) $(CPPFLAGS) -c
diff --git a/usr/src/boot/sys/boot/Makefile.lib b/usr/src/boot/sys/boot/Makefile.lib
new file mode 100644
index 0000000000..baa97a1513
--- /dev/null
+++ b/usr/src/boot/sys/boot/Makefile.lib
@@ -0,0 +1,34 @@
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2021 Toomas Soome <tsoome@me.com>
+#
+
+OBJS= $(OBJECTS:%=objs/%)
+PICS= $(OBJECTS:%=pics/%)
+
+.PARALLEL: $(OBJS) $(PICS) DUMMY
+
+$(PICS) := CFLAGS += -_gcc=-fPIC
+$(PICS) := CCASFLAGS += -_gcc=-fPIC
+
+$(OBJS) $(PICS): machine x86
+
+objs pics:
+ -@mkdir -p $@
+
+$(LIBRARY): objs .WAIT $$(OBJS)
+ $(AR) $(ARFLAGS) $@ $(OBJS)
+
+$(DYNLIB): pics .WAIT $$(PICS)
+ $(AR) $(ARFLAGS) $@ $(PICS)
+
+CLEANFILES += $(OBJS) $(PICS) $(LIBRARY) $(DYNLIB)
diff --git a/usr/src/boot/sys/boot/efi/Makefile.inc b/usr/src/boot/sys/boot/efi/Makefile.inc
index 4201600ac3..ad29e53249 100644
--- a/usr/src/boot/sys/boot/efi/Makefile.inc
+++ b/usr/src/boot/sys/boot/efi/Makefile.inc
@@ -14,6 +14,6 @@
#
# Options used when building app-specific efi components
-CFLAGS += -_gcc=-fshort-wchar
+CFLAGS += -_gcc=-fshort-wchar $(C_BIGPICFLAGS)
.PARALLEL:
diff --git a/usr/src/boot/sys/boot/efi/loader/Makefile.com b/usr/src/boot/sys/boot/efi/loader/Makefile.com
index dc71ddeede..ea6303034f 100644
--- a/usr/src/boot/sys/boot/efi/loader/Makefile.com
+++ b/usr/src/boot/sys/boot/efi/loader/Makefile.com
@@ -15,7 +15,6 @@
# Copyright (c) 2019, Joyent, Inc.
#
-include $(SRC)/Makefile.master
include $(SRC)/boot/Makefile.version
include $(SRC)/boot/sys/boot/Makefile.inc
@@ -98,18 +97,46 @@ smbios.o := CPPFLAGS += -DSMBIOS_LITTLE_ENDIAN_UUID
# Use network-endian UUID format for backward compatibility.
#CPPFLAGS += -DSMBIOS_NETWORK_ENDIAN_UUID
-DPLIBSTAND= ../../../libstand/$(MACHINE)/libstand.a
-LIBSTAND= -L../../../libstand/$(MACHINE) -lstand
+DPLIBSTAND= ../../../libstand/$(MACHINE)/libstand_pics.a
+LIBSTAND= -L../../../libstand/$(MACHINE) -lstand_pics
BOOT_FORTH= yes
CPPFLAGS += -DBOOT_FORTH
CPPFLAGS += -I$(SRC)/common/ficl
CPPFLAGS += -I../../../libficl
-DPLIBFICL= ../../../libficl/$(MACHINE)/libficl.a
-LIBFICL= -L../../../libficl/$(MACHINE) -lficl
+DPLIBFICL= ../../../libficl/$(MACHINE)/libficl_pics.a
+LIBFICL= -L../../../libficl/$(MACHINE) -lficl_pics
# Always add MI sources
-include ../Makefile.common
+#
+SRCS += boot.c commands.c console.c devopen.c interp.c
+SRCS += interp_backslash.c interp_parse.c ls.c misc.c
+SRCS += module.c linenoise.c zfs_cmd.c
+
+OBJS += boot.o commands.o console.o devopen.o interp.o \
+ interp_backslash.o interp_parse.o ls.o misc.o \
+ module.o linenoise.o zfs_cmd.o
+
+SRCS += load_elf32.c load_elf32_obj.c reloc_elf32.c
+SRCS += load_elf64.c load_elf64_obj.c reloc_elf64.c
+
+OBJS += load_elf32.o load_elf32_obj.o reloc_elf32.o \
+ load_elf64.o load_elf64_obj.o reloc_elf64.o
+
+SRCS += disk.c part.c dev_net.c vdisk.c
+OBJS += disk.o part.o dev_net.o vdisk.o
+CPPFLAGS += -DLOADER_DISK_SUPPORT
+CPPFLAGS += -DLOADER_GPT_SUPPORT
+CPPFLAGS += -DLOADER_MBR_SUPPORT
+
+part.o := CPPFLAGS += -I$(ZLIB)
+
+SRCS += bcache.c
+OBJS += bcache.o
+
+# Forth interpreter
+SRCS += interp_forth.c
+OBJS += interp_forth.o
CPPFLAGS += -I../../../common
# For multiboot2.h, must be last, to avoid conflicts
@@ -125,7 +152,7 @@ LDFLAGS = -nostdlib --eh-frame-hdr
LDFLAGS += -shared --hash-style=both --enable-new-dtags
LDFLAGS += -T$(LDSCRIPT) -Bsymbolic
-CLEANFILES= loader.sym loader.bin
+CLEANFILES= $(EFIPROG) loader.sym loader.bin
CLEANFILES += $(FONT).c vers.c
NEWVERSWHAT= "EFI loader" $(MACHINE)
@@ -167,7 +194,7 @@ x86:
$(SYMLINK) ../../../../x86/include x86
clean clobber:
- $(RM) $(CLEANFILES) $(OBJS)
+ $(RM) $(CLEANFILES) $(OBJS) machine x86
%.o: ../%.c
$(COMPILE.c) $<
diff --git a/usr/src/boot/sys/boot/efi/loader/Makefile.common b/usr/src/boot/sys/boot/efi/loader/Makefile.common
deleted file mode 100644
index 9399748591..0000000000
--- a/usr/src/boot/sys/boot/efi/loader/Makefile.common
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-
-SRCS += boot.c commands.c console.c devopen.c interp.c
-SRCS += interp_backslash.c interp_parse.c ls.c misc.c
-SRCS += module.c linenoise.c zfs_cmd.c
-
-OBJS += boot.o commands.o console.o devopen.o interp.o \
- interp_backslash.o interp_parse.o ls.o misc.o \
- module.o linenoise.o zfs_cmd.o
-
-SRCS += load_elf32.c load_elf32_obj.c reloc_elf32.c
-SRCS += load_elf64.c load_elf64_obj.c reloc_elf64.c
-
-OBJS += load_elf32.o load_elf32_obj.o reloc_elf32.o \
- load_elf64.o load_elf64_obj.o reloc_elf64.o
-
-SRCS += disk.c part.c dev_net.c vdisk.c
-OBJS += disk.o part.o dev_net.o vdisk.o
-CFLAGS += -DLOADER_DISK_SUPPORT
-CFLAGS += -DLOADER_GPT_SUPPORT
-CFLAGS += -DLOADER_MBR_SUPPORT
-
-part.o := CPPFLAGS += -I$(ZLIB)
-
-SRCS += bcache.c
-OBJS += bcache.o
-
-# Forth interpreter
-SRCS += interp_forth.c
-OBJS += interp_forth.o
diff --git a/usr/src/boot/sys/boot/efi/loader/amd64/Makefile b/usr/src/boot/sys/boot/efi/loader/amd64/Makefile
index 8022b364f2..a7894b50ca 100644
--- a/usr/src/boot/sys/boot/efi/loader/amd64/Makefile
+++ b/usr/src/boot/sys/boot/efi/loader/amd64/Makefile
@@ -14,6 +14,8 @@
# Copyright 2016 RackTop Systems.
#
+include $(SRC)/Makefile.master
+
MACHINE= $(MACH64)
EFIPROG= loader64.efi
diff --git a/usr/src/boot/sys/boot/efi/loader/i386/Makefile b/usr/src/boot/sys/boot/efi/loader/i386/Makefile
index 42885ca81f..b2f086971a 100644
--- a/usr/src/boot/sys/boot/efi/loader/i386/Makefile
+++ b/usr/src/boot/sys/boot/efi/loader/i386/Makefile
@@ -14,6 +14,8 @@
# Copyright 2016 RackTop Systems.
#
+include $(SRC)/Makefile.master
+
MACHINE= $(MACH)
EFIPROG= loader32.efi
@@ -28,8 +30,6 @@ efi_main.o := CPPFLAGS += -DLOADER_EFI=L\"loader32.efi\"
CFLAGS += -m32
CCASFLAGS += -m32
-CLEANFILES += machine x86 $(EFIPROG)
-
$(OBJS): machine x86
%.o: ../../../i386/libi386/%.c
diff --git a/usr/src/boot/sys/boot/i386/libi386/Makefile b/usr/src/boot/sys/boot/i386/libi386/Makefile
index aa11fc2276..4b6d501c95 100644
--- a/usr/src/boot/sys/boot/i386/libi386/Makefile
+++ b/usr/src/boot/sys/boot/i386/libi386/Makefile
@@ -105,48 +105,39 @@ SRCS += $(ZFSSRC)/devicename_stubs.c
OBJS += devicename_stubs.o
BOOT_COMCONSOLE_PORT= 0x3f8
-CFLAGS += -DCOMPORT=${BOOT_COMCONSOLE_PORT}
+CPPFLAGS += -DCOMPORT=${BOOT_COMCONSOLE_PORT}
BOOT_COMCONSOLE_SPEED= 9600
-CFLAGS += -DCOMSPEED=${BOOT_COMCONSOLE_SPEED}
+CPPFLAGS += -DCOMSPEED=${BOOT_COMCONSOLE_SPEED}
# Make the disk code more talkative
-# CFLAGS+= -DDISK_DEBUG
+# CPPFLAGS+= -DDISK_DEBUG
# Export serial numbers, UUID, and asset tag from loader.
-smbios.o := CFLAGS += -DSMBIOS_SERIAL_NUMBERS
+smbios.o := CPPFLAGS += -DSMBIOS_SERIAL_NUMBERS
# Use little-endian UUID format as defined in SMBIOS 2.6.
-smbios.o := CFLAGS += -DSMBIOS_LITTLE_ENDIAN_UUID
+smbios.o := CPPFLAGS += -DSMBIOS_LITTLE_ENDIAN_UUID
# Use network-endian UUID format for backward compatibility.
-#CFLAGS += -DSMBIOS_NETWORK_ENDIAN_UUID
+#CPPFLAGS += -DSMBIOS_NETWORK_ENDIAN_UUID
# XXX: make alloca() useable
-CFLAGS += -Dalloca=__builtin_alloca
+CPPFLAGS += -Dalloca=__builtin_alloca
-CFLAGS += -I$(SRC)/common/ficl -I../../libficl \
+CPPFLAGS += -I$(SRC)/common/ficl -I../../libficl \
-I../../common -I../common \
-I../btx/lib \
-I$(SRC)/uts/intel/sys/acpi \
-I../../.. -I.
# the location of libstand
-CFLAGS += -I../../../../lib/libstand/
+CPPFLAGS += -I../../../../lib/libstand/
multiboot.o := CPPFLAGS += -I../../../cddl/boot/zfs
multiboot2.o := CPPFLAGS += -I../../../cddl/boot/zfs
devicename.o := CPPFLAGS += -I../../../cddl/boot/zfs
devicename_stubs.o := CPPFLAGS += -I../../../cddl/boot/zfs
-# Handle FreeBSD specific %b and %D printf format specifiers
-#FORMAT_EXTENSIONS=-D__printf__=__freebsd_kprintf__
-#CFLAGS += ${FORMAT_EXTENSIONS}
-
CLEANFILES += machine x86
-# XXX: clang integrated-as doesn't grok .codeNN directives yet
-# CFLAGS.amd64_tramp.S= ${CLANG_NO_IAS}
-# CFLAGS.multiboot_tramp.S= ${CLANG_NO_IAS}
-# CFLAGS += ${CFLAGS.${.IMPSRC:T}}
-
include ../Makefile.inc
# For multiboot2.h, must be last, to avoid conflicts
diff --git a/usr/src/boot/sys/boot/libficl/Makefile.com b/usr/src/boot/sys/boot/libficl/Makefile.com
index 0cd051356f..564403516d 100644
--- a/usr/src/boot/sys/boot/libficl/Makefile.com
+++ b/usr/src/boot/sys/boot/libficl/Makefile.com
@@ -34,20 +34,14 @@ HEADERS= $(FICLDIR)/ficl.h $(FICLDIR)/ficlplatform/unix.h ../ficllocal.h
#
# disable inner loop variable 'fw' check
-vm.o := SMOFF += check_check_deref
-
-.PARALLEL:
+objs/vm.o := SMOFF += check_check_deref
+pics/vm.o := SMOFF += check_check_deref
MAJOR = 4
MINOR = 1.0
-lib: libficl.a
-
-vm.o := CFLAGS += -_gcc=-Wno-clobbered
-
-# static library build
-libficl.a: $(OBJECTS)
- $(AR) $(ARFLAGS) libficl.a $(OBJECTS)
+objs/vm.o := CFLAGS += -_gcc=-Wno-clobbered
+pics/vm.o := CFLAGS += -_gcc=-Wno-clobbered
machine:
$(RM) machine
@@ -57,17 +51,17 @@ x86:
$(RM) x86
$(SYMLINK) ../../../x86/include x86
-%.o: ../softcore/%.c $(HEADERS)
- $(COMPILE.c) $<
+objs/%.o pics/%.o: ../softcore/%.c $(HEADERS)
+ $(COMPILE.c) -o $@ $<
-%.o: $(FICLDIR)/%.c $(HEADERS)
- $(COMPILE.c) $<
+objs/%.o pics/%.o: $(FICLDIR)/%.c $(HEADERS)
+ $(COMPILE.c) -o $@ $<
-%.o: $(FICLDIR)/ficlplatform/%.c $(HEADERS)
- $(COMPILE.c) $<
+objs/%.o pics/%.o: $(FICLDIR)/ficlplatform/%.c $(HEADERS)
+ $(COMPILE.c) -o $@ $<
#
# generic cleanup code
#
clobber clean: FRC
- $(RM) *.o *.a libficl.* ficl machine x86
+ $(RM) $(CLEANFILES) machine x86
diff --git a/usr/src/boot/sys/boot/libficl/amd64/Makefile b/usr/src/boot/sys/boot/libficl/amd64/Makefile
index ef560fa39b..2f006575f7 100644
--- a/usr/src/boot/sys/boot/libficl/amd64/Makefile
+++ b/usr/src/boot/sys/boot/libficl/amd64/Makefile
@@ -17,13 +17,14 @@
include $(SRC)/Makefile.master
MACHINE= $(MACH64)
+DYNLIB= libficl_pics.a
-all install: lib
+all install: $(DYNLIB)
include ../Makefile.com
CFLAGS += -m64 $(CFLAGS64)
-$(OBJECTS): machine x86
+include $(SRC)/boot/sys/boot/Makefile.lib
FRC:
diff --git a/usr/src/boot/sys/boot/libficl/i386/Makefile b/usr/src/boot/sys/boot/libficl/i386/Makefile
index f4478a4c50..6ad5f9467d 100644
--- a/usr/src/boot/sys/boot/libficl/i386/Makefile
+++ b/usr/src/boot/sys/boot/libficl/i386/Makefile
@@ -17,13 +17,15 @@
include $(SRC)/Makefile.master
MACHINE= $(MACH)
+LIBRARY= libficl.a
+DYNLIB= libficl_pics.a
-all install: lib
+all install: $(LIBRARY) $(DYNLIB)
include ../Makefile.com
CFLAGS += -m32
-$(OBJECTS): machine x86
+include $(SRC)/boot/sys/boot/Makefile.lib
FRC:
diff --git a/usr/src/boot/sys/boot/libstand/Makefile b/usr/src/boot/sys/boot/libstand/Makefile
index 41e614a3ec..01b0b02cab 100644
--- a/usr/src/boot/sys/boot/libstand/Makefile
+++ b/usr/src/boot/sys/boot/libstand/Makefile
@@ -13,8 +13,6 @@
# Copyright 2015 Toomas Soome <tsoome@me.com>
#
-.KEEP_STATE:
-
include $(SRC)/Makefile.master
SUBDIRS = $(MACH) $(MACH64)
@@ -24,9 +22,9 @@ clean := TARGET = clean
clobber := TARGET = clobber
install := TARGET = install
-all clean clobber: $(SUBDIRS)
+.KEEP_STATE:
-install: all
+all clean clobber install: $(SUBDIRS)
.PARALLEL:
diff --git a/usr/src/boot/sys/boot/libstand/Makefile.com b/usr/src/boot/sys/boot/libstand/Makefile.com
index 9294460253..d282deedaa 100644
--- a/usr/src/boot/sys/boot/libstand/Makefile.com
+++ b/usr/src/boot/sys/boot/libstand/Makefile.com
@@ -14,37 +14,23 @@
# Copyright 2019 Joyent, Inc.
#
-include $(SRC)/Makefile.master
include $(SRC)/boot/sys/boot/Makefile.inc
CPPFLAGS += -I../../../../include -I$(SASRC)
-CPPFLAGS += -I../../.. -I. -I$(SRC)/common/bzip2
-
-$(LIBRARY): $(SRCS) $(OBJS)
- $(AR) $(ARFLAGS) $@ $(OBJS)
+CPPFLAGS += -I../../.. -I.
include $(SASRC)/Makefile.inc
include $(CRYPTOSRC)/Makefile.inc
include $(ZFSSRC)/Makefile.inc
-LIBCSRC= $(SRC)/lib/libc
-OBJS += explicit_bzero.o
-OBJS += memmem.o
-
CPPFLAGS += -I$(SRC)/uts/common
-# needs work
-printf.o := SMOFF += 64bit_shift
-
-# too hairy
-_inflate.o := SMATCH=off
-
# 64-bit smatch false positive :/
SMOFF += uninitialized
-clean: clobber
-clobber:
- $(RM) $(CLEANFILES) $(OBJS) machine $(LIBRARY)
+# needs work
+objs/printf.o := SMOFF += 64bit_shift
+pics/printf.o := SMOFF += 64bit_shift
machine:
$(RM) machine
@@ -54,23 +40,30 @@ x86:
$(RM) x86
$(SYMLINK) ../../../x86/include x86
-%.o: $(SASRC)/%.c
- $(COMPILE.c) $<
+pics/%.o objs/%.o: %.c
+ $(COMPILE.c) -o $@ $<
+
+pics/%.o objs/%.o: $(SASRC)/%.c
+ $(COMPILE.c) -o $@ $<
-%.o: $(LIBSRC)/libc/net/%.c
- $(COMPILE.c) $<
+pics/%.o objs/%.o: $(LIBSRC)/libc/net/%.c
+ $(COMPILE.c) -o $@ $<
-%.o: $(LIBSRC)/libc/string/%.c
- $(COMPILE.c) $<
+pics/%.o objs/%.o: $(LIBSRC)/libc/string/%.c
+ $(COMPILE.c) -o $@ $<
-%.o: $(LIBSRC)/libc/uuid/%.c
- $(COMPILE.c) $<
+pics/%.o objs/%.o: $(LIBSRC)/libc/uuid/%.c
+ $(COMPILE.c) -o $@ $<
-%.o: $(ZLIB)/%.c
- $(COMPILE.c) $<
+pics/%.o objs/%.o: $(ZLIB)/%.c
+ $(COMPILE.c) -o $@ $<
-%.o: $(LZ4)/%.c
- $(COMPILE.c) $<
+pics/%.o objs/%.o: $(LZ4)/%.c
+ $(COMPILE.c) -o $@ $<
-%.o: $(SRC)/common/util/%.c
- $(COMPILE.c) $<
+pics/%.o objs/%.o: $(SRC)/common/util/%.c
+ $(COMPILE.c) -o $@ $<
+
+clean: clobber
+clobber:
+ $(RM) $(CLEANFILES) machine x86
diff --git a/usr/src/boot/sys/boot/libstand/amd64/Makefile b/usr/src/boot/sys/boot/libstand/amd64/Makefile
index 8c28ac3dda..ec11ff2156 100644
--- a/usr/src/boot/sys/boot/libstand/amd64/Makefile
+++ b/usr/src/boot/sys/boot/libstand/amd64/Makefile
@@ -14,10 +14,12 @@
# Copyright 2016 RackTop Systems.
#
+include $(SRC)/Makefile.master
+
MACHINE= $(MACH64)
-LIBRARY= libstand.a
+DYNLIB= libstand_pics.a
-all install: $(LIBRARY)
+all install: $(DYNLIB)
include ../Makefile.com
@@ -25,17 +27,17 @@ ASFLAGS = $(amd64_AS_XARCH) -I$(SRC)/uts/common -D_ASM
CFLAGS += -m64 $(CFLAGS64)
CCASFLAGS += -m64
-CLEANFILES += x86 sha1-x86_64.s
-
# _setjmp/_longjmp
SRCS += $(SASRC)/amd64/_setjmp.S
-OBJS += _setjmp.o
+OBJECTS += _setjmp.o
SRCS += sha1-x86_64.s
-OBJS += sha1-x86_64.o
+OBJECTS += sha1-x86_64.o
+
+CLEANFILES += sha1-x86_64.s
-$(LIBRARY): $(SRCS) $(OBJS)
+pics/%.o: $(SASRC)/amd64/%.S
+ $(COMPILE.S) -o $@ $<
-$(OBJS): machine x86
+include $(SRC)/boot/sys/boot/Makefile.lib
-%.o: $(SASRC)/amd64/%.S
- $(COMPILE.S) $<
+FRC:
diff --git a/usr/src/boot/sys/boot/libstand/i386/Makefile b/usr/src/boot/sys/boot/libstand/i386/Makefile
index 4837c0eb4d..aa55779a26 100644
--- a/usr/src/boot/sys/boot/libstand/i386/Makefile
+++ b/usr/src/boot/sys/boot/libstand/i386/Makefile
@@ -14,25 +14,26 @@
# Copyright 2016 RackTop Systems.
#
+include $(SRC)/Makefile.master
+
MACHINE= $(MACH)
LIBRARY= libstand.a
+DYNLIB= libstand_pics.a
-all install: $(LIBRARY)
+all install: $(LIBRARY) $(DYNLIB)
include ../Makefile.com
CFLAGS += -m32
CCASFLAGS += -m32
-CLEANFILES += x86
-
# _setjmp/_longjmp
SRCS += $(SASRC)/i386/_setjmp.S
-OBJS += _setjmp.o
+OBJECTS += _setjmp.o
-$(LIBRARY): $(SRCS) $(OBJS)
+pics/%.o objs/%.o: $(SASRC)/i386/%.S
+ $(COMPILE.S) -o $@ $<
-$(OBJS): machine x86
+include $(SRC)/boot/sys/boot/Makefile.lib
-%.o: $(SASRC)/i386/%.S
- $(COMPILE.S) $<
+FRC:
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index a4bacee105..a66bec5656 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -496,6 +496,7 @@ i386_SUBDIRS= \
nvmeadm \
pptadm \
rdmsr \
+ rsrvrctl \
rtc \
ucodeadm \
xhci \
diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c
index 9531db8fef..f8a3cd8760 100644
--- a/usr/src/cmd/bhyve/bhyverun.c
+++ b/usr/src/cmd/bhyve/bhyverun.c
@@ -39,7 +39,7 @@
*
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2018 Joyent, Inc.
- * Copyright 2020 Oxide Computer Company
+ * Copyright 2021 Oxide Computer Company
*/
#include <sys/cdefs.h>
@@ -1257,8 +1257,15 @@ do_open(const char *vmname)
if (lpc_bootrom())
romboot = true;
-
+#ifndef __FreeBSD__
+ uint64_t create_flags = 0;
+ if (get_config_bool_default("memory.use_reservoir", false)) {
+ create_flags |= VCF_RESERVOIR_MEM;
+ }
+ error = vm_create(vmname, create_flags);
+#else
error = vm_create(vmname);
+#endif /* __FreeBSD__ */
if (error) {
if (errno == EEXIST) {
if (romboot) {
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c
index 313a1a37f4..4fc6ddc251 100644
--- a/usr/src/cmd/bhyvectl/bhyvectl.c
+++ b/usr/src/cmd/bhyvectl/bhyvectl.c
@@ -39,7 +39,7 @@
*
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2019 Joyent, Inc.
- * Copyright 2020 Oxide Computer Company
+ * Copyright 2021 Oxide Computer Company
*/
#include <sys/cdefs.h>
@@ -1953,8 +1953,13 @@ main(int argc, char *argv[])
error = 0;
+#ifndef __FreeBSD__
+ if (!error && create)
+ error = vm_create(vmname, 0);
+# else
if (!error && create)
error = vm_create(vmname);
+#endif /* __FreeBSD__ */
if (!error) {
ctx = vm_open(vmname);
diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c
index 81539340d3..cc4d4e95d4 100644
--- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c
+++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c
@@ -748,7 +748,8 @@ ipmgmt_getif_handler(void *argp)
ipmgmt_getif_rval_t *rvalp;
ipmgmt_retval_t rval;
ipmgmt_getif_cbarg_t cbarg;
- ipadm_if_info_t *ifp, *rifp, *curifp;
+ ipadm_if_info_list_t *ifl, *curifl;
+ ipadm_if_info_t *ifp, *rifp;
int i, err = 0, count = 0;
size_t rbufsize;
@@ -771,7 +772,7 @@ ipmgmt_getif_handler(void *argp)
}
/* allocate sufficient buffer to return the interface info */
- for (ifp = cbarg.cb_ifinfo; ifp != NULL; ifp = ifp->ifi_next)
+ for (ifl = cbarg.cb_ifinfo; ifl != NULL; ifl = ifl->ifil_next)
++count;
rbufsize = sizeof (*rvalp) + count * sizeof (*ifp);
rvalp = alloca(rbufsize);
@@ -779,7 +780,7 @@ ipmgmt_getif_handler(void *argp)
rvalp->ir_ifcnt = count;
rifp = rvalp->ir_ifinfo;
- ifp = cbarg.cb_ifinfo;
+ ifl = cbarg.cb_ifinfo;
/*
* copy the interface info to buffer allocated on stack. The reason
@@ -787,12 +788,12 @@ ipmgmt_getif_handler(void *argp)
* return
*/
for (i = 0; i < count; i++) {
+ ifp = &ifl->ifil_ifi;
rifp = rvalp->ir_ifinfo + i;
(void) bcopy(ifp, rifp, sizeof (*rifp));
- rifp->ifi_next = NULL;
- curifp = ifp->ifi_next;
- free(ifp);
- ifp = curifp;
+ curifl = ifl->ifil_next;
+ free(ifl);
+ ifl = curifl;
}
rvalp->ir_err = err;
(void) door_return((char *)rvalp, rbufsize, NULL, 0);
diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_impl.h b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_impl.h
index f4d6d30645..a1f016c8c6 100644
--- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_impl.h
+++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_impl.h
@@ -23,6 +23,7 @@
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2015 Joyent, Inc.
* Copyright (c) 2016, Chris Fraire <cfraire@me.com>.
+ * Copyright 2021, Tintri by DDN. All rights reserved.
*/
#ifndef _IPMGMT_IMPL_H
@@ -63,8 +64,8 @@ extern db_wfunc_t ipmgmt_db_getprop, ipmgmt_db_resetprop;
extern db_wfunc_t ipmgmt_db_add, ipmgmt_db_update;
typedef struct {
- char *cb_ifname;
- ipadm_if_info_t *cb_ifinfo;
+ char *cb_ifname;
+ ipadm_if_info_list_t *cb_ifinfo;
} ipmgmt_getif_cbarg_t;
extern db_wfunc_t ipmgmt_db_getif;
@@ -99,7 +100,7 @@ extern db_wfunc_t ipmgmt_db_initif;
* A linked list of address object nodes. Each node in the list tracks
* following information for the address object identified by `am_aobjname'.
* - interface on which the address is created
- * - logical interface number on which the address is created
+ * - logical interface number on which the address is created
* - address family
* - `am_nextnum' identifies the next number to use to generate user part
* of `aobjname'.
@@ -176,7 +177,7 @@ extern int ipmgmt_db_walk(db_wfunc_t *, void *, ipadm_db_op_t);
extern int ipmgmt_aobjmap_op(ipmgmt_aobjmap_t *, uint32_t);
extern boolean_t ipmgmt_aobjmap_init(void *, nvlist_t *, char *,
size_t, int *);
-extern int ipmgmt_persist_aobjmap(ipmgmt_aobjmap_t *,
+extern int ipmgmt_persist_aobjmap(ipmgmt_aobjmap_t *,
ipadm_db_op_t);
extern boolean_t ipmgmt_ngz_firstboot_postinstall();
extern int ipmgmt_persist_if(ipmgmt_if_arg_t *);
diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_main.c b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_main.c
index 994d1b0125..c5bda676cc 100644
--- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_main.c
+++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_main.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2015 Joyent, Inc.
+ * Copyright 2021, Tintri by DDN. All rights reserved.
*/
/*
@@ -568,13 +569,14 @@ ipmgmt_persist_if_exists(char *ifname, sa_family_t af)
bzero(&cbarg, sizeof (cbarg));
cbarg.cb_ifname = ifname;
(void) ipmgmt_db_walk(ipmgmt_db_getif, &cbarg, IPADM_DB_READ);
- if ((ifp = cbarg.cb_ifinfo) != NULL) {
+ if (cbarg.cb_ifinfo != NULL) {
+ ifp = &cbarg.cb_ifinfo->ifil_ifi;
if ((af == AF_INET && (ifp->ifi_pflags & IFIF_IPV4)) ||
(af == AF_INET6 && (ifp->ifi_pflags & IFIF_IPV6))) {
exists = B_TRUE;
}
}
- free(ifp);
+ free(cbarg.cb_ifinfo);
return (exists);
}
diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_persist.c b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_persist.c
index a185068005..2b471b8146 100644
--- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_persist.c
+++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_persist.c
@@ -24,6 +24,7 @@
* Copyright 2018 Joyent, Inc.
* Copyright 2016 Argo Technologie SA.
* Copyright (c) 2016-2017, Chris Fraire <cfraire@me.com>.
+ * Copyright 2021, Tintri by DDN. All rights reserved.
*/
/*
@@ -575,7 +576,8 @@ ipmgmt_db_getif(void *arg, nvlist_t *db_nvl, char *buf, size_t buflen,
ipmgmt_getif_cbarg_t *cbarg = arg;
char *ifname = cbarg->cb_ifname;
char *intf = NULL;
- ipadm_if_info_t *ifp = NULL;
+ ipadm_if_info_list_t *ifl = NULL;
+ ipadm_if_info_t *ifp;
sa_family_t af;
char *afstr;
@@ -586,20 +588,21 @@ ipmgmt_db_getif(void *arg, nvlist_t *db_nvl, char *buf, size_t buflen,
return (B_TRUE);
}
af = atoi(afstr);
- for (ifp = cbarg->cb_ifinfo; ifp != NULL; ifp = ifp->ifi_next) {
+ for (ifl = cbarg->cb_ifinfo; ifl != NULL; ifl = ifl->ifil_next) {
+ ifp = &ifl->ifil_ifi;
if (strcmp(ifp->ifi_name, intf) == 0)
break;
}
- if (ifp == NULL) {
- ipadm_if_info_t *new;
+ if (ifl == NULL) {
+ ipadm_if_info_list_t *new;
if ((new = calloc(1, sizeof (*new))) == NULL) {
*errp = ENOMEM;
return (B_FALSE); /* don't continue the walk */
}
- new->ifi_next = cbarg->cb_ifinfo;
+ new->ifil_next = cbarg->cb_ifinfo;
cbarg->cb_ifinfo = new;
- ifp = new;
+ ifp = &new->ifil_ifi;
(void) strlcpy(ifp->ifi_name, intf, sizeof (ifp->ifi_name));
}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ipadm/ipadm.c b/usr/src/cmd/cmd-inet/usr.sbin/ipadm/ipadm.c
index 30e2f0f549..c195e7be80 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ipadm/ipadm.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ipadm/ipadm.c
@@ -25,6 +25,7 @@
* Copyright (c) 2018, Joyent, Inc.
* Copyright 2017 Gary Mills
* Copyright (c) 2016, Chris Fraire <cfraire@me.com>.
+ * Copyright 2021, Tintri by DDN. All rights reserved.
*/
#include <arpa/inet.h>
@@ -278,7 +279,7 @@ typedef struct show_addr_args_s {
typedef struct show_if_args_s {
show_if_state_t *si_state;
- ipadm_if_info_t *si_info;
+ ipadm_if_info_list_t *si_info;
} show_if_args_t;
typedef enum {
@@ -675,7 +676,7 @@ do_show_ifprop(int argc, char **argv, const char *use)
uint_t proto;
boolean_t m_arg = _B_FALSE;
char *protostr;
- ipadm_if_info_t *ifinfo, *ifp;
+ ipadm_if_info_list_t *ifinfo, *ifl;
ipadm_status_t status;
show_prop_state_t state;
@@ -739,8 +740,9 @@ do_show_ifprop(int argc, char **argv, const char *use)
if (status != IPADM_SUCCESS)
die("Error retrieving interface(s): %s",
ipadm_status2str(status));
- for (ifp = ifinfo; ifp; ifp = ifp->ifi_next) {
- (void) strlcpy(state.sps_ifname, ifp->ifi_name, LIFNAMSIZ);
+ for (ifl = ifinfo; ifl != NULL; ifl = ifl->ifil_next) {
+ (void) strlcpy(state.sps_ifname, ifl->ifil_ifi.ifi_name,
+ LIFNAMSIZ);
state.sps_proto = proto;
show_properties(&state, IPADMPROP_CLASS_IF);
}
@@ -1611,7 +1613,7 @@ flags2str(uint64_t flags, fmask_t *tbl, boolean_t is_bits,
static boolean_t
is_from_gz(const char *lifname)
{
- ipadm_if_info_t *if_info;
+ ipadm_if_info_list_t *if_info;
char phyname[LIFNAMSIZ], *cp;
boolean_t ret = _B_FALSE;
ipadm_status_t status;
@@ -1634,7 +1636,7 @@ is_from_gz(const char *lifname)
if (status != IPADM_SUCCESS)
return (ret);
- if (if_info->ifi_cflags & IFIF_L3PROTECT)
+ if (if_info->ifil_ifi.ifi_cflags & IFIF_L3PROTECT)
ret = _B_TRUE;
ipadm_free_if_info(if_info);
return (ret);
@@ -1889,8 +1891,8 @@ static boolean_t
print_si_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize)
{
show_if_args_t *arg = ofarg->ofmt_cbarg;
- ipadm_if_info_t *ifinfo = arg->si_info;
- char *ifname = ifinfo->ifi_name;
+ ipadm_if_info_list_t *ifinfo = arg->si_info;
+ char *ifname = ifinfo->ifil_ifi.ifi_name;
fmask_t intf_state[] = {
{ "ok", IFIS_OK, IPADM_ALL_BITS},
{ "down", IFIS_DOWN, IPADM_ALL_BITS},
@@ -1927,15 +1929,15 @@ print_si_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize)
(void) snprintf(buf, bufsize, "%s", ifname);
break;
case SI_STATE:
- flags2str(ifinfo->ifi_state, intf_state, _B_FALSE,
+ flags2str(ifinfo->ifil_ifi.ifi_state, intf_state, _B_FALSE,
buf, bufsize);
break;
case SI_CURRENT:
- flags2str(ifinfo->ifi_cflags, intf_cflags, _B_TRUE,
+ flags2str(ifinfo->ifil_ifi.ifi_cflags, intf_cflags, _B_TRUE,
buf, bufsize);
break;
case SI_PERSISTENT:
- flags2str(ifinfo->ifi_pflags, intf_pflags, _B_TRUE,
+ flags2str(ifinfo->ifil_ifi.ifi_pflags, intf_pflags, _B_TRUE,
buf, bufsize);
break;
default:
@@ -1956,7 +1958,7 @@ do_show_if(int argc, char *argv[], const char *use)
ipadm_status_t status;
show_if_state_t state;
char *fields_str = NULL;
- ipadm_if_info_t *if_info, *ptr;
+ ipadm_if_info_list_t *if_info, *ptr;
show_if_args_t sargs;
int option;
ofmt_handle_t ofmt;
@@ -2001,7 +2003,7 @@ do_show_if(int argc, char *argv[], const char *use)
ipadm_status2str(status));
}
- for (ptr = if_info; ptr; ptr = ptr->ifi_next) {
+ for (ptr = if_info; ptr != NULL; ptr = ptr->ifil_next) {
sargs.si_info = ptr;
ofmt_print(state.si_ofmt, &sargs);
}
diff --git a/usr/src/cmd/rcm_daemon/common/ip_rcm.c b/usr/src/cmd/rcm_daemon/common/ip_rcm.c
index a389fc770d..7d2eeec9b7 100644
--- a/usr/src/cmd/rcm_daemon/common/ip_rcm.c
+++ b/usr/src/cmd/rcm_daemon/common/ip_rcm.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2021, Tintri by DDN. All rights reserved.
*/
/*
@@ -2373,7 +2374,7 @@ if_configure_ipadm(datalink_id_t linkid)
{
char ifinst[MAXLINKNAMELEN];
boolean_t found;
- ipadm_if_info_t *ifinfo, *ptr;
+ ipadm_if_info_list_t *ifinfo, *ptr;
ipadm_status_t status;
assert(linkid != DATALINK_INVALID_LINKID);
@@ -2398,14 +2399,14 @@ if_configure_ipadm(datalink_id_t linkid)
}
if (ifinfo != NULL) {
found = B_FALSE;
- for (ptr = ifinfo; ptr; ptr = ptr->ifi_next) {
- if (strncmp(ptr->ifi_name, ifinst,
+ for (ptr = ifinfo; ptr != NULL; ptr = ptr->ifil_next) {
+ if (strncmp(ptr->ifil_ifi.ifi_name, ifinst,
sizeof (ifinst)) == 0) {
found = B_TRUE;
break;
}
}
- free(ifinfo);
+ ipadm_free_if_info(ifinfo);
if (!found) {
return (0);
}
diff --git a/usr/src/cmd/rsrvrctl/Makefile b/usr/src/cmd/rsrvrctl/Makefile
new file mode 100644
index 0000000000..f51df92730
--- /dev/null
+++ b/usr/src/cmd/rsrvrctl/Makefile
@@ -0,0 +1,48 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2021 Oxide Computer Company
+#
+
+# rsrvrctl is built from a single C source as a 64-bit command.
+PROG =		rsrvrctl
+
+include ../Makefile.cmd
+include ../Makefile.cmd.64
+include ../Makefile.ctf
+
+SRCS =		rsrvrctl.c
+OBJS =		$(SRCS:.c=.o)
+
+CLEANFILES =	$(PROG)
+# The install target below delivers the program through $(ROOTLIBPROG), so
+# that is the installed copy which clobber needs to remove.
+CLOBBERFILES +=	$(ROOTLIBPROG)
+
+.KEEP_STATE:
+
+CFLAGS +=	$(CCVERBOSE)
+# CPPFLAGS is replaced (not appended to) so that the bhyve compat/contrib
+# header directories are searched ahead of $(CPPFLAGS.master).
+CPPFLAGS =	-I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \
+		-I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64 \
+		$(CPPFLAGS.master) \
+		-I$(SRC)/uts/i86pc/io/vmm \
+		-I$(SRC)/uts/i86pc
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(LINK.c) -o $@ $(OBJS) $(LDFLAGS) $(LDLIBS)
+	$(POST_PROCESS)
+
+install: all $(ROOTLIBPROG)
+
+clean:
+	$(RM) $(OBJS) $(CLEANFILES)
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/rsrvrctl/rsrvrctl.c b/usr/src/cmd/rsrvrctl/rsrvrctl.c
new file mode 100644
index 0000000000..e189520a1c
--- /dev/null
+++ b/usr/src/cmd/rsrvrctl/rsrvrctl.c
@@ -0,0 +1,164 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2021 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+
+#include <sys/vmm_dev.h>
+
+/*
+ * Write a summary of the supported options to stderr.  `pname' is the
+ * program name substituted into the "Usage:" line.
+ */
+static void
+usage(const char *pname)
+{
+	(void) fprintf(stderr,
+	    "Usage: %s [-a add] [-r remove] [-q]\n"
+	    "\t-a <SZ> add SZ MiB to the reservoir\n"
+	    "\t-r <SZ> remove SZ MiB from the reservoir\n"
+	    "\t-q query reservoir state\n",
+	    pname);
+}
+
+/*
+ * Parse `arg' as a number of MiB and store the equivalent byte count in
+ * `*resp'.  The number is read by strtoul() with base 0, so decimal, octal
+ * (leading 0) and hexadecimal (leading 0x) forms are accepted.
+ *
+ * Returns true on success.  Returns false, with errno set and `*resp' left
+ * untouched, when strtoul() reports an error, when `arg' is empty or has
+ * trailing non-numeric characters (EINVAL), or when the byte count would
+ * not fit in a size_t (ERANGE).
+ */
+static bool
+parse_size(const char *arg, size_t *resp)
+{
+	const size_t mib = 1024 * 1024;
+	char *end;
+	unsigned long res;
+
+	errno = 0;
+	res = strtoul(arg, &end, 0);
+	if (errno != 0) {
+		return (false);
+	}
+
+	/* strtoul() consumes nothing for "" and stops at the first junk. */
+	if (end == arg || *end != '\0') {
+		errno = EINVAL;
+		return (false);
+	}
+
+	/* Refuse values whose conversion to bytes would wrap around. */
+	if (res > ((size_t)-1) / mib) {
+		errno = ERANGE;
+		return (false);
+	}
+
+	*resp = (size_t)res * mib;
+	return (true);
+}
+
+/*
+ * Issue the VMM_RESV_ADD ioctl on `fd' with `sz' as its argument.  If the
+ * ioctl fails, the error is reported with perror() and the program exits
+ * with EXIT_FAILURE.
+ */
+static void
+do_add(int fd, size_t sz)
+{
+	if (ioctl(fd, VMM_RESV_ADD, sz) == 0) {
+		return;
+	}
+
+	perror("Could not add to reservoir");
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Issue the VMM_RESV_REMOVE ioctl on `fd' with `sz' as its argument.  If
+ * the ioctl fails, the error is reported with perror() and the program
+ * exits with EXIT_FAILURE.
+ */
+static void
+do_remove(int fd, size_t sz)
+{
+	if (ioctl(fd, VMM_RESV_REMOVE, sz) == 0) {
+		return;
+	}
+
+	perror("Could not remove from reservoir");
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Fetch the reservoir state from `fd' with the VMM_RESV_QUERY ioctl and
+ * print each reported size, divided down to KiB, on stdout.
+ *
+ * As in do_add() and do_remove(), a failed ioctl is reported with perror()
+ * and ends the program with EXIT_FAILURE, so the exit status reflects the
+ * failure instead of falling through to a successful exit.
+ */
+static void
+do_query(int fd)
+{
+	struct vmm_resv_query data = { 0 };
+
+	if (ioctl(fd, VMM_RESV_QUERY, &data) != 0) {
+		perror("Could not query reservoir info");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Cast so each argument matches %llu whatever the field type is. */
+	(void) printf("Free KiB:\t%llu\n"
+	    "Allocated KiB:\t%llu\n"
+	    "Transient Allocated KiB:\t%llu\n"
+	    "Size limit KiB:\t%llu\n",
+	    (unsigned long long)data.vrq_free_sz / 1024,
+	    (unsigned long long)data.vrq_alloc_sz / 1024,
+	    (unsigned long long)data.vrq_alloc_transient_sz / 1024,
+	    (unsigned long long)data.vrq_limit / 1024);
+}
+
+/*
+ * Parse the command line, open the vmm control device and carry out the
+ * requested reservoir operations: an add (-a) or a remove (-r), which are
+ * mutually exclusive, optionally followed by a query (-q).
+ *
+ * Returns EXIT_SUCCESS for -h and when the requested operations complete.
+ * Returns EXIT_FAILURE for an unknown option, stray operands, no operation
+ * requested, both -a and -r given, failure to open the device, or a size
+ * that parse_size() rejects.  do_add() and do_remove() exit on their own
+ * if their ioctl fails.
+ */
+int
+main(int argc, char *argv[])
+{
+	/* getopt() returns int; a char cannot portably be compared to -1. */
+	int c;
+	const char *opt_a = NULL, *opt_r = NULL;
+	bool opt_q = false;
+	int fd;
+
+	const char *pname = argv[0];
+
+	while ((c = getopt(argc, argv, "a:r:qh")) != -1) {
+		switch (c) {
+		case 'a':
+			opt_a = optarg;
+			break;
+		case 'r':
+			opt_r = optarg;
+			break;
+		case 'q':
+			opt_q = true;
+			break;
+		case 'h':
+			usage(pname);
+			return (EXIT_SUCCESS);
+		default:
+			usage(pname);
+			return (EXIT_FAILURE);
+		}
+	}
+	if (optind < argc ||
+	    (opt_a == NULL && opt_r == NULL && !opt_q) ||
+	    (opt_a != NULL && opt_r != NULL)) {
+		usage(pname);
+		return (EXIT_FAILURE);
+	}
+
+	fd = open(VMM_CTL_DEV, O_EXCL | O_RDWR);
+	if (fd < 0) {
+		perror("Could not open vmmctl");
+		usage(pname);
+		return (EXIT_FAILURE);
+	}
+
+	if (opt_a != NULL) {
+		size_t sz;
+
+		if (!parse_size(opt_a, &sz)) {
+			/* Report first: close() may overwrite errno. */
+			perror("Invalid size");
+			usage(pname);
+			(void) close(fd);
+			return (EXIT_FAILURE);
+		}
+
+		do_add(fd, sz);
+	}
+	if (opt_r != NULL) {
+		size_t sz;
+
+		if (!parse_size(opt_r, &sz)) {
+			/* Report first: close() may overwrite errno. */
+			perror("Invalid size");
+			usage(pname);
+			(void) close(fd);
+			return (EXIT_FAILURE);
+		}
+		do_remove(fd, sz);
+	}
+	if (opt_q) {
+		do_query(fd);
+	}
+
+	(void) close(fd);
+	return (EXIT_SUCCESS);
+}
diff --git a/usr/src/lib/libipadm/common/ipadm_if.c b/usr/src/lib/libipadm/common/ipadm_if.c
index 41f22e4eeb..c140f4ca40 100644
--- a/usr/src/lib/libipadm/common/ipadm_if.c
+++ b/usr/src/lib/libipadm/common/ipadm_if.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2021, Tintri by DDN. All rights reserved.
*/
#include <errno.h>
@@ -82,12 +83,12 @@ i_ipadm_is_if_down(char *ifname, struct ifaddrs *ifa)
*/
static ipadm_status_t
i_ipadm_active_if_info(ipadm_handle_t iph, const char *ifname,
- ipadm_if_info_t **if_info, int64_t lifc_flags)
+ ipadm_if_info_list_t **if_info, int64_t lifc_flags)
{
struct lifreq *buf;
struct lifreq *lifrp;
struct lifreq lifrl;
- ipadm_if_info_t *last = NULL;
+ ipadm_if_info_list_t *ifl, *last = NULL;
ipadm_if_info_t *ifp;
int s;
int n;
@@ -117,24 +118,26 @@ i_ipadm_active_if_info(ipadm_handle_t iph, const char *ifname,
* Check if the interface already exists in our list.
* If it already exists, we need to update its flags.
*/
- for (ifp = *if_info; ifp != NULL; ifp = ifp->ifi_next) {
+ for (ifl = *if_info; ifl != NULL; ifl = ifl->ifil_next) {
+ ifp = &ifl->ifil_ifi;
if (strcmp(lifrp->lifr_name, ifp->ifi_name) == 0)
break;
}
- if (ifp == NULL) {
- ifp = calloc(1, sizeof (ipadm_if_info_t));
- if (ifp == NULL) {
+ if (ifl == NULL) {
+ ifl = calloc(1, sizeof (ipadm_if_info_list_t));
+ if (ifl == NULL) {
status = ipadm_errno2status(errno);
goto fail;
}
+ ifp = &ifl->ifil_ifi;
(void) strlcpy(ifp->ifi_name, lifrp->lifr_name,
sizeof (ifp->ifi_name));
- /* Update the `ifi_next' pointer for this new node */
+ /* Update the `ifil_next' pointer for this new node */
if (*if_info == NULL)
- *if_info = ifp;
+ *if_info = ifl;
else
- last->ifi_next = ifp;
- last = ifp;
+ last->ifil_next = ifl;
+ last = ifl;
}
/*
@@ -188,12 +191,13 @@ fail:
*/
static ipadm_status_t
i_ipadm_persist_if_info(ipadm_handle_t iph, const char *ifname,
- ipadm_if_info_t **if_info)
+ ipadm_if_info_list_t **if_info)
{
ipadm_status_t status = IPADM_SUCCESS;
ipmgmt_getif_arg_t getif;
ipmgmt_getif_rval_t *rvalp;
- ipadm_if_info_t *ifp, *curr, *prev = NULL;
+ ipadm_if_info_t *ifp;
+ ipadm_if_info_list_t *curr, *prev = NULL;
int i = 0, err = 0;
bzero(&getif, sizeof (getif));
@@ -225,8 +229,8 @@ i_ipadm_persist_if_info(ipadm_handle_t iph, const char *ifname,
ipadm_free_if_info(prev);
break;
}
- (void) bcopy(ifp, curr, sizeof (*curr));
- curr->ifi_next = prev;
+ (void) bcopy(ifp, &curr->ifil_ifi, sizeof (*ifp));
+ curr->ifil_next = prev;
prev = curr;
}
*if_info = curr;
@@ -242,14 +246,16 @@ i_ipadm_persist_if_info(ipadm_handle_t iph, const char *ifname,
*/
ipadm_status_t
i_ipadm_get_all_if_info(ipadm_handle_t iph, const char *ifname,
- ipadm_if_info_t **if_info, int64_t lifc_flags)
+ ipadm_if_info_list_t **if_info, int64_t lifc_flags)
{
ipadm_status_t status;
- ipadm_if_info_t *aifinfo = NULL;
- ipadm_if_info_t *pifinfo = NULL;
+ ipadm_if_info_list_t *aifinfo = NULL;
+ ipadm_if_info_list_t *pifinfo = NULL;
+ ipadm_if_info_list_t *last = NULL;
+ ipadm_if_info_list_t *aifl;
+ ipadm_if_info_list_t *pifl;
ipadm_if_info_t *aifp;
ipadm_if_info_t *pifp;
- ipadm_if_info_t *last = NULL;
struct ifaddrs *ifa;
struct ifaddrs *ifap;
@@ -269,7 +275,9 @@ retry:
status = ipadm_errno2status(errno);
goto fail;
}
- for (aifp = aifinfo; aifp != NULL; aifp = aifp->ifi_next) {
+ for (aifl = aifinfo; aifl != NULL; aifl = aifl->ifil_next) {
+ aifp = &aifl->ifil_ifi;
+
/*
* Find the `ifaddrs' structure from `ifa'
* for this interface. We need the IFF_* flags
@@ -299,8 +307,8 @@ retry:
aifp->ifi_state = IFIS_DOWN;
else
aifp->ifi_state = IFIS_OK;
- if (aifp->ifi_next == NULL)
- last = aifp;
+ if (aifl->ifil_next == NULL)
+ last = aifl;
}
freeifaddrs(ifa);
}
@@ -321,27 +329,29 @@ retry:
* `aifinfo', it means that this interface was disabled. We should
* add this interface to `aifinfo' and set it state to IFIF_DISABLED.
*/
- for (pifp = pifinfo; pifp != NULL; pifp = pifp->ifi_next) {
- for (aifp = aifinfo; aifp != NULL; aifp = aifp->ifi_next) {
+ for (pifl = pifinfo; pifl != NULL; pifl = pifl->ifil_next) {
+ pifp = &pifl->ifil_ifi;
+ for (aifl = aifinfo; aifl != NULL; aifl = aifl->ifil_next) {
+ aifp = &aifl->ifil_ifi;
if (strcmp(aifp->ifi_name, pifp->ifi_name) == 0) {
aifp->ifi_pflags = pifp->ifi_pflags;
break;
}
}
- if (aifp == NULL) {
- aifp = malloc(sizeof (ipadm_if_info_t));
- if (aifp == NULL) {
+ if (aifl == NULL) {
+ aifl = malloc(sizeof (ipadm_if_info_list_t));
+ if (aifl == NULL) {
status = ipadm_errno2status(errno);
goto fail;
}
- *aifp = *pifp;
- aifp->ifi_next = NULL;
- aifp->ifi_state = IFIS_DISABLED;
+ *aifl = *pifl;
+ aifl->ifil_next = NULL;
+ aifl->ifil_ifi.ifi_state = IFIS_DISABLED;
if (last != NULL)
- last->ifi_next = aifp;
+ last->ifil_next = aifl;
else
- aifinfo = aifp;
- last = aifp;
+ aifinfo = aifl;
+ last = aifl;
}
}
*if_info = aifinfo;
@@ -375,7 +385,7 @@ ipadm_status_t
i_ipadm_if_pexists(ipadm_handle_t iph, const char *ifname, sa_family_t af,
boolean_t *exists)
{
- ipadm_if_info_t *ifinfo;
+ ipadm_if_info_list_t *ifinfo;
ipadm_status_t status;
/*
@@ -390,9 +400,9 @@ i_ipadm_if_pexists(ipadm_handle_t iph, const char *ifname, sa_family_t af,
status = i_ipadm_persist_if_info(iph, ifname, &ifinfo);
if (status == IPADM_SUCCESS) {
*exists = ((af == AF_INET &&
- (ifinfo->ifi_pflags & IFIF_IPV4)) ||
+ (ifinfo->ifil_ifi.ifi_pflags & IFIF_IPV4)) ||
(af == AF_INET6 &&
- (ifinfo->ifi_pflags & IFIF_IPV6)));
+ (ifinfo->ifil_ifi.ifi_pflags & IFIF_IPV6)));
free(ifinfo);
} else if (status == IPADM_NOTFOUND) {
status = IPADM_SUCCESS;
@@ -495,7 +505,7 @@ i_ipadm_create_ipmp_peer(ipadm_handle_t iph, char *ifname, sa_family_t af)
lifgroupinfo_t lifgr;
ipadm_status_t status = IPADM_SUCCESS;
struct lifreq lifr;
- int other_af_sock;
+ int other_af_sock;
assert(af == AF_INET || af == AF_INET6);
@@ -1387,7 +1397,7 @@ ipadm_delete_if(ipadm_handle_t iph, const char *ifname, sa_family_t af,
* If af is AF_UNSPEC, then we return the following:
* status1, if status1 == status2
* IPADM_SUCCESS, if either of status1 or status2 is SUCCESS
- * and the other status is ENXIO
+ * and the other status is ENXIO
* IPADM_ENXIO, if both status1 and status2 are ENXIO
* IPADM_FAILURE otherwise.
*/
@@ -1411,12 +1421,12 @@ ipadm_delete_if(ipadm_handle_t iph, const char *ifname, sa_family_t af,
* identified by `ifname'.
*
* Return values:
- * On success: IPADM_SUCCESS.
- * On error : IPADM_INVALID_ARG, IPADM_ENXIO or IPADM_FAILURE.
+ * On success: IPADM_SUCCESS.
+ * On error : IPADM_INVALID_ARG, IPADM_ENXIO or IPADM_FAILURE.
*/
ipadm_status_t
ipadm_if_info(ipadm_handle_t iph, const char *ifname,
- ipadm_if_info_t **if_info, uint32_t flags, int64_t lifc_flags)
+ ipadm_if_info_list_t **if_info, uint32_t flags, int64_t lifc_flags)
{
ipadm_status_t status;
ifspec_t ifsp;
@@ -1442,12 +1452,12 @@ ipadm_if_info(ipadm_handle_t iph, const char *ifname,
* Frees the linked list allocated by ipadm_if_info().
*/
void
-ipadm_free_if_info(ipadm_if_info_t *ifinfo)
+ipadm_free_if_info(ipadm_if_info_list_t *ifinfo)
{
- ipadm_if_info_t *ifinfo_next;
+ ipadm_if_info_list_t *ifinfo_next;
for (; ifinfo != NULL; ifinfo = ifinfo_next) {
- ifinfo_next = ifinfo->ifi_next;
+ ifinfo_next = ifinfo->ifil_next;
free(ifinfo);
}
}
diff --git a/usr/src/lib/libipadm/common/libipadm.h b/usr/src/lib/libipadm/common/libipadm.h
index 0d8e3fdd7b..0ae9d89e4b 100644
--- a/usr/src/lib/libipadm/common/libipadm.h
+++ b/usr/src/lib/libipadm/common/libipadm.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, Chris Fraire <cfraire@me.com>.
+ * Copyright 2021, Tintri by DDN. All rights reserved.
*/
#ifndef _LIBIPADM_H
#define _LIBIPADM_H
@@ -205,14 +206,27 @@ typedef enum {
IFIS_DISABLED /* Interface has been disabled. */
} ipadm_if_state_t;
+/*
+ * Declare ipadm_if_info_list_t as a container for ipadm_if_info_t.
+ *
+ * ipadm_if_info_t used to have a list pointer ifi_next for linking a number
+ * of ipadm_if_info_t's together. Even though this linking wasn't used in the
+ * data exchange between ipmgmtd and libipadm, this meant the structure wasn't
+ * safe for passing through the door between 32bit and 64bit processes.
+ */
typedef struct ipadm_if_info_s {
- struct ipadm_if_info_s *ifi_next;
char ifi_name[LIFNAMSIZ]; /* interface name */
ipadm_if_state_t ifi_state; /* see above */
uint_t ifi_cflags; /* current flags */
uint_t ifi_pflags; /* persistent flags */
} ipadm_if_info_t;
+typedef struct ipadm_if_info_list_s {
+ struct ipadm_if_info_list_s *ifil_next;
+ ipadm_if_info_t ifil_ifi;
+} ipadm_if_info_list_t;
+
+
/* ipadm_if_info_t flags */
#define IFIF_BROADCAST 0x00000001
#define IFIF_MULTICAST 0x00000002
@@ -279,8 +293,8 @@ extern ipadm_status_t ipadm_disable_if(ipadm_handle_t, const char *,
uint32_t);
extern ipadm_status_t ipadm_enable_if(ipadm_handle_t, const char *, uint32_t);
extern ipadm_status_t ipadm_if_info(ipadm_handle_t, const char *,
- ipadm_if_info_t **, uint32_t, int64_t);
-extern void ipadm_free_if_info(ipadm_if_info_t *);
+ ipadm_if_info_list_t **, uint32_t, int64_t);
+extern void ipadm_free_if_info(ipadm_if_info_list_t *);
extern ipadm_status_t ipadm_delete_if(ipadm_handle_t, const char *,
sa_family_t, uint32_t);
extern void ipadm_if_move(ipadm_handle_t, const char *);
@@ -362,7 +376,7 @@ extern ipadm_status_t ipadm_get_prop(ipadm_handle_t, const char *, char *,
/*
* miscellaneous helper functions.
*/
-extern const char *ipadm_status2str(ipadm_status_t);
+extern const char *ipadm_status2str(ipadm_status_t);
extern int ipadm_str2nvlist(const char *, nvlist_t **, uint_t);
extern size_t ipadm_nvlist2str(nvlist_t *, char *, size_t);
extern char *ipadm_proto2str(uint_t);
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c
index ba3fb7f8dd..ec27949a43 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.c
+++ b/usr/src/lib/libvmmapi/common/vmmapi.c
@@ -39,7 +39,7 @@
*
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2019 Joyent, Inc.
- * Copyright 2020 Oxide Computer Company
+ * Copyright 2021 Oxide Computer Company
*/
#include <sys/cdefs.h>
@@ -109,12 +109,31 @@ struct vmctx {
#ifdef __FreeBSD__
#define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
#define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
-#else
-#define CREATE(x) vm_do_ctl(VMM_CREATE_VM, (x))
-#define DESTROY(x) vm_do_ctl(VMM_DESTROY_VM, (x))
+/*
+ * FreeBSD build: create a VM named `name' through the CREATE() wrapper,
+ * first attempting to load the vmm module when modfind() does not find it.
+ * Returns the result of CREATE().
+ */
+int
+vm_create(const char *name)
+{
+	const int have_vmm = (modfind("vmm") >= 0);
+
+	/* Try to load vmm(4) module before creating a guest. */
+	if (!have_vmm) {
+		kldload("vmm");
+	}
+
+	return (CREATE((char *)name));
+}
+
+/*
+ * FreeBSD build: close the VM's device fd if one is open, request
+ * destruction of the VM named by vm->name through DESTROY(), and free `vm'.
+ * `vm' must be non-NULL and must not be used afterwards.
+ */
+void
+vm_destroy(struct vmctx *vm)
+{
+	assert(vm != NULL);
+
+	if (vm->fd >= 0) {
+		close(vm->fd);
+	}
+	DESTROY(vm->name);
+
+	free(vm);
+}
+
+#else
static int
-vm_do_ctl(int cmd, const char *name)
+vm_do_ctl(int cmd, void *req)
{
int ctl_fd;
@@ -123,7 +142,7 @@ vm_do_ctl(int cmd, const char *name)
return (-1);
}
- if (ioctl(ctl_fd, cmd, name) == -1) {
+ if (ioctl(ctl_fd, cmd, req) == -1) {
int err = errno;
/* Do not lose ioctl errno through the close(2) */
@@ -135,6 +154,46 @@ vm_do_ctl(int cmd, const char *name)
return (0);
}
+
+/*
+ * Create a VM named `name' by sending a VMM_CREATE_VM request, carrying
+ * `flags', through vm_do_ctl().
+ *
+ * Returns the result of vm_do_ctl().  A name of VM_MAX_NAMELEN characters
+ * or more is rejected up front with -1 and errno set to ENAMETOOLONG,
+ * because strncpy() would otherwise leave req.name without a terminating
+ * NUL.
+ */
+int
+vm_create(const char *name, uint64_t flags)
+{
+	/* Zeroed so no uninitialized stack bytes go out in the request. */
+	struct vm_create_req req = { 0 };
+
+	if (strlen(name) >= VM_MAX_NAMELEN) {
+		errno = ENAMETOOLONG;
+		return (-1);
+	}
+
+	/* The length check above guarantees this copy is NUL-terminated. */
+	(void) strncpy(req.name, name, VM_MAX_NAMELEN);
+	req.flags = flags;
+
+	return (vm_do_ctl(VMM_CREATE_VM, &req));
+}
+
+/*
+ * Close the VM's device fd and free `vm'.  `vm' must be non-NULL and hold
+ * an open fd (both are asserted); it must not be used afterwards.
+ */
+void
+vm_close(struct vmctx *vm)
+{
+	assert(vm != NULL);
+	assert(vm->fd >= 0);
+
+	(void) close(vm->fd);
+	free(vm);
+}
+
+/*
+ * Close the VM's device fd if one is open, send a VMM_DESTROY_VM request
+ * for the VM named by vm->name through vm_do_ctl(), and free `vm'.  The
+ * result of the destroy request is ignored.  `vm' must be non-NULL and
+ * must not be used afterwards.
+ */
+void
+vm_destroy(struct vmctx *vm)
+{
+	/* Zeroed so no uninitialized stack bytes go out in the request. */
+	struct vm_destroy_req req = { 0 };
+
+	assert(vm != NULL);
+
+	if (vm->fd >= 0) {
+		(void) close(vm->fd);
+		vm->fd = -1;
+	}
+
+	/* strncpy() adds no NUL when the source fills the whole buffer. */
+	(void) strncpy(req.name, vm->name, VM_MAX_NAMELEN);
+	req.name[VM_MAX_NAMELEN - 1] = '\0';
+	(void) vm_do_ctl(VMM_DESTROY_VM, &req);
+
+	free(vm);
+}
#endif
static int
@@ -155,17 +214,6 @@ vm_device_open(const char *name)
return (fd);
}
-int
-vm_create(const char *name)
-{
-#ifdef __FreeBSD__
- /* Try to load vmm(4) module before creating a guest. */
- if (modfind("vmm") < 0)
- kldload("vmm");
-#endif
- return (CREATE((char *)name));
-}
-
struct vmctx *
vm_open(const char *name)
{
@@ -189,30 +237,6 @@ err:
return (NULL);
}
-#ifndef __FreeBSD__
-void
-vm_close(struct vmctx *vm)
-{
- assert(vm != NULL);
- assert(vm->fd >= 0);
-
- (void) close(vm->fd);
-
- free(vm);
-}
-#endif
-
-void
-vm_destroy(struct vmctx *vm)
-{
- assert(vm != NULL);
-
- if (vm->fd >= 0)
- close(vm->fd);
- DESTROY(vm->name);
-
- free(vm);
-}
int
vm_parse_memsize(const char *optarg, size_t *ret_memsize)
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h
index 79c7dc02ee..e239b70a56 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.h
+++ b/usr/src/lib/libvmmapi/common/vmmapi.h
@@ -39,7 +39,7 @@
*
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2019 Joyent, Inc.
- * Copyright 2020 Oxide Computer Company
+ * Copyright 2021 Oxide Computer Company
*/
#ifndef _VMMAPI_H_
@@ -134,7 +134,11 @@ int vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid,
int vm_munmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, size_t len);
+#ifndef __FreeBSD__
+int vm_create(const char *name, uint64_t flags);
+#else
int vm_create(const char *name);
+#endif /* __FreeBSD__ */
int vm_get_device_fd(struct vmctx *ctx);
struct vmctx *vm_open(const char *name);
#ifndef __FreeBSD__
diff --git a/usr/src/pkg/manifests/system-bhyve.mf b/usr/src/pkg/manifests/system-bhyve.mf
index 0495d9f649..3f67fa743e 100644
--- a/usr/src/pkg/manifests/system-bhyve.mf
+++ b/usr/src/pkg/manifests/system-bhyve.mf
@@ -48,6 +48,7 @@ file path=usr/kernel/drv/$(ARCH64)/vmm
file path=usr/kernel/drv/ppt.conf
file path=usr/kernel/drv/viona.conf
file path=usr/kernel/drv/vmm.conf
+file path=usr/lib/rsrvrctl mode=0555
file path=usr/sbin/bhyve mode=0555
file path=usr/sbin/bhyvectl mode=0555
file path=usr/sbin/pptadm mode=0555
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index d88d229363..26cc3b0824 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -30,8 +30,8 @@
* Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
- * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
* Copyright 2020 Joshua M. Clulow <josh@sysmgr.org>
+ * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
*/
/*
@@ -1731,13 +1731,15 @@ spa_load_l2cache(spa_t *spa)
ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
+ nl2cache = 0;
+ newvdevs = NULL;
if (sav->sav_config != NULL) {
VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
- newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
- } else {
- nl2cache = 0;
- newvdevs = NULL;
+ if (nl2cache > 0) {
+ newvdevs = kmem_alloc(
+ nl2cache * sizeof (void *), KM_SLEEP);
+ }
}
oldvdevs = sav->sav_vdevs;
@@ -1829,7 +1831,11 @@ spa_load_l2cache(spa_t *spa)
VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
DATA_TYPE_NVLIST_ARRAY) == 0);
- l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
+ l2cache = NULL;
+ if (sav->sav_count > 0) {
+ l2cache = kmem_alloc(
+ sav->sav_count * sizeof (void *), KM_SLEEP);
+ }
for (i = 0; i < sav->sav_count; i++)
l2cache[i] = vdev_config_generate(spa,
sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE);
diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h
index ae9b0be758..29466d15ef 100644
--- a/usr/src/uts/common/vm/page.h
+++ b/usr/src/uts/common/vm/page.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2017, Joyent, Inc.
+ * Copyright 2021 Oxide Computer Company
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -342,13 +343,13 @@ struct as;
*
* So, as a quick summary:
*
- * pse_mutex[]'s protect the p_selock and p_cv fields.
+ * pse_mutex[]'s protect the p_selock and p_cv fields.
*
- * p_selock protects the p_free, p_age, p_vnode, p_offset and p_hash,
+ * p_selock protects the p_free, p_age, p_vnode, p_offset and p_hash,
*
- * ph_mutex[]'s protect the page_hash[] array and its chains.
+ * ph_mutex[]'s protect the page_hash[] array and its chains.
*
- * vph_mutex[]'s protect the v_pages field and the vp page chains.
+ * vph_mutex[]'s protect the v_pages field and the vp page chains.
*
* First lock the page, then the hash chain, then the vnode chain. When
* this is not possible `trylocks' must be used. Sleeping while holding
@@ -763,6 +764,7 @@ void page_lock_delete(page_t *);
int page_deleted(page_t *);
int page_pp_lock(page_t *, int, int);
void page_pp_unlock(page_t *, int, int);
+int page_xresv(pgcnt_t, uint_t, int (*)(void));
int page_resv(pgcnt_t, uint_t);
void page_unresv(pgcnt_t);
void page_pp_useclaim(page_t *, page_t *, uint_t);
@@ -1079,7 +1081,7 @@ typedef struct kpm_hlk {
* The state about how a kpm page is mapped and whether it is ready to go
* is indicated by the following 1 byte kpm_spage structure. This byte is
* split into two 4-bit parts - kp_mapped and kp_mapped_go.
- * - kp_mapped == 1 the page is mapped cacheable
+ * - kp_mapped == 1 the page is mapped cacheable
* - kp_mapped == 2 the page is mapped non-cacheable
* - kp_mapped_go == 1 the mapping is ready to be dropped in
* - kp_mapped_go == 0 the mapping is not ready to be dropped in.
diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c
index b3a3e03fa3..3806c25533 100644
--- a/usr/src/uts/common/vm/vm_page.c
+++ b/usr/src/uts/common/vm/vm_page.c
@@ -23,6 +23,7 @@
* Copyright (c) 2015, Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
* Copyright (c) 2015, 2016 by Delphix. All rights reserved.
* Copyright 2018 Joyent, Inc.
+ * Copyright 2021 Oxide Computer Company
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -3922,29 +3923,68 @@ page_pp_unlock(
}
/*
- * This routine reserves availrmem for npages;
- * flags: KM_NOSLEEP or KM_SLEEP
- * returns 1 on success or 0 on failure
+ * This routine reserves availrmem for npages.
+ * It returns 1 on success or 0 on failure.
+ *
+ * flags: KM_NOSLEEP or KM_SLEEP
+ * cb_wait: called to induce delay when KM_SLEEP reservation requires kmem
+ * reaping to potentially succeed. If the callback returns 0, the
+ * reservation attempts will cease to repeat and page_xresv() may
+ * report a failure. If cb_wait is NULL, the traditional delay(hz/4)
+ * behavior (i.e. delay(hz >> 2)) will be used while waiting for a reap.
*/
int
-page_resv(pgcnt_t npages, uint_t flags)
+page_xresv(pgcnt_t npages, uint_t flags, int (*cb_wait)(void))
{
mutex_enter(&freemem_lock);
- while (availrmem < tune.t_minarmem + npages) {
- if (flags & KM_NOSLEEP) {
- mutex_exit(&freemem_lock);
- return (0);
- }
+ if (availrmem >= tune.t_minarmem + npages) {
+ availrmem -= npages;
mutex_exit(&freemem_lock);
- page_needfree(npages);
- kmem_reap();
- delay(hz >> 2);
- page_needfree(-(spgcnt_t)npages);
- mutex_enter(&freemem_lock);
+ return (1);
+ } else if ((flags & KM_NOSLEEP) != 0) {
+ mutex_exit(&freemem_lock);
+ return (0);
}
- availrmem -= npages;
mutex_exit(&freemem_lock);
- return (1);
+
+ /*
+ * We signal memory pressure to the system by elevating 'needfree'.
+ * Processes such as kmem reaping, pageout, and ZFS ARC shrinking can
+ * then respond to said pressure by freeing pages.
+ */
+ page_needfree(npages);
+ int nobail = 1;
+ do {
+ kmem_reap();
+ if (cb_wait == NULL) {
+ delay(hz >> 2);
+ } else {
+ nobail = cb_wait();
+ }
+
+ mutex_enter(&freemem_lock);
+ if (availrmem >= tune.t_minarmem + npages) {
+ availrmem -= npages;
+ mutex_exit(&freemem_lock);
+ page_needfree(-(spgcnt_t)npages);
+ return (1);
+ }
+ mutex_exit(&freemem_lock);
+ } while (nobail != 0);
+ page_needfree(-(spgcnt_t)npages);
+
+ return (0);
+}
+
+/*
+ * This routine reserves availrmem for npages;
+ * flags: KM_NOSLEEP or KM_SLEEP
+ * returns 1 on success or 0 on failure
+ */
+int
+page_resv(pgcnt_t npages, uint_t flags)
+{
+ return (page_xresv(npages, flags, NULL));
}
/*
diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files
index 4370e90d9a..0a3fad877c 100644
--- a/usr/src/uts/i86pc/Makefile.files
+++ b/usr/src/uts/i86pc/Makefile.files
@@ -23,7 +23,6 @@
# Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
#
# Copyright (c) 2010, Intel Corporation.
-# Copyright 2019 Joyent, Inc.
# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
# Copyright 2019 Joyent, Inc.
# Copyright 2021 Oxide Computer Company
@@ -117,7 +116,6 @@ CORE_OBJS += \
pmem.o \
ppage.o \
pwrnow.o \
- seg_vmm.o \
smt.o \
speedstep.o \
ssp.o \
@@ -271,6 +269,8 @@ VMM_OBJS += vmm.o \
svm_support.o \
amdv.o \
vmm_gpt.o \
+ seg_vmm.o \
+ vmm_reservoir.o \
vmm_sol_vm.o \
vmm_sol_glue.o \
vmm_sol_ept.o \
diff --git a/usr/src/uts/i86pc/vm/seg_vmm.c b/usr/src/uts/i86pc/io/vmm/seg_vmm.c
index beb5e81d53..23a8da3bc5 100644
--- a/usr/src/uts/i86pc/vm/seg_vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/seg_vmm.c
@@ -11,6 +11,7 @@
/*
* Copyright 2018 Joyent, Inc.
+ * Copyright 2021 Oxide Computer Company
*/
/*
@@ -40,7 +41,16 @@
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
-#include <vm/seg_vmm.h>
+
+#include <sys/seg_vmm.h>
+
+typedef struct segvmm_data {
+ krwlock_t svmd_lock;
+ vm_object_t svmd_obj;
+ uintptr_t svmd_obj_off;
+ uchar_t svmd_prot;
+ size_t svmd_softlockcnt;
+} segvmm_data_t;
static int segvmm_dup(struct seg *, struct seg *);
@@ -105,31 +115,14 @@ segvmm_create(struct seg **segpp, void *argsp)
segvmm_crargs_t *cra = argsp;
segvmm_data_t *data;
- /*
- * Check several aspects of the mapping request to ensure validity:
- * - kernel pages must reside entirely in kernel space
- * - target protection must be user-accessible
- * - kernel address must be page-aligned
- */
- if ((uintptr_t)cra->kaddr <= _userlimit ||
- ((uintptr_t)cra->kaddr + seg->s_size) < (uintptr_t)cra->kaddr ||
- (cra->prot & PROT_USER) == 0 ||
- ((uintptr_t)cra->kaddr & PAGEOFFSET) != 0) {
- return (EINVAL);
- }
-
data = kmem_zalloc(sizeof (*data), KM_SLEEP);
rw_init(&data->svmd_lock, NULL, RW_DEFAULT, NULL);
- data->svmd_kaddr = (uintptr_t)cra->kaddr;
+ data->svmd_obj = cra->obj;
+ data->svmd_obj_off = cra->offset;
data->svmd_prot = cra->prot;
- data->svmd_cookie = cra->cookie;
- data->svmd_hold = cra->hold;
- data->svmd_rele = cra->rele;
- /* Since initial checks have passed, grab a reference on the cookie */
- if (data->svmd_hold != NULL) {
- data->svmd_hold(data->svmd_cookie);
- }
+ /* Grab a hold on the VM object for the duration of this seg mapping */
+ vm_object_reference(data->svmd_obj);
seg->s_ops = &segvmm_ops;
seg->s_data = data;
@@ -146,16 +139,12 @@ segvmm_dup(struct seg *seg, struct seg *newseg)
newsvmd = kmem_zalloc(sizeof (segvmm_data_t), KM_SLEEP);
rw_init(&newsvmd->svmd_lock, NULL, RW_DEFAULT, NULL);
- newsvmd->svmd_kaddr = svmd->svmd_kaddr;
+ newsvmd->svmd_obj = svmd->svmd_obj;
+ newsvmd->svmd_obj_off = svmd->svmd_obj_off;
newsvmd->svmd_prot = svmd->svmd_prot;
- newsvmd->svmd_cookie = svmd->svmd_cookie;
- newsvmd->svmd_hold = svmd->svmd_hold;
- newsvmd->svmd_rele = svmd->svmd_rele;
/* Grab another hold for the duplicate segment */
- if (svmd->svmd_hold != NULL) {
- newsvmd->svmd_hold(newsvmd->svmd_cookie);
- }
+ vm_object_reference(svmd->svmd_obj);
newseg->s_ops = seg->s_ops;
newseg->s_data = newsvmd;
@@ -180,10 +169,8 @@ segvmm_unmap(struct seg *seg, caddr_t addr, size_t len)
/* Unconditionally unload the entire segment range. */
hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
- /* Release the hold this segment possessed */
- if (svmd->svmd_rele != NULL) {
- svmd->svmd_rele(svmd->svmd_cookie);
- }
+ /* Release the VM object hold this segment possessed */
+ vm_object_deallocate(svmd->svmd_obj);
seg_free(seg);
return (0);
@@ -206,41 +193,23 @@ static int
segvmm_fault_in(struct hat *hat, struct seg *seg, uintptr_t va, size_t len)
{
segvmm_data_t *svmd = seg->s_data;
- const uintptr_t koff = svmd->svmd_kaddr - (uintptr_t)seg->s_base;
const uintptr_t end = va + len;
const uintptr_t prot = svmd->svmd_prot;
- /* Stick to the simple non-large-page case for now */
va &= PAGEMASK;
-
+ /*
+ * Translate the faulting VA into an offset within the backing VM object.
+ * The segment may have been created at a non-zero offset into the object
+ * (svmd_obj_off), so that base must be included, just as it is when
+ * segvmm_getmemid() reports the object-relative position.
+ */
+ uintptr_t off = va - (uintptr_t)seg->s_base + svmd->svmd_obj_off;
do {
- htable_t *ht;
- uint_t entry, lvl;
- size_t psz;
pfn_t pfn;
- const uintptr_t kaddr = va + koff;
-
- ASSERT(kaddr >= (uintptr_t)svmd->svmd_kaddr);
- ASSERT(kaddr < ((uintptr_t)svmd->svmd_kaddr + seg->s_size));
- ht = htable_getpage(kas.a_hat, kaddr, &entry);
- if (ht == NULL) {
- return (-1);
- }
- lvl = ht->ht_level;
- pfn = PTE2PFN(x86pte_get(ht, entry), lvl);
- htable_release(ht);
+ pfn = vm_object_pfn(svmd->svmd_obj, off);
if (pfn == PFN_INVALID) {
return (-1);
}
- /* For the time being, handling for large pages is absent. */
- psz = PAGESIZE;
- pfn += mmu_btop(kaddr & LEVEL_OFFSET(lvl));
-
- hat_devload(hat, (caddr_t)va, psz, pfn, prot, HAT_LOAD);
-
- va = va + psz;
+ /* Ignore any large-page possibilities for now */
+ hat_devload(hat, (caddr_t)va, PAGESIZE, pfn, prot, HAT_LOAD);
+ va += PAGESIZE;
+ off += PAGESIZE;
} while (va < end);
return (0);
@@ -399,8 +368,8 @@ static int
segvmm_gettype(struct seg *seg, caddr_t addr)
{
/*
- * Since already-existing kernel pages are being mapped into userspace,
- * always report the segment type as shared.
+ * Since already-existing vmm reservoir pages are being mapped into
+ * userspace, always report the segment type as shared.
*/
return (MAP_SHARED);
}
@@ -457,8 +426,8 @@ segvmm_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
segvmm_data_t *svmd = seg->s_data;
- memidp->val[0] = (uintptr_t)svmd->svmd_kaddr;
- memidp->val[1] = (uintptr_t)(addr - seg->s_base);
+ memidp->val[0] = (uintptr_t)svmd->svmd_obj;
+ memidp->val[1] = (uintptr_t)(addr - seg->s_base) + svmd->svmd_obj_off;
return (0);
}
diff --git a/usr/src/uts/i86pc/io/vmm/sys/seg_vmm.h b/usr/src/uts/i86pc/io/vmm/sys/seg_vmm.h
new file mode 100644
index 0000000000..a4f72f816e
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/sys/seg_vmm.h
@@ -0,0 +1,30 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Joyent, Inc.
+ * Copyright 2021 Oxide Computer Company
+ */
+
+#ifndef _VM_SEG_VMM_H
+#define _VM_SEG_VMM_H
+
+#include <sys/vmm_vm.h>
+
+typedef struct segvmm_crargs {
+ uchar_t prot; /* protection */
+ vm_object_t obj;
+ uintptr_t offset;
+} segvmm_crargs_t;
+
+int segvmm_create(struct seg **, void *);
+
+#endif /* _VM_SEG_VMM_H */
diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_impl.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_impl.h
index 606be4bbae..2b6f41ec54 100644
--- a/usr/src/uts/i86pc/io/vmm/sys/vmm_impl.h
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_impl.h
@@ -40,7 +40,7 @@
struct vmm_devmem_entry {
list_node_t vde_node;
int vde_segid;
- char vde_name[SPECNAMELEN + 1];
+ char vde_name[VM_MAX_SEG_NAMELEN];
size_t vde_len;
off_t vde_off;
};
diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
index 8441b51e03..4191aaee5c 100644
--- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
@@ -115,7 +115,7 @@ struct vmm_ops {
extern struct vmm_ops vmm_ops_intel;
extern struct vmm_ops vmm_ops_amd;
-int vm_create(const char *name, struct vm **retvm);
+int vm_create(const char *name, uint64_t flags, struct vm **retvm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_reservoir.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_reservoir.h
new file mode 100644
index 0000000000..b8215ce654
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_reservoir.h
@@ -0,0 +1,40 @@
+
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2021 Oxide Computer Company
+ */
+
+#ifndef _SYS_VMM_RESERVOIR_H
+#define _SYS_VMM_RESERVOIR_H
+
+#include <sys/types.h>
+#include <sys/cred.h>
+
+struct vmmr_region;
+typedef struct vmmr_region vmmr_region_t;
+
+void vmmr_init();
+void vmmr_fini();
+bool vmmr_is_empty();
+
+int vmmr_alloc(size_t, bool, vmmr_region_t **);
+void *vmmr_region_mem_at(vmmr_region_t *, uintptr_t);
+pfn_t vmmr_region_pfn_at(vmmr_region_t *, uintptr_t);
+void vmmr_free(vmmr_region_t *);
+
+int vmmr_add(size_t, bool);
+int vmmr_remove(size_t, bool);
+
+int vmmr_ioctl(int, intptr_t, int, cred_t *, int *);
+
+#endif /* _SYS_VMM_RESERVOIR_H */
diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_vm.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_vm.h
index 6c7f9d423e..76d5fec8b7 100644
--- a/usr/src/uts/i86pc/io/vmm/sys/vmm_vm.h
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_vm.h
@@ -160,8 +160,6 @@ int vm_segmap_obj(vm_object_t, off_t, size_t, struct as *, caddr_t *, uint_t,
int vm_segmap_space(struct vmspace *, off_t, struct as *, caddr_t *, off_t,
uint_t, uint_t, uint_t);
void *vmspace_find_kva(struct vmspace *, uintptr_t, size_t);
-void vmm_arena_init(void);
-void vmm_arena_fini(void);
typedef int (*pmap_pinit_t)(struct pmap *pmap);
@@ -171,13 +169,12 @@ void vmspace_free(struct vmspace *);
int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int);
int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
vm_prot_t prot, vm_page_t *ma, int max_count);
-void vmm_arena_fini(void);
-
-struct vm_object *vm_object_allocate(objtype_t, vm_pindex_t);
+struct vm_object *vm_object_allocate(objtype_t, vm_pindex_t, bool);
void vm_object_deallocate(vm_object_t);
void vm_object_reference(vm_object_t);
int vm_object_set_memattr(vm_object_t, vm_memattr_t);
+pfn_t vm_object_pfn(vm_object_t, uintptr_t);
#define VM_OBJECT_WLOCK(vmo) mutex_enter(&(vmo)->vmo_lock)
#define VM_OBJECT_WUNLOCK(vmo) mutex_exit(&(vmo)->vmo_lock)
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 425969d622..80c9ec6bd7 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -39,7 +39,7 @@
*
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2018 Joyent, Inc.
- * Copyright 2020 Oxide Computer Company
+ * Copyright 2021 Oxide Computer Company
* Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
*/
@@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>
+#include <sys/sunddi.h>
#include <machine/pcb.h>
#include <machine/smp.h>
@@ -191,6 +192,8 @@ struct vm {
uint64_t boot_tsc_offset; /* (i) TSC offset at VM boot */
struct ioport_config ioports; /* (o) ioport handling */
+
+ bool mem_transient; /* (o) alloc transient memory */
};
static int vmm_initialized;
@@ -490,7 +493,7 @@ uint_t cores_per_package = 1;
uint_t threads_per_core = 1;
int
-vm_create(const char *name, struct vm **retvm)
+vm_create(const char *name, uint64_t flags, struct vm **retvm)
{
struct vm *vm;
struct vmspace *vmspace;
@@ -502,8 +505,8 @@ vm_create(const char *name, struct vm **retvm)
if (!vmm_initialized)
return (ENXIO);
- if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
- return (EINVAL);
+ /* Name validation has already occurred */
+ VERIFY3U(strnlen(name, VM_MAX_NAMELEN), <, VM_MAX_NAMELEN);
vmspace = VMSPACE_ALLOC(0, VM_MAXUSER_ADDRESS);
if (vmspace == NULL)
@@ -512,6 +515,7 @@ vm_create(const char *name, struct vm **retvm)
vm = malloc(sizeof (struct vm), M_VM, M_WAITOK | M_ZERO);
strcpy(vm->name, name);
vm->vmspace = vmspace;
+ vm->mem_transient = (flags & VCF_RESERVOIR_MEM) == 0;
vm->sockets = 1;
vm->cores = cores_per_package; /* XXX backwards compatibility */
@@ -708,21 +712,12 @@ vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
struct mem_seg *seg;
vm_object_t obj;
-#ifndef __FreeBSD__
- extern pgcnt_t get_max_page_get(void);
-#endif
-
if (ident < 0 || ident >= VM_MAX_MEMSEGS)
return (EINVAL);
if (len == 0 || (len & PAGE_MASK))
return (EINVAL);
-#ifndef __FreeBSD__
- if (len > ptob(get_max_page_get()))
- return (EINVAL);
-#endif
-
seg = &vm->mem_segs[ident];
if (seg->object != NULL) {
if (seg->len == len && seg->sysmem == sysmem)
@@ -731,7 +726,8 @@ vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
return (EINVAL);
}
- obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
+ obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT,
+ vm->mem_transient);
if (obj == NULL)
return (ENOMEM);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_reservoir.c b/usr/src/uts/i86pc/io/vmm/vmm_reservoir.c
new file mode 100644
index 0000000000..1bb64a4851
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_reservoir.c
@@ -0,0 +1,820 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2021 Oxide Computer Company
+ */
+
+/*
+ * VMM Memory Reservoir
+ *
+ *
+ * In order to make the allocation of large (multi-GiB) chunks of memory
+ * for bhyve VMs easier, we introduce the "VMM Reservoir", where system
+ * operators can set aside a substantial portion of system memory exclusively
+ * for VMs. This memory is unavailable for general use by the rest of the
+ * system. Rather than having to scour the freelist, reap kmem caches, or put
+ * pressure on the ARC, bhyve guest memory allocations can quickly determine if
+ * there is adequate reservoir memory available. Since the pages stored in the
+ * reservoir are pre-zeroed, it can be immediately used when allocated to a
+ * guest. When the memory is returned to the reservoir, it is zeroed once more
+ * to avoid leaking any sensitive data from that guest.
+ *
+ *
+ * Transient Allocations
+ *
+ * While the explicit reservoir model may work well for some applications,
+ * others may want a more traditional model, where pages for guest memory
+ * objects are allocated on demand, rather than from a pool set aside from the
+ * system. In this case, the allocation can be made in "transient" mode, where
+ * the memory is allocated normally, even if there is free capacity in the
+ * reservoir. When use of the transient allocation is complete (the guest is
+ * halted and destroyed), the pages will be freed back to the system, rather
+ * than added back to the reservoir.
+ *
+ * From an implementation standpoint, transient allocations follow the same
+ * code paths as ones using the reservoir normally. Those allocations have a
+ * tag which marks them as transient, and used/free size tallies are maintained
+ * separately for normal and transient operations. When performing a transient
+ * allocation, that amount of memory is immediately added to the reservoir,
+ * from which the allocation can be made. When freeing a transient allocation,
+ * a matching amount of memory is removed from the reservoir as part of the
+ * operation. This allows both allocation types to coexist without too much
+ * additional machinery.
+ *
+ *
+ * Administration
+ *
+ * Operators may increase, decrease, and query the amount of memory
+ * allocated to the reservoir, and from it to VMs, via ioctls against the
+ * vmmctl device. The total amount added to the reservoir is arbitrarily
+ * limited at this time by `vmmr_total_limit`, which vmmr_init() sizes as all
+ * of physmem less 120% of pages_pp_maximum. This is done to prevent the
+ * reservoir from inadvertently growing to a size where the system has
+ * inadequate memory to make forward progress. Memory may only be removed
+ * from the reservoir when it is free (not allocated by any guest VMs).
+ *
+ *
+ * Page Tracking
+ *
+ * The reservoir currently uses vnode association to keep track of pages under
+ * its control (either designated to the reservoir and free, or allocated to a
+ * guest VM object). This means using the existing VM system primitives for
+ * page_t instances being associated with a given (vnode, offset) tuple. It
+ * means that spans of pages, either free or allocated, need only to store a
+ * length (of the span) and an offset (into the vnode) in order to gain access
+ * to all of the underlying pages associated with that span. Associating the
+ * pages against `kvps[KV_VVP]` (the VMM kernel vnode) means they will be
+ * properly tracked as KAS pages, but be excluded from normal dumps (unless the
+ * operator has chosen to dump all of RAM).
+ */
+
+#include <sys/types.h>
+#include <sys/mutex.h>
+#include <sys/avl.h>
+#include <sys/list.h>
+#include <sys/machparam.h>
+#include <sys/kmem.h>
+#include <sys/stddef.h>
+#include <sys/null.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/sunddi.h>
+#include <sys/policy.h>
+#include <vm/seg_kmem.h>
+#include <vm/hat_i86.h>
+
+#include <sys/vmm_reservoir.h>
+#include <sys/vmm_dev.h>
+
+static kmutex_t vmmr_lock;
+
+static size_t vmmr_free_sz;
+static size_t vmmr_free_transient_sz;
+static size_t vmmr_adding_sz;
+static size_t vmmr_alloc_sz;
+static size_t vmmr_alloc_transient_sz;
+static size_t vmmr_empty_sz;
+
+static uintptr_t vmmr_empty_last;
+/* Upper limit for the size (free + allocated) of the reservoir */
+static size_t vmmr_total_limit;
+
+/* VA range allocated from the VMM arena for the mappings */
+static uintptr_t vmmr_va;
+static uintptr_t vmmr_va_sz;
+
+/* Pair of AVL trees to store set of spans ordered by addr and size */
+typedef struct vmmr_treepair {
+ avl_tree_t by_addr;
+ avl_tree_t by_size;
+} vmmr_treepair_t;
+
+/* Spans of free memory in the reservoir */
+static vmmr_treepair_t vmmr_free_tp;
+
+/* Spans of empty (not backed by memory) space in the reservoir */
+static vmmr_treepair_t vmmr_empty_tp;
+
+/* Regions of memory allocated from the reservoir */
+static list_t vmmr_alloc_regions;
+
+struct vmmr_span {
+ uintptr_t vs_addr;
+ size_t vs_size;
+ avl_node_t vs_by_addr;
+ avl_node_t vs_by_size;
+ uintptr_t vs_region_addr;
+};
+typedef struct vmmr_span vmmr_span_t;
+
+struct vmmr_region {
+ size_t vr_size;
+ avl_tree_t vr_spans;
+ list_node_t vr_node;
+ bool vr_transient;
+};
+
+/*
+ * AVL comparator which orders spans by their vs_addr field.
+ * Returns -1, 0, or 1 as required by avl_create().
+ */
+static int
+vmmr_cmp_addr(const void *a, const void *b)
+{
+	const uintptr_t addr_a = ((const vmmr_span_t *)a)->vs_addr;
+	const uintptr_t addr_b = ((const vmmr_span_t *)b)->vs_addr;
+
+	if (addr_a < addr_b) {
+		return (-1);
+	}
+	return ((addr_a > addr_b) ? 1 : 0);
+}
+
+/*
+ * AVL comparator which orders spans by vs_size, falling back to vs_addr for
+ * spans of equal size.
+ */
+static int
+vmmr_cmp_size(const void *a, const void *b)
+{
+	const size_t size_a = ((const vmmr_span_t *)a)->vs_size;
+	const size_t size_b = ((const vmmr_span_t *)b)->vs_size;
+
+	if (size_a < size_b) {
+		return (-1);
+	}
+	if (size_a > size_b) {
+		return (1);
+	}
+
+	/*
+	 * Distinct spans may share a size.  AVL needs a total order over its
+	 * nodes, so equal-sized spans are ranked by address instead, letting
+	 * them coexist in the by-size tree while staying sorted.
+	 */
+	return (vmmr_cmp_addr(a, b));
+}
+
+/*
+ * AVL comparator which orders spans by their vs_region_addr field, as used
+ * for the per-region vr_spans tree.
+ */
+static int
+vmmr_cmp_region_addr(const void *a, const void *b)
+{
+	const uintptr_t ra = ((const vmmr_span_t *)a)->vs_region_addr;
+	const uintptr_t rb = ((const vmmr_span_t *)b)->vs_region_addr;
+
+	if (ra < rb) {
+		return (-1);
+	}
+	return ((ra > rb) ? 1 : 0);
+}
+
+/*
+ * Create the two AVL trees of a treepair.  Both trees hold vmmr_span_t nodes:
+ * by_addr links through vs_by_addr and by_size links through vs_by_size.
+ */
+static void
+vmmr_tp_init(vmmr_treepair_t *tree)
+{
+	const size_t span_sz = sizeof (vmmr_span_t);
+
+	avl_create(&tree->by_addr, vmmr_cmp_addr, span_sz,
+	    offsetof(vmmr_span_t, vs_by_addr));
+	avl_create(&tree->by_size, vmmr_cmp_size, span_sz,
+	    offsetof(vmmr_span_t, vs_by_size));
+}
+
+/*
+ * Tear down both trees of a treepair and free every span still held in it.
+ *
+ * Each span is linked into both trees, so the by-addr tree is emptied first
+ * without freeing anything, and the spans are released while emptying the
+ * by-size tree.
+ */
+static void
+vmmr_tp_destroy(vmmr_treepair_t *tree)
+{
+	void *vcp;
+	vmmr_span_t *span;
+
+	vcp = NULL;
+	while (avl_destroy_nodes(&tree->by_addr, &vcp) != NULL) {
+		/* Freeing spans will be done when tearing down by-size tree */
+	}
+
+	/*
+	 * The cookie carries the state of a single tree walk and must start
+	 * out as NULL for each tree.  Reusing the (now non-NULL) cookie left
+	 * over from the by-addr walk would make the by-size walk terminate
+	 * immediately, leaking every span.
+	 */
+	vcp = NULL;
+	while ((span = avl_destroy_nodes(&tree->by_size, &vcp)) != NULL) {
+		kmem_free(span, sizeof (*span));
+	}
+
+	avl_destroy(&tree->by_addr);
+	avl_destroy(&tree->by_size);
+}
+
+/*
+ * Insert a vmmr_span_t into a treepair, concatenating if possible with adjacent
+ * span(s). Such concatenation could result in the `to_add` span being freed,
+ * so the caller cannot use it after this returns.
+ *
+ * Three outcomes are possible:
+ * - no neighbor is adjacent: `to_add` is inserted into both trees as-is
+ * - only the preceding span is adjacent: it absorbs `to_add` and is reinserted
+ * - the trailing span is adjacent: it absorbs `to_add` (and the preceding
+ *   span, if that was merged first) and is reinserted at the lower address
+ */
+static void
+vmmr_tp_insert_concat(vmmr_span_t *to_add, vmmr_treepair_t *tree)
+{
+	avl_tree_t *by_addr = &tree->by_addr;
+	avl_tree_t *by_size = &tree->by_size;
+	vmmr_span_t *node;
+	avl_index_t where;
+
+	/* This addr should not already exist in the treepair */
+	node = avl_find(by_addr, to_add, &where);
+	ASSERT3P(node, ==, NULL);
+
+	node = avl_nearest(by_addr, where, AVL_BEFORE);
+	if (node != NULL &&
+	    (node->vs_addr + node->vs_size) == to_add->vs_addr) {
+		/* concat with preceding item */
+		avl_remove(by_addr, node);
+		avl_remove(by_size, node);
+		node->vs_size += to_add->vs_size;
+		kmem_free(to_add, sizeof (*to_add));
+
+		/*
+		 * Since this now-concatenated span could be adjacent to one
+		 * trailing it, fall through to perform that check.
+		 */
+		to_add = node;
+
+		/*
+		 * `where` was computed before the avl_remove() calls above and
+		 * may refer to the node which was just taken out of the tree.
+		 * Recompute the insertion point for the merged span so that
+		 * the avl_nearest() lookup below operates on a valid index.
+		 */
+		node = avl_find(by_addr, to_add, &where);
+		ASSERT3P(node, ==, NULL);
+	}
+
+	node = avl_nearest(by_addr, where, AVL_AFTER);
+	if (node != NULL &&
+	    (to_add->vs_addr + to_add->vs_size) == node->vs_addr) {
+		/*
+		 * concat with trailing item: its start address (and thus its
+		 * sort position in both trees) changes, so it must be removed
+		 * and re-added around the update.
+		 */
+		avl_remove(by_addr, node);
+		avl_remove(by_size, node);
+		node->vs_addr = to_add->vs_addr;
+		node->vs_size += to_add->vs_size;
+		avl_add(by_addr, node);
+		avl_add(by_size, node);
+
+		kmem_free(to_add, sizeof (*to_add));
+		return;
+	}
+
+	/* simply insert */
+	avl_add(by_addr, to_add);
+	avl_add(by_size, to_add);
+}
+
+/*
+ * Take a span of up to `target_sz` bytes out of a treepair.
+ *
+ * Preference order: a span found by the exact-size lookup, then the next
+ * larger span (from which the tail `target_sz` bytes are carved into a newly
+ * allocated span), and finally the nearest smaller span.  In that last case
+ * the returned span is smaller than `target_sz` and the caller is expected to
+ * call again for the remainder.  The treepair must not be empty.
+ */
+static vmmr_span_t *
+vmmr_tp_remove_split(size_t target_sz, vmmr_treepair_t *tree)
+{
+	avl_tree_t *by_addr = &tree->by_addr;
+	avl_tree_t *by_size = &tree->by_size;
+	avl_index_t idx;
+
+	ASSERT3U(target_sz, !=, 0);
+	ASSERT(!avl_is_empty(by_addr));
+	ASSERT(!avl_is_empty(by_size));
+
+	vmmr_span_t key = { .vs_size = target_sz };
+	vmmr_span_t *span = avl_find(by_size, &key, &idx);
+
+	if (span == NULL) {
+		/* Try for a larger span (instead of exact match) */
+		span = avl_nearest(by_size, idx, AVL_AFTER);
+	}
+	if (span == NULL) {
+		/*
+		 * Nothing large enough exists, so hand back a smaller span.
+		 * The caller will need to gather several of them in order to
+		 * satisfy the request.
+		 */
+		span = avl_nearest(by_size, idx, AVL_BEFORE);
+		ASSERT3P(span, !=, NULL);
+	}
+
+	if (span->vs_size > target_sz) {
+		/*
+		 * Carve the requested amount off the tail of the larger span.
+		 * Its address is unchanged, so only its position in the
+		 * by-size tree needs to be refreshed.
+		 */
+		const uintptr_t tail_addr =
+		    span->vs_addr + span->vs_size - target_sz;
+
+		avl_remove(by_size, span);
+		span->vs_size -= target_sz;
+		avl_add(by_size, span);
+
+		vmmr_span_t *carved = kmem_zalloc(sizeof (*carved), KM_SLEEP);
+		carved->vs_addr = tail_addr;
+		carved->vs_size = target_sz;
+
+		return (carved);
+	}
+
+	/* The span is no bigger than the target: remove it wholesale */
+	avl_remove(by_size, span);
+	avl_remove(by_addr, span);
+
+	return (span);
+}
+
+/*
+ * One-time setup of the reservoir state: lock, size limit, tallies, span
+ * treepairs, allocated-region list, and the VA range taken from kvmm_arena.
+ */
+void
+vmmr_init()
+{
+	mutex_init(&vmmr_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	/*
+	 * `vmmr_total_limit` is the absolute ceiling on the size of the VMM
+	 * memory reservoir.  Its purpose is to offer some protection against
+	 * an operator driving the system into unrecoverable memory starvation
+	 * via explicit or transient additions to the reservoir.
+	 *
+	 * This limit will not be sufficient to prevent kernel memory
+	 * starvation in every situation an operator can create.  It is a
+	 * compromise between safety and letting large systems reach high
+	 * utilization.
+	 *
+	 * The value is derived from pages_pp_maximum ("Number of currently
+	 * available pages that cannot be 'locked'"): all of `physmem` less
+	 * 120% of `pages_pp_maximum`, expressed in bytes.
+	 */
+	vmmr_total_limit =
+	    (((physmem * 10) - (pages_pp_maximum * 12)) * PAGESIZE) / 10;
+
+	/* Start with every tally zeroed */
+	vmmr_free_sz = 0;
+	vmmr_free_transient_sz = 0;
+	vmmr_alloc_sz = 0;
+	vmmr_alloc_transient_sz = 0;
+	vmmr_adding_sz = 0;
+	vmmr_empty_sz = 0;
+	vmmr_empty_last = 0;
+
+	/* Containers for free spans, empty spans, and allocated regions */
+	vmmr_tp_init(&vmmr_free_tp);
+	vmmr_tp_init(&vmmr_empty_tp);
+	list_create(&vmmr_alloc_regions, sizeof (vmmr_region_t),
+	    offsetof(vmmr_region_t, vr_node));
+
+	/* Grab a chunk of VA for the reservoir */
+	vmmr_va_sz = physmem * PAGESIZE;
+	vmmr_va = (uintptr_t)vmem_alloc(kvmm_arena, vmmr_va_sz, VM_SLEEP);
+}
+
+/*
+ * Undo vmmr_init().  The reservoir must already be drained: every tally is
+ * verified to be zero and the free treepair and region list to be empty.
+ */
+void
+vmmr_fini()
+{
+	mutex_enter(&vmmr_lock);
+
+	/* Nothing may remain allocated, free, or in the midst of being added */
+	VERIFY3U(vmmr_alloc_sz, ==, 0);
+	VERIFY3U(vmmr_free_sz, ==, 0);
+	VERIFY3U(vmmr_adding_sz, ==, 0);
+	VERIFY3U(vmmr_alloc_transient_sz, ==, 0);
+	VERIFY3U(vmmr_free_transient_sz, ==, 0);
+	VERIFY(avl_is_empty(&vmmr_free_tp.by_addr));
+	VERIFY(avl_is_empty(&vmmr_free_tp.by_size));
+	VERIFY(list_is_empty(&vmmr_alloc_regions));
+
+	/* Dismantle the span and region containers */
+	vmmr_tp_destroy(&vmmr_free_tp);
+	vmmr_tp_destroy(&vmmr_empty_tp);
+	list_destroy(&vmmr_alloc_regions);
+
+	/* Release reservoir VA chunk */
+	vmem_free(kvmm_arena, (void *)vmmr_va, vmmr_va_sz);
+
+	/* Reset the remaining bookkeeping */
+	vmmr_va = 0;
+	vmmr_va_sz = 0;
+	vmmr_total_limit = 0;
+	vmmr_empty_last = 0;
+
+	mutex_exit(&vmmr_lock);
+	mutex_destroy(&vmmr_lock);
+}
+
+/*
+ * Report whether the reservoir holds no memory at all: nothing allocated and
+ * nothing free, for both normal and transient tallies.  The tallies are
+ * sampled under vmmr_lock.
+ */
+bool
+vmmr_is_empty()
+{
+	bool in_use;
+
+	mutex_enter(&vmmr_lock);
+	in_use = (vmmr_alloc_sz != 0 || vmmr_alloc_transient_sz != 0 ||
+	    vmmr_free_sz != 0 || vmmr_free_transient_sz != 0);
+	mutex_exit(&vmmr_lock);
+
+	return (!in_use);
+}
+
+/*
+ * Allocate a region of `sz` bytes (which must be page-aligned) from the
+ * reservoir.
+ *
+ * For a normal allocation, ENOSPC is returned if the free tally cannot cover
+ * the request.  For a transient allocation, the capacity is first added to
+ * the reservoir via vmmr_add(), whose error (if any) is returned.
+ *
+ * On success, 0 is returned and the new region is stored in `*resp`.
+ */
+int
+vmmr_alloc(size_t sz, bool transient, vmmr_region_t **resp)
+{
+	VERIFY3U(sz & PAGEOFFSET, ==, 0);
+
+	if (transient) {
+		/* Transient capacity is added on demand, just prior to use */
+		const int err = vmmr_add(sz, true);
+
+		if (err != 0) {
+			return (err);
+		}
+		mutex_enter(&vmmr_lock);
+		VERIFY3U(vmmr_free_transient_sz, >=, sz);
+	} else {
+		mutex_enter(&vmmr_lock);
+		if (sz > vmmr_free_sz) {
+			mutex_exit(&vmmr_lock);
+			return (ENOSPC);
+		}
+	}
+
+	/* vmmr_lock is held from here until the region is published */
+	vmmr_region_t *region = kmem_zalloc(sizeof (*region), KM_SLEEP);
+	avl_create(&region->vr_spans, vmmr_cmp_region_addr,
+	    sizeof (vmmr_span_t), offsetof(vmmr_span_t, vs_by_addr));
+	region->vr_size = sz;
+
+	/*
+	 * Gather spans from the free pool until the request is covered,
+	 * laying them out back-to-back in the region's own offset space.
+	 */
+	uintptr_t map_at = 0;
+	for (size_t remain = sz; remain > 0; ) {
+		vmmr_span_t *span = vmmr_tp_remove_split(remain, &vmmr_free_tp);
+
+		/*
+		 * Adequate free memory for this allocation was confirmed
+		 * above, so a span must always be forthcoming.
+		 */
+		VERIFY3P(span, !=, NULL);
+		ASSERT3U(span->vs_size, <=, remain);
+
+		span->vs_region_addr = map_at;
+		avl_add(&region->vr_spans, span);
+		map_at += span->vs_size;
+		remain -= span->vs_size;
+	}
+
+	/* Move the accounted size from the free tally to the allocated one */
+	if (transient) {
+		vmmr_free_transient_sz -= sz;
+		vmmr_alloc_transient_sz += sz;
+		region->vr_transient = true;
+	} else {
+		vmmr_free_sz -= sz;
+		vmmr_alloc_sz += sz;
+	}
+	list_insert_tail(&vmmr_alloc_regions, region);
+	mutex_exit(&vmmr_lock);
+
+	*resp = region;
+	return (0);
+}
+
+/*
+ * Return a kernel virtual address for the page at offset `off` within the
+ * region, obtained by translating the page's PFN with hat_kpm_pfn2va().
+ */
+void *
+vmmr_region_mem_at(vmmr_region_t *region, uintptr_t off)
+{
+	const pfn_t pfn = vmmr_region_pfn_at(region, off);
+
+	/* just use KPM region for now */
+	return (hat_kpm_pfn2va(pfn));
+}
+
+/*
+ * Return the PFN of the page backing offset `off` within the region.  The
+ * offset must be page-aligned and fall inside the region.
+ */
+pfn_t
+vmmr_region_pfn_at(vmmr_region_t *region, uintptr_t off)
+{
+	VERIFY3U(off & PAGEOFFSET, ==, 0);
+	VERIFY3U(off, <, region->vr_size);
+
+	avl_tree_t *spans = &region->vr_spans;
+	vmmr_span_t key = { .vs_region_addr = off };
+	avl_index_t idx;
+	vmmr_span_t *span = avl_find(spans, &key, &idx);
+
+	if (span == NULL) {
+		/*
+		 * No span begins exactly at `off`, so use the nearest span
+		 * which begins before it.
+		 */
+		span = avl_nearest(spans, idx, AVL_BEFORE);
+		ASSERT3P(span, !=, NULL);
+	}
+
+	/* Convert the region-relative offset into an offset on the vnode */
+	const uintptr_t vn_off = span->vs_addr + (off - span->vs_region_addr);
+	page_t *pp = page_find(&kvps[KV_VVP], (u_offset_t)vn_off);
+
+	VERIFY(pp != NULL);
+	return (pp->p_pagenum);
+}
+
+/*
+ * Release a region obtained from vmmr_alloc().  Its pages are zeroed, its
+ * spans are returned to the free pool, and the tallies are adjusted.  For a
+ * transient region, the matching capacity is then removed from the reservoir
+ * via vmmr_remove().  The region structure itself is freed.
+ */
+void
+vmmr_free(vmmr_region_t *region)
+{
+	/* Unpublish the region before touching its contents */
+	mutex_enter(&vmmr_lock);
+	if (region->vr_transient) {
+		VERIFY3U(region->vr_size, <=, vmmr_alloc_transient_sz);
+	} else {
+		VERIFY3U(region->vr_size, <=, vmmr_alloc_sz);
+	}
+	list_remove(&vmmr_alloc_regions, region);
+	mutex_exit(&vmmr_lock);
+
+	/* Zero the contents, one page at a time, without holding the lock */
+	uintptr_t off = 0;
+	while (off < region->vr_size) {
+		bzero(vmmr_region_mem_at(region, off), PAGESIZE);
+		off += PAGESIZE;
+	}
+
+	mutex_enter(&vmmr_lock);
+
+	/* Put the contained span(s) back in the free pool */
+	vmmr_span_t *span;
+	void *cookie = NULL;
+	for (;;) {
+		span = avl_destroy_nodes(&region->vr_spans, &cookie);
+		if (span == NULL) {
+			break;
+		}
+		span->vs_region_addr = 0;
+		vmmr_tp_insert_concat(span, &vmmr_free_tp);
+	}
+	avl_destroy(&region->vr_spans);
+
+	/* Move the accounted size from the allocated tally to the free one */
+	if (region->vr_transient) {
+		vmmr_free_transient_sz += region->vr_size;
+		vmmr_alloc_transient_sz -= region->vr_size;
+	} else {
+		vmmr_free_sz += region->vr_size;
+		vmmr_alloc_sz -= region->vr_size;
+	}
+	mutex_exit(&vmmr_lock);
+
+	if (region->vr_transient) {
+		/* The result of the removal is not checked here */
+		(void) vmmr_remove(region->vr_size, true);
+	}
+	kmem_free(region, sizeof (*region));
+}
+
+/*
+ * Destroy every page of the KV_VVP vnode in the offset range
+ * [vs_addr, vs_addr + vs_size) described by `span`.  Each page must exist
+ * (VERIFY); it is upgraded to, or re-looked-up with, an exclusive lock and
+ * then passed to page_destroy().
+ */
+static void
+vmmr_destroy_pages(vmmr_span_t *span)
+{
+ const uintptr_t end = span->vs_addr + span->vs_size;
+ struct vnode *vp = &kvps[KV_VVP];
+ for (uintptr_t pos = span->vs_addr; pos < end; pos += PAGESIZE) {
+ page_t *pp;
+
+ /* Page-free logic cribbed from segkmem_xfree(): */
+ pp = page_find(vp, (u_offset_t)pos);
+ VERIFY(pp != NULL);
+ if (!page_tryupgrade(pp)) {
+ /*
+ * Some other thread has a sharelock. Wait for
+ * it to drop the lock so we can free this page.
+ */
+ page_unlock(pp);
+ /*
+ * NOTE(review): the page_lookup() result is used below
+ * without a NULL check -- confirm it cannot fail here.
+ */
+ pp = page_lookup(vp, (u_offset_t)pos, SE_EXCL);
+ }
+
+ /*
+ * Clear p_lckcnt so page_destroy() doesn't update availrmem.
+ * That will be taken care of later via page_unresv().
+ */
+ pp->p_lckcnt = 0;
+ page_destroy(pp, 0);
+ }
+}
+
+/*
+ * Create one page in the KV_VVP vnode for every PAGESIZE step of the offset
+ * range [vs_addr, vs_addr + vs_size) described by `span`, and zero each page
+ * through its KPM address.
+ *
+ * Returns 0 on success.  If page_create_va() fails, the pages already created
+ * for this span are destroyed again and ENOMEM is returned.
+ */
+static int
+vmmr_alloc_pages(const vmmr_span_t *span)
+{
+ /* Stack seg with only s_as (kas) set, handed to page_create_va() */
+ struct seg kseg = {
+ .s_as = &kas
+ };
+ struct vnode *vp = &kvps[KV_VVP];
+
+ const uintptr_t end = span->vs_addr + span->vs_size;
+ for (uintptr_t pos = span->vs_addr; pos < end; pos += PAGESIZE) {
+ page_t *pp;
+
+ pp = page_create_va(vp, (u_offset_t)pos, PAGESIZE,
+ PG_EXCL | PG_NORELOC, &kseg, (void *)(vmmr_va + pos));
+
+ if (pp == NULL) {
+ /* Destroy any already-created pages */
+ /* (nothing to undo when the very first page failed) */
+ if (pos != span->vs_addr) {
+ vmmr_span_t destroy_span = {
+ .vs_addr = span->vs_addr,
+ .vs_size = pos - span->vs_addr,
+ };
+
+ vmmr_destroy_pages(&destroy_span);
+ }
+ return (ENOMEM);
+ }
+
+ /* mimic page state from segkmem */
+ /* p_lckcnt is cleared again by vmmr_destroy_pages() on teardown */
+ ASSERT(PAGE_EXCL(pp));
+ page_io_unlock(pp);
+ pp->p_lckcnt = 1;
+ page_downgrade(pp);
+
+ /* pre-zero the page */
+ bzero(hat_kpm_pfn2va(pp->p_pagenum), PAGESIZE);
+ }
+
+ return (0);
+}
+
+/*
+ * Wait callback handed to page_xresv() by vmmr_add().
+ *
+ * Sleeps for hz >> 2 ticks (a quarter of a second) via delay_sig().  Returns 1
+ * when the delay completed, or 0 when delay_sig() returned non-zero, which is
+ * treated as an interruption.
+ */
+static int
+vmmr_resv_wait(void)
+{
+	if (delay_sig(hz >> 2) != 0) {
+		/* bail due to interruption */
+		return (0);
+	}
+	return (1);
+}
+
+/*
+ * Pull `sz` bytes worth of spans out of the free treepair, destroy their
+ * pages, and park the now page-less spans in the empty treepair, crediting
+ * `sz` to vmmr_empty_sz.
+ *
+ * `sz` must be page-aligned and vmmr_lock must be held.  Adjusting the free
+ * size counters is left to the caller.
+ */
+static void
+vmmr_remove_raw(size_t sz)
+{
+	VERIFY3U(sz & PAGEOFFSET, ==, 0);
+	VERIFY(MUTEX_HELD(&vmmr_lock));
+
+	for (size_t left = sz; left > 0; ) {
+		vmmr_span_t *sp = vmmr_tp_remove_split(left, &vmmr_free_tp);
+
+		/*
+		 * The caller must ensure that at least `sz` amount is present
+		 * in the free treepair.
+		 */
+		VERIFY3P(sp, !=, NULL);
+		ASSERT3U(sp->vs_size, <=, left);
+
+		/* TODO: perhaps arrange to destroy pages outside the lock? */
+		vmmr_destroy_pages(sp);
+
+		/* Account for the span before handing it to the treepair */
+		left -= sp->vs_size;
+		vmmr_tp_insert_concat(sp, &vmmr_empty_tp);
+	}
+
+	vmmr_empty_sz += sz;
+}
+
+/*
+ * Add `sz` bytes (page-aligned, enforced by VERIFY) of page-backed capacity
+ * to the reservoir, credited to the transient or non-transient free size
+ * depending on `transient`.
+ *
+ * The request is first checked against vmmr_total_limit and recorded in
+ * vmmr_adding_sz, then pages are reserved with page_xresv().  Spans are taken
+ * from the empty treepair when vmmr_empty_sz is non-zero, otherwise a new span
+ * is appended at vmmr_empty_last.  vmmr_lock is dropped while pages are
+ * allocated for each span.
+ *
+ * Returns 0 on success, EOVERFLOW if the new total would wrap, ENOSPC if it
+ * would exceed vmmr_total_limit, EINTR if page_xresv() returned 0, or the
+ * error from vmmr_alloc_pages() (in which case all progress is unwound).
+ */
+int
+vmmr_add(size_t sz, bool transient)
+{
+	VERIFY3U(sz & PAGEOFFSET, ==, 0);
+
+	mutex_enter(&vmmr_lock);
+	/*
+	 * Make sure that the amount added is not going to breach the limits
+	 * we've chosen
+	 */
+	const size_t current_total =
+	    vmmr_alloc_sz + vmmr_free_sz + vmmr_adding_sz +
+	    vmmr_alloc_transient_sz + vmmr_free_transient_sz;
+	if ((current_total + sz) < current_total) {
+		mutex_exit(&vmmr_lock);
+		return (EOVERFLOW);
+	}
+	if ((current_total + sz) > vmmr_total_limit) {
+		mutex_exit(&vmmr_lock);
+		return (ENOSPC);
+	}
+	vmmr_adding_sz += sz;
+	mutex_exit(&vmmr_lock);
+
+	/* Wait for enough pages to become available */
+	if (page_xresv(sz >> PAGESHIFT, KM_SLEEP, vmmr_resv_wait) == 0) {
+		mutex_enter(&vmmr_lock);
+		vmmr_adding_sz -= sz;
+		mutex_exit(&vmmr_lock);
+
+		return (EINTR);
+	}
+
+	mutex_enter(&vmmr_lock);
+	size_t added = 0;
+	size_t remain = sz;
+	while (added < sz) {
+		vmmr_span_t *span = NULL;
+
+		if (vmmr_empty_sz > 0) {
+			span = vmmr_tp_remove_split(remain, &vmmr_empty_tp);
+
+			/*
+			 * A non-zero vmmr_empty_sz promises a span; check the
+			 * result before it is dereferenced.
+			 */
+			VERIFY3P(span, !=, NULL);
+			vmmr_empty_sz -= span->vs_size;
+		} else {
+			/*
+			 * No empty space to fill with new pages, so just tack
+			 * it on at the end instead.
+			 */
+			span = kmem_zalloc(sizeof (vmmr_span_t), KM_SLEEP);
+			span->vs_addr = vmmr_empty_last;
+			span->vs_size = remain;
+			vmmr_empty_last += remain;
+		}
+
+		/* Allocate the actual pages to back this span */
+		mutex_exit(&vmmr_lock);
+		int err = vmmr_alloc_pages(span);
+		mutex_enter(&vmmr_lock);
+
+		/*
+		 * If an error is encountered during page allocation for the
+		 * span, unwind any progress made by the addition request.
+		 */
+		if (err != 0) {
+			/*
+			 * Without pages allocated to this span, it is now
+			 * tracked as empty.
+			 */
+			vmmr_empty_sz += span->vs_size;
+			vmmr_tp_insert_concat(span, &vmmr_empty_tp);
+
+			if (added != 0) {
+				vmmr_remove_raw(added);
+			}
+
+			vmmr_adding_sz -= sz;
+			mutex_exit(&vmmr_lock);
+
+			page_unresv(sz >> PAGESHIFT);
+			return (err);
+		}
+
+		/*
+		 * The allocated-page-bearing span is placed in the "free"
+		 * treepair now, but is not officially exposed for consumption
+		 * until `vmmr_free_sz` or `vmmr_free_transient_sz` are
+		 * updated.
+		 *
+		 * This allows us to unwind the allocation in case of a failure
+		 * without the risk of the freshly added span(s) being snapped
+		 * up by a consumer already.
+		 */
+		added += span->vs_size;
+		remain -= span->vs_size;
+		vmmr_tp_insert_concat(span, &vmmr_free_tp);
+	}
+
+	/* Make the added memory usable by exposing it to the size accounting */
+	if (!transient) {
+		vmmr_free_sz += added;
+	} else {
+		vmmr_free_transient_sz += added;
+	}
+	ASSERT3U(added, ==, sz);
+	vmmr_adding_sz -= added;
+
+	mutex_exit(&vmmr_lock);
+	return (0);
+}
+
+/*
+ * Remove `sz` bytes (page-aligned, enforced by VERIFY) of free capacity from
+ * the reservoir, drawing on the transient or non-transient free size
+ * according to `transient`, and release the matching page reservation.
+ *
+ * Returns 0 on success, or ENOSPC when the selected free size is smaller
+ * than `sz`.
+ */
+int
+vmmr_remove(size_t sz, bool transient)
+{
+	VERIFY3U(sz & PAGEOFFSET, ==, 0);
+
+	mutex_enter(&vmmr_lock);
+
+	/* Only the free size of the requested class may be drawn down */
+	const size_t avail =
+	    transient ? vmmr_free_transient_sz : vmmr_free_sz;
+	if (sz > avail) {
+		mutex_exit(&vmmr_lock);
+		return (ENOSPC);
+	}
+
+	vmmr_remove_raw(sz);
+
+	if (transient) {
+		vmmr_free_transient_sz -= sz;
+	} else {
+		vmmr_free_sz -= sz;
+	}
+	mutex_exit(&vmmr_lock);
+
+	page_unresv(sz >> PAGESHIFT);
+	return (0);
+}
+
+/*
+ * Handle the reservoir ioctls.
+ *
+ * VMM_RESV_QUERY: allowed for callers in the global zone; copies a snapshot
+ *	of the reservoir sizes and limit out to the address in `arg`.
+ * VMM_RESV_ADD / VMM_RESV_REMOVE: require secpolicy_sys_config(); `arg` is
+ *	the number of bytes to add to, or remove from, the non-transient
+ *	reservoir and must be page-aligned.
+ *
+ * Returns 0 on success, EPERM when the caller lacks permission, EFAULT when
+ * the copyout fails, EINVAL for an unaligned size, ENOTTY for an unknown
+ * command, or the error from vmmr_add()/vmmr_remove().
+ */
+int
+vmmr_ioctl(int cmd, intptr_t arg, int md, cred_t *cr, int *rvalp)
+{
+	switch (cmd) {
+	case VMM_RESV_QUERY: {
+		struct vmm_resv_query res;
+		void *datap = (void *)(uintptr_t)arg;
+
+		/* For now, anyone in GZ can query */
+		if (crgetzoneid(cr) != GLOBAL_ZONEID) {
+			return (EPERM);
+		}
+		mutex_enter(&vmmr_lock);
+		res.vrq_free_sz = vmmr_free_sz;
+		res.vrq_alloc_sz = vmmr_alloc_sz;
+		res.vrq_alloc_transient_sz = vmmr_alloc_transient_sz;
+		res.vrq_limit = vmmr_total_limit;
+		mutex_exit(&vmmr_lock);
+		if (ddi_copyout(&res, datap, sizeof (res), md) != 0) {
+			return (EFAULT);
+		}
+		break;
+	}
+	case VMM_RESV_ADD: {
+		if (secpolicy_sys_config(cr, B_FALSE) != 0) {
+			return (EPERM);
+		}
+		/*
+		 * vmmr_add() VERIFYs page alignment, so an unaligned size
+		 * from userspace must be rejected here rather than be allowed
+		 * to panic the system.
+		 */
+		if (((size_t)arg & PAGEOFFSET) != 0) {
+			return (EINVAL);
+		}
+		return (vmmr_add((size_t)arg, false));
+	}
+	case VMM_RESV_REMOVE: {
+		if (secpolicy_sys_config(cr, B_FALSE) != 0) {
+			return (EPERM);
+		}
+		/* Same alignment requirement as for VMM_RESV_ADD */
+		if (((size_t)arg & PAGEOFFSET) != 0) {
+			return (EINVAL);
+		}
+		return (vmmr_remove((size_t)arg, false));
+	}
+	default:
+		return (ENOTTY);
+	}
+	return (0);
+}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index d5f4b3883b..ef366ddaff 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -45,6 +45,7 @@
#include <sys/vmm_impl.h>
#include <sys/vmm_drv.h>
#include <sys/vmm_vm.h>
+#include <sys/vmm_reservoir.h>
#include <vm/seg_dev.h>
@@ -1506,13 +1507,22 @@ vmm_hma_release(void)
}
static int
-vmmdev_do_vm_create(char *name, cred_t *cr)
+vmmdev_do_vm_create(const struct vm_create_req *req, cred_t *cr)
{
vmm_softc_t *sc = NULL;
minor_t minor;
int error = ENOMEM;
+ size_t len;
+ const char *name = req->name;
- if (strnlen(name, VM_MAX_NAMELEN) >= VM_MAX_NAMELEN) {
+ len = strnlen(name, VM_MAX_NAMELEN);
+ if (len == 0) {
+ return (EINVAL);
+ }
+ if (len >= VM_MAX_NAMELEN) {
+ return (ENAMETOOLONG);
+ }
+ if (strchr(name, '/') != NULL) {
return (EINVAL);
}
@@ -1555,7 +1565,7 @@ vmmdev_do_vm_create(char *name, cred_t *cr)
goto fail;
}
- error = vm_create(name, &sc->vmm_vm);
+ error = vm_create(req->name, req->flags, &sc->vmm_vm);
if (error == 0) {
/* Complete VM intialization and report success. */
(void) strlcpy(sc->vmm_name, name, sizeof (sc->vmm_name));
@@ -1938,7 +1948,7 @@ vmm_do_vm_destroy(vmm_softc_t *sc, boolean_t clean_zsd)
/* ARGSUSED */
static int
-vmmdev_do_vm_destroy(const char *name, cred_t *cr)
+vmmdev_do_vm_destroy(const struct vm_destroy_req *req, cred_t *cr)
{
boolean_t hma_release = B_FALSE;
vmm_softc_t *sc;
@@ -1949,7 +1959,7 @@ vmmdev_do_vm_destroy(const char *name, cred_t *cr)
mutex_enter(&vmm_mtx);
- if ((sc = vmm_lookup(name)) == NULL) {
+ if ((sc = vmm_lookup(req->name)) == NULL) {
mutex_exit(&vmm_mtx);
return (ENOENT);
}
@@ -2193,6 +2203,47 @@ vmm_is_supported(intptr_t arg)
}
static int
+vmm_ctl_ioctl(int cmd, intptr_t arg, int md, cred_t *cr, int *rvalp)
+{
+	void *uaddr = (void *)arg;
+
+	/*
+	 * Dispatch everything except VM create/destroy directly.  Those two
+	 * share a write-access requirement and are completed below.
+	 */
+	switch (cmd) {
+	case VMM_CREATE_VM:
+	case VMM_DESTROY_VM:
+		if ((md & FWRITE) == 0) {
+			return (EPERM);
+		}
+		break;
+	case VMM_VM_SUPPORTED:
+		return (vmm_is_supported(arg));
+	case VMM_RESV_QUERY:
+	case VMM_RESV_ADD:
+	case VMM_RESV_REMOVE:
+		return (vmmr_ioctl(cmd, arg, md, cr, rvalp));
+	default:
+		/* No other actions are legal on ctl device */
+		return (ENOTTY);
+	}
+
+	if (cmd == VMM_CREATE_VM) {
+		struct vm_create_req create;
+
+		if (ddi_copyin(uaddr, &create, sizeof (create), md) != 0) {
+			return (EFAULT);
+		}
+		return (vmmdev_do_vm_create(&create, cr));
+	} else {
+		struct vm_destroy_req destroy;
+
+		if (ddi_copyin(uaddr, &destroy, sizeof (destroy), md) != 0) {
+			return (EFAULT);
+		}
+		return (vmmdev_do_vm_destroy(&destroy, cr));
+	}
+}
+
+static int
vmm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
int *rvalp)
{
@@ -2207,36 +2258,7 @@ vmm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
minor = getminor(dev);
if (minor == VMM_CTL_MINOR) {
- void *argp = (void *)arg;
- char name[VM_MAX_NAMELEN] = { 0 };
- size_t len = 0;
-
- if ((mode & FKIOCTL) != 0) {
- len = strlcpy(name, argp, sizeof (name));
- } else {
- if (copyinstr(argp, name, sizeof (name), &len) != 0) {
- return (EFAULT);
- }
- }
- if (len >= VM_MAX_NAMELEN) {
- return (ENAMETOOLONG);
- }
-
- switch (cmd) {
- case VMM_CREATE_VM:
- if ((mode & FWRITE) == 0)
- return (EPERM);
- return (vmmdev_do_vm_create(name, credp));
- case VMM_DESTROY_VM:
- if ((mode & FWRITE) == 0)
- return (EPERM);
- return (vmmdev_do_vm_destroy(name, credp));
- case VMM_VM_SUPPORTED:
- return (vmm_is_supported(arg));
- default:
- /* No other actions are legal on ctl device */
- return (ENOTTY);
- }
+ return (vmm_ctl_ioctl(cmd, arg, mode, credp, rvalp));
}
sc = ddi_get_soft_state(vmm_statep, minor);
@@ -2422,7 +2444,6 @@ vmm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
}
vmm_sol_glue_init();
- vmm_arena_init();
/*
* Perform temporary HMA registration to determine if the system
@@ -2462,7 +2483,6 @@ fail:
if (reg != NULL) {
hma_unregister(reg);
}
- vmm_arena_fini();
vmm_sol_glue_cleanup();
mutex_exit(&vmmdev_mtx);
return (DDI_FAILURE);
@@ -2494,6 +2514,11 @@ vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
}
mutex_exit(&vmm_mtx);
+ if (!vmmr_is_empty()) {
+ mutex_exit(&vmmdev_mtx);
+ return (DDI_FAILURE);
+ }
+
VERIFY(vmmdev_sdev_hdl != (sdev_plugin_hdl_t)NULL);
if (sdev_plugin_unregister(vmmdev_sdev_hdl) != 0) {
mutex_exit(&vmmdev_mtx);
@@ -2507,7 +2532,6 @@ vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
VERIFY0(vmm_mod_unload());
VERIFY3U(vmmdev_hma_reg, ==, NULL);
- vmm_arena_fini();
vmm_sol_glue_cleanup();
mutex_exit(&vmmdev_mtx);
@@ -2579,11 +2603,13 @@ _init(void)
}
vmm_zsd_init();
+ vmmr_init();
error = mod_install(&modlinkage);
if (error) {
ddi_soft_state_fini(&vmm_statep);
vmm_zsd_fini();
+ vmmr_fini();
}
return (error);
@@ -2600,6 +2626,7 @@ _fini(void)
}
vmm_zsd_fini();
+ vmmr_fini();
ddi_soft_state_fini(&vmm_statep);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c
index 720af54200..bd1f1890d4 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c
@@ -32,11 +32,12 @@
#include <vm/hat_i86.h>
#include <vm/seg_vn.h>
#include <vm/seg_kmem.h>
-#include <vm/seg_vmm.h>
#include <machine/vm.h>
#include <sys/vmm_gpt.h>
#include <sys/vmm_vm.h>
+#include <sys/seg_vmm.h>
+#include <sys/vmm_reservoir.h>
#define PMAP_TO_VMMAP(pm) ((vm_map_t) \
((caddr_t)(pm) - offsetof(struct vmspace, vms_pmap)))
@@ -65,38 +66,6 @@ static vmspace_mapping_t *vm_mapping_find(struct vmspace *, uintptr_t, size_t,
boolean_t);
static void vm_mapping_remove(struct vmspace *, vmspace_mapping_t *);
-static vmem_t *vmm_alloc_arena = NULL;
-
-static void *
-vmm_arena_alloc(vmem_t *vmp, size_t size, int vmflag)
-{
- return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
- segkmem_page_create, &kvps[KV_VVP]));
-}
-
-static void
-vmm_arena_free(vmem_t *vmp, void *inaddr, size_t size)
-{
- segkmem_xfree(vmp, inaddr, size, &kvps[KV_VVP], NULL);
-}
-
-void
-vmm_arena_init(void)
-{
- vmm_alloc_arena = vmem_create("vmm_alloc_arena", NULL, 0, 1024 * 1024,
- vmm_arena_alloc, vmm_arena_free, kvmm_arena, 0, VM_SLEEP);
-
- ASSERT(vmm_alloc_arena != NULL);
-}
-
-void
-vmm_arena_fini(void)
-{
- VERIFY(vmem_size(vmm_alloc_arena, VMEM_ALLOC) == 0);
- vmem_destroy(vmm_alloc_arena);
- vmm_alloc_arena = NULL;
-}
-
struct vmspace *
vmspace_alloc(vm_offset_t start, vm_offset_t end, pmap_pinit_t pinit)
{
@@ -164,8 +133,9 @@ vmspace_find_kva(struct vmspace *vms, uintptr_t addr, size_t size)
switch (vmo->vmo_type) {
case OBJT_DEFAULT:
- result = (void *)((uintptr_t)vmo->vmo_data +
- VMSM_OFFSET(vmsm, addr));
+ result = vmmr_region_mem_at(
+ (vmmr_region_t *)vmo->vmo_data,
+ VMSM_OFFSET(vmsm, addr) & PAGEMASK);
break;
default:
break;
@@ -344,39 +314,23 @@ vm_object_pager_none(vm_object_t vmo, uintptr_t off, pfn_t *lpfn, uint_t *lvl)
}
static pfn_t
-vm_object_pager_heap(vm_object_t vmo, uintptr_t off, pfn_t *lpfn, uint_t *lvl)
+vm_object_pager_reservoir(vm_object_t vmo, uintptr_t off, pfn_t *lpfn,
+ uint_t *lvl)
{
- const uintptr_t kaddr = ALIGN2PAGE((uintptr_t)vmo->vmo_data + off);
- uint_t idx, level;
- htable_t *ht;
- x86pte_t pte;
- pfn_t top_pfn, pfn;
+ vmmr_region_t *region;
+ pfn_t pfn;
ASSERT(vmo->vmo_type == OBJT_DEFAULT);
- ASSERT(off < vmo->vmo_size);
- ht = htable_getpage(kas.a_hat, kaddr, &idx);
- if (ht == NULL) {
- return (PFN_INVALID);
- }
- pte = x86pte_get(ht, idx);
- if (!PTE_ISPAGE(pte, ht->ht_level)) {
- htable_release(ht);
- return (PFN_INVALID);
- }
-
- pfn = top_pfn = PTE2PFN(pte, ht->ht_level);
- level = ht->ht_level;
- if (ht->ht_level > 0) {
- pfn += mmu_btop(kaddr & LEVEL_OFFSET((uint_t)ht->ht_level));
- }
- htable_release(ht);
+ region = vmo->vmo_data;
+ pfn = vmmr_region_pfn_at(region, off & PAGEMASK);
+ /* TODO: handle large pages */
if (lpfn != NULL) {
- *lpfn = top_pfn;
+ *lpfn = pfn;
}
if (lvl != NULL) {
- *lvl = level;
+ *lvl = 0;
}
return (pfn);
}
@@ -419,41 +373,8 @@ vm_object_pager_sg(vm_object_t vmo, uintptr_t off, pfn_t *lpfn, uint_t *lvl)
return (pfn);
}
-static void
-vm_reserve_pages(size_t npages)
-{
- uint_t retries = 60;
- int rc;
-
- mutex_enter(&freemem_lock);
- if (availrmem < npages) {
- mutex_exit(&freemem_lock);
-
- /*
- * Set needfree and wait for the ZFS ARC reap thread to free up
- * some memory.
- */
- page_needfree(npages);
-
- mutex_enter(&freemem_lock);
- while ((availrmem < npages) && retries-- > 0) {
- mutex_exit(&freemem_lock);
- rc = delay_sig(drv_usectohz(1 * MICROSEC));
- mutex_enter(&freemem_lock);
-
- if (rc == EINTR)
- break;
- }
- mutex_exit(&freemem_lock);
-
- page_needfree(-npages);
- } else {
- mutex_exit(&freemem_lock);
- }
-}
-
vm_object_t
-vm_object_allocate(objtype_t type, vm_pindex_t psize)
+vm_object_allocate(objtype_t type, vm_pindex_t psize, bool transient)
{
vm_object_t vmo;
const size_t size = ptob((size_t)psize);
@@ -468,17 +389,19 @@ vm_object_allocate(objtype_t type, vm_pindex_t psize)
switch (type) {
case OBJT_DEFAULT: {
- vm_reserve_pages(psize);
- /* XXXJOY: opt-in to larger pages? */
- vmo->vmo_data = vmem_alloc(vmm_alloc_arena, size, KM_NOSLEEP);
- if (vmo->vmo_data == NULL) {
+ /* TODO: opt-in to larger pages? */
+ int err;
+ vmmr_region_t *region = NULL;
+
+ err = vmmr_alloc(size, transient, &region);
+ if (err != 0) {
mutex_destroy(&vmo->vmo_lock);
kmem_free(vmo, sizeof (*vmo));
return (NULL);
}
- bzero(vmo->vmo_data, size);
- vmo->vmo_pager = vm_object_pager_heap;
+ vmo->vmo_data = region;
+ vmo->vmo_pager = vm_object_pager_reservoir;
}
break;
case OBJT_SG:
@@ -505,7 +428,7 @@ vm_pager_allocate(objtype_t type, void *handle, vm_ooffset_t size,
VERIFY(type == OBJT_SG);
VERIFY(off == 0);
- vmo = vm_object_allocate(type, size);
+ vmo = vm_object_allocate(type, size, false);
vmo->vmo_data = sg;
mutex_enter(&sg->sg_lock);
@@ -529,7 +452,7 @@ vm_object_deallocate(vm_object_t vmo)
switch (vmo->vmo_type) {
case OBJT_DEFAULT:
- vmem_free(vmm_alloc_arena, vmo->vmo_data, vmo->vmo_size);
+ vmmr_free((vmmr_region_t *)vmo->vmo_data);
break;
case OBJT_SG:
sglist_free((struct sglist *)vmo->vmo_data);
@@ -574,6 +497,17 @@ vm_object_reference(vm_object_t vmo)
VERIFY3U(ref, !=, 0);
}
+/*
+ * Return the page frame number for offset `off` of `vmo` by asking the
+ * object's pager.  Objects of any type other than OBJT_DEFAULT yield
+ * PFN_INVALID.
+ */
+pfn_t
+vm_object_pfn(vm_object_t vmo, uintptr_t off)
+{
+	if (vmo->vmo_type == OBJT_DEFAULT) {
+		return (vmo->vmo_pager(vmo, off, NULL, NULL));
+	}
+
+	/* This is expected to be used only on reservoir-backed memory */
+	return (PFN_INVALID);
+}
+
static vmspace_mapping_t *
vm_mapping_find(struct vmspace *vms, uintptr_t addr, size_t size,
boolean_t no_lock)
@@ -912,11 +846,9 @@ vm_segmap_obj(vm_object_t vmo, off_t map_off, size_t size, struct as *as,
if (err == 0) {
segvmm_crargs_t svma;
- svma.kaddr = (caddr_t)vmo->vmo_data + map_off;
+ svma.obj = vmo;
+ svma.offset = map_off;
svma.prot = prot;
- svma.cookie = vmo;
- svma.hold = (segvmm_holdfn_t)vm_object_reference;
- svma.rele = (segvmm_relefn_t)vm_object_deallocate;
err = as_map(as, *addrp, size, segvmm_create, &svma);
}
@@ -969,11 +901,9 @@ vm_segmap_space(struct vmspace *vms, off_t off, struct as *as, caddr_t *addrp,
VERIFY(mapoff < vmo->vmo_size);
VERIFY((mapoff + size) <= vmo->vmo_size);
- svma.kaddr = (void *)((uintptr_t)vmo->vmo_data + mapoff);
+ svma.obj = vmo;
+ svma.offset = mapoff;
svma.prot = prot;
- svma.cookie = vmo;
- svma.hold = (segvmm_holdfn_t)vm_object_reference;
- svma.rele = (segvmm_relefn_t)vm_object_deallocate;
err = as_map(as, *addrp, len, segvmm_create, &svma);
}
diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h
index 5b3e7f9b10..e58d63761e 100644
--- a/usr/src/uts/i86pc/sys/vmm.h
+++ b/usr/src/uts/i86pc/sys/vmm.h
@@ -39,7 +39,7 @@
*
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2019 Joyent, Inc.
- * Copyright 2020 Oxide Computer Company
+ * Copyright 2021 Oxide Computer Company
*/
#ifndef _VMM_H_
@@ -124,20 +124,12 @@ enum x2apic_state {
/*
* illumos doesn't have a limitation based on SPECNAMELEN like FreeBSD does.
- * Instead of picking an arbitrary value we will just rely on the same
- * calculation that's made below. If this calculation ever changes we need to
- * update the the VM_MAX_NAMELEN mapping in the bhyve brand's boot.c file.
+ * To simplify structure definitions, an arbitrary limit has been chosen.
+ * The same value is used as the limit for memory segment names
+ * (VM_MAX_SEG_NAMELEN).
*/
-#define VM_MAX_PREFIXLEN 10
-#define VM_MAX_SUFFIXLEN 15
-#define VM_MIN_NAMELEN 6
-#define VM_MAX_NAMELEN \
- (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1)
-
-#ifdef _KERNEL
-CTASSERT(VM_MAX_NAMELEN >= VM_MIN_NAMELEN);
-#endif
+#define VM_MAX_NAMELEN 128
+#define VM_MAX_SEG_NAMELEN 128
#define VM_MAXCPU 32 /* maximum virtual cpus */
@@ -389,4 +381,12 @@ struct vm_entry {
int vm_restart_instruction(void *vm, int vcpuid);
+/*
+ * Flag bits controlling VM creation.
+ * NOTE(review): presumably these are the bits carried in
+ * vm_create_req.flags and passed on to vm_create() -- confirm.
+ */
+enum vm_create_flags {
+ /*
+ * Allocate guest memory segments from existing reservoir capacity,
+ * rather than attempting to create transient allocations.
+ */
+ VCF_RESERVOIR_MEM = (1 << 0),
+};
+
#endif /* _VMM_H_ */
diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h
index 15c64355c4..f371ad1266 100644
--- a/usr/src/uts/i86pc/sys/vmm_dev.h
+++ b/usr/src/uts/i86pc/sys/vmm_dev.h
@@ -39,6 +39,7 @@
*
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2019 Joyent, Inc.
+ * Copyright 2021 Oxide Computer Company
*/
#ifndef _VMM_DEV_H_
@@ -46,6 +47,19 @@
#include <machine/vmm.h>
+#include <sys/param.h>
+#include <sys/cpuset.h>
+
+/*
+ * Argument structure copied in for the VMM_CREATE_VM ioctl.  The driver
+ * rejects a name that is empty, that is not NUL-terminated within
+ * VM_MAX_NAMELEN bytes, or that contains a '/'.
+ */
+struct vm_create_req {
+ char name[VM_MAX_NAMELEN];
+ uint64_t flags;
+};
+
+
+/*
+ * Argument structure copied in for the VMM_DESTROY_VM ioctl; `name` is used
+ * to look up the VM to destroy.
+ */
+struct vm_destroy_req {
+ char name[VM_MAX_NAMELEN];
+};
+
struct vm_memmap {
vm_paddr_t gpa;
int segid; /* memory segment */
@@ -66,7 +80,7 @@ struct vm_munmap {
struct vm_memseg {
int segid;
size_t len;
- char name[SPECNAMELEN + 1];
+ char name[VM_MAX_SEG_NAMELEN];
};
struct vm_register {
@@ -282,6 +296,13 @@ struct vm_run_state {
uint8_t _pad[3];
};
+/*
+ * Result structure copied out by the VMM_RESV_QUERY ioctl.  Each field is a
+ * snapshot of the corresponding reservoir counter in the kernel.
+ */
+struct vmm_resv_query {
+ size_t vrq_free_sz; /* free (non-transient) reservoir size */
+ size_t vrq_alloc_sz; /* allocated (non-transient) size */
+ size_t vrq_alloc_transient_sz; /* allocated transient size */
+ size_t vrq_limit; /* limit on the total reservoir size */
+};
+
#define VMMCTL_IOC_BASE (('V' << 16) | ('M' << 8))
#define VMM_IOC_BASE (('v' << 16) | ('m' << 8))
#define VMM_LOCK_IOC_BASE (('v' << 16) | ('l' << 8))
@@ -292,6 +313,10 @@ struct vm_run_state {
#define VMM_DESTROY_VM (VMMCTL_IOC_BASE | 0x02)
#define VMM_VM_SUPPORTED (VMMCTL_IOC_BASE | 0x03)
+#define VMM_RESV_QUERY (VMMCTL_IOC_BASE | 0x10)
+#define VMM_RESV_ADD (VMMCTL_IOC_BASE | 0x11)
+#define VMM_RESV_REMOVE (VMMCTL_IOC_BASE | 0x12)
+
/* Operations performed in the context of a given vCPU */
#define VM_RUN (VMM_CPU_IOC_BASE | 0x01)
#define VM_SET_REGISTER (VMM_CPU_IOC_BASE | 0x02)
diff --git a/usr/src/uts/i86pc/vm/seg_vmm.h b/usr/src/uts/i86pc/vm/seg_vmm.h
deleted file mode 100644
index f5b95c6a27..0000000000
--- a/usr/src/uts/i86pc/vm/seg_vmm.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * This file and its contents are supplied under the terms of the
- * Common Development and Distribution License ("CDDL"), version 1.0.
- * You may only use this file in accordance with the terms of version
- * 1.0 of the CDDL.
- *
- * A full copy of the text of the CDDL should have accompanied this
- * source. A copy of the CDDL is also available via the Internet at
- * http://www.illumos.org/license/CDDL.
- */
-
-/*
- * Copyright 2018 Joyent, Inc.
- */
-
-#ifndef _VM_SEG_VMM_H
-#define _VM_SEG_VMM_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct segvmm_crargs {
- caddr_t kaddr;
- uchar_t prot; /* protection */
- void *cookie; /* opaque resource backing memory */
- void (*hold)(void *); /* add reference to cookie */
- void (*rele)(void *); /* release reference to cookie */
-} segvmm_crargs_t;
-
-typedef void (*segvmm_holdfn_t)(void *);
-typedef void (*segvmm_relefn_t)(void *);
-
-typedef struct segvmm_data {
- krwlock_t svmd_lock;
- uintptr_t svmd_kaddr;
- uchar_t svmd_prot;
- void *svmd_cookie;
- segvmm_holdfn_t svmd_hold;
- segvmm_relefn_t svmd_rele;
- size_t svmd_softlockcnt;
-} segvmm_data_t;
-
-extern int segvmm_create(struct seg **, void *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _VM_SEG_VMM_H */