diff options
Diffstat (limited to 'usr/src')
52 files changed, 1847 insertions, 648 deletions
diff --git a/usr/src/boot/Makefile.version b/usr/src/boot/Makefile.version index 5f2a2b4676..1f4b347809 100644 --- a/usr/src/boot/Makefile.version +++ b/usr/src/boot/Makefile.version @@ -34,4 +34,4 @@ LOADER_VERSION = 1.1 # Use date like formatting here, YYYY.MM.DD.XX, without leading zeroes. # The version is processed from left to right, the version number can only # be increased. -BOOT_VERSION = $(LOADER_VERSION)-2021.06.15.1 +BOOT_VERSION = $(LOADER_VERSION)-2021.07.27.1 diff --git a/usr/src/boot/lib/libstand/Makefile.inc b/usr/src/boot/lib/libstand/Makefile.inc index 633005d142..2acb9ef490 100644 --- a/usr/src/boot/lib/libstand/Makefile.inc +++ b/usr/src/boot/lib/libstand/Makefile.inc @@ -21,25 +21,25 @@ # # standalone components and stuff we have modified locally -SRCS= $(ZLIB)/gzguts.h $(ZLIB)/zutil.h +SRCS += $(ZLIB)/gzguts.h $(ZLIB)/zutil.h SRCS += $(SASRC)/__main.c $(SASRC)/abort.c $(SASRC)/assert.c SRCS += $(SASRC)/bcd.c $(SASRC)/environment.c SRCS += $(SASRC)/getopt.c $(SASRC)/random.c SRCS += $(SASRC)/sbrk.c $(SASRC)/twiddle.c SRCS += $(SASRC)/zalloc.c $(SASRC)/zalloc_malloc.c -OBJS= __main.o abort.o assert.o bcd.o environment.o \ +OBJECTS += __main.o abort.o assert.o bcd.o environment.o \ getopt.o gets.o globals.o pager.o panic.o printf.o \ strdup.o strerror.o strtol.o strtoll.o strtoul.o strtoull.o random.o \ sbrk.o twiddle.o zalloc.o zalloc_malloc.o # private (pruned) versions of libc string functions SRCS += $(SASRC)/strcasecmp.c -OBJS += strcasecmp.o +OBJECTS += strcasecmp.o # from libc SRCS += $(LIBSRC)/libc/net/ntoh.c -OBJS += ntoh.o +OBJECTS += ntoh.o # string functions from libc SRCS += $(LIBSRC)/libc/string/bcmp.c $(LIBSRC)/libc/string/bcopy.c @@ -61,7 +61,7 @@ SRCS += $(LIBSRC)/libc/string/swab.c SRCS += $(SASRC)/qdivrem.c -OBJS += bcmp.o bcopy.o bzero.o ffs.o fls.o \ +OBJECTS += bcmp.o bcopy.o bzero.o ffs.o fls.o \ memccpy.o memchr.o memcmp.o memcpy.o memmove.o memset.o \ qdivrem.o strcat.o strchr.o strcmp.o strcpy.o stpcpy.o stpncpy.o \ 
strcspn.o strlcat.o strlcpy.o strlen.o strncat.o strncmp.o strncpy.o \ @@ -75,22 +75,57 @@ SRCS += $(LIBSRC)/libc/uuid/uuid_is_nil.c SRCS += $(SASRC)/uuid_from_string.c SRCS += $(SASRC)/uuid_to_string.c -OBJS += uuid_create_nil.o uuid_equal.o uuid_from_string.o uuid_is_nil.o \ +OBJECTS += uuid_create_nil.o uuid_equal.o uuid_from_string.o uuid_is_nil.o \ uuid_to_string.o # decompression functionality from libbz2 # NOTE: to actually test this functionality after libbz2 upgrade compile # loader(8) with LOADER_BZIP2_SUPPORT defined -_bzlib.o _crctable.o _decompress.o _huffman.o _randtable.o bzipfs.o \ -:= CFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS +objs/_bzlib.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS +objs/_bzlib.o := CPPFLAGS += -I$(SRC)/common/bzip2 +objs/_bzlib.o: libstand_bzlib_private.h +pics/_bzlib.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS +pics/_bzlib.o := CPPFLAGS += -I$(SRC)/common/bzip2 +pics/_bzlib.o: libstand_bzlib_private.h +objs/_crctable.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS +objs/_crctable.o := CPPFLAGS += -I$(SRC)/common/bzip2 +objs/_crctable.o: libstand_bzlib_private.h +pics/_crctable.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS +pics/_crctable.o := CPPFLAGS += -I$(SRC)/common/bzip2 +pics/_crctable.o: libstand_bzlib_private.h +objs/_decompress.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS +objs/_decompress.o := CPPFLAGS += -I$(SRC)/common/bzip2 +objs/_decompress.o: libstand_bzlib_private.h +pics/_decompress.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS +pics/_decompress.o := CPPFLAGS += -I$(SRC)/common/bzip2 +pics/_decompress.o: libstand_bzlib_private.h +objs/_huffman.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS +objs/_huffman.o := CPPFLAGS += -I$(SRC)/common/bzip2 +objs/_huffman.o: libstand_bzlib_private.h +pics/_huffman.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS +pics/_huffman.o := CPPFLAGS += 
-I$(SRC)/common/bzip2 +pics/_huffman.o: libstand_bzlib_private.h +objs/_randtable.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS +objs/_randtable.o := CPPFLAGS += -I$(SRC)/common/bzip2 +objs/_randtable.o: libstand_bzlib_private.h +pics/_randtable.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS +pics/_randtable.o := CPPFLAGS += -I$(SRC)/common/bzip2 +pics/_randtable.o: libstand_bzlib_private.h +objs/bzipfs.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS +objs/bzipfs.o := CPPFLAGS += -I$(SRC)/common/bzip2 +objs/bzipfs.o: libstand_bzlib_private.h +pics/bzipfs.o := CPPFLAGS += -DBZ_LOADER -DBZ_NO_STDIO -DBZ_NO_COMPRESS +pics/bzipfs.o := CPPFLAGS += -I$(SRC)/common/bzip2 +pics/bzipfs.o: libstand_bzlib_private.h SRCS += libstand_bzlib_private.h # too hairy -_inflate.o := SMATCH=off +objs/_inflate.o := SMATCH=off +pics/_inflate.o := SMATCH=off SRCS += _bzlib.c _crctable.c _decompress.c _huffman.c _randtable.c -OBJS += _bzlib.o _crctable.o _decompress.o _huffman.o _randtable.o -CLEANFILES += _bzlib.c _crctable.c _decompress.c _huffman.c _randtable.c +OBJECTS += _bzlib.o _crctable.o _decompress.o _huffman.o _randtable.o +CLEANFILES += _bzlib.c _crctable.c _decompress.c _huffman.c _randtable.c _bzlib.c: $(SRC)/common/bzip2/bzlib.c sed "s|bzlib_private\.h|libstand_bzlib_private.h|" $^ > $@ @@ -112,11 +147,33 @@ libstand_bzlib_private.h: $(SRC)/common/bzip2/bzlib_private.h sed -e 's|<stdlib.h>|"stand.h"|' $^ > $@ # decompression functionality from zlib -adler32.o crc32.o _infback.o _inffast.o _inflate.o _inftrees.o _zutil.o \ -gzipfs.o gzip.o := CPPFLAGS += -I$(ZLIB) +objs/adler32.o := CPPFLAGS += -I$(ZLIB) +pics/adler32.o := CPPFLAGS += -I$(ZLIB) +objs/crc32.o := CPPFLAGS += -I$(ZLIB) +pics/crc32.o := CPPFLAGS += -I$(ZLIB) +objs/_infback.o := CPPFLAGS += -I$(ZLIB) +pics/_infback.o := CPPFLAGS += -I$(ZLIB) +objs/_infback.o pics/_infback.o: libstand_zutil.h libstand_gzguts.h +objs/_inffast.o := CPPFLAGS += -I$(ZLIB) +pics/_inffast.o := 
CPPFLAGS += -I$(ZLIB) +objs/_inffast.o pics/_inffast.o: libstand_zutil.h libstand_gzguts.h +objs/_inflate.o := CPPFLAGS += -I$(ZLIB) +pics/_inflate.o := CPPFLAGS += -I$(ZLIB) +objs/_inflate.o pics/_inflate.o: libstand_zutil.h libstand_gzguts.h +objs/_inftrees.o := CPPFLAGS += -I$(ZLIB) +pics/_inftrees.o := CPPFLAGS += -I$(ZLIB) +objs/_inftrees.o pics/_inftrees.o: libstand_zutil.h libstand_gzguts.h +objs/_zutil.o := CPPFLAGS += -I$(ZLIB) +pics/_zutil.o := CPPFLAGS += -I$(ZLIB) +objs/_zutil.o pics/_zutil.o: libstand_zutil.h libstand_gzguts.h +objs/gzipfs.o := CPPFLAGS += -I$(ZLIB) +pics/gzipfs.o := CPPFLAGS += -I$(ZLIB) +objs/gzip.o := CPPFLAGS += -I$(ZLIB) +pics/gzip.o := CPPFLAGS += -I$(ZLIB) + SRCS += $(ZLIB)/adler32.c $(ZLIB)/crc32.c \ libstand_zutil.h libstand_gzguts.h -OBJS += adler32.o crc32.o +OBJECTS += adler32.o crc32.o _infback.c: $(ZLIB)/infback.c sed -e "s|zutil\.h|libstand_zutil.h|" \ @@ -140,8 +197,8 @@ _zutil.c: $(ZLIB)/zutil.c $^ > $@ SRCS += _infback.c _inffast.c _inflate.c _inftrees.c _zutil.c -OBJS += _infback.o _inffast.o _inflate.o _inftrees.o _zutil.o -CLEANFILES += _infback.c _inffast.c _inflate.c _inftrees.c _zutil.c +OBJECTS += _infback.o _inffast.o _inflate.o _inftrees.o _zutil.o +CLEANFILES += _infback.c _inffast.c _inflate.c _inftrees.c _zutil.c # depend on stand.h being able to be included multiple times libstand_zutil.h: $(ZLIB)/zutil.h @@ -163,9 +220,10 @@ libstand_gzguts.h: $(ZLIB)/gzguts.h CLEANFILES += libstand_zutil.h libstand_gzguts.h # lz4 decompression functionality -lz4.o := CPPFLAGS += -I$(LZ4) +pics/lz4.o := CPPFLAGS += -I$(LZ4) +objs/lz4.o := CPPFLAGS += -I$(LZ4) SRCS += $(LZ4)/lz4.c -OBJS += lz4.o +OBJECTS += lz4.o # io routines SRCS += $(SASRC)/closeall.c $(SASRC)/dev.c \ @@ -174,7 +232,7 @@ SRCS += $(SASRC)/closeall.c $(SASRC)/dev.c \ $(SASRC)/lseek.c $(SASRC)/open.c $(SASRC)/read.c \ $(SASRC)/write.c $(SASRC)/readdir.c -OBJS += closeall.o dev.o ioctl.o nullfs.o stat.o fstat.o close.o lseek.o \ +OBJECTS += closeall.o 
dev.o ioctl.o nullfs.o stat.o fstat.o close.o lseek.o \ open.o read.o write.o readdir.o # network routines @@ -182,12 +240,12 @@ SRCS += $(SASRC)/arp.c $(SASRC)/ether.c $(SASRC)/ip.c \ $(SASRC)/inet_ntoa.c $(SASRC)/in_cksum.c \ $(SASRC)/net.c $(SASRC)/udp.c $(SASRC)/netif.c \ $(SASRC)/rpc.c -OBJS += arp.o ether.o ip.o inet_ntoa.o in_cksum.o net.o udp.o netif.o rpc.o +OBJECTS += arp.o ether.o ip.o inet_ntoa.o in_cksum.o net.o udp.o netif.o rpc.o # network info services: SRCS += $(SASRC)/bootp.c $(SASRC)/rarp.c \ $(SASRC)/bootparam.c -OBJS += bootp.o rarp.o bootparam.o +OBJECTS += bootp.o rarp.o bootparam.o # boot filesystems SRCS += $(SASRC)/ufs.c @@ -197,12 +255,16 @@ SRCS += $(SASRC)/tftp.c SRCS += $(SASRC)/gzipfs.c SRCS += $(SASRC)/bzipfs.c SRCS += $(SASRC)/dosfs.c -OBJS += ufs.o -OBJS += nfs.o -OBJS += cd9660.o -OBJS += tftp.o -OBJS += gzipfs.o -OBJS += bzipfs.o -OBJS += dosfs.o -# -.PARALLEL: +OBJECTS += ufs.o +OBJECTS += nfs.o +OBJECTS += cd9660.o +OBJECTS += tftp.o +OBJECTS += gzipfs.o +OBJECTS += bzipfs.o +OBJECTS += dosfs.o + +# utility +SRCS += $(SRC)/common/util/explicit_bzero.c +SRCS += $(SRC)/common/util/memmem.c +OBJECTS += explicit_bzero.o +OBJECTS += memmem.o diff --git a/usr/src/boot/lib/libstand/crypto/Makefile.inc b/usr/src/boot/lib/libstand/crypto/Makefile.inc index 6a8dadc313..3cff3ecdb6 100644 --- a/usr/src/boot/lib/libstand/crypto/Makefile.inc +++ b/usr/src/boot/lib/libstand/crypto/Makefile.inc @@ -21,32 +21,32 @@ SRCS += $(COMDIR)/edonr/edonr.c SRCS += $(COMDIR)/skein/skein.c SRCS += $(COMDIR)/skein/skein_iv.c SRCS += $(COMDIR)/skein/skein_block.c -OBJS += digest.o -OBJS += sha1.o -OBJS += edonr.o -OBJS += skein.o -OBJS += skein_iv.o -OBJS += skein_block.o +OBJECTS += digest.o +OBJECTS += sha1.o +OBJECTS += edonr.o +OBJECTS += skein.o +OBJECTS += skein_iv.o +OBJECTS += skein_block.o -digest.o := CPPFLAGS += -I../../common +objs/digest.o pics/digest.o := CPPFLAGS += -I../../common # Do not unroll skein loops, reduce code size -skein_block.o := 
CPPFLAGS += -DSKEIN_LOOP=111 +objs/skein_block.o pics/skein_block.o := CPPFLAGS += -DSKEIN_LOOP=111 -%.o: $(COMDIR)/edonr/%.c +objs/%.o pics/%.o: $(COMDIR)/edonr/%.c $(COMPILE.c) -o $@ $< -%.o: $(COMDIR)/skein/%.c +objs/%.o pics/%.o: $(COMDIR)/skein/%.c $(COMPILE.c) -o $@ $< -%.o: $(CRYPTOSRC)/%.c +objs/%.o pics/%.o: $(CRYPTOSRC)/%.c $(COMPILE.c) -o $@ $< -%.o: $(COMDIR)/sha1/%.c - $(COMPILE.c) $< +objs/%.o pics/%.o: $(COMDIR)/sha1/%.c + $(COMPILE.c) -o $@ $< sha1-x86_64.s: $(COMDIR)/sha1/amd64/sha1-x86_64.pl $(PERL) $? $@ -sha1-x86_64.o: sha1-x86_64.s +pics/sha1-x86_64.o: sha1-x86_64.s $(COMPILE.s) -o $@ ${@F:.o=.s} diff --git a/usr/src/boot/lib/libstand/zfs/Makefile.inc b/usr/src/boot/lib/libstand/zfs/Makefile.inc index fd054a9411..a33a42d703 100644 --- a/usr/src/boot/lib/libstand/zfs/Makefile.inc +++ b/usr/src/boot/lib/libstand/zfs/Makefile.inc @@ -13,23 +13,25 @@ # Copyright 2021 Toomas Soome <tsoome@me.com> # -SRCS += $(ZFSSRC)/zfs.c -SRCS += $(ZFSSRC)/gzip.c -SRCS += $(SRC)/common/list/list.c -OBJS += zfs.o -OBJS += gzip.o -OBJS += nvlist.o -OBJS += list.o +SRCS += $(ZFSSRC)/zfs.c +SRCS += $(ZFSSRC)/gzip.c +SRCS += $(SRC)/common/list/list.c +OBJECTS += zfs.o +OBJECTS += gzip.o +OBJECTS += nvlist.o +OBJECTS += list.o -zfs.o := CPPFLAGS += -I../../common -zfs.o := CPPFLAGS += -I../../../cddl/boot/zfs -I$(LZ4) -zfs.o := CPPFLAGS += -I$(SRC)/uts/common/fs/zfs -nvlist.o := CPPFLAGS += -I../../common -I../../../cddl/boot/zfs +objs/zfs.o pics/zfs.o := CPPFLAGS += -I../../common +objs/zfs.o pics/zfs.o := CPPFLAGS += -I../../../cddl/boot/zfs -I$(LZ4) +objs/zfs.o pics/zfs.o := CPPFLAGS += -I$(SRC)/uts/common/fs/zfs +objs/zfs.o pics/zfs.o := CPPFLAGS += -I$(CRYPTOSRC) +objs/nvlist.o pics/nvlist.o := CPPFLAGS += -I../../common +objs/nvlist.o pics/nvlist.o := CPPFLAGS += -I../../../cddl/boot/zfs -%.o: $(ZFSSRC)/%.c +pics/%.o objs/%.o: $(ZFSSRC)/%.c $(COMPILE.c) -o $@ $< -%.o: $(SRC)/common/list/%.c - $(COMPILE.c) -DNDEBUG $< +pics/%.o objs/%.o: $(SRC)/common/list/%.c + 
$(COMPILE.c) -DNDEBUG -o $@ $< zfs.o: $(ZFSSRC)/zfsimpl.c diff --git a/usr/src/boot/sys/boot/Makefile.inc b/usr/src/boot/sys/boot/Makefile.inc index 83a480c156..c6b5320866 100644 --- a/usr/src/boot/sys/boot/Makefile.inc +++ b/usr/src/boot/sys/boot/Makefile.inc @@ -49,14 +49,14 @@ AS_CPPFLAGS= CPPFLAGS= -D_STANDALONE -_gcc=-nostdinc CFLAGS64= -_gcc=-mno-red-zone -CFLAGS= -_gcc=-Os -_gcc=-fPIC -_gcc=-ffreestanding -_gcc=-fno-builtin +CFLAGS= -_gcc=-Os -_gcc=-ffreestanding -_gcc=-fno-builtin CFLAGS += -_gcc=-ffunction-sections -_gcc=-fdata-sections CFLAGS += -_gcc=-mno-mmx -_gcc=-mno-3dnow -_gcc=-mno-sse -_gcc=-mno-sse2 CFLAGS += -_gcc=-mno-sse3 -_gcc=-msoft-float CFLAGS += -_gcc=-mno-avx -_gcc=-mno-aes CFLAGS += -_gcc=-Wall CFLAGS += $(CCNOAUTOINLINE) $(CCNOREORDER) $(CSTD_GNU99) -CCASFLAGS= -fPIC -Wa,--divide +CCASFLAGS= -Wa,--divide ASFLAGS= --divide SMATCH_ = @@ -64,11 +64,11 @@ SMATCH_on = SMATCH_off = -_smatch=off # SMATCH_ARGS will bring in set of -Wno-* options. -#CFLAGS += $(SMATCH_ARGS:%=-_smatch=%) +SMATCH_ARGS = --timeout=0 +CFLAGS += $(SMATCH_ARGS:%=-_smatch=%) CFLAGS += $(SMOFF:%=-_smatch=--disable=%) CFLAGS += $(SMATCH_$(MACHINE)) CFLAGS += $(SMATCH_$(SMATCH)) -CFLAGS += -_smatch=--timeout=0 COMPILE.S= $(CC) $(SMATCH_off) $(CCASFLAGS) $(CPPFLAGS) -c diff --git a/usr/src/boot/sys/boot/Makefile.lib b/usr/src/boot/sys/boot/Makefile.lib new file mode 100644 index 0000000000..baa97a1513 --- /dev/null +++ b/usr/src/boot/sys/boot/Makefile.lib @@ -0,0 +1,34 @@ +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. 
+# + +# +# Copyright 2021 Toomas Soome <tsoome@me.com> +# + +OBJS= $(OBJECTS:%=objs/%) +PICS= $(OBJECTS:%=pics/%) + +.PARALLEL: $(OBJS) $(PICS) DUMMY + +$(PICS) := CFLAGS += -_gcc=-fPIC +$(PICS) := CCASFLAGS += -_gcc=-fPIC + +$(OBJS) $(PICS): machine x86 + +objs pics: + -@mkdir -p $@ + +$(LIBRARY): objs .WAIT $$(OBJS) + $(AR) $(ARFLAGS) $@ $(OBJS) + +$(DYNLIB): pics .WAIT $$(PICS) + $(AR) $(ARFLAGS) $@ $(PICS) + +CLEANFILES += $(OBJS) $(PICS) $(LIBRARY) $(DYNLIB) diff --git a/usr/src/boot/sys/boot/efi/Makefile.inc b/usr/src/boot/sys/boot/efi/Makefile.inc index 4201600ac3..ad29e53249 100644 --- a/usr/src/boot/sys/boot/efi/Makefile.inc +++ b/usr/src/boot/sys/boot/efi/Makefile.inc @@ -14,6 +14,6 @@ # # Options used when building app-specific efi components -CFLAGS += -_gcc=-fshort-wchar +CFLAGS += -_gcc=-fshort-wchar $(C_BIGPICFLAGS) .PARALLEL: diff --git a/usr/src/boot/sys/boot/efi/loader/Makefile.com b/usr/src/boot/sys/boot/efi/loader/Makefile.com index dc71ddeede..ea6303034f 100644 --- a/usr/src/boot/sys/boot/efi/loader/Makefile.com +++ b/usr/src/boot/sys/boot/efi/loader/Makefile.com @@ -15,7 +15,6 @@ # Copyright (c) 2019, Joyent, Inc. # -include $(SRC)/Makefile.master include $(SRC)/boot/Makefile.version include $(SRC)/boot/sys/boot/Makefile.inc @@ -98,18 +97,46 @@ smbios.o := CPPFLAGS += -DSMBIOS_LITTLE_ENDIAN_UUID # Use network-endian UUID format for backward compatibility. 
#CPPFLAGS += -DSMBIOS_NETWORK_ENDIAN_UUID -DPLIBSTAND= ../../../libstand/$(MACHINE)/libstand.a -LIBSTAND= -L../../../libstand/$(MACHINE) -lstand +DPLIBSTAND= ../../../libstand/$(MACHINE)/libstand_pics.a +LIBSTAND= -L../../../libstand/$(MACHINE) -lstand_pics BOOT_FORTH= yes CPPFLAGS += -DBOOT_FORTH CPPFLAGS += -I$(SRC)/common/ficl CPPFLAGS += -I../../../libficl -DPLIBFICL= ../../../libficl/$(MACHINE)/libficl.a -LIBFICL= -L../../../libficl/$(MACHINE) -lficl +DPLIBFICL= ../../../libficl/$(MACHINE)/libficl_pics.a +LIBFICL= -L../../../libficl/$(MACHINE) -lficl_pics # Always add MI sources -include ../Makefile.common +# +SRCS += boot.c commands.c console.c devopen.c interp.c +SRCS += interp_backslash.c interp_parse.c ls.c misc.c +SRCS += module.c linenoise.c zfs_cmd.c + +OBJS += boot.o commands.o console.o devopen.o interp.o \ + interp_backslash.o interp_parse.o ls.o misc.o \ + module.o linenoise.o zfs_cmd.o + +SRCS += load_elf32.c load_elf32_obj.c reloc_elf32.c +SRCS += load_elf64.c load_elf64_obj.c reloc_elf64.c + +OBJS += load_elf32.o load_elf32_obj.o reloc_elf32.o \ + load_elf64.o load_elf64_obj.o reloc_elf64.o + +SRCS += disk.c part.c dev_net.c vdisk.c +OBJS += disk.o part.o dev_net.o vdisk.o +CPPFLAGS += -DLOADER_DISK_SUPPORT +CPPFLAGS += -DLOADER_GPT_SUPPORT +CPPFLAGS += -DLOADER_MBR_SUPPORT + +part.o := CPPFLAGS += -I$(ZLIB) + +SRCS += bcache.c +OBJS += bcache.o + +# Forth interpreter +SRCS += interp_forth.c +OBJS += interp_forth.o CPPFLAGS += -I../../../common # For multiboot2.h, must be last, to avoid conflicts @@ -125,7 +152,7 @@ LDFLAGS = -nostdlib --eh-frame-hdr LDFLAGS += -shared --hash-style=both --enable-new-dtags LDFLAGS += -T$(LDSCRIPT) -Bsymbolic -CLEANFILES= loader.sym loader.bin +CLEANFILES= $(EFIPROG) loader.sym loader.bin CLEANFILES += $(FONT).c vers.c NEWVERSWHAT= "EFI loader" $(MACHINE) @@ -167,7 +194,7 @@ x86: $(SYMLINK) ../../../../x86/include x86 clean clobber: - $(RM) $(CLEANFILES) $(OBJS) + $(RM) $(CLEANFILES) $(OBJS) machine x86 %.o: ../%.c 
$(COMPILE.c) $< diff --git a/usr/src/boot/sys/boot/efi/loader/Makefile.common b/usr/src/boot/sys/boot/efi/loader/Makefile.common deleted file mode 100644 index 9399748591..0000000000 --- a/usr/src/boot/sys/boot/efi/loader/Makefile.common +++ /dev/null @@ -1,30 +0,0 @@ -# - -SRCS += boot.c commands.c console.c devopen.c interp.c -SRCS += interp_backslash.c interp_parse.c ls.c misc.c -SRCS += module.c linenoise.c zfs_cmd.c - -OBJS += boot.o commands.o console.o devopen.o interp.o \ - interp_backslash.o interp_parse.o ls.o misc.o \ - module.o linenoise.o zfs_cmd.o - -SRCS += load_elf32.c load_elf32_obj.c reloc_elf32.c -SRCS += load_elf64.c load_elf64_obj.c reloc_elf64.c - -OBJS += load_elf32.o load_elf32_obj.o reloc_elf32.o \ - load_elf64.o load_elf64_obj.o reloc_elf64.o - -SRCS += disk.c part.c dev_net.c vdisk.c -OBJS += disk.o part.o dev_net.o vdisk.o -CFLAGS += -DLOADER_DISK_SUPPORT -CFLAGS += -DLOADER_GPT_SUPPORT -CFLAGS += -DLOADER_MBR_SUPPORT - -part.o := CPPFLAGS += -I$(ZLIB) - -SRCS += bcache.c -OBJS += bcache.o - -# Forth interpreter -SRCS += interp_forth.c -OBJS += interp_forth.o diff --git a/usr/src/boot/sys/boot/efi/loader/amd64/Makefile b/usr/src/boot/sys/boot/efi/loader/amd64/Makefile index 8022b364f2..a7894b50ca 100644 --- a/usr/src/boot/sys/boot/efi/loader/amd64/Makefile +++ b/usr/src/boot/sys/boot/efi/loader/amd64/Makefile @@ -14,6 +14,8 @@ # Copyright 2016 RackTop Systems. # +include $(SRC)/Makefile.master + MACHINE= $(MACH64) EFIPROG= loader64.efi diff --git a/usr/src/boot/sys/boot/efi/loader/i386/Makefile b/usr/src/boot/sys/boot/efi/loader/i386/Makefile index 42885ca81f..b2f086971a 100644 --- a/usr/src/boot/sys/boot/efi/loader/i386/Makefile +++ b/usr/src/boot/sys/boot/efi/loader/i386/Makefile @@ -14,6 +14,8 @@ # Copyright 2016 RackTop Systems. 
# +include $(SRC)/Makefile.master + MACHINE= $(MACH) EFIPROG= loader32.efi @@ -28,8 +30,6 @@ efi_main.o := CPPFLAGS += -DLOADER_EFI=L\"loader32.efi\" CFLAGS += -m32 CCASFLAGS += -m32 -CLEANFILES += machine x86 $(EFIPROG) - $(OBJS): machine x86 %.o: ../../../i386/libi386/%.c diff --git a/usr/src/boot/sys/boot/i386/libi386/Makefile b/usr/src/boot/sys/boot/i386/libi386/Makefile index aa11fc2276..4b6d501c95 100644 --- a/usr/src/boot/sys/boot/i386/libi386/Makefile +++ b/usr/src/boot/sys/boot/i386/libi386/Makefile @@ -105,48 +105,39 @@ SRCS += $(ZFSSRC)/devicename_stubs.c OBJS += devicename_stubs.o BOOT_COMCONSOLE_PORT= 0x3f8 -CFLAGS += -DCOMPORT=${BOOT_COMCONSOLE_PORT} +CPPFLAGS += -DCOMPORT=${BOOT_COMCONSOLE_PORT} BOOT_COMCONSOLE_SPEED= 9600 -CFLAGS += -DCOMSPEED=${BOOT_COMCONSOLE_SPEED} +CPPFLAGS += -DCOMSPEED=${BOOT_COMCONSOLE_SPEED} # Make the disk code more talkative -# CFLAGS+= -DDISK_DEBUG +# CPPFLAGS+= -DDISK_DEBUG # Export serial numbers, UUID, and asset tag from loader. -smbios.o := CFLAGS += -DSMBIOS_SERIAL_NUMBERS +smbios.o := CPPFLAGS += -DSMBIOS_SERIAL_NUMBERS # Use little-endian UUID format as defined in SMBIOS 2.6. -smbios.o := CFLAGS += -DSMBIOS_LITTLE_ENDIAN_UUID +smbios.o := CPPFLAGS += -DSMBIOS_LITTLE_ENDIAN_UUID # Use network-endian UUID format for backward compatibility. -#CFLAGS += -DSMBIOS_NETWORK_ENDIAN_UUID +#CPPFLAGS += -DSMBIOS_NETWORK_ENDIAN_UUID # XXX: make alloca() useable -CFLAGS += -Dalloca=__builtin_alloca +CPPFLAGS += -Dalloca=__builtin_alloca -CFLAGS += -I$(SRC)/common/ficl -I../../libficl \ +CPPFLAGS += -I$(SRC)/common/ficl -I../../libficl \ -I../../common -I../common \ -I../btx/lib \ -I$(SRC)/uts/intel/sys/acpi \ -I../../.. -I. 
# the location of libstand -CFLAGS += -I../../../../lib/libstand/ +CPPFLAGS += -I../../../../lib/libstand/ multiboot.o := CPPFLAGS += -I../../../cddl/boot/zfs multiboot2.o := CPPFLAGS += -I../../../cddl/boot/zfs devicename.o := CPPFLAGS += -I../../../cddl/boot/zfs devicename_stubs.o := CPPFLAGS += -I../../../cddl/boot/zfs -# Handle FreeBSD specific %b and %D printf format specifiers -#FORMAT_EXTENSIONS=-D__printf__=__freebsd_kprintf__ -#CFLAGS += ${FORMAT_EXTENSIONS} - CLEANFILES += machine x86 -# XXX: clang integrated-as doesn't grok .codeNN directives yet -# CFLAGS.amd64_tramp.S= ${CLANG_NO_IAS} -# CFLAGS.multiboot_tramp.S= ${CLANG_NO_IAS} -# CFLAGS += ${CFLAGS.${.IMPSRC:T}} - include ../Makefile.inc # For multiboot2.h, must be last, to avoid conflicts diff --git a/usr/src/boot/sys/boot/libficl/Makefile.com b/usr/src/boot/sys/boot/libficl/Makefile.com index 0cd051356f..564403516d 100644 --- a/usr/src/boot/sys/boot/libficl/Makefile.com +++ b/usr/src/boot/sys/boot/libficl/Makefile.com @@ -34,20 +34,14 @@ HEADERS= $(FICLDIR)/ficl.h $(FICLDIR)/ficlplatform/unix.h ../ficllocal.h # # disable inner loop variable 'fw' check -vm.o := SMOFF += check_check_deref - -.PARALLEL: +objs/vm.o := SMOFF += check_check_deref +pics/vm.o := SMOFF += check_check_deref MAJOR = 4 MINOR = 1.0 -lib: libficl.a - -vm.o := CFLAGS += -_gcc=-Wno-clobbered - -# static library build -libficl.a: $(OBJECTS) - $(AR) $(ARFLAGS) libficl.a $(OBJECTS) +objs/vm.o := CFLAGS += -_gcc=-Wno-clobbered +pics/vm.o := CFLAGS += -_gcc=-Wno-clobbered machine: $(RM) machine @@ -57,17 +51,17 @@ x86: $(RM) x86 $(SYMLINK) ../../../x86/include x86 -%.o: ../softcore/%.c $(HEADERS) - $(COMPILE.c) $< +objs/%.o pics/%.o: ../softcore/%.c $(HEADERS) + $(COMPILE.c) -o $@ $< -%.o: $(FICLDIR)/%.c $(HEADERS) - $(COMPILE.c) $< +objs/%.o pics/%.o: $(FICLDIR)/%.c $(HEADERS) + $(COMPILE.c) -o $@ $< -%.o: $(FICLDIR)/ficlplatform/%.c $(HEADERS) - $(COMPILE.c) $< +objs/%.o pics/%.o: $(FICLDIR)/ficlplatform/%.c $(HEADERS) + $(COMPILE.c) 
-o $@ $< # # generic cleanup code # clobber clean: FRC - $(RM) *.o *.a libficl.* ficl machine x86 + $(RM) $(CLEANFILES) machine x86 diff --git a/usr/src/boot/sys/boot/libficl/amd64/Makefile b/usr/src/boot/sys/boot/libficl/amd64/Makefile index ef560fa39b..2f006575f7 100644 --- a/usr/src/boot/sys/boot/libficl/amd64/Makefile +++ b/usr/src/boot/sys/boot/libficl/amd64/Makefile @@ -17,13 +17,14 @@ include $(SRC)/Makefile.master MACHINE= $(MACH64) +DYNLIB= libficl_pics.a -all install: lib +all install: $(DYNLIB) include ../Makefile.com CFLAGS += -m64 $(CFLAGS64) -$(OBJECTS): machine x86 +include $(SRC)/boot/sys/boot/Makefile.lib FRC: diff --git a/usr/src/boot/sys/boot/libficl/i386/Makefile b/usr/src/boot/sys/boot/libficl/i386/Makefile index f4478a4c50..6ad5f9467d 100644 --- a/usr/src/boot/sys/boot/libficl/i386/Makefile +++ b/usr/src/boot/sys/boot/libficl/i386/Makefile @@ -17,13 +17,15 @@ include $(SRC)/Makefile.master MACHINE= $(MACH) +LIBRARY= libficl.a +DYNLIB= libficl_pics.a -all install: lib +all install: $(LIBRARY) $(DYNLIB) include ../Makefile.com CFLAGS += -m32 -$(OBJECTS): machine x86 +include $(SRC)/boot/sys/boot/Makefile.lib FRC: diff --git a/usr/src/boot/sys/boot/libstand/Makefile b/usr/src/boot/sys/boot/libstand/Makefile index 41e614a3ec..01b0b02cab 100644 --- a/usr/src/boot/sys/boot/libstand/Makefile +++ b/usr/src/boot/sys/boot/libstand/Makefile @@ -13,8 +13,6 @@ # Copyright 2015 Toomas Soome <tsoome@me.com> # -.KEEP_STATE: - include $(SRC)/Makefile.master SUBDIRS = $(MACH) $(MACH64) @@ -24,9 +22,9 @@ clean := TARGET = clean clobber := TARGET = clobber install := TARGET = install -all clean clobber: $(SUBDIRS) +.KEEP_STATE: -install: all +all clean clobber install: $(SUBDIRS) .PARALLEL: diff --git a/usr/src/boot/sys/boot/libstand/Makefile.com b/usr/src/boot/sys/boot/libstand/Makefile.com index 9294460253..d282deedaa 100644 --- a/usr/src/boot/sys/boot/libstand/Makefile.com +++ b/usr/src/boot/sys/boot/libstand/Makefile.com @@ -14,37 +14,23 @@ # Copyright 2019 
Joyent, Inc. # -include $(SRC)/Makefile.master include $(SRC)/boot/sys/boot/Makefile.inc CPPFLAGS += -I../../../../include -I$(SASRC) -CPPFLAGS += -I../../.. -I. -I$(SRC)/common/bzip2 - -$(LIBRARY): $(SRCS) $(OBJS) - $(AR) $(ARFLAGS) $@ $(OBJS) +CPPFLAGS += -I../../.. -I. include $(SASRC)/Makefile.inc include $(CRYPTOSRC)/Makefile.inc include $(ZFSSRC)/Makefile.inc -LIBCSRC= $(SRC)/lib/libc -OBJS += explicit_bzero.o -OBJS += memmem.o - CPPFLAGS += -I$(SRC)/uts/common -# needs work -printf.o := SMOFF += 64bit_shift - -# too hairy -_inflate.o := SMATCH=off - # 64-bit smatch false positive :/ SMOFF += uninitialized -clean: clobber -clobber: - $(RM) $(CLEANFILES) $(OBJS) machine $(LIBRARY) +# needs work +objs/printf.o := SMOFF += 64bit_shift +pics/printf.o := SMOFF += 64bit_shift machine: $(RM) machine @@ -54,23 +40,30 @@ x86: $(RM) x86 $(SYMLINK) ../../../x86/include x86 -%.o: $(SASRC)/%.c - $(COMPILE.c) $< +pics/%.o objs/%.o: %.c + $(COMPILE.c) -o $@ $< + +pics/%.o objs/%.o: $(SASRC)/%.c + $(COMPILE.c) -o $@ $< -%.o: $(LIBSRC)/libc/net/%.c - $(COMPILE.c) $< +pics/%.o objs/%.o: $(LIBSRC)/libc/net/%.c + $(COMPILE.c) -o $@ $< -%.o: $(LIBSRC)/libc/string/%.c - $(COMPILE.c) $< +pics/%.o objs/%.o: $(LIBSRC)/libc/string/%.c + $(COMPILE.c) -o $@ $< -%.o: $(LIBSRC)/libc/uuid/%.c - $(COMPILE.c) $< +pics/%.o objs/%.o: $(LIBSRC)/libc/uuid/%.c + $(COMPILE.c) -o $@ $< -%.o: $(ZLIB)/%.c - $(COMPILE.c) $< +pics/%.o objs/%.o: $(ZLIB)/%.c + $(COMPILE.c) -o $@ $< -%.o: $(LZ4)/%.c - $(COMPILE.c) $< +pics/%.o objs/%.o: $(LZ4)/%.c + $(COMPILE.c) -o $@ $< -%.o: $(SRC)/common/util/%.c - $(COMPILE.c) $< +pics/%.o objs/%.o: $(SRC)/common/util/%.c + $(COMPILE.c) -o $@ $< + +clean: clobber +clobber: + $(RM) $(CLEANFILES) machine x86 diff --git a/usr/src/boot/sys/boot/libstand/amd64/Makefile b/usr/src/boot/sys/boot/libstand/amd64/Makefile index 8c28ac3dda..ec11ff2156 100644 --- a/usr/src/boot/sys/boot/libstand/amd64/Makefile +++ b/usr/src/boot/sys/boot/libstand/amd64/Makefile @@ -14,10 +14,12 @@ 
# Copyright 2016 RackTop Systems. # +include $(SRC)/Makefile.master + MACHINE= $(MACH64) -LIBRARY= libstand.a +DYNLIB= libstand_pics.a -all install: $(LIBRARY) +all install: $(DYNLIB) include ../Makefile.com @@ -25,17 +27,17 @@ ASFLAGS = $(amd64_AS_XARCH) -I$(SRC)/uts/common -D_ASM CFLAGS += -m64 $(CFLAGS64) CCASFLAGS += -m64 -CLEANFILES += x86 sha1-x86_64.s - # _setjmp/_longjmp SRCS += $(SASRC)/amd64/_setjmp.S -OBJS += _setjmp.o +OBJECTS += _setjmp.o SRCS += sha1-x86_64.s -OBJS += sha1-x86_64.o +OBJECTS += sha1-x86_64.o + +CLEANFILES += sha1-x86_64.s -$(LIBRARY): $(SRCS) $(OBJS) +pics/%.o: $(SASRC)/amd64/%.S + $(COMPILE.S) -o $@ $< -$(OBJS): machine x86 +include $(SRC)/boot/sys/boot/Makefile.lib -%.o: $(SASRC)/amd64/%.S - $(COMPILE.S) $< +FRC: diff --git a/usr/src/boot/sys/boot/libstand/i386/Makefile b/usr/src/boot/sys/boot/libstand/i386/Makefile index 4837c0eb4d..aa55779a26 100644 --- a/usr/src/boot/sys/boot/libstand/i386/Makefile +++ b/usr/src/boot/sys/boot/libstand/i386/Makefile @@ -14,25 +14,26 @@ # Copyright 2016 RackTop Systems. 
# +include $(SRC)/Makefile.master + MACHINE= $(MACH) LIBRARY= libstand.a +DYNLIB= libstand_pics.a -all install: $(LIBRARY) +all install: $(LIBRARY) $(DYNLIB) include ../Makefile.com CFLAGS += -m32 CCASFLAGS += -m32 -CLEANFILES += x86 - # _setjmp/_longjmp SRCS += $(SASRC)/i386/_setjmp.S -OBJS += _setjmp.o +OBJECTS += _setjmp.o -$(LIBRARY): $(SRCS) $(OBJS) +pics/%.o objs/%.o: $(SASRC)/i386/%.S + $(COMPILE.S) -o $@ $< -$(OBJS): machine x86 +include $(SRC)/boot/sys/boot/Makefile.lib -%.o: $(SASRC)/i386/%.S - $(COMPILE.S) $< +FRC: diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile index a4bacee105..a66bec5656 100644 --- a/usr/src/cmd/Makefile +++ b/usr/src/cmd/Makefile @@ -496,6 +496,7 @@ i386_SUBDIRS= \ nvmeadm \ pptadm \ rdmsr \ + rsrvrctl \ rtc \ ucodeadm \ xhci \ diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c index 9531db8fef..f8a3cd8760 100644 --- a/usr/src/cmd/bhyve/bhyverun.c +++ b/usr/src/cmd/bhyve/bhyverun.c @@ -39,7 +39,7 @@ * * Copyright 2015 Pluribus Networks Inc. * Copyright 2018 Joyent, Inc. - * Copyright 2020 Oxide Computer Company + * Copyright 2021 Oxide Computer Company */ #include <sys/cdefs.h> @@ -1257,8 +1257,15 @@ do_open(const char *vmname) if (lpc_bootrom()) romboot = true; - +#ifndef __FreeBSD__ + uint64_t create_flags = 0; + if (get_config_bool_default("memory.use_reservoir", false)) { + create_flags |= VCF_RESERVOIR_MEM; + } + error = vm_create(vmname, create_flags); +#else error = vm_create(vmname); +#endif /* __FreeBSD__ */ if (error) { if (errno == EEXIST) { if (romboot) { diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c index 313a1a37f4..4fc6ddc251 100644 --- a/usr/src/cmd/bhyvectl/bhyvectl.c +++ b/usr/src/cmd/bhyvectl/bhyvectl.c @@ -39,7 +39,7 @@ * * Copyright 2015 Pluribus Networks Inc. * Copyright 2019 Joyent, Inc. 
- * Copyright 2020 Oxide Computer Company + * Copyright 2021 Oxide Computer Company */ #include <sys/cdefs.h> @@ -1953,8 +1953,13 @@ main(int argc, char *argv[]) error = 0; +#ifndef __FreeBSD__ + if (!error && create) + error = vm_create(vmname, 0); +# else if (!error && create) error = vm_create(vmname); +#endif /* __FreeBSD__ */ if (!error) { ctx = vm_open(vmname); diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c index 81539340d3..cc4d4e95d4 100644 --- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c +++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c @@ -748,7 +748,8 @@ ipmgmt_getif_handler(void *argp) ipmgmt_getif_rval_t *rvalp; ipmgmt_retval_t rval; ipmgmt_getif_cbarg_t cbarg; - ipadm_if_info_t *ifp, *rifp, *curifp; + ipadm_if_info_list_t *ifl, *curifl; + ipadm_if_info_t *ifp, *rifp; int i, err = 0, count = 0; size_t rbufsize; @@ -771,7 +772,7 @@ ipmgmt_getif_handler(void *argp) } /* allocate sufficient buffer to return the interface info */ - for (ifp = cbarg.cb_ifinfo; ifp != NULL; ifp = ifp->ifi_next) + for (ifl = cbarg.cb_ifinfo; ifl != NULL; ifl = ifl->ifil_next) ++count; rbufsize = sizeof (*rvalp) + count * sizeof (*ifp); rvalp = alloca(rbufsize); @@ -779,7 +780,7 @@ ipmgmt_getif_handler(void *argp) rvalp->ir_ifcnt = count; rifp = rvalp->ir_ifinfo; - ifp = cbarg.cb_ifinfo; + ifl = cbarg.cb_ifinfo; /* * copy the interface info to buffer allocated on stack. 
The reason @@ -787,12 +788,12 @@ ipmgmt_getif_handler(void *argp) * return */ for (i = 0; i < count; i++) { + ifp = &ifl->ifil_ifi; rifp = rvalp->ir_ifinfo + i; (void) bcopy(ifp, rifp, sizeof (*rifp)); - rifp->ifi_next = NULL; - curifp = ifp->ifi_next; - free(ifp); - ifp = curifp; + curifl = ifl->ifil_next; + free(ifl); + ifl = curifl; } rvalp->ir_err = err; (void) door_return((char *)rvalp, rbufsize, NULL, 0); diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_impl.h b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_impl.h index f4d6d30645..a1f016c8c6 100644 --- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_impl.h +++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_impl.h @@ -23,6 +23,7 @@ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2015 Joyent, Inc. * Copyright (c) 2016, Chris Fraire <cfraire@me.com>. + * Copyright 2021, Tintri by DDN. All rights reserved. */ #ifndef _IPMGMT_IMPL_H @@ -63,8 +64,8 @@ extern db_wfunc_t ipmgmt_db_getprop, ipmgmt_db_resetprop; extern db_wfunc_t ipmgmt_db_add, ipmgmt_db_update; typedef struct { - char *cb_ifname; - ipadm_if_info_t *cb_ifinfo; + char *cb_ifname; + ipadm_if_info_list_t *cb_ifinfo; } ipmgmt_getif_cbarg_t; extern db_wfunc_t ipmgmt_db_getif; @@ -99,7 +100,7 @@ extern db_wfunc_t ipmgmt_db_initif; * A linked list of address object nodes. Each node in the list tracks * following information for the address object identified by `am_aobjname'. * - interface on which the address is created - * - logical interface number on which the address is created + * - logical interface number on which the address is created * - address family * - `am_nextnum' identifies the next number to use to generate user part * of `aobjname'. 
@@ -176,7 +177,7 @@ extern int ipmgmt_db_walk(db_wfunc_t *, void *, ipadm_db_op_t); extern int ipmgmt_aobjmap_op(ipmgmt_aobjmap_t *, uint32_t); extern boolean_t ipmgmt_aobjmap_init(void *, nvlist_t *, char *, size_t, int *); -extern int ipmgmt_persist_aobjmap(ipmgmt_aobjmap_t *, +extern int ipmgmt_persist_aobjmap(ipmgmt_aobjmap_t *, ipadm_db_op_t); extern boolean_t ipmgmt_ngz_firstboot_postinstall(); extern int ipmgmt_persist_if(ipmgmt_if_arg_t *); diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_main.c b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_main.c index 994d1b0125..c5bda676cc 100644 --- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_main.c +++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_main.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2015 Joyent, Inc. + * Copyright 2021, Tintri by DDN. All rights reserved. */ /* @@ -568,13 +569,14 @@ ipmgmt_persist_if_exists(char *ifname, sa_family_t af) bzero(&cbarg, sizeof (cbarg)); cbarg.cb_ifname = ifname; (void) ipmgmt_db_walk(ipmgmt_db_getif, &cbarg, IPADM_DB_READ); - if ((ifp = cbarg.cb_ifinfo) != NULL) { + if (cbarg.cb_ifinfo != NULL) { + ifp = &cbarg.cb_ifinfo->ifil_ifi; if ((af == AF_INET && (ifp->ifi_pflags & IFIF_IPV4)) || (af == AF_INET6 && (ifp->ifi_pflags & IFIF_IPV6))) { exists = B_TRUE; } } - free(ifp); + free(cbarg.cb_ifinfo); return (exists); } diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_persist.c b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_persist.c index a185068005..2b471b8146 100644 --- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_persist.c +++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_persist.c @@ -24,6 +24,7 @@ * Copyright 2018 Joyent, Inc. * Copyright 2016 Argo Technologie SA. * Copyright (c) 2016-2017, Chris Fraire <cfraire@me.com>. + * Copyright 2021, Tintri by DDN. All rights reserved. 
*/ /* @@ -575,7 +576,8 @@ ipmgmt_db_getif(void *arg, nvlist_t *db_nvl, char *buf, size_t buflen, ipmgmt_getif_cbarg_t *cbarg = arg; char *ifname = cbarg->cb_ifname; char *intf = NULL; - ipadm_if_info_t *ifp = NULL; + ipadm_if_info_list_t *ifl = NULL; + ipadm_if_info_t *ifp; sa_family_t af; char *afstr; @@ -586,20 +588,21 @@ ipmgmt_db_getif(void *arg, nvlist_t *db_nvl, char *buf, size_t buflen, return (B_TRUE); } af = atoi(afstr); - for (ifp = cbarg->cb_ifinfo; ifp != NULL; ifp = ifp->ifi_next) { + for (ifl = cbarg->cb_ifinfo; ifl != NULL; ifl = ifl->ifil_next) { + ifp = &ifl->ifil_ifi; if (strcmp(ifp->ifi_name, intf) == 0) break; } - if (ifp == NULL) { - ipadm_if_info_t *new; + if (ifl == NULL) { + ipadm_if_info_list_t *new; if ((new = calloc(1, sizeof (*new))) == NULL) { *errp = ENOMEM; return (B_FALSE); /* don't continue the walk */ } - new->ifi_next = cbarg->cb_ifinfo; + new->ifil_next = cbarg->cb_ifinfo; cbarg->cb_ifinfo = new; - ifp = new; + ifp = &new->ifil_ifi; (void) strlcpy(ifp->ifi_name, intf, sizeof (ifp->ifi_name)); } diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ipadm/ipadm.c b/usr/src/cmd/cmd-inet/usr.sbin/ipadm/ipadm.c index 30e2f0f549..c195e7be80 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/ipadm/ipadm.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/ipadm/ipadm.c @@ -25,6 +25,7 @@ * Copyright (c) 2018, Joyent, Inc. * Copyright 2017 Gary Mills * Copyright (c) 2016, Chris Fraire <cfraire@me.com>. + * Copyright 2021, Tintri by DDN. All rights reserved. 
*/ #include <arpa/inet.h> @@ -278,7 +279,7 @@ typedef struct show_addr_args_s { typedef struct show_if_args_s { show_if_state_t *si_state; - ipadm_if_info_t *si_info; + ipadm_if_info_list_t *si_info; } show_if_args_t; typedef enum { @@ -675,7 +676,7 @@ do_show_ifprop(int argc, char **argv, const char *use) uint_t proto; boolean_t m_arg = _B_FALSE; char *protostr; - ipadm_if_info_t *ifinfo, *ifp; + ipadm_if_info_list_t *ifinfo, *ifl; ipadm_status_t status; show_prop_state_t state; @@ -739,8 +740,9 @@ do_show_ifprop(int argc, char **argv, const char *use) if (status != IPADM_SUCCESS) die("Error retrieving interface(s): %s", ipadm_status2str(status)); - for (ifp = ifinfo; ifp; ifp = ifp->ifi_next) { - (void) strlcpy(state.sps_ifname, ifp->ifi_name, LIFNAMSIZ); + for (ifl = ifinfo; ifl != NULL; ifl = ifl->ifil_next) { + (void) strlcpy(state.sps_ifname, ifl->ifil_ifi.ifi_name, + LIFNAMSIZ); state.sps_proto = proto; show_properties(&state, IPADMPROP_CLASS_IF); } @@ -1611,7 +1613,7 @@ flags2str(uint64_t flags, fmask_t *tbl, boolean_t is_bits, static boolean_t is_from_gz(const char *lifname) { - ipadm_if_info_t *if_info; + ipadm_if_info_list_t *if_info; char phyname[LIFNAMSIZ], *cp; boolean_t ret = _B_FALSE; ipadm_status_t status; @@ -1634,7 +1636,7 @@ is_from_gz(const char *lifname) if (status != IPADM_SUCCESS) return (ret); - if (if_info->ifi_cflags & IFIF_L3PROTECT) + if (if_info->ifil_ifi.ifi_cflags & IFIF_L3PROTECT) ret = _B_TRUE; ipadm_free_if_info(if_info); return (ret); @@ -1889,8 +1891,8 @@ static boolean_t print_si_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) { show_if_args_t *arg = ofarg->ofmt_cbarg; - ipadm_if_info_t *ifinfo = arg->si_info; - char *ifname = ifinfo->ifi_name; + ipadm_if_info_list_t *ifinfo = arg->si_info; + char *ifname = ifinfo->ifil_ifi.ifi_name; fmask_t intf_state[] = { { "ok", IFIS_OK, IPADM_ALL_BITS}, { "down", IFIS_DOWN, IPADM_ALL_BITS}, @@ -1927,15 +1929,15 @@ print_si_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize) (void) 
snprintf(buf, bufsize, "%s", ifname); break; case SI_STATE: - flags2str(ifinfo->ifi_state, intf_state, _B_FALSE, + flags2str(ifinfo->ifil_ifi.ifi_state, intf_state, _B_FALSE, buf, bufsize); break; case SI_CURRENT: - flags2str(ifinfo->ifi_cflags, intf_cflags, _B_TRUE, + flags2str(ifinfo->ifil_ifi.ifi_cflags, intf_cflags, _B_TRUE, buf, bufsize); break; case SI_PERSISTENT: - flags2str(ifinfo->ifi_pflags, intf_pflags, _B_TRUE, + flags2str(ifinfo->ifil_ifi.ifi_pflags, intf_pflags, _B_TRUE, buf, bufsize); break; default: @@ -1956,7 +1958,7 @@ do_show_if(int argc, char *argv[], const char *use) ipadm_status_t status; show_if_state_t state; char *fields_str = NULL; - ipadm_if_info_t *if_info, *ptr; + ipadm_if_info_list_t *if_info, *ptr; show_if_args_t sargs; int option; ofmt_handle_t ofmt; @@ -2001,7 +2003,7 @@ do_show_if(int argc, char *argv[], const char *use) ipadm_status2str(status)); } - for (ptr = if_info; ptr; ptr = ptr->ifi_next) { + for (ptr = if_info; ptr != NULL; ptr = ptr->ifil_next) { sargs.si_info = ptr; ofmt_print(state.si_ofmt, &sargs); } diff --git a/usr/src/cmd/rcm_daemon/common/ip_rcm.c b/usr/src/cmd/rcm_daemon/common/ip_rcm.c index a389fc770d..7d2eeec9b7 100644 --- a/usr/src/cmd/rcm_daemon/common/ip_rcm.c +++ b/usr/src/cmd/rcm_daemon/common/ip_rcm.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2021, Tintri by DDN. All rights reserved. 
*/ /* @@ -2373,7 +2374,7 @@ if_configure_ipadm(datalink_id_t linkid) { char ifinst[MAXLINKNAMELEN]; boolean_t found; - ipadm_if_info_t *ifinfo, *ptr; + ipadm_if_info_list_t *ifinfo, *ptr; ipadm_status_t status; assert(linkid != DATALINK_INVALID_LINKID); @@ -2398,14 +2399,14 @@ if_configure_ipadm(datalink_id_t linkid) } if (ifinfo != NULL) { found = B_FALSE; - for (ptr = ifinfo; ptr; ptr = ptr->ifi_next) { - if (strncmp(ptr->ifi_name, ifinst, + for (ptr = ifinfo; ptr != NULL; ptr = ptr->ifil_next) { + if (strncmp(ptr->ifil_ifi.ifi_name, ifinst, sizeof (ifinst)) == 0) { found = B_TRUE; break; } } - free(ifinfo); + ipadm_free_if_info(ifinfo); if (!found) { return (0); } diff --git a/usr/src/cmd/rsrvrctl/Makefile b/usr/src/cmd/rsrvrctl/Makefile new file mode 100644 index 0000000000..f51df92730 --- /dev/null +++ b/usr/src/cmd/rsrvrctl/Makefile @@ -0,0 +1,48 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. 
+# + +# +# Copyright 2021 Oxide Computer Company +# + +PROG = rsrvrctl + +include ../Makefile.cmd +include ../Makefile.cmd.64 +include ../Makefile.ctf + +SRCS = rsrvrctl.c +OBJS = $(SRCS:.c=.o) + +CLEANFILES = $(PROG) +CLOBBERFILES += $(ROOTUSRSBINPROG) + +.KEEP_STATE: + +CFLAGS += $(CCVERBOSE) +CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \ + -I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64 \ + $(CPPFLAGS.master) \ + -I$(SRC)/uts/i86pc/io/vmm \ + -I$(SRC)/uts/i86pc + +all: $(PROG) + +$(PROG): $(OBJS) + $(LINK.c) -o $@ $(OBJS) $(LDFLAGS) $(LDLIBS) + $(POST_PROCESS) + +install: all $(ROOTLIBPROG) + +clean: + $(RM) $(OBJS) $(CLEANFILES) + +include ../Makefile.targ diff --git a/usr/src/cmd/rsrvrctl/rsrvrctl.c b/usr/src/cmd/rsrvrctl/rsrvrctl.c new file mode 100644 index 0000000000..e189520a1c --- /dev/null +++ b/usr/src/cmd/rsrvrctl/rsrvrctl.c @@ -0,0 +1,164 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ * + * Copyright 2021 Oxide Computer Company + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/types.h> + +#include <sys/vmm_dev.h> + +static void +usage(const char *pname) +{ + fprintf(stderr, + "Usage: %s [-a add] [-r remove] [-q]\n" + "\t-a <SZ> add SZ MiB to the reservoir\n" + "\t-r <SZ> remove SZ MiB from the reservoir\n" + "\t-q query reservoir state\n", pname); +} + +static bool +parse_size(const char *arg, size_t *resp) +{ + size_t res; + + errno = 0; + res = strtoul(arg, NULL, 0); + if (errno != 0) { + return (false); + } + + *resp = (res * 1024 * 1024); + return (true); +} + +static void +do_add(int fd, size_t sz) +{ + int res; + + res = ioctl(fd, VMM_RESV_ADD, sz); + if (res != 0) { + perror("Could not add to reservoir"); + exit(EXIT_FAILURE); + } +} + +static void +do_remove(int fd, size_t sz) +{ + int res; + + res = ioctl(fd, VMM_RESV_REMOVE, sz); + if (res != 0) { + perror("Could not remove from reservoir"); + exit(EXIT_FAILURE); + } +} + +static void +do_query(int fd) +{ + struct vmm_resv_query data; + int res; + + res = ioctl(fd, VMM_RESV_QUERY, &data); + if (res != 0) { + perror("Could not query reservoir info"); + return; + } + + printf("Free KiB:\t%llu\n" + "Allocated KiB:\t%llu\n" + "Transient Allocated KiB:\t%llu\n" + "Size limit KiB:\t%llu\n", + data.vrq_free_sz / 1024, + data.vrq_alloc_sz / 1024, + data.vrq_alloc_transient_sz / 1024, + data.vrq_limit / 1024); +} + +int +main(int argc, char *argv[]) +{ + char c; + const char *opt_a = NULL, *opt_r = NULL; + bool opt_q = false; + int fd; + + const char *pname = argv[0]; + + while ((c = getopt(argc, argv, "a:r:qh")) != -1) { + switch (c) { + case 'a': + opt_a = optarg; + break; + case 'r': + opt_r = optarg; + break; + case 'q': + opt_q = true; + break; + case 'h': + usage(pname); + return (EXIT_SUCCESS); + default: + usage(pname); + return (EXIT_FAILURE); + } + } + if (optind < argc || + (opt_a == NULL && opt_r == NULL && 
!opt_q) || + (opt_a != NULL && opt_r != NULL)) { + usage(pname); + return (EXIT_FAILURE); + } + + fd = open(VMM_CTL_DEV, O_EXCL | O_RDWR); + if (fd < 0) { + perror("Could not open vmmctl"); + usage(pname); + return (EXIT_FAILURE); + } + + if (opt_a != NULL) { + size_t sz; + + if (!parse_size(opt_a, &sz)) { + perror("Invalid size"); + usage(pname); + return (EXIT_FAILURE); + } + + do_add(fd, sz); + } + if (opt_r != NULL) { + size_t sz; + + if (!parse_size(opt_r, &sz)) { + perror("Invalid size"); + usage(pname); + return (EXIT_FAILURE); + } + do_remove(fd, sz); + } + if (opt_q) { + do_query(fd); + } + + (void) close(fd); + return (0); +} diff --git a/usr/src/lib/libipadm/common/ipadm_if.c b/usr/src/lib/libipadm/common/ipadm_if.c index 41f22e4eeb..c140f4ca40 100644 --- a/usr/src/lib/libipadm/common/ipadm_if.c +++ b/usr/src/lib/libipadm/common/ipadm_if.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2021, Tintri by DDN. All rights reserved. */ #include <errno.h> @@ -82,12 +83,12 @@ i_ipadm_is_if_down(char *ifname, struct ifaddrs *ifa) */ static ipadm_status_t i_ipadm_active_if_info(ipadm_handle_t iph, const char *ifname, - ipadm_if_info_t **if_info, int64_t lifc_flags) + ipadm_if_info_list_t **if_info, int64_t lifc_flags) { struct lifreq *buf; struct lifreq *lifrp; struct lifreq lifrl; - ipadm_if_info_t *last = NULL; + ipadm_if_info_list_t *ifl, *last = NULL; ipadm_if_info_t *ifp; int s; int n; @@ -117,24 +118,26 @@ i_ipadm_active_if_info(ipadm_handle_t iph, const char *ifname, * Check if the interface already exists in our list. * If it already exists, we need to update its flags. 
*/ - for (ifp = *if_info; ifp != NULL; ifp = ifp->ifi_next) { + for (ifl = *if_info; ifl != NULL; ifl = ifl->ifil_next) { + ifp = &ifl->ifil_ifi; if (strcmp(lifrp->lifr_name, ifp->ifi_name) == 0) break; } - if (ifp == NULL) { - ifp = calloc(1, sizeof (ipadm_if_info_t)); - if (ifp == NULL) { + if (ifl == NULL) { + ifl = calloc(1, sizeof (ipadm_if_info_list_t)); + if (ifl == NULL) { status = ipadm_errno2status(errno); goto fail; } + ifp = &ifl->ifil_ifi; (void) strlcpy(ifp->ifi_name, lifrp->lifr_name, sizeof (ifp->ifi_name)); - /* Update the `ifi_next' pointer for this new node */ + /* Update the `ifil_next' pointer for this new node */ if (*if_info == NULL) - *if_info = ifp; + *if_info = ifl; else - last->ifi_next = ifp; - last = ifp; + last->ifil_next = ifl; + last = ifl; } /* @@ -188,12 +191,13 @@ fail: */ static ipadm_status_t i_ipadm_persist_if_info(ipadm_handle_t iph, const char *ifname, - ipadm_if_info_t **if_info) + ipadm_if_info_list_t **if_info) { ipadm_status_t status = IPADM_SUCCESS; ipmgmt_getif_arg_t getif; ipmgmt_getif_rval_t *rvalp; - ipadm_if_info_t *ifp, *curr, *prev = NULL; + ipadm_if_info_t *ifp; + ipadm_if_info_list_t *curr, *prev = NULL; int i = 0, err = 0; bzero(&getif, sizeof (getif)); @@ -225,8 +229,8 @@ i_ipadm_persist_if_info(ipadm_handle_t iph, const char *ifname, ipadm_free_if_info(prev); break; } - (void) bcopy(ifp, curr, sizeof (*curr)); - curr->ifi_next = prev; + (void) bcopy(ifp, &curr->ifil_ifi, sizeof (*ifp)); + curr->ifil_next = prev; prev = curr; } *if_info = curr; @@ -242,14 +246,16 @@ i_ipadm_persist_if_info(ipadm_handle_t iph, const char *ifname, */ ipadm_status_t i_ipadm_get_all_if_info(ipadm_handle_t iph, const char *ifname, - ipadm_if_info_t **if_info, int64_t lifc_flags) + ipadm_if_info_list_t **if_info, int64_t lifc_flags) { ipadm_status_t status; - ipadm_if_info_t *aifinfo = NULL; - ipadm_if_info_t *pifinfo = NULL; + ipadm_if_info_list_t *aifinfo = NULL; + ipadm_if_info_list_t *pifinfo = NULL; + ipadm_if_info_list_t *last 
= NULL; + ipadm_if_info_list_t *aifl; + ipadm_if_info_list_t *pifl; ipadm_if_info_t *aifp; ipadm_if_info_t *pifp; - ipadm_if_info_t *last = NULL; struct ifaddrs *ifa; struct ifaddrs *ifap; @@ -269,7 +275,9 @@ retry: status = ipadm_errno2status(errno); goto fail; } - for (aifp = aifinfo; aifp != NULL; aifp = aifp->ifi_next) { + for (aifl = aifinfo; aifl != NULL; aifl = aifl->ifil_next) { + aifp = &aifl->ifil_ifi; + /* * Find the `ifaddrs' structure from `ifa' * for this interface. We need the IFF_* flags @@ -299,8 +307,8 @@ retry: aifp->ifi_state = IFIS_DOWN; else aifp->ifi_state = IFIS_OK; - if (aifp->ifi_next == NULL) - last = aifp; + if (aifl->ifil_next == NULL) + last = aifl; } freeifaddrs(ifa); } @@ -321,27 +329,29 @@ retry: * `aifinfo', it means that this interface was disabled. We should * add this interface to `aifinfo' and set it state to IFIF_DISABLED. */ - for (pifp = pifinfo; pifp != NULL; pifp = pifp->ifi_next) { - for (aifp = aifinfo; aifp != NULL; aifp = aifp->ifi_next) { + for (pifl = pifinfo; pifl != NULL; pifl = pifl->ifil_next) { + pifp = &pifl->ifil_ifi; + for (aifl = aifinfo; aifl != NULL; aifl = aifl->ifil_next) { + aifp = &aifl->ifil_ifi; if (strcmp(aifp->ifi_name, pifp->ifi_name) == 0) { aifp->ifi_pflags = pifp->ifi_pflags; break; } } - if (aifp == NULL) { - aifp = malloc(sizeof (ipadm_if_info_t)); - if (aifp == NULL) { + if (aifl == NULL) { + aifl = malloc(sizeof (ipadm_if_info_list_t)); + if (aifl == NULL) { status = ipadm_errno2status(errno); goto fail; } - *aifp = *pifp; - aifp->ifi_next = NULL; - aifp->ifi_state = IFIS_DISABLED; + *aifl = *pifl; + aifl->ifil_next = NULL; + aifl->ifil_ifi.ifi_state = IFIS_DISABLED; if (last != NULL) - last->ifi_next = aifp; + last->ifil_next = aifl; else - aifinfo = aifp; - last = aifp; + aifinfo = aifl; + last = aifl; } } *if_info = aifinfo; @@ -375,7 +385,7 @@ ipadm_status_t i_ipadm_if_pexists(ipadm_handle_t iph, const char *ifname, sa_family_t af, boolean_t *exists) { - ipadm_if_info_t *ifinfo; + 
ipadm_if_info_list_t *ifinfo; ipadm_status_t status; /* @@ -390,9 +400,9 @@ i_ipadm_if_pexists(ipadm_handle_t iph, const char *ifname, sa_family_t af, status = i_ipadm_persist_if_info(iph, ifname, &ifinfo); if (status == IPADM_SUCCESS) { *exists = ((af == AF_INET && - (ifinfo->ifi_pflags & IFIF_IPV4)) || + (ifinfo->ifil_ifi.ifi_pflags & IFIF_IPV4)) || (af == AF_INET6 && - (ifinfo->ifi_pflags & IFIF_IPV6))); + (ifinfo->ifil_ifi.ifi_pflags & IFIF_IPV6))); free(ifinfo); } else if (status == IPADM_NOTFOUND) { status = IPADM_SUCCESS; @@ -495,7 +505,7 @@ i_ipadm_create_ipmp_peer(ipadm_handle_t iph, char *ifname, sa_family_t af) lifgroupinfo_t lifgr; ipadm_status_t status = IPADM_SUCCESS; struct lifreq lifr; - int other_af_sock; + int other_af_sock; assert(af == AF_INET || af == AF_INET6); @@ -1387,7 +1397,7 @@ ipadm_delete_if(ipadm_handle_t iph, const char *ifname, sa_family_t af, * If af is AF_UNSPEC, then we return the following: * status1, if status1 == status2 * IPADM_SUCCESS, if either of status1 or status2 is SUCCESS - * and the other status is ENXIO + * and the other status is ENXIO * IPADM_ENXIO, if both status1 and status2 are ENXIO * IPADM_FAILURE otherwise. */ @@ -1411,12 +1421,12 @@ ipadm_delete_if(ipadm_handle_t iph, const char *ifname, sa_family_t af, * identified by `ifname'. * * Return values: - * On success: IPADM_SUCCESS. - * On error : IPADM_INVALID_ARG, IPADM_ENXIO or IPADM_FAILURE. + * On success: IPADM_SUCCESS. + * On error : IPADM_INVALID_ARG, IPADM_ENXIO or IPADM_FAILURE. */ ipadm_status_t ipadm_if_info(ipadm_handle_t iph, const char *ifname, - ipadm_if_info_t **if_info, uint32_t flags, int64_t lifc_flags) + ipadm_if_info_list_t **if_info, uint32_t flags, int64_t lifc_flags) { ipadm_status_t status; ifspec_t ifsp; @@ -1442,12 +1452,12 @@ ipadm_if_info(ipadm_handle_t iph, const char *ifname, * Frees the linked list allocated by ipadm_if_info(). 
*/ void -ipadm_free_if_info(ipadm_if_info_t *ifinfo) +ipadm_free_if_info(ipadm_if_info_list_t *ifinfo) { - ipadm_if_info_t *ifinfo_next; + ipadm_if_info_list_t *ifinfo_next; for (; ifinfo != NULL; ifinfo = ifinfo_next) { - ifinfo_next = ifinfo->ifi_next; + ifinfo_next = ifinfo->ifil_next; free(ifinfo); } } diff --git a/usr/src/lib/libipadm/common/libipadm.h b/usr/src/lib/libipadm/common/libipadm.h index 0d8e3fdd7b..0ae9d89e4b 100644 --- a/usr/src/lib/libipadm/common/libipadm.h +++ b/usr/src/lib/libipadm/common/libipadm.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, Chris Fraire <cfraire@me.com>. + * Copyright 2021, Tintri by DDN. All rights reserved. */ #ifndef _LIBIPADM_H #define _LIBIPADM_H @@ -205,14 +206,27 @@ typedef enum { IFIS_DISABLED /* Interface has been disabled. */ } ipadm_if_state_t; +/* + * Declare ipadm_if_info_list_t as a container for ipadm_if_info_t. + * + * ipadm_if_info_t used to have a list pointer ifi_next for linking a number + * of ipadm_if_info_t's together. Even though this linking wasn't used in the + * data exchange between ipmgmtd and libipadm, this meant the structure wasn't + * safe for passing through the door between 32bit and 64bit processes. 
+ */ typedef struct ipadm_if_info_s { - struct ipadm_if_info_s *ifi_next; char ifi_name[LIFNAMSIZ]; /* interface name */ ipadm_if_state_t ifi_state; /* see above */ uint_t ifi_cflags; /* current flags */ uint_t ifi_pflags; /* persistent flags */ } ipadm_if_info_t; +typedef struct ipadm_if_info_list_s { + struct ipadm_if_info_list_s *ifil_next; + ipadm_if_info_t ifil_ifi; +} ipadm_if_info_list_t; + + /* ipadm_if_info_t flags */ #define IFIF_BROADCAST 0x00000001 #define IFIF_MULTICAST 0x00000002 @@ -279,8 +293,8 @@ extern ipadm_status_t ipadm_disable_if(ipadm_handle_t, const char *, uint32_t); extern ipadm_status_t ipadm_enable_if(ipadm_handle_t, const char *, uint32_t); extern ipadm_status_t ipadm_if_info(ipadm_handle_t, const char *, - ipadm_if_info_t **, uint32_t, int64_t); -extern void ipadm_free_if_info(ipadm_if_info_t *); + ipadm_if_info_list_t **, uint32_t, int64_t); +extern void ipadm_free_if_info(ipadm_if_info_list_t *); extern ipadm_status_t ipadm_delete_if(ipadm_handle_t, const char *, sa_family_t, uint32_t); extern void ipadm_if_move(ipadm_handle_t, const char *); @@ -362,7 +376,7 @@ extern ipadm_status_t ipadm_get_prop(ipadm_handle_t, const char *, char *, /* * miscellaneous helper functions. */ -extern const char *ipadm_status2str(ipadm_status_t); +extern const char *ipadm_status2str(ipadm_status_t); extern int ipadm_str2nvlist(const char *, nvlist_t **, uint_t); extern size_t ipadm_nvlist2str(nvlist_t *, char *, size_t); extern char *ipadm_proto2str(uint_t); diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c index ba3fb7f8dd..ec27949a43 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.c +++ b/usr/src/lib/libvmmapi/common/vmmapi.c @@ -39,7 +39,7 @@ * * Copyright 2015 Pluribus Networks Inc. * Copyright 2019 Joyent, Inc. 
- * Copyright 2020 Oxide Computer Company + * Copyright 2021 Oxide Computer Company */ #include <sys/cdefs.h> @@ -109,12 +109,31 @@ struct vmctx { #ifdef __FreeBSD__ #define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x))) #define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x))) -#else -#define CREATE(x) vm_do_ctl(VMM_CREATE_VM, (x)) -#define DESTROY(x) vm_do_ctl(VMM_DESTROY_VM, (x)) +int +vm_create(const char *name) +{ + /* Try to load vmm(4) module before creating a guest. */ + if (modfind("vmm") < 0) + kldload("vmm"); + return (CREATE((char *)name)); +} + +void +vm_destroy(struct vmctx *vm) +{ + assert(vm != NULL); + + if (vm->fd >= 0) + close(vm->fd); + DESTROY(vm->name); + + free(vm); +} + +#else static int -vm_do_ctl(int cmd, const char *name) +vm_do_ctl(int cmd, void *req) { int ctl_fd; @@ -123,7 +142,7 @@ vm_do_ctl(int cmd, const char *name) return (-1); } - if (ioctl(ctl_fd, cmd, name) == -1) { + if (ioctl(ctl_fd, cmd, req) == -1) { int err = errno; /* Do not lose ioctl errno through the close(2) */ @@ -135,6 +154,46 @@ vm_do_ctl(int cmd, const char *name) return (0); } + +int +vm_create(const char *name, uint64_t flags) +{ + struct vm_create_req req; + + (void) strncpy(req.name, name, VM_MAX_NAMELEN); + req.flags = flags; + + return (vm_do_ctl(VMM_CREATE_VM, &req)); +} + +void +vm_close(struct vmctx *vm) +{ + assert(vm != NULL); + assert(vm->fd >= 0); + + (void) close(vm->fd); + + free(vm); +} + +void +vm_destroy(struct vmctx *vm) +{ + struct vm_destroy_req req; + + assert(vm != NULL); + + if (vm->fd >= 0) { + (void) close(vm->fd); + vm->fd = -1; + } + + (void) strncpy(req.name, vm->name, VM_MAX_NAMELEN); + (void) vm_do_ctl(VMM_DESTROY_VM, &req); + + free(vm); +} #endif static int @@ -155,17 +214,6 @@ vm_device_open(const char *name) return (fd); } -int -vm_create(const char *name) -{ -#ifdef __FreeBSD__ - /* Try to load vmm(4) module before creating a guest. 
*/ - if (modfind("vmm") < 0) - kldload("vmm"); -#endif - return (CREATE((char *)name)); -} - struct vmctx * vm_open(const char *name) { @@ -189,30 +237,6 @@ err: return (NULL); } -#ifndef __FreeBSD__ -void -vm_close(struct vmctx *vm) -{ - assert(vm != NULL); - assert(vm->fd >= 0); - - (void) close(vm->fd); - - free(vm); -} -#endif - -void -vm_destroy(struct vmctx *vm) -{ - assert(vm != NULL); - - if (vm->fd >= 0) - close(vm->fd); - DESTROY(vm->name); - - free(vm); -} int vm_parse_memsize(const char *optarg, size_t *ret_memsize) diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h index 79c7dc02ee..e239b70a56 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.h +++ b/usr/src/lib/libvmmapi/common/vmmapi.h @@ -39,7 +39,7 @@ * * Copyright 2015 Pluribus Networks Inc. * Copyright 2019 Joyent, Inc. - * Copyright 2020 Oxide Computer Company + * Copyright 2021 Oxide Computer Company */ #ifndef _VMMAPI_H_ @@ -134,7 +134,11 @@ int vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, int vm_munmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, size_t len); +#ifndef __FreeBSD__ +int vm_create(const char *name, uint64_t flags); +#else int vm_create(const char *name); +#endif /* __FreeBSD__ */ int vm_get_device_fd(struct vmctx *ctx); struct vmctx *vm_open(const char *name); #ifndef __FreeBSD__ diff --git a/usr/src/pkg/manifests/system-bhyve.mf b/usr/src/pkg/manifests/system-bhyve.mf index 0495d9f649..3f67fa743e 100644 --- a/usr/src/pkg/manifests/system-bhyve.mf +++ b/usr/src/pkg/manifests/system-bhyve.mf @@ -48,6 +48,7 @@ file path=usr/kernel/drv/$(ARCH64)/vmm file path=usr/kernel/drv/ppt.conf file path=usr/kernel/drv/viona.conf file path=usr/kernel/drv/vmm.conf +file path=usr/lib/rsrvrctl mode=0555 file path=usr/sbin/bhyve mode=0555 file path=usr/sbin/bhyvectl mode=0555 file path=usr/sbin/pptadm mode=0555 diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index d88d229363..26cc3b0824 100644 --- 
a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -30,8 +30,8 @@ * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. * Copyright 2019 Joyent, Inc. * Copyright (c) 2017, Intel Corporation. - * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. * Copyright 2020 Joshua M. Clulow <josh@sysmgr.org> + * Copyright 2021 OmniOS Community Edition (OmniOSce) Association. */ /* @@ -1731,13 +1731,15 @@ spa_load_l2cache(spa_t *spa) ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + nl2cache = 0; + newvdevs = NULL; if (sav->sav_config != NULL) { VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); - newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); - } else { - nl2cache = 0; - newvdevs = NULL; + if (nl2cache > 0) { + newvdevs = kmem_alloc( + nl2cache * sizeof (void *), KM_SLEEP); + } } oldvdevs = sav->sav_vdevs; @@ -1829,7 +1831,11 @@ spa_load_l2cache(spa_t *spa) VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); - l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); + l2cache = NULL; + if (sav->sav_count > 0) { + l2cache = kmem_alloc( + sav->sav_count * sizeof (void *), KM_SLEEP); + } for (i = 0; i < sav->sav_count; i++) l2cache[i] = vdev_config_generate(spa, sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE); diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h index ae9b0be758..29466d15ef 100644 --- a/usr/src/uts/common/vm/page.h +++ b/usr/src/uts/common/vm/page.h @@ -21,6 +21,7 @@ /* * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2017, Joyent, Inc. + * Copyright 2021 Oxide Computer Company */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -342,13 +343,13 @@ struct as; * * So, as a quick summary: * - * pse_mutex[]'s protect the p_selock and p_cv fields. + * pse_mutex[]'s protect the p_selock and p_cv fields. 
* - * p_selock protects the p_free, p_age, p_vnode, p_offset and p_hash, + * p_selock protects the p_free, p_age, p_vnode, p_offset and p_hash, * - * ph_mutex[]'s protect the page_hash[] array and its chains. + * ph_mutex[]'s protect the page_hash[] array and its chains. * - * vph_mutex[]'s protect the v_pages field and the vp page chains. + * vph_mutex[]'s protect the v_pages field and the vp page chains. * * First lock the page, then the hash chain, then the vnode chain. When * this is not possible `trylocks' must be used. Sleeping while holding @@ -763,6 +764,7 @@ void page_lock_delete(page_t *); int page_deleted(page_t *); int page_pp_lock(page_t *, int, int); void page_pp_unlock(page_t *, int, int); +int page_xresv(pgcnt_t, uint_t, int (*)(void)); int page_resv(pgcnt_t, uint_t); void page_unresv(pgcnt_t); void page_pp_useclaim(page_t *, page_t *, uint_t); @@ -1079,7 +1081,7 @@ typedef struct kpm_hlk { * The state about how a kpm page is mapped and whether it is ready to go * is indicated by the following 1 byte kpm_spage structure. This byte is * split into two 4-bit parts - kp_mapped and kp_mapped_go. - * - kp_mapped == 1 the page is mapped cacheable + * - kp_mapped == 1 the page is mapped cacheable * - kp_mapped == 2 the page is mapped non-cacheable * - kp_mapped_go == 1 the mapping is ready to be dropped in * - kp_mapped_go == 0 the mapping is not ready to be dropped in. diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c index b3a3e03fa3..3806c25533 100644 --- a/usr/src/uts/common/vm/vm_page.c +++ b/usr/src/uts/common/vm/vm_page.c @@ -23,6 +23,7 @@ * Copyright (c) 2015, Josef 'Jeff' Sipek <jeffpc@josefsipek.net> * Copyright (c) 2015, 2016 by Delphix. All rights reserved. * Copyright 2018 Joyent, Inc. 
+ * Copyright 2021 Oxide Computer Company */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -3922,29 +3923,68 @@ page_pp_unlock( } /* - * This routine reserves availrmem for npages; - * flags: KM_NOSLEEP or KM_SLEEP - * returns 1 on success or 0 on failure + * This routine reserves availrmem for npages. + * It returns 1 on success or 0 on failure. + * + * flags: KM_NOSLEEP or KM_SLEEP + * cb_wait: called to induce delay when KM_SLEEP reservation requires kmem + * reaping to potentially succeed. If the callback returns 0, the + * reservation attempts will cease to repeat and page_xresv() may + * report a failure. If cb_wait is NULL, the traditional delay(hz/4) + * behavior will be used while waiting for a reap. */ int -page_resv(pgcnt_t npages, uint_t flags) +page_xresv(pgcnt_t npages, uint_t flags, int (*cb_wait)(void)) { mutex_enter(&freemem_lock); - while (availrmem < tune.t_minarmem + npages) { - if (flags & KM_NOSLEEP) { - mutex_exit(&freemem_lock); - return (0); - } + if (availrmem >= tune.t_minarmem + npages) { + availrmem -= npages; mutex_exit(&freemem_lock); - page_needfree(npages); - kmem_reap(); - delay(hz >> 2); - page_needfree(-(spgcnt_t)npages); - mutex_enter(&freemem_lock); + return (1); + } else if ((flags & KM_NOSLEEP) != 0) { + mutex_exit(&freemem_lock); + return (0); } - availrmem -= npages; mutex_exit(&freemem_lock); - return (1); + + /* + * We signal memory pressure to the system by elevating 'needfree'. + * Processes such as kmem reaping, pageout, and ZFS ARC shrinking can + * then respond to said pressure by freeing pages. 
+ */ + page_needfree(npages); + int nobail = 1; + do { + kmem_reap(); + if (cb_wait == NULL) { + delay(hz >> 2); + } else { + nobail = cb_wait(); + } + + mutex_enter(&freemem_lock); + if (availrmem >= tune.t_minarmem + npages) { + availrmem -= npages; + mutex_exit(&freemem_lock); + page_needfree(-(spgcnt_t)npages); + return (1); + } + mutex_exit(&freemem_lock); + } while (nobail != 0); + page_needfree(-(spgcnt_t)npages); + + return (0); +} + +/* + * This routine reserves availrmem for npages; + * flags: KM_NOSLEEP or KM_SLEEP + * returns 1 on success or 0 on failure + */ +int +page_resv(pgcnt_t npages, uint_t flags) +{ + return (page_xresv(npages, flags, NULL)); } /* diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files index 4370e90d9a..0a3fad877c 100644 --- a/usr/src/uts/i86pc/Makefile.files +++ b/usr/src/uts/i86pc/Makefile.files @@ -23,7 +23,6 @@ # Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. # # Copyright (c) 2010, Intel Corporation. -# Copyright 2019 Joyent, Inc. # Copyright 2019 OmniOS Community Edition (OmniOSce) Association. # Copyright 2019 Joyent, Inc. # Copyright 2021 Oxide Computer Company @@ -117,7 +116,6 @@ CORE_OBJS += \ pmem.o \ ppage.o \ pwrnow.o \ - seg_vmm.o \ smt.o \ speedstep.o \ ssp.o \ @@ -271,6 +269,8 @@ VMM_OBJS += vmm.o \ svm_support.o \ amdv.o \ vmm_gpt.o \ + seg_vmm.o \ + vmm_reservoir.o \ vmm_sol_vm.o \ vmm_sol_glue.o \ vmm_sol_ept.o \ diff --git a/usr/src/uts/i86pc/vm/seg_vmm.c b/usr/src/uts/i86pc/io/vmm/seg_vmm.c index beb5e81d53..23a8da3bc5 100644 --- a/usr/src/uts/i86pc/vm/seg_vmm.c +++ b/usr/src/uts/i86pc/io/vmm/seg_vmm.c @@ -11,6 +11,7 @@ /* * Copyright 2018 Joyent, Inc. 
+ * Copyright 2021 Oxide Computer Company */ /* @@ -40,7 +41,16 @@ #include <vm/as.h> #include <vm/seg.h> #include <vm/seg_kmem.h> -#include <vm/seg_vmm.h> + +#include <sys/seg_vmm.h> + +typedef struct segvmm_data { + krwlock_t svmd_lock; + vm_object_t svmd_obj; + uintptr_t svmd_obj_off; + uchar_t svmd_prot; + size_t svmd_softlockcnt; +} segvmm_data_t; static int segvmm_dup(struct seg *, struct seg *); @@ -105,31 +115,14 @@ segvmm_create(struct seg **segpp, void *argsp) segvmm_crargs_t *cra = argsp; segvmm_data_t *data; - /* - * Check several aspects of the mapping request to ensure validity: - * - kernel pages must reside entirely in kernel space - * - target protection must be user-accessible - * - kernel address must be page-aligned - */ - if ((uintptr_t)cra->kaddr <= _userlimit || - ((uintptr_t)cra->kaddr + seg->s_size) < (uintptr_t)cra->kaddr || - (cra->prot & PROT_USER) == 0 || - ((uintptr_t)cra->kaddr & PAGEOFFSET) != 0) { - return (EINVAL); - } - data = kmem_zalloc(sizeof (*data), KM_SLEEP); rw_init(&data->svmd_lock, NULL, RW_DEFAULT, NULL); - data->svmd_kaddr = (uintptr_t)cra->kaddr; + data->svmd_obj = cra->obj; + data->svmd_obj_off = cra->offset; data->svmd_prot = cra->prot; - data->svmd_cookie = cra->cookie; - data->svmd_hold = cra->hold; - data->svmd_rele = cra->rele; - /* Since initial checks have passed, grab a reference on the cookie */ - if (data->svmd_hold != NULL) { - data->svmd_hold(data->svmd_cookie); - } + /* Grab a hold on the VM object for the duration of this seg mapping */ + vm_object_reference(data->svmd_obj); seg->s_ops = &segvmm_ops; seg->s_data = data; @@ -146,16 +139,12 @@ segvmm_dup(struct seg *seg, struct seg *newseg) newsvmd = kmem_zalloc(sizeof (segvmm_data_t), KM_SLEEP); rw_init(&newsvmd->svmd_lock, NULL, RW_DEFAULT, NULL); - newsvmd->svmd_kaddr = svmd->svmd_kaddr; + newsvmd->svmd_obj = svmd->svmd_obj; + newsvmd->svmd_obj_off = svmd->svmd_obj_off; newsvmd->svmd_prot = svmd->svmd_prot; - newsvmd->svmd_cookie = svmd->svmd_cookie; - 
newsvmd->svmd_hold = svmd->svmd_hold; - newsvmd->svmd_rele = svmd->svmd_rele; /* Grab another hold for the duplicate segment */ - if (svmd->svmd_hold != NULL) { - newsvmd->svmd_hold(newsvmd->svmd_cookie); - } + vm_object_reference(svmd->svmd_obj); newseg->s_ops = seg->s_ops; newseg->s_data = newsvmd; @@ -180,10 +169,8 @@ segvmm_unmap(struct seg *seg, caddr_t addr, size_t len) /* Unconditionally unload the entire segment range. */ hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP); - /* Release the hold this segment possessed */ - if (svmd->svmd_rele != NULL) { - svmd->svmd_rele(svmd->svmd_cookie); - } + /* Release the VM object hold this segment possessed */ + vm_object_deallocate(svmd->svmd_obj); seg_free(seg); return (0); @@ -206,41 +193,23 @@ static int segvmm_fault_in(struct hat *hat, struct seg *seg, uintptr_t va, size_t len) { segvmm_data_t *svmd = seg->s_data; - const uintptr_t koff = svmd->svmd_kaddr - (uintptr_t)seg->s_base; const uintptr_t end = va + len; const uintptr_t prot = svmd->svmd_prot; - /* Stick to the simple non-large-page case for now */ va &= PAGEMASK; - + uintptr_t off = va - (uintptr_t)seg->s_base; do { - htable_t *ht; - uint_t entry, lvl; - size_t psz; pfn_t pfn; - const uintptr_t kaddr = va + koff; - - ASSERT(kaddr >= (uintptr_t)svmd->svmd_kaddr); - ASSERT(kaddr < ((uintptr_t)svmd->svmd_kaddr + seg->s_size)); - ht = htable_getpage(kas.a_hat, kaddr, &entry); - if (ht == NULL) { - return (-1); - } - lvl = ht->ht_level; - pfn = PTE2PFN(x86pte_get(ht, entry), lvl); - htable_release(ht); + pfn = vm_object_pfn(svmd->svmd_obj, off); if (pfn == PFN_INVALID) { return (-1); } - /* For the time being, handling for large pages is absent. 
*/ - psz = PAGESIZE; - pfn += mmu_btop(kaddr & LEVEL_OFFSET(lvl)); - - hat_devload(hat, (caddr_t)va, psz, pfn, prot, HAT_LOAD); - - va = va + psz; + /* Ignore any large-page possibilities for now */ + hat_devload(hat, (caddr_t)va, PAGESIZE, pfn, prot, HAT_LOAD); + va += PAGESIZE; + off += PAGESIZE; } while (va < end); return (0); @@ -399,8 +368,8 @@ static int segvmm_gettype(struct seg *seg, caddr_t addr) { /* - * Since already-existing kernel pages are being mapped into userspace, - * always report the segment type as shared. + * Since already-existing vmm reservoir pages are being mapped into + * userspace, always report the segment type as shared. */ return (MAP_SHARED); } @@ -457,8 +426,8 @@ segvmm_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp) { segvmm_data_t *svmd = seg->s_data; - memidp->val[0] = (uintptr_t)svmd->svmd_kaddr; - memidp->val[1] = (uintptr_t)(addr - seg->s_base); + memidp->val[0] = (uintptr_t)svmd->svmd_obj; + memidp->val[1] = (uintptr_t)(addr - seg->s_base) + svmd->svmd_obj_off; return (0); } diff --git a/usr/src/uts/i86pc/io/vmm/sys/seg_vmm.h b/usr/src/uts/i86pc/io/vmm/sys/seg_vmm.h new file mode 100644 index 0000000000..a4f72f816e --- /dev/null +++ b/usr/src/uts/i86pc/io/vmm/sys/seg_vmm.h @@ -0,0 +1,30 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2018 Joyent, Inc. 
+ * Copyright 2021 Oxide Computer Company + */ + +#ifndef _VM_SEG_VMM_H +#define _VM_SEG_VMM_H + +#include <sys/vmm_vm.h> + +typedef struct segvmm_crargs { + uchar_t prot; /* protection */ + vm_object_t obj; + uintptr_t offset; +} segvmm_crargs_t; + +int segvmm_create(struct seg **, void *); + +#endif /* _VM_SEG_VMM_H */ diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_impl.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_impl.h index 606be4bbae..2b6f41ec54 100644 --- a/usr/src/uts/i86pc/io/vmm/sys/vmm_impl.h +++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_impl.h @@ -40,7 +40,7 @@ struct vmm_devmem_entry { list_node_t vde_node; int vde_segid; - char vde_name[SPECNAMELEN + 1]; + char vde_name[VM_MAX_SEG_NAMELEN]; size_t vde_len; off_t vde_off; }; diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h index 8441b51e03..4191aaee5c 100644 --- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h +++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h @@ -115,7 +115,7 @@ struct vmm_ops { extern struct vmm_ops vmm_ops_intel; extern struct vmm_ops vmm_ops_amd; -int vm_create(const char *name, struct vm **retvm); +int vm_create(const char *name, uint64_t flags, struct vm **retvm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); const char *vm_name(struct vm *vm); diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_reservoir.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_reservoir.h new file mode 100644 index 0000000000..b8215ce654 --- /dev/null +++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_reservoir.h @@ -0,0 +1,40 @@ + +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ */ + +/* + * Copyright 2021 Oxide Computer Company + */ + +#ifndef _SYS_VMM_RESERVOIR_H +#define _SYS_VMM_RESERVOIR_H + +#include <sys/types.h> +#include <sys/cred.h> + +struct vmmr_region; +typedef struct vmmr_region vmmr_region_t; + +void vmmr_init(); +void vmmr_fini(); +bool vmmr_is_empty(); + +int vmmr_alloc(size_t, bool, vmmr_region_t **); +void *vmmr_region_mem_at(vmmr_region_t *, uintptr_t); +pfn_t vmmr_region_pfn_at(vmmr_region_t *, uintptr_t); +void vmmr_free(vmmr_region_t *); + +int vmmr_add(size_t, bool); +int vmmr_remove(size_t, bool); + +int vmmr_ioctl(int, intptr_t, int, cred_t *, int *); + +#endif /* _SYS_VMM_RESERVOIR_H */ diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_vm.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_vm.h index 6c7f9d423e..76d5fec8b7 100644 --- a/usr/src/uts/i86pc/io/vmm/sys/vmm_vm.h +++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_vm.h @@ -160,8 +160,6 @@ int vm_segmap_obj(vm_object_t, off_t, size_t, struct as *, caddr_t *, uint_t, int vm_segmap_space(struct vmspace *, off_t, struct as *, caddr_t *, off_t, uint_t, uint_t, uint_t); void *vmspace_find_kva(struct vmspace *, uintptr_t, size_t); -void vmm_arena_init(void); -void vmm_arena_fini(void); typedef int (*pmap_pinit_t)(struct pmap *pmap); @@ -171,13 +169,12 @@ void vmspace_free(struct vmspace *); int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int); int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, vm_prot_t prot, vm_page_t *ma, int max_count); -void vmm_arena_fini(void); - -struct vm_object *vm_object_allocate(objtype_t, vm_pindex_t); +struct vm_object *vm_object_allocate(objtype_t, vm_pindex_t, bool); void vm_object_deallocate(vm_object_t); void vm_object_reference(vm_object_t); int vm_object_set_memattr(vm_object_t, vm_memattr_t); +pfn_t vm_object_pfn(vm_object_t, uintptr_t); #define VM_OBJECT_WLOCK(vmo) mutex_enter(&(vmo)->vmo_lock) #define VM_OBJECT_WUNLOCK(vmo) mutex_exit(&(vmo)->vmo_lock) diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c 
b/usr/src/uts/i86pc/io/vmm/vmm.c index 425969d622..80c9ec6bd7 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm.c @@ -39,7 +39,7 @@ * * Copyright 2015 Pluribus Networks Inc. * Copyright 2018 Joyent, Inc. - * Copyright 2020 Oxide Computer Company + * Copyright 2021 Oxide Computer Company * Copyright 2021 OmniOS Community Edition (OmniOSce) Association. */ @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sched.h> #include <sys/smp.h> #include <sys/systm.h> +#include <sys/sunddi.h> #include <machine/pcb.h> #include <machine/smp.h> @@ -191,6 +192,8 @@ struct vm { uint64_t boot_tsc_offset; /* (i) TSC offset at VM boot */ struct ioport_config ioports; /* (o) ioport handling */ + + bool mem_transient; /* (o) alloc transient memory */ }; static int vmm_initialized; @@ -490,7 +493,7 @@ uint_t cores_per_package = 1; uint_t threads_per_core = 1; int -vm_create(const char *name, struct vm **retvm) +vm_create(const char *name, uint64_t flags, struct vm **retvm) { struct vm *vm; struct vmspace *vmspace; @@ -502,8 +505,8 @@ vm_create(const char *name, struct vm **retvm) if (!vmm_initialized) return (ENXIO); - if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) - return (EINVAL); + /* Name validation has already occurred */ + VERIFY3U(strnlen(name, VM_MAX_NAMELEN), <, VM_MAX_NAMELEN); vmspace = VMSPACE_ALLOC(0, VM_MAXUSER_ADDRESS); if (vmspace == NULL) @@ -512,6 +515,7 @@ vm_create(const char *name, struct vm **retvm) vm = malloc(sizeof (struct vm), M_VM, M_WAITOK | M_ZERO); strcpy(vm->name, name); vm->vmspace = vmspace; + vm->mem_transient = (flags & VCF_RESERVOIR_MEM) == 0; vm->sockets = 1; vm->cores = cores_per_package; /* XXX backwards compatibility */ @@ -708,21 +712,12 @@ vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) struct mem_seg *seg; vm_object_t obj; -#ifndef __FreeBSD__ - extern pgcnt_t get_max_page_get(void); -#endif - if (ident < 0 || ident >= VM_MAX_MEMSEGS) return (EINVAL); if (len == 0 || (len & PAGE_MASK)) 
return (EINVAL); -#ifndef __FreeBSD__ - if (len > ptob(get_max_page_get())) - return (EINVAL); -#endif - seg = &vm->mem_segs[ident]; if (seg->object != NULL) { if (seg->len == len && seg->sysmem == sysmem) @@ -731,7 +726,8 @@ vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) return (EINVAL); } - obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT); + obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT, + vm->mem_transient); if (obj == NULL) return (ENOMEM); diff --git a/usr/src/uts/i86pc/io/vmm/vmm_reservoir.c b/usr/src/uts/i86pc/io/vmm/vmm_reservoir.c new file mode 100644 index 0000000000..1bb64a4851 --- /dev/null +++ b/usr/src/uts/i86pc/io/vmm/vmm_reservoir.c @@ -0,0 +1,820 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2021 Oxide Computer Company + */ + +/* + * VMM Memory Reservoir + * + * + * In order to make the allocation of large (multi-GiB) chunks of memory + * for bhyve VMs easier, we introduce the "VMM Reservoir", where system + * operators can set aside a substantial portion of system memory exclusively + * for VMs. This memory is unavailable for general use by the rest of the + * system. Rather than having to scour the freelist, reap kmem caches, or put + * pressure on the ARC, bhyve guest memory allocations can quickly determine if + * there is adequate reservoir memory available. Since the pages stored in the + * reservoir are pre-zeroed, it can be immediately used when allocated to a + * guest. When the memory is returned to the reservoir, it is zeroed once more + * to avoid leaking any sensitive data from that guest. 
+ * + * + * Transient Allocations + * + * While the explicit reservoir model may work well for some applications, + * others may want a more traditional model, where pages for guest memory + * objects are allocated on demand, rather than from a pool set aside from the + * system. In this case, the allocation can be made in "transient" mode, where + * the memory is allocated normally, even if there is free capacity in the + * reservoir. When use of the transient allocation is complete (the guest is + * halted and destroyed), the pages will be freed back to the system, rather + * than added back to the reservoir. + * + * From an implementation standpoint, transient allocations follow the same + * code paths as ones using the reservoir normally. Those allocations have a + * tag which marks them as transient, and used/free size tallies are maintained + * separately for normal and transient operations. When performing a transient + * allocation, that amount of memory is immediately added to the reservoir, + * from which the allocation can be made. When freeing a transient allocation, + * a matching amount of memory is removed from the reservoir as part of the + * operation. This allows both allocation types to coexist without too much + * additional machinery. + * + * + * Administration + * + * Operators may increase, decrease, and query the amount of memory allocated to + * the reservoir and from it to VMs via ioctls against the vmmctl device. The + * total amount added to the reservoir is arbitrarily limited at this time by + * `vmmr_total_limit`, which defaults to all of `physmem` less 120% of + * `pages_pp_maximum`. This is done to prevent the reservoir from inadvertently + * growing to a size where the system has inadequate memory to make forward + * progress. Memory may only be removed from the reservoir when it is free (not + * allocated by any guest VMs).
+ * + * + * Page Tracking + * + * The reservoir currently uses vnode association to keep track of pages under + * its control (either designated to the reservoir and free, or allocated to a + * guest VM object). This means using the existing VM system primitives for + * page_t instances being associated with a given (vnode, offset) tuple. It + * means that spans of pages, either free or allocated, need only to store a + * length (of the span) and an offset (into the vnode) in order to gain access + * to all of the underlying pages associated with that span. Associating the + * pages against `kvps[KV_VVP]` (the VMM kernel vnode) means they will be + * properly tracked as KAS pages, but be excluded from normal dumps (unless the + * operator has chosen to dump all of RAM). + */ + +#include <sys/types.h> +#include <sys/mutex.h> +#include <sys/avl.h> +#include <sys/list.h> +#include <sys/machparam.h> +#include <sys/kmem.h> +#include <sys/stddef.h> +#include <sys/null.h> +#include <sys/errno.h> +#include <sys/systm.h> +#include <sys/sunddi.h> +#include <sys/policy.h> +#include <vm/seg_kmem.h> +#include <vm/hat_i86.h> + +#include <sys/vmm_reservoir.h> +#include <sys/vmm_dev.h> + +static kmutex_t vmmr_lock; + +static size_t vmmr_free_sz; +static size_t vmmr_free_transient_sz; +static size_t vmmr_adding_sz; +static size_t vmmr_alloc_sz; +static size_t vmmr_alloc_transient_sz; +static size_t vmmr_empty_sz; + +static uintptr_t vmmr_empty_last; +/* Upper limit for the size (free + allocated) of the reservoir */ +static size_t vmmr_total_limit; + +/* VA range allocated from the VMM arena for the mappings */ +static uintptr_t vmmr_va; +static uintptr_t vmmr_va_sz; + +/* Pair of AVL trees to store set of spans ordered by addr and size */ +typedef struct vmmr_treepair { + avl_tree_t by_addr; + avl_tree_t by_size; +} vmmr_treepair_t; + +/* Spans of free memory in the reservoir */ +static vmmr_treepair_t vmmr_free_tp; + +/* Spans of empty (not backed by memory) space in the 
reservoir */ +static vmmr_treepair_t vmmr_empty_tp; + +/* Regions of memory allocated from the reservoir */ +static list_t vmmr_alloc_regions; + +struct vmmr_span { + uintptr_t vs_addr; + size_t vs_size; + avl_node_t vs_by_addr; + avl_node_t vs_by_size; + uintptr_t vs_region_addr; +}; +typedef struct vmmr_span vmmr_span_t; + +struct vmmr_region { + size_t vr_size; + avl_tree_t vr_spans; + list_node_t vr_node; + bool vr_transient; +}; + +static int +vmmr_cmp_addr(const void *a, const void *b) +{ + const vmmr_span_t *sa = a; + const vmmr_span_t *sb = b; + + if (sa->vs_addr == sb->vs_addr) { + return (0); + } else if (sa->vs_addr < sb->vs_addr) { + return (-1); + } else { + return (1); + } +} + +static int +vmmr_cmp_size(const void *a, const void *b) +{ + const vmmr_span_t *sa = a; + const vmmr_span_t *sb = b; + + if (sa->vs_size == sb->vs_size) { + /* + * Since discontiguous spans could have the same size in a + * by-size tree, differentiate them (as required by AVL) by + * address so they can safely coexist while remaining sorted. 
+ */ + return (vmmr_cmp_addr(a, b)); + } else if (sa->vs_size < sb->vs_size) { + return (-1); + } else { + return (1); + } +} + +static int +vmmr_cmp_region_addr(const void *a, const void *b) +{ + const vmmr_span_t *sa = a; + const vmmr_span_t *sb = b; + + if (sa->vs_region_addr == sb->vs_region_addr) { + return (0); + } else if (sa->vs_region_addr < sb->vs_region_addr) { + return (-1); + } else { + return (1); + } +} + +static void +vmmr_tp_init(vmmr_treepair_t *tree) +{ + avl_create(&tree->by_addr, vmmr_cmp_addr, sizeof (vmmr_span_t), + offsetof(vmmr_span_t, vs_by_addr)); + avl_create(&tree->by_size, vmmr_cmp_size, sizeof (vmmr_span_t), + offsetof(vmmr_span_t, vs_by_size)); +} + +static void +vmmr_tp_destroy(vmmr_treepair_t *tree) +{ + void *vcp = NULL; + vmmr_span_t *span; + + while (avl_destroy_nodes(&tree->by_addr, &vcp) != NULL) { + /* Freeing spans will be done when tearing down by-size tree */ + } + while ((span = avl_destroy_nodes(&tree->by_size, &vcp)) != NULL) { + kmem_free(span, sizeof (*span)); + } + avl_destroy(&tree->by_addr); + avl_destroy(&tree->by_size); +} + +/* + * Insert a vmmr_span_t into a treepair, concatenating if possible with adjacent + * span(s). Such concatenation could result in the `to_add` span being freed, + * so the caller cannot use it after this returns. 
+ */ +static void +vmmr_tp_insert_concat(vmmr_span_t *to_add, vmmr_treepair_t *tree) +{ + avl_tree_t *by_addr = &tree->by_addr; + avl_tree_t *by_size = &tree->by_size; + vmmr_span_t *node; + avl_index_t where; + + /* This addr should not already exist in the treepair */ + node = avl_find(by_addr, to_add, &where); + ASSERT3P(node, ==, NULL); + + node = avl_nearest(by_addr, where, AVL_BEFORE); + if (node != NULL && + (node->vs_addr + node->vs_size) == to_add->vs_addr) { + /* concat with preceding item */ + avl_remove(by_addr, node); + avl_remove(by_size, node); + node->vs_size += to_add->vs_size; + kmem_free(to_add, sizeof (*to_add)); + + /* + * Since this now-concatenated span could be adjacent to one + * trailing it, fall through to perform that check. + */ + to_add = node; + } + + node = avl_nearest(by_addr, where, AVL_AFTER); + if (node != NULL && + (to_add->vs_addr + to_add->vs_size) == node->vs_addr) { + /* concat with trailing item */ + avl_remove(by_addr, node); + avl_remove(by_size, node); + node->vs_addr = to_add->vs_addr; + node->vs_size += to_add->vs_size; + avl_add(by_addr, node); + avl_add(by_size, node); + + kmem_free(to_add, sizeof (*to_add)); + return; + } + + /* simply insert */ + avl_add(by_addr, to_add); + avl_add(by_size, to_add); +} + +/* + * Remove a vmmr_span_t from a treepair, splitting if necessary when a span of + * the exact target size is not present, but a larger one is. May return a span + * with a size smaller than the target if splitting is not an option.
+ */ +static vmmr_span_t * +vmmr_tp_remove_split(size_t target_sz, vmmr_treepair_t *tree) +{ + avl_tree_t *by_addr = &tree->by_addr; + avl_tree_t *by_size = &tree->by_size; + vmmr_span_t *span; + avl_index_t where; + + ASSERT3U(target_sz, !=, 0); + ASSERT(!avl_is_empty(by_addr)); + ASSERT(!avl_is_empty(by_size)); + + vmmr_span_t search = { .vs_size = target_sz }; + span = avl_find(by_size, &search, &where); + if (span == NULL) { + /* Try for a larger span (instead of exact match) */ + span = avl_nearest(by_size, where, AVL_AFTER); + if (span == NULL) { + /* + * Caller will need to collect several smaller spans in + * order to fulfill their request. + */ + span = avl_nearest(by_size, where, AVL_BEFORE); + ASSERT3P(span, !=, NULL); + } + } + + if (span->vs_size <= target_sz) { + avl_remove(by_size, span); + avl_remove(by_addr, span); + + return (span); + } else { + /* Split off adequate chunk from larger span */ + uintptr_t start = span->vs_addr + span->vs_size - target_sz; + + avl_remove(by_size, span); + span->vs_size -= target_sz; + avl_add(by_size, span); + + vmmr_span_t *split_span = + kmem_zalloc(sizeof (vmmr_span_t), KM_SLEEP); + split_span->vs_addr = start; + split_span->vs_size = target_sz; + + return (split_span); + } +} + +void +vmmr_init() +{ + mutex_init(&vmmr_lock, NULL, MUTEX_DEFAULT, NULL); + + /* + * `vmmr_total_limit` represents the absolute maximum size of the VMM + * memory reservoir. It is meant to provide some measure of protection + * against an operator pushing the system into unrecoverable memory + * starvation through explicit or transient additions to the reservoir. + * + * There will be many situations where this limit would be inadequate to + * prevent kernel memory starvation in the face of certain operator + * actions. It is a balance to be struck between safety and allowing + * large systems to reach high utilization. + * + * The value is based off of pages_pp_maximum: "Number of currently + * available pages that cannot be 'locked'".
It is sized as all of + * `physmem` less 120% of `pages_pp_maximum`. + */ + vmmr_total_limit = + (((physmem * 10) - (pages_pp_maximum * 12)) * PAGESIZE) / 10; + + vmmr_empty_last = 0; + vmmr_free_sz = 0; + vmmr_alloc_sz = 0; + vmmr_empty_sz = 0; + vmmr_adding_sz = 0; + vmmr_free_transient_sz = 0; + vmmr_alloc_transient_sz = 0; + + vmmr_tp_init(&vmmr_free_tp); + vmmr_tp_init(&vmmr_empty_tp); + + list_create(&vmmr_alloc_regions, sizeof (vmmr_region_t), + offsetof(vmmr_region_t, vr_node)); + + /* Grab a chunk of VA for the reservoir */ + vmmr_va_sz = physmem * PAGESIZE; + vmmr_va = (uintptr_t)vmem_alloc(kvmm_arena, vmmr_va_sz, VM_SLEEP); +} + +void +vmmr_fini() +{ + mutex_enter(&vmmr_lock); + VERIFY3U(vmmr_alloc_sz, ==, 0); + VERIFY3U(vmmr_free_sz, ==, 0); + VERIFY3U(vmmr_adding_sz, ==, 0); + VERIFY3U(vmmr_alloc_transient_sz, ==, 0); + VERIFY3U(vmmr_free_transient_sz, ==, 0); + VERIFY(avl_is_empty(&vmmr_free_tp.by_addr)); + VERIFY(avl_is_empty(&vmmr_free_tp.by_size)); + VERIFY(list_is_empty(&vmmr_alloc_regions)); + + vmmr_tp_destroy(&vmmr_free_tp); + vmmr_tp_destroy(&vmmr_empty_tp); + list_destroy(&vmmr_alloc_regions); + + /* Release reservoir VA chunk */ + vmem_free(kvmm_arena, (void *)vmmr_va, vmmr_va_sz); + vmmr_va = 0; + vmmr_va_sz = 0; + vmmr_total_limit = 0; + vmmr_empty_last = 0; + + mutex_exit(&vmmr_lock); + mutex_destroy(&vmmr_lock); +} + +bool +vmmr_is_empty() +{ + mutex_enter(&vmmr_lock); + bool res = (vmmr_alloc_sz == 0 && vmmr_alloc_transient_sz == 0 && + vmmr_free_sz == 0 && vmmr_free_transient_sz == 0); + mutex_exit(&vmmr_lock); + return (res); +} + +int +vmmr_alloc(size_t sz, bool transient, vmmr_region_t **resp) +{ + VERIFY3U(sz & PAGEOFFSET, ==, 0); + + if (!transient) { + mutex_enter(&vmmr_lock); + if (sz > vmmr_free_sz) { + mutex_exit(&vmmr_lock); + return (ENOSPC); + } + } else { + int err; + + err = vmmr_add(sz, true); + if (err != 0) { + return (err); + } + mutex_enter(&vmmr_lock); + VERIFY3U(vmmr_free_transient_sz, >=, sz); + } + + 
vmmr_region_t *region; + region = kmem_zalloc(sizeof (vmmr_region_t), KM_SLEEP); + avl_create(&region->vr_spans, vmmr_cmp_region_addr, + sizeof (vmmr_span_t), offsetof(vmmr_span_t, vs_by_addr)); + region->vr_size = sz; + + size_t remain = sz; + uintptr_t map_at = 0; + while (remain > 0) { + vmmr_span_t *span = vmmr_tp_remove_split(remain, &vmmr_free_tp); + + /* + * We have already ensured that adequate free memory is present + * in the reservoir for this allocation. + */ + VERIFY3P(span, !=, NULL); + ASSERT3U(span->vs_size, <=, remain); + + span->vs_region_addr = map_at; + avl_add(&region->vr_spans, span); + map_at += span->vs_size; + remain -= span->vs_size; + } + + if (!transient) { + vmmr_free_sz -= sz; + vmmr_alloc_sz += sz; + } else { + vmmr_free_transient_sz -= sz; + vmmr_alloc_transient_sz += sz; + region->vr_transient = true; + } + list_insert_tail(&vmmr_alloc_regions, region); + mutex_exit(&vmmr_lock); + + *resp = region; + return (0); +} + +void * +vmmr_region_mem_at(vmmr_region_t *region, uintptr_t off) +{ + /* just use KPM region for now */ + return (hat_kpm_pfn2va(vmmr_region_pfn_at(region, off))); +} + +pfn_t +vmmr_region_pfn_at(vmmr_region_t *region, uintptr_t off) +{ + VERIFY3U(off & PAGEOFFSET, ==, 0); + VERIFY3U(off, <, region->vr_size); + + vmmr_span_t search = { + .vs_region_addr = off + }; + avl_index_t where; + vmmr_span_t *span = avl_find(&region->vr_spans, &search, &where); + + if (span == NULL) { + span = avl_nearest(&region->vr_spans, where, AVL_BEFORE); + ASSERT3P(span, !=, NULL); + } + uintptr_t span_off = off - span->vs_region_addr + span->vs_addr; + page_t *pp = page_find(&kvps[KV_VVP], (u_offset_t)span_off); + VERIFY(pp != NULL); + return (pp->p_pagenum); +} + +void +vmmr_free(vmmr_region_t *region) +{ + mutex_enter(&vmmr_lock); + if (!region->vr_transient) { + VERIFY3U(region->vr_size, <=, vmmr_alloc_sz); + } else { + VERIFY3U(region->vr_size, <=, vmmr_alloc_transient_sz); + } + list_remove(&vmmr_alloc_regions, region); + mutex_exit(&vmmr_lock);
+ + /* Zero the contents */ + for (uintptr_t off = 0; off < region->vr_size; off += PAGESIZE) { + bzero(vmmr_region_mem_at(region, off), PAGESIZE); + } + + mutex_enter(&vmmr_lock); + + /* Put the contained span(s) back in the free pool */ + void *cookie = NULL; + vmmr_span_t *span; + while ((span = avl_destroy_nodes(&region->vr_spans, &cookie)) != NULL) { + span->vs_region_addr = 0; + vmmr_tp_insert_concat(span, &vmmr_free_tp); + } + avl_destroy(&region->vr_spans); + if (!region->vr_transient) { + vmmr_free_sz += region->vr_size; + vmmr_alloc_sz -= region->vr_size; + } else { + vmmr_free_transient_sz += region->vr_size; + vmmr_alloc_transient_sz -= region->vr_size; + } + mutex_exit(&vmmr_lock); + + if (region->vr_transient) { + vmmr_remove(region->vr_size, true); + } + kmem_free(region, sizeof (*region)); +} + +static void +vmmr_destroy_pages(vmmr_span_t *span) +{ + const uintptr_t end = span->vs_addr + span->vs_size; + struct vnode *vp = &kvps[KV_VVP]; + for (uintptr_t pos = span->vs_addr; pos < end; pos += PAGESIZE) { + page_t *pp; + + /* Page-free logic cribbed from segkmem_xfree(): */ + pp = page_find(vp, (u_offset_t)pos); + VERIFY(pp != NULL); + if (!page_tryupgrade(pp)) { + /* + * Some other thread has a sharelock. Wait for + * it to drop the lock so we can free this page. + */ + page_unlock(pp); + pp = page_lookup(vp, (u_offset_t)pos, SE_EXCL); + } + + /* + * Clear p_lckcnt so page_destroy() doesn't update availrmem. + * That will be taken care of later via page_unresv().
+ */ + pp->p_lckcnt = 0; + page_destroy(pp, 0); + } +} + +static int +vmmr_alloc_pages(const vmmr_span_t *span) +{ + struct seg kseg = { + .s_as = &kas + }; + struct vnode *vp = &kvps[KV_VVP]; + + const uintptr_t end = span->vs_addr + span->vs_size; + for (uintptr_t pos = span->vs_addr; pos < end; pos += PAGESIZE) { + page_t *pp; + + pp = page_create_va(vp, (u_offset_t)pos, PAGESIZE, + PG_EXCL | PG_NORELOC, &kseg, (void *)(vmmr_va + pos)); + + if (pp == NULL) { + /* Destroy any already-created pages */ + if (pos != span->vs_addr) { + vmmr_span_t destroy_span = { + .vs_addr = span->vs_addr, + .vs_size = pos - span->vs_addr, + }; + + vmmr_destroy_pages(&destroy_span); + } + return (ENOMEM); + } + + /* mimic page state from segkmem */ + ASSERT(PAGE_EXCL(pp)); + page_io_unlock(pp); + pp->p_lckcnt = 1; + page_downgrade(pp); + + /* pre-zero the page */ + bzero(hat_kpm_pfn2va(pp->p_pagenum), PAGESIZE); + } + + return (0); +} + +static int +vmmr_resv_wait() +{ + if (delay_sig(hz >> 2) != 0) { + /* bail due to interruption */ + return (0); + } + return (1); +} + +static void +vmmr_remove_raw(size_t sz) +{ + VERIFY3U(sz & PAGEOFFSET, ==, 0); + VERIFY(MUTEX_HELD(&vmmr_lock)); + + size_t remain = sz; + while (remain > 0) { + vmmr_span_t *span = vmmr_tp_remove_split(remain, &vmmr_free_tp); + + /* + * The caller must ensure that at least `sz` amount is present + * in the free treepair. + */ + VERIFY3P(span, !=, NULL); + ASSERT3U(span->vs_size, <=, remain); + + /* TODO: perhaps arrange to destroy pages outside the lock? 
*/ + vmmr_destroy_pages(span); + + remain -= span->vs_size; + vmmr_tp_insert_concat(span, &vmmr_empty_tp); + } + + vmmr_empty_sz += sz; +} + +int +vmmr_add(size_t sz, bool transient) +{ + VERIFY3U(sz & PAGEOFFSET, ==, 0); + + mutex_enter(&vmmr_lock); + /* + * Make sure that the amount added is not going to breach the limits + * we've chosen + */ + const size_t current_total = + vmmr_alloc_sz + vmmr_free_sz + vmmr_adding_sz + + vmmr_alloc_transient_sz + vmmr_free_transient_sz; + if ((current_total + sz) < current_total) { + mutex_exit(&vmmr_lock); + return (EOVERFLOW); + } + if ((current_total + sz) > vmmr_total_limit) { + mutex_exit(&vmmr_lock); + return (ENOSPC); + } + vmmr_adding_sz += sz; + mutex_exit(&vmmr_lock); + + /* Wait for enough pages to become available */ + if (page_xresv(sz >> PAGESHIFT, KM_SLEEP, vmmr_resv_wait) == 0) { + mutex_enter(&vmmr_lock); + vmmr_adding_sz -= sz; + mutex_exit(&vmmr_lock); + + return (EINTR); + } + + mutex_enter(&vmmr_lock); + size_t added = 0; + size_t remain = sz; + while (added < sz) { + vmmr_span_t *span = NULL; + + if (vmmr_empty_sz > 0) { + span = vmmr_tp_remove_split(remain, &vmmr_empty_tp); + + vmmr_empty_sz -= span->vs_size; + } else { + /* + * No empty space to fill with new pages, so just tack + * it on at the end instead. + */ + span = kmem_zalloc(sizeof (vmmr_span_t), KM_SLEEP); + span->vs_addr = vmmr_empty_last; + span->vs_size = remain; + vmmr_empty_last += remain; + } + VERIFY3P(span, !=, NULL); + + + /* Allocate the actual pages to back this span */ + mutex_exit(&vmmr_lock); + int err = vmmr_alloc_pages(span); + mutex_enter(&vmmr_lock); + + /* + * If an error is encountered during page allocation for the + * span, unwind any progress made by the addition request. + */ + if (err != 0) { + /* + * Without pages allocated to this span, it is now + * tracked as empty. 
+ */ + vmmr_empty_sz += span->vs_size; + vmmr_tp_insert_concat(span, &vmmr_empty_tp); + + if (added != 0) { + vmmr_remove_raw(added); + } + + vmmr_adding_sz -= sz; + mutex_exit(&vmmr_lock); + + page_unresv(sz >> PAGESHIFT); + return (err); + } + + /* + * The allocated-page-bearing span is placed in the "free" + * treepair now, but is not officially exposed for consumption + * until `vmmr_free_sz` or `vmmr_free_transient_sz` are updated. + * + * This allows us to unwind the allocation in case of a failure + * without the risk of the freshly added span(s) being snapped + * up by a consumer already. + */ + added += span->vs_size; + remain -= span->vs_size; + vmmr_tp_insert_concat(span, &vmmr_free_tp); + } + + /* Make the added memory usable by exposing it to the size accounting */ + if (!transient) { + vmmr_free_sz += added; + } else { + vmmr_free_transient_sz += added; + } + ASSERT3U(added, ==, sz); + vmmr_adding_sz -= added; + + mutex_exit(&vmmr_lock); + return (0); +} + +int +vmmr_remove(size_t sz, bool transient) +{ + VERIFY3U(sz & PAGEOFFSET, ==, 0); + + mutex_enter(&vmmr_lock); + if ((!transient && sz > vmmr_free_sz) || + (transient && sz > vmmr_free_transient_sz)) { + mutex_exit(&vmmr_lock); + return (ENOSPC); + } + + vmmr_remove_raw(sz); + + if (!transient) { + vmmr_free_sz -= sz; + } else { + vmmr_free_transient_sz -= sz; + } + mutex_exit(&vmmr_lock); + page_unresv(sz >> PAGESHIFT); + return (0); +} + +int +vmmr_ioctl(int cmd, intptr_t arg, int md, cred_t *cr, int *rvalp) +{ + switch (cmd) { + case VMM_RESV_QUERY: { + struct vmm_resv_query res; + void *datap = (void *)(uintptr_t)arg; + + /* For now, anyone in GZ can query */ + if (crgetzoneid(cr) != GLOBAL_ZONEID) { + return (EPERM); + } + mutex_enter(&vmmr_lock); + res.vrq_free_sz = vmmr_free_sz; + res.vrq_alloc_sz = vmmr_alloc_sz; + res.vrq_alloc_transient_sz = vmmr_alloc_transient_sz; + res.vrq_limit = vmmr_total_limit; + mutex_exit(&vmmr_lock); + if (ddi_copyout(&res, datap, sizeof (res), md) != 0) { +
return (EFAULT); + } + break; + } + case VMM_RESV_ADD: { + if (secpolicy_sys_config(cr, B_FALSE) != 0) { + return (EPERM); + } + return (vmmr_add((size_t)arg, false)); + } + case VMM_RESV_REMOVE: { + if (secpolicy_sys_config(cr, B_FALSE) != 0) { + return (EPERM); + } + return (vmmr_remove((size_t)arg, false)); + } + default: + return (ENOTTY); + } + return (0); +} diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c index d5f4b3883b..ef366ddaff 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c @@ -45,6 +45,7 @@ #include <sys/vmm_impl.h> #include <sys/vmm_drv.h> #include <sys/vmm_vm.h> +#include <sys/vmm_reservoir.h> #include <vm/seg_dev.h> @@ -1506,13 +1507,22 @@ vmm_hma_release(void) } static int -vmmdev_do_vm_create(char *name, cred_t *cr) +vmmdev_do_vm_create(const struct vm_create_req *req, cred_t *cr) { vmm_softc_t *sc = NULL; minor_t minor; int error = ENOMEM; + size_t len; + const char *name = req->name; - if (strnlen(name, VM_MAX_NAMELEN) >= VM_MAX_NAMELEN) { + len = strnlen(name, VM_MAX_NAMELEN); + if (len == 0) { + return (EINVAL); + } + if (len >= VM_MAX_NAMELEN) { + return (ENAMETOOLONG); + } + if (strchr(name, '/') != NULL) { return (EINVAL); } @@ -1555,7 +1565,7 @@ vmmdev_do_vm_create(char *name, cred_t *cr) goto fail; } - error = vm_create(name, &sc->vmm_vm); + error = vm_create(req->name, req->flags, &sc->vmm_vm); if (error == 0) { /* Complete VM intialization and report success. 
*/ (void) strlcpy(sc->vmm_name, name, sizeof (sc->vmm_name)); @@ -1938,7 +1948,7 @@ vmm_do_vm_destroy(vmm_softc_t *sc, boolean_t clean_zsd) /* ARGSUSED */ static int -vmmdev_do_vm_destroy(const char *name, cred_t *cr) +vmmdev_do_vm_destroy(const struct vm_destroy_req *req, cred_t *cr) { boolean_t hma_release = B_FALSE; vmm_softc_t *sc; @@ -1949,7 +1959,7 @@ vmmdev_do_vm_destroy(const char *name, cred_t *cr) mutex_enter(&vmm_mtx); - if ((sc = vmm_lookup(name)) == NULL) { + if ((sc = vmm_lookup(req->name)) == NULL) { mutex_exit(&vmm_mtx); return (ENOENT); } @@ -2193,6 +2203,47 @@ vmm_is_supported(intptr_t arg) } static int +vmm_ctl_ioctl(int cmd, intptr_t arg, int md, cred_t *cr, int *rvalp) +{ + void *argp = (void *)arg; + + switch (cmd) { + case VMM_CREATE_VM: { + struct vm_create_req req; + + if ((md & FWRITE) == 0) { + return (EPERM); + } + if (ddi_copyin(argp, &req, sizeof (req), md) != 0) { + return (EFAULT); + } + return (vmmdev_do_vm_create(&req, cr)); + } + case VMM_DESTROY_VM: { + struct vm_destroy_req req; + + if ((md & FWRITE) == 0) { + return (EPERM); + } + if (ddi_copyin(argp, &req, sizeof (req), md) != 0) { + return (EFAULT); + } + return (vmmdev_do_vm_destroy(&req, cr)); + } + case VMM_VM_SUPPORTED: + return (vmm_is_supported(arg)); + case VMM_RESV_QUERY: + case VMM_RESV_ADD: + case VMM_RESV_REMOVE: + return (vmmr_ioctl(cmd, arg, md, cr, rvalp)); + default: + break; + } + /* No other actions are legal on ctl device */ + return (ENOTTY); +} + +static int vmm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) { @@ -2207,36 +2258,7 @@ vmm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, minor = getminor(dev); if (minor == VMM_CTL_MINOR) { - void *argp = (void *)arg; - char name[VM_MAX_NAMELEN] = { 0 }; - size_t len = 0; - - if ((mode & FKIOCTL) != 0) { - len = strlcpy(name, argp, sizeof (name)); - } else { - if (copyinstr(argp, name, sizeof (name), &len) != 0) { - return (EFAULT); - } - } - if (len >= 
VM_MAX_NAMELEN) { - return (ENAMETOOLONG); - } - - switch (cmd) { - case VMM_CREATE_VM: - if ((mode & FWRITE) == 0) - return (EPERM); - return (vmmdev_do_vm_create(name, credp)); - case VMM_DESTROY_VM: - if ((mode & FWRITE) == 0) - return (EPERM); - return (vmmdev_do_vm_destroy(name, credp)); - case VMM_VM_SUPPORTED: - return (vmm_is_supported(arg)); - default: - /* No other actions are legal on ctl device */ - return (ENOTTY); - } + return (vmm_ctl_ioctl(cmd, arg, mode, credp, rvalp)); } sc = ddi_get_soft_state(vmm_statep, minor); @@ -2422,7 +2444,6 @@ vmm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) } vmm_sol_glue_init(); - vmm_arena_init(); /* * Perform temporary HMA registration to determine if the system @@ -2462,7 +2483,6 @@ fail: if (reg != NULL) { hma_unregister(reg); } - vmm_arena_fini(); vmm_sol_glue_cleanup(); mutex_exit(&vmmdev_mtx); return (DDI_FAILURE); @@ -2494,6 +2514,11 @@ vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) } mutex_exit(&vmm_mtx); + if (!vmmr_is_empty()) { + mutex_exit(&vmmdev_mtx); + return (DDI_FAILURE); + } + VERIFY(vmmdev_sdev_hdl != (sdev_plugin_hdl_t)NULL); if (sdev_plugin_unregister(vmmdev_sdev_hdl) != 0) { mutex_exit(&vmmdev_mtx); @@ -2507,7 +2532,6 @@ vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) VERIFY0(vmm_mod_unload()); VERIFY3U(vmmdev_hma_reg, ==, NULL); - vmm_arena_fini(); vmm_sol_glue_cleanup(); mutex_exit(&vmmdev_mtx); @@ -2579,11 +2603,13 @@ _init(void) } vmm_zsd_init(); + vmmr_init(); error = mod_install(&modlinkage); if (error) { ddi_soft_state_fini(&vmm_statep); vmm_zsd_fini(); + vmmr_fini(); } return (error); @@ -2600,6 +2626,7 @@ _fini(void) } vmm_zsd_fini(); + vmmr_fini(); ddi_soft_state_fini(&vmm_statep); diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c index 720af54200..bd1f1890d4 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c @@ -32,11 +32,12 @@ #include <vm/hat_i86.h> #include <vm/seg_vn.h> #include 
<vm/seg_kmem.h> -#include <vm/seg_vmm.h> #include <machine/vm.h> #include <sys/vmm_gpt.h> #include <sys/vmm_vm.h> +#include <sys/seg_vmm.h> +#include <sys/vmm_reservoir.h> #define PMAP_TO_VMMAP(pm) ((vm_map_t) \ ((caddr_t)(pm) - offsetof(struct vmspace, vms_pmap))) @@ -65,38 +66,6 @@ static vmspace_mapping_t *vm_mapping_find(struct vmspace *, uintptr_t, size_t, boolean_t); static void vm_mapping_remove(struct vmspace *, vmspace_mapping_t *); -static vmem_t *vmm_alloc_arena = NULL; - -static void * -vmm_arena_alloc(vmem_t *vmp, size_t size, int vmflag) -{ - return (segkmem_xalloc(vmp, NULL, size, vmflag, 0, - segkmem_page_create, &kvps[KV_VVP])); -} - -static void -vmm_arena_free(vmem_t *vmp, void *inaddr, size_t size) -{ - segkmem_xfree(vmp, inaddr, size, &kvps[KV_VVP], NULL); -} - -void -vmm_arena_init(void) -{ - vmm_alloc_arena = vmem_create("vmm_alloc_arena", NULL, 0, 1024 * 1024, - vmm_arena_alloc, vmm_arena_free, kvmm_arena, 0, VM_SLEEP); - - ASSERT(vmm_alloc_arena != NULL); -} - -void -vmm_arena_fini(void) -{ - VERIFY(vmem_size(vmm_alloc_arena, VMEM_ALLOC) == 0); - vmem_destroy(vmm_alloc_arena); - vmm_alloc_arena = NULL; -} - struct vmspace * vmspace_alloc(vm_offset_t start, vm_offset_t end, pmap_pinit_t pinit) { @@ -164,8 +133,9 @@ vmspace_find_kva(struct vmspace *vms, uintptr_t addr, size_t size) switch (vmo->vmo_type) { case OBJT_DEFAULT: - result = (void *)((uintptr_t)vmo->vmo_data + - VMSM_OFFSET(vmsm, addr)); + result = vmmr_region_mem_at( + (vmmr_region_t *)vmo->vmo_data, + VMSM_OFFSET(vmsm, addr) & PAGEMASK); break; default: break; @@ -344,39 +314,23 @@ vm_object_pager_none(vm_object_t vmo, uintptr_t off, pfn_t *lpfn, uint_t *lvl) } static pfn_t -vm_object_pager_heap(vm_object_t vmo, uintptr_t off, pfn_t *lpfn, uint_t *lvl) +vm_object_pager_reservoir(vm_object_t vmo, uintptr_t off, pfn_t *lpfn, + uint_t *lvl) { - const uintptr_t kaddr = ALIGN2PAGE((uintptr_t)vmo->vmo_data + off); - uint_t idx, level; - htable_t *ht; - x86pte_t pte; - pfn_t top_pfn, 
pfn; + vmmr_region_t *region; + pfn_t pfn; ASSERT(vmo->vmo_type == OBJT_DEFAULT); - ASSERT(off < vmo->vmo_size); - ht = htable_getpage(kas.a_hat, kaddr, &idx); - if (ht == NULL) { - return (PFN_INVALID); - } - pte = x86pte_get(ht, idx); - if (!PTE_ISPAGE(pte, ht->ht_level)) { - htable_release(ht); - return (PFN_INVALID); - } - - pfn = top_pfn = PTE2PFN(pte, ht->ht_level); - level = ht->ht_level; - if (ht->ht_level > 0) { - pfn += mmu_btop(kaddr & LEVEL_OFFSET((uint_t)ht->ht_level)); - } - htable_release(ht); + region = vmo->vmo_data; + pfn = vmmr_region_pfn_at(region, off & PAGEMASK); + /* TODO: handle large pages */ if (lpfn != NULL) { - *lpfn = top_pfn; + *lpfn = pfn; } if (lvl != NULL) { - *lvl = level; + *lvl = 0; } return (pfn); } @@ -419,41 +373,8 @@ vm_object_pager_sg(vm_object_t vmo, uintptr_t off, pfn_t *lpfn, uint_t *lvl) return (pfn); } -static void -vm_reserve_pages(size_t npages) -{ - uint_t retries = 60; - int rc; - - mutex_enter(&freemem_lock); - if (availrmem < npages) { - mutex_exit(&freemem_lock); - - /* - * Set needfree and wait for the ZFS ARC reap thread to free up - * some memory. - */ - page_needfree(npages); - - mutex_enter(&freemem_lock); - while ((availrmem < npages) && retries-- > 0) { - mutex_exit(&freemem_lock); - rc = delay_sig(drv_usectohz(1 * MICROSEC)); - mutex_enter(&freemem_lock); - - if (rc == EINTR) - break; - } - mutex_exit(&freemem_lock); - - page_needfree(-npages); - } else { - mutex_exit(&freemem_lock); - } -} - vm_object_t -vm_object_allocate(objtype_t type, vm_pindex_t psize) +vm_object_allocate(objtype_t type, vm_pindex_t psize, bool transient) { vm_object_t vmo; const size_t size = ptob((size_t)psize); @@ -468,17 +389,19 @@ vm_object_allocate(objtype_t type, vm_pindex_t psize) switch (type) { case OBJT_DEFAULT: { - vm_reserve_pages(psize); - /* XXXJOY: opt-in to larger pages? */ - vmo->vmo_data = vmem_alloc(vmm_alloc_arena, size, KM_NOSLEEP); - if (vmo->vmo_data == NULL) { + /* TODO: opt-in to larger pages? 
*/ + int err; + vmmr_region_t *region = NULL; + + err = vmmr_alloc(size, transient, ®ion); + if (err != 0) { mutex_destroy(&vmo->vmo_lock); kmem_free(vmo, sizeof (*vmo)); return (NULL); } - bzero(vmo->vmo_data, size); - vmo->vmo_pager = vm_object_pager_heap; + vmo->vmo_data = region; + vmo->vmo_pager = vm_object_pager_reservoir; } break; case OBJT_SG: @@ -505,7 +428,7 @@ vm_pager_allocate(objtype_t type, void *handle, vm_ooffset_t size, VERIFY(type == OBJT_SG); VERIFY(off == 0); - vmo = vm_object_allocate(type, size); + vmo = vm_object_allocate(type, size, false); vmo->vmo_data = sg; mutex_enter(&sg->sg_lock); @@ -529,7 +452,7 @@ vm_object_deallocate(vm_object_t vmo) switch (vmo->vmo_type) { case OBJT_DEFAULT: - vmem_free(vmm_alloc_arena, vmo->vmo_data, vmo->vmo_size); + vmmr_free((vmmr_region_t *)vmo->vmo_data); break; case OBJT_SG: sglist_free((struct sglist *)vmo->vmo_data); @@ -574,6 +497,17 @@ vm_object_reference(vm_object_t vmo) VERIFY3U(ref, !=, 0); } +pfn_t +vm_object_pfn(vm_object_t vmo, uintptr_t off) +{ + /* This is expected to be used only on reservoir-backed memory */ + if (vmo->vmo_type != OBJT_DEFAULT) { + return (PFN_INVALID); + } + + return (vmo->vmo_pager(vmo, off, NULL, NULL)); +} + static vmspace_mapping_t * vm_mapping_find(struct vmspace *vms, uintptr_t addr, size_t size, boolean_t no_lock) @@ -912,11 +846,9 @@ vm_segmap_obj(vm_object_t vmo, off_t map_off, size_t size, struct as *as, if (err == 0) { segvmm_crargs_t svma; - svma.kaddr = (caddr_t)vmo->vmo_data + map_off; + svma.obj = vmo; + svma.offset = map_off; svma.prot = prot; - svma.cookie = vmo; - svma.hold = (segvmm_holdfn_t)vm_object_reference; - svma.rele = (segvmm_relefn_t)vm_object_deallocate; err = as_map(as, *addrp, size, segvmm_create, &svma); } @@ -969,11 +901,9 @@ vm_segmap_space(struct vmspace *vms, off_t off, struct as *as, caddr_t *addrp, VERIFY(mapoff < vmo->vmo_size); VERIFY((mapoff + size) <= vmo->vmo_size); - svma.kaddr = (void *)((uintptr_t)vmo->vmo_data + mapoff); + 
svma.obj = vmo; + svma.offset = mapoff; svma.prot = prot; - svma.cookie = vmo; - svma.hold = (segvmm_holdfn_t)vm_object_reference; - svma.rele = (segvmm_relefn_t)vm_object_deallocate; err = as_map(as, *addrp, len, segvmm_create, &svma); } diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h index 5b3e7f9b10..e58d63761e 100644 --- a/usr/src/uts/i86pc/sys/vmm.h +++ b/usr/src/uts/i86pc/sys/vmm.h @@ -39,7 +39,7 @@ * * Copyright 2015 Pluribus Networks Inc. * Copyright 2019 Joyent, Inc. - * Copyright 2020 Oxide Computer Company + * Copyright 2021 Oxide Computer Company */ #ifndef _VMM_H_ @@ -124,20 +124,12 @@ enum x2apic_state { /* * illumos doesn't have a limitation based on SPECNAMELEN like FreeBSD does. - * Instead of picking an arbitrary value we will just rely on the same - * calculation that's made below. If this calculation ever changes we need to - * update the the VM_MAX_NAMELEN mapping in the bhyve brand's boot.c file. + * To simplify structure definitions, an arbitrary limit has been chosen. + * This same limit is used for memory segment names */ -#define VM_MAX_PREFIXLEN 10 -#define VM_MAX_SUFFIXLEN 15 -#define VM_MIN_NAMELEN 6 -#define VM_MAX_NAMELEN \ - (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1) - -#ifdef _KERNEL -CTASSERT(VM_MAX_NAMELEN >= VM_MIN_NAMELEN); -#endif +#define VM_MAX_NAMELEN 128 +#define VM_MAX_SEG_NAMELEN 128 #define VM_MAXCPU 32 /* maximum virtual cpus */ @@ -389,4 +381,12 @@ struct vm_entry { int vm_restart_instruction(void *vm, int vcpuid); +enum vm_create_flags { + /* + * Allocate guest memory segments from existing reservoir capacity, + * rather than attempting to create transient allocations. + */ + VCF_RESERVOIR_MEM = (1 << 0), +}; + #endif /* _VMM_H_ */ diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h index 15c64355c4..f371ad1266 100644 --- a/usr/src/uts/i86pc/sys/vmm_dev.h +++ b/usr/src/uts/i86pc/sys/vmm_dev.h @@ -39,6 +39,7 @@ * * Copyright 2015 Pluribus Networks Inc. 
* Copyright 2019 Joyent, Inc. + * Copyright 2021 Oxide Computer Company */ #ifndef _VMM_DEV_H_ @@ -46,6 +47,19 @@ #include <machine/vmm.h> +#include <sys/param.h> +#include <sys/cpuset.h> + +struct vm_create_req { + char name[VM_MAX_NAMELEN]; + uint64_t flags; +}; + + +struct vm_destroy_req { + char name[VM_MAX_NAMELEN]; +}; + struct vm_memmap { vm_paddr_t gpa; int segid; /* memory segment */ @@ -66,7 +80,7 @@ struct vm_munmap { struct vm_memseg { int segid; size_t len; - char name[SPECNAMELEN + 1]; + char name[VM_MAX_SEG_NAMELEN]; }; struct vm_register { @@ -282,6 +296,13 @@ struct vm_run_state { uint8_t _pad[3]; }; +struct vmm_resv_query { + size_t vrq_free_sz; + size_t vrq_alloc_sz; + size_t vrq_alloc_transient_sz; + size_t vrq_limit; +}; + #define VMMCTL_IOC_BASE (('V' << 16) | ('M' << 8)) #define VMM_IOC_BASE (('v' << 16) | ('m' << 8)) #define VMM_LOCK_IOC_BASE (('v' << 16) | ('l' << 8)) @@ -292,6 +313,10 @@ struct vm_run_state { #define VMM_DESTROY_VM (VMMCTL_IOC_BASE | 0x02) #define VMM_VM_SUPPORTED (VMMCTL_IOC_BASE | 0x03) +#define VMM_RESV_QUERY (VMMCTL_IOC_BASE | 0x10) +#define VMM_RESV_ADD (VMMCTL_IOC_BASE | 0x11) +#define VMM_RESV_REMOVE (VMMCTL_IOC_BASE | 0x12) + /* Operations performed in the context of a given vCPU */ #define VM_RUN (VMM_CPU_IOC_BASE | 0x01) #define VM_SET_REGISTER (VMM_CPU_IOC_BASE | 0x02) diff --git a/usr/src/uts/i86pc/vm/seg_vmm.h b/usr/src/uts/i86pc/vm/seg_vmm.h deleted file mode 100644 index f5b95c6a27..0000000000 --- a/usr/src/uts/i86pc/vm/seg_vmm.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. 
- */ - -/* - * Copyright 2018 Joyent, Inc. - */ - -#ifndef _VM_SEG_VMM_H -#define _VM_SEG_VMM_H - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct segvmm_crargs { - caddr_t kaddr; - uchar_t prot; /* protection */ - void *cookie; /* opaque resource backing memory */ - void (*hold)(void *); /* add reference to cookie */ - void (*rele)(void *); /* release reference to cookie */ -} segvmm_crargs_t; - -typedef void (*segvmm_holdfn_t)(void *); -typedef void (*segvmm_relefn_t)(void *); - -typedef struct segvmm_data { - krwlock_t svmd_lock; - uintptr_t svmd_kaddr; - uchar_t svmd_prot; - void *svmd_cookie; - segvmm_holdfn_t svmd_hold; - segvmm_relefn_t svmd_rele; - size_t svmd_softlockcnt; -} segvmm_data_t; - -extern int segvmm_create(struct seg **, void *); - -#ifdef __cplusplus -} -#endif - -#endif /* _VM_SEG_VMM_H */ |
