summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJerry Jelinek <jerry.jelinek@joyent.com>2019-11-08 14:16:48 +0000
committerJerry Jelinek <jerry.jelinek@joyent.com>2019-11-08 14:16:48 +0000
commit4351df24a18fd73b1e6cc2591e622883e502167c (patch)
tree055eea240497456740e6f737bef63765ed7a6574
parentfaabb223a29c66e258a2c067cb14888c51ba6f47 (diff)
parent42cd19316c818c8b8283fc48263a1b4ce99cf049 (diff)
downloadillumos-joyent-4351df24a18fd73b1e6cc2591e622883e502167c.tar.gz
[illumos-gate merge]
commit 42cd19316c818c8b8283fc48263a1b4ce99cf049 11859 need swapgs mitigation commit ad3e6d4dd82f2e18743399134a4b99cf303478f6 11880 changing encryption key on dataset with unencrypted children triggers VERIFY commit 249622b3e0d46f0016d00e3f87b314635d11065a 11929 mac_minor_hold() gets id_alloc_nosleep() wrong commit 1c085a54d061bc17f8b209d1ea6161fcdf66d971 3334 zonestat missing man page commit 327c8d1665439dd2540c1b460773bd9f0c1c0fa9 11792 ibtl: cast between incompatible function types commit 22f89f96cd7b45b9686231ed7d98e610077df6c6 11922 ipmi_open looks for wrong return value
-rw-r--r--usr/src/man/man1/Makefile4
-rw-r--r--usr/src/man/man1/zonestat.1485
-rw-r--r--usr/src/man/man1m/Makefile2
-rw-r--r--usr/src/man/man1m/zonestatd.1m90
-rw-r--r--usr/src/pkg/manifests/system-zones.mf3
-rw-r--r--usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh19
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_crypt.c7
-rw-r--r--usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c6
-rw-r--r--usr/src/uts/common/io/mac/mac.c10
-rw-r--r--usr/src/uts/i86pc/ml/kpti_trampolines.s25
-rw-r--r--usr/src/uts/i86pc/os/cpuid.c40
-rw-r--r--usr/src/uts/intel/amd64/ml/amd64.il20
-rw-r--r--usr/src/uts/intel/amd64/sys/privregs.h14
-rw-r--r--usr/src/uts/intel/asm/cpu.h11
-rw-r--r--usr/src/uts/intel/ia32/ml/exception.s5
-rw-r--r--usr/src/uts/intel/ia32/os/sundep.c11
-rw-r--r--usr/src/uts/intel/io/ipmi/ipmi_main.c6
-rw-r--r--usr/src/uts/intel/kdi/kdi_asm.s5
-rw-r--r--usr/src/uts/intel/sys/archsystm.h4
-rw-r--r--usr/src/uts/intel/sys/segments.h3
20 files changed, 701 insertions, 69 deletions
diff --git a/usr/src/man/man1/Makefile b/usr/src/man/man1/Makefile
index a4d59dadcc..d7904100eb 100644
--- a/usr/src/man/man1/Makefile
+++ b/usr/src/man/man1/Makefile
@@ -13,6 +13,7 @@
# Copyright 2011, Richard Lowe
# Copyright 2018 Nexenta Systems, Inc.
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
+# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
#
include $(SRC)/Makefile.master
@@ -421,7 +422,8 @@ MANFILES= acctcom.1 \
yppasswd.1 \
ypwhich.1 \
zlogin.1 \
- zonename.1
+ zonename.1 \
+ zonestat.1
MANLINKS= batch.1 \
bg.1 \
diff --git a/usr/src/man/man1/zonestat.1 b/usr/src/man/man1/zonestat.1
new file mode 100644
index 0000000000..39f2be5f61
--- /dev/null
+++ b/usr/src/man/man1/zonestat.1
@@ -0,0 +1,485 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\" Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+.\" Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+.\"
+.Dd April 04, 2019
+.Dt zonestat 1
+.Os
+.Sh NAME
+.Nm zonestat
+.Nd report active zone statistics.
+.Sh SYNOPSIS
+.Nm
+.Op Fl z Ar zonelist
+.Op Fl r Ar reslist
+.Op Fl n Ar namelist
+.Op Fl T Ar u Ns | Ns Ar d Ns | Ns Ar i
+.Op Fl R Ar reports
+.Op Fl q
+.Op Fl p Oo Fl P Ar lines Oc
+.Op Fl S Ar cols
+.Ar interval
+.Oo Ar duration
+.Oo Ar report
+.Oc Oc
+.Sh DESCRIPTION
+The
+.Nm
+utility reports on the cpu, memory, and resource control utilization of the
+currently running zones.
+Each zone's utilization is reported both as a percentage of system resources
+and the zone's configured limits.
+.Pp
+The
+.Nm
+utility prints a series of interval reports at the specified interval.
+It optionally also prints one or more summary reports at a specified interval.
+.Pp
+The default output is a summary of cpu, physical, and virtual memory
+utilization.
+The -r option can be used to choose detailed output for specific resources.
+.Pp
+The following options are supported:
+.Bl -tag -width Ds
+.It Fl z Ar zonename Ns Oo Ar ,zonename Ns ... Oc
+Specify a list of zones on which to report.
+By default all zones are reported.
+In addition to a comma-separated list, multiple
+.Fl z
+options can be specified to report on a set of zones.
+The output will include any resources which have usage by the specified zone(s).
+.It Fl r Ar resource Ns Oo Ar ,resource Ns ... Oc
+Specify resource types on which to report.
+The available resources are:
+.Bd -ragged -offset indent
+.Em physical-memory ,
+.Em virtual-memory ,
+.Em locked-memory ,
+.Em processor-sets ,
+.Em processes ,
+.Em lwps ,
+.Em shm-memory ,
+.Em shm-ids ,
+.Em sem-ids ,
+.Em msg-ids ,
+.Em lofi
+.Ed
+.Pp
+The following nicknames can also be specified as resource names:
+.Bl -tag -width indent
+.It Em summary
+A summary of cpu, physical-memory, and virtual memory usage.
+.It Em memory
+physical-memory, virtual-memory, and locked memory.
+.It Em psets
+processor-sets
+.It Em default-pset
+The default pset only.
+.It Em limits
+processes, lwps, lofi
+.It Em sysv
+shm-memory, shm-ids, sem-ids, msg-ids
+.It Em all
+all resource types.
+.El
+.Pp
+By default the summary resource is printed.
+.Pp
+In addition to a comma-separated list, multiple
+.Fl r
+options can be specified to report on a set of resource types.
+.Pp
+The system's cpus can be partitioned into processor sets
+(psets). By default, all cpus are in a single pset named
+.Em pset_default .
+.Pp
+Memory is not partition-able into sets.
+The zonestat utility output for these resources will show them as named
+.Em mem_default
+and
+.Em vm_default .
+.Pp
+The
+.Em all
+resource specifies that all resource types should be reported.
+.It Fl n Ar name Ns Oo Ar ,name Oc
+Specify a list of resource names on which to report.
+For pset resources, this is the name of the processor set.
+For physical-memory, locked-memory, and virtual-memory resources,
+the only names are
+.Em mem_default
+and
+.Em vm_default .
+.Pp
+Dedicated-cpu processor sets can be specified by their pset
+name, or by just their zonename.
+.Pp
+Processor sets created by psrset can be specified by their pool
+pset name, or just by their psetid.
+.Pp
+In addition to a comma-separated list, multiple
+.Fl n
+options can be specified to report on a set of resources.
+.It Fl T Ar u Ns | Ns Ar d Ns | Ns Ar i
+Include timestamp of each report.
+The following formats are supported:
+.Bl -tag -width indent
+.It u
+A printed representation of the internal representation of time;
+see
+.Xr time 2 .
+This is also known as unix time.
+.It d
+Standard date format; see
+.Xr date 1 .
+This option is not valid with
+.Fl p .
+.It i
+Time formatted as the ISO 8601 compliant format:
+.D1 YYYYMMDDThhmmssZ
+.El
+.It Fl R Ar report Ns Oo Ar ,report Oc
+Print a summary report.
+The supported report types are described below.
+In addition to a comma-separated list, multiple
+.Fl R
+options may be specified for a set of summary reports.
+.Bl -tag -width indent
+.It total
+Prints a summary report detailing the following for each resource:
+.Bl -tag -width indent
+.It psets
+Total cpu used since start of command invocation.
+The percent used for each zone includes time that a zone was not running.
+For instance, if a zone used 100% of the cpu while it was running, but the zone
+was halted for half of the intervals, then the summary report will show the
+zone used 50% of the cpu time.
+.It memory, limits, sysv
+Average resource used of all intervals reported since command invocation.
+This average factors in intervals in which a zone was not running.
+For example, if a zone used an average of 100M of physical memory while it was
+running, and was only running for half the intervals, then the summary report
+will show that the zone used 50M of physical memory on average.
+.El
+.It average
+Similar to
+.Em total ,
+but only intervals in which a zone is running are factored in.
+For example, if a zone was only running for a single interval, and during that
+interval, the zone used 200M of virtual memory, then its average
+virtual-memory will be 200M, regardless of the number of intervals reported
+before the summary report.
+.It high
+Print a summary report detailing the highest usage of each resource and zone
+during any interval of the zonestat utility invocation.
+.El
+.It Fl S Ar col Ns Oo Ar ,col Oc
+Sort zones utilizing each resource.
+The following sorting columns can be specified.
+.Bl -tag -width indent
+.It name
+Sort alphanumerically by zone name.
+.It used
+Sort by quantity of resource used.
+.It cap
+Sort by configured cap.
+.It pcap
+Sort by percent of cap used.
+.It shr
+Sort by allocated share.
+.It pshru
+Sort by percent of share used.
+.El
+.Pp
+By default, output is sorted by quantity of resource used.
+.It Fl q
+Only print summary reports (requires
+.Fl R ) .
+All interval reports are omitted.
+.It Fl p
+Print output in stable, machine-parsable format.
+Individual fields will be delimited with :.
+The line format is:
+.Pp
+.D1 <report type>:<resource>:<field>[:<field>]*
+.Pp
+If
+.Fl T
+is specified each line is prefixed with a timestamp:
+.Pp
+.D1 <timestamp>:<report type>:<resource>:<field>[:<field>]*
+.Pp
+The report types are:
+.Bd -ragged -offset indent
+.Em report-total ,
+.Em report-average ,
+.Em report-high ,
+.Em interval
+.Ed
+.Pp
+The resource types are:
+.Bd -ragged -offset indent
+.Em header ,
+.Em footer ,
+.Em summary ,
+.Em physical-memory ,
+.Em virtual-memory ,
+.Em locked-memory ,
+.Em processor-set ,
+.Em processes ,
+.Em lwps ,
+.Em sysv-shared-memory ,
+.Em sysv-shmids ,
+.Em sysv-semids ,
+.Em sysv-msgids ,
+.Em lofi
+.Ed
+.Pp
+The
+.Em header
+resource is a special resource used to state the beginning of an interval or
+summary report.
+All output lines between header resources belong to the same report.
+Each header has a matching footer.
+.Pp
+The remaining fields are resource type specific.
+See the zonestat utility output for details.
+.Pp
+All existing output fields are stable.
+Future versions may introduce new report and resource types.
+Future versions may also add additional new fields to the end of existing
+output lines.
+.It Fl P Ar line Ns Oo Ar ,line Oc
+For parsable output, specify lines to output in parsable output.
+One or more of the following line types can be chosen:
+.Bl -tag -width indent
+.It resource
+The lines describing each resource.
+.It total
+The total utilization of each resource.
+.It system
+The utilization of each resource by the system.
+This includes the kernel, and any resource consumption not attributable to a
+specific zone.
+When zonestat is run from within a non-global-zone, this value will be the
+aggregate resource consumed by the system and all other zones.
+.It zones
+Lines detailing the per-zone utilization of each resource.
+.It header, footer
+Each interval and summary report has a header, which prints details such
+as the interval and count information.
+After each report, any footer is also printed.
+.El
+.El
+.Ss OPERANDS
+.Bl -tag -width indent
+.It interval
+Specifies the length in seconds to pause between each interval report.
+An interval of
+.Em default
+will use the configured interval of the zones
+monitoring service - see
+.Xr zonestatd 1m .
+.Pp
+Interval is required.
+An interval of zero is not permitted.
+The interval can be specified as [nh][nm][ns], such as 10s or 1m.
+.It duration
+Specifies the number of intervals to report.
+Defaults to infinity if not specified.
+The command duration is (interval * duration).
+A duration of zero is invalid.
+A value of
+.Em inf
+can also be specified to explicitly choose infinity.
+.Pp
+Duration can also be specified as [nh][nm][ns].
+In this case, duration will be interpreted as the duration of execution time.
+The actual duration will be rounded up to the nearest multiple of the interval.
+.It report
+Specify the summary report period.
+For instance, a report of 4 would produce reports every 4 intervals.
+If the command duration is not a multiple of report, then the last report will
+be of any remaining intervals.
+.Pp
+Report can also be specified as [nh][nm][ns].
+In this case, reports will be output at the specified time period, rounded up
+to the nearest interval.
+If the command duration is not a multiple of report, then the last report will
+be of any remaining intervals.
+.Pp
+Requires
+.Fl R .
+If
+.Fl R
+is specified and report is not, the report period will be the entire command
+duration, producing the specified reports at the end of execution.
+.El
+.Ss OUTPUT
+The following list defines the column heading of the command output:
+.Bl -tag -width indent
+.It SYSTEM-MEMORY
+The total amount of memory available on the physical host.
+.It SYSTEM-LIMIT
+The maximum amount of resource available on the physical host.
+.It CPUS
+The number of cpus allocated to a processor set.
+.It ONLINE
+Of the cpus allocated to a processor set, the number of cpus
+which can execute processes.
+.It MIN/MAX
+The minimum and maximum number of cpus which may be allocated
+to the processor set by the system.
+.It ZONE
+The zone using the resource.
+In addition to zone names, this column may also contain:
+.Bl -tag -width indent
+.It [total]
+The total quantity of resource used system-wide.
+.It [system]
+The quantity of resource used by the kernel or in a manner not associated with
+any particular zone.
+.Pp
+When zonestat is used within a non-global zone, [system] designates the
+aggregate resource used by the system and by all other zones.
+.El
+.It USED
+The amount of resource used.
+.It PCT
+The amount of resource used as a percent of the total resource.
+.It %PART
+The amount of cpu used as a percentage of the total cpu in a processor-set to
+which the zone is bound.
+A zone can only have processes bound to multiple processor sets if it is the
+global zone, or if psrset(1m) psets are used.
+If multiple bindings are found for a zone, its %PART will be the fraction used
+of all bound psets.
+For [total] and [system], %PART is the percent used of all cpus on the system.
+.It CAP
+If a zone is configured to have a cap on the given resource, the cap will be
+displayed in this column.
+.It %CAP
+The amount of resource used as a percent of the zone's configured cap.
+.It SHRS
+The number of shares allocated to the zone.
+For the [total] row, this will be the total number of shares allocated to all
+zones sharing the resource.
+.Pp
+If a zone is not configured to use shares, and is sharing a
+resource with other zones that are configured to use shares,
+this column will contain
+.Em no-fss
+for the zone.
+.It %SHR
+The fraction of the total shares allocated to the zone.
+For instance, if 2 zones share a processor set, each with 10 shares, then each
+zone will have a %SHR of 50%.
+.It %SHRU
+Of the share allocated to the zone, the fraction of resource
+used.
+Zones using all of their share will have a %SHRU of 100%.
+Because shares are only enforced when there is resource contention, it is
+possible for a zone to have a %SHRU in excess of 100%.
+.El
+.Sh IMPLEMENTATION NOTES
+The zonestat utility depends on the zones monitoring service:
+.Pp
+.D1 svc:/system/zones-monitoring:default
+.Pp
+If the zonestat service is stopped while the zonestat utility is running, the
+zonestat command invocation will quit without printing additional reports.
+.Pp
+The reports will be printed if zonestat is interrupted (by ctrl-c,
+.Dv SIGINT )
+before reaching the next report period.
+.Sh EXIT STATUS
+.Ex -std
+.Bl -tag -width indent
+.It 0
+Successful completion.
+.It 1
+An error occurred.
+.It 2
+Invalid usage.
+.It 3
+svc:/system/zones-monitoring:default not running or not responding.
+.El
+.Sh EXAMPLES
+Example 1: Summary of cpu and memory utilization every 5 seconds.
+.Bd -literal
+ # zonestat 5 1
+ SUMMARY
+ -----CPU------------- ----PHYSICAL--- ----VIRTUAL----
+ ZONE USED %PART %CAP %SHRU USED PCT %CAP USED PCT %CAP
+ [total] 9.74 30% - - 7140M 21% - 10.6G 22% -
+ [system] 0.28 0.8% - - 6535M 19% - 10.4G 21% -
+ global 9.10 28% - - 272M 0.8% - 366M 0.7% -
+ zoneA 0.32 1.0% - - 256M 0.7% - 265M 0.5% -
+ zoneB 0.00 0.0% - - 77.6M 0.2% - 71.1M 0.1% -
+.Ed
+.Pp
+Example 2: Using parsable output, fetching only zone usages.
+.Pp
+The following command will produce parsable output, printing one
+line per zone using each pset resource for a 5 second interval.
+.Bd -literal
+
+ # zonestat -p -P zones -r psets 5 1
+
+.Ed
+.Pp
+Example 3: Report on the default pset.
+.Pp
+The following command will report on the default pset once a second
+for one minute.
+.Bd -literal
+
+ # zonestat -r default-pset 1 1m
+
+.Ed
+.Pp
+Example 4: Report total and high utilization.
+.Pp
+The following command monitors silently at a 10 second interval
+for 24 hours, producing a total and high report every 1 hour.
+.Bd -literal
+
+ # zonestat -q -R total,high 10s 24h 1h
+
+.Ed
+.Sh INTERFACE STABILITY
+Command invocation and parsable output is Committed.
+Human readable output (default output) is uncommitted.
+.Sh SECURITY
+When run from within a non-global zone (NGZ), only processor sets
+visible to the NGZ are reported.
+The NGZ output will include all other system resources, such as memory and
+limits.
+.Pp
+For all reported resources, the NGZ's usage will be output.
+Usage of each resource by the system, global zone, and all other
+zones, will be reported as used by
+.Em system .
+.Sh SEE ALSO
+.Xr date 1 ,
+.Xr pooladm 1m ,
+.Xr poolcfg 1m ,
+.Xr prctl 1m ,
+.Xr rcapadm 1m ,
+.Xr zoneadm 1m ,
+.Xr zonecfg 1m ,
+.Xr zonestatd 1m ,
+.Xr libzonestat 3lib ,
+.Xr timezone 4 ,
+.Xr privileges 5 ,
+.Xr resource_controls 5 ,
+.Xr zones 5
diff --git a/usr/src/man/man1m/Makefile b/usr/src/man/man1m/Makefile
index 6dcd74a84f..2152864b8d 100644
--- a/usr/src/man/man1m/Makefile
+++ b/usr/src/man/man1m/Makefile
@@ -18,6 +18,7 @@
# Copyright 2018 Nexenta Systems, Inc.
# Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
# Copyright 2019 Peter Tribble
+# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
#
include $(SRC)/Makefile.master
@@ -539,6 +540,7 @@ _MANFILES= 6to4relay.1m \
zoneadm.1m \
zoneadmd.1m \
zonecfg.1m \
+ zonestatd.1m \
zpool.1m \
zstreamdump.1m \
ztest.1m
diff --git a/usr/src/man/man1m/zonestatd.1m b/usr/src/man/man1m/zonestatd.1m
new file mode 100644
index 0000000000..fb7760436b
--- /dev/null
+++ b/usr/src/man/man1m/zonestatd.1m
@@ -0,0 +1,90 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\" Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+.\" Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+.\"
+.Dd April 04, 2019
+.Dt zonestatd 1M
+.Os
+.Sh NAME
+.Nm zonestatd
+.Nd zones monitoring daemon
+.Sh SYNOPSIS
+.Nm /usr/lib/zones/zonestatd
+.Sh DESCRIPTION
+.Nm
+is a system daemon that is started during system boot.
+It monitors the utilization of system resources by zones, as well
+as zone and system configuration information such as psrset psets,
+pool psets, and resource control settings.
+.Pp
+This daemon is started automatically by the zone management
+software and should not be invoked directly.
+It does not constitute a programming interface, but is classified as a
+private interface.
+.Sh IMPLEMENTATION NOTES
+The zonestat service is managed by the service management
+facility,
+.Xr smf 5 ,
+under the service identifier:
+.Pp
+.D1 svc:/system/zones-monitoring:default
+.Pp
+Administrative actions on this service, such as enabling, disabling, or
+requesting restart, can be performed using
+.Xr svcadm 1m .
+The service's status can be queried using the
+.Xr svcs 1
+command.
+.Pp
+The zonestat service has the following SMF configuration property:
+.Bd -ragged -offset indent
+config/sample_interval
+.Pp
+This property sets the
+.Nm
+sample interval.
+This is the interval used by the zones monitoring daemon,
+.Nm
+to sample resource utilization.
+This is also the interval used to determine configuration changes such as
+processor set changes, resource control changes, and zone state changes.
+.Pp
+The default interval is 5 seconds.
+.Ed
+.Pp
+The zonestat service makes use of the extended accounting facility.
+If not already enabled, it enables the tracking of process accounting
+resources, and configures a process accounting file.
+The zonestat service will roll the process accounting log at its configured
+interval.
+.Pp
+If extended process accounting is enabled externally, the zonestat
+service will use the process accounting log as configured.
+It will not roll the accounting log, but will operate correctly if
+the accounting log is rolled externally.
+.Sh INTERFACE STABILITY
+.Sy Private
+.Sh SECURITY
+The zonestat service in the global zone must be online for the zonestat
+service in each non-global zone (NGZ) to function properly.
+The zonestat service in each NGZ does not directly read system configuration
+and utilization data, but rather reads from the zonestat service on the
+global zone.
+.Sh SEE ALSO
+.Xr zonestat 1 ,
+.Xr acctadm 1m ,
+.Xr pooladm 1m ,
+.Xr poolcfg 1m ,
+.Xr prctl 1m ,
+.Xr rcapadm 1m ,
+.Xr smf 5 ,
+.Xr zones 5
diff --git a/usr/src/pkg/manifests/system-zones.mf b/usr/src/pkg/manifests/system-zones.mf
index 2b9566accb..ae885a159e 100644
--- a/usr/src/pkg/manifests/system-zones.mf
+++ b/usr/src/pkg/manifests/system-zones.mf
@@ -21,6 +21,7 @@
#
# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
#
set name=pkg.fmri value=pkg:/system/zones@$(PKGVERS)
@@ -96,9 +97,11 @@ file path=usr/share/lib/xml/dtd/brand.dtd.1 mode=0644
file path=usr/share/lib/xml/dtd/zone_platform.dtd.1 mode=0644
file path=usr/share/lib/xml/dtd/zonecfg.dtd.1
file path=usr/share/man/man1/zlogin.1
+file path=usr/share/man/man1/zonestat.1
file path=usr/share/man/man1m/zoneadm.1m
file path=usr/share/man/man1m/zoneadmd.1m
file path=usr/share/man/man1m/zonecfg.1m
+file path=usr/share/man/man1m/zonestatd.1m
file path=usr/share/man/man5/brands.5
file path=usr/share/man/man5/zones.5
file path=usr/share/man/man7d/zcons.7d
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh
index dda7c1df43..a886ab8a77 100644
--- a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh
@@ -28,13 +28,15 @@
# STRATEGY:
# 1. Create an encrypted dataset
# 2. Create an encrypted child dataset
-# 3. Attempt to change the key without any flags
-# 4. Attempt to change the key specifying keylocation
-# 5. Attempt to change the key specifying keyformat
-# 6. Verify the new encryption root can unload and load its key
-# 7. Recreate the child dataset
-# 8. Attempt to change the key specifying both the keylocation and keyformat
-# 9. Verify the new encryption root can unload and load its key
+# 3. Create an unencrypted child dataset
+# 4. Attempt to change the key without any flags
+# 5. Attempt to change the key specifying keylocation
+# 6. Attempt to change the key specifying keyformat
+# 7. Verify the new encryption root can unload and load its key
+# 8. Recreate the child dataset
+# 9. Attempt to change the key specifying both the keylocation and keyformat
+# 10. Verify the new encryption root can unload and load its key
+# 11. Verify the unencrypted child is still accessible normally
#
verify_runnable "both"
@@ -53,6 +55,7 @@ log_assert "'zfs change-key' should promote an encrypted child to an" \
log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \
"-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1"
log_must zfs create $TESTPOOL/$TESTFS1/child
+log_must zfs create -o encryption=off $TESTPOOL/$TESTFS1/child2
log_mustnot eval "echo $PASSPHRASE2 | zfs change-key" \
"$TESTPOOL/$TESTFS1/child"
@@ -82,5 +85,7 @@ log_must key_unavailable $TESTPOOL/$TESTFS1/child
log_must eval "echo $PASSPHRASE2 | zfs load-key $TESTPOOL/$TESTFS1/child"
log_must key_available $TESTPOOL/$TESTFS1/child
+log_must zfs unmount $TESTPOOL/$TESTFS1/child2
+log_must zfs mount $TESTPOOL/$TESTFS1/child2
log_pass "'zfs change-key' promotes an encrypted child to an encryption root"
diff --git a/usr/src/uts/common/fs/zfs/dsl_crypt.c b/usr/src/uts/common/fs/zfs/dsl_crypt.c
index 3896efbc76..c9d02e1c57 100644
--- a/usr/src/uts/common/fs/zfs/dsl_crypt.c
+++ b/usr/src/uts/common/fs/zfs/dsl_crypt.c
@@ -1401,6 +1401,7 @@ static void
spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj,
uint64_t new_rddobj, dsl_wrapping_key_t *wkey, dmu_tx_t *tx)
{
+ int ret;
zap_cursor_t *zc;
zap_attribute_t *za;
dsl_pool_t *dp = dmu_tx_pool(tx);
@@ -1419,12 +1420,14 @@ spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj,
return;
}
+ ret = dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj);
+ VERIFY(ret == 0 || ret == ENOENT);
+
/*
* Stop recursing if this dsl dir didn't inherit from the root
* or if this dd is a clone.
*/
- VERIFY0(dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj));
- if (curr_rddobj != rddobj || dsl_dir_is_clone(dd)) {
+ if (ret == ENOENT || curr_rddobj != rddobj || dsl_dir_is_clone(dd)) {
dsl_dir_rele(dd, FTAG);
return;
}
diff --git a/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c b/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c
index b4046299a0..4604334971 100644
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c
+++ b/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c
@@ -505,8 +505,8 @@ ibt_cisco_embedded_sm_rereg_fix(void *arg)
hca_guid = hca_devp->hd_hca_attr->hca_node_guid;
mutex_exit(&ibtl_clnt_list_mutex);
- ibt_status = ((ibtl_node_info_cb_t)mgrp->mgr_async_handler)(hca_guid,
- port, sm_lid, &node_info);
+ ibt_status = ((ibtl_node_info_cb_t)(uintptr_t)
+ mgrp->mgr_async_handler)(hca_guid, port, sm_lid, &node_info);
if (ibt_status == IBT_SUCCESS) {
if ((node_info.n_vendor_id == IBT_VENDOR_CISCO) &&
(node_info.n_node_type == IBT_NODE_TYPE_SWITCH)) {
@@ -740,7 +740,7 @@ ibtl_do_hca_asyncs(ibtl_hca_devinfo_t *hca_devp)
if ((code == IBT_PORT_CHANGE_EVENT) &&
eventp->ev_port_flags & IBT_PORT_CHANGE_SM_LID)
ibtl_cm_get_node_info(hca_devp,
- (ibt_async_handler_t)ibtl_node_info_cb);
+ (ibt_async_handler_t)(uintptr_t)ibtl_node_info_cb);
/* wait for node info task to complete */
while (hca_devp->hd_async_task_cnt != 0)
cv_wait(&hca_devp->hd_async_task_cv,
diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c
index f2a18c98f2..c8d34d2590 100644
--- a/usr/src/uts/common/io/mac/mac.c
+++ b/usr/src/uts/common/io/mac/mac.c
@@ -2610,7 +2610,7 @@ mac_client_restart(mac_client_impl_t *mcip)
minor_t
mac_minor_hold(boolean_t sleep)
{
- minor_t minor;
+ id_t id;
/*
* Grab a value from the arena.
@@ -2618,16 +2618,14 @@ mac_minor_hold(boolean_t sleep)
atomic_inc_32(&minor_count);
if (sleep)
- minor = (uint_t)id_alloc(minor_ids);
- else
- minor = (uint_t)id_alloc_nosleep(minor_ids);
+ return ((uint_t)id_alloc(minor_ids));
- if (minor == 0) {
+ if ((id = id_alloc_nosleep(minor_ids)) == -1) {
atomic_dec_32(&minor_count);
return (0);
}
- return (minor);
+ return ((uint_t)id);
}
/*
diff --git a/usr/src/uts/i86pc/ml/kpti_trampolines.s b/usr/src/uts/i86pc/ml/kpti_trampolines.s
index 6ab3edc3d4..a036eefee1 100644
--- a/usr/src/uts/i86pc/ml/kpti_trampolines.s
+++ b/usr/src/uts/i86pc/ml/kpti_trampolines.s
@@ -9,7 +9,7 @@
* http://www.illumos.org/license/CDDL.
*/
/*
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
/*
@@ -88,7 +88,7 @@
* Syscalls are different to interrupts (at least in the SYSENTER/SYSCALL64
* cases) in that they do not push an interrupt frame (and also have some other
* effects). In the syscall trampolines, we assume that we can only be taking
- * the call from userland and use SWAPGS and an unconditional overwrite of %cr3.
+ * the call from userland and use swapgs and an unconditional overwrite of %cr3.
* We do not do any stack pivoting for syscalls (and we leave SYSENTER's
* existing %rsp pivot untouched) -- instead we spill registers into
* %gs:CPU_KPTI_* as we need to.
@@ -503,7 +503,7 @@ tr_sysc_ret_end:
pushq %gs:CPU_KPTI_CS; \
pushq %gs:CPU_KPTI_RIP; \
mov %gs:CPU_KPTI_R13, %r13; \
- SWAPGS; \
+ swapgs; \
jmp isr; \
SET_SIZE(tr_/**/isr)
@@ -536,10 +536,9 @@ tr_intr_ret_start:
ENTRY_NP(tr_iret_user)
#if DEBUG
/*
- * Ensure that we return to user land with CR0.TS clear. We do this
- * before we trampoline back and pivot the stack and %cr3. This way
- * we're still on the kernel stack and kernel %cr3, though we are on the
- * user GSBASE.
+ * Panic if we find CR0.TS set. We're still on the kernel stack and
+ * %cr3, but we do need to swap back to the kernel gs. (We don't worry
+ * about swapgs speculation here.)
*/
pushq %rax
mov %cr0, %rax
@@ -559,14 +558,24 @@ tr_intr_ret_start:
cmpq $1, kpti_enable
jne 1f
+ /*
+ * KPTI enabled: we're on the user gsbase at this point, so we
+ * need to swap back so we can pivot stacks.
+ *
+ * The swapgs lfence mitigation is probably not needed here
+ * since a mis-speculation of the above branch would imply KPTI
+ * is disabled, but we'll do so anyway.
+ */
swapgs
+ lfence
mov %r13, %gs:CPU_KPTI_R13
PIVOT_KPTI_STK(%r13)
SET_USER_CR3(%r13)
mov %gs:CPU_KPTI_R13, %r13
- /* Zero these to make sure they didn't leak from a kernel trap */
+ /* Zero these to make sure they didn't leak from a kernel trap. */
movq $0, %gs:CPU_KPTI_R13
movq $0, %gs:CPU_KPTI_R14
+ /* And back to user gsbase again. */
swapgs
1:
iretq
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index c02e2e0469..6c317392b3 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -910,6 +910,7 @@
* more work in the system to mitigate against:
*
* - Spectre v1
+ * - swapgs (Spectre v1 variant)
* - Spectre v2
* - Meltdown (Spectre v3)
* - Rogue Register Read (Spectre v3a)
@@ -926,7 +927,7 @@
* overall approach that the system has taken to address these as well as their
* shortcomings. Unfortunately, not all of the above have been handled today.
*
- * SPECTRE FAMILY (Spectre v2, ret2spec, SpectreRSB)
+ * SPECTRE v2, ret2spec, SpectreRSB
*
* The second variant of the spectre attack focuses on performing branch target
* injection. This generally impacts indirect call instructions in the system.
@@ -1035,11 +1036,43 @@
* it may make more sense to investigate using prediction barriers as the whole
* system is only executing a single instruction at a time while in kmdb.
*
- * SPECTRE FAMILY (v1, v4)
+ * SPECTRE v1, v4
*
* The v1 and v4 variants of spectre are not currently mitigated in the
* system and require other classes of changes to occur in the code.
*
+ * SPECTRE v1 (SWAPGS VARIANT)
+ *
+ * The class of Spectre v1 vulnerabilities aren't all about bounds checks, but
+ * can generally affect any branch-dependent code. The swapgs issue is one
+ * variant of this. If we are coming in from userspace, we can have code like
+ * this:
+ *
+ * cmpw $KCS_SEL, REGOFF_CS(%rsp)
+ * je 1f
+ * movq $0, REGOFF_SAVFP(%rsp)
+ * swapgs
+ * 1:
+ * movq %gs:CPU_THREAD, %rax
+ *
+ * If an attacker can cause a mis-speculation of the branch here, we could skip
+ * the needed swapgs, and use the /user/ %gsbase as the base of the %gs-based
+ * load. If subsequent code can act as the usual Spectre cache gadget, this
+ * would potentially allow KPTI bypass. To fix this, we need an lfence prior to
+ * any use of the %gs override.
+ *
+ * The other case is also an issue: if we're coming into a trap from kernel
+ * space, we could mis-speculate and swapgs the user %gsbase back in prior to
+ * using it. AMD systems are not vulnerable to this version, as a swapgs is
+ * serializing with respect to subsequent uses. But as AMD /does/ need the other
+ * case, and the fix is the same in both cases (an lfence at the branch target
+ * 1: in this example), we'll just do it unconditionally.
+ *
+ * Note that we don't enable user-space "wrgsbase" via CR4_FSGSBASE, making it
+ * harder for user-space to actually set a useful %gsbase value: although it's
+ * not clear, it might still be feasible via lwp_setprivate(), though, so we
+ * mitigate anyway.
+ *
* MELTDOWN
*
* Meltdown, or spectre v3, allowed a user process to read any data in their
@@ -1159,12 +1192,13 @@
* and what's done in various places:
*
* - Spectre v1: Not currently mitigated
+ * - swapgs: lfences after swapgs paths
* - Spectre v2: Retpolines/RSB Stuffing or EIBRS if HW support
* - Meltdown: Kernel Page Table Isolation
* - Spectre v3a: Updated CPU microcode
* - Spectre v4: Not currently mitigated
* - SpectreRSB: SMEP and RSB Stuffing
- * - L1TF: spec_uarch_flush, smt exclusion, requires microcode
+ * - L1TF: spec_uarch_flush, SMT exclusion, requires microcode
* - MDS: x86_md_clear, requires microcode, disabling hyper threading
*
* The following table indicates the x86 feature set bits that indicate that a
diff --git a/usr/src/uts/intel/amd64/ml/amd64.il b/usr/src/uts/intel/amd64/ml/amd64.il
index fc78c95a95..3e2a790729 100644
--- a/usr/src/uts/intel/amd64/ml/amd64.il
+++ b/usr/src/uts/intel/amd64/ml/amd64.il
@@ -23,6 +23,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
/
/ In-line functions for amd64 kernels.
/
@@ -189,34 +193,26 @@
movw %di, %gs
.end
- /*
- * OPTERON_ERRATUM_88 requires mfence
- */
- .inline __swapgs, 0
- mfence
- swapgs
- .end
-
/*
* prefetch 64 bytes
*/
- .inline prefetch_read_many,8
+ .inline prefetch_read_many,8
prefetcht0 (%rdi)
prefetcht0 32(%rdi)
.end
- .inline prefetch_read_once,8
+ .inline prefetch_read_once,8
prefetchnta (%rdi)
prefetchnta 32(%rdi)
.end
- .inline prefetch_write_many,8
+ .inline prefetch_write_many,8
prefetcht0 (%rdi)
prefetcht0 32(%rdi)
.end
- .inline prefetch_write_once,8
+ .inline prefetch_write_once,8
prefetcht0 (%rdi)
prefetcht0 32(%rdi)
.end
diff --git a/usr/src/uts/intel/amd64/sys/privregs.h b/usr/src/uts/intel/amd64/sys/privregs.h
index 83782c4b37..7e5f7cd392 100644
--- a/usr/src/uts/intel/amd64/sys/privregs.h
+++ b/usr/src/uts/intel/amd64/sys/privregs.h
@@ -24,6 +24,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
#ifndef _AMD64_SYS_PRIVREGS_H
#define _AMD64_SYS_PRIVREGS_H
@@ -206,7 +210,8 @@ struct regs {
je 6f; \
movq $0, REGOFF_SAVFP(%rsp); \
SWAPGS; \
-6: CLEAN_CS
+6: lfence; /* swapgs mitigation */ \
+ CLEAN_CS
#define INTR_POP \
leaq sys_lcall32(%rip), %r11;\
@@ -216,8 +221,13 @@ struct regs {
cmpw $KCS_SEL, REGOFF_CS(%rsp);\
je 8f; \
5: SWAPGS; \
-8: addq $REGOFF_RIP, %rsp
+8: lfence; /* swapgs mitigation */ \
+ addq $REGOFF_RIP, %rsp
+/*
+ * No need for swapgs mitigation: it's unconditional, and we're heading
+ * back to userspace.
+ */
#define USER_POP \
__RESTORE_REGS; \
SWAPGS; \
diff --git a/usr/src/uts/intel/asm/cpu.h b/usr/src/uts/intel/asm/cpu.h
index faaaea7c8e..95e882601a 100644
--- a/usr/src/uts/intel/asm/cpu.h
+++ b/usr/src/uts/intel/asm/cpu.h
@@ -172,17 +172,6 @@ __set_gs(selector_t value)
: "r" (value));
}
-#if !defined(__xpv)
-
-extern __GNU_INLINE void
-__swapgs(void)
-{
- __asm__ __volatile__(
- "mfence; swapgs");
-}
-
-#endif /* !__xpv */
-
#endif /* __amd64 */
#endif /* !__lint && __GNUC__ */
diff --git a/usr/src/uts/intel/ia32/ml/exception.s b/usr/src/uts/intel/ia32/ml/exception.s
index 5806087ca1..b35eab3220 100644
--- a/usr/src/uts/intel/ia32/ml/exception.s
+++ b/usr/src/uts/intel/ia32/ml/exception.s
@@ -174,8 +174,9 @@
leaq tr_brand_sys_sysenter(%rip), %r11
cmpq %r11, 24(%rsp)
jne 2f
-1: SWAPGS
-2: popq %r11
+1: swapgs
+2: lfence /* swapgs mitigation */
+ popq %r11
#endif /* !__xpv */
INTR_PUSH
diff --git a/usr/src/uts/intel/ia32/os/sundep.c b/usr/src/uts/intel/ia32/os/sundep.c
index cfb4552287..34e0a03d68 100644
--- a/usr/src/uts/intel/ia32/os/sundep.c
+++ b/usr/src/uts/intel/ia32/os/sundep.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -551,16 +551,19 @@ update_sregs(struct regs *rp, klwp_t *lwp)
*
* We've just mucked up the kernel's gsbase. Oops. In
* particular we can't take any traps at all. Make the newly
- * computed gsbase be the hidden gs via __swapgs, and fix
+ * computed gsbase be the hidden gs via swapgs, and fix
* the kernel's gsbase back again. Later, when we return to
* userland we'll swapgs again restoring gsbase just loaded
* above.
*/
- __swapgs();
+ __asm__ __volatile__("mfence; swapgs");
+
rp->r_gs = pcb->pcb_gs;
/*
- * restore kernel's gsbase
+ * Restore kernel's gsbase. Note that this also serializes any
+ * attempted speculation from loading the user-controlled
+ * %gsbase.
*/
wrmsr(MSR_AMD_GSBASE, kgsbase);
diff --git a/usr/src/uts/intel/io/ipmi/ipmi_main.c b/usr/src/uts/intel/io/ipmi/ipmi_main.c
index 8b25829d2b..e7671ce734 100644
--- a/usr/src/uts/intel/io/ipmi/ipmi_main.c
+++ b/usr/src/uts/intel/io/ipmi/ipmi_main.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2017 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
*/
@@ -151,6 +151,7 @@ ipmi_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
minor_t minor;
ipmi_device_t *dev;
+ id_t mid;
if (ipmi_attached == B_FALSE)
return (ENXIO);
@@ -162,8 +163,9 @@ ipmi_open(dev_t *devp, int flag, int otyp, cred_t *cred)
if (flag & FEXCL)
return (ENOTSUP);
- if ((minor = (minor_t)id_alloc_nosleep(minor_ids)) == 0)
+ if ((mid = id_alloc_nosleep(minor_ids)) == -1)
return (ENODEV);
+ minor = (minor_t)mid;
/* Initialize the per file descriptor data. */
dev = kmem_zalloc(sizeof (ipmi_device_t), KM_SLEEP);
diff --git a/usr/src/uts/intel/kdi/kdi_asm.s b/usr/src/uts/intel/kdi/kdi_asm.s
index f106d643f7..3dd6db5952 100644
--- a/usr/src/uts/intel/kdi/kdi_asm.s
+++ b/usr/src/uts/intel/kdi/kdi_asm.s
@@ -23,7 +23,7 @@
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
/*
@@ -271,6 +271,9 @@
* KDI_SAVE_REGS macro to prevent a usermode process's GSBASE from being
* blown away. On the hypervisor, we don't need to do this, since it's
* ensured we're on our requested kernel GSBASE already.
+ *
+ * No need to worry about swapgs speculation here, as the GSBASE switch is
+ * unconditional and done via wrmsr anyway.
*/
subq $10, %rsp
sgdt (%rsp)
diff --git a/usr/src/uts/intel/sys/archsystm.h b/usr/src/uts/intel/sys/archsystm.h
index 0c9ceac7be..55c387f9b1 100644
--- a/usr/src/uts/intel/sys/archsystm.h
+++ b/usr/src/uts/intel/sys/archsystm.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _SYS_ARCHSYSTM_H
@@ -94,10 +94,8 @@ extern void brand_sys_call();
#endif
extern void sys_sysenter();
extern void tr_sys_sysenter();
-extern void _sys_sysenter_post_swapgs();
extern void brand_sys_sysenter();
extern void tr_brand_sys_sysenter();
-extern void _brand_sys_sysenter_post_swapgs();
extern void dosyscall(void);
diff --git a/usr/src/uts/intel/sys/segments.h b/usr/src/uts/intel/sys/segments.h
index 6bf18b3082..52831c9d87 100644
--- a/usr/src/uts/intel/sys/segments.h
+++ b/usr/src/uts/intel/sys/segments.h
@@ -2,7 +2,7 @@
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _SYS_SEGMENTS_H
@@ -179,7 +179,6 @@ extern void __set_ds(selector_t);
extern void __set_es(selector_t);
extern void __set_fs(selector_t);
extern void __set_gs(selector_t);
-extern void __swapgs(void);
#endif /* __amd64 */
#if defined(__amd64)