diff options
| author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2019-11-08 14:16:48 +0000 |
|---|---|---|
| committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2019-11-08 14:16:48 +0000 |
| commit | 4351df24a18fd73b1e6cc2591e622883e502167c (patch) | |
| tree | 055eea240497456740e6f737bef63765ed7a6574 | |
| parent | faabb223a29c66e258a2c067cb14888c51ba6f47 (diff) | |
| parent | 42cd19316c818c8b8283fc48263a1b4ce99cf049 (diff) | |
| download | illumos-joyent-4351df24a18fd73b1e6cc2591e622883e502167c.tar.gz | |
[illumos-gate merge]
commit 42cd19316c818c8b8283fc48263a1b4ce99cf049
11859 need swapgs mitigation
commit ad3e6d4dd82f2e18743399134a4b99cf303478f6
11880 changing encryption key on dataset with unencrypted children triggers VERIFY
commit 249622b3e0d46f0016d00e3f87b314635d11065a
11929 mac_minor_hold() gets id_alloc_nosleep() wrong
commit 1c085a54d061bc17f8b209d1ea6161fcdf66d971
3334 zonestat missing man page
commit 327c8d1665439dd2540c1b460773bd9f0c1c0fa9
11792 ibtl: cast between incompatible function types
commit 22f89f96cd7b45b9686231ed7d98e610077df6c6
11922 ipmi_open looks for wrong return value
| -rw-r--r-- | usr/src/man/man1/Makefile | 4 | ||||
| -rw-r--r-- | usr/src/man/man1/zonestat.1 | 485 | ||||
| -rw-r--r-- | usr/src/man/man1m/Makefile | 2 | ||||
| -rw-r--r-- | usr/src/man/man1m/zonestatd.1m | 90 | ||||
| -rw-r--r-- | usr/src/pkg/manifests/system-zones.mf | 3 | ||||
| -rw-r--r-- | usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh | 19 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/zfs/dsl_crypt.c | 7 | ||||
| -rw-r--r-- | usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c | 6 | ||||
| -rw-r--r-- | usr/src/uts/common/io/mac/mac.c | 10 | ||||
| -rw-r--r-- | usr/src/uts/i86pc/ml/kpti_trampolines.s | 25 | ||||
| -rw-r--r-- | usr/src/uts/i86pc/os/cpuid.c | 40 | ||||
| -rw-r--r-- | usr/src/uts/intel/amd64/ml/amd64.il | 20 | ||||
| -rw-r--r-- | usr/src/uts/intel/amd64/sys/privregs.h | 14 | ||||
| -rw-r--r-- | usr/src/uts/intel/asm/cpu.h | 11 | ||||
| -rw-r--r-- | usr/src/uts/intel/ia32/ml/exception.s | 5 | ||||
| -rw-r--r-- | usr/src/uts/intel/ia32/os/sundep.c | 11 | ||||
| -rw-r--r-- | usr/src/uts/intel/io/ipmi/ipmi_main.c | 6 | ||||
| -rw-r--r-- | usr/src/uts/intel/kdi/kdi_asm.s | 5 | ||||
| -rw-r--r-- | usr/src/uts/intel/sys/archsystm.h | 4 | ||||
| -rw-r--r-- | usr/src/uts/intel/sys/segments.h | 3 |
20 files changed, 701 insertions, 69 deletions
diff --git a/usr/src/man/man1/Makefile b/usr/src/man/man1/Makefile index a4d59dadcc..d7904100eb 100644 --- a/usr/src/man/man1/Makefile +++ b/usr/src/man/man1/Makefile @@ -13,6 +13,7 @@ # Copyright 2011, Richard Lowe # Copyright 2018 Nexenta Systems, Inc. # Copyright 2014 Garrett D'Amore <garrett@damore.org> +# Copyright 2019 OmniOS Community Edition (OmniOSce) Association. # include $(SRC)/Makefile.master @@ -421,7 +422,8 @@ MANFILES= acctcom.1 \ yppasswd.1 \ ypwhich.1 \ zlogin.1 \ - zonename.1 + zonename.1 \ + zonestat.1 MANLINKS= batch.1 \ bg.1 \ diff --git a/usr/src/man/man1/zonestat.1 b/usr/src/man/man1/zonestat.1 new file mode 100644 index 0000000000..39f2be5f61 --- /dev/null +++ b/usr/src/man/man1/zonestat.1 @@ -0,0 +1,485 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. +.\" Copyright 2019 OmniOS Community Edition (OmniOSce) Association. +.\" +.Dd April 04, 2019 +.Dt zonestat 1 +.Os +.Sh NAME +.Nm zonestat +.Nd report active zone statistics. +.Sh SYNOPSIS +.Nm +.Op Fl z Ar zonelist +.Op Fl r Ar reslist +.Op Fl n Ar namelist +.Op Fl T Ar u Ns | Ns Ar d Ns | Ns Ar i +.Op Fl R Ar reports +.Op Fl q +.Op Fl p Oo Fl P Ar lines Oc +.Op Fl S Ar cols +.Ar interval +.Oo Ar duration +.Oo Ar report +.Oc Oc +.Sh DESCRIPTION +The +.Nm +utility reports on the cpu, memory, and resource control utilization of the +currently running zones. +Each zone's utilization is reported both as a percentage of system resources +and the zone's configured limits. 
+.Pp +The +.Nm +utility prints a series of interval reports at the specified interval. +It optionally also prints one or more summary reports at a specified interval. +.Pp +The default output is a summary of cpu, physical, and virtual memory +utilization. +The -r option can be used to choose detailed output for specific resources. +.Pp +The following options are supported: +.Bl -tag -width Ds +.It Fl z Ar zonename Ns Oo Ar ,zonename Ns ... Oc +Specify a list of zones on which to report. +By default all zones are reported. +In addition to a comma-separated list, multiple +.Fl z +options can be specified to report on a set of zones. +The output will include any resources which have usage by the specified zone(s). +.It Fl r Ar resource Ns Oo Ar ,resource Ns ... Oc +Specify resource types on which to report. +The available resources are: +.Bd -ragged -offset indent +.Em physical-memory , +.Em virtual-memory , +.Em locked-memory , +.Em processor-sets , +.Em processes , +.Em lwps , +.Em shm-memory , +.Em shm-ids , +.Em sem-ids , +.Em msg-ids , +.Em lofi +.Ed +.Pp +The following nicknames can also be specified as resource names: +.Bl -tag -width indent +.It Em summary +A summary of cpu, physical-memory, and virtual memory usage. +.It Em memory +physical-memory, virtual-memory, and locked memory. +.It Em psets +processor-sets +.It Em default-pset +The default pset only. +.It Em limits +processes, lwps, lofi +.It Em sysv +shm-memory, shm-ids, sem-ids, msg-ids +.It Em all +all resource types. +.El +.Pp +By default the summary resource is printed. +.Pp +In addition to a comma-separated list, multiple +.Fl r +options can be specified to report on a set of resource types. +.Pp +The system's cpus can be partitioned into processor sets +(psets). By default, all cpus are in a single pset named +.Em pset_default . +.Pp +Memory is not partition-able into sets. +The zonestat utility output for these resources will show them as named +.Em mem_default +and +.Em vm_default . 
+.Pp +The +.Em all +resource specifies that all resource types should be reported. +.It Fl n Ar name Ns Oo Ar ,name Oc +Specify a list of resource names on which to report. +For pset resources, this is the name of the processor set. +For physical-memory, locked-memory, and virtual-memory resources, +the only names are +.Em mem_default +and +.Em vm_default . +.Pp +Dedicated-cpu processor sets can be specified by their pset +name, or by just their zonename. +.Pp +Processor sets created by psrset can be specified by their pool +pset name, or just by their psetid. +.Pp +In addition to a comma-separated list, multiple +.Fl n +options can be specified to report on a set of resources. +.It Fl T Ar u Ns | Ns Ar d Ns | Ns Ar i +Include timestamp of each report. +The following formats are supported: +.Bl -tag -width indent +.It u +A printed representation of the internal representation of time; +see +.Xr time 2 . +This is also known as unix time. +.It d +Standard date format; see +.Xr date 1 . +This option is not valid with +.Fl p . +.It i +Time formatted as the ISO 8601 compliant format: +.D1 YYYYMMDDThhmmssZ +.El +.It Fl R Ar report Ns Oo Ar ,report Oc +Print a summary report. +The supported report types are described below. +In addition to a comma-separated list, multiple +.Fl R +options may be specified for a set of summary reports. +.Bl -tag -width indent +.It total +Prints a summary report detailing the following for each resource: +.Bl -tag -width indent +.It psets +Total cpu used since start of command invocation. +The percent used for each zone includes time that a zone was not running. +For instance, if a zone used 100% of the cpu while it was running, but the zone +was halted for half of the intervals, then the summary report will show the +zone used 50% of the cpu time. +.It memory, limits, sysv +Average resource used of all intervals reported since command invocation. +This average factors in intervals in which a zone was not running. 
+For example, if a zone used an average of 100M of physical memory while it was +running, and was only running for half the intervals, then the summary report +will show that the zone used 50M of physical memory on average. +.El +.It average +Similar to +.Em total , +but only intervals in which a zone is running are factored in. +For example, if a zone was only running for a single interval, and during that +interval, the zone used 200M of virtual memory, then its average +virtual-memory will be 200M, regardless of the number of intervals reported +before the summary report. +.It high +Print a summary report detailing the highest usage of each resource and zone +during any interval of the zonestat utility invocation. +.El +.It Fl S Ar col Ns Oo Ar ,col Oc +Sort zones utilizing each resource. +The following sorting columns can be specified. +.Bl -tag -width indent +.It name +Sort alphanumerically by zone name. +.It used +Sort by quantity of resource used. +.It cap +Sort by configured cap. +.It pcap +Sort by percent of cap used. +.It shr +Sort by allocated share. +.It pshru +Sort by percent of share used. +.El +.Pp +By default, output is sorted by quantity of resource used. +.It Fl q +Only print summary reports (requires +.Fl R ) . +All interval reports are omitted. +.It Fl p +Print output in stable, machine-parsable format. +Individual fields will be delimited with :. 
+The line format is: +.Pp +.D1 <report type>:<resource>:<field>[:<field>]* +.Pp +If +.Fl T +is specified each line is prefixed with a timestamp: +.Pp +.D1 <timestamp>:<report type>:<resource>:<field>[:<field>]* +.Pp +The report types are: +.Bd -ragged -offset indent +.Em report-total , +.Em report-average , +.Em report-high , +.Em interval +.Ed +.Pp +The resource types are: +.Bd -ragged -offset indent +.Em header , +.Em footer , +.Em summary , +.Em physical-memory , +.Em virtual-memory , +.Em locked-memory , +.Em processor-set , +.Em processes , +.Em lwps , +.Em sysv-shared-memory , +.Em sysv-shmids , +.Em sysv-semids , +.Em sysv-msgids , +.Em lofi +.Ed +.Pp +The +.Em header +resource is a special resource used to state the beginning of an interval or +summary report. +All output lines between header resources belong to the same report. +Each header has a matching footer. +.Pp +The remaining fields are resource type specific. +See the zonestat utility output for details. +.Pp +All existing output fields are stable. +Future versions may introduce new report and resource types. +Future versions may also add additional new fields to the end of existing +output lines. +.It Fl P Ar line Ns Oo Ar ,line Oc +For parsable output, specify lines to output in parsable output. +One or more of the following line types can be chosen: +.Bl -tag -width indent +.It resource +The lines describing each resource. +.It total +The total utilization of each resource. +.It system +The utilization of each resource by the system. +This includes the kernel, and any resource consumption not contributable to a +specific zone. +When zonestat is run from within a non-global-zone, this value will be the +aggregate resource consumed by the system and all other zones. +.It zones +Lines detailing the per-zone utilization of each resource. +.It header, footer +Each interval and summary report has a header, which prints details such +as the interval and count information. 
+After each report, any footer is also printed +.El +.El +.Ss OPERANDS +.Bl -tag -width indent +.It interval +Specifies the length in seconds to pause between each interval report. +An interval of +.Em default +will use the configured interval of the zones +monitoring service - see +.Xr zonestatd 1m . +.Pp +Interval is required. +An interval of zero is not permitted. +The interval can be specified as [nh][nm][ns], such as 10s or 1m. +.It duration +Specifies the number of intervals to report. +Defaults to infinity if not specified. +The command duration is (interval * duration). +A duration of zero is invalid. +A value of +.Em inf +can also be specified to explicitly choose infinity. +.Pp +Duration can also be specified as [nh][nm][ns]. +In this case, duration will be interpreted as the duration of execution time. +The actual duration will be rounded up to the nearest multiple of the interval. +.It report +Specify the summary report period. +For instance, a report of 4 would produce reports every 4 intervals. +If the command duration is not a multiple of report, then the last report will +be of any remaining intervals. +.Pp +Report can also be specified as [nh][nm][ns]. +In this case, reports will be output at the specified time period, rounded up +to the nearest interval. +If the command duration is not a multiple of report, then the last report will +be of any remaining intervals. +.Pp +Requires +.Fl R . +If +.Fl R +is specified and report is not, the report period will be the entire command +duration, producing the specified reports at the end of execution. +.El +.Ss OUTPUT +The following list defines the column heading of the command output: +.Bl -tag -width indent +.It SYSTEM-MEMORY +The total amount of memory available on the physical host. +.It SYSTEM-LIMIT +The maximum amount of resource available on the physical host. +.It CPUS +The number of cpus allocated to a processor set. 
+.It ONLINE +Of the cpus allocated to a processor set, the number of cpus +which can execute processes. +.It MIN/MAX +The minimum and maximum number of cpus which may be allocated +to the processor set by the system. +.It ZONE +The zone using the resource. +In addition to zone names, this column may also contain: +.Bl -tag -width indent +.It [total] +The total quantity of resource used system-wide. +.It [system] +The quantity of resource used by the kernel or in a manner not associated with +any particular zone. +.Pp +When zonestat is used within a non-global zone, [system] designates the +aggregate resource used by the system and by all other zones. +.El +.It USED +The amount of resource used. +.It PCT +The amount of resource used as a percent of the total resource. +.It %PART +The amount of cpu used as a percentage of the total cpu in a processor-set to +which the zone is bound. +A zone can only have processes bound to multiple processor sets if it is the +global zone, or if psrset(1m) psets are used. +If multiple bindings are found for a zone, its %PART will be the fraction used +of all bound psets. +For [total] and [system], %PART is the percent used of all cpus on the system. +.It CAP +If a zone is configured to have a cap on the given resource, the cap will be +displayed in this column. +.It %CAP +The amount of resource used as a percent of the zone's configured cap. +.It SHRS +The number of shares allocated to the zone. +For the [total] row, this will be the total number of shares allocated to all +zones sharing the resource. +.Pp +If a zone is not configured to use shares, and is sharing a +resource with other zones that are configured to use shares, +this column will contain +.Em no-fss +for the zone. +.It %SHR +The fraction of the total shares allocated to the zone. +For instance, if 2 zones share a processor set, each with 10 shares, then each +zone will have a %SHR of 50%. +.It %SHRU +Of the share allocated to the zone, the fraction of resource +used. 
+Zones using all of their share will have a %SHRU of 100%. +Because shares are only enforced when there is resource contention, it is +possible for a zone to have a %SHRU in excess of 100%. +.El +.Sh IMPLEMENTATION NOTES +The zonestat utility depends on the zones monitoring service: +.Pp +.D1 svc/system/zonestat:default +.Pp +If the zonestat service is stopped while the zonestat utility is running, the +zonestat command invocation will quit without printing additional reports. +.Pp +The reports will be printed if zonestat is interrupted (by ctrl-c, +.Dv SIGINT ) +before reaching the next report period. +.Sh EXIT STATUS +.Ex -std +.Bl -tag -width indent +.It 0 +Successful completion. +.It 1 +An error occurred. +.It 2 +Invalid usage. +.It 3 +svc:system/zones_monitoring:default not running or not responding. +.El +.Sh EXAMPLES +Example 1: Summary of cpu and memory utilization every 5 seconds. +.Bd -literal + # zonestat 5 1 + SUMMARY + -----CPU------------- ----PHYSICAL--- ----VIRTUAL---- + ZONE USED %PART %CAP %SHRU USED PCT %CAP USED PCT %CAP + [total] 9.74 30% - - 7140M 21% - 10.6G 22% - + [system] 0.28 0.8% - - 6535M 19% - 10.4G 21% - + global 9.10 28% - - 272M 0.8% - 366M 0.7% - + zoneA 0.32 1.0% - - 256M 0.7% - 265M 0.5% - + zoneB 0.00 0.0% - - 77.6M 0.2% - 71.1M 0.1% - +.Ed +.Pp +Example 2: Using parsable output, fetching only zone usages. +.Pp +The following command will produce parsable output, printing one +line per zone using each pset resource for a 5 second interval. +.Bd -literal + + # zonestat -p -P zones -r psets 5 1 + +.Ed +.Pp +Example 3: Report on the default pset. +.Pp +The following command will report on the default pset once a second +for one minute. +.Bd -literal + + # zonestat -r default-pset 1 1m + +.Ed +.Pp +Example 4: Report total and high utilization. +.Pp +The following command monitors silently at a 10 second interval +for 24 hours, producing a total and high report every 1 hour. 
+.Bd -literal + + # zonestat -q -R total,high 10s 24h 1h + +.Ed +.Sh INTERFACE STABILITY +Command invocation and parsable output are Committed. +Human readable output (default output) is uncommitted. +.Sh SECURITY +When run from within a non-global zone (NGZ), only processor sets +visible to the NGZ are reported. +The NGZ output will include all other system resources, such as memory and +limits. +.Pp +For all reported resources, the NGZ's usage will be output. +Usage of each resource by the system, global zone, and all other +zones, will be reported as used by +.Em system . +.Sh SEE ALSO +.Xr date 1 , +.Xr pooladm 1m , +.Xr poolcfg 1m , +.Xr prctl 1m , +.Xr rcapadm 1m , +.Xr zoneadm 1m , +.Xr zonecfg 1m , +.Xr zonestatd 1m , +.Xr libzonestat 3lib , +.Xr timezone 4 , +.Xr privileges 5 , +.Xr resource_controls 5 , +.Xr zones 5 diff --git a/usr/src/man/man1m/Makefile b/usr/src/man/man1m/Makefile index 6dcd74a84f..2152864b8d 100644 --- a/usr/src/man/man1m/Makefile +++ b/usr/src/man/man1m/Makefile @@ -18,6 +18,7 @@ # Copyright 2018 Nexenta Systems, Inc. # Copyright (c) 2017, Chris Fraire <cfraire@me.com>. # Copyright 2019 Peter Tribble +# Copyright 2019 OmniOS Community Edition (OmniOSce) Association. # include $(SRC)/Makefile.master @@ -539,6 +540,7 @@ _MANFILES= 6to4relay.1m \ zoneadm.1m \ zoneadmd.1m \ zonecfg.1m \ + zonestatd.1m \ zpool.1m \ zstreamdump.1m \ ztest.1m diff --git a/usr/src/man/man1m/zonestatd.1m b/usr/src/man/man1m/zonestatd.1m new file mode 100644 index 0000000000..fb7760436b --- /dev/null +++ b/usr/src/man/man1m/zonestatd.1m @@ -0,0 +1,90 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. 
A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. +.\" Copyright 2019 OmniOS Community Edition (OmniOSce) Association. +.\" +.Dd April 04, 2019 +.Dt zonestatd 1M +.Os +.Sh NAME +.Nm zonestatd +.Nd zones monitoring daemon +.Sh SYNOPSIS +.Nm /usr/lib/zones/zonestatd +.Sh DESCRIPTION +.Nm +is a system daemon that is started during system boot. +It monitors the utilization of system resources by zones, as well +as zone and system configuration information such as psrset psets, +pool psets, and resource control settings. +.Pp +This daemon is started automatically by the zone management +software and should not be invoked directly. +It does not constitute a programming interface, but is classified as a +private interface. +.Sh IMPLEMENTATION NOTES +The zonestat service is managed by the service management +facility, +.Xr smf 5 , +under the service identifier: +.Pp +.D1 svc:/system/zones-monitoring:default +.Pp +Administrative actions on this service, such as enabling, disabling, or +requesting restart, can be performed using +.Xr svcadm 1m . +The service's status can be queried using the +.Xr svcs 1 +command. +.Pp +The zonestat service has the following SMF configuration property: +.Bd -ragged -offset indent +config/sample_interval +.Pp +This property sets the +.Nm +sample interval. +This is the interval used by the zones monitoring daemon, +.Nm +to sample resource utilization. +This is also the interval used to determine configuration changes such as +processor set changes, resource control changes, and zone state changes. +.Pp +The default interval is 5 seconds. +.Ed +.Pp +The zonestat service makes use of extended accounting facility. +If not already enabled, it enables the tracking of process accounting +resources, and configures a process accounting file. 
+The zonestat service will roll the process accounting log at its configured +interval. +.Pp +If extended process accounting is enabled externally, the zonestat +service will use the process accounting log as configured. +It will not roll the accounting log, but will operate correctly if +the accounting log is rolled externally. +.Sh INTERFACE STABILITY +.Sy Private +.Sh SECURITY +The zonestat service in the global zone must be online for the zonestat +service in each non-global zone (NGZ) to function properly. +The zonestat service in each NGZ does not directly read system configuration +and utilization data, but rather reads from the zonestat service on the +global zone. +.Sh SEE ALSO +.Xr zonestat 1 , +.Xr acctadm 1m , +.Xr pooladm 1m , +.Xr poolcfg 1m , +.Xr prctl 1m , +.Xr rcapadm 1m , +.Xr smf 5 , +.Xr zones 5 diff --git a/usr/src/pkg/manifests/system-zones.mf b/usr/src/pkg/manifests/system-zones.mf index 2b9566accb..ae885a159e 100644 --- a/usr/src/pkg/manifests/system-zones.mf +++ b/usr/src/pkg/manifests/system-zones.mf @@ -21,6 +21,7 @@ # # Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright 2019 OmniOS Community Edition (OmniOSce) Association. 
# set name=pkg.fmri value=pkg:/system/zones@$(PKGVERS) @@ -96,9 +97,11 @@ file path=usr/share/lib/xml/dtd/brand.dtd.1 mode=0644 file path=usr/share/lib/xml/dtd/zone_platform.dtd.1 mode=0644 file path=usr/share/lib/xml/dtd/zonecfg.dtd.1 file path=usr/share/man/man1/zlogin.1 +file path=usr/share/man/man1/zonestat.1 file path=usr/share/man/man1m/zoneadm.1m file path=usr/share/man/man1m/zoneadmd.1m file path=usr/share/man/man1m/zonecfg.1m +file path=usr/share/man/man1m/zonestatd.1m file path=usr/share/man/man5/brands.5 file path=usr/share/man/man5/zones.5 file path=usr/share/man/man7d/zcons.7d diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh index dda7c1df43..a886ab8a77 100644 --- a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh @@ -28,13 +28,15 @@ # STRATEGY: # 1. Create an encrypted dataset # 2. Create an encrypted child dataset -# 3. Attempt to change the key without any flags -# 4. Attempt to change the key specifying keylocation -# 5. Attempt to change the key specifying keyformat -# 6. Verify the new encryption root can unload and load its key -# 7. Recreate the child dataset -# 8. Attempt to change the key specifying both the keylocation and keyformat -# 9. Verify the new encryption root can unload and load its key +# 3. Create an unencrypted child dataset +# 4. Attempt to change the key without any flags +# 5. Attempt to change the key specifying keylocation +# 6. Attempt to change the key specifying keyformat +# 7. Verify the new encryption root can unload and load its key +# 8. Recreate the child dataset +# 9. Attempt to change the key specifying both the keylocation and keyformat +# 10. Verify the new encryption root can unload and load its key +# 11. 
Verify the unencrypted child is still accessible normally # verify_runnable "both" @@ -53,6 +55,7 @@ log_assert "'zfs change-key' should promote an encrypted child to an" \ log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" log_must zfs create $TESTPOOL/$TESTFS1/child +log_must zfs create -o encryption=off $TESTPOOL/$TESTFS1/child2 log_mustnot eval "echo $PASSPHRASE2 | zfs change-key" \ "$TESTPOOL/$TESTFS1/child" @@ -82,5 +85,7 @@ log_must key_unavailable $TESTPOOL/$TESTFS1/child log_must eval "echo $PASSPHRASE2 | zfs load-key $TESTPOOL/$TESTFS1/child" log_must key_available $TESTPOOL/$TESTFS1/child +log_must zfs unmount $TESTPOOL/$TESTFS1/child2 +log_must zfs mount $TESTPOOL/$TESTFS1/child2 log_pass "'zfs change-key' promotes an encrypted child to an encryption root" diff --git a/usr/src/uts/common/fs/zfs/dsl_crypt.c b/usr/src/uts/common/fs/zfs/dsl_crypt.c index 3896efbc76..c9d02e1c57 100644 --- a/usr/src/uts/common/fs/zfs/dsl_crypt.c +++ b/usr/src/uts/common/fs/zfs/dsl_crypt.c @@ -1401,6 +1401,7 @@ static void spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, uint64_t new_rddobj, dsl_wrapping_key_t *wkey, dmu_tx_t *tx) { + int ret; zap_cursor_t *zc; zap_attribute_t *za; dsl_pool_t *dp = dmu_tx_pool(tx); @@ -1419,12 +1420,14 @@ spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, return; } + ret = dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj); + VERIFY(ret == 0 || ret == ENOENT); + /* * Stop recursing if this dsl dir didn't inherit from the root * or if this dd is a clone. 
*/ - VERIFY0(dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj)); - if (curr_rddobj != rddobj || dsl_dir_is_clone(dd)) { + if (ret == ENOENT || curr_rddobj != rddobj || dsl_dir_is_clone(dd)) { dsl_dir_rele(dd, FTAG); return; } diff --git a/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c b/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c index b4046299a0..4604334971 100644 --- a/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c @@ -505,8 +505,8 @@ ibt_cisco_embedded_sm_rereg_fix(void *arg) hca_guid = hca_devp->hd_hca_attr->hca_node_guid; mutex_exit(&ibtl_clnt_list_mutex); - ibt_status = ((ibtl_node_info_cb_t)mgrp->mgr_async_handler)(hca_guid, - port, sm_lid, &node_info); + ibt_status = ((ibtl_node_info_cb_t)(uintptr_t) + mgrp->mgr_async_handler)(hca_guid, port, sm_lid, &node_info); if (ibt_status == IBT_SUCCESS) { if ((node_info.n_vendor_id == IBT_VENDOR_CISCO) && (node_info.n_node_type == IBT_NODE_TYPE_SWITCH)) { @@ -740,7 +740,7 @@ ibtl_do_hca_asyncs(ibtl_hca_devinfo_t *hca_devp) if ((code == IBT_PORT_CHANGE_EVENT) && eventp->ev_port_flags & IBT_PORT_CHANGE_SM_LID) ibtl_cm_get_node_info(hca_devp, - (ibt_async_handler_t)ibtl_node_info_cb); + (ibt_async_handler_t)(uintptr_t)ibtl_node_info_cb); /* wait for node info task to complete */ while (hca_devp->hd_async_task_cnt != 0) cv_wait(&hca_devp->hd_async_task_cv, diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c index f2a18c98f2..c8d34d2590 100644 --- a/usr/src/uts/common/io/mac/mac.c +++ b/usr/src/uts/common/io/mac/mac.c @@ -2610,7 +2610,7 @@ mac_client_restart(mac_client_impl_t *mcip) minor_t mac_minor_hold(boolean_t sleep) { - minor_t minor; + id_t id; /* * Grab a value from the arena. 
@@ -2618,16 +2618,14 @@ mac_minor_hold(boolean_t sleep) atomic_inc_32(&minor_count); if (sleep) - minor = (uint_t)id_alloc(minor_ids); - else - minor = (uint_t)id_alloc_nosleep(minor_ids); + return ((uint_t)id_alloc(minor_ids)); - if (minor == 0) { + if ((id = id_alloc_nosleep(minor_ids)) == -1) { atomic_dec_32(&minor_count); return (0); } - return (minor); + return ((uint_t)id); } /* diff --git a/usr/src/uts/i86pc/ml/kpti_trampolines.s b/usr/src/uts/i86pc/ml/kpti_trampolines.s index 6ab3edc3d4..a036eefee1 100644 --- a/usr/src/uts/i86pc/ml/kpti_trampolines.s +++ b/usr/src/uts/i86pc/ml/kpti_trampolines.s @@ -9,7 +9,7 @@ * http://www.illumos.org/license/CDDL. */ /* - * Copyright 2018 Joyent, Inc. + * Copyright 2019 Joyent, Inc. */ /* @@ -88,7 +88,7 @@ * Syscalls are different to interrupts (at least in the SYSENTER/SYSCALL64 * cases) in that they do not push an interrupt frame (and also have some other * effects). In the syscall trampolines, we assume that we can only be taking - * the call from userland and use SWAPGS and an unconditional overwrite of %cr3. + * the call from userland and use swapgs and an unconditional overwrite of %cr3. * We do not do any stack pivoting for syscalls (and we leave SYSENTER's * existing %rsp pivot untouched) -- instead we spill registers into * %gs:CPU_KPTI_* as we need to. @@ -503,7 +503,7 @@ tr_sysc_ret_end: pushq %gs:CPU_KPTI_CS; \ pushq %gs:CPU_KPTI_RIP; \ mov %gs:CPU_KPTI_R13, %r13; \ - SWAPGS; \ + swapgs; \ jmp isr; \ SET_SIZE(tr_/**/isr) @@ -536,10 +536,9 @@ tr_intr_ret_start: ENTRY_NP(tr_iret_user) #if DEBUG /* - * Ensure that we return to user land with CR0.TS clear. We do this - * before we trampoline back and pivot the stack and %cr3. This way - * we're still on the kernel stack and kernel %cr3, though we are on the - * user GSBASE. + * Panic if we find CR0.TS set. We're still on the kernel stack and + * %cr3, but we do need to swap back to the kernel gs. (We don't worry + * about swapgs speculation here.) 
*/ pushq %rax mov %cr0, %rax @@ -559,14 +558,24 @@ tr_intr_ret_start: cmpq $1, kpti_enable jne 1f + /* + * KPTI enabled: we're on the user gsbase at this point, so we + * need to swap back so we can pivot stacks. + * + * The swapgs lfence mitigation is probably not needed here + * since a mis-speculation of the above branch would imply KPTI + * is disabled, but we'll do so anyway. + */ swapgs + lfence mov %r13, %gs:CPU_KPTI_R13 PIVOT_KPTI_STK(%r13) SET_USER_CR3(%r13) mov %gs:CPU_KPTI_R13, %r13 - /* Zero these to make sure they didn't leak from a kernel trap */ + /* Zero these to make sure they didn't leak from a kernel trap. */ movq $0, %gs:CPU_KPTI_R13 movq $0, %gs:CPU_KPTI_R14 + /* And back to user gsbase again. */ swapgs 1: iretq diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c index c02e2e0469..6c317392b3 100644 --- a/usr/src/uts/i86pc/os/cpuid.c +++ b/usr/src/uts/i86pc/os/cpuid.c @@ -910,6 +910,7 @@ * more work in the system to mitigate against: * * - Spectre v1 + * - swapgs (Spectre v1 variant) * - Spectre v2 * - Meltdown (Spectre v3) * - Rogue Register Read (Spectre v3a) @@ -926,7 +927,7 @@ * overall approach that the system has taken to address these as well as their * shortcomings. Unfortunately, not all of the above have been handled today. * - * SPECTRE FAMILY (Spectre v2, ret2spec, SpectreRSB) + * SPECTRE v2, ret2spec, SpectreRSB * * The second variant of the spectre attack focuses on performing branch target * injection. This generally impacts indirect call instructions in the system. @@ -1035,11 +1036,43 @@ * it may make more sense to investigate using prediction barriers as the whole * system is only executing a single instruction at a time while in kmdb. * - * SPECTRE FAMILY (v1, v4) + * SPECTRE v1, v4 * * The v1 and v4 variants of spectre are not currently mitigated in the * system and require other classes of changes to occur in the code. 
* + * SPECTRE v1 (SWAPGS VARIANT) + * + * The class of Spectre v1 vulnerabilities aren't all about bounds checks, but + * can generally affect any branch-dependent code. The swapgs issue is one + * variant of this. If we are coming in from userspace, we can have code like + * this: + * + * cmpw $KCS_SEL, REGOFF_CS(%rsp) + * je 1f + * movq $0, REGOFF_SAVFP(%rsp) + * swapgs + * 1: + * movq %gs:CPU_THREAD, %rax + * + * If an attacker can cause a mis-speculation of the branch here, we could skip + * the needed swapgs, and use the /user/ %gsbase as the base of the %gs-based + * load. If subsequent code can act as the usual Spectre cache gadget, this + * would potentially allow KPTI bypass. To fix this, we need an lfence prior to + * any use of the %gs override. + * + * The other case is also an issue: if we're coming into a trap from kernel + * space, we could mis-speculate and swapgs the user %gsbase back in prior to + * using it. AMD systems are not vulnerable to this version, as a swapgs is + * serializing with respect to subsequent uses. But as AMD /does/ need the other + * case, and the fix is the same in both cases (an lfence at the branch target + * 1: in this example), we'll just do it unconditionally. + * + * Note that we don't enable user-space "wrgsbase" via CR4_FSGSBASE, making it + * harder for user-space to actually set a useful %gsbase value: although it's + * not clear, it might still be feasible via lwp_setprivate(), though, so we + * mitigate anyway. 
+ * * MELTDOWN * * Meltdown, or spectre v3, allowed a user process to read any data in their @@ -1159,12 +1192,13 @@ * and what's done in various places: * * - Spectre v1: Not currently mitigated + * - swapgs: lfences after swapgs paths * - Spectre v2: Retpolines/RSB Stuffing or EIBRS if HW support * - Meltdown: Kernel Page Table Isolation * - Spectre v3a: Updated CPU microcode * - Spectre v4: Not currently mitigated * - SpectreRSB: SMEP and RSB Stuffing - * - L1TF: spec_uarch_flush, smt exclusion, requires microcode + * - L1TF: spec_uarch_flush, SMT exclusion, requires microcode * - MDS: x86_md_clear, requires microcode, disabling hyper threading * * The following table indicates the x86 feature set bits that indicate that a diff --git a/usr/src/uts/intel/amd64/ml/amd64.il b/usr/src/uts/intel/amd64/ml/amd64.il index fc78c95a95..3e2a790729 100644 --- a/usr/src/uts/intel/amd64/ml/amd64.il +++ b/usr/src/uts/intel/amd64/ml/amd64.il @@ -23,6 +23,10 @@ * Use is subject to license terms. */ +/* + * Copyright 2019 Joyent, Inc. + */ + / / In-line functions for amd64 kernels. / @@ -189,34 +193,26 @@ movw %di, %gs .end - /* - * OPTERON_ERRATUM_88 requires mfence - */ - .inline __swapgs, 0 - mfence - swapgs - .end - /* * prefetch 64 bytes */ - .inline prefetch_read_many,8 + .inline prefetch_read_many,8 prefetcht0 (%rdi) prefetcht0 32(%rdi) .end - .inline prefetch_read_once,8 + .inline prefetch_read_once,8 prefetchnta (%rdi) prefetchnta 32(%rdi) .end - .inline prefetch_write_many,8 + .inline prefetch_write_many,8 prefetcht0 (%rdi) prefetcht0 32(%rdi) .end - .inline prefetch_write_once,8 + .inline prefetch_write_once,8 prefetcht0 (%rdi) prefetcht0 32(%rdi) .end diff --git a/usr/src/uts/intel/amd64/sys/privregs.h b/usr/src/uts/intel/amd64/sys/privregs.h index 83782c4b37..7e5f7cd392 100644 --- a/usr/src/uts/intel/amd64/sys/privregs.h +++ b/usr/src/uts/intel/amd64/sys/privregs.h @@ -24,6 +24,10 @@ * Use is subject to license terms. */ +/* + * Copyright 2019 Joyent, Inc. 
+ */ + #ifndef _AMD64_SYS_PRIVREGS_H #define _AMD64_SYS_PRIVREGS_H @@ -206,7 +210,8 @@ struct regs { je 6f; \ movq $0, REGOFF_SAVFP(%rsp); \ SWAPGS; \ -6: CLEAN_CS +6: lfence; /* swapgs mitigation */ \ + CLEAN_CS #define INTR_POP \ leaq sys_lcall32(%rip), %r11;\ @@ -216,8 +221,13 @@ struct regs { cmpw $KCS_SEL, REGOFF_CS(%rsp);\ je 8f; \ 5: SWAPGS; \ -8: addq $REGOFF_RIP, %rsp +8: lfence; /* swapgs mitigation */ \ + addq $REGOFF_RIP, %rsp +/* + * No need for swapgs mitigation: it's unconditional, and we're heading + * back to userspace. + */ #define USER_POP \ __RESTORE_REGS; \ SWAPGS; \ diff --git a/usr/src/uts/intel/asm/cpu.h b/usr/src/uts/intel/asm/cpu.h index faaaea7c8e..95e882601a 100644 --- a/usr/src/uts/intel/asm/cpu.h +++ b/usr/src/uts/intel/asm/cpu.h @@ -172,17 +172,6 @@ __set_gs(selector_t value) : "r" (value)); } -#if !defined(__xpv) - -extern __GNU_INLINE void -__swapgs(void) -{ - __asm__ __volatile__( - "mfence; swapgs"); -} - -#endif /* !__xpv */ - #endif /* __amd64 */ #endif /* !__lint && __GNUC__ */ diff --git a/usr/src/uts/intel/ia32/ml/exception.s b/usr/src/uts/intel/ia32/ml/exception.s index 5806087ca1..b35eab3220 100644 --- a/usr/src/uts/intel/ia32/ml/exception.s +++ b/usr/src/uts/intel/ia32/ml/exception.s @@ -174,8 +174,9 @@ leaq tr_brand_sys_sysenter(%rip), %r11 cmpq %r11, 24(%rsp) jne 2f -1: SWAPGS -2: popq %r11 +1: swapgs +2: lfence /* swapgs mitigation */ + popq %r11 #endif /* !__xpv */ INTR_PUSH diff --git a/usr/src/uts/intel/ia32/os/sundep.c b/usr/src/uts/intel/ia32/os/sundep.c index cfb4552287..34e0a03d68 100644 --- a/usr/src/uts/intel/ia32/os/sundep.c +++ b/usr/src/uts/intel/ia32/os/sundep.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2018 Joyent, Inc. + * Copyright 2019 Joyent, Inc. */ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ @@ -551,16 +551,19 @@ update_sregs(struct regs *rp, klwp_t *lwp) * * We've just mucked up the kernel's gsbase. Oops. 
In * particular we can't take any traps at all. Make the newly - * computed gsbase be the hidden gs via __swapgs, and fix + * computed gsbase be the hidden gs via swapgs, and fix * the kernel's gsbase back again. Later, when we return to * userland we'll swapgs again restoring gsbase just loaded * above. */ - __swapgs(); + __asm__ __volatile__("mfence; swapgs"); + rp->r_gs = pcb->pcb_gs; /* - * restore kernel's gsbase + * Restore kernel's gsbase. Note that this also serializes any + * attempted speculation from loading the user-controlled + * %gsbase. */ wrmsr(MSR_AMD_GSBASE, kgsbase); diff --git a/usr/src/uts/intel/io/ipmi/ipmi_main.c b/usr/src/uts/intel/io/ipmi/ipmi_main.c index 8b25829d2b..e7671ce734 100644 --- a/usr/src/uts/intel/io/ipmi/ipmi_main.c +++ b/usr/src/uts/intel/io/ipmi/ipmi_main.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2017 Joyent, Inc. + * Copyright 2019 Joyent, Inc. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. */ @@ -151,6 +151,7 @@ ipmi_open(dev_t *devp, int flag, int otyp, cred_t *cred) { minor_t minor; ipmi_device_t *dev; + id_t mid; if (ipmi_attached == B_FALSE) return (ENXIO); @@ -162,8 +163,9 @@ ipmi_open(dev_t *devp, int flag, int otyp, cred_t *cred) if (flag & FEXCL) return (ENOTSUP); - if ((minor = (minor_t)id_alloc_nosleep(minor_ids)) == 0) + if ((mid = id_alloc_nosleep(minor_ids)) == -1) return (ENODEV); + minor = (minor_t)mid; /* Initialize the per file descriptor data. */ dev = kmem_zalloc(sizeof (ipmi_device_t), KM_SLEEP); diff --git a/usr/src/uts/intel/kdi/kdi_asm.s b/usr/src/uts/intel/kdi/kdi_asm.s index f106d643f7..3dd6db5952 100644 --- a/usr/src/uts/intel/kdi/kdi_asm.s +++ b/usr/src/uts/intel/kdi/kdi_asm.s @@ -23,7 +23,7 @@ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Copyright 2018 Joyent, Inc. + * Copyright 2019 Joyent, Inc. */ /* @@ -271,6 +271,9 @@ * KDI_SAVE_REGS macro to prevent a usermode process's GSBASE from being * blown away. 
On the hypervisor, we don't need to do this, since it's * ensured we're on our requested kernel GSBASE already. + * + * No need to worry about swapgs speculation here as it's unconditional + * and via wrmsr anyway. */ subq $10, %rsp sgdt (%rsp) diff --git a/usr/src/uts/intel/sys/archsystm.h b/usr/src/uts/intel/sys/archsystm.h index 0c9ceac7be..55c387f9b1 100644 --- a/usr/src/uts/intel/sys/archsystm.h +++ b/usr/src/uts/intel/sys/archsystm.h @@ -21,7 +21,7 @@ /* * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2018 Joyent, Inc. + * Copyright 2019 Joyent, Inc. */ #ifndef _SYS_ARCHSYSTM_H @@ -94,10 +94,8 @@ extern void brand_sys_call(); #endif extern void sys_sysenter(); extern void tr_sys_sysenter(); -extern void _sys_sysenter_post_swapgs(); extern void brand_sys_sysenter(); extern void tr_brand_sys_sysenter(); -extern void _brand_sys_sysenter_post_swapgs(); extern void dosyscall(void); diff --git a/usr/src/uts/intel/sys/segments.h b/usr/src/uts/intel/sys/segments.h index 6bf18b3082..52831c9d87 100644 --- a/usr/src/uts/intel/sys/segments.h +++ b/usr/src/uts/intel/sys/segments.h @@ -2,7 +2,7 @@ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ /* - * Copyright 2018 Joyent, Inc. + * Copyright 2019 Joyent, Inc. */ #ifndef _SYS_SEGMENTS_H @@ -179,7 +179,6 @@ extern void __set_ds(selector_t); extern void __set_es(selector_t); extern void __set_fs(selector_t); extern void __set_gs(selector_t); -extern void __swapgs(void); #endif /* __amd64 */ #if defined(__amd64) |
