summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/smbios/smbios.c101
-rw-r--r--usr/src/cmd/zoneadm/zfs.c19
-rw-r--r--usr/src/man/man2/chown.215
-rw-r--r--usr/src/man/man3c/closedir.3c143
-rw-r--r--usr/src/man/man3c/opendir.3c331
-rw-r--r--usr/src/man/man3c/readdir.3c587
-rw-r--r--usr/src/man/man9e/mc_getprop.9e2
-rw-r--r--usr/src/man/man9e/mc_propinfo.9e2
-rw-r--r--usr/src/man/man9e/mc_setprop.9e2
-rw-r--r--usr/src/man/man9f/mac_link_update.9f4
-rw-r--r--usr/src/man/man9f/mac_prop_info.9f4
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/svm.c237
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/svm.h1
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/vmcb.c5
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/vmcb.h12
-rw-r--r--usr/src/uts/i86pc/io/vmm/sys/vmm_instruction_emul.h6
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm.c60
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c118
-rw-r--r--usr/src/uts/i86pc/os/hma.c29
19 files changed, 1114 insertions, 564 deletions
diff --git a/usr/src/cmd/smbios/smbios.c b/usr/src/cmd/smbios/smbios.c
index 399a85501e..1290505c07 100644
--- a/usr/src/cmd/smbios/smbios.c
+++ b/usr/src/cmd/smbios/smbios.c
@@ -22,7 +22,7 @@
/*
* Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright (c) 2018, Joyent, Inc.
- * Copyright 2020 Oxide Computer Company
+ * Copyright 2021 Oxide Computer Company
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -213,6 +213,43 @@ u128_print(FILE *fp, const char *desc, const uint8_t *data)
oprintf(fp, "\n");
}
+/*
+ * Print a string that came from an SMBIOS table. The output is the header,
+ * followed by ": " when label is true, followed by the string itself and a
+ * trailing newline. The string is emitted character by character so that
+ * non-printable bytes can be escaped as \xNN rather than written raw.
+ */
+static void
+str_print_label(FILE *fp, const char *header, const char *str, boolean_t label)
+{
+	const char *c;
+
+	/* Never hand caller-supplied text to oprintf() as the format itself. */
+	oprintf(fp, "%s", header);
+	if (label) {
+		oprintf(fp, ": ");
+	}
+
+	for (c = str; *c != '\0'; c++) {
+		/*
+		 * Work on an unsigned char: isprint() is undefined for
+		 * negative values, and a negative char passed to %02x would
+		 * be sign-extended and printed as ffffffNN.
+		 */
+		unsigned char uc = (unsigned char)*c;
+
+		if (isprint(uc)) {
+			oprintf(fp, "%c", uc);
+		} else {
+			oprintf(fp, "\\x%02x", uc);
+		}
+	}
+
+	oprintf(fp, "\n");
+}
+
+/*
+ * Print an SMBIOS string preceded only by the leading text in ws, with no
+ * ": " separator between the two.
+ */
+static void
+str_print_nolabel(FILE *fp, const char *ws, const char *str)
+{
+	/* A void function may not return an expression in ISO C. */
+	str_print_label(fp, ws, str, B_FALSE);
+}
+
+/*
+ * Print an SMBIOS string as "<header>: <string>", escaping any non-printable
+ * characters in the string.
+ */
+static void
+str_print(FILE *fp, const char *header, const char *str)
+{
+	/* A void function may not return an expression in ISO C. */
+	str_print_label(fp, header, str, B_TRUE);
+}
+
static int
check_oem(smbios_hdl_t *shp)
{
@@ -316,19 +353,19 @@ static void
print_common(const smbios_info_t *ip, FILE *fp)
{
if (ip->smbi_manufacturer[0] != '\0')
- oprintf(fp, " Manufacturer: %s\n", ip->smbi_manufacturer);
+ str_print(fp, " Manufacturer", ip->smbi_manufacturer);
if (ip->smbi_product[0] != '\0')
- oprintf(fp, " Product: %s\n", ip->smbi_product);
+ str_print(fp, " Product", ip->smbi_product);
if (ip->smbi_version[0] != '\0')
- oprintf(fp, " Version: %s\n", ip->smbi_version);
+ str_print(fp, " Version", ip->smbi_version);
if (ip->smbi_serial[0] != '\0')
- oprintf(fp, " Serial Number: %s\n", ip->smbi_serial);
+ str_print(fp, " Serial Number", ip->smbi_serial);
if (ip->smbi_asset[0] != '\0')
- oprintf(fp, " Asset Tag: %s\n", ip->smbi_asset);
+ str_print(fp, " Asset Tag", ip->smbi_asset);
if (ip->smbi_location[0] != '\0')
- oprintf(fp, " Location Tag: %s\n", ip->smbi_location);
+ str_print(fp, " Location Tag", ip->smbi_location);
if (ip->smbi_part[0] != '\0')
- oprintf(fp, " Part Number: %s\n", ip->smbi_part);
+ str_print(fp, " Part Number", ip->smbi_part);
}
static void
@@ -338,9 +375,9 @@ print_bios(smbios_hdl_t *shp, FILE *fp)
(void) smbios_info_bios(shp, &b);
- oprintf(fp, " Vendor: %s\n", b.smbb_vendor);
- oprintf(fp, " Version String: %s\n", b.smbb_version);
- oprintf(fp, " Release Date: %s\n", b.smbb_reldate);
+ str_print(fp, " Vendor", b.smbb_vendor);
+ str_print(fp, " Version String", b.smbb_version);
+ str_print(fp, " Release Date", b.smbb_reldate);
oprintf(fp, " Address Segment: 0x%x\n", b.smbb_segment);
oprintf(fp, " ROM Size: %" PRIu64 " bytes\n", b.smbb_extromsize);
oprintf(fp, " Image Size: %u bytes\n", b.smbb_runsize);
@@ -419,8 +456,8 @@ print_system(smbios_hdl_t *shp, FILE *fp)
desc_printf(smbios_system_wakeup_desc(s.smbs_wakeup),
fp, " Wake-Up Event: 0x%x", s.smbs_wakeup);
- oprintf(fp, " SKU Number: %s\n", s.smbs_sku);
- oprintf(fp, " Family: %s\n", s.smbs_family);
+ str_print(fp, " SKU Number", s.smbs_sku);
+ str_print(fp, " Family", s.smbs_family);
}
static void
@@ -468,7 +505,7 @@ print_chassis(smbios_hdl_t *shp, id_t id, FILE *fp)
(void) smbios_info_chassis(shp, id, &c);
oprintf(fp, " OEM Data: 0x%x\n", c.smbc_oemdata);
- oprintf(fp, " SKU number: %s\n",
+ str_print(fp, " SKU Number",
c.smbc_sku[0] == '\0' ? "<unknown>" : c.smbc_sku);
oprintf(fp, " Lock Present: %s\n", c.smbc_lock ? "Y" : "N");
@@ -666,8 +703,8 @@ print_port(smbios_hdl_t *shp, id_t id, FILE *fp)
(void) smbios_info_port(shp, id, &p);
- oprintf(fp, " Internal Reference Designator: %s\n", p.smbo_iref);
- oprintf(fp, " External Reference Designator: %s\n", p.smbo_eref);
+ str_print(fp, " Internal Reference Designator", p.smbo_iref);
+ str_print(fp, " External Reference Designator", p.smbo_eref);
desc_printf(smbios_port_conn_desc(p.smbo_itype),
fp, " Internal Connector Type: %u", p.smbo_itype);
@@ -688,7 +725,7 @@ print_slot(smbios_hdl_t *shp, id_t id, FILE *fp)
(void) smbios_info_slot(shp, id, &s);
smbios_info_smbios_version(shp, &v);
- oprintf(fp, " Reference Designator: %s\n", s.smbl_name);
+ str_print(fp, " Reference Designator", s.smbl_name);
oprintf(fp, " Slot ID: 0x%x\n", s.smbl_id);
desc_printf(smbios_slot_type_desc(s.smbl_type),
@@ -783,7 +820,7 @@ print_obdevs_ext(smbios_hdl_t *shp, id_t id, FILE *fp)
enabled = oe.smboe_dtype >> 7;
type = smbios_onboard_type_desc(oe.smboe_dtype & 0x7f);
- oprintf(fp, " Reference Designator: %s\n", oe.smboe_name);
+ str_print(fp, " Reference Designator", oe.smboe_name);
oprintf(fp, " Device Enabled: %s\n", enabled == B_TRUE ? "true" :
"false");
oprintf(fp, " Device Type: %s\n", type);
@@ -803,7 +840,7 @@ print_obdevs(smbios_hdl_t *shp, id_t id, FILE *fp)
argv = alloca(sizeof (smbios_obdev_t) * argc);
(void) smbios_info_obdevs(shp, id, argc, argv);
for (i = 0; i < argc; i++)
- oprintf(fp, " %s\n", argv[i].smbd_name);
+ str_print_nolabel(fp, " ", argv[i].smbd_name);
}
}
@@ -817,7 +854,7 @@ print_strtab(smbios_hdl_t *shp, id_t id, FILE *fp)
argv = alloca(sizeof (char *) * argc);
(void) smbios_info_strtab(shp, id, argc, argv);
for (i = 0; i < argc; i++)
- oprintf(fp, " %s\n", argv[i]);
+ str_print_nolabel(fp, " ", argv[i]);
}
}
@@ -828,7 +865,7 @@ print_lang(smbios_hdl_t *shp, id_t id, FILE *fp)
(void) smbios_info_lang(shp, &l);
- oprintf(fp, " Current Language: %s\n", l.smbla_cur);
+ str_print(fp, " Current Language", l.smbla_cur);
oprintf(fp, " Language String Format: %u\n", l.smbla_fmt);
oprintf(fp, " Number of Installed Languages: %u\n", l.smbla_num);
oprintf(fp, " Installed Languages:\n");
@@ -1014,8 +1051,8 @@ print_memdevice(smbios_hdl_t *shp, id_t id, FILE *fp)
oprintf(fp, " Configured Speed: Unknown\n");
}
- oprintf(fp, " Device Locator: %s\n", md.smbmd_dloc);
- oprintf(fp, " Bank Locator: %s\n", md.smbmd_bloc);
+ str_print(fp, " Device Locator", md.smbmd_dloc);
+ str_print(fp, " Bank Locator", md.smbmd_bloc);
if (md.smbmd_minvolt != 0) {
oprintf(fp, " Minimum Voltage: %.2fV\n",
@@ -1051,7 +1088,7 @@ print_memdevice(smbios_hdl_t *shp, id_t id, FILE *fp)
}
if (md.smbmd_firmware_rev[0] != '\0') {
- oprintf(fp, " Firmware Revision: %s\n", md.smbmd_firmware_rev);
+ str_print(fp, " Firmware Revision", md.smbmd_firmware_rev);
}
if (md.smbmd_modmfg_id != 0) {
@@ -1160,7 +1197,7 @@ print_vprobe(smbios_hdl_t *shp, id_t id, FILE *fp)
return;
}
- oprintf(fp, " Description: %s\n", vp.smbvp_description != NULL ?
+ str_print(fp, " Description", vp.smbvp_description != NULL ?
vp.smbvp_description : "unknown");
desc_printf(smbios_vprobe_loc_desc(vp.smbvp_location),
fp, " Location: %u", vp.smbvp_location);
@@ -1238,7 +1275,7 @@ print_cooldev(smbios_hdl_t *shp, id_t id, FILE *fp)
}
if (cd.smbcd_descr != NULL && cd.smbcd_descr[0] != '\0') {
- oprintf(fp, " Description: %s\n", cd.smbcd_descr);
+ str_print(fp, " Description", cd.smbcd_descr);
}
}
@@ -1253,7 +1290,7 @@ print_tprobe(smbios_hdl_t *shp, id_t id, FILE *fp)
return;
}
- oprintf(fp, " Description: %s\n", tp.smbtp_description != NULL ?
+ str_print(fp, " Description", tp.smbtp_description != NULL ?
tp.smbtp_description : "unknown");
desc_printf(smbios_tprobe_loc_desc(tp.smbtp_location),
fp, " Location: %u", tp.smbtp_location);
@@ -1317,7 +1354,7 @@ print_iprobe(smbios_hdl_t *shp, id_t id, FILE *fp)
return;
}
- oprintf(fp, " Description: %s\n", ip.smbip_description != NULL ?
+ str_print(fp, " Description", ip.smbip_description != NULL ?
ip.smbip_description : "unknown");
desc_printf(smbios_iprobe_loc_desc(ip.smbip_location),
fp, " Location: %u", ip.smbip_location);
@@ -1527,11 +1564,11 @@ print_battery(smbios_hdl_t *shp, id_t id, FILE *fp)
}
if (bat.smbb_date != NULL) {
- oprintf(fp, " Manufacture Date: %s\n", bat.smbb_date);
+ str_print(fp, " Manufacture Date", bat.smbb_date);
}
if (bat.smbb_serial != NULL) {
- oprintf(fp, " Serial Number: %s\n", bat.smbb_serial);
+ str_print(fp, " Serial Number", bat.smbb_serial);
}
if (bat.smbb_chem != SMB_BDC_UNKNOWN) {
@@ -1551,7 +1588,7 @@ print_battery(smbios_hdl_t *shp, id_t id, FILE *fp)
oprintf(fp, " Design Voltage: unknown\n");
}
- oprintf(fp, " SBDS Version Number: %s\n", bat.smbb_version);
+ str_print(fp, " SBDS Version Number", bat.smbb_version);
if (bat.smbb_err != UINT8_MAX) {
oprintf(fp, " Maximum Error: %u\n", bat.smbb_err);
} else {
@@ -1560,7 +1597,7 @@ print_battery(smbios_hdl_t *shp, id_t id, FILE *fp)
oprintf(fp, " SBDS Serial Number: %04x\n", bat.smbb_ssn);
oprintf(fp, " SBDS Manufacture Date: %u-%02u-%02u\n", bat.smbb_syear,
bat.smbb_smonth, bat.smbb_sday);
- oprintf(fp, " SBDS Device Chemistry: %s\n", bat.smbb_schem);
+ str_print(fp, " SBDS Device Chemistry", bat.smbb_schem);
oprintf(fp, " OEM-specific Information: 0x%08x\n", bat.smbb_oemdata);
}
diff --git a/usr/src/cmd/zoneadm/zfs.c b/usr/src/cmd/zoneadm/zfs.c
index 214340d0ce..f1ce9fab0a 100644
--- a/usr/src/cmd/zoneadm/zfs.c
+++ b/usr/src/cmd/zoneadm/zfs.c
@@ -24,6 +24,7 @@
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2016 Martin Matuska. All rights reserved.
+ * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
*/
/*
@@ -88,18 +89,23 @@ match_mountpoint(zfs_handle_t *zhp, void *data)
return (0);
}
- /* First check if the dataset is mounted. */
+ /*
+ * First check if the dataset is mounted.
+ * If not, move on to iterating child datasets which may still be
+ * mounted.
+ */
if (zfs_prop_get(zhp, ZFS_PROP_MOUNTED, mp, sizeof (mp), NULL, NULL,
0, B_FALSE) != 0 || strcmp(mp, "no") == 0) {
- zfs_close(zhp);
- return (0);
+ goto children;
}
- /* Now check mount point. */
+ /*
+ * Now check mount point.
+ * Move on to children if it cannot be retrieved.
+ */
if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
0, B_FALSE) != 0) {
- zfs_close(zhp);
- return (0);
+ goto children;
}
cbp = (zfs_mount_data_t *)data;
@@ -134,6 +140,7 @@ match_mountpoint(zfs_handle_t *zhp, void *data)
return (1);
}
+children:
/* Iterate over any nested datasets. */
res = zfs_iter_filesystems(zhp, match_mountpoint, data);
zfs_close(zhp);
diff --git a/usr/src/man/man2/chown.2 b/usr/src/man/man2/chown.2
index b3270cafc6..72c42a6a5f 100644
--- a/usr/src/man/man2/chown.2
+++ b/usr/src/man/man2/chown.2
@@ -4,11 +4,10 @@
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH CHOWN 2 "Oct 9, 2008"
+.TH CHOWN 2 "Mar 2, 2021"
.SH NAME
chown, lchown, fchown, fchownat \- change owner and group of a file
.SH SYNOPSIS
-.LP
.nf
#include <unistd.h>
#include <sys/types.h>
@@ -33,8 +32,6 @@ chown, lchown, fchown, fchownat \- change owner and group of a file
.fi
.SH DESCRIPTION
-.sp
-.LP
The \fBchown()\fR function sets the owner \fBID\fR and group \fBID\fR of the
file specified by \fIpath\fR or referenced by the open file descriptor
\fIfildes\fR to \fIowner\fR and \fIgroup\fR respectively. If \fIowner\fR or
@@ -54,7 +51,7 @@ in the same manner as \fBchown()\fR. If, however, the \fIpath\fR argument is
relative, the path is resolved relative to the \fIfildes\fR argument rather
than the current working directory. If the \fIfildes\fR argument has the
special value \fBAT_FDCWD\fR, the path resolution reverts back to current
-working directory relative. If the \fIflag\fR argument is set to \fBSYMLNK\fR,
+working directory relative. If the \fIflag\fR argument is set to \fBAT_SYMLINK_NOFOLLOW\fR,
the function behaves like \fBlchown()\fR with respect to symbolic links. If the
\fIpath\fR argument is absolute, the \fIfildes\fR argument is ignored. If the
\fIpath\fR argument is a null pointer, the function behaves like
@@ -105,14 +102,10 @@ See \fBsystem\fR(4) and \fBfpathconf\fR(2).
Upon successful completion, \fBchown()\fR, \fBfchown()\fR and \fBlchown()\fR
mark for update the \fBst_ctime\fR field of the file.
.SH RETURN VALUES
-.sp
-.LP
Upon successful completion, \fB0\fR is returned. Otherwise, \fB\(mi1\fR is
returned, the owner and group of the named file remain unchanged, and
\fBerrno\fR is set to indicate the error.
.SH ERRORS
-.sp
-.LP
All of these functions will fail if:
.sp
.ne 2
@@ -299,8 +292,6 @@ The named file referred to by \fIfildes\fR resides on a read-only file system.
.RE
.SH ATTRIBUTES
-.sp
-.LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
@@ -324,7 +315,5 @@ The \fBfchownat()\fR function is Evolving.
.LP
The \fBchown()\fR and \fBfchownat()\fR functions are Async-Signal-Safe.
.SH SEE ALSO
-.sp
-.LP
\fBchgrp\fR(1), \fBchown\fR(1), \fBchmod\fR(2), \fBfpathconf\fR(2),
\fBsystem\fR(4), \fBattributes\fR(5), \fBstandards\fR(5)
diff --git a/usr/src/man/man3c/closedir.3c b/usr/src/man/man3c/closedir.3c
index 09e7d3e3f7..c88ecd707c 100644
--- a/usr/src/man/man3c/closedir.3c
+++ b/usr/src/man/man3c/closedir.3c
@@ -43,72 +43,79 @@
.\" Copyright 1989 AT&T
.\" Portions Copyright (c) 1992, X/Open Company Limited All Rights Reserved
.\" Copyright (c) 2002, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright 2021 Oxide Computer Company
.\"
-.TH CLOSEDIR 3C "Jul 24, 2002"
-.SH NAME
-closedir \- close a directory stream
-.SH SYNOPSIS
-.LP
-.nf
-#include <sys/types.h>
-#include <dirent.h>
-
-\fBint\fR \fBclosedir\fR(\fBDIR *\fR\fIdirp\fR);
-.fi
-
-.SH DESCRIPTION
-.sp
-.LP
-The \fBclosedir()\fR function closes the directory stream referred to by the
-argument \fIdirp.\fR Upon return, the value of \fIdirp\fR may no longer point
-to an accessible object of the type \fBDIR\fR. If a file descriptor is used to
-implement type \fBDIR\fR, that file descriptor will be closed.
-.SH RETURN VALUES
-.sp
-.LP
-Upon successful completion, \fBclosedir()\fR returns \fB0\fR. Otherwise,
-\fB\(mi1\fR is returned and \fBerrno\fR is set to indicate the error.
-.SH ERRORS
-.sp
-.LP
-The \fBclosedir()\fR function may fail if:
-.sp
-.ne 2
-.na
-\fB\fBEBADF\fR\fR
-.ad
-.RS 9n
-The \fIdirp\fR argument does not refer to an open directory stream.
-.RE
-
-.sp
-.ne 2
-.na
-\fB\fBEINTR\fR\fR
-.ad
-.RS 9n
-The \fBclosedir()\fR function was interrupted by a signal.
-.RE
-
-.SH ATTRIBUTES
-.sp
-.LP
-See \fBattributes\fR(5) for descriptions of the following attributes:
-.sp
-
-.sp
-.TS
-box;
-c | c
-l | l .
-ATTRIBUTE TYPE ATTRIBUTE VALUE
-_
-Interface Stability Standard
-_
-MT-Level Safe
-.TE
-
-.SH SEE ALSO
-.sp
-.LP
-\fBopendir\fR(3C), \fBattributes\fR(5), \fBstandards\fR(5)
+.Dd February 25, 2021
+.Dt CLOSEDIR 3C
+.Os
+.Sh NAME
+.Nm closedir
+.Nd close a directory stream
+.Sh SYNOPSIS
+.In sys/types.h
+.In dirent.h
+.Ft int
+.Fo closedir
+.Fa "DIR *dirp"
+.Fc
+.Sh DESCRIPTION
+The
+.Fn closedir
+function closes the directory stream referred to by the argument
+.Fa dirp ,
+which generally came from calling
+.Xr opendir 3C
+or
+.Xr fdopendir 3C .
+Upon return, the value of
+.Fa dirp
+will no longer point to an accessible object of the type
+.Ft DIR .
+The underlying file descriptor for the directory stream will be closed.
+This includes a
+.Ft DIR
+created by passing a file descriptor to
+.Xr fdopendir 3C .
+.Pp
+When the directory stream is closed, any memory that is associated with
+the stream will no longer be valid.
+Most notably, the dirent structures returned from
+.Xr readdir 3C
+use memory associated with the corresponding
+.Ft "DIR *"
+argument.
+Each directory stream has its own independent memory.
+Closing one stream does not impact the validity of other streams.
+.Sh RETURN VALUES
+Upon successful completion, the
+.Fn closedir
+function returns
+.Sy 0 .
+Otherwise,
+.Sy -1
+is returned and
+.Va errno
+is set to indicate the error.
+.Sh ERRORS
+The
+.Fn closedir
+function may fail if:
+.Bl -tag -width Er
+.It Er EBADF
+The
+.Fa dirp
+argument does not refer to an open directory stream.
+.It Er EINTR
+The
+.Fn closedir
+function was interrupted by a signal.
+.El
+.Sh INTERFACE STABILITY
+.Sy Standard
+.Sh MT-LEVEL
+.Sy Safe
+.Sh SEE ALSO
+.Xr fdopendir 3C ,
+.Xr opendir 3C ,
+.Xr readdir 3C ,
+.Xr attributes 5
diff --git a/usr/src/man/man3c/opendir.3c b/usr/src/man/man3c/opendir.3c
index fa35c1aca9..a03928c9c7 100644
--- a/usr/src/man/man3c/opendir.3c
+++ b/usr/src/man/man3c/opendir.3c
@@ -42,175 +42,168 @@
.\"
.\" Portions Copyright (c) 1992, X/Open Company Limited. All Rights Reserved.
.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2021 Oxide Computer Company
.\"
-.TH OPENDIR 3C "Jun 26, 2007"
-.SH NAME
-opendir, fdopendir \- open directory
-.SH SYNOPSIS
-.LP
-.nf
-#include <sys/types.h>
-#include <dirent.h>
-
-\fBDIR *\fR\fBopendir\fR(\fBconst char *\fR\fIdirname\fR);
-.fi
-
-.LP
-.nf
-\fBDIR *\fR\fBfdopendir\fR(\fBint\fR \fIfildes\fR);
-.fi
-
-.SH DESCRIPTION
-.sp
-.LP
-The \fBopendir()\fR function opens a directory stream corresponding to the
-directory named by the \fIdirname\fR argument.
-.sp
-.LP
-The \fBfdopendir()\fR function opens a directory stream for the directory file
-descriptor \fIfildes\fR. The directory file descriptor should not be used or
-closed following a successful function call, as this might cause undefined
-results from future operations on the directory stream obtained from the call.
-Use \fBclosedir\fR(3C) to close a directory stream.
-.sp
-.LP
-The directory stream is positioned at the first entry. If the type \fBDIR\fR is
-implemented using a file descriptor, applications will only be able to open up
-to a total of {\fBOPEN_MAX\fR} files and directories. A successful call to any
-of the \fBexec\fR functions will close any directory streams that are open in
-the calling process. See \fBexec\fR(2).
-.SH RETURN VALUES
-.sp
-.LP
-Upon successful completion, \fBopendir()\fR and \fBfdopendir()\fR return a
-pointer to an object of type \fBDIR\fR. Otherwise, a null pointer is returned
-and \fBerrno\fR is set to indicate the error.
-.SH ERRORS
-.sp
-.LP
-The \fBopendir()\fR function will fail if:
-.sp
-.ne 2
-.na
-\fB\fBEACCES\fR\fR
-.ad
-.RS 16n
-Search permission is denied for the component of the path prefix of
-\fIdirname\fR or read permission is denied for \fIdirname\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fB\fBELOOP\fR\fR
-.ad
-.RS 16n
-Too many symbolic links were encountered in resolving \fIpath\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fB\fBENAMETOOLONG\fR\fR
-.ad
-.RS 16n
-The length of the \fIdirname\fR argument exceeds {\fBPATH_MAX\fR}, or a path
-name component is longer than {\fBNAME_MAX\fR} while {\fB_POSIX_NO_TRUNC\fR} is
-in effect.
-.RE
-
-.sp
-.ne 2
-.na
-\fB\fBENOENT\fR\fR
-.ad
-.RS 16n
-A component of \fIdirname\fR does not name an existing directory or
-\fIdirname\fR is an empty string.
-.RE
-
-.sp
-.ne 2
-.na
-\fB\fBENOTDIR\fR\fR
-.ad
-.RS 16n
-A component of \fIdirname\fR is not a directory.
-.RE
-
-.sp
-.LP
-The \fBfdopendir()\fR function will fail if:
-.sp
-.ne 2
-.na
-\fB\fBENOTDIR\fR\fR
-.ad
-.RS 11n
-The file descriptor \fIfildes\fR does not reference a directory.
-.RE
-
-.sp
-.LP
-The \fBopendir()\fR function may fail if:
-.sp
-.ne 2
-.na
-\fB\fBEMFILE\fR\fR
-.ad
-.RS 16n
-There are {\fBOPEN_MAX\fR} file descriptors currently open in the calling
-process.
-.RE
-
-.sp
-.ne 2
-.na
-\fB\fBENAMETOOLONG\fR\fR
-.ad
-.RS 16n
+.Dd February 25, 2021
+.Dt OPENDIR 3C
+.Os
+.Sh NAME
+.Nm opendir ,
+.Nm fdopendir
+.Nd open directory stream
+.Sh SYNOPSIS
+.In sys/types.h
+.In dirent.h
+.Ft "DIR *"
+.Fo opendir
+.Fa "const char *dirname"
+.Fc
+.Ft "DIR *"
+.Fo fdopendir
+.Fa "int filedes"
+.Fc
+.Sh DESCRIPTION
+The
+.Fn opendir
+and
+.Fn fdopendir
+functions are used to create seekable directory streams that can be used
+to iterate over the contents of a directory, most commonly with
+.Xr readdir 3C .
+One can traverse and seek the stream with functions such as
+.Xr seekdir 3C ,
+.Xr telldir 3C ,
+and
+.Xr rewinddir 3C .
+.Pp
+The
+.Fn opendir
+function creates a directory stream from the path named by
+.Fa dirname .
+The
+.Fn fdopendir
+function creates a directory stream from an already opened file
+descriptor,
+.Fa filedes ,
+that refers to a directory.
+After successfully calling
+.Fn fdopendir ,
+.Fa filedes
+belongs to the system and the application must not modify or close it in
+any way.
+.Pp
+The new directory stream is positioned at the first entry.
+When finished with the directory stream, the caller is responsible for
+releasing its resources by calling the
+.Xr closedir 3C
+function.
+This will close the directory stream's underlying file descriptor,
+including
+.Fa filedes
+if
+.Fn fdopendir
+was used to create it.
+In addition, memory associated with the directory stream, such as the
+.Ft struct dirent
+returned from
+.Xr readdir 3C
+will be invalid once a call to
+.Xr closedir 3C
+is completed.
+.Pp
+All directory streams are closed upon a successful call to any of the
+.Xr exec 2
+family of functions.
+The underlying file descriptors behave as though the
+.Dv FD_CLOEXEC
+flag was set upon them.
+.Pp
+Directory streams created by the
+.Fn opendir
+function require an underlying file descriptor.
+As a result, applications are only able to open up to a total of
+.Brq Dv OPEN_MAX
+files and directories.
+.Sh RETURN VALUES
+Upon successful completion, the
+.Fn opendir
+and
+.Fn fdopendir
+functions return a pointer to an object of type
+.Ft DIR .
+Otherwise, a null pointer is returned and
+.Va errno
+is set to indicate the error.
+.Sh ERRORS
+The
+.Fn opendir
+function will fail if:
+.Bl -tag -width Er
+.It Er EACCES
+Search permission is denied for any component of the path prefix of
+.Fa dirname
+or read permission is denied for
+.Fa dirname .
+.It Er ELOOP
+Too many symbolic links were encountered in resolving
+.Fa dirname .
+.It Er ENAMETOOLONG
+The length of the
+.Fa dirname
+argument exceeds
+.Brq Dv PATH_MAX ,
+or a path name component is longer than
+.Brq Dv NAME_MAX
+while
+.Brq Dv _POSIX_NO_TRUNC
+is in effect.
+.It Er ENOENT
+A component of
+.Fa dirname
+does not name an existing directory or
+.Fa dirname
+is an empty string.
+.It Er ENOTDIR
+A component of
+.Fa dirname
+is not a directory.
+.El
+.Pp
+The
+.Fn fdopendir
+function will fail if:
+.Bl -tag -width Er
+.It Er ENOTDIR
+The file descriptor
+.Fa filedes
+does not reference a directory.
+.El
+.Pp
+The
+.Fn opendir
+function may fail if:
+.Bl -tag -width Er
+.It Er EMFILE
+There are already
+.Brq Dv OPEN_MAX
+file descriptors currently open in the calling process.
+.It Er ENAMETOOLONG
Pathname resolution of a symbolic link produced an intermediate result whose
-length exceeds \fBPATH_MAX\fR.
-.RE
-
-.sp
-.ne 2
-.na
-\fB\fBENFILE\fR\fR
-.ad
-.RS 16n
+length exceeds
+.Dv PATH_MAX .
+.It Er ENFILE
Too many files are currently open on the system.
-.RE
-
-.SH USAGE
-.sp
-.LP
-The \fBopendir()\fR and \fBfdopendir()\fR functions should be used in
-conjunction with \fBreaddir\fR(3C), \fBclosedir\fR(3C) and \fBrewinddir\fR(3C)
-to examine the contents of the directory (see the \fBEXAMPLES\fR section in
-\fBreaddir\fR(3C)). This method is recommended for portability.
-.SH ATTRIBUTES
-.sp
-.LP
-See \fBattributes\fR(5) for descriptions of the following attributes:
-.sp
-
-.sp
-.TS
-box;
-c | c
-l | l .
-ATTRIBUTE TYPE ATTRIBUTE VALUE
-_
-Interface Stability T{
-\fBopendir()\fR is Standard; \fBfdopendir()\fR is Evolving
-T}
-_
-MT-Level Safe
-.TE
-
-.SH SEE ALSO
-.sp
-.LP
-\fBlstat\fR(2), \fBsymlink\fR(2), \fBclosedir\fR(3C), \fBreaddir\fR(3C),
-\fBrewinddir\fR(3C), \fBscandir\fR(3C), \fBattributes\fR(5)
+.El
+.Sh INTERFACE STABILITY
+.Sy Committed
+.Sh MT-LEVEL
+.Sy Safe
+.Sh SEE ALSO
+.Xr lstat 2 ,
+.Xr symlink 2 ,
+.Xr closedir 3C ,
+.Xr readdir 3C ,
+.Xr rewinddir 3C ,
+.Xr seekdir 3C ,
+.Xr telldir 3C ,
+.Xr attributes 5
diff --git a/usr/src/man/man3c/readdir.3c b/usr/src/man/man3c/readdir.3c
index b7c55cf0e1..4d61797f25 100644
--- a/usr/src/man/man3c/readdir.3c
+++ b/usr/src/man/man3c/readdir.3c
@@ -42,211 +42,250 @@
.\"
.\" Portions Copyright (c) 1992, X/Open Company Limited. All Rights Reserved.
.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2021 Oxide Computer Company
.\"
-.TH READDIR 3C "Jun 26, 2007"
-.SH NAME
-readdir, readdir_r \- read directory
-.SH SYNOPSIS
-.LP
-.nf
-#include <sys/types.h>
-#include <dirent.h>
-
-\fBstruct dirent *\fR\fBreaddir\fR(\fBDIR *\fR\fIdirp\fR);
-.fi
-
-.LP
-.nf
-\fBstruct dirent *\fR\fBreaddir_r\fR(\fBDIR *\fR\fIdirp\fR, \fBstruct dirent *\fR\fIentry\fR);
-.fi
-
-.SS "Standard conforming"
-.LP
-.nf
-cc [ \fIflag\fR... ] \fIfile\fR... \fB-D_POSIX_PTHREAD_SEMANTICS\fR [ \fIlibrary\fR... ]
-
-\fBint\fR \fBreaddir_r\fR(\fBDIR *restrict\fR \fIdirp\fR, \fBstruct dirent *restrict\fR \fIentry\fR,
- \fBstruct dirent **restrict\fR \fIresult\fR);
-.fi
-
-.SH DESCRIPTION
-.sp
-.LP
-The type \fBDIR\fR, which is defined in the header <\fBdirent.h\fR>, represents
-a \fIdirectory stream\fR, which is an ordered sequence of all the directory
-entries in a particular directory. Directory entries represent files. Files can
-be removed from a directory or added to a directory asynchronously to the
-operation of \fBreaddir()\fR and \fBreaddir_r()\fR.
-.SS "\fBreaddir()\fR"
-.sp
-.LP
-The \fBreaddir()\fR function returns a pointer to a structure representing the
-directory entry at the current position in the directory stream specified by
-the argument \fIdirp\fR, and positions the directory stream at the next entry.
-It returns a null pointer upon reaching the end of the directory stream. The
-structure \fBdirent\fR defined by the \fB<dirent.h>\fR header describes a
-directory entry.
-.sp
-.LP
-The \fBreaddir()\fR function will not return directory entries containing empty
-names. If entries for . (dot) or .. (dot-dot) exist, one entry will be returned
-for dot and one entry will be returned for dot-dot; otherwise they will not be
-returned.
-.sp
-.LP
-The pointer returned by \fBreaddir()\fR points to data that can be overwritten
-by another call to \fBreaddir()\fR on the same directory stream. These data are
-not overwritten by another call to \fBreaddir()\fR on a different directory
-stream.
-.sp
-.LP
+.Dd February 25, 2021
+.Dt READDIR 3C
+.Os
+.Sh NAME
+.Nm readdir ,
+.Nm readdir_r
+.Nd read directory
+.Sh SYNOPSIS
+.In sys/types.h
+.In dirent.h
+.Ft "struct dirent *"
+.Fo readdir
+.Fa "DIR *dirp"
+.Fc
+.Ft "struct dirent *"
+.Fo readdir_r
+.Fa "DIR *dirp"
+.Fa "struct dirent *entry"
+.Fc
+.Ss Standard Conforming
+.Fd #define _POSIX_PTHREAD_SEMANTICS
+.Ft int
+.Fo readdir_r
+.Fa "DIR *restrict dirp"
+.Fa "struct dirent *restrict entry"
+.Fa "struct dirent **restrict result"
+.Fc
+.Sh DESCRIPTION
+The type
+.Ft DIR ,
+which is defined in the header
+.In dirent.h ,
+represents a
+.Em directory stream ,
+which is an ordered sequence of all the directory entries in a particular
+directory.
+Directory entries represent files.
+Files can be removed from a directory or added to a directory asynchronously
+to the operation of
+.Fn readdir
+and
+.Fn readdir_r .
+.Ss Fn readdir
+The
+.Fn readdir
+function returns a pointer to a structure representing the directory entry at
+the current position in the directory stream specified by the argument
+.Fa dirp ,
+and positions the directory stream at the next entry.
+It returns a null pointer upon reaching the end of the directory stream.
+The structure
+.Ft dirent
+defined by the
+.In dirent.h
+header describes a directory entry.
+.Pp
+The
+.Fn readdir
+function will not return directory entries containing empty names.
+If entries for
+.No \&.
+.Pq dot
+.No or \&..
+.Pq dot-dot
+exist, one entry will be returned for dot and one entry will be returned for
+dot-dot; otherwise they will not be returned.
+.Pp
+The pointer returned by
+.Fn readdir
+points to data that can be overwritten by another call to
+.Fn readdir
+on the same directory stream.
+It will not be overwritten by another call to
+.Fn readdir
+on a different directory stream.
+The returned pointer will remain valid until the directory stream is
+freed with a call to
+.Xr closedir 3C .
+It is safe to use
+.Fn readdir
+in a threaded application, so long as only one thread reads from the directory
+stream at any given time.
+.Pp
If a file is removed from or added to the directory after the most recent call
-to \fBopendir\fR(3C) or \fBrewinddir\fR(3C), whether a subsequent call to
-\fBreaddir()\fR returns an entry for that file is unspecified.
-.sp
-.LP
-The \fBreaddir()\fR function can buffer several directory entries per actual
-read operation. It marks for update the \fBst_atime\fR field of the directory
-each time the directory is actually read.
-.sp
-.LP
-After a call to \fBfork\fR(2), either the parent or child (but not both) can
-continue processing the directory stream using \fBreaddir()\fR,
-\fBrewinddir()\fR or \fBseekdir\fR(3C). If both the parent and child processes
-use these functions, the result is undefined.
-.sp
-.LP
-If the entry names a symbolic link, the value of the \fBd_ino\fR member is
-unspecified.
-.SS "\fBreaddir_r()\fR"
-.sp
-.LP
+to
+.Xr opendir 3C
+or
+.Xr rewinddir 3C ,
+whether a subsequent call to
+.Fn readdir
+returns an entry for that file is unspecified.
+.Pp
+The
+.Fn readdir
+function can buffer several directory entries per actual read operation.
+It marks for update the
+.Ft st_atime
+field of the directory each time the directory is actually read.
+.Pp
+After a call to
+.Xr fork 2 ,
+either the parent or child
+.Pq but not both
+can continue processing the directory stream using
+.Fn readdir ,
+.Xr rewinddir 3C ,
+or
+.Xr seekdir 3C .
+If both the parent and child processes use these functions, the result is
+undefined.
+.Pp
+If the entry names a symbolic link, the value of the
+.Ft d_ino
+member is unspecified.
+.Ss Fn readdir_r
Unless the end of the directory stream has been reached or an error occurred,
-the \fBreaddir_r()\fR function initializes the \fBdirent\fR structure
-referenced by \fBentry\fR to represent the directory entry at the current
-position in the directory stream referred to by \fIdirp\fR, and positions the
-directory stream at the next entry.
-.sp
-.LP
-The caller must allocate storage pointed to by \fIentry\fR to be large enough
-for a \fBdirent\fR structure with an array of \fBchar\fR \fBd_name\fR member
-containing at least \fBNAME_MAX\fR (that is, \fBpathconf(directory,
-_PC_NAME_MAX\fR)) plus one elements. (\fB_PC_NAME_MAX\fR is defined in
-<\fBunistd.h\fR>.)
-.sp
-.LP
-The \fBreaddir_r()\fR function will not return directory entries containing
-empty names. It is unspecified whether entries are returned for . (dot) or ..
-(dot-dot).
-.sp
-.LP
+the
+.Fn readdir_r
+function initializes the
+.Ft dirent
+structure referenced by
+.Fa entry
+to represent the directory entry at the current position in the directory
+stream referred to by
+.Fa dirp ,
+and positions the directory stream at the next entry.
+.Pp
+The caller must allocate storage pointed to by
+.Fa entry
+to be large enough for a
+.Ft dirent
+structure with an array of
+.Ft char
+.Fa d_name
+member containing at least
+.Dv NAME_MAX
+.Po
+that is,
+.Fo pathconf
+.Fa directory ,
+.Dv _PC_NAME_MAX
+.Fc
+.Pc
+plus one element.
+.Po
+.Dv _PC_NAME_MAX
+is defined in
+.In unistd.h
+.Pc
+.Pp
+While the
+.Fn readdir_r
+function was originally added as a re-entrant version of
+.Fn readdir ,
+it is not recommended that
+.Fn readdir_r
+be used in new applications and existing software should instead use
+.Fn readdir .
+The
+.Fn readdir_r
+function has been deprecated in many systems.
+Historically, the data returned from
+.Fn readdir
+was not specific to the directory stream making it unsafe in a multi-threaded
+environment; however, that is no longer the case.
+.Pp
+The
+.Fn readdir_r
+function will not return directory entries containing empty names.
+It is unspecified whether entries are returned for
+.No \&.
+.Pq dot
+or
+.No \&..
+.Pq dot-dot .
+.Pp
If a file is removed from or added to the directory after the most recent call
-to \fBopendir()\fR or \fBrewinddir()\fR, whether a subsequent call to
-\fBreaddir_r()\fR returns an entry for that file is unspecified.
-.sp
-.LP
-The \fBreaddir_r()\fR function can buffer several directory entries per actual
-read operation. It marks for update the \fBst_atime\fR field of the directory
-each time the directory is actually read.
-.sp
-.LP
-The standard-conforming version (see \fBstandards\fR(5)) of the
-\fBreaddir_r()\fR function performs all of the actions described above and sets
-the pointer pointed to by \fIresult\fR. If a directory entry is returned, the
-pointer will be set to the same value as the \fIentry\fR argument; otherwise,
-it will be set to \fINULL\fR.
-.SH RETURN VALUES
-.sp
-.LP
-Upon successful completion, \fBreaddir()\fR and the default \fBreaddir_r()\fR
-return a pointer to an object of type \fBstruct dirent\fR. When an error is
-encountered, a null pointer is returned and \fBerrno\fR is set to indicate the
-error. When the end of the directory is encountered, a null pointer is returned
-and \fBerrno\fR is not changed.
-.sp
-.LP
-The standard-conforming \fBreaddir_r()\fR returns \fB0\fR if the end of the
-directory is encountered or a directory entry is stored in the structure
-referenced by \fIentry\fR. Otherwise, an error number is returned to indicate
-the failure.
-.SH ERRORS
-.sp
-.LP
-The \fBreaddir()\fR and \fBreaddir_r()\fR functions will fail if:
-.sp
-.ne 2
-.na
-\fB\fBEOVERFLOW\fR\fR
-.ad
-.RS 13n
-One of the values in the structure to be returned cannot be represented
-correctly.
-.RE
-
-.sp
-.LP
-The \fBreaddir()\fR and \fBreaddir_r()\fR functions may fail if:
-.sp
-.ne 2
-.na
-\fB\fBEBADF\fR\fR
-.ad
-.RS 10n
-The \fIdirp\fR argument does not refer to an open directory stream.
-.RE
-
-.sp
-.ne 2
-.na
-\fB\fBENOENT\fR\fR
-.ad
-.RS 10n
-The current position of the directory stream is invalid.
-.RE
-
-.SH USAGE
-.sp
-.LP
-The \fBreaddir()\fR and \fBreaddir_r()\fR functions should be used in
-conjunction with \fBopendir()\fR, \fBclosedir()\fR, and \fBrewinddir()\fR to
-examine the contents of the directory. Since \fBreaddir()\fR and the default
-\fBreaddir_r()\fR return a null pointer both at the end of the directory and on
-error, an application wanting to check for error situations should set
-\fBerrno\fR to 0 before calling either of these functions. If \fBerrno\fR is
-set to non-zero on return, an error occurred.
-.sp
-.LP
-It is safe to use \fBreaddir()\fR in a threaded application, so long as only
-one thread reads from the directory stream at any given time. The
-\fBreaddir()\fR function is generally preferred over the \fBreaddir_r()\fR
-function.
-.sp
-.LP
-The standard-conforming \fBreaddir_r()\fR returns the error number if an error
-occurred. It returns 0 on success (including reaching the end of the directory
-stream).
-.sp
-.LP
-The \fBreaddir()\fR and \fBreaddir_r()\fR functions have transitional
-interfaces for 64-bit file offsets. See \fBlf64\fR(5).
-.SH EXAMPLES
-.LP
-\fBExample 1 \fRSearch the current directory for the entry \fIname\fR.
-.sp
-.LP
+to
+.Xr opendir 3C or
+.Xr rewinddir 3C , whether a subsequent call to
+.Fn readdir_r
+returns an entry for that file is unspecified.
+.Pp
+The
+.Fn readdir_r
+function can buffer several directory entries per actual read operation.
+It marks for update the
+.Ft st_atime
+field of the directory each time the directory is actually read.
+.Pp
+The standard-conforming version
+.Po see
+.Xr standards 5
+.Pc
+of the
+.Fn readdir_r
+function performs all of the actions described above for
+.Fn readdir_r
+and sets the pointer pointed to by
+.Fa result .
+If a directory entry is returned, the pointer will be set to the same value
+as the
+.Fa entry
+argument; otherwise, it will be set to
+.Dv NULL .
+.Sh RETURN VALUES
+Upon successful completion,
+.Fn readdir
+and the default
+.Fn readdir_r
+return a pointer to an object of type
+.Ft struct dirent .
+When an error is encountered, a null pointer is returned and
+.Va errno
+is set to indicate the error.
+When the end of the directory is encountered, a null pointer is returned and
+.Va errno
+is not changed.
+.Pp
+The standard-conforming
+.Fn readdir_r
+returns
+.Sy 0
+if the end of the directory is encountered or a directory entry is stored in
+the structure referenced by
+.Fa entry .
+Otherwise, an error number is returned to indicate the failure.
+.Sh EXAMPLES
+.Sy Example 1
+Search the current directory for the entry
+.Fa name .
+.Pp
The following sample program will search the current directory for each of the
arguments supplied on the command line:
-
-.sp
-.in +2
-.nf
+.Bd -literal
#include <sys/types.h>
#include <dirent.h>
#include <errno.h>
#include <stdio.h>
#include <strings.h>
-static void lookup(const char *arg)
+static void
+lookup(const char *arg)
{
DIR *dirp;
struct dirent *dp;
@@ -273,60 +312,124 @@ static void lookup(const char *arg)
else
(void) printf("failed to find %s\en", arg);
(void) closedir(dirp);
- return;
}
-int main(int argc, char *argv[])
+int
+main(int argc, char *argv[])
{
int i;
for (i = 1; i < argc; i++)
lookup(argv[i]);
return (0);
}
-.fi
-.in -2
-
-.SH ATTRIBUTES
-.sp
-.LP
-See \fBattributes\fR(5) for descriptions of the following attributes:
-.sp
-
-.sp
-.TS
-box;
-c | c
-l | l .
-ATTRIBUTE TYPE ATTRIBUTE VALUE
-_
-Interface Stability Standard
-_
-MT-Level See below.
-.TE
-
-.sp
-.LP
-The \fBreaddir()\fR function is Unsafe. The \fBreaddir_r()\fR function is Safe.
-.SH SEE ALSO
-.sp
-.LP
-\fBfork\fR(2), \fBlstat\fR(2), \fBsymlink\fR(2), \fBIntro\fR(3),
-\fBclosedir\fR(3C), \fBopendir\fR(3C), \fBrewinddir\fR(3C), \fBscandir\fR(3C),
-\fBseekdir\fR(3C), \fBattributes\fR(5), \fBlf64\fR(5), \fBstandards\fR(5)
-.SH NOTES
-.sp
-.LP
-When compiling multithreaded programs, see the \fBMULTITHREADED APPLICATIONS\fR
-section of \fBIntro\fR(3).
-.sp
-.LP
-Solaris 2.4 and earlier releases provided a \fBreaddir_r()\fR interface as
-specified in POSIX.1c Draft 6. The final POSIX.1c standard changed the
-interface as described above. Support for the Draft 6 interface is provided for
-compatibility only and might not be supported in future releases. New
-applications and libraries should use the standard-conforming interface.
-.sp
-.LP
-For POSIX.1c-conforming applications, the \fB_POSIX_PTHREAD_SEMANTICS\fR and
-\fB_REENTRANT\fR flags are automatically turned on by defining the
-\fB_POSIX_C_SOURCE\fR flag with a value >= 199506L.
+.Ed
+.Sh ERRORS
+The
+.Fn readdir
+and
+.Fn readdir_r
+functions will fail if:
+.Bl -tag -width Er
+.It Er EOVERFLOW
+One of the values in the structure to be returned cannot be represented
+correctly.
+.El
+.Pp
+The
+.Fn readdir
+and
+.Fn readdir_r
+functions may fail if:
+.Bl -tag -width Er
+.It Er EBADF
+The
+.Fa dirp
+argument does not refer to an open directory stream.
+.It Er ENOENT
+The current position of the directory stream is invalid.
+.El
+.Sh USAGE
+The
+.Fn readdir
+and
+.Fn readdir_r
+functions should be used in conjunction with
+.Xr opendir 3C ,
+.Xr closedir 3C ,
+and
+.Xr rewinddir 3C
+to examine the contents of the directory.
+Since
+.Fn readdir
+and the default
+.Fn readdir_r
+return a null pointer both at the end of the directory and on error, an
+application wanting to check for error situations should set
+.Va errno
+to 0 before calling either of these functions.
+If
+.Va errno
+is set to non-zero on return, an error occurred.
+.Pp
+The standard-conforming
+.Fn readdir_r
+returns the error number if an error occurred.
+It returns 0 on success
+.Pq including reaching the end of the directory stream .
+.Pp
+The
+.Fn readdir
+and
+.Fn readdir_r
+functions have transitional interfaces for 64-bit file offsets.
+See
+.Xr lf64 5 .
+.Sh INTERFACE STABILITY
+.Sy Committed
+.Sh MT-LEVEL
+The
+.Fn readdir
+function is
+.Sy Unsafe ;
+however, it is
+.Sy Safe
+if different directory streams are used concurrently.
+The
+.Fn readdir_r
+function is
+.Sy Safe .
+.Sh SEE ALSO
+.Xr fork 2 ,
+.Xr lstat 2 ,
+.Xr symlink 2 ,
+.Xr Intro 3 ,
+.Xr closedir 3C ,
+.Xr opendir 3C ,
+.Xr rewinddir 3C ,
+.Xr scandir 3C ,
+.Xr seekdir 3C ,
+.Xr attributes 5 ,
+.Xr lf64 5 ,
+.Xr standards 5
+.Sh NOTES
+When compiling multithreaded programs, see the
+.Sy MULTITHREADED APPLICATIONS
+section of
+.Xr Intro 3 .
+.Pp
+Solaris 2.4 and earlier releases provided a
+.Fn readdir_r
+interface as specified in POSIX.1c Draft 6.
+The final POSIX.1c standard changed the interface as described above.
+Support for the Draft 6 interface is provided for compatibility only.
+New applications and libraries should use the standard-conforming interface,
+though preferably
+.Fn readdir .
+.Pp
+For POSIX.1c-conforming applications, the
+.Dv _POSIX_PTHREAD_SEMANTICS
+and
+.Dv _REENTRANT
+flags are automatically turned on by defining the
+.Dv _POSIX_C_SOURCE
+flag with a value >= 199506L.
diff --git a/usr/src/man/man9e/mc_getprop.9e b/usr/src/man/man9e/mc_getprop.9e
index 11a942a22b..184f167fe5 100644
--- a/usr/src/man/man9e/mc_getprop.9e
+++ b/usr/src/man/man9e/mc_getprop.9e
@@ -62,7 +62,7 @@ When the
.Fn mc_getprop
entry point is called, the driver needs to first identify the property.
The set of possible properties and their meaning is listed in the
-.Sx PROPERTIES
+.Sy PROPERTIES
section of
.Xr mac 9E .
It should identify the property based on the value of
diff --git a/usr/src/man/man9e/mc_propinfo.9e b/usr/src/man/man9e/mc_propinfo.9e
index d86aea6ec9..060b05b9e7 100644
--- a/usr/src/man/man9e/mc_propinfo.9e
+++ b/usr/src/man/man9e/mc_propinfo.9e
@@ -68,7 +68,7 @@ When the
.Fn mc_propinfo
entry point is called, the driver needs to first identify the property.
The set of possible properties and their meaning is listed in the
-.Sx PROPERTIES
+.Sy PROPERTIES
section of
.Xr mac 9E .
It should identify the property based on the value of
diff --git a/usr/src/man/man9e/mc_setprop.9e b/usr/src/man/man9e/mc_setprop.9e
index fdd8aca49a..7235f1a347 100644
--- a/usr/src/man/man9e/mc_setprop.9e
+++ b/usr/src/man/man9e/mc_setprop.9e
@@ -62,7 +62,7 @@ When the
.Fn mc_setprop
entry point is called, the driver needs to first identify the property.
The set of possible properties and their meaning is listed in the
-.Sx PROPERTIES
+.Sy PROPERTIES
section of
.Xr mac 9E .
It should identify the property based on the value of
diff --git a/usr/src/man/man9f/mac_link_update.9f b/usr/src/man/man9f/mac_link_update.9f
index f37990c3c3..2b64c90c10 100644
--- a/usr/src/man/man9f/mac_link_update.9f
+++ b/usr/src/man/man9f/mac_link_update.9f
@@ -36,7 +36,7 @@ The current state of the link.
For valid link states see the discussion of
.Sy MAC_PROP_STATUS
in the
-.Sx PROPERTIES
+.Sy PROPERTIES
section of
.Xr mac 9E .
.El
@@ -46,7 +46,7 @@ The
function is used by device drivers to inform the MAC layer that the
state of a link has changed.
As discussed in the
-.Sx Link Updates
+.Sy Link Updates
section of
.Xr mac 9E ,
the driver should call this whenever it detects that the state of the
diff --git a/usr/src/man/man9f/mac_prop_info.9f b/usr/src/man/man9f/mac_prop_info.9f
index b24ee63694..b8127c0230 100644
--- a/usr/src/man/man9f/mac_prop_info.9f
+++ b/usr/src/man/man9f/mac_prop_info.9f
@@ -80,7 +80,7 @@ A valid link flow control entry.
Valid values are documented in the
.Sy MAC_PROP_FLOWCTRL
property description in the
-.Sx PROPERTIES
+.Sy PROPERTIES
section of
.Xr mac 9E .
.It Fa fec
@@ -90,7 +90,7 @@ scheme.
Valid values are documented in the
.Sy MAC_PROP_EN_FEC_CAP
property description in the
-.Sx PROPERTIES
+.Sy PROPERTIES
section of
.Xr mac 9E .
.It Fa str
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c
index 0f81bcc22b..b17515c259 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c
@@ -132,17 +132,19 @@ static VMM_STAT_AMD(VCPU_INTINFO_INJECTED, "Events pending at VM entry");
static VMM_STAT_AMD(VMEXIT_VINTR, "VM exits due to interrupt window");
static int svm_setreg(void *arg, int vcpu, int ident, uint64_t val);
+static int svm_getreg(void *arg, int vcpu, int ident, uint64_t *val);
+static void flush_asid(struct svm_softc *sc, int vcpuid);
-static __inline int
+static __inline bool
flush_by_asid(void)
{
- return (svm_feature & AMD_CPUID_SVM_FLUSH_BY_ASID);
+ return ((svm_feature & AMD_CPUID_SVM_FLUSH_BY_ASID) != 0);
}
-static __inline int
+static __inline bool
decode_assist(void)
{
- return (svm_feature & AMD_CPUID_SVM_DECODE_ASSIST);
+ return ((svm_feature & AMD_CPUID_SVM_DECODE_ASSIST) != 0);
}
#ifdef __FreeBSD__
@@ -476,6 +478,13 @@ vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa,
}
/*
+ * Selectively intercept writes to %cr0. This triggers on operations
+ * which would change bits other than TS or MP.
+ */
+ svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
+ VMCB_INTCPT_CR0_WRITE);
+
+ /*
* Intercept everything when tracing guest exceptions otherwise
* just intercept machine check exception.
*/
@@ -884,6 +893,166 @@ svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit,
vie_init_mmio(vie, inst_bytes, inst_len, &paging, gpa);
}
+/*
+ * Do not allow CD, NW, or invalid high bits to be asserted in the value of cr0
+ * which is live in the guest. They are visible via the shadow instead.
+ */
+#define SVM_CR0_MASK ~(CR0_CD | CR0_NW | 0xffffffff00000000)
+
+static void
+svm_set_cr0(struct svm_softc *svm_sc, int vcpu, uint64_t val, bool guest_write)
+{
+ struct vmcb_state *state;
+ struct svm_regctx *regctx;
+ uint64_t masked, old, diff;
+
+ state = svm_get_vmcb_state(svm_sc, vcpu);
+ regctx = svm_get_guest_regctx(svm_sc, vcpu);
+
+ old = state->cr0 | (regctx->sctx_cr0_shadow & ~SVM_CR0_MASK);
+ diff = old ^ val;
+
+ /* No further work needed if register contents remain the same */
+ if (diff == 0) {
+ return;
+ }
+
+ /* Flush the TLB if the paging or write-protect bits are changing */
+ if ((diff & CR0_PG) != 0 || (diff & CR0_WP) != 0) {
+ flush_asid(svm_sc, vcpu);
+ }
+
+ /*
+ * If the change in %cr0 is due to a guest action (via interception)
+ * then other CPU state updates may be required.
+ */
+ if (guest_write) {
+ if ((diff & CR0_PG) != 0) {
+ uint64_t efer = state->efer;
+
+ /* Keep the long-mode state in EFER in sync */
+ if ((val & CR0_PG) != 0 && (efer & EFER_LME) != 0) {
+ state->efer |= EFER_LMA;
+ }
+ if ((val & CR0_PG) == 0 && (efer & EFER_LME) != 0) {
+ state->efer &= ~EFER_LMA;
+ }
+ }
+ }
+
+ masked = val & SVM_CR0_MASK;
+ regctx->sctx_cr0_shadow = val;
+ state->cr0 = masked;
+ svm_set_dirty(svm_sc, vcpu, VMCB_CACHE_CR);
+
+ if ((masked ^ val) != 0) {
+ /*
+ * The guest has set bits in %cr0 which we are masking out and
+ * exposing via shadow.
+ *
+ * We must intercept %cr0 reads in order to make the shadowed
+ * view available to the guest.
+ *
+ * Writes to %cr0 must also be intercepted (unconditionally,
+ * unlike the VMCB_INTCPT_CR0_WRITE mechanism) so we can catch
+ * if/when the guest clears those shadowed bits.
+ */
+ svm_enable_intercept(svm_sc, vcpu, VMCB_CR_INTCPT,
+ BIT(0) | BIT(16));
+ } else {
+ /*
+ * When no bits remain in %cr0 which require shadowing, the
+ * unconditional intercept of reads/writes to %cr0 can be
+ * disabled.
+ *
+ * The selective write intercept (VMCB_INTCPT_CR0_WRITE) remains
+ * in place so we can be notified of operations which change
+ * bits other than TS or MP.
+ */
+ svm_disable_intercept(svm_sc, vcpu, VMCB_CR_INTCPT,
+ BIT(0) | BIT(16));
+ }
+ svm_set_dirty(svm_sc, vcpu, VMCB_CACHE_I);
+}
+
+static void
+svm_get_cr0(struct svm_softc *svm_sc, int vcpu, uint64_t *val)
+{
+ struct vmcb *vmcb;
+ struct svm_regctx *regctx;
+
+ vmcb = svm_get_vmcb(svm_sc, vcpu);
+ regctx = svm_get_guest_regctx(svm_sc, vcpu);
+
+ /*
+ * Include the %cr0 bits which exist only in the shadow along with those
+ * in the running vCPU state.
+ */
+ *val = vmcb->state.cr0 | (regctx->sctx_cr0_shadow & ~SVM_CR0_MASK);
+}
+
+static void
+svm_handle_cr0_read(struct svm_softc *svm_sc, int vcpu, enum vm_reg_name reg)
+{
+ uint64_t val;
+ int err;
+
+ svm_get_cr0(svm_sc, vcpu, &val);
+ err = svm_setreg(svm_sc, vcpu, reg, val);
+ ASSERT(err == 0);
+}
+
+static void
+svm_handle_cr0_write(struct svm_softc *svm_sc, int vcpu, enum vm_reg_name reg)
+{
+ struct vmcb_state *state;
+ uint64_t val;
+ int err;
+
+ state = svm_get_vmcb_state(svm_sc, vcpu);
+
+ err = svm_getreg(svm_sc, vcpu, reg, &val);
+ ASSERT(err == 0);
+
+ if ((val & CR0_NW) != 0 && (val & CR0_CD) == 0) {
+ /* NW without CD is nonsensical */
+ vm_inject_gp(svm_sc->vm, vcpu);
+ return;
+ }
+ if ((val & CR0_PG) != 0 && (val & CR0_PE) == 0) {
+ /* PG requires PE */
+ vm_inject_gp(svm_sc->vm, vcpu);
+ return;
+ }
+ if ((state->cr0 & CR0_PG) == 0 && (val & CR0_PG) != 0) {
+ /* When enabling paging, PAE must be enabled if LME is. */
+ if ((state->efer & EFER_LME) != 0 &&
+ (state->cr4 & CR4_PAE) == 0) {
+ vm_inject_gp(svm_sc->vm, vcpu);
+ return;
+ }
+ }
+
+ svm_set_cr0(svm_sc, vcpu, val, true);
+}
+
+static void
+svm_inst_emul_other(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
+{
+ struct vie *vie;
+ struct vm_guest_paging paging;
+
+ /* Let the instruction emulation (hopefully in-kernel) handle it */
+ vmexit->exitcode = VM_EXITCODE_INST_EMUL;
+ bzero(&vmexit->u.inst_emul, sizeof (vmexit->u.inst_emul));
+ vie = vm_vie_ctx(svm_sc->vm, vcpu);
+ svm_paging_info(svm_get_vmcb(svm_sc, vcpu), &paging);
+ vie_init_other(vie, &paging);
+
+ /* The instruction emulation will handle advancing %rip */
+ vmexit->inst_length = 0;
+}
+
static void
svm_update_virqinfo(struct svm_softc *sc, int vcpu)
{
@@ -1282,6 +1451,41 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
svm_save_exitintinfo(svm_sc, vcpu);
switch (code) {
+ case VMCB_EXIT_CR0_READ:
+ if (VMCB_CRx_INFO1_VALID(info1) != 0) {
+ svm_handle_cr0_read(svm_sc, vcpu,
+ vie_regnum_map(VMCB_CRx_INFO1_GPR(info1)));
+ handled = 1;
+ } else {
+ /*
+ * If SMSW is used to read the contents of %cr0, then
+ * the VALID bit will not be set in `info1`, since the
+ * handling is different from the mov-to-reg case.
+ *
+ * Punt to the instruction emulation to handle it.
+ */
+ svm_inst_emul_other(svm_sc, vcpu, vmexit);
+ }
+ break;
+ case VMCB_EXIT_CR0_WRITE:
+ case VMCB_EXIT_CR0_SEL_WRITE:
+ if (VMCB_CRx_INFO1_VALID(info1) != 0) {
+ svm_handle_cr0_write(svm_sc, vcpu,
+ vie_regnum_map(VMCB_CRx_INFO1_GPR(info1)));
+ handled = 1;
+ } else {
+ /*
+ * Writes to %cr0 without VALID being set in `info1` are
+ * initiated by the LMSW and CLTS instructions. While
+ * LMSW (like SMSW) sees little use in modern OSes and
+ * bootloaders, CLTS is still used for handling FPU
+ * state transitions.
+ *
+ * Punt to the instruction emulation to handle them.
+ */
+ svm_inst_emul_other(svm_sc, vcpu, vmexit);
+ }
+ break;
case VMCB_EXIT_IRET:
/*
* Restart execution at "iret" but with the intercept cleared.
@@ -1844,6 +2048,27 @@ check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, uint_t thiscpu)
ctrl->tlb_ctrl = flush;
vcpustate->eptgen = eptgen;
}
+
+static void
+flush_asid(struct svm_softc *sc, int vcpuid)
+{
+ struct svm_vcpu *vcpustate = svm_get_vcpu(sc, vcpuid);
+ struct vmcb_ctrl *ctrl = svm_get_vmcb_ctrl(sc, vcpuid);
+ uint8_t flush;
+
+ flush = hma_svm_asid_update(&vcpustate->hma_asid, flush_by_asid(),
+ true);
+
+ ASSERT(flush != VMCB_TLB_FLUSH_NOTHING);
+ ctrl->asid = vcpustate->hma_asid.hsa_asid;
+ ctrl->tlb_ctrl = flush;
+ svm_set_dirty(sc, vcpuid, VMCB_CACHE_ASID);
+ /*
+ * A potential future optimization: We could choose to update the eptgen
+ * associated with the vCPU, since any pending eptgen change requiring a
+ * flush will be satisfied by the one which has just now been queued.
+ */
+}
#endif /* __FreeBSD__ */
static __inline void
@@ -2180,6 +2405,8 @@ svm_getreg(void *arg, int vcpu, int ident, uint64_t *val)
break;
case VM_REG_GUEST_CR0:
+ svm_get_cr0(sc, vcpu, val);
+ break;
case VM_REG_GUEST_CR2:
case VM_REG_GUEST_CR3:
case VM_REG_GUEST_CR4:
@@ -2251,6 +2478,8 @@ svm_setreg(void *arg, int vcpu, int ident, uint64_t val)
break;
case VM_REG_GUEST_CR0:
+ svm_set_cr0(sc, vcpu, val, false);
+ break;
case VM_REG_GUEST_CR2:
case VM_REG_GUEST_CR3:
case VM_REG_GUEST_CR4:
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.h b/usr/src/uts/i86pc/io/vmm/amd/svm.h
index a3a83dba19..127c04ab6e 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.h
@@ -53,6 +53,7 @@ struct svm_regctx {
uint64_t sctx_dr1;
uint64_t sctx_dr2;
uint64_t sctx_dr3;
+ uint64_t sctx_cr0_shadow;
uint64_t host_dr0;
uint64_t host_dr1;
diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.c b/usr/src/uts/i86pc/io/vmm/amd/vmcb.c
index b00f974c23..5be5240129 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.c
@@ -91,11 +91,6 @@ vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp)
state = &vmcb->state;
switch (ident) {
- case VM_REG_GUEST_CR0:
- res = &state->cr0;
- dirty = VMCB_CACHE_CR;
- break;
-
case VM_REG_GUEST_CR2:
res = &state->cr2;
dirty = VMCB_CACHE_CR2;
diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
index 41bbf98097..0685165530 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
@@ -141,10 +141,15 @@ struct svm_softc;
#define VMCB_EVENTINJ_TYPE_INTn (4 << 8)
/* VMCB exit code, APM vol2 Appendix C */
+#define VMCB_EXIT_CR0_READ 0x00
+#define VMCB_EXIT_CR15_READ 0x0f
+#define VMCB_EXIT_CR0_WRITE 0x10
+#define VMCB_EXIT_CR15_WRITE 0x1f
#define VMCB_EXIT_MC 0x52
#define VMCB_EXIT_INTR 0x60
#define VMCB_EXIT_NMI 0x61
#define VMCB_EXIT_VINTR 0x64
+#define VMCB_EXIT_CR0_SEL_WRITE 0x65
#define VMCB_EXIT_PUSHF 0x70
#define VMCB_EXIT_POPF 0x71
#define VMCB_EXIT_CPUID 0x72
@@ -170,6 +175,13 @@ struct svm_softc;
#define VMCB_EXIT_INVALID -1
/*
+ * Move to/from CRx
+ * Bit definitions to decode EXITINFO1
+ */
+#define VMCB_CRx_INFO1_GPR(x) ((x) & 0xf)
+#define VMCB_CRx_INFO1_VALID(x) ((x) & (1UL << 63))
+
+/*
* Nested page fault.
* Bit definitions to decode EXITINFO1.
*/
diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_instruction_emul.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_instruction_emul.h
index 75abfeeaf6..4680c86a56 100644
--- a/usr/src/uts/i86pc/io/vmm/sys/vmm_instruction_emul.h
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_instruction_emul.h
@@ -51,10 +51,13 @@ struct vie;
struct vie *vie_alloc();
void vie_free(struct vie *);
+enum vm_reg_name vie_regnum_map(uint8_t);
+
void vie_init_mmio(struct vie *vie, const char *inst_bytes, uint8_t inst_length,
const struct vm_guest_paging *paging, uint64_t gpa);
void vie_init_inout(struct vie *vie, const struct vm_inout *inout,
uint8_t inst_len, const struct vm_guest_paging *paging);
+void vie_init_other(struct vie *vie, const struct vm_guest_paging *paging);
int vie_fulfill_mmio(struct vie *vie, const struct vm_mmio *res);
int vie_fulfill_inout(struct vie *vie, const struct vm_inout *res);
@@ -64,12 +67,15 @@ bool vie_pending(const struct vie *vie);
uint64_t vie_mmio_gpa(const struct vie *vie);
void vie_exitinfo(const struct vie *vie, struct vm_exit *vme);
void vie_fallback_exitinfo(const struct vie *vie, struct vm_exit *vme);
+void vie_cs_info(const struct vie *vie, struct vm *vm, int vcpuid,
+ uint64_t *cs_base, int *cs_d);
void vie_reset(struct vie *vie);
void vie_advance_pc(struct vie *vie, uint64_t *nextrip);
int vie_emulate_mmio(struct vie *vie, struct vm *vm, int vcpuid);
int vie_emulate_inout(struct vie *vie, struct vm *vm, int vcpuid);
+int vie_emulate_other(struct vie *vie, struct vm *vm, int vcpuid);
/*
* APIs to fetch and decode the instruction from nested page fault handler.
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index cd235b9e4c..1cd0b23a1c 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -1615,9 +1615,10 @@ vm_handle_mmio_emul(struct vm *vm, int vcpuid)
return (error);
} else if (fault) {
/*
- * If a fault during instruction fetch was encounted, it
- * will have asserted that the appropriate exception be
- * injected at next entry. No further work is required.
+ * If a fault during instruction fetch was encountered,
+ * it will have asserted that the appropriate exception
+ * be injected at next entry.
+ * No further work is required.
*/
return (0);
}
@@ -1724,6 +1725,56 @@ repeat:
}
+/*
+ * Handle a VM_EXITCODE_INST_EMUL exit: fetch the instruction at the guest
+ * %rip (offset by the %cs base), decode it, and emulate it through
+ * vie_emulate_other() using the vCPU's vie context.
+ *
+ * Returns 0 if the instruction was emulated or if a fault was raised during
+ * the fetch, -1 if the instruction could not be decoded, and otherwise the
+ * error reported by the fetch or by the emulation.  When decode or emulation
+ * fails, the exit info is filled in by vie_fallback_exitinfo() beforehand.
+ */
static int
+vm_handle_inst_emul(struct vm *vm, int vcpuid)
+{
+ struct vie *vie;
+ struct vcpu *vcpu;
+ struct vm_exit *vme;
+ uint64_t cs_base;
+ int error, fault, cs_d;
+
+ vcpu = &vm->vcpu[vcpuid];
+ vme = &vcpu->exitinfo;
+ vie = vcpu->vie_ctx;
+
+ /* The %cs base locates the instruction; the D flag is used for decode */
+ vie_cs_info(vie, vm, vcpuid, &cs_base, &cs_d);
+
+ /* Fetch the faulting instruction */
+ ASSERT(vie_needs_fetch(vie));
+ error = vie_fetch_instruction(vie, vm, vcpuid, vme->rip + cs_base,
+ &fault);
+ if (error != 0) {
+ return (error);
+ } else if (fault) {
+ /*
+ * If a fault during instruction fetch was encountered, it will
+ * have asserted that the appropriate exception be injected at
+ * next entry. No further work is required.
+ */
+ return (0);
+ }
+
+ if (vie_decode_instruction(vie, vm, vcpuid, cs_d) != 0) {
+ /* Dump (unrecognized) instruction bytes in userspace */
+ vie_fallback_exitinfo(vie, vme);
+ return (-1);
+ }
+
+ error = vie_emulate_other(vie, vm, vcpuid);
+ if (error != 0) {
+ /*
+ * Instruction emulation was unable to complete successfully, so
+ * kick it out to userspace for handling.
+ */
+ vie_fallback_exitinfo(vie, vme);
+ } else {
+ /* Update %rip now that instruction has been emulated */
+ vie_advance_pc(vie, &vcpu->nextrip);
+ }
+ return (error);
+}
+
+static int
vm_handle_suspend(struct vm *vm, int vcpuid)
{
#ifdef __FreeBSD__
@@ -2362,6 +2413,9 @@ restart:
case VM_EXITCODE_INOUT:
error = vm_handle_inout(vm, vcpuid, vme);
break;
+ case VM_EXITCODE_INST_EMUL:
+ error = vm_handle_inst_emul(vm, vcpuid);
+ break;
case VM_EXITCODE_MONITOR:
case VM_EXITCODE_MWAIT:
case VM_EXITCODE_VMINSN:
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
index 02b87a79f6..b0501a60ad 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
@@ -69,13 +69,14 @@ enum vie_status {
VIES_INIT = (1U << 0),
VIES_MMIO = (1U << 1),
VIES_INOUT = (1U << 2),
- VIES_INST_FETCH = (1U << 3),
- VIES_INST_DECODE = (1U << 4),
- VIES_PENDING_MMIO = (1U << 5),
- VIES_PENDING_INOUT = (1U << 6),
- VIES_REPEAT = (1U << 7),
- VIES_USER_FALLBACK = (1U << 8),
- VIES_COMPLETE = (1U << 9),
+ VIES_OTHER = (1U << 3),
+ VIES_INST_FETCH = (1U << 4),
+ VIES_INST_DECODE = (1U << 5),
+ VIES_PENDING_MMIO = (1U << 6),
+ VIES_PENDING_INOUT = (1U << 7),
+ VIES_REPEAT = (1U << 8),
+ VIES_USER_FALLBACK = (1U << 9),
+ VIES_COMPLETE = (1U << 10),
};
/* State of request to perform emulated access (inout or MMIO) */
@@ -181,6 +182,7 @@ enum {
VIE_OP_TYPE_ADD,
VIE_OP_TYPE_TEST,
VIE_OP_TYPE_BEXTR,
+ VIE_OP_TYPE_CLTS,
VIE_OP_TYPE_LAST
};
@@ -199,6 +201,11 @@ static const struct vie_op three_byte_opcodes_0f38[256] = {
};
static const struct vie_op two_byte_opcodes[256] = {
+ [0x06] = {
+ .op_byte = 0x06,
+ .op_type = VIE_OP_TYPE_CLTS,
+ .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+ },
[0xAE] = {
.op_byte = 0xAE,
.op_type = VIE_OP_TYPE_TWOB_GRP15,
@@ -407,6 +414,13 @@ vie_free(struct vie *vie)
kmem_free(vie, sizeof (struct vie));
}
+/*
+ * Return the vm_reg_name stored at index 'regnum' of gpr_map.  The index is
+ * enforced with a VERIFY, so passing a value of 16 or greater is fatal rather
+ * than an out-of-range table read.
+ */
+enum vm_reg_name
+vie_regnum_map(uint8_t regnum)
+{
+ VERIFY3U(regnum, <, 16);
+ return (gpr_map[regnum]);
+}
+
static void
vie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr)
{
@@ -1876,6 +1890,30 @@ vie_emulate_twob_group15(struct vie *vie, struct vm *vm, int vcpuid,
}
+/*
+ * Emulate CLTS by clearing the TS bit in the guest's %cr0.  If the guest is
+ * not at CPL 0, a #GP is injected instead and %cr0 is left untouched.
+ * Always returns 0.
+ */
static int
+vie_emulate_clts(struct vie *vie, struct vm *vm, int vcpuid)
+{
+ uint64_t val;
+ int error;
+
+ if (vie->paging.cpl != 0) {
+ vm_inject_gp(vm, vcpuid);
+ /*
+ * NOTE(review): zeroing num_processed presumably keeps a later
+ * vie_advance_pc() from moving %rip past the faulting
+ * instruction -- confirm against vie_advance_pc().
+ */
+ vie->num_processed = 0;
+ return (0);
+ }
+
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, &val);
+ ASSERT(error == 0);
+
+ /* Clear %cr0.TS */
+ val &= ~CR0_TS;
+
+ error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR0, val);
+ ASSERT(error == 0);
+
+ return (0);
+}
+
+static int
vie_mmio_read(struct vie *vie, struct vm *vm, int cpuid, uint64_t gpa,
uint64_t *rval, int bytes)
{
@@ -2261,6 +2299,28 @@ vie_emulate_inout(struct vie *vie, struct vm *vm, int vcpuid)
return (err);
}
+/*
+ * Emulate a decoded instruction which is neither an MMIO nor an in/out access.
+ *
+ * The vie must have both VIES_OTHER and VIES_INST_DECODE set in its status,
+ * otherwise EINVAL is returned.  CLTS is the only op type handled here; any
+ * other op type also yields EINVAL.  For a handled op type the result of its
+ * emulation routine is returned.
+ */
+int
+vie_emulate_other(struct vie *vie, struct vm *vm, int vcpuid)
+{
+ int error;
+
+ /* Both flags are required: set up as "other" and already decoded */
+ if ((vie->status & (VIES_INST_DECODE | VIES_OTHER)) !=
+ (VIES_INST_DECODE | VIES_OTHER)) {
+ return (EINVAL);
+ }
+
+ switch (vie->op.op_type) {
+ case VIE_OP_TYPE_CLTS:
+ error = vie_emulate_clts(vie, vm, vcpuid);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
+
void
vie_reset(struct vie *vie)
{
@@ -2338,6 +2398,35 @@ vie_fallback_exitinfo(const struct vie *vie, struct vm_exit *vme)
vme->exitcode = VM_EXITCODE_INST_EMUL;
}
+/*
+ * Report, through 'cs_base' and 'cs_d', the code segment base and the
+ * default-operand-size flag for the vCPU, chosen according to the CPU mode
+ * recorded in the vie's paging info:
+ *  - real mode: base from the %cs descriptor, cs_d = 0
+ *  - protected/compatibility mode: base from the %cs descriptor, cs_d = 1 if
+ *    SEG_DESC_DEF32() is set in the descriptor access bits, else 0
+ *  - any other mode: base = 0, cs_d = 0
+ *
+ * The vie must already have been initialized (VIES_INIT), which is enforced
+ * with a VERIFY.
+ */
+void
+vie_cs_info(const struct vie *vie, struct vm *vm, int vcpuid, uint64_t *cs_base,
+ int *cs_d)
+{
+ struct seg_desc cs_desc;
+ int error;
+
+ error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_CS, &cs_desc);
+ ASSERT(error == 0);
+
+ /* Initialization required for the paging info to be populated */
+ VERIFY(vie->status & VIES_INIT);
+ switch (vie->paging.cpu_mode) {
+ case CPU_MODE_REAL:
+ *cs_base = cs_desc.base;
+ *cs_d = 0;
+ break;
+ case CPU_MODE_PROTECTED:
+ case CPU_MODE_COMPATIBILITY:
+ *cs_base = cs_desc.base;
+ *cs_d = SEG_DESC_DEF32(cs_desc.access) ? 1 : 0;
+ break;
+ default:
+ /* Remaining modes: the descriptor base and D flag are not used */
+ *cs_base = 0;
+ *cs_d = 0;
+ break;
+ }
+}
+
bool
vie_pending(const struct vie *vie)
{
@@ -2556,6 +2645,19 @@ vie_init_inout(struct vie *vie, const struct vm_inout *inout, uint8_t inst_len,
vie->num_processed = inst_len;
}
+/*
+ * Prepare a vie for emulating an instruction which is neither an MMIO nor an
+ * in/out access.  All prior contents of the vie are discarded, the base, index
+ * and segment registers are set to VM_REG_LAST, the status is set to
+ * VIES_INIT | VIES_OTHER, and the caller's paging info is copied in.
+ */
+void
+vie_init_other(struct vie *vie, const struct vm_guest_paging *paging)
+{
+ bzero(vie, sizeof (struct vie));
+
+ vie->base_register = VM_REG_LAST;
+ vie->index_register = VM_REG_LAST;
+ vie->segment_register = VM_REG_LAST;
+ vie->status = VIES_INIT | VIES_OTHER;
+
+ vie->paging = *paging;
+}
+
int
vie_fulfill_mmio(struct vie *vie, const struct vm_mmio *result)
{
@@ -2873,7 +2975,7 @@ vie_fetch_instruction(struct vie *vie, struct vm *vm, int vcpuid, uint64_t rip,
struct vm_copyinfo copyinfo[2];
int error, prot;
- if (vie->status != (VIES_INIT|VIES_MMIO)) {
+ if ((vie->status & VIES_INIT) == 0) {
return (EINVAL);
}
diff --git a/usr/src/uts/i86pc/os/hma.c b/usr/src/uts/i86pc/os/hma.c
index a53c797e4b..215243ea98 100644
--- a/usr/src/uts/i86pc/os/hma.c
+++ b/usr/src/uts/i86pc/os/hma.c
@@ -18,6 +18,7 @@
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/machsystm.h>
+#include <sys/archsystm.h>
#include <sys/controlregs.h>
#include <sys/x86_archext.h>
#include <sys/id_space.h>
@@ -522,9 +523,9 @@ uint8_t
hma_svm_asid_update(hma_svm_asid_t *vcp, boolean_t flush_by_asid,
boolean_t npt_flush)
{
- hma_svm_asid_t *hcp = &hma_svm_cpu_asid[CPU->cpu_seqid];
-
- ASSERT(curthread->t_preempt != 0);
+ hma_svm_asid_t *hcp;
+ ulong_t iflag;
+ uint8_t res = VMCB_FLUSH_NOTHING;
/*
* If NPT changes dictate a TLB flush and by-ASID flushing is not
@@ -534,6 +535,17 @@ hma_svm_asid_update(hma_svm_asid_t *vcp, boolean_t flush_by_asid,
vcp->hsa_gen = 0;
}
+ /*
+ * It is expected that ASID resource updates will commonly be done
+ * inside a VMM critical section where the GIF is already cleared,
+ * preventing any possibility of interruption. Since that cannot be
+ * checked (there is no easy way to read the GIF), %rflags.IF is also
+ * cleared for edge cases where an ASID update is performed outside of
+ * such a GIF-safe critical section.
+ */
+ iflag = intr_clear();
+
+ hcp = &hma_svm_cpu_asid[CPU->cpu_seqid];
if (vcp->hsa_gen != hcp->hsa_gen) {
hcp->hsa_asid++;
@@ -556,14 +568,17 @@ hma_svm_asid_update(hma_svm_asid_t *vcp, boolean_t flush_by_asid,
ASSERT3U(vcp->hsa_asid, <, hma_svm_max_asid);
if (flush_by_asid) {
- return (VMCB_FLUSH_ASID);
+ res = VMCB_FLUSH_ASID;
+ } else {
+ res = VMCB_FLUSH_ALL;
}
- return (VMCB_FLUSH_ALL);
} else if (npt_flush) {
ASSERT(flush_by_asid);
- return (VMCB_FLUSH_ASID);
+ res = VMCB_FLUSH_ASID;
}
- return (VMCB_FLUSH_NOTHING);
+
+ intr_restore(iflag);
+ return (res);
}
static int