summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dan McDonald <danmcd@mnx.io> 2022-08-14 10:56:57 -0400
committer: Dan McDonald <danmcd@mnx.io> 2022-08-14 10:56:57 -0400
commit: efad77c95d2ca5a22a626a8d732e9e206919c0fb (patch)
tree: 96717dd35d86c11f9a2cb67e56b0f5118acd0b84
parent: 8edaf2d79d344fa9c1acb35f317dedd5005871a7 (diff)
parent: ee6ee36a8ff1701c4e61e6f118446b145220478c (diff)
download: illumos-joyent-efad77c95d2ca5a22a626a8d732e9e206919c0fb.tar.gz
[illumos-gate merge]
commit ee6ee36a8ff1701c4e61e6f118446b145220478c
    14838 Rename erratum 147 handling
    14839 Untangle erratum 147 from lockstat
    14840 Modernize lockstat probes
    14865 mutex_tryenter:adaptive-acquire probe never fires
commit 64439ec0071c576648f76b4466ad6ee7a580ed33
    14579 expose virtio 9P transport device
commit ec8422d0a51b3bf0b6550dd15f125990a3f73f4c
    7346 beadm list shows duplicates when zone has datasets from multiple pools
commit 46dc144bc2859392d4c62f3e72d661e7b3c22a8e
    14900 ddi_fm_capable(9F) man page does not match source
Conflicts:
    manifest
    usr/src/man/man9f/ddi_fm_init.9f
    usr/src/uts/intel/os/driver_aliases
    usr/src/uts/intel/os/name_to_major
-rw-r--r-- manifest 3
-rw-r--r-- usr/src/cmd/devfsadm/misc_link.c 32
-rw-r--r-- usr/src/lib/libbe/common/be_activate.c 27
-rw-r--r-- usr/src/lib/libbe/common/be_create.c 157
-rw-r--r-- usr/src/lib/libbe/common/be_list.c 17
-rw-r--r-- usr/src/lib/libbe/common/be_mount.c 25
-rw-r--r-- usr/src/lib/libbe/common/be_rename.c 24
-rw-r--r-- usr/src/lib/libbe/common/be_snapshot.c 29
-rw-r--r-- usr/src/lib/libbe/common/be_utils.c 144
-rw-r--r-- usr/src/lib/libbe/common/be_zones.c 27
-rw-r--r-- usr/src/lib/libbe/common/libbe_priv.h 8
-rw-r--r-- usr/src/man/man4d/Makefile 3
-rw-r--r-- usr/src/man/man4d/vio9p.4d 141
-rw-r--r-- usr/src/pkg/manifests/driver-storage-vio9p.p5m 40
-rw-r--r-- usr/src/uts/common/Makefile.files 3
-rw-r--r-- usr/src/uts/common/Makefile.rules 4
-rw-r--r-- usr/src/uts/common/dtrace/lockstat.c 5
-rw-r--r-- usr/src/uts/common/io/vio9p/vio9p.c 839
-rw-r--r-- usr/src/uts/common/io/vio9p/vio9p_impl.h 126
-rw-r--r-- usr/src/uts/common/io/vioblk/vioblk.c 2
-rw-r--r-- usr/src/uts/common/io/vioif/vioif.c 4
-rw-r--r-- usr/src/uts/common/io/vioscsi/vioscsi.c 2
-rw-r--r-- usr/src/uts/common/io/virtio/virtio.h 6
-rw-r--r-- usr/src/uts/common/io/virtio/virtio_dma.c 30
-rw-r--r-- usr/src/uts/common/io/virtio/virtio_main.c 2
-rw-r--r-- usr/src/uts/common/sys/Makefile 1
-rw-r--r-- usr/src/uts/common/sys/lockstat.h 8
-rw-r--r-- usr/src/uts/common/sys/vio9p.h 49
-rw-r--r-- usr/src/uts/i86pc/Makefile.workarounds 5
-rw-r--r-- usr/src/uts/i86pc/os/cpuid.c 2
-rw-r--r-- usr/src/uts/i86pc/os/mp_startup.c 27
-rw-r--r-- usr/src/uts/i86pc/os/startup.c 8
-rw-r--r-- usr/src/uts/intel/Makefile.intel 3
-rw-r--r-- usr/src/uts/intel/ml/lock_prim.s 390
-rw-r--r-- usr/src/uts/intel/sys/x86_archext.h 6
-rw-r--r-- usr/src/uts/intel/vio9p/Makefile 67
36 files changed, 1866 insertions, 400 deletions
diff --git a/manifest b/manifest
index 6356e0c756..e7c074155f 100644
--- a/manifest
+++ b/manifest
@@ -809,6 +809,7 @@ f kernel/drv/amd64/usbskel 0755 root sys
f kernel/drv/amd64/usbsksp 0755 root sys
f kernel/drv/amd64/usbsprl 0755 root sys
f kernel/drv/amd64/vgatext 0755 root sys
+f kernel/drv/amd64/vio9p 0755 root sys
f kernel/drv/amd64/vioblk 0755 root sys
f kernel/drv/amd64/vioif 0755 root sys
f kernel/drv/amd64/vioscsi 0755 root sys
@@ -4899,6 +4900,7 @@ f usr/include/sys/vfs_opreg.h 0644 root bin
f usr/include/sys/vfstab.h 0644 root bin
f usr/include/sys/vgareg.h 0644 root bin
f usr/include/sys/videodev2.h 0644 root bin
+f usr/include/sys/vio9p.h 0644 root bin
f usr/include/sys/visual_io.h 0644 root bin
f usr/include/sys/vlan.h 0644 root bin
f usr/include/sys/vm.h 0644 root bin
@@ -18988,6 +18990,7 @@ f usr/share/man/man4d/usbftdi.4d 0444 root bin
f usr/share/man/man4d/usbsacm.4d 0444 root bin
f usr/share/man/man4d/usbsksp.4d 0444 root bin
f usr/share/man/man4d/usbsprl.4d 0444 root bin
+f usr/share/man/man4d/vio9p.4d 0444 root bin
f usr/share/man/man4d/vioblk.4d 0444 root bin
f usr/share/man/man4d/vioif.4d 0444 root bin
f usr/share/man/man4d/vioscsi.4d 0444 root bin
diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c
index 936560912f..e8b56c7fce 100644
--- a/usr/src/cmd/devfsadm/misc_link.c
+++ b/usr/src/cmd/devfsadm/misc_link.c
@@ -23,6 +23,7 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright 2022 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2022 Oxide Computer Company
*/
#include <regex.h>
@@ -60,6 +61,7 @@ static int cpuid(di_minor_t minor, di_node_t node);
static int glvc(di_minor_t minor, di_node_t node);
static int ses_callback(di_minor_t minor, di_node_t node);
static int kmdrv_create(di_minor_t minor, di_node_t node);
+static int vio9p_create(di_minor_t minor, di_node_t node);
static devfsadm_create_t misc_cbt[] = {
{ "pseudo", "ddi_pseudo", "(^sad$)",
@@ -215,7 +217,10 @@ static devfsadm_create_t misc_cbt[] = {
},
{ "pseudo", "ddi_pseudo", "overlay",
TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
- }
+ },
+ { "9p", "ddi_pseudo", "vio9p",
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, vio9p_create,
+ },
};
DEVFSADM_CREATE_INIT_V0(misc_cbt);
@@ -257,7 +262,10 @@ static devfsadm_remove_t misc_remove_cbt[] = {
},
{ "pseudo", "^sctp|sctp6$",
RM_PRE | RM_ALWAYS, ILEVEL_0, devfsadm_rm_link
- }
+ },
+ { "9p", "^9p/[0-9]+$",
+ RM_PRE | RM_HOT | RM_ALWAYS, ILEVEL_0, devfsadm_rm_all
+ },
};
/* Rules for gpio devices */
@@ -643,6 +651,26 @@ av_create(di_minor_t minor, di_node_t node)
}
/*
+ * Create device nodes for Virtio 9P channels:
+ * /dev/9p/[0-9]+
+ */
+static int
+vio9p_create(di_minor_t minor, di_node_t node)
+{
+ char *minor_name = di_minor_name(minor);
+ char path[PATH_MAX + 1];
+
+ if (minor_name == NULL || strcmp(minor_name, "9p") != 0) {
+ return (DEVFSADM_CONTINUE);
+ }
+
+ (void) snprintf(path, sizeof (path), "9p/%d", di_instance(node));
+ (void) devfsadm_mklink(path, node, minor, 0);
+
+ return (DEVFSADM_CONTINUE);
+}
+
+/*
* Creates /dev/lom and /dev/tsalarm:ctl for tsalarm node
*/
static int
diff --git a/usr/src/lib/libbe/common/be_activate.c b/usr/src/lib/libbe/common/be_activate.c
index 4ec8f055e2..581ddc41c2 100644
--- a/usr/src/lib/libbe/common/be_activate.c
+++ b/usr/src/lib/libbe/common/be_activate.c
@@ -26,7 +26,7 @@
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright 2016 Toomas Soome <tsoome@me.com>
- * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
*/
#include <assert.h>
@@ -266,7 +266,12 @@ _be_activate(char *be_name, be_nextboot_state_t nextboot)
return (ret);
}
- be_make_root_ds(cb.obe_zpool, cb.obe_name, root_ds, sizeof (root_ds));
+ if ((ret = be_make_root_ds(cb.obe_zpool, cb.obe_name, root_ds,
+ sizeof (root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, cb.obe_zpool, cb.obe_name);
+ return (ret);
+ }
cb.obe_root_ds = strdup(root_ds);
if (getzoneid() == GLOBAL_ZONEID) {
@@ -573,8 +578,13 @@ set_canmount(be_node_list_t *be_nodes, char *value)
while (list != NULL) {
be_dataset_list_t *datasets = list->be_node_datasets;
- be_make_root_ds(list->be_rpool, list->be_node_name, ds_path,
- sizeof (ds_path));
+ if ((err = be_make_root_ds(list->be_rpool, list->be_node_name,
+ ds_path, sizeof (ds_path))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container "
+ "dataset for %s/%s\n"), __func__,
+ list->be_rpool, list->be_node_name);
+ return (err);
+ }
if ((zhp = zfs_open(g_zfs, ds_path, ZFS_TYPE_DATASET)) ==
NULL) {
@@ -605,9 +615,14 @@ set_canmount(be_node_list_t *be_nodes, char *value)
ZFS_CLOSE(zhp);
while (datasets != NULL) {
- be_make_root_ds(list->be_rpool,
+ if ((err = be_make_root_ds(list->be_rpool,
datasets->be_dataset_name, ds_path,
- sizeof (ds_path));
+ sizeof (ds_path))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE "
+ "container dataset for %s/%s\n"), __func__,
+ list->be_rpool, datasets->be_dataset_name);
+ return (err);
+ }
if ((zhp = zfs_open(g_zfs, ds_path, ZFS_TYPE_DATASET))
== NULL) {
diff --git a/usr/src/lib/libbe/common/be_create.c b/usr/src/lib/libbe/common/be_create.c
index 4158ddb677..62f5bfe112 100644
--- a/usr/src/lib/libbe/common/be_create.c
+++ b/usr/src/lib/libbe/common/be_create.c
@@ -24,7 +24,7 @@
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014, 2015 by Delphix. All rights reserved.
* Copyright (c) 2016 Martin Matuska. All rights reserved.
- * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
*/
/*
@@ -206,8 +206,12 @@ be_init(nvlist_t *be_attrs)
}
/* Generate string for BE's root dataset */
- be_make_root_ds(bt.nbe_zpool, bt.nbe_name, nbe_root_ds,
- sizeof (nbe_root_ds));
+ if ((ret = be_make_root_ds(bt.nbe_zpool, bt.nbe_name, nbe_root_ds,
+ sizeof (nbe_root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, bt.nbe_zpool, bt.nbe_name);
+ return (ret);
+ }
/*
* Create property list for new BE root dataset. If some
@@ -446,8 +450,12 @@ be_destroy(nvlist_t *be_attrs)
}
/* Generate string for obe_name's root dataset */
- be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds,
- sizeof (obe_root_ds));
+ if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds,
+ sizeof (obe_root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name);
+ return (ret);
+ }
bt.obe_root_ds = obe_root_ds;
if (getzoneid() != GLOBAL_ZONEID) {
@@ -780,10 +788,28 @@ be_copy(nvlist_t *be_attrs)
}
/*
+ * If an auto named BE is desired, it must be in the same
+ * pool as the original BE.
+ */
+ if (bt.nbe_name == NULL && bt.nbe_zpool != NULL) {
+ be_print_err(gettext("be_copy: cannot specify pool "
+ "name when creating an auto named BE\n"));
+ ret = BE_ERR_INVAL;
+ goto done;
+ }
+
+ /*
+ * If the zpool name to create new BE in is not provided,
+ * create the new BE in the original BE's pool.
+ */
+ if (bt.nbe_zpool == NULL)
+ bt.nbe_zpool = bt.obe_zpool;
+
+ /*
* If new BE name provided, validate the BE name and then verify
* that new BE name doesn't already exist in some pool.
*/
- if (bt.nbe_name) {
+ if (bt.nbe_name != NULL) {
/* Validate original BE name */
if (!be_valid_be_name(bt.nbe_name)) {
be_print_err(gettext("be_copy: "
@@ -808,8 +834,13 @@ be_copy(nvlist_t *be_attrs)
goto done;
}
} else {
- be_make_root_ds(bt.nbe_zpool, bt.nbe_name, nbe_root_ds,
- sizeof (nbe_root_ds));
+ if ((ret = be_make_root_ds(bt.nbe_zpool, bt.nbe_name,
+ nbe_root_ds, sizeof (nbe_root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE "
+ "container dataset for %s/%s\n"), __func__,
+ bt.nbe_zpool, bt.nbe_name);
+ goto done;
+ }
if (zfs_dataset_exists(g_zfs, nbe_root_ds,
ZFS_TYPE_FILESYSTEM)) {
be_print_err(gettext("be_copy: BE (%s) already "
@@ -820,17 +851,6 @@ be_copy(nvlist_t *be_attrs)
}
} else {
/*
- * If an auto named BE is desired, it must be in the same
- * pool is the original BE.
- */
- if (bt.nbe_zpool != NULL) {
- be_print_err(gettext("be_copy: cannot specify pool "
- "name when creating an auto named BE\n"));
- ret = BE_ERR_INVAL;
- goto done;
- }
-
- /*
* Generate auto named BE
*/
if ((bt.nbe_name = be_auto_be_name(bt.obe_name))
@@ -844,19 +864,19 @@ be_copy(nvlist_t *be_attrs)
autoname = B_TRUE;
}
- /*
- * If zpool name to create new BE in is not provided,
- * create new BE in original BE's pool.
- */
- if (bt.nbe_zpool == NULL) {
- bt.nbe_zpool = bt.obe_zpool;
- }
-
/* Get root dataset names for obe_name and nbe_name */
- be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds,
- sizeof (obe_root_ds));
- be_make_root_ds(bt.nbe_zpool, bt.nbe_name, nbe_root_ds,
- sizeof (nbe_root_ds));
+ if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds,
+ sizeof (obe_root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name);
+ goto done;
+ }
+ if ((ret = be_make_root_ds(bt.nbe_zpool, bt.nbe_name, nbe_root_ds,
+ sizeof (nbe_root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, bt.nbe_zpool, bt.nbe_name);
+ goto done;
+ }
bt.obe_root_ds = obe_root_ds;
bt.nbe_root_ds = nbe_root_ds;
@@ -966,8 +986,15 @@ be_copy(nvlist_t *be_attrs)
* Regenerate string for new BE's
* root dataset name
*/
- be_make_root_ds(bt.nbe_zpool, bt.nbe_name,
- nbe_root_ds, sizeof (nbe_root_ds));
+ if ((ret = be_make_root_ds(bt.nbe_zpool,
+ bt.nbe_name, nbe_root_ds,
+ sizeof (nbe_root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext(
+ "%s: failed to get BE container "
+ "dataset for %s/%s\n"), __func__,
+ bt.nbe_zpool, bt.nbe_name);
+ goto done;
+ }
bt.nbe_root_ds = nbe_root_ds;
/*
@@ -1122,10 +1149,18 @@ be_copy(nvlist_t *be_attrs)
* Update new BE's vfstab.
*/
- be_make_root_container_ds(bt.obe_zpool, obe_root_container,
- sizeof (obe_root_container));
- be_make_root_container_ds(bt.nbe_zpool, nbe_root_container,
- sizeof (nbe_root_container));
+ if ((ret = be_make_root_container_ds(bt.obe_zpool, obe_root_container,
+ sizeof (obe_root_container))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s\n"), __func__, bt.obe_zpool);
+ goto done;
+ }
+ if ((ret = be_make_root_container_ds(bt.nbe_zpool, nbe_root_container,
+ sizeof (nbe_root_container))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s\n"), __func__, bt.nbe_zpool);
+ goto done;
+ }
if ((ret = be_update_vfstab(bt.nbe_name, obe_root_container,
nbe_root_container, &fld, new_mp)) != BE_SUCCESS) {
@@ -1258,11 +1293,15 @@ be_find_zpool_callback(zpool_handle_t *zlp, void *data)
be_transaction_data_t *bt = data;
const char *zpool = zpool_get_name(zlp);
char be_root_ds[MAXPATHLEN];
+ int ret = 0;
/*
* Generate string for the BE's root dataset
*/
- be_make_root_ds(zpool, bt->obe_name, be_root_ds, sizeof (be_root_ds));
+ if (be_make_root_ds(zpool, bt->obe_name, be_root_ds,
+ sizeof (be_root_ds)) != BE_SUCCESS) {
+ goto out;
+ }
/*
* Check if dataset exists
@@ -1270,12 +1309,12 @@ be_find_zpool_callback(zpool_handle_t *zlp, void *data)
if (zfs_dataset_exists(g_zfs, be_root_ds, ZFS_TYPE_FILESYSTEM)) {
/* BE's root dataset exists in zpool */
bt->obe_zpool = strdup(zpool);
- zpool_close(zlp);
- return (1);
+ ret = 1;
}
+out:
zpool_close(zlp);
- return (0);
+ return (ret);
}
/*
@@ -1297,23 +1336,27 @@ be_exists_callback(zpool_handle_t *zlp, void *data)
const char *zpool = zpool_get_name(zlp);
char *be_name = data;
char be_root_ds[MAXPATHLEN];
+ int ret = 0;
/*
* Generate string for the BE's root dataset
*/
- be_make_root_ds(zpool, be_name, be_root_ds, sizeof (be_root_ds));
+ if (be_make_root_ds(zpool, be_name, be_root_ds,
+ sizeof (be_root_ds)) != BE_SUCCESS) {
+ goto out;
+ }
/*
* Check if dataset exists
*/
if (zfs_dataset_exists(g_zfs, be_root_ds, ZFS_TYPE_FILESYSTEM)) {
/* BE's root dataset exists in zpool */
- zpool_close(zlp);
- return (1);
+ ret = 1;
}
+out:
zpool_close(zlp);
- return (0);
+ return (ret);
}
/*
@@ -1779,8 +1822,12 @@ be_destroy_zone_roots(char *zonepath_ds, be_destroy_data_t *dd)
int ret = BE_SUCCESS;
/* Generate string for the root container dataset for this zone. */
- be_make_container_ds(zonepath_ds, zone_container_ds,
- sizeof (zone_container_ds));
+ if ((ret = be_make_container_ds(zonepath_ds, zone_container_ds,
+ sizeof (zone_container_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s\n"), __func__, zonepath_ds);
+ return (ret);
+ }
/* Get handle to this zone's root container dataset. */
if ((zhp = zfs_open(g_zfs, zone_container_ds, ZFS_TYPE_FILESYSTEM))
@@ -2049,8 +2096,12 @@ be_copy_zones(char *obe_name, char *obe_root_ds, char *nbe_root_ds)
goto done;
}
- be_make_container_ds(zonepath_ds, zone_container_ds,
- sizeof (zone_container_ds));
+ if ((ret = be_make_container_ds(zonepath_ds, zone_container_ds,
+ sizeof (zone_container_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container "
+ "dataset for %s\n"), __func__, zonepath_ds);
+ goto done;
+ }
if ((z_zhp = zfs_open(g_zfs, zoneroot_ds,
ZFS_TYPE_FILESYSTEM)) == NULL) {
@@ -2901,8 +2952,12 @@ be_create_container_ds(char *zpool)
char be_container_ds[MAXPATHLEN];
/* Generate string for BE container dataset for this pool */
- be_make_container_ds(zpool, be_container_ds,
- sizeof (be_container_ds));
+ if (be_make_container_ds(zpool, be_container_ds,
+ sizeof (be_container_ds)) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s\n"), __func__, zpool);
+ return (B_FALSE);
+ }
if (!zfs_dataset_exists(g_zfs, be_container_ds, ZFS_TYPE_FILESYSTEM)) {
diff --git a/usr/src/lib/libbe/common/be_list.c b/usr/src/lib/libbe/common/be_list.c
index 3e0833ea83..85b62eff38 100644
--- a/usr/src/lib/libbe/common/be_list.c
+++ b/usr/src/lib/libbe/common/be_list.c
@@ -29,7 +29,7 @@
* Copyright 2015 Toomas Soome <tsoome@me.com>
* Copyright 2015 Gary Mills
* Copyright (c) 2016 Martin Matuska. All rights reserved.
- * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
*/
#include <assert.h>
@@ -445,8 +445,12 @@ be_get_list_callback(zpool_handle_t *zlp, void *data)
/*
* Generate string for the BE container dataset
*/
- be_make_container_ds(rpool, be_container_ds,
- sizeof (be_container_ds));
+ if (be_make_container_ds(rpool, be_container_ds,
+ sizeof (be_container_ds)) != BE_SUCCESS) {
+ /* Move on to the next pool */
+ zpool_close(zlp);
+ return (0);
+ }
/*
* If a BE name was specified we use it's root dataset in place of
@@ -454,12 +458,17 @@ be_get_list_callback(zpool_handle_t *zlp, void *data)
* the information for the specified BE.
*/
if (cb->be_name != NULL) {
+ int rv;
+
if (!be_valid_be_name(cb->be_name))
return (BE_ERR_INVAL);
/*
* Generate string for the BE root dataset
*/
- be_make_root_ds(rpool, cb->be_name, be_ds, sizeof (be_ds));
+ if ((rv = be_make_root_ds(rpool, cb->be_name, be_ds,
+ sizeof (be_ds))) != BE_SUCCESS) {
+ return (rv);
+ }
open_ds = be_ds;
} else {
open_ds = be_container_ds;
diff --git a/usr/src/lib/libbe/common/be_mount.c b/usr/src/lib/libbe/common/be_mount.c
index 98c861fca4..672424d1f2 100644
--- a/usr/src/lib/libbe/common/be_mount.c
+++ b/usr/src/lib/libbe/common/be_mount.c
@@ -24,6 +24,7 @@
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
* Copyright 2015 EveryCity Ltd.
* Copyright (c) 2015 by Delphix. All rights reserved.
+ * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
*/
/*
@@ -277,8 +278,12 @@ _be_mount(char *be_name, char **altroot, int flags)
}
/* Generate string for obe_name's root dataset */
- be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds,
- sizeof (obe_root_ds));
+ if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds,
+ sizeof (obe_root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name);
+ return (ret);
+ }
bt.obe_root_ds = obe_root_ds;
/* Get handle to BE's root dataset */
@@ -448,8 +453,12 @@ _be_unmount(char *be_name, int flags)
}
/* Generate string for obe_name's root dataset */
- be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds,
- sizeof (obe_root_ds));
+ if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds,
+ sizeof (obe_root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name);
+ return (ret);
+ }
bt.obe_root_ds = obe_root_ds;
/* Get handle to BE's root dataset */
@@ -1565,15 +1574,13 @@ iter_shared_fs_callback(zfs_handle_t *zhp, void *data)
pool = strtok(tmp_name, "/");
if (pool) {
- /* Get the name of this pool's container dataset */
- be_make_container_ds(pool, container_ds,
- sizeof (container_ds));
-
/*
* If what we're processing is this pool's BE container
* dataset, skip it.
*/
- if (strcmp(name, container_ds) == 0) {
+ if (be_make_container_ds(pool, container_ds,
+ sizeof (container_ds)) == BE_SUCCESS &&
+ strcmp(name, container_ds) == 0) {
ZFS_CLOSE(zhp);
return (0);
}
diff --git a/usr/src/lib/libbe/common/be_rename.c b/usr/src/lib/libbe/common/be_rename.c
index f0cd781b22..87dadefaff 100644
--- a/usr/src/lib/libbe/common/be_rename.c
+++ b/usr/src/lib/libbe/common/be_rename.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
*/
#include <assert.h>
@@ -150,9 +150,19 @@ be_rename(nvlist_t *be_attrs)
/* New BE will reside in the same zpool as orig BE */
bt.nbe_zpool = bt.obe_zpool;
- be_make_root_ds(bt.obe_zpool, bt.obe_name, root_ds, sizeof (root_ds));
+ if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, root_ds,
+ sizeof (root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name);
+ goto done;
+ };
bt.obe_root_ds = strdup(root_ds);
- be_make_root_ds(bt.nbe_zpool, bt.nbe_name, root_ds, sizeof (root_ds));
+ if ((ret = be_make_root_ds(bt.nbe_zpool, bt.nbe_name, root_ds,
+ sizeof (root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, bt.nbe_zpool, bt.nbe_name);
+ goto done;
+ }
bt.nbe_root_ds = strdup(root_ds);
/*
@@ -248,8 +258,12 @@ be_rename(nvlist_t *be_attrs)
* Since the new and old BEs reside in the same pool (see above),
* the same variable can be used for the container for both.
*/
- be_make_root_container_ds(bt.obe_zpool, be_root_container,
- sizeof (be_root_container));
+ if ((ret = be_make_root_container_ds(bt.obe_zpool, be_root_container,
+ sizeof (be_root_container))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s\n"), __func__, bt.obe_zpool);
+ goto done;
+ }
if ((ret = be_update_vfstab(bt.nbe_name, be_root_container,
be_root_container, &fld, mp)) != BE_SUCCESS) {
diff --git a/usr/src/lib/libbe/common/be_snapshot.c b/usr/src/lib/libbe/common/be_snapshot.c
index a4e2c79e5a..92fd2ae96d 100644
--- a/usr/src/lib/libbe/common/be_snapshot.c
+++ b/usr/src/lib/libbe/common/be_snapshot.c
@@ -25,6 +25,7 @@
/*
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
*/
/*
@@ -88,7 +89,7 @@ be_create_snapshot(nvlist_t *be_attrs)
char *snap_name = NULL;
char *policy = NULL;
boolean_t autoname = B_FALSE;
- int ret = BE_SUCCESS;
+ int ret = BE_SUCCESS;
/* Initialize libzfs handle */
if (!be_zfs_init())
@@ -184,7 +185,7 @@ be_destroy_snapshot(nvlist_t *be_attrs)
{
char *be_name = NULL;
char *snap_name = NULL;
- int ret = BE_SUCCESS;
+ int ret = BE_SUCCESS;
/* Initialize libzfs handle */
if (!be_zfs_init())
@@ -319,8 +320,12 @@ be_rollback(nvlist_t *be_attrs)
}
/* Generate string for BE's root dataset */
- be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds,
- sizeof (obe_root_ds));
+ if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds,
+ sizeof (obe_root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name);
+ return (ret);
+ }
bt.obe_root_ds = obe_root_ds;
if (getzoneid() != GLOBAL_ZONEID) {
@@ -434,8 +439,12 @@ _be_create_snapshot(char *be_name, char **snap_name, char *policy)
return (zfs_err_to_be_err(g_zfs));
}
- be_make_root_ds(bt.obe_zpool, bt.obe_name, root_ds,
- sizeof (root_ds));
+ if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, root_ds,
+ sizeof (root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name);
+ return (ret);
+ }
bt.obe_root_ds = root_ds;
if (getzoneid() != GLOBAL_ZONEID) {
@@ -662,8 +671,12 @@ _be_destroy_snapshot(char *be_name, char *snap_name)
return (zfs_err_to_be_err(g_zfs));
}
- be_make_root_ds(bt.obe_zpool, bt.obe_name, root_ds,
- sizeof (root_ds));
+ if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, root_ds,
+ sizeof (root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name);
+ return (ret);
+ }
bt.obe_root_ds = root_ds;
zhp = zfs_open(g_zfs, bt.obe_root_ds, ZFS_TYPE_DATASET);
diff --git a/usr/src/lib/libbe/common/be_utils.c b/usr/src/lib/libbe/common/be_utils.c
index 17d43ee195..8b95f7bc91 100644
--- a/usr/src/lib/libbe/common/be_utils.c
+++ b/usr/src/lib/libbe/common/be_utils.c
@@ -24,7 +24,7 @@
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
* Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright (c) 2015 by Delphix. All rights reserved.
- * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
* Copyright (c) 2018, Joyent, Inc.
*/
@@ -439,17 +439,19 @@ be_get_defaults(struct be_defaults *defaults)
* be_root_ds - pointer to buffer to return BE root dataset in.
* be_root_ds_size - size of be_root_ds
* Returns:
- * None
+ * BE_SUCCESS - Success
+ * be_errno_t - Failure
* Scope:
* Semi-private (library wide use only)
*/
-void
+int
be_make_root_ds(const char *zpool, const char *be_name, char *be_root_ds,
int be_root_ds_size)
{
struct be_defaults be_defaults;
be_get_defaults(&be_defaults);
- char *root_ds = NULL;
+
+ assert(zpool != NULL);
if (getzoneid() == GLOBAL_ZONEID) {
if (be_defaults.be_deflt_rpool_container) {
@@ -461,18 +463,30 @@ be_make_root_ds(const char *zpool, const char *be_name, char *be_root_ds,
}
} else {
/*
- * In non-global zone we can use path from mounted root dataset
- * to generate BE's root dataset string.
+ * In a non-global zone we can use the path from the mounted
+ * root dataset to generate the BE's root dataset string.
*/
- if ((root_ds = be_get_ds_from_dir("/")) != NULL) {
- (void) snprintf(be_root_ds, be_root_ds_size, "%s/%s",
- dirname(root_ds), be_name);
- } else {
+ char *root_ds = be_get_ds_from_dir("/");
+
+ if (root_ds == NULL) {
be_print_err(gettext("be_make_root_ds: zone root "
"dataset is not mounted\n"));
- return;
+ return (BE_ERR_NOTMOUNTED);
}
+ if (strncmp(root_ds, zpool, strlen(zpool)) != 0 ||
+ root_ds[strlen(zpool)] != '/') {
+ /*
+ * This pool is not the one that contains the zone
+ * root.
+ */
+ return (BE_ERR_ACCESS);
+ }
+
+ (void) snprintf(be_root_ds, be_root_ds_size, "%s/%s",
+ dirname(root_ds), be_name);
}
+
+ return (BE_SUCCESS);
}
/*
@@ -484,17 +498,17 @@ be_make_root_ds(const char *zpool, const char *be_name, char *be_root_ds,
* dataset in.
* container_ds_size - size of container_ds
* Returns:
- * None
+ * BE_SUCCESS - Success
+ * be_errno_t - Failure
* Scope:
* Semi-private (library wide use only)
*/
-void
-be_make_container_ds(const char *zpool, char *container_ds,
+int
+be_make_container_ds(const char *zpool, char *container_ds,
int container_ds_size)
{
struct be_defaults be_defaults;
be_get_defaults(&be_defaults);
- char *root_ds = NULL;
if (getzoneid() == GLOBAL_ZONEID) {
if (be_defaults.be_deflt_rpool_container) {
@@ -505,15 +519,26 @@ be_make_container_ds(const char *zpool, char *container_ds,
"%s/%s", zpool, BE_CONTAINER_DS_NAME);
}
} else {
- if ((root_ds = be_get_ds_from_dir("/")) != NULL) {
- (void) strlcpy(container_ds, dirname(root_ds),
- container_ds_size);
- } else {
+ char *root_ds = be_get_ds_from_dir("/");
+
+ if (root_ds == NULL) {
be_print_err(gettext("be_make_container_ds: zone root "
"dataset is not mounted\n"));
- return;
+ return (BE_ERR_NOTMOUNTED);
+ }
+ if (strncmp(root_ds, zpool, strlen(zpool)) != 0 ||
+ root_ds[strlen(zpool)] != '/') {
+ /*
+ * This pool is not the one that contains the zone
+ * root.
+ */
+ return (BE_ERR_ACCESS);
}
+ (void) strlcpy(container_ds, dirname(root_ds),
+ container_ds_size);
}
+
+ return (BE_SUCCESS);
}
/*
@@ -525,17 +550,22 @@ be_make_container_ds(const char *zpool, char *container_ds,
* container_ds - pointer to buffer in which to return result
* container_ds_size - size of container_ds
* Returns:
- * None
+ * BE_SUCCESS - Success
+ * be_errno_t - Failure
* Scope:
* Semi-private (library wide use only)
*/
-void
+int
be_make_root_container_ds(const char *zpool, char *container_ds,
int container_ds_size)
{
char *root;
+ int ret;
- be_make_container_ds(zpool, container_ds, container_ds_size);
+ if ((ret = be_make_container_ds(zpool, container_ds,
+ container_ds_size)) != BE_SUCCESS) {
+ return (ret);
+ }
/* If the container DS ends with /ROOT, remove it. */
@@ -543,6 +573,8 @@ be_make_root_container_ds(const char *zpool, char *container_ds,
strcmp(root + 1, BE_CONTAINER_DS_NAME) == 0) {
*root = '\0';
}
+
+ return (BE_SUCCESS);
}
/*
@@ -714,7 +746,12 @@ be_append_menu(char *be_name, char *be_root_pool, char *boot_pool,
"%s%s", pool_mntpnt, BE_SPARC_MENU);
}
- be_make_root_ds(be_root_pool, be_name, be_root_ds, sizeof (be_root_ds));
+ if ((ret = be_make_root_ds(be_root_pool, be_name, be_root_ds,
+ sizeof (be_root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, be_root_pool, be_name);
+ goto cleanup;
+ }
/*
* Iterate through menu first to make sure the BE doesn't already
@@ -969,7 +1006,12 @@ be_remove_menu(char *be_name, char *be_root_pool, char *boot_pool)
boot_pool = be_root_pool;
/* Get name of BE's root dataset */
- be_make_root_ds(be_root_pool, be_name, be_root_ds, sizeof (be_root_ds));
+ if ((ret = be_make_root_ds(be_root_pool, be_name, be_root_ds,
+ sizeof (be_root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, be_root_pool, be_name);
+ return (ret);
+ }
/* Get handle to pool dataset */
if ((zhp = zfs_open(g_zfs, be_root_pool, ZFS_TYPE_DATASET)) == NULL) {
@@ -1579,7 +1621,12 @@ be_change_grub_default(char *be_name, char *be_root_pool)
}
/* Generate string for BE's root dataset */
- be_make_root_ds(be_root_pool, be_name, be_root_ds, sizeof (be_root_ds));
+ if ((ret = be_make_root_ds(be_root_pool, be_name, be_root_ds,
+ sizeof (be_root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, be_root_pool, be_name);
+ return (ret);
+ }
/* Get handle to pool dataset */
if ((zhp = zfs_open(g_zfs, be_root_pool, ZFS_TYPE_DATASET)) == NULL) {
@@ -1848,10 +1895,18 @@ be_update_menu(char *be_orig_name, char *be_new_name, char *be_root_pool,
"%s%s", pool_mntpnt, BE_SPARC_MENU);
}
- be_make_root_ds(be_root_pool, be_orig_name, be_root_ds,
- sizeof (be_root_ds));
- be_make_root_ds(be_root_pool, be_new_name, be_new_root_ds,
- sizeof (be_new_root_ds));
+ if ((ret = be_make_root_ds(be_root_pool, be_orig_name, be_root_ds,
+ sizeof (be_root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, be_root_pool, be_orig_name);
+ goto cleanup;
+ }
+ if ((ret = be_make_root_ds(be_root_pool, be_new_name, be_new_root_ds,
+ sizeof (be_new_root_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s/%s\n"), __func__, be_root_pool, be_new_name);
+ goto cleanup;
+ }
if ((ret = be_open_menu(be_root_pool, menu_file,
&menu_fp, "r", B_TRUE)) != BE_SUCCESS) {
@@ -2714,24 +2769,14 @@ be_zpool_find_current_be_callback(zpool_handle_t *zlp, void *data)
zfs_handle_t *zhp = NULL;
const char *zpool = zpool_get_name(zlp);
char be_container_ds[MAXPATHLEN];
- char *zpath = NULL;
/*
* Generate string for BE container dataset
*/
- if (getzoneid() != GLOBAL_ZONEID) {
- if ((zpath = be_get_ds_from_dir("/")) != NULL) {
- (void) strlcpy(be_container_ds, dirname(zpath),
- sizeof (be_container_ds));
- } else {
- be_print_err(gettext(
- "be_zpool_find_current_be_callback: "
- "zone root dataset is not mounted\n"));
- return (0);
- }
- } else {
- be_make_container_ds(zpool, be_container_ds,
- sizeof (be_container_ds));
+ if (be_make_container_ds(zpool, be_container_ds,
+ sizeof (be_container_ds)) != BE_SUCCESS) {
+ zpool_close(zlp);
+ return (0);
}
/*
@@ -2865,7 +2910,10 @@ be_check_be_roots_callback(zpool_handle_t *zlp, void *data)
char be_container_ds[MAXPATHLEN];
/* Generate string for this pool's BE root container dataset */
- be_make_container_ds(zpool, be_container_ds, sizeof (be_container_ds));
+ if (be_make_container_ds(zpool, be_container_ds,
+ sizeof (be_container_ds)) != BE_SUCCESS) {
+ return (0);
+ }
/*
* If dataset lives under the BE root container dataset
@@ -3420,6 +3468,7 @@ update_dataset(char *dataset, int dataset_len, char *be_name,
{
char *ds = NULL;
char *sub_ds = NULL;
+ int ret;
/* Tear off the BE container dataset */
if ((ds = be_make_name_from_ds(dataset, old_rc_loc)) == NULL) {
@@ -3430,7 +3479,10 @@ update_dataset(char *dataset, int dataset_len, char *be_name,
sub_ds = strchr(ds, '/');
/* Generate the BE root dataset name */
- be_make_root_ds(new_rc_loc, be_name, dataset, dataset_len);
+ if ((ret = be_make_root_ds(new_rc_loc, be_name, dataset,
+ dataset_len)) != BE_SUCCESS) {
+ return (ret);
+ }
/* If a subordinate dataset name was found, append it */
if (sub_ds != NULL)
diff --git a/usr/src/lib/libbe/common/be_zones.c b/usr/src/lib/libbe/common/be_zones.c
index e257b62125..665db1b1d0 100644
--- a/usr/src/lib/libbe/common/be_zones.c
+++ b/usr/src/lib/libbe/common/be_zones.c
@@ -25,6 +25,7 @@
/*
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
*/
/*
@@ -135,9 +136,13 @@ be_find_active_zone_root(zfs_handle_t *be_zhp, char *zonepath_ds,
}
}
- /* Generate string for the root container dataset for this zone. */
- be_make_container_ds(zonepath_ds, zone_container_ds,
- sizeof (zone_container_ds));
+ /* Generate string for the root container dataset for this zone. */
+ if ((ret = be_make_container_ds(zonepath_ds, zone_container_ds,
+ sizeof (zone_container_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s\n"), __func__, zonepath_ds);
+ return (ret);
+ }
/* Get handle to this zone's root container dataset */
if ((zhp = zfs_open(g_zfs, zone_container_ds, ZFS_TYPE_FILESYSTEM))
@@ -205,8 +210,12 @@ be_find_mounted_zone_root(char *zone_altroot, char *zonepath_ds,
int zret = 0;
/* Generate string for the root container dataset for this zone. */
- be_make_container_ds(zonepath_ds, zone_container_ds,
- sizeof (zone_container_ds));
+ if ((ret = be_make_container_ds(zonepath_ds, zone_container_ds,
+ sizeof (zone_container_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s\n"), __func__, zonepath_ds);
+ return (ret);
+ }
/* Get handle to this zone's root container dataset. */
if ((zhp = zfs_open(g_zfs, zone_container_ds, ZFS_TYPE_FILESYSTEM))
@@ -293,8 +302,12 @@ be_zone_supported(char *zonepath_ds)
* Make sure the zonepath has a zone root container dataset
* underneath it.
*/
- be_make_container_ds(zonepath_ds, zone_container_ds,
- sizeof (zone_container_ds));
+ if ((ret = be_make_container_ds(zonepath_ds, zone_container_ds,
+ sizeof (zone_container_ds))) != BE_SUCCESS) {
+ be_print_err(gettext("%s: failed to get BE container dataset "
+ "for %s\n"), __func__, zonepath_ds);
+ return (B_FALSE);
+ }
if (!zfs_dataset_exists(g_zfs, zone_container_ds,
ZFS_TYPE_FILESYSTEM)) {
diff --git a/usr/src/lib/libbe/common/libbe_priv.h b/usr/src/lib/libbe/common/libbe_priv.h
index ace201577f..dd73e33bb1 100644
--- a/usr/src/lib/libbe/common/libbe_priv.h
+++ b/usr/src/lib/libbe/common/libbe_priv.h
@@ -24,7 +24,7 @@
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
* Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright (c) 2015 by Delphix. All rights reserved.
- * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
*/
#ifndef _LIBBE_PRIV_H
@@ -179,9 +179,9 @@ int _be_destroy_snapshot(char *, char *);
/* be_utils.c */
boolean_t be_zfs_init(void);
void be_zfs_fini(void);
-void be_make_root_ds(const char *, const char *, char *, int);
-void be_make_container_ds(const char *, char *, int);
-void be_make_root_container_ds(const char *, char *, int);
+int be_make_root_ds(const char *, const char *, char *, int);
+int be_make_container_ds(const char *, char *, int);
+int be_make_root_container_ds(const char *, char *, int);
char *be_make_name_from_ds(const char *, char *);
int be_append_menu(char *, char *, char *, char *, char *);
int be_remove_menu(char *, char *, char *);
diff --git a/usr/src/man/man4d/Makefile b/usr/src/man/man4d/Makefile
index 8462b451d6..88a0b92076 100644
--- a/usr/src/man/man4d/Makefile
+++ b/usr/src/man/man4d/Makefile
@@ -16,8 +16,8 @@
# Copyright 2016 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
# Copyright 2018 Nexenta Systems, Inc.
# Copyright 2020 Peter Tribble
-# Copyright 2021 Oxide Computer Company
# Copyright 2022 RackTop Systems, Inc.
+# Copyright 2022 Oxide Computer Company
#
include $(SRC)/Makefile.master
@@ -241,6 +241,7 @@ i386_MANFILES= ahci.4d \
ural.4d \
urtw.4d \
usmn.4d \
+ vio9p.4d \
vioblk.4d \
vioif.4d \
vioscsi.4d \
diff --git a/usr/src/man/man4d/vio9p.4d b/usr/src/man/man4d/vio9p.4d
new file mode 100644
index 0000000000..80952da799
--- /dev/null
+++ b/usr/src/man/man4d/vio9p.4d
@@ -0,0 +1,141 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2022 Oxide Computer Company
+.\"
+.Dd August 1, 2022
+.Dt VIO9P 4D
+.Os
+.Sh NAME
+.Nm vio9p
+.Nd Virtio 9P Transport Driver
+.Sh SYNOPSIS
+.Pa /dev/9p/*
+.Sh DESCRIPTION
+The
+.Nm
+driver provides access to 9P transport devices commonly used by hypervisors
+and emulators to expose a shared file system.
+.Pp
+The
+.Nm
+driver is not a
+.Sy Committed
+interface, and may change at any time.
+.Sh APPLICATION PROGRAMMING INTERFACE
+Each device corresponds to a specific 9P channel, providing exclusive access to
+one consumer at a time.
+The device may be opened with an
+.Xr open 2
+call, which must include at least the
+.Dv O_EXCL
+and
+.Dv O_RDWR
+flags.
+The
+.Dv O_NONBLOCK
+or
+.Dv O_NDELAY
+flags may be used if non-blocking reads and writes are required.
+.Pp
+Once open,
+.Xr read 2
+and
+.Xr write 2
+calls may be made against the resulting file descriptor.
+Writes represent a 9P request message sent to the hypervisor, and reads
+represent responses to those requests.
+.Pp
+Unlike with other transports like TCP, the channel is not explicitly reset when
+the device is opened or closed.
+After a call to
+.Xr open 2 ,
+the application should use a
+.Sy version
+message to open a new session.
+This will explicitly discard any previous session, clunking any active fids in
+the process and negotiating an appropriate protocol version with the
+hypervisor.
+It is likely also appropriate to do this as part of closing the device, to
+allow the hypervisor to free any session tracking resources.
+.Pp
+Writes must be well-formed 9P messages, conforming to whichever 9P protocol
+specification is used by the hypervisor.
+In particular, each message must include a minimum of seven bytes, representing
+the message
+.Em size[4] ,
+.Em type[1] ,
+and
+.Em tag[2] .
+In most or all available protocol specifications, these fields are unsigned
+integers in little-endian order.
+The driver limits request and response size to 8192 bytes, and will fail larger
+writes with
+.Er EMSGSIZE .
+Applications should, in their initial
+.Sy version
+message,
+negotiate an
+.Em msize[4]
+value less than or equal to 8192 bytes.
+.Pp
+Reads are interruptible and will block waiting for a response to a request sent
+in a previous write.
+If insufficient buffer space is provided to the read call to receive the
+message, the call will fail with
+.Er EOVERFLOW
+and the message will remain available for a subsequent read.
+Messages are provided as-is to the application, including the
+.Em size[4] ,
+.Em type[1] ,
+and
+.Em tag[2] .
+.Pp
+Depending on the 9P server provided by the hypervisor, requests that are issued
+concurrently may result in responses that arrive out of order.
+Applications should develop a strategy for allocating unique
+.Em tag[2]
+values, so that request and response messages can be correlated.
+.Sh IOCTLS
+The driver provides an ioctl,
+.Dv VIO9P_IOC_MOUNT_TAG ,
+to expose the
+.Em Mount Tag
+string if one was provided by the hypervisor.
+The ioctl is defined in
+.In sys/vio9p.h .
+The argument must be a
+.Vt "char *" ,
+pointing to a buffer of
+.Dv VIO9P_MOUNT_TAG_SIZE
+bytes.
+On success, the buffer will contain the mount tag string as read from the
+hypervisor, followed by a null-terminating zero byte added by the driver to
+ensure the result can always be treated as a C string.
+While the hypervisor is expected to provide a human-readable C string,
+applications should take care to verify that the contents are valid for display
+or other purposes.
+Note that even if successfully read, the string may be empty.
+.Sh FILES
+.Bl -tag -width Pa
+.It Pa /dev/9p/*
+Character device for access to a 9P channel.
+.It Pa /kernel/drv/amd64/vio9p
+Device driver (x86)
+.El
+.Sh INTERFACE STABILITY
+.Sy Uncommitted
+.Sh SEE ALSO
+.Xr close 2 ,
+.Xr ioctl 2 ,
+.Xr open 2 ,
+.Xr read 2 ,
+.Xr write 2
diff --git a/usr/src/pkg/manifests/driver-storage-vio9p.p5m b/usr/src/pkg/manifests/driver-storage-vio9p.p5m
new file mode 100644
index 0000000000..f4ce42c1a6
--- /dev/null
+++ b/usr/src/pkg/manifests/driver-storage-vio9p.p5m
@@ -0,0 +1,40 @@
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2022 Oxide Computer Company
+#
+
+#
+# The default for payload-bearing actions in this package is to appear in the
+# global zone only. See the include file for greater detail, as well as
+# information about overriding the defaults.
+#
+<include global_zone_only_component>
+set name=pkg.fmri value=pkg:/driver/storage/vio9p@$(PKGVERS)
+set name=pkg.summary value="Virtio 9P transport driver"
+set name=pkg.description value="Virtio 9P transport driver"
+set name=info.classification value=org.opensolaris.category.2008:Drivers/Storage
+set name=variant.arch value=$(ARCH)
+dir path=kernel group=sys
+dir path=kernel/drv group=sys
+dir path=kernel/drv/$(ARCH64) group=sys
+file path=kernel/drv/$(ARCH64)/vio9p group=sys
+file path=usr/include/sys/vio9p.h
+dir path=usr/share/man
+dir path=usr/share/man/man4d
+file path=usr/share/man/man4d/vio9p.4d
+driver name=vio9p perms="* 0600 root root" alias=pci1af4,9
+license lic_CDDL license=lic_CDDL
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index dc5503379f..26e20efb9c 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -2128,6 +2128,9 @@ VIOIF_OBJS = vioif.o
# Virtio SCSI driver
VIOSCSI_OBJS = vioscsi.o
+# Virtio 9P transport driver
+VIO9P_OBJS = vio9p.o
+
#
# kiconv modules
#
diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules
index c8a6ce9d0c..8ef8c5eb9d 100644
--- a/usr/src/uts/common/Makefile.rules
+++ b/usr/src/uts/common/Makefile.rules
@@ -1557,6 +1557,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/vioscsi/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/vio9p/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(COMMONBASE)/idspace/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
diff --git a/usr/src/uts/common/dtrace/lockstat.c b/usr/src/uts/common/dtrace/lockstat.c
index 69c8b72544..08f819d453 100644
--- a/usr/src/uts/common/dtrace/lockstat.c
+++ b/usr/src/uts/common/dtrace/lockstat.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2022 Oxide Computer Company
*/
@@ -93,7 +94,7 @@ lockstat_enable(void *arg, dtrace_id_t id, void *parg)
lockstat_probemap[probe->lsp_probe] = id;
membar_producer();
- lockstat_hot_patch();
+ lockstat_hotpatch_probe(probe->lsp_probe);
membar_producer();
/*
@@ -115,7 +116,7 @@ lockstat_disable(void *arg, dtrace_id_t id, void *parg)
ASSERT(lockstat_probemap[probe->lsp_probe]);
lockstat_probemap[probe->lsp_probe] = 0;
- lockstat_hot_patch();
+ lockstat_hotpatch_probe(probe->lsp_probe);
membar_producer();
/*
diff --git a/usr/src/uts/common/io/vio9p/vio9p.c b/usr/src/uts/common/io/vio9p/vio9p.c
new file mode 100644
index 0000000000..5302043365
--- /dev/null
+++ b/usr/src/uts/common/io/vio9p/vio9p.c
@@ -0,0 +1,839 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+/*
+ * VIRTIO 9P DRIVER
+ *
+ * This driver provides support for Virtio 9P devices. Each driver instance
+ * attaches to a single underlying 9P channel. A 9P file system will use LDI
+ * to open this device.
+ */
+
+#include <sys/modctl.h>
+#include <sys/types.h>
+#include <sys/file.h>
+#include <sys/errno.h>
+#include <sys/param.h>
+#include <sys/stropts.h>
+#include <sys/stream.h>
+#include <sys/strsubr.h>
+#include <sys/kmem.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/ksynch.h>
+#include <sys/stat.h>
+#include <sys/modctl.h>
+#include <sys/debug.h>
+#include <sys/pci.h>
+#include <sys/containerof.h>
+#include <sys/ctype.h>
+#include <sys/stdbool.h>
+#include <sys/sysmacros.h>
+#include <sys/list.h>
+
+#include "virtio.h"
+#include "vio9p_impl.h"
+
+static void *vio9p_state;
+
+uint_t vio9p_int_handler(caddr_t, caddr_t);
+static uint_t vio9p_poll(vio9p_t *);
+static int vio9p_quiesce(dev_info_t *);
+static int vio9p_attach(dev_info_t *, ddi_attach_cmd_t);
+static int vio9p_teardown(vio9p_t *, vio9p_teardown_style_t);
+static int vio9p_detach(dev_info_t *, ddi_detach_cmd_t);
+static int vio9p_open(dev_t *, int, int, cred_t *);
+static int vio9p_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+static int vio9p_close(dev_t, int, int, cred_t *);
+static int vio9p_read(dev_t, uio_t *, cred_t *);
+static int vio9p_write(dev_t, uio_t *, cred_t *);
+static vio9p_req_t *vio9p_req_alloc_impl(vio9p_t *, int);
+static void vio9p_req_free_impl(vio9p_t *, vio9p_req_t *);
+
+/*
+ * Character device entry points.  Only open, close, read, write, and ioctl
+ * are implemented by this driver; the remaining entry points are set to the
+ * nodev/nochpoll stubs, and property requests go to ddi_prop_op().
+ */
+static struct cb_ops vio9p_cb_ops = {
+ .cb_rev = CB_REV,
+ .cb_flag = D_NEW | D_MP,
+
+ .cb_open = vio9p_open,
+ .cb_close = vio9p_close,
+ .cb_read = vio9p_read,
+ .cb_write = vio9p_write,
+ .cb_ioctl = vio9p_ioctl,
+
+ .cb_strategy = nodev,
+ .cb_print = nodev,
+ .cb_dump = nodev,
+ .cb_devmap = nodev,
+ .cb_mmap = nodev,
+ .cb_segmap = nodev,
+ .cb_chpoll = nochpoll,
+ .cb_prop_op = ddi_prop_op,
+ .cb_str = NULL,
+ .cb_aread = nodev,
+ .cb_awrite = nodev,
+};
+
+/*
+ * Device operations.  This driver supplies attach, detach, and quiesce, and
+ * routes character device access through vio9p_cb_ops.  It has no bus or
+ * power operations.
+ */
+static struct dev_ops vio9p_dev_ops = {
+ .devo_rev = DEVO_REV,
+ .devo_refcnt = 0,
+
+ .devo_attach = vio9p_attach,
+ .devo_detach = vio9p_detach,
+ .devo_quiesce = vio9p_quiesce,
+
+ .devo_cb_ops = &vio9p_cb_ops,
+
+ .devo_getinfo = ddi_no_info,
+ .devo_identify = nulldev,
+ .devo_probe = nulldev,
+ .devo_reset = nodev,
+ .devo_bus_ops = NULL,
+ .devo_power = NULL,
+};
+
+/*
+ * Loadable module description: a device driver (mod_driverops) whose
+ * operations are given by vio9p_dev_ops.
+ */
+static struct modldrv vio9p_modldrv = {
+ .drv_modops = &mod_driverops,
+ .drv_linkinfo = "VIRTIO 9P driver",
+ .drv_dev_ops = &vio9p_dev_ops
+};
+
+/*
+ * Module linkage handed to mod_install(), mod_remove(), and mod_info() by
+ * _init(), _fini(), and _info() respectively.
+ */
+static struct modlinkage vio9p_modlinkage = {
+ .ml_rev = MODREV_1,
+ .ml_linkage = { &vio9p_modldrv, NULL }
+};
+
+/*
+ * DMA attribute template for the outbound (request) and inbound (response)
+ * message buffers allocated for each request in vio9p_req_alloc_impl().  The
+ * scatter/gather list length is capped at VIRTIO_9P_MAX_SGL; the virtqueue is
+ * sized in vio9p_attach() to hold that many cookies in each direction.
+ */
+static const ddi_dma_attr_t vio9p_dma_attr = {
+ .dma_attr_version = DMA_ATTR_V0,
+ .dma_attr_addr_lo = 0x0000000000000000,
+ .dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
+ .dma_attr_count_max = 0x00000000FFFFFFFF,
+ .dma_attr_align = 1,
+ .dma_attr_burstsizes = 1,
+ .dma_attr_minxfer = 1,
+ .dma_attr_maxxfer = 0x00000000FFFFFFFF,
+ .dma_attr_seg = 0x00000000FFFFFFFF,
+ .dma_attr_sgllen = VIRTIO_9P_MAX_SGL,
+ .dma_attr_granular = 1,
+ .dma_attr_flags = 0
+};
+
+/*
+ * Interrupt handler registered for the request virtqueue.  Processes any
+ * completed chains while holding the instance mutex, and claims the interrupt
+ * only if at least one chain was processed.
+ */
+uint_t
+vio9p_int_handler(caddr_t arg0, caddr_t arg1)
+{
+	vio9p_t *vin = (vio9p_t *)arg0;
+	uint_t nprocessed;
+
+	mutex_enter(&vin->vin_mutex);
+	nprocessed = vio9p_poll(vin);
+	mutex_exit(&vin->vin_mutex);
+
+	if (nprocessed == 0) {
+		return (DDI_INTR_UNCLAIMED);
+	}
+
+	return (DDI_INTR_CLAIMED);
+}
+
+/*
+ * Return a request to the free list and, if the device is open, wake any
+ * thread waiting in vio9p_req_alloc() for a request to become available.
+ *
+ * The request must not currently be on either the completion list or the free
+ * list.  Its generation is reset to zero, a value that vio9p_open() never
+ * leaves in the device generation, so that an idle request is not associated
+ * with any particular open of the device.
+ */
+static void
+vio9p_req_freelist_put(vio9p_t *vin, vio9p_req_t *vnr)
+{
+	VERIFY(!list_link_active(&vnr->vnr_link_complete));
+	VERIFY(!list_link_active(&vnr->vnr_link_free));
+
+	/*
+	 * Clear the generation on the request, not on the device.  The device
+	 * generation identifies the current open and is compared against
+	 * in-flight requests in vio9p_poll(); resetting it here would cause
+	 * responses to other outstanding requests to be discarded as stale.
+	 */
+	vnr->vnr_generation = 0;
+	list_insert_head(&vin->vin_req_freelist, vnr);
+
+	if (vin->vin_open) {
+		/*
+		 * Wake any callers waiting in vio9p_req_alloc() for an entry:
+		 */
+		cv_broadcast(&vin->vin_cv);
+	}
+}
+
+/*
+ * Retire a request that is no longer needed.  If it is still queued on the
+ * completion list it is unlinked first, and then it goes back on the free
+ * list.  The caller must hold the instance mutex.
+ */
+static void
+vio9p_req_free(vio9p_t *vin, vio9p_req_t *vnr)
+{
+	bool queued;
+
+	VERIFY(MUTEX_HELD(&vin->vin_mutex));
+
+	queued = list_link_active(&vnr->vnr_link_complete);
+	if (queued) {
+		list_remove(&vin->vin_completes, vnr);
+	}
+
+	vio9p_req_freelist_put(vin, vnr);
+}
+
+/*
+ * Release everything held by a request and free the request itself: the
+ * descriptor chain, the inbound and outbound DMA buffers, and finally the
+ * tracking structure.  The request must not be on the completion list or the
+ * free list when this is called.  It is removed from the list of all requests
+ * (vin_reqs) and the device-wide request count is decremented.
+ *
+ * This is also the unwind path for a partially constructed request in
+ * vio9p_req_alloc_impl(), which is why each resource is checked for NULL
+ * before it is released.
+ */
+static void
+vio9p_req_free_impl(vio9p_t *vin, vio9p_req_t *vnr)
+{
+ if (vnr->vnr_chain != NULL) {
+ virtio_chain_free(vnr->vnr_chain);
+ vnr->vnr_chain = NULL;
+ }
+ if (vnr->vnr_dma_in != NULL) {
+ virtio_dma_free(vnr->vnr_dma_in);
+ vnr->vnr_dma_in = NULL;
+ }
+ if (vnr->vnr_dma_out != NULL) {
+ virtio_dma_free(vnr->vnr_dma_out);
+ vnr->vnr_dma_out = NULL;
+ }
+
+ VERIFY(!list_link_active(&vnr->vnr_link_complete));
+ VERIFY(!list_link_active(&vnr->vnr_link_free));
+
+ /* Drop the request from the per-device accounting before freeing it. */
+ list_remove(&vin->vin_reqs, vnr);
+ VERIFY3U(vin->vin_nreqs, >, 0);
+ vin->vin_nreqs--;
+
+ kmem_free(vnr, sizeof (*vnr));
+}
+
+/*
+ * Obtain a request object for a new transaction.  The caller must hold the
+ * instance mutex.
+ *
+ * A request is taken from the free list if one is there; otherwise a new one
+ * is allocated without sleeping, subject to the per-device request cap.  If
+ * neither yields a request and "wait" is set, the caller sleeps on the
+ * condition variable and retries once woken.  Returns NULL if no request is
+ * available for a non-waiting caller, or if the wait is cut short by a
+ * signal.
+ */
+static vio9p_req_t *
+vio9p_req_alloc(vio9p_t *vin, bool wait)
+{
+	VERIFY(MUTEX_HELD(&vin->vin_mutex));
+
+	for (;;) {
+		vio9p_req_t *req;
+
+		/*
+		 * Prefer recycling an idle request:
+		 */
+		req = list_remove_head(&vin->vin_req_freelist);
+		if (req != NULL) {
+			return (req);
+		}
+
+		/*
+		 * Otherwise try to grow the pool, if we are under the cap:
+		 */
+		req = vio9p_req_alloc_impl(vin, KM_NOSLEEP_LAZY);
+		if (req != NULL) {
+			return (req);
+		}
+
+		if (!wait) {
+			return (NULL);
+		}
+
+		/*
+		 * Blocking caller: sleep until a request is put back on the
+		 * free list, giving up if we are interrupted.
+		 */
+		if (cv_wait_sig(&vin->vin_cv, &vin->vin_mutex) == 0) {
+			return (NULL);
+		}
+	}
+}
+
+/*
+ * Allocate and fully construct a new request: the tracking structure, a
+ * descriptor chain, and one outbound and one inbound DMA buffer of
+ * VIRTIO_9P_REQ_SIZE bytes each, with every DMA cookie appended to the chain.
+ * The new request is linked onto vin_reqs and counted in vin_nreqs, but it is
+ * not placed on the free list; the caller decides what to do with it.
+ *
+ * Returns NULL if the device already has VIRTIO_9P_MAX_REQS requests, or if
+ * any allocation or chain append fails.  The "kmflag" argument is passed
+ * through to each of the underlying allocators.
+ */
+static vio9p_req_t *
+vio9p_req_alloc_impl(vio9p_t *vin, int kmflag)
+{
+ dev_info_t *dip = vin->vin_dip;
+ vio9p_req_t *vnr;
+
+ if (vin->vin_nreqs >= VIRTIO_9P_MAX_REQS) {
+ /*
+ * We have reached the limit of requests that we are willing to
+ * allocate for the whole device.
+ */
+ return (NULL);
+ }
+
+ /*
+ * Note that the request object has various list link fields which are
+ * initialised to zero here and which we check at various points later.
+ */
+ if ((vnr = kmem_zalloc(sizeof (*vnr), kmflag)) == NULL) {
+ return (NULL);
+ }
+ /*
+ * Account for the request immediately, so that the failure path below
+ * can use vio9p_req_free_impl(), which expects it to be on vin_reqs.
+ */
+ list_insert_tail(&vin->vin_reqs, vnr);
+ vin->vin_nreqs++;
+
+ if ((vnr->vnr_chain = virtio_chain_alloc(vin->vin_vq, kmflag)) ==
+ NULL) {
+ dev_err(vin->vin_dip, CE_WARN, "!chain alloc failure");
+ goto fail;
+ }
+ /* Let vio9p_poll() map a completed chain back to this request. */
+ virtio_chain_data_set(vnr->vnr_chain, vnr);
+
+ /*
+ * Allocate outbound request buffer:
+ */
+ if ((vnr->vnr_dma_out = virtio_dma_alloc(vin->vin_virtio,
+ VIRTIO_9P_REQ_SIZE, &vio9p_dma_attr,
+ DDI_DMA_CONSISTENT | DDI_DMA_WRITE, kmflag)) == NULL) {
+ dev_err(dip, CE_WARN, "!DMA out alloc failure");
+ goto fail;
+ }
+ VERIFY3U(virtio_dma_ncookies(vnr->vnr_dma_out), <=, VIRTIO_9P_MAX_SGL);
+
+ for (uint_t n = 0; n < virtio_dma_ncookies(vnr->vnr_dma_out); n++) {
+ if (virtio_chain_append(vnr->vnr_chain,
+ virtio_dma_cookie_pa(vnr->vnr_dma_out, n),
+ virtio_dma_cookie_size(vnr->vnr_dma_out, n),
+ VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "!chain append out failure");
+ goto fail;
+ }
+ }
+
+ /*
+ * Allocate inbound request buffer:
+ */
+ if ((vnr->vnr_dma_in = virtio_dma_alloc(vin->vin_virtio,
+ VIRTIO_9P_REQ_SIZE, &vio9p_dma_attr,
+ DDI_DMA_CONSISTENT | DDI_DMA_READ, kmflag)) == NULL) {
+ dev_err(dip, CE_WARN, "!DMA in alloc failure");
+ goto fail;
+ }
+ VERIFY3U(virtio_dma_ncookies(vnr->vnr_dma_in), <=, VIRTIO_9P_MAX_SGL);
+
+ for (uint_t n = 0; n < virtio_dma_ncookies(vnr->vnr_dma_in); n++) {
+ if (virtio_chain_append(vnr->vnr_chain,
+ virtio_dma_cookie_pa(vnr->vnr_dma_in, n),
+ virtio_dma_cookie_size(vnr->vnr_dma_in, n),
+ VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "!chain append in failure");
+ goto fail;
+ }
+ }
+
+ return (vnr);
+
+fail:
+ /* Releases whatever was allocated so far and undoes the accounting. */
+ vio9p_req_free_impl(vin, vnr);
+ return (NULL);
+}
+
+/*
+ * Collect every chain the device has finished with.  The caller must hold the
+ * instance mutex.
+ *
+ * A response that belongs to the current open of the device is queued on the
+ * completion list for vio9p_read(); anything else is recycled straight away.
+ * Returns the number of chains that were collected, whether or not they were
+ * queued.
+ */
+static uint_t
+vio9p_poll(vio9p_t *vin)
+{
+	virtio_chain_t *chain;
+	uint_t nchains = 0;
+	bool queued_any = false;
+
+	VERIFY(MUTEX_HELD(&vin->vin_mutex));
+
+	while ((chain = virtio_queue_poll(vin->vin_vq)) != NULL) {
+		vio9p_req_t *req = virtio_chain_data(chain);
+
+		nchains++;
+
+		virtio_dma_sync(req->vnr_dma_in, DDI_DMA_SYNC_FORCPU);
+
+		if (vin->vin_open &&
+		    req->vnr_generation == vin->vin_generation) {
+			list_insert_tail(&vin->vin_completes, req);
+			queued_any = true;
+		} else {
+			/*
+			 * The device is closed, or it was closed and
+			 * reopened after this request was submitted.  Nobody
+			 * wants this response, so recycle the request.
+			 */
+			vio9p_req_free(vin, req);
+		}
+	}
+
+	if (queued_any) {
+		cv_broadcast(&vin->vin_cv);
+	}
+
+	return (nchains);
+}
+
+/*
+ * attach(9E) entry point.  Only DDI_ATTACH is supported.
+ *
+ * Allocates the per-instance soft state, begins Virtio initialisation, reads
+ * the mount tag (if the device offers one), allocates the request queue,
+ * completes Virtio initialisation, pre-allocates one request for the free
+ * list, enables interrupts, and finally creates the minor node.  Any failure
+ * after the soft state has been populated is unwound through
+ * vio9p_teardown(), which returns DDI_FAILURE for the attach-time styles.
+ */
+static int
+vio9p_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int instance = ddi_get_instance(dip);
+ virtio_t *vio;
+ vio9p_req_t *vnr;
+
+ if (cmd != DDI_ATTACH) {
+ return (DDI_FAILURE);
+ }
+
+ if (ddi_soft_state_zalloc(vio9p_state, instance) != DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+
+ if ((vio = virtio_init(dip, VIRTIO_9P_WANTED_FEATURES, B_TRUE)) ==
+ NULL) {
+ ddi_soft_state_free(vio9p_state, instance);
+ dev_err(dip, CE_WARN, "failed to start Virtio init");
+ return (DDI_FAILURE);
+ }
+
+ /*
+ * Populate the soft state and create the request lists up front;
+ * vio9p_teardown() destroys these lists regardless of teardown style.
+ */
+ vio9p_t *vin = ddi_get_soft_state(vio9p_state, instance);
+ vin->vin_dip = dip;
+ vin->vin_virtio = vio;
+ ddi_set_driver_private(dip, vin);
+ list_create(&vin->vin_reqs, sizeof (vio9p_req_t),
+ offsetof(vio9p_req_t, vnr_link));
+ list_create(&vin->vin_completes, sizeof (vio9p_req_t),
+ offsetof(vio9p_req_t, vnr_link_complete));
+ list_create(&vin->vin_req_freelist, sizeof (vio9p_req_t),
+ offsetof(vio9p_req_t, vnr_link_free));
+
+ if (virtio_feature_present(vio, VIRTIO_9P_F_MOUNT_TAG)) {
+ /* Clamp the device-reported tag length to our buffer. */
+ uint16_t len = virtio_dev_get16(vio, VIRTIO_9P_CONFIG_TAG_SZ);
+ if (len > VIRTIO_9P_TAGLEN) {
+ len = VIRTIO_9P_TAGLEN;
+ }
+
+ /*
+ * This array is one byte longer than VIRTIO_9P_TAGLEN, and is
+ * thus always NUL-terminated by the use of
+ * ddi_soft_state_zalloc() above.
+ */
+ for (uint16_t n = 0; n < len; n++) {
+ vin->vin_tag[n] = virtio_dev_get8(vio,
+ VIRTIO_9P_CONFIG_TAG + n);
+ }
+ }
+
+ /*
+ * When allocating the request queue, we include enough slots for a
+ * full set of cookies (based on our DMA attributes) in both the in and
+ * the out direction.
+ */
+ if ((vin->vin_vq = virtio_queue_alloc(vio, VIRTIO_9P_VIRTQ_REQUESTS,
+ "requests", vio9p_int_handler, vin, B_FALSE,
+ 2 * VIRTIO_9P_MAX_SGL)) == NULL) {
+ return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_PRE_MUTEX));
+ }
+
+ if (virtio_init_complete(vio, VIRTIO_ANY_INTR_TYPE) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "failed to complete Virtio init");
+ return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_PRE_MUTEX));
+ }
+
+ /*
+ * The mutex is initialised with the interrupt priority reported by
+ * the Virtio framework, so it is only created at this point.  Failures
+ * from here on use the ATTACH teardown style, which destroys the mutex
+ * and condition variable.
+ */
+ cv_init(&vin->vin_cv, NULL, CV_DRIVER, NULL);
+ mutex_init(&vin->vin_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
+
+ /*
+ * Make sure the free list contains at least one request at attach time
+ * so that the device is always somewhat useable:
+ */
+ if ((vnr = vio9p_req_alloc_impl(vin, KM_SLEEP)) == NULL) {
+ dev_err(dip, CE_WARN, "failed to allocate first request");
+ return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_ATTACH));
+ }
+ vio9p_req_freelist_put(vin, vnr);
+
+ if (virtio_interrupts_enable(vio) != DDI_SUCCESS) {
+ return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_ATTACH));
+ }
+
+ /*
+ * Hang out a minor node so that we can be opened.
+ */
+ int minor = ddi_get_instance(dip);
+ if (ddi_create_minor_node(dip, "9p", S_IFCHR, minor, DDI_PSEUDO,
+ 0) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "could not create minor node");
+ return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_ATTACH));
+ }
+
+ ddi_report_dev(dip);
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * Common teardown path for both a failed attach and a detach.  The "style"
+ * argument describes the state the instance is in:
+ *
+ *	VIRTIO_9P_TEARDOWN_PRE_MUTEX	attach failed before the mutex and
+ *					condition variable were initialised,
+ *					so they are not destroyed here
+ *	VIRTIO_9P_TEARDOWN_ATTACH	attach failed after the mutex and
+ *					condition variable were initialised
+ *	VIRTIO_9P_TEARDOWN_DETACH	a regular detach
+ *
+ * The soft state is freed before returning, so "vin" must not be used by the
+ * caller afterwards.  Returns DDI_SUCCESS only for the DETACH style; the
+ * other styles return DDI_FAILURE so that vio9p_attach() can return the
+ * result directly.
+ */
+static int
+vio9p_teardown(vio9p_t *vin, vio9p_teardown_style_t style)
+{
+ dev_info_t *dip = vin->vin_dip;
+
+ if (style != VIRTIO_9P_TEARDOWN_PRE_MUTEX) {
+ /*
+ * Make sure we do not hold the mutex across interrupt disable.
+ */
+ VERIFY(MUTEX_NOT_HELD(&vin->vin_mutex));
+ }
+
+ ddi_remove_minor_node(dip, NULL);
+
+ if (vin->vin_virtio != NULL) {
+ /*
+ * Disable interrupts so that we can be sure our handler does
+ * not run again while we free things.
+ */
+ virtio_interrupts_disable(vin->vin_virtio);
+ }
+
+ /*
+ * Empty the free list:
+ */
+ for (;;) {
+ vio9p_req_t *vnr = list_remove_head(&vin->vin_req_freelist);
+ if (vnr == NULL) {
+ break;
+ }
+ vio9p_req_free_impl(vin, vnr);
+ }
+ VERIFY(list_is_empty(&vin->vin_req_freelist));
+ list_destroy(&vin->vin_req_freelist);
+
+ /*
+ * Any active requests should have been freed in vio9p_detach(), so
+ * there should be no other requests left at this point.
+ */
+ VERIFY0(vin->vin_nreqs);
+ VERIFY(list_is_empty(&vin->vin_reqs));
+ list_destroy(&vin->vin_reqs);
+
+ VERIFY(list_is_empty(&vin->vin_completes));
+ list_destroy(&vin->vin_completes);
+
+ /*
+ * Tear down the Virtio framework.
+ */
+ if (vin->vin_virtio != NULL) {
+ /* Every style other than DETACH is a failed attach. */
+ boolean_t failed = (style != VIRTIO_9P_TEARDOWN_DETACH);
+ virtio_fini(vin->vin_virtio, failed);
+ }
+
+ if (style != VIRTIO_9P_TEARDOWN_PRE_MUTEX) {
+ mutex_destroy(&vin->vin_mutex);
+ cv_destroy(&vin->vin_cv);
+ }
+
+ ddi_set_driver_private(dip, NULL);
+ ddi_soft_state_free(vio9p_state, ddi_get_instance(dip));
+
+ return (style == VIRTIO_9P_TEARDOWN_DETACH ? DDI_SUCCESS : DDI_FAILURE);
+}
+
+/*
+ * detach(9E) entry point.  Only DDI_DETACH is supported.
+ *
+ * If any request is not on the free list, it is treated as still being owned
+ * by the device: the device is shut down and every chain is evacuated from
+ * the queue and freed.  The remaining cleanup, including the free list and
+ * the soft state, is performed by vio9p_teardown().
+ */
+static int
+vio9p_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ vio9p_t *vin = ddi_get_driver_private(dip);
+
+ if (cmd != DDI_DETACH) {
+ return (DDI_FAILURE);
+ }
+
+ mutex_enter(&vin->vin_mutex);
+
+ /*
+ * Detach will only be called once we are no longer held open.
+ */
+ VERIFY(!vin->vin_open);
+
+ /*
+ * If a request was submitted to the hypervisor but never completed, it
+ * may still be active even though the device has been closed.
+ */
+ bool shutdown = false;
+ for (vio9p_req_t *vnr = list_head(&vin->vin_reqs);
+ vnr != NULL; vnr = list_next(&vin->vin_reqs, vnr)) {
+ if (!list_link_active(&vnr->vnr_link_free)) {
+ /*
+ * There is at least one active request. We need to
+ * reset the device to claw back the DMA memory.
+ */
+ shutdown = true;
+ break;
+ }
+ }
+
+ if (shutdown) {
+ virtio_chain_t *vic;
+
+ virtio_shutdown(vin->vin_virtio);
+ while ((vic = virtio_queue_evacuate(vin->vin_vq)) != NULL) {
+ vio9p_req_t *vnr = virtio_chain_data(vic);
+
+ virtio_dma_sync(vnr->vnr_dma_in, DDI_DMA_SYNC_FORCPU);
+
+ vio9p_req_free_impl(vin, vnr);
+ }
+ }
+
+ /* vio9p_teardown() verifies that the mutex is not held on entry. */
+ mutex_exit(&vin->vin_mutex);
+
+ return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_DETACH));
+}
+
+/*
+ * quiesce(9E) entry point.  Hands the request to the Virtio framework, or
+ * fails if this instance has no driver-private state.
+ */
+static int
+vio9p_quiesce(dev_info_t *dip)
+{
+	vio9p_t *vin = ddi_get_driver_private(dip);
+
+	if (vin == NULL) {
+		return (DDI_FAILURE);
+	}
+
+	return (virtio_quiesce(vin->vin_virtio));
+}
+
+/*
+ * open(9E) entry point.
+ *
+ * The device is a request-response channel to the hypervisor, so it may only
+ * be opened as a character device, exclusively, and for both reading and
+ * writing; anything else fails with EINVAL.  Returns ENXIO if there is no
+ * soft state for the minor, and EBUSY if the device is already open.
+ *
+ * Each successful open moves the device to a new, non-zero generation.
+ */
+static int
+vio9p_open(dev_t *dev, int flag, int otyp, cred_t *cred)
+{
+	const int required = FEXCL | FREAD | FWRITE;
+	vio9p_t *vin;
+
+	if (otyp != OTYP_CHR) {
+		return (EINVAL);
+	}
+
+	if ((flag & required) != required) {
+		return (EINVAL);
+	}
+
+	if ((vin = ddi_get_soft_state(vio9p_state, getminor(*dev))) == NULL) {
+		return (ENXIO);
+	}
+
+	mutex_enter(&vin->vin_mutex);
+
+	if (vin->vin_open) {
+		mutex_exit(&vin->vin_mutex);
+		return (EBUSY);
+	}
+
+	vin->vin_open = true;
+
+	/*
+	 * Advance the generation, skipping over zero if the counter wraps:
+	 */
+	if (++vin->vin_generation == 0) {
+		vin->vin_generation = 1;
+	}
+
+	mutex_exit(&vin->vin_mutex);
+
+	return (0);
+}
+
+/*
+ * close(9E) entry point.
+ *
+ * Discards any responses that completed but were never read, and marks the
+ * device as closed.  Returns EINVAL for anything other than a character
+ * device close, ENXIO if there is no soft state for the minor, and EIO if the
+ * device was not open.
+ */
+static int
+vio9p_close(dev_t dev, int flag, int otyp, cred_t *cred)
+{
+	vio9p_t *vin;
+	int rv;
+
+	if (otyp != OTYP_CHR) {
+		return (EINVAL);
+	}
+
+	if ((vin = ddi_get_soft_state(vio9p_state, getminor(dev))) == NULL) {
+		return (ENXIO);
+	}
+
+	mutex_enter(&vin->vin_mutex);
+
+	if (vin->vin_open) {
+		vio9p_req_t *unread;
+
+		/*
+		 * Recycle every response that is still waiting to be read:
+		 */
+		for (;;) {
+			unread = list_remove_head(&vin->vin_completes);
+			if (unread == NULL) {
+				break;
+			}
+			vio9p_req_free(vin, unread);
+		}
+
+		vin->vin_open = false;
+		rv = 0;
+	} else {
+		rv = EIO;
+	}
+
+	mutex_exit(&vin->vin_mutex);
+
+	return (rv);
+}
+
+/*
+ * ioctl(9E) entry point.
+ *
+ * The only supported command is VIO9P_IOC_MOUNT_TAG, which copies the whole
+ * mount tag buffer out to the address given in "arg".  Returns ENXIO if there
+ * is no soft state for the minor, ENOTTY for any other command, and EFAULT if
+ * the copy out fails.
+ */
+static int
+vio9p_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
+    int *rvalp)
+{
+	vio9p_t *vin;
+
+	if ((vin = ddi_get_soft_state(vio9p_state, getminor(dev))) == NULL) {
+		return (ENXIO);
+	}
+
+	if (cmd != VIO9P_IOC_MOUNT_TAG) {
+		return (ENOTTY);
+	}
+
+	if (ddi_copyout(vin->vin_tag, (void *)arg, sizeof (vin->vin_tag),
+	    mode) != 0) {
+		return (EFAULT);
+	}
+
+	return (0);
+}
+
+/*
+ * read(9E) entry point: deliver the response at the head of the completion
+ * list to the caller.
+ *
+ * If no response is available, a blocking read waits (interruptibly) for one,
+ * while a non-blocking read fails with EAGAIN.  A message is never split
+ * across reads: if the caller's buffer is smaller than the message, the read
+ * fails with EOVERFLOW and the message goes back to the head of the list.
+ *
+ * Returns 0 on success, ENXIO if there is no soft state for the minor, EINTR
+ * if the wait was interrupted by a signal, or the error from uiomove().
+ */
+static int
+vio9p_read(dev_t dev, struct uio *uio, cred_t *cred)
+{
+ bool blocking = (uio->uio_fmode & (FNDELAY | FNONBLOCK)) == 0;
+ vio9p_req_t *vnr;
+ vio9p_t *vin;
+
+ if ((vin = ddi_get_soft_state(vio9p_state, getminor(dev))) == NULL) {
+ return (ENXIO);
+ }
+
+ mutex_enter(&vin->vin_mutex);
+again:
+ if ((vnr = list_remove_head(&vin->vin_completes)) == NULL) {
+ if (!blocking) {
+ mutex_exit(&vin->vin_mutex);
+ return (EAGAIN);
+ }
+
+ /*
+ * There is nothing to read right now. Wait for something:
+ */
+ if (cv_wait_sig(&vin->vin_cv, &vin->vin_mutex) == 0) {
+ mutex_exit(&vin->vin_mutex);
+ return (EINTR);
+ }
+ goto again;
+ }
+
+ /*
+ * Determine the size of the response message using the initial size[4]
+ * field of the response. The various specifying documents that exist
+ * suggest this is an unsigned integer in little-endian order.
+ */
+ uint32_t msz;
+ bcopy(virtio_dma_va(vnr->vnr_dma_in, 0), &msz, sizeof (msz));
+ msz = LE_32(msz);
+ /*
+ * Never copy out more than the DMA buffer holds, whatever size the
+ * response itself claims.
+ */
+ if (msz > virtio_dma_size(vnr->vnr_dma_in)) {
+ msz = virtio_dma_size(vnr->vnr_dma_in);
+ }
+
+ if (msz > uio->uio_resid) {
+ /*
+ * Tell the consumer they are going to need a bigger
+ * buffer.
+ */
+ list_insert_head(&vin->vin_completes, vnr);
+ mutex_exit(&vin->vin_mutex);
+ return (EOVERFLOW);
+ }
+
+ /*
+ * Drop the mutex across the copy out.  The request was removed from
+ * the completion list above, so it is on neither the completion list
+ * nor the free list while the mutex is not held.
+ */
+ mutex_exit(&vin->vin_mutex);
+ int e = uiomove(virtio_dma_va(vnr->vnr_dma_in, 0), msz, UIO_READ, uio);
+ mutex_enter(&vin->vin_mutex);
+
+ if (e == 0) {
+ vio9p_req_free(vin, vnr);
+ } else {
+ /*
+ * Put the response back in the list for another try, so that
+ * we do not drop any messages:
+ */
+ list_insert_head(&vin->vin_completes, vnr);
+ }
+
+ mutex_exit(&vin->vin_mutex);
+ return (e);
+}
+
+/*
+ * write(9E) entry point: submit one 9P request message to the hypervisor.
+ *
+ * The entire write is treated as a single message.  It must be at least 7
+ * bytes long and no larger than VIRTIO_9P_REQ_SIZE.  The message is copied
+ * into the outbound DMA buffer of a request and the request's chain is
+ * submitted to the device; the response is later collected by vio9p_poll()
+ * and handed out by vio9p_read().
+ *
+ * Returns 0 on success, or:
+ *	EINVAL		the message is shorter than 7 bytes
+ *	EMSGSIZE	the message is larger than VIRTIO_9P_REQ_SIZE
+ *	ENXIO		there is no soft state for the minor
+ *	EAGAIN		non-blocking write and no request is available
+ *	EINTR		blocking write interrupted by a signal while waiting
+ *			for a request to become available
+ * or the error from uiomove().
+ */
+static int
+vio9p_write(dev_t dev, struct uio *uio, cred_t *cred)
+{
+	bool blocking = (uio->uio_fmode & (FNDELAY | FNONBLOCK)) == 0;
+
+	size_t wsz = uio->uio_resid;
+	if (wsz < 7) {
+		/*
+		 * Requests should be well-formed 9P messages.  They must
+		 * contain at least 7 bytes: size[4] + type[1] + tag[2].
+		 */
+		return (EINVAL);
+	} else if (wsz > VIRTIO_9P_REQ_SIZE) {
+		return (EMSGSIZE);
+	}
+
+	vio9p_t *vin = ddi_get_soft_state(vio9p_state, getminor(dev));
+	if (vin == NULL) {
+		return (ENXIO);
+	}
+
+	mutex_enter(&vin->vin_mutex);
+	vio9p_req_t *vnr = vio9p_req_alloc(vin, blocking);
+	if (vnr == NULL) {
+		mutex_exit(&vin->vin_mutex);
+		/*
+		 * A blocking allocation only fails when the wait for a free
+		 * request is interrupted by a signal, so report EINTR, as
+		 * vio9p_read() does for an interrupted wait.
+		 */
+		return (blocking ? EINTR : EAGAIN);
+	}
+	/* Tag the request with the current open; see vio9p_poll(). */
+	vnr->vnr_generation = vin->vin_generation;
+	VERIFY3U(wsz, <=, virtio_dma_size(vnr->vnr_dma_out));
+
+	/*
+	 * Drop the mutex across the copy in.  The request is on neither the
+	 * free list nor the completion list at this point.
+	 */
+	mutex_exit(&vin->vin_mutex);
+	int e = uiomove(virtio_dma_va(vnr->vnr_dma_out, 0), wsz, UIO_WRITE,
+	    uio);
+	mutex_enter(&vin->vin_mutex);
+
+	if (e == 0) {
+		virtio_dma_sync(vnr->vnr_dma_out, DDI_DMA_SYNC_FORDEV);
+		virtio_chain_submit(vnr->vnr_chain, B_TRUE);
+	} else {
+		/* Nothing was submitted; hand the request back. */
+		vio9p_req_free(vin, vnr);
+	}
+
+	mutex_exit(&vin->vin_mutex);
+	return (e);
+}
+
+/*
+ * Module load: set up the soft state framework and then install the module.
+ * If installation fails, the soft state framework is torn down again.
+ */
+int
+_init(void)
+{
+	int err;
+
+	err = ddi_soft_state_init(&vio9p_state, sizeof (vio9p_t), 0);
+	if (err != 0) {
+		return (err);
+	}
+
+	err = mod_install(&vio9p_modlinkage);
+	if (err != 0) {
+		ddi_soft_state_fini(&vio9p_state);
+	}
+
+	return (err);
+}
+
+/*
+ * Module unload: remove the module and, only if that succeeds, tear down the
+ * soft state framework.
+ */
+int
+_fini(void)
+{
+	int err = mod_remove(&vio9p_modlinkage);
+
+	if (err == 0) {
+		ddi_soft_state_fini(&vio9p_state);
+	}
+
+	return (err);
+}
+
+/*
+ * Module information: answered by mod_info() using this module's linkage.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv = mod_info(&vio9p_modlinkage, modinfop);
+
+	return (rv);
+}
diff --git a/usr/src/uts/common/io/vio9p/vio9p_impl.h b/usr/src/uts/common/io/vio9p/vio9p_impl.h
new file mode 100644
index 0000000000..f8718c1ed2
--- /dev/null
+++ b/usr/src/uts/common/io/vio9p/vio9p_impl.h
@@ -0,0 +1,126 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+/*
+ * VIRTIO 9P DRIVER
+ */
+
+#ifndef _VIO9P_IMPL_H
+#define _VIO9P_IMPL_H
+
+#include "virtio.h"
+#include <sys/vio9p.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * VIRTIO 9P CONFIGURATION REGISTERS
+ *
+ * These are offsets into the device-specific configuration space available
+ * through the virtio_dev_*() family of functions.
+ */
+#define VIRTIO_9P_CONFIG_TAG_SZ 0x00 /* 16 R */
+#define VIRTIO_9P_CONFIG_TAG 0x02 /* SZ R */
+
+/*
+ * VIRTIO 9P VIRTQUEUES
+ *
+ * Virtio 9P devices have just one queue which is used to make 9P requests.
+ * Each submitted chain should include appropriately sized inbound and outbound
+ * descriptors for the request and response messages. The maximum size is
+ * negotiated via the "msize" member of the 9P TVERSION request and RVERSION
+ * response. Some hypervisors may require the first 7 bytes (size, type, tag)
+ * to be contiguous in the first descriptor.
+ */
+#define VIRTIO_9P_VIRTQ_REQUESTS 0
+
+/*
+ * VIRTIO 9P FEATURE BITS
+ */
+#define VIRTIO_9P_F_MOUNT_TAG (1ULL << 0)
+
+/*
+ * These features are supported by the driver and we will request them from the
+ * device.
+ */
+#define VIRTIO_9P_WANTED_FEATURES (VIRTIO_9P_F_MOUNT_TAG)
+
+/*
+ * DRIVER PARAMETERS
+ */
+#define VIRTIO_9P_MAX_REQS 16
+#define VIRTIO_9P_REQ_SIZE 8192
+
+/*
+ * It is not clear that there is a well-defined number of cookies for this
+ * interface; QEMU may support as many as there are direct descriptors in the
+ * ring, and bhyve may support something like 128. We'll use a conservative
+ * number that's large enough to ensure we'll be able to allocate without
+ * requiring contiguous pages.
+ */
+#define VIRTIO_9P_MAX_SGL 8
+
+/*
+ * TYPE DEFINITIONS
+ */
+
+/*
+ * Identifies the context from which driver teardown is being requested.
+ * NOTE(review): the consumer of this enum is not in this header; judging by
+ * the names, PRE_MUTEX is for failures before vin_mutex exists, ATTACH for
+ * later attach failures, and DETACH for a normal detach -- confirm against
+ * the teardown routine in vio9p.c.
+ */
+typedef enum vio9p_teardown_style {
+ VIRTIO_9P_TEARDOWN_PRE_MUTEX,
+ VIRTIO_9P_TEARDOWN_ATTACH,
+ VIRTIO_9P_TEARDOWN_DETACH,
+} vio9p_teardown_style_t;
+
+/*
+ * Tracking structure for a single 9P request and its response.
+ */
+typedef struct vio9p_req {
+ /* NOTE(review): presumably the buffer the device writes the response to */
+ virtio_dma_t *vnr_dma_in;
+ /* Request buffer; vio9p_write() copies the caller's message into it */
+ virtio_dma_t *vnr_dma_out;
+ /* Descriptor chain handed to virtio_chain_submit() */
+ virtio_chain_t *vnr_chain;
+ /*
+ * List linkage.  NOTE(review): presumably for vin_reqs, vin_completes
+ * and vin_req_freelist respectively -- confirm against list_create().
+ */
+ list_node_t vnr_link;
+ list_node_t vnr_link_complete;
+ list_node_t vnr_link_free;
+ /* Copy of vin_generation taken when the request was written */
+ uint64_t vnr_generation;
+} vio9p_req_t;
+
+/*
+ * Per-instance driver state.  This is the soft state object: it is sized by
+ * ddi_soft_state_init() and looked up by minor number with
+ * ddi_get_soft_state().
+ */
+typedef struct vio9p {
+ dev_info_t *vin_dip;
+ virtio_t *vin_virtio;
+ virtio_queue_t *vin_vq;
+
+ /*
+ * vin_mutex is held while requests are allocated, submitted or freed
+ * and while the vin_completes list is manipulated.
+ * NOTE(review): vin_cv is presumably used to wait for completions or
+ * free requests -- confirm against vio9p.c.
+ */
+ kmutex_t vin_mutex;
+ kcondvar_t vin_cv;
+
+ /*
+ * When the device is opened, select a generation number. This will be
+ * used to discard completed responses that arrive after the device was
+ * closed and reopened.
+ */
+ uint64_t vin_generation;
+ bool vin_open;
+
+ /*
+ * Request tracking.  vin_completes holds responses that have not yet
+ * been consumed by a reader.  NOTE(review): vin_nreqs is presumably
+ * the number of entries on vin_reqs -- confirm.
+ */
+ uint_t vin_nreqs;
+ list_t vin_reqs;
+ list_t vin_completes;
+
+ list_t vin_req_freelist;
+
+ /* Mount tag; sized for VIRTIO_9P_TAGLEN bytes plus a terminating NUL */
+ char vin_tag[VIO9P_MOUNT_TAG_SIZE];
+} vio9p_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VIO9P_IMPL_H */
diff --git a/usr/src/uts/common/io/vioblk/vioblk.c b/usr/src/uts/common/io/vioblk/vioblk.c
index f6649bdd12..1c00d67184 100644
--- a/usr/src/uts/common/io/vioblk/vioblk.c
+++ b/usr/src/uts/common/io/vioblk/vioblk.c
@@ -981,7 +981,7 @@ vioblk_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
virtio_register_cfgchange_handler(vio, vioblk_cfgchange, vib);
- if (virtio_init_complete(vio, 0) != DDI_SUCCESS) {
+ if (virtio_init_complete(vio, VIRTIO_ANY_INTR_TYPE) != DDI_SUCCESS) {
dev_err(dip, CE_WARN, "failed to complete Virtio init");
goto fail;
}
diff --git a/usr/src/uts/common/io/vioif/vioif.c b/usr/src/uts/common/io/vioif/vioif.c
index ae1e2d4ee2..74f1d46a69 100644
--- a/usr/src/uts/common/io/vioif/vioif.c
+++ b/usr/src/uts/common/io/vioif/vioif.c
@@ -1916,7 +1916,7 @@ vioif_select_interrupt_types(void)
* The system may not have valid SMBIOS data, so ignore a
* failure here.
*/
- return (0);
+ return (VIRTIO_ANY_INTR_TYPE);
}
if (strcmp(info.smbi_manufacturer, "Google") == 0 &&
@@ -1931,7 +1931,7 @@ vioif_select_interrupt_types(void)
return (DDI_INTR_TYPE_FIXED);
}
- return (0);
+ return (VIRTIO_ANY_INTR_TYPE);
}
static int
diff --git a/usr/src/uts/common/io/vioscsi/vioscsi.c b/usr/src/uts/common/io/vioscsi/vioscsi.c
index 0c83b33489..6d3d1e374a 100644
--- a/usr/src/uts/common/io/vioscsi/vioscsi.c
+++ b/usr/src/uts/common/io/vioscsi/vioscsi.c
@@ -1263,7 +1263,7 @@ vioscsi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
return (DDI_FAILURE);
}
- if (virtio_init_complete(vio, 0) != DDI_SUCCESS) {
+ if (virtio_init_complete(vio, VIRTIO_ANY_INTR_TYPE) != DDI_SUCCESS) {
dev_err(dip, CE_WARN, "virtio_init_complete failed");
vioscsi_teardown(sc, B_TRUE);
return (DDI_FAILURE);
diff --git a/usr/src/uts/common/io/virtio/virtio.h b/usr/src/uts/common/io/virtio/virtio.h
index 48e15b28f2..820bc3b811 100644
--- a/usr/src/uts/common/io/virtio/virtio.h
+++ b/usr/src/uts/common/io/virtio/virtio.h
@@ -350,6 +350,12 @@ uint_t virtio_dma_ncookies(virtio_dma_t *);
uint64_t virtio_dma_cookie_pa(virtio_dma_t *, uint_t);
size_t virtio_dma_cookie_size(virtio_dma_t *, uint_t);
+/*
+ * virtio_init_complete() accepts a mask of allowed interrupt types using the
+ * DDI_INTR_TYPE_* family of constants. If no specific interrupt type is
+ * required, pass VIRTIO_ANY_INTR_TYPE instead:
+ */
+#define VIRTIO_ANY_INTR_TYPE 0
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/io/virtio/virtio_dma.c b/usr/src/uts/common/io/virtio/virtio_dma.c
index 81972b5402..b2cbbb2acf 100644
--- a/usr/src/uts/common/io/virtio/virtio_dma.c
+++ b/usr/src/uts/common/io/virtio/virtio_dma.c
@@ -11,6 +11,7 @@
/*
* Copyright 2019 Joyent, Inc.
+ * Copyright 2022 Oxide Computer Company
*/
/*
@@ -40,7 +41,21 @@
#include "virtio.h"
#include "virtio_impl.h"
+typedef int (dma_wait_t)(caddr_t);
+
+/*
+ * Translate a kmem allocation flag into the matching DDI DMA wait callback:
+ * KM_SLEEP maps to DDI_DMA_SLEEP, while KM_NOSLEEP and KM_NOSLEEP_LAZY both
+ * map to DDI_DMA_DONTWAIT.  Any other value is a programming error and
+ * causes a panic.
+ */
+static dma_wait_t *
+virtio_dma_wait_from_kmflags(int kmflags)
+{
+	if (kmflags == KM_SLEEP) {
+		return (DDI_DMA_SLEEP);
+	}
+
+	if (kmflags == KM_NOSLEEP || kmflags == KM_NOSLEEP_LAZY) {
+		return (DDI_DMA_DONTWAIT);
+	}
+
+	panic("unexpected kmflags value 0x%x", kmflags);
+}
void
virtio_dma_sync(virtio_dma_t *vidma, int flag)
@@ -90,10 +105,7 @@ virtio_dma_init_handle(virtio_t *vio, virtio_dma_t *vidma,
{
int r;
dev_info_t *dip = vio->vio_dip;
-
- VERIFY(kmflags == KM_SLEEP || kmflags == KM_NOSLEEP);
- int (*dma_wait)(caddr_t) = (kmflags == KM_SLEEP) ? DDI_DMA_SLEEP :
- DDI_DMA_DONTWAIT;
+ int (*dma_wait)(caddr_t) = virtio_dma_wait_from_kmflags(kmflags);
vidma->vidma_virtio = vio;
@@ -124,10 +136,7 @@ virtio_dma_init(virtio_t *vio, virtio_dma_t *vidma, size_t sz,
int r;
dev_info_t *dip = vio->vio_dip;
caddr_t va = NULL;
-
- VERIFY(kmflags == KM_SLEEP || kmflags == KM_NOSLEEP);
- int (*dma_wait)(caddr_t) = (kmflags == KM_SLEEP) ? DDI_DMA_SLEEP :
- DDI_DMA_DONTWAIT;
+ int (*dma_wait)(caddr_t) = virtio_dma_wait_from_kmflags(kmflags);
if (virtio_dma_init_handle(vio, vidma, attr, kmflags) !=
DDI_SUCCESS) {
@@ -168,10 +177,7 @@ virtio_dma_bind(virtio_dma_t *vidma, void *va, size_t sz, int dmaflags,
int r;
dev_info_t *dip = vidma->vidma_virtio->vio_dip;
ddi_dma_cookie_t dmac;
-
- VERIFY(kmflags == KM_SLEEP || kmflags == KM_NOSLEEP);
- int (*dma_wait)(caddr_t) = (kmflags == KM_SLEEP) ? DDI_DMA_SLEEP :
- DDI_DMA_DONTWAIT;
+ int (*dma_wait)(caddr_t) = virtio_dma_wait_from_kmflags(kmflags);
VERIFY(vidma->vidma_level & VIRTIO_DMALEVEL_HANDLE_ALLOC);
VERIFY(!(vidma->vidma_level & VIRTIO_DMALEVEL_HANDLE_BOUND));
diff --git a/usr/src/uts/common/io/virtio/virtio_main.c b/usr/src/uts/common/io/virtio/virtio_main.c
index 28dce6dc92..ec8bcd9f22 100644
--- a/usr/src/uts/common/io/virtio/virtio_main.c
+++ b/usr/src/uts/common/io/virtio/virtio_main.c
@@ -1440,7 +1440,7 @@ virtio_interrupts_setup(virtio_t *vio, int allow_types)
return (DDI_FAILURE);
}
- if (allow_types != 0) {
+ if (allow_types != VIRTIO_ANY_INTR_TYPE) {
/*
* Restrict the possible interrupt types at the request of the
* driver.
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index c761028491..3c22bb04ce 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -660,6 +660,7 @@ CHKHDRS= \
vfstab.h \
vgareg.h \
videodev2.h \
+ vio9p.h \
visual_io.h \
vlan.h \
vm.h \
diff --git a/usr/src/uts/common/sys/lockstat.h b/usr/src/uts/common/sys/lockstat.h
index 308b96326c..f2a10dcc6b 100644
--- a/usr/src/uts/common/sys/lockstat.h
+++ b/usr/src/uts/common/sys/lockstat.h
@@ -26,8 +26,6 @@
#ifndef _SYS_LOCKSTAT_H
#define _SYS_LOCKSTAT_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/dtrace.h>
#ifdef __cplusplus
@@ -123,7 +121,7 @@ extern void (*lockstat_probe)(dtrace_id_t, uintptr_t, uintptr_t,
extern int lockstat_active_threads(void);
extern int lockstat_depth(void);
-extern void lockstat_hot_patch(void);
+extern void lockstat_hotpatch_probe(int);
/*
* Macros to record lockstat probes.
@@ -175,10 +173,10 @@ extern void lockstat_hot_patch(void);
if (t_spin) { \
t_spin = gethrtime_waitfree() - t_spin; \
t_spin = CLAMP32(t_spin); \
- } \
+ } \
(*lockstat_probe)(id, (uintptr_t)(lp), t_spin, \
0, 0, 0); \
- } \
+ } \
curthread->t_lockstat--; \
}
diff --git a/usr/src/uts/common/sys/vio9p.h b/usr/src/uts/common/sys/vio9p.h
new file mode 100644
index 0000000000..359862e797
--- /dev/null
+++ b/usr/src/uts/common/sys/vio9p.h
@@ -0,0 +1,49 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#ifndef _SYS_VIO9P_H
+#define _SYS_VIO9P_H
+
+/*
+ * VIRTIO 9P DRIVER
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * If the hypervisor supports mount tags through the VIRTIO_9P_F_MOUNT_TAG
+ * feature, they will have at most this many bytes:
+ */
+#define VIRTIO_9P_TAGLEN 32
+
+/*
+ * ioctl(2) support for 9P channel devices:
+ */
+#define VIO9P_IOC_BASE (('9' << 16) | ('P' << 8))
+#define VIO9P_IOC_MOUNT_TAG (VIO9P_IOC_BASE | 0x01)
+
+/*
+ * Buffer size for the VIO9P_IOC_MOUNT_TAG ioctl, which includes one byte
+ * beyond the maximum tag length for NUL termination:
+ */
+#define VIO9P_MOUNT_TAG_SIZE (VIRTIO_9P_TAGLEN + 1)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VIO9P_H */
diff --git a/usr/src/uts/i86pc/Makefile.workarounds b/usr/src/uts/i86pc/Makefile.workarounds
index 2300e74393..21a6ff9155 100644
--- a/usr/src/uts/i86pc/Makefile.workarounds
+++ b/usr/src/uts/i86pc/Makefile.workarounds
@@ -101,9 +101,10 @@ WORKAROUND_DEFS += -DOPTERON_ERRATUM_131
WORKAROUND_DEFS += -DOPTERON_WORKAROUND_6336786
#
-# Mutex primitives don't work as expected
+# Potential Violation of Read Ordering Rules Between Semaphore Operations and
+# Unlocked Read-Modify-Write Instructions
#
-WORKAROUND_DEFS += -DOPTERON_WORKAROUND_6323525
+WORKAROUND_DEFS += -DOPTERON_ERRATUM_147
#
# Some Registered DIMMs incompatible with address parity feature
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index 0b42cad19d..35476bb9ed 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -6281,7 +6281,7 @@ cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
return (!(regs.cp_edx & 0x100));
}
return (0);
- case 6323525:
+ case 147:
/*
* This erratum (K8 #147) is not present on family 10 and newer.
*/
diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c
index e90dc6466a..5310c79db9 100644
--- a/usr/src/uts/i86pc/os/mp_startup.c
+++ b/usr/src/uts/i86pc/os/mp_startup.c
@@ -30,6 +30,7 @@
* Copyright 2020 Joyent, Inc.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
* Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2022 Oxide Computer Company
*/
#include <sys/types.h>
@@ -669,8 +670,8 @@ int opteron_workaround_6336786; /* non-zero -> WA relevant and applied */
int opteron_workaround_6336786_UP = 0; /* Not needed for UP */
#endif
-#if defined(OPTERON_WORKAROUND_6323525)
-int opteron_workaround_6323525; /* if non-zero -> at least one cpu has it */
+#if defined(OPTERON_ERRATUM_147)
+int opteron_erratum_147; /* if non-zero -> at least one cpu has it */
#endif
#if defined(OPTERON_ERRATUM_298)
@@ -1166,8 +1167,8 @@ workaround_errata(struct cpu *cpu)
* 'Revision Guide for AMD Athlon 64 and AMD Opteron Processors'
* document 25759.
*/
- if (cpuid_opteron_erratum(cpu, 6323525) > 0) {
-#if defined(OPTERON_WORKAROUND_6323525)
+ if (cpuid_opteron_erratum(cpu, 147) > 0) {
+#if defined(OPTERON_ERRATUM_147)
/*
* This problem only occurs with 2 or more cores. If bit in
* MSR_AMD_BU_CFG set, then not applicable. The workaround
@@ -1178,8 +1179,8 @@ workaround_errata(struct cpu *cpu)
* It is too early in boot to call the patch routine so
* set erratum variable to be done in startup_end().
*/
- if (opteron_workaround_6323525) {
- opteron_workaround_6323525++;
+ if (opteron_erratum_147) {
+ opteron_erratum_147++;
#if defined(__xpv)
} else if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
if (DOMAIN_IS_INITDOMAIN(xen_info)) {
@@ -1188,7 +1189,7 @@ workaround_errata(struct cpu *cpu)
* operations are supported?
*/
if (xpv_nr_phys_cpus() > 1)
- opteron_workaround_6323525++;
+ opteron_erratum_147++;
} else {
/*
* We have no way to tell how many physical
@@ -1196,18 +1197,18 @@ workaround_errata(struct cpu *cpu)
* has the problem, so enable the workaround
* unconditionally (at some performance cost).
*/
- opteron_workaround_6323525++;
+ opteron_erratum_147++;
}
#else /* __xpv */
} else if (is_x86_feature(x86_featureset, X86FSET_SSE2) &&
((opteron_get_nnodes() *
cpuid_get_ncpu_per_chip(cpu)) > 1)) {
if ((xrdmsr(MSR_AMD_BU_CFG) & (UINT64_C(1) << 33)) == 0)
- opteron_workaround_6323525++;
+ opteron_erratum_147++;
#endif /* __xpv */
}
#else
- workaround_warning(cpu, 6323525);
+ workaround_warning(cpu, 147);
missing++;
#endif
}
@@ -1306,9 +1307,9 @@ workaround_errata_end()
if (opteron_workaround_6336786)
workaround_applied(6336786);
#endif
-#if defined(OPTERON_WORKAROUND_6323525)
- if (opteron_workaround_6323525)
- workaround_applied(6323525);
+#if defined(OPTERON_ERRATUM_147)
+ if (opteron_erratum_147)
+ workaround_applied(147);
#endif
#if defined(OPTERON_ERRATUM_298)
if (opteron_erratum_298) {
diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c
index 416b3fb520..a8d3a35908 100644
--- a/usr/src/uts/i86pc/os/startup.c
+++ b/usr/src/uts/i86pc/os/startup.c
@@ -25,7 +25,7 @@
* Copyright 2017 Nexenta Systems, Inc.
* Copyright 2020 Joyent, Inc.
* Copyright (c) 2015 by Delphix. All rights reserved.
- * Copyright 2020 Oxide Computer Company
+ * Copyright 2022 Oxide Computer Company
* Copyright (c) 2020 Carlos Neira <cneirabustos@gmail.com>
*/
/*
@@ -2125,9 +2125,9 @@ startup_end(void)
*/
cpu_event_init();
-#if defined(OPTERON_WORKAROUND_6323525)
- if (opteron_workaround_6323525)
- patch_workaround_6323525();
+#if defined(OPTERON_ERRATUM_147)
+ if (opteron_erratum_147)
+ patch_erratum_147();
#endif
/*
* If needed, load TOD module now so that ddi_get_time(9F) etc. work
diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel
index b635e100e1..cb668e8a4b 100644
--- a/usr/src/uts/intel/Makefile.intel
+++ b/usr/src/uts/intel/Makefile.intel
@@ -418,6 +418,9 @@ DRV_KMODS += vioblk
DRV_KMODS += vioif
DRV_KMODS += vioscsi
+# Virtio 9P transport driver
+DRV_KMODS += vio9p
+
#
# DTrace and DTrace Providers
#
diff --git a/usr/src/uts/intel/ml/lock_prim.s b/usr/src/uts/intel/ml/lock_prim.s
index 4267561bf7..ce2e093343 100644
--- a/usr/src/uts/intel/ml/lock_prim.s
+++ b/usr/src/uts/intel/ml/lock_prim.s
@@ -25,6 +25,7 @@
/*
* Copyright 2019 Joyent, Inc.
+ * Copyright 2022 Oxide Computer Company
*/
#include "assym.h"
@@ -36,6 +37,38 @@
#include <sys/rwlock_impl.h>
#include <sys/lockstat.h>
+
+#if defined(OPTERON_ERRATUM_147)
+
+/*
+ * Leave space for an lfence to be inserted if required by a CPU which suffers
+ * from this erratum. Pad (with nops) the location for the lfence so that it
+ * is adequately aligned for atomic hotpatching.
+ */
+#define ERRATUM147_PATCH_POINT(name) \
+ .align 4, NOP_INSTR; \
+./**/name/**/_147_patch_point: \
+ nop; \
+ nop; \
+ nop; \
+ nop;
+
+#else /* defined(OPTERON_ERRATUM_147) */
+
+/* Empty macro so ifdefs are not required for all of the erratum sites. */
+#define ERRATUM147_PATCH_POINT(name)
+
+#endif /* defined(OPTERON_ERRATUM_147) */
+
+/*
+ * Patch point for lockstat probes. When the associated probe is disabled, it
+ * will 'ret' from the function. It is hotpatched to allow execution to fall
+ * through when the probe is enabled.
+ */
+#define LOCKSTAT_RET(name) \
+./**/name/**/_lockstat_patch_point: \
+ ret;
+
/*
* lock_try(lp), ulock_try(lp)
* - returns non-zero on success.
@@ -51,8 +84,8 @@
movzbq %dl, %rax
xchgb %dl, (%rdi)
xorb %dl, %al
-.lock_try_lockstat_patch_point:
- ret
+ LOCKSTAT_RET(lock_try)
+
testb %al, %al
jnz 0f
ret
@@ -79,7 +112,7 @@
movq %rdi, %r12 /* preserve lock ptr for debugging */
leaq .ulock_panic_msg(%rip), %rdi
- pushq %rbp /* align stack properly */
+ pushq %rbp
movq %rsp, %rbp
xorl %eax, %eax /* clear for varargs */
call panic
@@ -107,8 +140,8 @@ ulock_pass:
ENTRY(lock_clear)
movb $0, (%rdi)
-.lock_clear_lockstat_patch_point:
- ret
+ LOCKSTAT_RET(lock_clear)
+
movq %rdi, %rsi /* rsi = lock addr */
movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */
movl $LS_LOCK_CLEAR_RELEASE, %edi /* edi = event */
@@ -122,7 +155,7 @@ ulock_pass:
jb ulock_clr /* uaddr < kernelbase, proceed */
leaq .ulock_clear_msg(%rip), %rdi
- pushq %rbp /* align stack properly */
+ pushq %rbp
movq %rsp, %rbp
xorl %eax, %eax /* clear for varargs */
call panic
@@ -163,12 +196,13 @@ ulock_clr:
movq 16(%rsp), %rdx /* rdx = old pil addr */
movw %ax, (%rdx) /* store old pil */
leave
-.lock_set_spl_lockstat_patch_point:
- ret
+ LOCKSTAT_RET(lock_set_spl)
+
movq %rdi, %rsi /* rsi = lock addr */
movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */
movl $LS_LOCK_SET_SPL_ACQUIRE, %edi
jmp lockstat_wrapper
+
.lss_miss:
movl 8(%rsp), %esi /* new_pil */
movq 16(%rsp), %rdx /* old_pil_addr */
@@ -197,8 +231,8 @@ ulock_clr:
xchgb %dl, (%rdi) /* try to set lock */
testb %dl, %dl /* did we get it? */
jnz lock_set_spin /* no, go to C for the hard case */
-.lock_set_lockstat_patch_point:
- ret
+ LOCKSTAT_RET(lock_set)
+
movq %rdi, %rsi /* rsi = lock addr */
movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */
movl $LS_LOCK_SET_ACQUIRE, %edi
@@ -210,40 +244,21 @@ ulock_clr:
*/
ENTRY(lock_clear_splx)
- movb $0, (%rdi) /* clear lock */
-.lock_clear_splx_lockstat_patch_point:
- jmp 0f
-0:
- movl %esi, %edi /* arg for splx */
- jmp splx /* let splx do its thing */
-.lock_clear_splx_lockstat:
- pushq %rbp /* align stack properly */
+ pushq %rbp
movq %rsp, %rbp
- subq $16, %rsp /* space to save args across splx */
- movq %rdi, 8(%rsp) /* save lock ptr across splx call */
- movl %esi, %edi /* arg for splx */
- call splx /* lower the priority */
- movq 8(%rsp), %rsi /* rsi = lock ptr */
- leave /* unwind stack */
+ pushq %rdi /* save lp across call for lockstat */
+ movb $0, (%rdi) /* clear lock */
+ movl %esi, %edi /* arg for splx */
+ call splx /* let splx do its thing */
+ popq %rsi /* retrieve lp for lockstat */
+ leave
+ LOCKSTAT_RET(lock_clear_splx)
+
+ /*
+ * Only reached when the lockstat probe is enabled, i.e. when the 'ret'
+ * at the patch point above has been hotpatched to a nop.
+ */
movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */
movl $LS_LOCK_CLEAR_SPLX_RELEASE, %edi
jmp lockstat_wrapper
SET_SIZE(lock_clear_splx)
-#if defined(__GNUC_AS__)
-#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL \
- (.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2)
-
-#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT \
- (.lock_clear_splx_lockstat_patch_point + 1)
-#else
-#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL \
- [.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2]
-
-#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT \
- [.lock_clear_splx_lockstat_patch_point + 1]
-#endif
-
/*
* mutex_enter() and mutex_exit().
*
@@ -261,11 +276,6 @@ ulock_clr:
* If we interrupt a thread in mutex_exit() that has not yet cleared
* the lock, cmnint() resets its PC back to the beginning of
* mutex_exit() so it will check again for waiters when it resumes.
- *
- * The lockstat code below is activated when the lockstat driver
- * calls lockstat_hot_patch() to hot-patch the kernel mutex code.
- * Note that we don't need to test lockstat_event_mask here -- we won't
- * patch this code in unless we're gathering ADAPTIVE_HOLD lockstats.
*/
ENTRY_NP(mutex_enter)
@@ -274,29 +284,27 @@ ulock_clr:
lock
cmpxchgq %rdx, (%rdi)
jnz mutex_vector_enter
-.mutex_enter_lockstat_patch_point:
-#if defined(OPTERON_WORKAROUND_6323525)
-.mutex_enter_6323525_patch_point:
- ret /* nop space for lfence */
- nop
- nop
-.mutex_enter_lockstat_6323525_patch_point: /* new patch point if lfence */
- nop
-#else /* OPTERON_WORKAROUND_6323525 */
- ret
-#endif /* OPTERON_WORKAROUND_6323525 */
+
+ ERRATUM147_PATCH_POINT(mutex_enter)
+
+ LOCKSTAT_RET(mutex_enter)
+
movq %rdi, %rsi
movl $LS_MUTEX_ENTER_ACQUIRE, %edi
+ jmp lockstat_wrapper
+ SET_SIZE(mutex_enter)
+
+
/*
* expects %rdx=thread, %rsi=lock, %edi=lockstat event
*/
- ALTENTRY(lockstat_wrapper)
+ ENTRY_NP(lockstat_wrapper)
incb T_LOCKSTAT(%rdx) /* curthread->t_lockstat++ */
leaq lockstat_probemap(%rip), %rax
movl (%rax, %rdi, DTRACE_IDSIZE), %eax
testl %eax, %eax /* check for non-zero probe */
jz 1f
- pushq %rbp /* align stack properly */
+ pushq %rbp
movq %rsp, %rbp
movl %eax, %edi
movq lockstat_probe, %rax
@@ -308,7 +316,6 @@ ulock_clr:
movl $1, %eax /* return success if tryenter */
ret
SET_SIZE(lockstat_wrapper)
- SET_SIZE(mutex_enter)
/*
* expects %rcx=thread, %rdx=arg, %rsi=lock, %edi=lockstat event
@@ -319,7 +326,7 @@ ulock_clr:
movl (%rax, %rdi, DTRACE_IDSIZE), %eax
testl %eax, %eax /* check for non-zero probe */
jz 1f
- pushq %rbp /* align stack properly */
+ pushq %rbp
movq %rsp, %rbp
movl %eax, %edi
movq lockstat_probe, %rax
@@ -340,20 +347,13 @@ ulock_clr:
cmpxchgq %rdx, (%rdi)
jnz mutex_vector_tryenter
not %eax /* return success (nonzero) */
-#if defined(OPTERON_WORKAROUND_6323525)
-.mutex_tryenter_lockstat_patch_point:
-.mutex_tryenter_6323525_patch_point:
- ret /* nop space for lfence */
- nop
- nop
-.mutex_tryenter_lockstat_6323525_patch_point: /* new patch point if lfence */
- nop
-#else /* OPTERON_WORKAROUND_6323525 */
-.mutex_tryenter_lockstat_patch_point:
- ret
-#endif /* OPTERON_WORKAROUND_6323525 */
+
+ ERRATUM147_PATCH_POINT(mutex_tryenter)
+
+ LOCKSTAT_RET(mutex_tryenter)
+
movq %rdi, %rsi
- movl $LS_MUTEX_ENTER_ACQUIRE, %edi
+ movl $LS_MUTEX_TRYENTER_ACQUIRE, %edi
jmp lockstat_wrapper
SET_SIZE(mutex_tryenter)
@@ -364,15 +364,10 @@ ulock_clr:
cmpxchgq %rdx, (%rdi)
jnz 0f
not %eax /* return success (nonzero) */
-#if defined(OPTERON_WORKAROUND_6323525)
-.mutex_atryenter_6323525_patch_point:
- ret /* nop space for lfence */
- nop
- nop
- nop
-#else /* OPTERON_WORKAROUND_6323525 */
+
+ ERRATUM147_PATCH_POINT(mutex_atryenter)
+
ret
-#endif /* OPTERON_WORKAROUND_6323525 */
0:
xorl %eax, %eax /* return failure */
ret
@@ -415,8 +410,8 @@ mutex_exit_critical_start: /* If interrupted, restart here */
jne mutex_vector_exit /* wrong type or wrong owner */
movq $0, (%rdi) /* clear owner AND lock */
.mutex_exit_critical_end:
-.mutex_exit_lockstat_patch_point:
- ret
+ LOCKSTAT_RET(mutex_exit)
+
movq %rdi, %rsi
movl $LS_MUTEX_EXIT_RELEASE, %edi
jmp lockstat_wrapper
@@ -448,13 +443,14 @@ mutex_exit_critical_size:
lock
cmpxchgq %rdx, (%rdi) /* try to grab read lock */
jnz rw_enter_sleep
-.rw_read_enter_lockstat_patch_point:
- ret
+ LOCKSTAT_RET(rw_read_enter)
+
movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */
movq %rdi, %rsi /* rsi = lock ptr */
movl $LS_RW_ENTER_ACQUIRE, %edi
movl $RW_READER, %edx
jmp lockstat_wrapper_arg
+
.rw_write_enter:
movq %gs:CPU_THREAD, %rdx
orq $RW_WRITE_LOCKED, %rdx /* rdx = write-locked value */
@@ -463,18 +459,9 @@ mutex_exit_critical_size:
cmpxchgq %rdx, (%rdi) /* try to grab write lock */
jnz rw_enter_sleep
-#if defined(OPTERON_WORKAROUND_6323525)
-.rw_write_enter_lockstat_patch_point:
-.rw_write_enter_6323525_patch_point:
- ret
- nop
- nop
-.rw_write_enter_lockstat_6323525_patch_point:
- nop
-#else /* OPTERON_WORKAROUND_6323525 */
-.rw_write_enter_lockstat_patch_point:
- ret
-#endif /* OPTERON_WORKAROUND_6323525 */
+ ERRATUM147_PATCH_POINT(rw_write_enter)
+
+ LOCKSTAT_RET(rw_write_enter)
movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */
movq %rdi, %rsi /* rsi = lock ptr */
@@ -492,13 +479,14 @@ mutex_exit_critical_size:
lock
cmpxchgq %rdx, (%rdi) /* try to drop read lock */
jnz rw_exit_wakeup
-.rw_read_exit_lockstat_patch_point:
- ret
+ LOCKSTAT_RET(rw_read_exit)
+
movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */
movq %rdi, %rsi /* rsi = lock ptr */
movl $LS_RW_EXIT_RELEASE, %edi
movl $RW_READER, %edx
jmp lockstat_wrapper_arg
+
.rw_not_single_reader:
testl $RW_WRITE_LOCKED, %eax /* write-locked or write-wanted? */
jnz .rw_write_exit
@@ -513,8 +501,8 @@ mutex_exit_critical_size:
lock
cmpxchgq %rdx, (%rdi) /* try to drop read lock */
jnz rw_exit_wakeup
-.rw_write_exit_lockstat_patch_point:
- ret
+ LOCKSTAT_RET(rw_write_exit)
+
movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */
movq %rdi, %rsi /* rsi - lock ptr */
movl $LS_RW_EXIT_RELEASE, %edi
@@ -522,149 +510,131 @@ mutex_exit_critical_size:
jmp lockstat_wrapper_arg
SET_SIZE(rw_exit)
-#if defined(OPTERON_WORKAROUND_6323525)
+#if defined(OPTERON_ERRATUM_147)
/*
- * If it is necessary to patch the lock enter routines with the lfence
- * workaround, workaround_6323525_patched is set to a non-zero value so that
- * the lockstat_hat_patch routine can patch to the new location of the 'ret'
- * instruction.
+ * Track if erratum 147 workaround has been hotpatched into place.
*/
- DGDEF3(workaround_6323525_patched, 4, 4)
+ DGDEF3(erratum_147_patched, 4, 4)
.long 0
-#define HOT_MUTEX_PATCH(srcaddr, dstaddr, size) \
- movq $size, %rbx; \
- movq $dstaddr, %r13; \
- addq %rbx, %r13; \
- movq $srcaddr, %r12; \
- addq %rbx, %r12; \
-0: \
- decq %r13; \
- decq %r12; \
- movzbl (%r12), %esi; \
- movq $1, %rdx; \
- movq %r13, %rdi; \
- call hot_patch_kernel_text; \
- decq %rbx; \
- testq %rbx, %rbx; \
- jg 0b;
+#define HOT_MUTEX_PATCH(iaddr, insn_reg) \
+ movq $iaddr, %rdi; \
+ movl %insn_reg, %esi; \
+ movl $4, %edx; \
+ call hot_patch_kernel_text;
+
/*
- * patch_workaround_6323525: provide workaround for 6323525
+ * void
+ * patch_erratum_147(void)
+ *
+ * Patch lock operations to work around erratum 147.
*
* The workaround is to place a fencing instruction (lfence) between the
* mutex operation and the subsequent read-modify-write instruction.
- *
- * This routine hot patches the lfence instruction on top of the space
- * reserved by nops in the lock enter routines.
*/
- ENTRY_NP(patch_workaround_6323525)
+
+ ENTRY_NP(patch_erratum_147)
pushq %rbp
movq %rsp, %rbp
pushq %r12
- pushq %r13
- pushq %rbx
/*
- * lockstat_hot_patch() to use the alternate lockstat workaround
- * 6323525 patch points (points past the lfence instruction to the
- * new ret) when workaround_6323525_patched is set.
+ * Patch `nop; nop; nop; nop` sequence to `lfence; nop`. Since those
+ * patch points have been aligned to a 4-byte boundary, we can be
+ * confident that hot_patch_kernel_text() will be able to proceed
+ * safely and successfully.
*/
- movl $1, workaround_6323525_patched
+ movl $0x90e8ae0f, %r12d
+ HOT_MUTEX_PATCH(.mutex_enter_147_patch_point, r12d)
+ HOT_MUTEX_PATCH(.mutex_tryenter_147_patch_point, r12d)
+ HOT_MUTEX_PATCH(.mutex_atryenter_147_patch_point, r12d)
+ HOT_MUTEX_PATCH(.rw_write_enter_147_patch_point, r12d)
- /*
- * patch ret/nop/nop/nop to lfence/ret at the end of the lock enter
- * routines. The 4 bytes are patched in reverse order so that the
- * the existing ret is overwritten last. This provides lock enter
- * sanity during the intermediate patching stages.
- */
- HOT_MUTEX_PATCH(_lfence_insn, .mutex_enter_6323525_patch_point, 4)
- HOT_MUTEX_PATCH(_lfence_insn, .mutex_tryenter_6323525_patch_point, 4)
- HOT_MUTEX_PATCH(_lfence_insn, .mutex_atryenter_6323525_patch_point, 4)
- HOT_MUTEX_PATCH(_lfence_insn, .rw_write_enter_6323525_patch_point, 4)
+ /* Record that erratum 147 points have been hotpatched */
+ movl $1, erratum_147_patched
- popq %rbx
- popq %r13
popq %r12
movq %rbp, %rsp
popq %rbp
ret
-_lfence_insn:
- lfence
- ret
- SET_SIZE(patch_workaround_6323525)
+ SET_SIZE(patch_erratum_147)
+
+#endif /* OPTERON_ERRATUM_147 */
+ /*
+ * void
+ * lockstat_hotpatch_site(caddr_t instr_addr, int do_enable)
+ */
+ ENTRY(lockstat_hotpatch_site)
+ pushq %rbp
+ movq %rsp, %rbp
+ pushq %rdi
+ pushq %rsi
-#endif /* OPTERON_WORKAROUND_6323525 */
+ testl %esi, %esi
+ jz .do_disable
+ /* enable the probe (replace ret with nop) */
+ movl $NOP_INSTR, %esi
+ movl $1, %edx
+ call hot_patch_kernel_text
+ leave
+ ret
-#define HOT_PATCH(addr, event, active_instr, normal_instr, len) \
- movq $normal_instr, %rsi; \
- movq $active_instr, %rdi; \
- leaq lockstat_probemap(%rip), %rax; \
- movl _MUL(event, DTRACE_IDSIZE)(%rax), %eax; \
- testl %eax, %eax; \
- jz 9f; \
- movq %rdi, %rsi; \
-9: \
- movq $len, %rdx; \
- movq $addr, %rdi; \
+.do_disable:
+ /* disable the probe (replace nop with ret) */
+ movl $RET_INSTR, %esi
+ movl $1, %edx
call hot_patch_kernel_text
+ leave
+ ret
+ SET_SIZE(lockstat_hotpatch_site)
+
+#define HOT_PATCH_MATCH(name, probe, reg) \
+ cmpl $probe, %reg; \
+ jne 1f; \
+ leaq lockstat_probemap(%rip), %rax; \
+ movl _MUL(probe, DTRACE_IDSIZE)(%rax), %esi; \
+ movq $./**/name/**/_lockstat_patch_point, %rdi; \
+ call lockstat_hotpatch_site; \
+ 1:
+
+/*
+ * void
+ * lockstat_hotpatch_probe(int ls_probe)
+ *
+ * Given a lockstat probe identifier, hotpatch any associated lockstat
+ * primitive routine(s) so they fall through into the lockstat_probe() call (if
+ * the probe is enabled) or return normally (when the probe is disabled).
+ */
- ENTRY(lockstat_hot_patch)
- pushq %rbp /* align stack properly */
+ ENTRY(lockstat_hotpatch_probe)
+ pushq %rbp
movq %rsp, %rbp
+ pushq %r12
+ movl %edi, %r12d
-#if defined(OPTERON_WORKAROUND_6323525)
- cmpl $0, workaround_6323525_patched
- je 1f
- HOT_PATCH(.mutex_enter_lockstat_6323525_patch_point,
- LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
- HOT_PATCH(.mutex_tryenter_lockstat_6323525_patch_point,
- LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
- HOT_PATCH(.rw_write_enter_lockstat_6323525_patch_point,
- LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
- jmp 2f
-1:
- HOT_PATCH(.mutex_enter_lockstat_patch_point,
- LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
- HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
- LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
- HOT_PATCH(.rw_write_enter_lockstat_patch_point,
- LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
-2:
-#else /* OPTERON_WORKAROUND_6323525 */
- HOT_PATCH(.mutex_enter_lockstat_patch_point,
- LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
- HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
- LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
- HOT_PATCH(.rw_write_enter_lockstat_patch_point,
- LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
-#endif /* !OPTERON_WORKAROUND_6323525 */
- HOT_PATCH(.mutex_exit_lockstat_patch_point,
- LS_MUTEX_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
- HOT_PATCH(.rw_read_enter_lockstat_patch_point,
- LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
- HOT_PATCH(.rw_write_exit_lockstat_patch_point,
- LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
- HOT_PATCH(.rw_read_exit_lockstat_patch_point,
- LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
- HOT_PATCH(.lock_set_lockstat_patch_point,
- LS_LOCK_SET_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
- HOT_PATCH(.lock_try_lockstat_patch_point,
- LS_LOCK_TRY_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
- HOT_PATCH(.lock_clear_lockstat_patch_point,
- LS_LOCK_CLEAR_RELEASE, NOP_INSTR, RET_INSTR, 1)
- HOT_PATCH(.lock_set_spl_lockstat_patch_point,
- LS_LOCK_SET_SPL_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
-
- HOT_PATCH(LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT,
- LS_LOCK_CLEAR_SPLX_RELEASE,
- LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL, 0, 1);
- leave /* unwind stack */
+ HOT_PATCH_MATCH(mutex_enter, LS_MUTEX_ENTER_ACQUIRE, r12d)
+ HOT_PATCH_MATCH(mutex_tryenter, LS_MUTEX_TRYENTER_ACQUIRE, r12d)
+ HOT_PATCH_MATCH(mutex_exit, LS_MUTEX_EXIT_RELEASE, r12d)
+
+ HOT_PATCH_MATCH(rw_write_enter, LS_RW_ENTER_ACQUIRE, r12d)
+ HOT_PATCH_MATCH(rw_read_enter, LS_RW_ENTER_ACQUIRE, r12d)
+ HOT_PATCH_MATCH(rw_write_exit, LS_RW_EXIT_RELEASE, r12d)
+ HOT_PATCH_MATCH(rw_read_exit, LS_RW_EXIT_RELEASE, r12d)
+
+ HOT_PATCH_MATCH(lock_set, LS_LOCK_SET_ACQUIRE, r12d)
+ HOT_PATCH_MATCH(lock_try, LS_LOCK_TRY_ACQUIRE, r12d)
+ HOT_PATCH_MATCH(lock_clear, LS_LOCK_CLEAR_RELEASE, r12d)
+ HOT_PATCH_MATCH(lock_set_spl, LS_LOCK_SET_SPL_ACQUIRE, r12d)
+ HOT_PATCH_MATCH(lock_clear_splx, LS_LOCK_CLEAR_SPLX_RELEASE, r12d)
+
+ popq %r12
+ leave
ret
- SET_SIZE(lockstat_hot_patch)
+ SET_SIZE(lockstat_hotpatch_probe)
ENTRY(membar_enter)
ALTENTRY(membar_exit)
diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h
index ab62bd6deb..c16d430c2e 100644
--- a/usr/src/uts/intel/sys/x86_archext.h
+++ b/usr/src/uts/intel/sys/x86_archext.h
@@ -1366,9 +1366,9 @@ extern int opteron_erratum_100;
extern int opteron_erratum_121;
#endif
-#if defined(OPTERON_WORKAROUND_6323525)
-extern int opteron_workaround_6323525;
-extern void patch_workaround_6323525(void);
+#if defined(OPTERON_ERRATUM_147)
+extern int opteron_erratum_147;
+extern void patch_erratum_147(void);
#endif
#if !defined(__xpv)
diff --git a/usr/src/uts/intel/vio9p/Makefile b/usr/src/uts/intel/vio9p/Makefile
new file mode 100644
index 0000000000..0774962e52
--- /dev/null
+++ b/usr/src/uts/intel/vio9p/Makefile
@@ -0,0 +1,67 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2022 Oxide Computer Company
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = vio9p
+OBJECTS = $(VIO9P_OBJS:%=$(OBJS_DIR)/%)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Overrides
+#
+INC_PATH += -I$(UTSBASE)/common/io/virtio
+
+#
+# Driver depends on virtio
+#
+LDFLAGS += -N misc/virtio
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ